[Rawstudio-commit] r1566 - trunk/src
Anders Brander
anders at brander.dk
Thu Jan 17 03:24:11 CET 2008
Author: abrander
Date: 2008-01-17 03:24:10 +0100 (Thu, 17 Jan 2008)
New Revision: 1566
Modified:
trunk/src/arch-generic.c
trunk/src/arch-x86.c
trunk/src/rs-image.c
trunk/src/rs-image.h
trunk/src/rs-preload.c
Log:
Added MMX-optimized version of rs_image16_copy_double().
Modified: trunk/src/arch-generic.c
===================================================================
--- trunk/src/arch-generic.c 2008-01-15 23:18:23 UTC (rev 1565)
+++ trunk/src/arch-generic.c 2008-01-17 02:24:10 UTC (rev 1566)
@@ -19,6 +19,7 @@
#include "rawstudio.h"
#include "rs-color-transform.h"
+#include "rs-image.h"
/* Default dsp function binder, defined for all archs so that a common C
* implementation of every optimized function is shared among archs */
@@ -27,6 +28,9 @@
{
/* Bind all default C implementation functions */
+ /* Image size doubler */
+ rs_image16_copy_double = rs_image16_copy_double_c;
+
/* Black point and shift applier */
rs_photo_open_dcraw_apply_black_and_shift = rs_photo_open_dcraw_apply_black_and_shift_c;
Modified: trunk/src/arch-x86.c
===================================================================
--- trunk/src/arch-x86.c 2008-01-15 23:18:23 UTC (rev 1565)
+++ trunk/src/arch-x86.c 2008-01-17 02:24:10 UTC (rev 1566)
@@ -21,6 +21,7 @@
#include "rawstudio.h"
#include "rs-color-transform.h"
+#include "rs-image.h"
#include "x86_cpu.h"
@@ -60,6 +61,12 @@
/* Bind functions according to available features */
+ /* Image size doubler */
+ if (cpuflags & _MMX)
+ {
+ rs_image16_copy_double = rs_image16_copy_double_mmx;
+ }
+
/* Black and shift applier */
if (cpuflags & _MMX)
{
Modified: trunk/src/rs-image.c
===================================================================
--- trunk/src/rs-image.c 2008-01-15 23:18:23 UTC (rev 1565)
+++ trunk/src/rs-image.c 2008-01-17 02:24:10 UTC (rev 1566)
@@ -1335,7 +1335,10 @@
* @param out The output image or NULL
*/
RS_IMAGE16 *
-rs_image16_copy_double(RS_IMAGE16 *in, RS_IMAGE16 *out)
+(*rs_image16_copy_double)(RS_IMAGE16 *in, RS_IMAGE16 *out); /* Initialized by arch binder */
+
+RS_IMAGE16 *
+rs_image16_copy_double_c(RS_IMAGE16 *in, RS_IMAGE16 *out)
{
gint row,col;
guint64 *i, *o1, *o2;
@@ -1371,6 +1374,85 @@
return out;
}
+/**
+ * MMX implementation of rs_image16_copy_double(): every 8-byte group read
+ * from an input row is stored twice, side by side, into two consecutive
+ * output rows.
+ * NOTE(review): each movq is treated as exactly one pixel, i.e. this assumes
+ * 8 bytes per pixel (pixelsize 4 with 16 bit samples) - confirm with callers.
+ * @param in The input image
+ * @param out The output image or NULL; if NULL a new image of twice the
+ *            width and height of in is allocated with rs_image16_new()
+ * @return The output image, or NULL if in is NULL
+ */
+RS_IMAGE16 *
+rs_image16_copy_double_mmx(RS_IMAGE16 *in, RS_IMAGE16 *out)
+{
+	gint row;
+	void *i, *o1, *o2;
+	glong cols;
+
+	if (!in) return NULL;
+	if (!out)
+		out = rs_image16_new(in->w*2, in->h*2, in->channels, in->pixelsize);
+
+	out->filters = in->filters;
+	out->fourColorFilters = in->fourColorFilters;
+
+	/* Hold a reference to both images while they are being accessed */
+	rs_image16_ref(in);
+	rs_image16_ref(out);
+
+	/* NOTE(review): row advances by one while each iteration fills rows
+	 * row and row+1, so consecutive iterations overlap; the later
+	 * iteration overwrites row+1 with data from input row (row+1)/2.
+	 * Kept as-is to preserve results for caller-supplied output images. */
+	for(row=0;row<(out->h-1);row++)
+	{
+		i = (void *) GET_PIXEL(in, 0, row/2);
+		o1 = (void *) GET_PIXEL(out, 0, row);
+		o2 = (void *) GET_PIXEL(out, 0, row+1);
+		cols = in->w; /* remaining input pixels in this row */
+
+		/* Numeric local labels are used so the assembler does not see
+		 * duplicate symbols if the compiler emits this block twice. */
+		asm volatile (
+			"cmp $4, %3\n\t" /* fewer than four pixels? skip the unrolled loop */
+			"jl 2f\n\t"
+
+			".p2align 4,,15\n"
+			"1:\n\t" /* unrolled loop: four pixels per pass */
+			"movq (%0), %%mm0\n\t" /* load source */
+			"movq 8(%0), %%mm1\n\t"
+			"movq 16(%0), %%mm2\n\t"
+			"movq 24(%0), %%mm3\n\t"
+			"movq %%mm0, (%1)\n\t" /* write destination (twice) */
+			"movq %%mm0, 8(%1)\n\t"
+			"movq %%mm1, 16(%1)\n\t"
+			"movq %%mm1, 24(%1)\n\t"
+			"movq %%mm2, 32(%1)\n\t"
+			"movq %%mm2, 40(%1)\n\t"
+			"movq %%mm3, 48(%1)\n\t"
+			"movq %%mm3, 56(%1)\n\t"
+			"movq %%mm0, (%2)\n\t" /* same data into the second row */
+			"movq %%mm0, 8(%2)\n\t"
+			"movq %%mm1, 16(%2)\n\t"
+			"movq %%mm1, 24(%2)\n\t"
+			"movq %%mm2, 32(%2)\n\t"
+			"movq %%mm2, 40(%2)\n\t"
+			"movq %%mm3, 48(%2)\n\t"
+			"movq %%mm3, 56(%2)\n\t"
+			"sub $4, %3\n\t"
+			"add $32, %0\n\t" /* 4 pixels read */
+			"add $64, %1\n\t" /* 8 pixels written per row */
+			"add $64, %2\n\t"
+			"cmp $3, %3\n\t"
+			"jg 1b\n\t"
+
+			"2:\n\t"
+			"cmp $1, %3\n\t" /* anything left? */
+			"jl 4f\n\t"
+
+			"3:\n\t" /* leftover pixels, one per pass */
+			"movq (%0), %%mm0\n\t"
+			"movq %%mm0, (%1)\n\t"
+			"movq %%mm0, 8(%1)\n\t"
+			"movq %%mm0, (%2)\n\t"
+			"movq %%mm0, 8(%2)\n\t"
+			"sub $1, %3\n\t"
+			"add $8, %0\n\t" /* 1 pixel read */
+			"add $16, %1\n\t" /* 2 pixels written per row */
+			"add $16, %2\n\t"
+			"cmp $0, %3\n\t"
+			"jg 3b\n\t"
+
+			"4:\n\t"
+			"emms\n\t" /* clean up MMX state */
+			: "+r" (i), "+r" (o1), "+r" (o2), "+r" (cols)
+			:
+			: "memory", "cc" /* stores through o1/o2, flags modified */
+		);
+	}
+	rs_image16_unref(in);
+	rs_image16_unref(out);
+
+	return out;
+}
+
/*
The rest of this file is pretty much copied verbatim from dcraw/ufraw
*/
Modified: trunk/src/rs-image.h
===================================================================
--- trunk/src/rs-image.h 2008-01-15 23:18:23 UTC (rev 1565)
+++ trunk/src/rs-image.h 2008-01-17 02:24:10 UTC (rev 1566)
@@ -119,7 +119,9 @@
* @param in The input image
* @param out The output image or NULL
*/
-extern RS_IMAGE16 *rs_image16_copy_double(RS_IMAGE16 *in, RS_IMAGE16 *out);
+extern RS_IMAGE16 *(*rs_image16_copy_double)(RS_IMAGE16 *in, RS_IMAGE16 *out);
+extern RS_IMAGE16 *rs_image16_copy_double_c(RS_IMAGE16 *in, RS_IMAGE16 *out);
+extern RS_IMAGE16 *rs_image16_copy_double_mmx(RS_IMAGE16 *in, RS_IMAGE16 *out);
/**
* Demosaics a RS_IMAGE16
Modified: trunk/src/rs-preload.c
===================================================================
--- trunk/src/rs-preload.c 2008-01-15 23:18:23 UTC (rev 1565)
+++ trunk/src/rs-preload.c 2008-01-17 02:24:10 UTC (rev 1566)
@@ -257,7 +257,9 @@
PRELOAD_DEBUG("\033[32m%s preloaded\033[0m\n", filename);
photo = rs_photo_new();
p = l->data;
+ GTimer *gt = g_timer_new();
photo->input = rs_image16_copy_double(p->image, NULL);
+ printf("TIME: %.03f\n", g_timer_elapsed(gt, NULL));
photo->filename = g_strdup(p->filename);
}
else
More information about the Rawstudio-commit
mailing list