[Rawstudio-commit] r1566 - trunk/src

Anders Brander anders at brander.dk
Thu Jan 17 03:24:11 CET 2008


Author: abrander
Date: 2008-01-17 03:24:10 +0100 (Thu, 17 Jan 2008)
New Revision: 1566

Modified:
   trunk/src/arch-generic.c
   trunk/src/arch-x86.c
   trunk/src/rs-image.c
   trunk/src/rs-image.h
   trunk/src/rs-preload.c
Log:
Added MMX-optimized version of rs_image16_copy_double().

Modified: trunk/src/arch-generic.c
===================================================================
--- trunk/src/arch-generic.c	2008-01-15 23:18:23 UTC (rev 1565)
+++ trunk/src/arch-generic.c	2008-01-17 02:24:10 UTC (rev 1566)
@@ -19,6 +19,7 @@
 
 #include "rawstudio.h"
 #include "rs-color-transform.h"
+#include "rs-image.h"
 
 /* Default dsp function binder, defined for all archs so that a common C
  * implementation of every optimized function is shared among archs */
@@ -27,6 +28,9 @@
 {
 	/* Bind all default C implementation fucntions */
 
+	/* Image size doubler */
+	rs_image16_copy_double = rs_image16_copy_double_c;
+
 	/* Black point and shift applier */
 	rs_photo_open_dcraw_apply_black_and_shift = rs_photo_open_dcraw_apply_black_and_shift_c;
 

Modified: trunk/src/arch-x86.c
===================================================================
--- trunk/src/arch-x86.c	2008-01-15 23:18:23 UTC (rev 1565)
+++ trunk/src/arch-x86.c	2008-01-17 02:24:10 UTC (rev 1566)
@@ -21,6 +21,7 @@
 
 #include "rawstudio.h"
 #include "rs-color-transform.h"
+#include "rs-image.h"
 
 #include "x86_cpu.h"
 
@@ -60,6 +61,12 @@
 
 	/* Bind functions according to available features */
 
+	/* Image size doubler */
+	if (cpuflags & _MMX)
+	{
+		rs_image16_copy_double = rs_image16_copy_double_mmx;
+	}
+
 	/* Black and shift applier */
 	if (cpuflags & _MMX)
 	{

Modified: trunk/src/rs-image.c
===================================================================
--- trunk/src/rs-image.c	2008-01-15 23:18:23 UTC (rev 1565)
+++ trunk/src/rs-image.c	2008-01-17 02:24:10 UTC (rev 1566)
@@ -1335,7 +1335,10 @@
  * @param out The output image or NULL
  */
 RS_IMAGE16 *
-rs_image16_copy_double(RS_IMAGE16 *in, RS_IMAGE16 *out)
+(*rs_image16_copy_double)(RS_IMAGE16 *in, RS_IMAGE16 *out); /* Initialized by arch binder */
+
+RS_IMAGE16 *
+rs_image16_copy_double_c(RS_IMAGE16 *in, RS_IMAGE16 *out)
 {
 	gint row,col;
 	guint64 *i, *o1, *o2;
@@ -1371,6 +1374,85 @@
 	return out;
 }
 
+/**
+ * MMX implementation of rs_image16_copy_double(): every 8-byte quad read
+ * from the input is stored twice side by side in two consecutive output
+ * rows, i.e. each input pixel becomes a 2x2 block in the output.
+ *
+ * NOTE(review): the assembly moves 8 bytes per pixel, so it presumably
+ * expects pixelsize == 4 (four 16 bit values per pixel) - confirm against
+ * the callers.
+ *
+ * @param in The input image
+ * @param out The output image or NULL, in which case a new image of twice
+ *            the width and height of in is allocated
+ * @return The output image, or NULL if in is NULL
+ */
+RS_IMAGE16 *
+rs_image16_copy_double_mmx(RS_IMAGE16 *in, RS_IMAGE16 *out)
+{
+	gint row;
+	void *i, *o1, *o2;
+	if (!in) return NULL;
+	if (!out)
+		out = rs_image16_new(in->w*2, in->h*2, in->channels, in->pixelsize);
+
+	out->filters = in->filters;
+	out->fourColorFilters = in->fourColorFilters;
+
+	rs_image16_ref(in);
+	rs_image16_ref(out);
+	/* Each iteration fills output rows "row" and "row+1" from input row
+	 * "row/2"; consecutive iterations therefore overlap by one row */
+	for(row=0;row<(out->h-1);row++)
+	{
+		i = (void *) GET_PIXEL(in, 0, row/2);
+		o1 = (void *) GET_PIXEL(out, 0, row);
+		o2 = (void *) GET_PIXEL(out, 0, row+1);
+		/* Numeric local labels are used so the assembler never sees a
+		 * duplicate symbol should the compiler duplicate this statement */
+		asm volatile (
+			"mov %3, %%"REG_a"\n\t" /* remaining input pixels (image width) */
+			"cmp $4, %%"REG_a"\n\t"
+			"jl 2f\n\t" /* fewer than four pixels: skip the unrolled loop */
+
+			".p2align 4,,15\n"
+			"1:\n\t" /* unrolled loop, four input pixels per iteration */
+			"movq (%0), %%mm0\n\t" /* load source */
+			"movq 8(%0), %%mm1\n\t"
+			"movq 16(%0), %%mm2\n\t"
+			"movq 24(%0), %%mm3\n\t"
+			"movq %%mm0, (%1)\n\t" /* write destination (twice) */
+			"movq %%mm0, 8(%1)\n\t"
+			"movq %%mm1, 16(%1)\n\t"
+			"movq %%mm1, 24(%1)\n\t"
+			"movq %%mm2, 32(%1)\n\t"
+			"movq %%mm2, 40(%1)\n\t"
+			"movq %%mm3, 48(%1)\n\t"
+			"movq %%mm3, 56(%1)\n\t"
+			"movq %%mm0, (%2)\n\t"
+			"movq %%mm0, 8(%2)\n\t"
+			"movq %%mm1, 16(%2)\n\t"
+			"movq %%mm1, 24(%2)\n\t"
+			"movq %%mm2, 32(%2)\n\t"
+			"movq %%mm2, 40(%2)\n\t"
+			"movq %%mm3, 48(%2)\n\t"
+			"movq %%mm3, 56(%2)\n\t"
+			"sub $4, %%"REG_a"\n\t"
+			"add $32, %0\n\t" /* 4 pixels read */
+			"add $64, %1\n\t" /* 8 pixels written per output row */
+			"add $64, %2\n\t"
+			"cmp $3, %%"REG_a"\n\t"
+			"jg 1b\n\t"
+
+			"2:\n\t"
+			"cmp $1, %%"REG_a"\n\t"
+			"jl 4f\n\t" /* signed compare: nothing left to copy */
+
+			"3:\n\t" /* leftover pixels, one at a time */
+			"movq (%0), %%mm0\n\t"
+			"movq %%mm0, (%1)\n\t"
+			"movq %%mm0, 8(%1)\n\t"
+			"movq %%mm0, (%2)\n\t"
+			"movq %%mm0, 8(%2)\n\t"
+			"sub $1, %%"REG_a"\n\t"
+			"add $8, %0\n\t" /* 1 pixel read */
+			"add $16, %1\n\t" /* 2 pixels written per output row */
+			"add $16, %2\n\t"
+			"cmp $0, %%"REG_a"\n\t"
+			"jg 3b\n\t"
+
+			"4:\n\t"
+			"emms\n\t" /* clean up */
+			: "+r" (i), "+r" (o1), "+r" (o2)
+			: "r" ((gulong)in->w)
+			: "%"REG_a, "memory", "cc" /* stores through %1/%2, flags changed */
+			);
+	}
+	rs_image16_unref(in);
+	rs_image16_unref(out);
+
+	return out;
+}
+
 /*
 The rest of this file is pretty much copied verbatim from dcraw/ufraw
 */

Modified: trunk/src/rs-image.h
===================================================================
--- trunk/src/rs-image.h	2008-01-15 23:18:23 UTC (rev 1565)
+++ trunk/src/rs-image.h	2008-01-17 02:24:10 UTC (rev 1566)
@@ -119,7 +119,9 @@
  * @param in The input image
  * @param out The output image or NULL
  */
-extern RS_IMAGE16 *rs_image16_copy_double(RS_IMAGE16 *in, RS_IMAGE16 *out);
+extern RS_IMAGE16 *(*rs_image16_copy_double)(RS_IMAGE16 *in, RS_IMAGE16 *out);
+extern RS_IMAGE16 *rs_image16_copy_double_c(RS_IMAGE16 *in, RS_IMAGE16 *out);
+extern RS_IMAGE16 *rs_image16_copy_double_mmx(RS_IMAGE16 *in, RS_IMAGE16 *out);
 
 /**
  * Demosaics a RS_IMAGE16

Modified: trunk/src/rs-preload.c
===================================================================
--- trunk/src/rs-preload.c	2008-01-15 23:18:23 UTC (rev 1565)
+++ trunk/src/rs-preload.c	2008-01-17 02:24:10 UTC (rev 1566)
@@ -257,7 +257,9 @@
 			PRELOAD_DEBUG("\033[32m%s preloaded\033[0m\n", filename);
 			photo = rs_photo_new();
 			p = l->data;
+			GTimer *gt = g_timer_new();
 			photo->input = rs_image16_copy_double(p->image, NULL);
+			printf("TIME: %.03f\n", g_timer_elapsed(gt, NULL));
 			photo->filename = g_strdup(p->filename);
 		}
 		else




More information about the Rawstudio-commit mailing list