[Rawstudio-commit] r771 - trunk/src

Anders Brander anders at brander.dk
Mon Sep 18 11:11:14 CEST 2006


Author: abrander
Date: 2006-09-18 11:11:14 +0200 (Mon, 18 Sep 2006)
New Revision: 771

Added:
   trunk/src/rs-render.c
   trunk/src/rs-render.h
Modified:
   trunk/src/Makefile.am
   trunk/src/drawingarea.c
   trunk/src/gtk-interface.c
   trunk/src/rawstudio.c
   trunk/src/rawstudio.h
   trunk/src/toolbox.c
Log:
Moved (almost) all render code to rs-render.c|h.

Modified: trunk/src/Makefile.am
===================================================================
--- trunk/src/Makefile.am	2006-09-18 08:59:45 UTC (rev 770)
+++ trunk/src/Makefile.am	2006-09-18 09:11:14 UTC (rev 771)
@@ -32,7 +32,8 @@
 	rs-jpeg.c rs-jpeg.h \
 	color.h \
 	gettext.h \
-	filename.c filename.h
+	filename.c filename.h \
+	rs-render.c rs-render.h
 
 rawstudio_LDADD = @PACKAGE_LIBS@ @LIBJPEG@ dcraw_nomain.o $(INTLLIBS)
 

Modified: trunk/src/drawingarea.c
===================================================================
--- trunk/src/drawingarea.c	2006-09-18 08:59:45 UTC (rev 770)
+++ trunk/src/drawingarea.c	2006-09-18 09:11:14 UTC (rev 771)
@@ -62,7 +62,7 @@
 	rs->preview_backing = gdk_pixmap_new(widget->window,
 		widget->allocation.width,
 		widget->allocation.height, -1);
-	update_preview(rs); /* evil hack to catch bogus configure events */
+	update_preview(rs, TRUE); /* evil hack to catch bogus configure events */
 	return(TRUE);
 }
 

Modified: trunk/src/gtk-interface.c
===================================================================
--- trunk/src/gtk-interface.c	2006-09-18 08:59:45 UTC (rev 770)
+++ trunk/src/gtk-interface.c	2006-09-18 09:11:14 UTC (rev 771)
@@ -133,7 +133,7 @@
 	if (rs->photo)
 	{
 		rs_settings_to_rs_settings_double(rs->settings[rs->current_setting], rs->photo->settings[rs->photo->current_setting]);
-		update_preview(rs);
+		update_preview(rs, FALSE);
 	}
 	return(FALSE);
 }
@@ -144,8 +144,7 @@
 	if (rs->photo)
 	{
 		rs_settings_to_rs_settings_double(rs->settings[rs->current_setting], rs->photo->settings[rs->photo->current_setting]);
-		update_previewtable(rs->photo->settings[rs->photo->current_setting]->contrast);
-		update_preview(rs);
+		update_preview(rs, TRUE);
 	}
 	return(FALSE);
 }
@@ -1611,7 +1610,7 @@
 	rs->in_use = FALSE;
 	rs_settings_reset(rs->settings[rs->current_setting], MASK_ALL);
 	rs->in_use = in_use;
-	update_preview(rs);
+	update_preview(rs, TRUE);
 	return;
 }
 
@@ -1637,7 +1636,7 @@
 	else
 	  gui_status_push(_("Hiding exposure mask"));
 	rs->show_exposure_overlay = GTK_CHECK_MENU_ITEM(widget)->active;
-	update_preview(rs);
+	update_preview(rs, FALSE);
 	return;
 }
 
@@ -1655,7 +1654,7 @@
 		rs->settings[rs->photo->current_setting]);
 	photo->filename = NULL;
 	rs_photo_free(photo);
-	update_preview(rs);
+	update_preview(rs, TRUE);
 	return;
 }
 
@@ -1752,7 +1751,7 @@
 				rs->in_use = FALSE;
 				rs_apply_settings_from_double(rs->settings[rs->photo->current_setting], rs->settings_buffer, mask);
 				rs->in_use = in_use;
-				update_preview(rs);
+				update_preview(rs, TRUE);
 
 				gui_status_push(_("Pasted settings"));
 			}

Modified: trunk/src/rawstudio.c
===================================================================
--- trunk/src/rawstudio.c	2006-09-18 08:59:45 UTC (rev 770)
+++ trunk/src/rawstudio.c	2006-09-18 09:11:14 UTC (rev 771)
@@ -42,6 +42,7 @@
 #include "conf_interface.h"
 #include "filename.h"
 #include "rs-jpeg.h"
+#include "rs-render.h"
 
 #define cpuid(n) \
   a = b = c = d = 0x0; \
@@ -51,8 +52,6 @@
 	)
 
 guint cpuflags = 0;
-guchar previewtable[65536];
-gushort previewtable16[65536];
 gushort loadtable[65536];
 
 cmsHPROFILE genericLoadProfile;
@@ -63,13 +62,6 @@
 cmsHTRANSFORM displayTransform;
 cmsHTRANSFORM exportTransform;
 
-void (*rs_render)(RS_PHOTO *photo, gint width, gint height, gushort *in,
-	gint in_rowstride, gint in_channels, guchar *out, gint out_rowstride, void *profile);
-
-void rs_render_cms(RS_PHOTO *photo, gint width, gint height, gushort *in,
-	gint in_rowstride, gint in_channels, guchar *out, gint out_rowstride, void *profile);
-void rs_render_nocms(RS_PHOTO *photo, gint width, gint height, gushort *in,
-	gint in_rowstride, gint in_channels, guchar *out, gint out_rowstride, void *profile);
 inline void rs_photo_prepare(RS_PHOTO *photo);
 void update_scaled(RS_BLOB *rs);
 inline void rs_render_mask(guchar *pixels, guchar *mask, guint length);
@@ -123,32 +115,6 @@
 };
 
 void
-update_previewtable(const gdouble contrast)
-{
-	gint n;
-	gdouble nd;
-	gint res;
-	double gammavalue;
-	const double postadd = 0.5 - (contrast/2.0);
-	gammavalue = (1.0/GAMMA);
-
-	for(n=0;n<65536;n++)
-	{
-		nd = ((gdouble) n) / 65535.0;
-		nd = pow(nd, gammavalue)*contrast+postadd;
-
-		res = (gint) (nd*255.0);
-		_CLAMP255(res);
-		previewtable[n] = res;
-
-		nd = pow(nd, GAMMA);
-		res = (gint) (nd*65535.0);
-		_CLAMP65535(res);
-		previewtable16[n] = res;
-	}
-}
-
-void
 make_gammatable16(gushort *table, gdouble gamma)
 {
 	gint n;
@@ -220,10 +186,12 @@
 }
 
 void
-update_preview(RS_BLOB *rs)
+update_preview(RS_BLOB *rs, gboolean update_table)
 {
 	if(unlikely(!rs->photo)) return;
 
+	if (update_table)
+		rs_render_previewtable(rs->photo->settings[rs->photo->current_setting]->contrast);
 	update_scaled(rs);
 	rs_photo_prepare(rs->photo);
 	update_preview_region(rs, rs->preview_exposed);
@@ -231,7 +199,34 @@
 	/* Reset histogram_table */
 	if (GTK_WIDGET_VISIBLE(rs->histogram_image))
 	{
-		memset(rs->histogram_table, 0x00, sizeof(guint)*3*256);
+#ifdef __i386__
+		if (likely(cpuflags & _MMX))
+		{
+			asm volatile (
+				"movl $3072, %%eax\n\t" /* counter */
+				"pxor %%mm0, %%mm0\n\t" /* 0x0 */
+				".p2align 4,,15\n"
+				"reset_histogram_table_loop:\n\t"
+				"movq %%mm0, (%0)\n\t" /* write dest */
+				"movq %%mm0, 8(%0)\n\t"
+				"movq %%mm0, 16(%0)\n\t"
+				"movq %%mm0, 24(%0)\n\t"
+				"movq %%mm0, 32(%0)\n\t"
+				"movq %%mm0, 40(%0)\n\t"
+				"movq %%mm0, 48(%0)\n\t"
+				"movq %%mm0, 56(%0)\n\t"
+				"add $64, %0\n\t"
+				"sub $64, %%eax\n\t"
+				"jg reset_histogram_table_loop\n\t"
+				"emms\n\t"
+				:
+				: "r" (rs->histogram_table)
+				: "%eax"
+			);
+		}
+		else
+#endif
+			memset(rs->histogram_table, 0x00, sizeof(guint)*3*256);
 		rs_histogram_update_table(rs, rs->histogram_dataset, (guint *) rs->histogram_table);
 		update_histogram(rs);
 	}
@@ -438,478 +433,6 @@
 	return;
 }
 
-void
-rs_render_cms(RS_PHOTO *photo, gint width, gint height, gushort *in,
-	gint in_rowstride, gint in_channels, guchar *out, gint out_rowstride, void *profile)
-{
-	gushort *buffer = g_malloc(width*3*sizeof(gushort));
-#ifdef __i386__
-	if (cpuflags & _SSE)
-	{
-		register gint r,g,b;
-		gint destoffset;
-		gint col;
-		gfloat top[4] align(16) = {65535.0, 65535.0, 65535.0, 65535.0};
-		gfloat mat[12] align(16) = {
-		photo->mat.coeff[0][0],
-		photo->mat.coeff[1][0],
-		photo->mat.coeff[2][0],
-		0.0,
-		photo->mat.coeff[0][1],
-		photo->mat.coeff[1][1],
-		photo->mat.coeff[2][1],
-		0.0,
-		photo->mat.coeff[0][2],
-		photo->mat.coeff[1][2],
-		photo->mat.coeff[2][2],
-		0.0 };
-		asm volatile (
-			"movups (%2), %%xmm2\n\t" /* rs->pre_mul */
-			"movaps (%0), %%xmm3\n\t" /* matrix */
-			"movaps 16(%0), %%xmm4\n\t"
-			"movaps 32(%0), %%xmm5\n\t"
-			"movaps (%1), %%xmm6\n\t" /* top */
-			"pxor %%mm7, %%mm7\n\t" /* 0x0 */
-			:
-			: "r" (mat), "r" (top), "r" (photo->pre_mul)
-			: "memory"
-		);
-		while(height--)
-		{
-			destoffset = 0;
-			col = width;
-			gushort *s = in + height * in_rowstride;
-			while(col--)
-			{
-				asm volatile (
-					/* load */
-					"movq (%3), %%mm0\n\t" /* R | G | B | G2 */
-					"movq %%mm0, %%mm1\n\t" /* R | G | B | G2 */
-					"punpcklwd %%mm7, %%mm0\n\t" /* R | G */
-					"punpckhwd %%mm7, %%mm1\n\t" /* B | G2 */
-					"cvtpi2ps %%mm1, %%xmm0\n\t" /* B | G2 | ? | ? */
-					"shufps $0x4E, %%xmm0, %%xmm0\n\t" /* ? | ? | B | G2 */
-					"cvtpi2ps %%mm0, %%xmm0\n\t" /* R | G | B | G2 */
-
-					"mulps %%xmm2, %%xmm0\n\t"
-					"maxps %%xmm7, %%xmm0\n\t"
-					"minps %%xmm6, %%xmm0\n\t"
-
-					"movaps %%xmm0, %%xmm1\n\t"
-					"shufps $0x0, %%xmm0, %%xmm1\n\t"
-					"mulps %%xmm3, %%xmm1\n\t"
-					"addps %%xmm1, %%xmm7\n\t"
-
-					"movaps %%xmm0, %%xmm1\n\t"
-					"shufps $0x55, %%xmm1, %%xmm1\n\t"
-					"mulps %%xmm4, %%xmm1\n\t"
-					"addps %%xmm1, %%xmm7\n\t"
-
-					"movaps %%xmm0, %%xmm1\n\t"
-					"shufps $0xAA, %%xmm1, %%xmm1\n\t"
-					"mulps %%xmm5, %%xmm1\n\t"
-					"addps %%xmm7, %%xmm1\n\t"
-
-					"xorps %%xmm7, %%xmm7\n\t"
-					"minps %%xmm6, %%xmm1\n\t"
-					"maxps %%xmm7, %%xmm1\n\t"
-
-					"cvtss2si %%xmm1, %0\n\t"
-					"shufps $0xF9, %%xmm1, %%xmm1\n\t"
-					"cvtss2si %%xmm1, %1\n\t"
-					"shufps $0xF9, %%xmm1, %%xmm1\n\t"
-					"cvtss2si %%xmm1, %2\n\t"
-					: "=r" (r), "=r" (g), "=r" (b)
-					: "r" (s)
-					: "memory"
-				);
-				buffer[destoffset++] = previewtable16[r];
-				buffer[destoffset++] = previewtable16[g];
-				buffer[destoffset++] = previewtable16[b];
-				s += 4;
-			}
-			cmsDoTransform((cmsHPROFILE) profile, buffer, out+height * out_rowstride, width);
-		}
-		asm volatile("emms\n\t");
-	}
-	else if (cpuflags & _3DNOW)
-	{
-		gint destoffset;
-		gint col;
-		register gint r=0,g=0,b=0;
-		gfloat mat[12] align(8);
-		gfloat top[2] align(8);
-		mat[0] = photo->mat.coeff[0][0];
-		mat[1] = photo->mat.coeff[0][1];
-		mat[2] = photo->mat.coeff[0][2];
-		mat[3] = photo->mat.coeff[0][1]*0.0;
-		mat[4] = photo->mat.coeff[1][0];
-		mat[5] = photo->mat.coeff[1][1];
-		mat[6] = photo->mat.coeff[1][2];
-		mat[7] = photo->mat.coeff[1][1]*0.0;
-		mat[8] = photo->mat.coeff[2][0];
-		mat[9] = photo->mat.coeff[2][1];
-		mat[10] = photo->mat.coeff[2][2];
-		mat[11] = photo->mat.coeff[2][1]*0.0;
-		top[0] = 65535.0;
-		top[1] = 65535.0;
-		asm volatile (
-			"femms\n\t"
-			"pxor %%mm7, %%mm7\n\t" /* 0x0 */
-			"movq (%0), %%mm2\n\t" /* pre_mul R | pre_mul G */
-			"movq 8(%0), %%mm3\n\t" /* pre_mul B | pre_mul G2 */
-			"movq (%1), %%mm6\n\t" /* 65535.0 | 65535.0 */
-			:
-			: "r" (&photo->pre_mul), "r" (&top)
-		);
-		while(height--)
-		{
-			destoffset = 0;
-			col = width;
-			gushort *s = in + height * in_rowstride;
-			while(col--)
-			{
-				asm volatile (
-					/* pre multiply */
-					"movq (%0), %%mm0\n\t" /* R | G | B | G2 */
-					"movq %%mm0, %%mm1\n\t" /* R | G | B | G2 */
-					"punpcklwd %%mm7, %%mm0\n\t" /* R, G */
-					"punpckhwd %%mm7, %%mm1\n\t" /* B, G2 */
-					"pi2fd %%mm0, %%mm0\n\t" /* to float */
-					"pi2fd %%mm1, %%mm1\n\t"
-					"pfmul %%mm2, %%mm0\n\t" /* pre_mul[R]*R | pre_mul[G]*G */
-					"pfmul %%mm3, %%mm1\n\t" /* pre_mul[B]*B | pre_mul[G2]*G2 */
-					"pfmin %%mm6, %%mm0\n\t"
-					"pfmin %%mm6, %%mm1\n\t"
-					"pfmax %%mm7, %%mm0\n\t"
-					"pfmax %%mm7, %%mm1\n\t"
-
-					"add $8, %0\n\t" /* increment offset */
-
-					/* red */
-					"movq (%4), %%mm4\n\t" /* mat[0] | mat[1] */
-					"movq 8(%4), %%mm5\n\t" /* mat[2] | mat[3] */
-					"pfmul %%mm0, %%mm4\n\t" /* R*[0] | G*[1] */
-					"pfmul %%mm1, %%mm5\n\t" /* B*[2] | G2*[3] */
-					"pfadd %%mm4, %%mm5\n\t" /* R*[0] + B*[2] | G*[1] + G2*[3] */
-					"pfacc %%mm5, %%mm5\n\t" /* R*[0] + B*[2] + G*[1] + G2*[3] | ? */
-					"pfmin %%mm6, %%mm5\n\t"
-					"pfmax %%mm7, %%mm5\n\t"
-					"pf2id %%mm5, %%mm5\n\t" /* to integer */
-					"movd %%mm5, %1\n\t" /* write r */
-
-					/* green */
-					"movq 16(%4), %%mm4\n\t"
-					"movq 24(%4), %%mm5\n\t"
-					"pfmul %%mm0, %%mm4\n\t"
-					"pfmul %%mm1, %%mm5\n\t"
-					"pfadd %%mm4, %%mm5\n\t"
-					"pfacc %%mm5, %%mm5\n\t"
-					"pfmin %%mm6, %%mm5\n\t"
-					"pfmax %%mm7, %%mm5\n\t"
-					"pf2id %%mm5, %%mm5\n\t"
-					"movd %%mm5, %2\n\t"
-
-					/* blue */
-					"movq 32(%4), %%mm4\n\t"
-					"movq 40(%4), %%mm5\n\t"
-					"pfmul %%mm0, %%mm4\n\t"
-					"pfmul %%mm1, %%mm5\n\t"
-					"pfadd %%mm4, %%mm5\n\t"
-					"pfacc %%mm5, %%mm5\n\t"
-					"pfmin %%mm6, %%mm5\n\t"
-					"pfmax %%mm7, %%mm5\n\t"
-					"pf2id %%mm5, %%mm5\n\t"
-					"movd %%mm5, %3\n\t"
-					: "+r" (s), "+r" (r), "+r" (g), "+r" (b)
-					: "r" (&mat)
-				);
-				buffer[destoffset++] = previewtable16[r];
-				buffer[destoffset++] = previewtable16[g];
-				buffer[destoffset++] = previewtable16[b];
-			}
-			cmsDoTransform((cmsHPROFILE) profile, buffer, out+height * out_rowstride, width);
-		}
-		asm volatile ("femms\n\t");
-	}
-	else
-#endif
-	{
-		gint srcoffset, destoffset;
-		register gint x,y;
-		register gint r,g,b;
-		gint rr,gg,bb;
-		gint pre_mul[4];
-		for(x=0;x<4;x++)
-			pre_mul[x] = (gint) (photo->pre_mul[x]*128.0);
-		for(y=0 ; y<height ; y++)
-		{
-			destoffset = 0;
-			srcoffset = y * in_rowstride;
-			for(x=0 ; x<width ; x++)
-			{
-				rr = (in[srcoffset+R]*pre_mul[R])>>7;
-				gg = (in[srcoffset+G]*pre_mul[G])>>7;
-				bb = (in[srcoffset+B]*pre_mul[B])>>7;
-				_CLAMP65535_TRIPLET(rr,gg,bb);
-				r = (rr*photo->mati.coeff[0][0]
-					+ gg*photo->mati.coeff[0][1]
-					+ bb*photo->mati.coeff[0][2])>>MATRIX_RESOLUTION;
-				g = (rr*photo->mati.coeff[1][0]
-					+ gg*photo->mati.coeff[1][1]
-					+ bb*photo->mati.coeff[1][2])>>MATRIX_RESOLUTION;
-				b = (rr*photo->mati.coeff[2][0]
-					+ gg*photo->mati.coeff[2][1]
-					+ bb*photo->mati.coeff[2][2])>>MATRIX_RESOLUTION;
-				_CLAMP65535_TRIPLET(r,g,b);
-				buffer[destoffset++] = previewtable16[r];
-				buffer[destoffset++] = previewtable16[g];
-				buffer[destoffset++] = previewtable16[b];
-				srcoffset+=in_channels;
-			}
-			cmsDoTransform((cmsHPROFILE) profile, buffer, out+y * out_rowstride, width);
-		}
-	}
-	g_free(buffer);
-	return;
-}
-
-void
-rs_render_nocms(RS_PHOTO *photo, gint width, gint height, gushort *in,
-	gint in_rowstride, gint in_channels, guchar *out, gint out_rowstride, void *profile)
-{
-	gushort *buffer = g_malloc(width*3*sizeof(gushort));
-#ifdef __i386__
-	if (cpuflags & _SSE)
-	{
-		register gint r,g,b;
-		gint destoffset;
-		gint col;
-		gfloat top[4] align(16) = {65535.0, 65535.0, 65535.0, 65535.0};
-		gfloat mat[12] align(16) = {
-		photo->mat.coeff[0][0],
-		photo->mat.coeff[1][0],
-		photo->mat.coeff[2][0],
-		0.0,
-		photo->mat.coeff[0][1],
-		photo->mat.coeff[1][1],
-		photo->mat.coeff[2][1],
-		0.0,
-		photo->mat.coeff[0][2],
-		photo->mat.coeff[1][2],
-		photo->mat.coeff[2][2],
-		0.0 };
-		asm volatile (
-			"movups (%2), %%xmm2\n\t" /* rs->pre_mul */
-			"movaps (%0), %%xmm3\n\t" /* matrix */
-			"movaps 16(%0), %%xmm4\n\t"
-			"movaps 32(%0), %%xmm5\n\t"
-			"movaps (%1), %%xmm6\n\t" /* top */
-			"pxor %%mm7, %%mm7\n\t" /* 0x0 */
-			:
-			: "r" (mat), "r" (top), "r" (photo->pre_mul)
-			: "memory"
-		);
-		while(height--)
-		{
-			destoffset = 0;
-			col = width;
-			gushort *s = in + height * in_rowstride;
-			guchar *d = out + height * out_rowstride;
-			while(col--)
-			{
-				asm volatile (
-					/* load */
-					"movq (%3), %%mm0\n\t" /* R | G | B | G2 */
-					"movq %%mm0, %%mm1\n\t" /* R | G | B | G2 */
-					"punpcklwd %%mm7, %%mm0\n\t" /* R | G */
-					"punpckhwd %%mm7, %%mm1\n\t" /* B | G2 */
-					"cvtpi2ps %%mm1, %%xmm0\n\t" /* B | G2 | ? | ? */
-					"shufps $0x4E, %%xmm0, %%xmm0\n\t" /* ? | ? | B | G2 */
-					"cvtpi2ps %%mm0, %%xmm0\n\t" /* R | G | B | G2 */
-
-					"mulps %%xmm2, %%xmm0\n\t"
-					"maxps %%xmm7, %%xmm0\n\t"
-					"minps %%xmm6, %%xmm0\n\t"
-
-					"movaps %%xmm0, %%xmm1\n\t"
-					"shufps $0x0, %%xmm0, %%xmm1\n\t"
-					"mulps %%xmm3, %%xmm1\n\t"
-					"addps %%xmm1, %%xmm7\n\t"
-
-					"movaps %%xmm0, %%xmm1\n\t"
-					"shufps $0x55, %%xmm1, %%xmm1\n\t"
-					"mulps %%xmm4, %%xmm1\n\t"
-					"addps %%xmm1, %%xmm7\n\t"
-
-					"movaps %%xmm0, %%xmm1\n\t"
-					"shufps $0xAA, %%xmm1, %%xmm1\n\t"
-					"mulps %%xmm5, %%xmm1\n\t"
-					"addps %%xmm7, %%xmm1\n\t"
-
-					"xorps %%xmm7, %%xmm7\n\t"
-					"minps %%xmm6, %%xmm1\n\t"
-					"maxps %%xmm7, %%xmm1\n\t"
-
-					"cvtss2si %%xmm1, %0\n\t"
-					"shufps $0xF9, %%xmm1, %%xmm1\n\t"
-					"cvtss2si %%xmm1, %1\n\t"
-					"shufps $0xF9, %%xmm1, %%xmm1\n\t"
-					"cvtss2si %%xmm1, %2\n\t"
-					: "=r" (r), "=r" (g), "=r" (b)
-					: "r" (s)
-					: "memory"
-				);
-				d[destoffset++] = previewtable[r];
-				d[destoffset++] = previewtable[g];
-				d[destoffset++] = previewtable[b];
-				s += 4;
-			}
-		}
-		asm volatile("emms\n\t");
-	}
-	else if (cpuflags & _3DNOW)
-	{
-		gint destoffset;
-		gint col;
-		register gint r=0,g=0,b=0;
-		gfloat mat[12] align(8);
-		gfloat top[2] align(8);
-		mat[0] = photo->mat.coeff[0][0];
-		mat[1] = photo->mat.coeff[0][1];
-		mat[2] = photo->mat.coeff[0][2];
-		mat[3] = photo->mat.coeff[0][1]*0.0;
-		mat[4] = photo->mat.coeff[1][0];
-		mat[5] = photo->mat.coeff[1][1];
-		mat[6] = photo->mat.coeff[1][2];
-		mat[7] = photo->mat.coeff[1][1]*0.0;
-		mat[8] = photo->mat.coeff[2][0];
-		mat[9] = photo->mat.coeff[2][1];
-		mat[10] = photo->mat.coeff[2][2];
-		mat[11] = photo->mat.coeff[2][1]*0.0;
-		top[0] = 65535.0;
-		top[1] = 65535.0;
-		asm volatile (
-			"femms\n\t"
-			"pxor %%mm7, %%mm7\n\t" /* 0x0 */
-			"movq (%0), %%mm2\n\t" /* pre_mul R | pre_mul G */
-			"movq 8(%0), %%mm3\n\t" /* pre_mul B | pre_mul G2 */
-			"movq (%1), %%mm6\n\t" /* 65535.0 | 65535.0 */
-			:
-			: "r" (&photo->pre_mul), "r" (&top)
-		);
-		while(height--)
-		{
-			destoffset = 0;
-			col = width;
-			gushort *s = in + height * in_rowstride;
-			guchar *d = out + height * out_rowstride;
-			while(col--)
-			{
-				asm volatile (
-					/* pre multiply */
-					"movq (%0), %%mm0\n\t" /* R | G | B | G2 */
-					"movq %%mm0, %%mm1\n\t" /* R | G | B | G2 */
-					"punpcklwd %%mm7, %%mm0\n\t" /* R, G */
-					"punpckhwd %%mm7, %%mm1\n\t" /* B, G2 */
-					"pi2fd %%mm0, %%mm0\n\t" /* to float */
-					"pi2fd %%mm1, %%mm1\n\t"
-					"pfmul %%mm2, %%mm0\n\t" /* pre_mul[R]*R | pre_mul[G]*G */
-					"pfmul %%mm3, %%mm1\n\t" /* pre_mul[B]*B | pre_mul[G2]*G2 */
-					"pfmin %%mm6, %%mm0\n\t"
-					"pfmin %%mm6, %%mm1\n\t"
-					"pfmax %%mm7, %%mm0\n\t"
-					"pfmax %%mm7, %%mm1\n\t"
-
-					"add $8, %0\n\t" /* increment offset */
-
-					/* red */
-					"movq (%4), %%mm4\n\t" /* mat[0] | mat[1] */
-					"movq 8(%4), %%mm5\n\t" /* mat[2] | mat[3] */
-					"pfmul %%mm0, %%mm4\n\t" /* R*[0] | G*[1] */
-					"pfmul %%mm1, %%mm5\n\t" /* B*[2] | G2*[3] */
-					"pfadd %%mm4, %%mm5\n\t" /* R*[0] + B*[2] | G*[1] + G2*[3] */
-					"pfacc %%mm5, %%mm5\n\t" /* R*[0] + B*[2] + G*[1] + G2*[3] | ? */
-					"pfmin %%mm6, %%mm5\n\t"
-					"pfmax %%mm7, %%mm5\n\t"
-					"pf2id %%mm5, %%mm5\n\t" /* to integer */
-					"movd %%mm5, %1\n\t" /* write r */
-
-					/* green */
-					"movq 16(%4), %%mm4\n\t"
-					"movq 24(%4), %%mm5\n\t"
-					"pfmul %%mm0, %%mm4\n\t"
-					"pfmul %%mm1, %%mm5\n\t"
-					"pfadd %%mm4, %%mm5\n\t"
-					"pfacc %%mm5, %%mm5\n\t"
-					"pfmin %%mm6, %%mm5\n\t"
-					"pfmax %%mm7, %%mm5\n\t"
-					"pf2id %%mm5, %%mm5\n\t"
-					"movd %%mm5, %2\n\t"
-
-					/* blue */
-					"movq 32(%4), %%mm4\n\t"
-					"movq 40(%4), %%mm5\n\t"
-					"pfmul %%mm0, %%mm4\n\t"
-					"pfmul %%mm1, %%mm5\n\t"
-					"pfadd %%mm4, %%mm5\n\t"
-					"pfacc %%mm5, %%mm5\n\t"
-					"pfmin %%mm6, %%mm5\n\t"
-					"pfmax %%mm7, %%mm5\n\t"
-					"pf2id %%mm5, %%mm5\n\t"
-					"movd %%mm5, %3\n\t"
-					: "+r" (s), "+r" (r), "+r" (g), "+r" (b)
-					: "r" (&mat)
-				);
-				d[destoffset++] = previewtable[r];
-				d[destoffset++] = previewtable[g];
-				d[destoffset++] = previewtable[b];
-			}
-		}
-		asm volatile ("femms\n\t");
-	}
-	else
-#endif
-	{
-		gint srcoffset, destoffset;
-		register gint x,y;
-		register gint r,g,b;
-		gint rr,gg,bb;
-		gint pre_mul[4];
-		for(x=0;x<4;x++)
-			pre_mul[x] = (gint) (photo->pre_mul[x]*128.0);
-		for(y=0 ; y<height ; y++)
-		{
-			destoffset = 0;
-			srcoffset = y * in_rowstride;
-			guchar *d = out + height * out_rowstride;
-			for(x=0 ; x<width ; x++)
-			{
-				rr = (in[srcoffset+R]*pre_mul[R])>>7;
-				gg = (in[srcoffset+G]*pre_mul[G])>>7;
-				bb = (in[srcoffset+B]*pre_mul[B])>>7;
-				_CLAMP65535_TRIPLET(rr,gg,bb);
-				r = (rr*photo->mati.coeff[0][0]
-					+ gg*photo->mati.coeff[0][1]
-					+ bb*photo->mati.coeff[0][2])>>MATRIX_RESOLUTION;
-				g = (rr*photo->mati.coeff[1][0]
-					+ gg*photo->mati.coeff[1][1]
-					+ bb*photo->mati.coeff[1][2])>>MATRIX_RESOLUTION;
-				b = (rr*photo->mati.coeff[2][0]
-					+ gg*photo->mati.coeff[2][1]
-					+ bb*photo->mati.coeff[2][2])>>MATRIX_RESOLUTION;
-				_CLAMP65535_TRIPLET(r,g,b);
-				d[destoffset++] = previewtable[r];
-				d[destoffset++] = previewtable[g];
-				d[destoffset++] = previewtable[b];
-				srcoffset+=in_channels;
-			}
-		}
-	}
-	g_free(buffer);
-	return;
-}
-
 inline void
 rs_histogram_update_table(RS_BLOB *rs, RS_IMAGE16 *input, guint *table)
 {
@@ -918,6 +441,7 @@
 	gint r,g,b,rr,gg,bb;
 	gushort *in;
 	gint pre_mul[4];
+	extern guchar previewtable[]; /* FIXME: Move this to rs-render.c */
 
 	if (unlikely(input==NULL)) return;
 
@@ -962,7 +486,7 @@
 	for(c=0;c<3;c++)
 		rs_settings_reset(rs->settings[c], MASK_ALL);
 	rs->in_use = in_use;
-	update_preview(rs);
+	update_preview(rs, TRUE);
 	return;
 }
 
@@ -1910,10 +1434,7 @@
 			genericRGBProfile, TYPE_RGB_8, rs->cms_intent, 0);
 	cmsSetUserFormatters(exportTransform, TYPE_RGB_16, mycms_unroll_rgb_w, TYPE_RGB_8, mycms_pack_rgb_b);
 
-	if (rs->cms_enabled)
-		rs_render = rs_render_cms;
-	else
-		rs_render = rs_render_nocms;
+	rs_render_select(rs->cms_enabled);
 	return;
 }
 

Modified: trunk/src/rawstudio.h
===================================================================
--- trunk/src/rawstudio.h	2006-09-18 08:59:45 UTC (rev 770)
+++ trunk/src/rawstudio.h	2006-09-18 09:11:14 UTC (rev 771)
@@ -217,10 +217,9 @@
 	void (*load_meta)(const gchar *, RS_METADATA *);
 } RS_FILETYPE;
 
-void update_previewtable(const double contrast);
 void rs_local_cachedir(gboolean new_value);
 void rs_load_gdk(gboolean new_value);
-void update_preview(RS_BLOB *rs);
+void update_preview(RS_BLOB *rs, gboolean update_table);
 void update_preview_region(RS_BLOB *rs, RS_RECT *region);
 gboolean rs_run_batch_idle(RS_QUEUE *queue);
 void rs_reset(RS_BLOB *rs);

Added: trunk/src/rs-render.c
===================================================================
--- trunk/src/rs-render.c	2006-09-18 08:59:45 UTC (rev 770)
+++ trunk/src/rs-render.c	2006-09-18 09:11:14 UTC (rev 771)
@@ -0,0 +1,580 @@
+/*
+ * Copyright (C) 2006 Anders Brander <anders at brander.dk> and 
+ * Anders Kvist <akv at lnxbx.dk>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <gtk/gtk.h>
+#include <lcms.h>
+#include "color.h"
+#include "matrix.h"
+#include "rs-batch.h"
+#include "rawstudio.h"
+
+void rs_render_cms_sse(RS_PHOTO *photo, gint width, gint height, gushort *in,
+	gint in_rowstride, gint in_channels, guchar *out, gint out_rowstride, void *profile); /* picked by rs_render_select() when cms is set and cpuflags has _SSE */
+void rs_render_cms_3dnow(RS_PHOTO *photo, gint width, gint height, gushort *in,
+	gint in_rowstride, gint in_channels, guchar *out, gint out_rowstride, void *profile); /* picked when cms is set and cpuflags has _3DNOW but not _SSE */
+void rs_render_cms(RS_PHOTO *photo, gint width, gint height, gushort *in,
+	gint in_rowstride, gint in_channels, guchar *out, gint out_rowstride, void *profile); /* picked when cms is set and neither flag is present */
+void rs_render_nocms_sse(RS_PHOTO *photo, gint width, gint height, gushort *in,
+	gint in_rowstride, gint in_channels, guchar *out, gint out_rowstride, void *profile); /* picked when cms is not set and cpuflags has _SSE */
+void rs_render_nocms_3dnow(RS_PHOTO *photo, gint width, gint height, gushort *in,
+	gint in_rowstride, gint in_channels, guchar *out, gint out_rowstride, void *profile); /* picked when cms is not set and cpuflags has _3DNOW but not _SSE */
+void rs_render_nocms(RS_PHOTO *photo, gint width, gint height, gushort *in,
+	gint in_rowstride, gint in_channels, guchar *out, gint out_rowstride, void *profile); /* picked when cms is not set and neither flag is present */
+
+guchar previewtable[65536]; /* 16 bit value -> 8 bit output value; filled by rs_render_previewtable() */
+gushort previewtable16[65536]; /* 16 bit value -> 16 bit output value; filled by rs_render_previewtable() */
+
+void
+rs_render_select(gboolean cms)
+{ /* Points rs_render at the renderer matching cms and cpuflags: SSE first, then 3DNow!, else the plain C version. */
+	extern guint cpuflags; /* CPU feature bits; the definition in rawstudio.c is a guint */
+	extern void (*rs_render)(RS_PHOTO *, gint, gint, gushort *, gint, gint, guchar *, gint, void *); /* same signature as the renderers declared above */
+	if (cms) /* renderers that pass each row through cmsDoTransform() */
+	{
+		if (cpuflags & _SSE)
+			rs_render = rs_render_cms_sse;
+		else if (cpuflags & _3DNOW)
+			rs_render = rs_render_cms_3dnow;
+		else
+			rs_render = rs_render_cms;
+	}
+	else /* renderers without colour management */
+	{
+		if (cpuflags & _SSE)
+			rs_render = rs_render_nocms_sse;
+		else if (cpuflags & _3DNOW)
+			rs_render = rs_render_nocms_3dnow;
+		else
+			rs_render = rs_render_nocms;
+	}
+	return;
+}
+
+void
+rs_render_previewtable(const gdouble contrast)
+{ /* Rebuilds previewtable[] and previewtable16[] for the given contrast. */
+	register gint n;
+	gdouble nd;
+	register gint res;
+	const double gammavalue = 1.0/GAMMA; /* exponent applied before contrast; undone with pow(nd, GAMMA) for the 16 bit table */
+	const double postadd = 0.5 - (contrast/2.0); /* nd*contrast+postadd == (nd-0.5)*contrast+0.5, so 0.5 stays fixed */
+	/* one table entry per possible 16 bit input value */
+	for(n=0;n<65536;n++)
+	{
+		nd = ((gdouble) n) / 65535.0;
+		nd = pow(nd, gammavalue)*contrast+postadd;
+		if (nd < 0.0) nd = 0.0; /* contrast > 1 can push nd below zero; pow() of a negative base below would yield NaN */
+
+		res = (gint) (nd*255.0);
+		_CLAMP255(res); /* presumably limits res to 0..255; macro is defined outside this file */
+		previewtable[n] = res;
+
+		nd = pow(nd, GAMMA);
+		res = (gint) (nd*65535.0);
+		_CLAMP65535(res); /* presumably limits res to 0..65535; macro is defined outside this file */
+		previewtable16[n] = res;
+	}
+}
+
+void
+rs_render_cms_sse(RS_PHOTO *photo, gint width, gint height, gushort *in,
+	gint in_rowstride, gint in_channels, guchar *out, gint out_rowstride, void *profile)
+{ /* SSE renderer with colour management: pre_mul, 3x3 matrix, previewtable16 lookup, then cmsDoTransform() per row. Empty unless __i386__. */
+#ifdef __i386__
+	gushort *buffer = g_malloc(width*3*sizeof(gushort)); /* one row of 16 bit RGB triplets handed to cmsDoTransform() */
+	register gint r,g,b;
+	gint destoffset, col;
+	gfloat top[4] align(16) = {65535.0, 65535.0, 65535.0, 65535.0}; /* upper clamp */
+	gfloat mat[12] align(16) = { /* photo->mat.coeff transposed: each group of four is one matrix column plus 0.0 padding */
+		photo->mat.coeff[0][0],
+		photo->mat.coeff[1][0],
+		photo->mat.coeff[2][0],
+		0.0,
+		photo->mat.coeff[0][1],
+		photo->mat.coeff[1][1],
+		photo->mat.coeff[2][1],
+		0.0,
+		photo->mat.coeff[0][2],
+		photo->mat.coeff[1][2],
+		photo->mat.coeff[2][2],
+		0.0 };
+	asm volatile (
+		"movups (%2), %%xmm2\n\t" /* photo->pre_mul */
+		"movaps (%0), %%xmm3\n\t" /* matrix */
+		"movaps 16(%0), %%xmm4\n\t"
+		"movaps 32(%0), %%xmm5\n\t"
+		"movaps (%1), %%xmm6\n\t" /* top */
+		"pxor %%mm7, %%mm7\n\t" /* 0x0 */
+		"xorps %%xmm7, %%xmm7\n\t" /* the first pixel uses xmm7 as 0.0 floor and accumulator; the loop only re-zeroes it afterwards */
+		:
+		: "r" (mat), "r" (top), "r" (photo->pre_mul)
+		: "memory"
+	);
+	while(height--) /* rows are processed last to first. NOTE(review): xmm2-xmm7 and mm7 must survive the C code and cmsDoTransform() between asm statements; no operand or clobber states this */
+	{
+		destoffset = 0;
+		col = width;
+		gushort *s = in + height * in_rowstride;
+		while(col--)
+		{
+			asm volatile (
+				/* load */
+				"movq (%3), %%mm0\n\t" /* R | G | B | G2 */
+				"movq %%mm0, %%mm1\n\t" /* R | G | B | G2 */
+				"punpcklwd %%mm7, %%mm0\n\t" /* R | G */
+				"punpckhwd %%mm7, %%mm1\n\t" /* B | G2 */
+				"cvtpi2ps %%mm1, %%xmm0\n\t" /* B | G2 | ? | ? */
+				"shufps $0x4E, %%xmm0, %%xmm0\n\t" /* ? | ? | B | G2 */
+				"cvtpi2ps %%mm0, %%xmm0\n\t" /* R | G | B | G2 */
+
+				"mulps %%xmm2, %%xmm0\n\t" /* apply pre_mul */
+				"maxps %%xmm7, %%xmm0\n\t" /* clamp to >= 0.0 */
+				"minps %%xmm6, %%xmm0\n\t" /* clamp to <= 65535.0 */
+
+				"movaps %%xmm0, %%xmm1\n\t"
+				"shufps $0x0, %%xmm0, %%xmm1\n\t" /* R in all lanes */
+				"mulps %%xmm3, %%xmm1\n\t"
+				"addps %%xmm1, %%xmm7\n\t"
+
+				"movaps %%xmm0, %%xmm1\n\t"
+				"shufps $0x55, %%xmm1, %%xmm1\n\t" /* G in all lanes */
+				"mulps %%xmm4, %%xmm1\n\t"
+				"addps %%xmm1, %%xmm7\n\t"
+
+				"movaps %%xmm0, %%xmm1\n\t"
+				"shufps $0xAA, %%xmm1, %%xmm1\n\t" /* B in all lanes */
+				"mulps %%xmm5, %%xmm1\n\t"
+				"addps %%xmm7, %%xmm1\n\t" /* xmm1 = matrix * (R, G, B) */
+
+				"xorps %%xmm7, %%xmm7\n\t" /* back to 0.0 for the clamp and the next pixel */
+				"minps %%xmm6, %%xmm1\n\t"
+				"maxps %%xmm7, %%xmm1\n\t"
+
+				"cvtss2si %%xmm1, %0\n\t" /* lane 0 -> r */
+				"shufps $0xF9, %%xmm1, %%xmm1\n\t" /* shift lanes down by one */
+				"cvtss2si %%xmm1, %1\n\t" /* -> g */
+				"shufps $0xF9, %%xmm1, %%xmm1\n\t"
+				"cvtss2si %%xmm1, %2\n\t" /* -> b */
+				: "=r" (r), "=r" (g), "=r" (b)
+				: "r" (s)
+				: "memory"
+			);
+			buffer[destoffset++] = previewtable16[r];
+			buffer[destoffset++] = previewtable16[g];
+			buffer[destoffset++] = previewtable16[b];
+			s += 4; /* always four gushort per pixel; in_channels is not consulted here */
+		}
+		cmsDoTransform((cmsHTRANSFORM) profile, buffer, out+height * out_rowstride, width); /* profile is used as a transform handle */
+	}
+	asm volatile("emms\n\t");
+	g_free(buffer);
+#endif
+	return;
+}
+
+void
+rs_render_cms_3dnow(RS_PHOTO *photo, gint width, gint height, gushort *in,
+	gint in_rowstride, gint in_channels, guchar *out, gint out_rowstride, void *profile)
+{ /* 3DNow! renderer with colour management: pre_mul, 3x3 matrix, previewtable16 lookup, then cmsDoTransform() per row. Empty unless __i386__. */
+#ifdef __i386__
+	gushort *buffer = g_malloc(width*3*sizeof(gushort)); /* one row of 16 bit RGB triplets handed to cmsDoTransform() */
+	gint destoffset;
+	gint col;
+	register gint r=0,g=0,b=0;
+	gfloat mat[12] align(8); /* photo->mat.coeff row by row, each row padded to four floats */
+	gfloat top[2] align(8) = {65535.0, 65535.0}; /* upper clamp */
+	mat[0] = photo->mat.coeff[0][0];
+	mat[1] = photo->mat.coeff[0][1];
+	mat[2] = photo->mat.coeff[0][2];
+	mat[3] = photo->mat.coeff[0][1]*0.0;
+	mat[4] = photo->mat.coeff[1][0];
+	mat[5] = photo->mat.coeff[1][1];
+	mat[6] = photo->mat.coeff[1][2];
+	mat[7] = photo->mat.coeff[1][1]*0.0;
+	mat[8] = photo->mat.coeff[2][0];
+	mat[9] = photo->mat.coeff[2][1];
+	mat[10] = photo->mat.coeff[2][2];
+	mat[11] = photo->mat.coeff[2][1]*0.0;
+	asm volatile (
+		"femms\n\t"
+		"pxor %%mm7, %%mm7\n\t" /* 0x0 */
+		"movq (%0), %%mm2\n\t" /* pre_mul R | pre_mul G */
+		"movq 8(%0), %%mm3\n\t" /* pre_mul B | pre_mul G2 */
+		"movq (%1), %%mm6\n\t" /* 65535.0 | 65535.0 */
+		:
+		: "r" (&photo->pre_mul), "r" (&top)
+		: "memory" /* pre_mul and top are read through the pointers above */
+	);
+	while(height--) /* rows are processed last to first. NOTE(review): mm2, mm3, mm6 and mm7 must survive the C code and cmsDoTransform() between asm statements; no operand or clobber states this */
+	{
+		destoffset = 0;
+		col = width;
+		gushort *s = in + height * in_rowstride;
+		while(col--)
+		{
+			asm volatile (
+				/* pre multiply */
+				"movq (%0), %%mm0\n\t" /* R | G | B | G2 */
+				"movq %%mm0, %%mm1\n\t" /* R | G | B | G2 */
+				"punpcklwd %%mm7, %%mm0\n\t" /* R, G */
+				"punpckhwd %%mm7, %%mm1\n\t" /* B, G2 */
+				"pi2fd %%mm0, %%mm0\n\t" /* to float */
+				"pi2fd %%mm1, %%mm1\n\t"
+				"pfmul %%mm2, %%mm0\n\t" /* pre_mul[R]*R | pre_mul[G]*G */
+				"pfmul %%mm3, %%mm1\n\t" /* pre_mul[B]*B | pre_mul[G2]*G2 */
+				"pfmin %%mm6, %%mm0\n\t"
+				"pfmin %%mm6, %%mm1\n\t"
+				"pfmax %%mm7, %%mm0\n\t"
+				"pfmax %%mm7, %%mm1\n\t"
+
+				"add $8, %0\n\t" /* increment offset */
+
+				/* red */
+				"movq (%4), %%mm4\n\t" /* mat[0] | mat[1] */
+				"movq 8(%4), %%mm5\n\t" /* mat[2] | mat[3] */
+				"pfmul %%mm0, %%mm4\n\t" /* R*[0] | G*[1] */
+				"pfmul %%mm1, %%mm5\n\t" /* B*[2] | G2*[3] */
+				"pfadd %%mm4, %%mm5\n\t" /* R*[0] + B*[2] | G*[1] + G2*[3] */
+				"pfacc %%mm5, %%mm5\n\t" /* R*[0] + B*[2] + G*[1] + G2*[3] | ? */
+				"pfmin %%mm6, %%mm5\n\t"
+				"pfmax %%mm7, %%mm5\n\t"
+				"pf2id %%mm5, %%mm5\n\t" /* to integer */
+				"movd %%mm5, %1\n\t" /* write r */
+
+				/* green */
+				"movq 16(%4), %%mm4\n\t"
+				"movq 24(%4), %%mm5\n\t"
+				"pfmul %%mm0, %%mm4\n\t"
+				"pfmul %%mm1, %%mm5\n\t"
+				"pfadd %%mm4, %%mm5\n\t"
+				"pfacc %%mm5, %%mm5\n\t"
+				"pfmin %%mm6, %%mm5\n\t"
+				"pfmax %%mm7, %%mm5\n\t"
+				"pf2id %%mm5, %%mm5\n\t"
+				"movd %%mm5, %2\n\t"
+
+				/* blue */
+				"movq 32(%4), %%mm4\n\t"
+				"movq 40(%4), %%mm5\n\t"
+				"pfmul %%mm0, %%mm4\n\t"
+				"pfmul %%mm1, %%mm5\n\t"
+				"pfadd %%mm4, %%mm5\n\t"
+				"pfacc %%mm5, %%mm5\n\t"
+				"pfmin %%mm6, %%mm5\n\t"
+				"pfmax %%mm7, %%mm5\n\t"
+				"pf2id %%mm5, %%mm5\n\t"
+				"movd %%mm5, %3\n\t"
+				: "+r" (s), "+r" (r), "+r" (g), "+r" (b)
+				: "r" (&mat)
+				: "memory" /* the pixel at s and mat are read through pointers */
+			);
+			buffer[destoffset++] = previewtable16[r];
+			buffer[destoffset++] = previewtable16[g];
+			buffer[destoffset++] = previewtable16[b];
+		}
+		cmsDoTransform((cmsHTRANSFORM) profile, buffer, out+height * out_rowstride, width); /* profile is used as a transform handle */
+	}
+	asm volatile ("femms\n\t");
+	g_free(buffer);
+#endif
+	return;
+}
+
+void
+rs_render_cms(RS_PHOTO *photo, gint width, gint height, gushort *in,
+	gint in_rowstride, gint in_channels, guchar *out, gint out_rowstride, void *profile)
+{ /* Plain C renderer with colour management: pre_mul, integer 3x3 matrix, previewtable16 lookup, then cmsDoTransform() per row. */
+	gushort *buffer = g_malloc(width*3*sizeof(gushort)); /* one row of 16 bit RGB triplets handed to cmsDoTransform() */
+	gint srcoffset, destoffset;
+	register gint x,y;
+	register gint r,g,b;
+	gint rr,gg,bb;
+	gint pre_mul[4]; /* photo->pre_mul scaled by 128, i.e. 7 fractional bits */
+	for(x=0;x<4;x++)
+		pre_mul[x] = (gint) (photo->pre_mul[x]*128.0);
+	for(y=0 ; y<height ; y++)
+	{
+		destoffset = 0;
+		srcoffset = y * in_rowstride;
+		for(x=0 ; x<width ; x++)
+		{
+			rr = (in[srcoffset+R]*pre_mul[R])>>7; /* >>7 removes the 128 scale again */
+			gg = (in[srcoffset+G]*pre_mul[G])>>7;
+			bb = (in[srcoffset+B]*pre_mul[B])>>7;
+			_CLAMP65535_TRIPLET(rr,gg,bb);
+			r = (rr*photo->mati.coeff[0][0]
+				+ gg*photo->mati.coeff[0][1]
+				+ bb*photo->mati.coeff[0][2])>>MATRIX_RESOLUTION;
+			g = (rr*photo->mati.coeff[1][0]
+				+ gg*photo->mati.coeff[1][1]
+				+ bb*photo->mati.coeff[1][2])>>MATRIX_RESOLUTION;
+			b = (rr*photo->mati.coeff[2][0]
+				+ gg*photo->mati.coeff[2][1]
+				+ bb*photo->mati.coeff[2][2])>>MATRIX_RESOLUTION;
+			_CLAMP65535_TRIPLET(r,g,b); /* r, g and b index the 65536 entry table below */
+			buffer[destoffset++] = previewtable16[r];
+			buffer[destoffset++] = previewtable16[g];
+			buffer[destoffset++] = previewtable16[b];
+			srcoffset+=in_channels; /* step to the next input pixel */
+		}
+		cmsDoTransform((cmsHTRANSFORM) profile, buffer, out+y * out_rowstride, width); /* profile is used as a transform handle */
+	}
+	g_free(buffer);
+	return;
+}
+
+/*
+ * SSE renderer without colour management.
+ *
+ * The body is only compiled when __i386__ is defined; otherwise the
+ * function returns without touching out.
+ *
+ * Rows are processed from the last to the first. For every pixel four
+ * gushort samples are loaded, multiplied by photo->pre_mul[], clamped to
+ * 0..65535, multiplied by the 3x3 matrix photo->mat, clamped again and
+ * mapped through previewtable[] into three bytes of the output row.
+ *
+ * photo:         supplies pre_mul[] and mat
+ * width, height: size of the area to render, in pixels
+ * in:            source samples, four gushort per pixel (s advances by 4)
+ * in_rowstride:  distance between source rows, in gushort units
+ * in_channels:   not used by this implementation
+ * out:           destination, three bytes written per pixel
+ * out_rowstride: distance between destination rows, in guchar units
+ * profile:       not used by this implementation
+ *
+ * Register state set up once and relied on by the per-pixel asm:
+ * xmm2 = pre_mul, xmm3..xmm5 = matrix columns, xmm6 = 65535.0,
+ * mm7 = 0 (for unpacking), xmm7 = 0.0 (lower clamp and accumulator).
+ * NOTE(review): nothing tells the compiler these registers are reserved
+ * between the asm statements.
+ */
+void
+rs_render_nocms_sse(RS_PHOTO *photo, gint width, gint height, gushort *in,
+	gint in_rowstride, gint in_channels, guchar *out, gint out_rowstride, void *profile)
+{
+#ifdef __i386__
+	register gint r,g,b;
+	gint destoffset;
+	gint col;
+	gfloat top[4] align(16) = {65535.0, 65535.0, 65535.0, 65535.0};
+	/* matrix stored column by column, padded to four floats per column */
+	gfloat mat[12] align(16) = {
+	photo->mat.coeff[0][0],
+	photo->mat.coeff[1][0],
+	photo->mat.coeff[2][0],
+	0.0,
+	photo->mat.coeff[0][1],
+	photo->mat.coeff[1][1],
+	photo->mat.coeff[2][1],
+	0.0,
+	photo->mat.coeff[0][2],
+	photo->mat.coeff[1][2],
+	photo->mat.coeff[2][2],
+	0.0 };
+	asm volatile (
+		"movups (%2), %%xmm2\n\t" /* rs->pre_mul */
+		"movaps (%0), %%xmm3\n\t" /* matrix */
+		"movaps 16(%0), %%xmm4\n\t"
+		"movaps 32(%0), %%xmm5\n\t"
+		"movaps (%1), %%xmm6\n\t" /* top */
+		"pxor %%mm7, %%mm7\n\t" /* 0x0 */
+		/* the pixel loop only clears xmm7 at the end of each pixel, so it
+		   has to start out as zero for the very first pixel as well */
+		"xorps %%xmm7, %%xmm7\n\t"
+		:
+		: "r" (mat), "r" (top), "r" (photo->pre_mul)
+		: "memory"
+	);
+	while(height--)
+	{
+		destoffset = 0;
+		col = width;
+		gushort *s = in + height * in_rowstride;
+		guchar *d = out + height * out_rowstride;
+		while(col--)
+		{
+			asm volatile (
+				/* load */
+				"movq (%3), %%mm0\n\t" /* R | G | B | G2 */
+				"movq %%mm0, %%mm1\n\t" /* R | G | B | G2 */
+				"punpcklwd %%mm7, %%mm0\n\t" /* R | G */
+				"punpckhwd %%mm7, %%mm1\n\t" /* B | G2 */
+				"cvtpi2ps %%mm1, %%xmm0\n\t" /* B | G2 | ? | ? */
+				"shufps $0x4E, %%xmm0, %%xmm0\n\t" /* ? | ? | B | G2 */
+				"cvtpi2ps %%mm0, %%xmm0\n\t" /* R | G | B | G2 */
+
+				/* pre multiply and clamp to 0..65535 */
+				"mulps %%xmm2, %%xmm0\n\t"
+				"maxps %%xmm7, %%xmm0\n\t"
+				"minps %%xmm6, %%xmm0\n\t"
+
+				/* xmm7 += R * first matrix column */
+				"movaps %%xmm0, %%xmm1\n\t"
+				"shufps $0x0, %%xmm0, %%xmm1\n\t"
+				"mulps %%xmm3, %%xmm1\n\t"
+				"addps %%xmm1, %%xmm7\n\t"
+
+				/* xmm7 += G * second matrix column */
+				"movaps %%xmm0, %%xmm1\n\t"
+				"shufps $0x55, %%xmm1, %%xmm1\n\t"
+				"mulps %%xmm4, %%xmm1\n\t"
+				"addps %%xmm1, %%xmm7\n\t"
+
+				/* xmm1 = B * third matrix column + xmm7 */
+				"movaps %%xmm0, %%xmm1\n\t"
+				"shufps $0xAA, %%xmm1, %%xmm1\n\t"
+				"mulps %%xmm5, %%xmm1\n\t"
+				"addps %%xmm7, %%xmm1\n\t"
+
+				/* reset xmm7 to zero and clamp the result to 0..65535 */
+				"xorps %%xmm7, %%xmm7\n\t"
+				"minps %%xmm6, %%xmm1\n\t"
+				"maxps %%xmm7, %%xmm1\n\t"
+
+				/* convert the three results to integers, one lane at a time */
+				"cvtss2si %%xmm1, %0\n\t"
+				"shufps $0xF9, %%xmm1, %%xmm1\n\t"
+				"cvtss2si %%xmm1, %1\n\t"
+				"shufps $0xF9, %%xmm1, %%xmm1\n\t"
+				"cvtss2si %%xmm1, %2\n\t"
+				: "=r" (r), "=r" (g), "=r" (b)
+				: "r" (s)
+				: "memory"
+			);
+			d[destoffset++] = previewtable[r];
+			d[destoffset++] = previewtable[g];
+			d[destoffset++] = previewtable[b];
+			s += 4;
+		}
+	}
+	asm volatile("emms\n\t");
+#endif
+	return;
+}
+
+/*
+ * 3DNow! renderer without colour management.
+ *
+ * The body is only compiled when __i386__ is defined; otherwise the
+ * function returns without touching out.
+ *
+ * Rows are processed from the last to the first. For every pixel four
+ * gushort samples are loaded, multiplied by photo->pre_mul[], clamped to
+ * 0..65535, multiplied by the 3x3 matrix photo->mat, clamped again and
+ * mapped through previewtable[] into three bytes of the output row.
+ *
+ * photo:         supplies pre_mul[] and mat
+ * width, height: size of the area to render, in pixels
+ * in:            source samples, four gushort per pixel (the asm adds 8
+ *                bytes to s per pixel)
+ * in_rowstride:  distance between source rows, in gushort units
+ * in_channels:   not used by this implementation
+ * out:           destination, three bytes written per pixel
+ * out_rowstride: distance between destination rows, in guchar units
+ * profile:       not used by this implementation
+ *
+ * Register state set up once and relied on by the per-pixel asm:
+ * mm2/mm3 = pre_mul, mm6 = 65535.0 | 65535.0, mm7 = 0.
+ * NOTE(review): nothing tells the compiler these registers are reserved
+ * between the asm statements.
+ */
+void
+rs_render_nocms_3dnow(RS_PHOTO *photo, gint width, gint height, gushort *in,
+	gint in_rowstride, gint in_channels, guchar *out, gint out_rowstride, void *profile)
+{
+#ifdef __i386__
+	gint destoffset;
+	gint col;
+	register gint r=0,g=0,b=0;
+	gfloat mat[12] align(8);
+	gfloat top[2] align(8);
+	/* one matrix row per output channel; the fourth weight of each row is
+	   a coefficient multiplied by 0.0, so G2 gets a zero weight for finite
+	   coefficients */
+	mat[0] = photo->mat.coeff[0][0];
+	mat[1] = photo->mat.coeff[0][1];
+	mat[2] = photo->mat.coeff[0][2];
+	mat[3] = photo->mat.coeff[0][1]*0.0;
+	mat[4] = photo->mat.coeff[1][0];
+	mat[5] = photo->mat.coeff[1][1];
+	mat[6] = photo->mat.coeff[1][2];
+	mat[7] = photo->mat.coeff[1][1]*0.0;
+	mat[8] = photo->mat.coeff[2][0];
+	mat[9] = photo->mat.coeff[2][1];
+	mat[10] = photo->mat.coeff[2][2];
+	mat[11] = photo->mat.coeff[2][1]*0.0;
+	top[0] = 65535.0;
+	top[1] = 65535.0;
+	asm volatile (
+		"femms\n\t"
+		"pxor %%mm7, %%mm7\n\t" /* 0x0 */
+		"movq (%0), %%mm2\n\t" /* pre_mul R | pre_mul G */
+		"movq 8(%0), %%mm3\n\t" /* pre_mul B | pre_mul G2 */
+		"movq (%1), %%mm6\n\t" /* 65535.0 | 65535.0 */
+		:
+		: "r" (&photo->pre_mul), "r" (&top)
+		/* the asm reads pre_mul[] and top[] through the pointers above;
+		   the clobber makes sure their contents are in memory first */
+		: "memory"
+	);
+	while(height--)
+	{
+		destoffset = 0;
+		col = width;
+		gushort *s = in + height * in_rowstride;
+		guchar *d = out + height * out_rowstride;
+		while(col--)
+		{
+			asm volatile (
+				/* pre multiply */
+				"movq (%0), %%mm0\n\t" /* R | G | B | G2 */
+				"movq %%mm0, %%mm1\n\t" /* R | G | B | G2 */
+				"punpcklwd %%mm7, %%mm0\n\t" /* R, G */
+				"punpckhwd %%mm7, %%mm1\n\t" /* B, G2 */
+				"pi2fd %%mm0, %%mm0\n\t" /* to float */
+				"pi2fd %%mm1, %%mm1\n\t"
+				"pfmul %%mm2, %%mm0\n\t" /* pre_mul[R]*R | pre_mul[G]*G */
+				"pfmul %%mm3, %%mm1\n\t" /* pre_mul[B]*B | pre_mul[G2]*G2 */
+				"pfmin %%mm6, %%mm0\n\t"
+				"pfmin %%mm6, %%mm1\n\t"
+				"pfmax %%mm7, %%mm0\n\t"
+				"pfmax %%mm7, %%mm1\n\t"
+
+				"add $8, %0\n\t" /* increment offset */
+
+				/* red */
+				"movq (%4), %%mm4\n\t" /* mat[0] | mat[1] */
+				"movq 8(%4), %%mm5\n\t" /* mat[2] | mat[3] */
+				"pfmul %%mm0, %%mm4\n\t" /* R*[0] | G*[1] */
+				"pfmul %%mm1, %%mm5\n\t" /* B*[2] | G2*[3] */
+				"pfadd %%mm4, %%mm5\n\t" /* R*[0] + B*[2] | G*[1] + G2*[3] */
+				"pfacc %%mm5, %%mm5\n\t" /* R*[0] + B*[2] + G*[1] + G2*[3] | ? */
+				"pfmin %%mm6, %%mm5\n\t"
+				"pfmax %%mm7, %%mm5\n\t"
+				"pf2id %%mm5, %%mm5\n\t" /* to integer */
+				"movd %%mm5, %1\n\t" /* write r */
+
+				/* green */
+				"movq 16(%4), %%mm4\n\t"
+				"movq 24(%4), %%mm5\n\t"
+				"pfmul %%mm0, %%mm4\n\t"
+				"pfmul %%mm1, %%mm5\n\t"
+				"pfadd %%mm4, %%mm5\n\t"
+				"pfacc %%mm5, %%mm5\n\t"
+				"pfmin %%mm6, %%mm5\n\t"
+				"pfmax %%mm7, %%mm5\n\t"
+				"pf2id %%mm5, %%mm5\n\t"
+				"movd %%mm5, %2\n\t"
+
+				/* blue */
+				"movq 32(%4), %%mm4\n\t"
+				"movq 40(%4), %%mm5\n\t"
+				"pfmul %%mm0, %%mm4\n\t"
+				"pfmul %%mm1, %%mm5\n\t"
+				"pfadd %%mm4, %%mm5\n\t"
+				"pfacc %%mm5, %%mm5\n\t"
+				"pfmin %%mm6, %%mm5\n\t"
+				"pfmax %%mm7, %%mm5\n\t"
+				"pf2id %%mm5, %%mm5\n\t"
+				"movd %%mm5, %3\n\t"
+				: "+r" (s), "+r" (r), "+r" (g), "+r" (b)
+				: "r" (&mat)
+				/* the asm reads the pixel at s and mat[] through pointers
+				   that are passed as plain register operands */
+				: "memory"
+			);
+			d[destoffset++] = previewtable[r];
+			d[destoffset++] = previewtable[g];
+			d[destoffset++] = previewtable[b];
+		}
+	}
+	asm volatile ("femms\n\t");
+#endif
+	return;
+}
+/*
+ * Plain C renderer without colour management.
+ *
+ * Each source pixel is white-balanced with photo->pre_mul[], run through
+ * the integer matrix photo->mati, and mapped through previewtable[] into
+ * three bytes of the matching row of out.
+ *
+ * photo:         supplies pre_mul[] and mati
+ * width, height: size of the area to render, in pixels
+ * in:            source samples, addressed per pixel with R, G and B
+ * in_rowstride:  distance between source rows, in gushort units
+ * in_channels:   distance between source pixels, in gushort units
+ * out:           destination, three bytes written per pixel
+ * out_rowstride: distance between destination rows, in guchar units
+ * profile:       not used by this implementation
+ */
+void
+rs_render_nocms(RS_PHOTO *photo, gint width, gint height, gushort *in,
+	gint in_rowstride, gint in_channels, guchar *out, gint out_rowstride, void *profile)
+{
+	gint srcoffset, destoffset;
+	register gint x,y;
+	register gint r,g,b;
+	gint rr,gg,bb;
+	gint pre_mul[4];
+	/* pre_mul[] as fixed point with 7 fractional bits */
+	for(x=0;x<4;x++)
+		pre_mul[x] = (gint) (photo->pre_mul[x]*128.0);
+	for(y=0 ; y<height ; y++)
+	{
+		destoffset = 0;
+		srcoffset = y * in_rowstride;
+		/* address the current output row; indexing by height instead of y
+		   would send every row to the same place past the last row */
+		guchar *d = out + y * out_rowstride;
+		for(x=0 ; x<width ; x++)
+		{
+			/* white balance, dropping the 7 fractional bits again */
+			rr = (in[srcoffset+R]*pre_mul[R])>>7;
+			gg = (in[srcoffset+G]*pre_mul[G])>>7;
+			bb = (in[srcoffset+B]*pre_mul[B])>>7;
+			_CLAMP65535_TRIPLET(rr,gg,bb);
+			/* 3x3 colour matrix in fixed point */
+			r = (rr*photo->mati.coeff[0][0]
+				+ gg*photo->mati.coeff[0][1]
+				+ bb*photo->mati.coeff[0][2])>>MATRIX_RESOLUTION;
+			g = (rr*photo->mati.coeff[1][0]
+				+ gg*photo->mati.coeff[1][1]
+				+ bb*photo->mati.coeff[1][2])>>MATRIX_RESOLUTION;
+			b = (rr*photo->mati.coeff[2][0]
+				+ gg*photo->mati.coeff[2][1]
+				+ bb*photo->mati.coeff[2][2])>>MATRIX_RESOLUTION;
+			_CLAMP65535_TRIPLET(r,g,b);
+			d[destoffset++] = previewtable[r];
+			d[destoffset++] = previewtable[g];
+			d[destoffset++] = previewtable[b];
+			srcoffset+=in_channels;
+		}
+	}
+	return;
+}

Added: trunk/src/rs-render.h
===================================================================
--- trunk/src/rs-render.h	2006-09-18 08:59:45 UTC (rev 770)
+++ trunk/src/rs-render.h	2006-09-18 09:11:14 UTC (rev 771)
@@ -0,0 +1,4 @@
+void rs_render_select(gboolean cms); /* NOTE(review): presumably chooses which renderer rs_render points at; definition not visible here - confirm in rs-render.c */
+void rs_render_previewtable(const double contrast); /* NOTE(review): presumably rebuilds the preview lookup tables for this contrast - confirm in rs-render.c */
+void (*rs_render)(RS_PHOTO *photo, gint width, gint height, gushort *in,
+	gint in_rowstride, gint in_channels, guchar *out, gint out_rowstride, void *profile); /* function pointer with the same signature as the rs_render_* renderers; NOTE(review): declared without extern, so every file including this header gets its own tentative definition */

Modified: trunk/src/toolbox.c
===================================================================
--- trunk/src/toolbox.c	2006-09-18 08:59:45 UTC (rev 770)
+++ trunk/src/toolbox.c	2006-09-18 09:11:14 UTC (rev 771)
@@ -103,35 +103,35 @@
 gui_transform_rot90_clicked(GtkWidget *w, RS_BLOB *rs)
 {
 	ORIENTATION_90(rs->photo->orientation);
-	update_preview(rs);
+	update_preview(rs, FALSE);
 }
 
 void
 gui_transform_rot180_clicked(GtkWidget *w, RS_BLOB *rs)
 {
 	ORIENTATION_180(rs->photo->orientation);
-	update_preview(rs);
+	update_preview(rs, FALSE);
 }
 
 void
 gui_transform_rot270_clicked(GtkWidget *w, RS_BLOB *rs)
 {
 	ORIENTATION_270(rs->photo->orientation);
-	update_preview(rs);
+	update_preview(rs, FALSE);
 }
 
 void
 gui_transform_mirror_clicked(GtkWidget *w, RS_BLOB *rs)
 {
 	ORIENTATION_MIRROR(rs->photo->orientation);
-	update_preview(rs);
+	update_preview(rs, FALSE);
 }
 
 void
 gui_transform_flip_clicked(GtkWidget *w, RS_BLOB *rs)
 {
 	ORIENTATION_FLIP(rs->photo->orientation);
-	update_preview(rs);
+	update_preview(rs, FALSE);
 }
 
 GtkWidget *




More information about the Rawstudio-commit mailing list