[Rawstudio-commit] r3171 - trunk/plugins/dcp

Klaus Post klauspost at gmail.com
Fri Feb 12 19:51:14 CET 2010


Author: post
Date: 2010-02-12 19:51:13 +0100 (Fri, 12 Feb 2010)
New Revision: 3171

Modified:
   trunk/plugins/dcp/dcp-sse2.c
   trunk/plugins/dcp/dcp.c
Log:
Use a 10-bit lookup table with interpolation for tone curve lookups. Avoids needlessly posterizing the image.

Modified: trunk/plugins/dcp/dcp-sse2.c
===================================================================
--- trunk/plugins/dcp/dcp-sse2.c	2010-02-09 22:18:31 UTC (rev 3170)
+++ trunk/plugins/dcp/dcp-sse2.c	2010-02-12 18:51:13 UTC (rev 3171)
@@ -442,12 +442,36 @@
 #define PS(A) _mm_castsi128_ps(A)
 
 static gfloat _16_bit_ps[4] __attribute__ ((aligned (16))) = {65535.0, 65535.0, 65535.0, 65535.0};
+static gfloat _thousand_24_ps[4] __attribute__ ((aligned (16))) = {1023.99999f, 1023.99999f, 1023.99999f, 1023.99999f}; /* Scale from input value to tone table index space. NOTE(review): 1023.99999 is not representable in single precision and rounds to exactly 1024.0f, so an input of 1.0 scales to index 1024 - confirm the table can be read one entry past that */
 
+static inline __m128 
+curve_interpolate_lookup(__m128 value, const gfloat * const tone_lut)
+{
+	int xfer[8] __attribute__ ((aligned (16))); /* Scratch for the integer indices; only xfer[0..3] are written and read here */
+	/* Tone curve lookup for four floats at once, linearly interpolating between adjacent tone_lut entries: scale each lane by _thousand_24_ps and convert to integer indices */
+	__m128 mul = _mm_mul_ps(value, _mm_load_ps(_thousand_24_ps));
+	__m128i lookup = _mm_cvtps_epi32(mul); /* NOTE(review): cvtps rounds according to the current MXCSR mode, while frac below is taken against _mm_floor_positive_ps - confirm the caller selects round-down/truncate */
+	_mm_store_si128((__m128i*)&xfer[0], lookup); /* Spill the indices to memory so they can be used as scalar array subscripts */
+
+	/* Calculate fractions: frac is the weight of the upper entry (mul minus what is presumably its floor), inv_frac = _ones_ps - frac is the weight of the lower entry */
+	__m128 frac = _mm_sub_ps(mul, _mm_floor_positive_ps(mul));
+	__m128 inv_frac = _mm_sub_ps(_mm_load_ps(_ones_ps), frac);
+
+	/* Load two adjacent curve values and interpolate between them. Each 64-bit load fetches tone_lut[i] and tone_lut[i + 1], so one entry past every index must be readable */
+	__m128 p0p1 = _mm_castsi128_ps(_mm_loadl_epi64((__m128i*)&tone_lut[xfer[0]])); /* low half: pair for lane 0 */
+	__m128 p2p3 = _mm_castsi128_ps(_mm_loadl_epi64((__m128i*)&tone_lut[xfer[2]])); /* low half: pair for lane 2 */
+	p0p1 = _mm_loadh_pi(p0p1, (__m64*)&tone_lut[xfer[1]]); /* high half: pair for lane 1 */
+	p2p3 = _mm_loadh_pi(p2p3, (__m64*)&tone_lut[xfer[3]]); /* high half: pair for lane 3 */
+
+	/* Pack all lower values in v0, high in v1 and interpolate: per lane the result is inv_frac * tone_lut[i] + frac * tone_lut[i + 1] */
+	__m128 v0 = _mm_shuffle_ps(p0p1, p2p3, _MM_SHUFFLE(2,0,2,0)); /* even elements of both pairs registers: the tone_lut[i] values for lanes 0..3 */
+	__m128 v1 = _mm_shuffle_ps(p0p1, p2p3, _MM_SHUFFLE(3,1,3,1)); /* odd elements of both pairs registers: the tone_lut[i + 1] values for lanes 0..3 */
+	return _mm_add_ps(_mm_mul_ps(inv_frac, v0), _mm_mul_ps(frac, v1));
+}
+
 void 
 rgb_tone_sse2(__m128* _r, __m128* _g, __m128* _b, const gfloat * const tone_lut)
 {
-	int xfer[8] __attribute__ ((aligned (16)));
-
 	__m128 r = *_r;
 	__m128 g = *_g;
 	__m128 b = *_b;
@@ -462,17 +486,10 @@
 	/* Find largest and smallest values */
 	__m128 lg = _mm_max_ps(b, _mm_max_ps(r, g));
 	__m128 sm = _mm_min_ps(b, _mm_min_ps(r, g));
-	__m128i lookup_max = _mm_cvtps_epi32(_mm_mul_ps(lg,
-										 _mm_load_ps(_16_bit_ps)));
-	__m128i lookup_min = _mm_cvtps_epi32(_mm_mul_ps(sm,
-										 _mm_load_ps(_16_bit_ps)));
 
-	_mm_store_si128((__m128i*)&xfer[0], lookup_max);
-	_mm_store_si128((__m128i*)&xfer[4], lookup_min);
-	
 	/* Lookup */
-	__m128 LG = _mm_set_ps(tone_lut[xfer[3]], tone_lut[xfer[2]], tone_lut[xfer[1]], tone_lut[xfer[0]]);
-	__m128 SM = _mm_set_ps(tone_lut[xfer[7]], tone_lut[xfer[6]], tone_lut[xfer[5]], tone_lut[xfer[4]]);
+	__m128 LG = curve_interpolate_lookup(lg, tone_lut);
+	__m128 SM = curve_interpolate_lookup(sm, tone_lut);
 
 	/* Create masks for largest, smallest and medium values */
 	/* This is done in integer SSE2, since they have double the throughput */

Modified: trunk/plugins/dcp/dcp.c
===================================================================
--- trunk/plugins/dcp/dcp.c	2010-02-09 22:18:31 UTC (rev 3170)
+++ trunk/plugins/dcp/dcp.c	2010-02-12 18:51:13 UTC (rev 3171)
@@ -23,6 +23,7 @@
 #include <math.h> /* pow() */
 #include "dcp.h"
 #include "adobe-camera-raw-tone.h"
+#include <string.h> /* memcpy */
 
 RS_DEFINE_FILTER(rs_dcp, RSDcp)
 
@@ -759,6 +760,15 @@
 	*v = MIN(*v * valScale, 1.0);
 }
 
+static inline gfloat 
+lookup_tone(gfloat value, const gfloat * const tone_lut)
+{ /* Scalar tone curve lookup: returns the linear interpolation between the two tone_lut entries surrounding value * 1024 */
+	gfloat lookup = CLAMP(value * 1024.0f, 0.0f, 1023.9999f); /* Upper bound stays below 1024 so the truncated index is 0..1023. NOTE(review): a NaN input is not caught by the comparisons in CLAMP - confirm callers never pass one */
+	gfloat v0 = tone_lut[(gint)lookup]; /* Lower neighbour: the cast truncates the non-negative position to its integer part */
+	gfloat v1 = tone_lut[(gint)lookup + 1]; /* Upper neighbour: index can reach 1024, so tone_lut must hold at least 1025 entries */
+	lookup -= floorf(lookup); /* Keep only the fractional part, used as the weight of v1 */
+	return v0 * (1.0f - lookup) + v1 * lookup;	
+}
 
 /* RefBaselineRGBTone() */
 void
@@ -773,8 +783,8 @@
 
 	#define RGBTone(lg, md, sm, LG, MD, SM)\
 	{\
-		LG = tone_lut[_S(lg)];\
-		SM = tone_lut[_S(sm)];\
+		LG = lookup_tone(lg, tone_lut);\
+		SM = lookup_tone(sm, tone_lut);\
 		\
 		MD = SM + ((LG - SM) * (md - sm) / (lg - sm));\
 		\
@@ -1146,7 +1156,11 @@
 		dcp->tone_curve = rs_spline_new(knots, num_knots, NATURAL);
 		g_free(knots);
 	}
-	dcp->tone_curve_lut = rs_spline_sample(dcp->tone_curve, NULL, 65536);
+	dcp->tone_curve_lut = g_new(gfloat, 1025);
+	gfloat *tc = rs_spline_sample(dcp->tone_curve, NULL, 1024);
+	memcpy(dcp->tone_curve_lut, tc, 1024*sizeof(gfloat));
+	dcp->tone_curve_lut[1024] = dcp->tone_curve_lut[1023];
+	g_free(tc);
 
 	/* ForwardMatrix */
 	dcp->has_forward_matrix1 = rs_dcp_file_get_forward_matrix1(dcp_file, &dcp->forward_matrix1);




More information about the Rawstudio-commit mailing list