[cairo] [PATCH pixman 2/3] New pixman_filter_create_separable_convolution

Mon Aug 18 19:51:08 PDT 2014

Uses filter evaluators that know the size of a pixel, and thus can integrate
with reconstruction filters directly. The result is equivalent to having the
box and impulse sampling filters use a box reconstruction, and all others use
box for sizes smaller than their Nyquist rate and impulse for larger sizes.

For back-compatibility the "reconstruction" filter argument is used at a size
of 1 if it is not box or impulse and the filter size is less than 1.

IMHO this preserves all the useful results of the previous code, is a good
deal simpler, and the caller no longer has to alter the arguments depending on
whether the size is less than one or not.
---
 pixman/pixman-filter.c |  392 ++++++++++++++++++++++++------------------------
 1 file changed, 194 insertions(+), 198 deletions(-)

diff --git a/pixman/pixman-filter.c b/pixman/pixman-filter.c
index b2bf53f..1087f6d 100644
--- a/pixman/pixman-filter.c
+++ b/pixman/pixman-filter.c
@@ -33,42 +33,83 @@
 #endif
 #include "pixman-private.h"
 
-typedef double (* kernel_func_t) (double x);
+/* Produce contribution of a filter of size r for pixel centered on x.
+ * For a typical low-pass function this evaluates the function at x/r.
+ * If the frequency is higher than 1/2, such as when r is less than 1,
+ * this may need to integrate several samples, see cubic for examples.
+ */
+typedef double (* kernel_func_t) (double x, double r);
 
 typedef struct
 {
     pixman_kernel_t	kernel;
     kernel_func_t	func;
-    double		width;
+    double		width; /* note special handling if <= 1 */
 } filter_info_t;
 
+/* PIXMAN_KERNEL_IMPULSE: Returns pixel nearest the center.  This
+ * matches PIXMAN_FILTER_NEAREST. This is useful if you wish to
+ * combine the result of nearest in one direction with another filter
+ * in the other.
+ */
 static double
-impulse_kernel (double x)
+impulse_kernel (double x, double r)
 {
-    return (x == 0.0)? 1.0 : 0.0;
+    return 1;
 }
 
+/* PIXMAN_KERNEL_BOX: Intersection of a box of width r with square
+ * pixels. This is the smallest possible filter such that the output
+ * image contains an equal contribution from all the input
+ * pixels. Lots of software uses this. The function is a trapazoid of
+ * width r+1, not a box.
+ *
+ * When r == 1.0, PIXMAN_KERNEL_BOX and PIXMAN_KERNEL_LINEAR produce
+ * the same filter, allowing them to be exchanged at this point.
+ */
 static double
-box_kernel (double x)
+box_kernel (double x, double r)
 {
-    return 1;
+    return MAX (0.0, MIN (MIN (r, 1.0),
+			  MIN ((r + 1) / 2 - x, (r + 1) / 2 + x)));
 }
 
+/* PIXMAN_KERNEL_LINEAR: Triangle of width 2r. Lots of software uses
+ * this as a "better" filter, twice the size of a box but smaller than
+ * a cubic.
+ *
+ * When r == 1.0, PIXMAN_KERNEL_BOX and PIXMAN_KERNEL_LINEAR produce
+ * the same filter, allowing them to be exchanged at this point.
+ */
 static double
-linear_kernel (double x)
+linear_kernel (double x, double r)
 {
-    return 1 - fabs (x);
+    if (r < 1.0)
+	return box_kernel(x, r);
+    else
+	return MAX (1.0 - fabs(x / r), 0.0);
 }
 
+/* PIXMAN_KERNEL_GAUSSIAN: Gaussian function, cut off somewhere near
+   +/-2.5. Output looks like it has been blurred by a gaussian filter
+   of size 1. This may be useful if the output is to be blurred
+   further as that blur can be reduced by 1.
+*/
 static double
-gaussian_kernel (double x)
+gaussian_kernel (double x, double r)
 {
-#define SQRT2 (1.4142135623730950488016887242096980785696718753769480)
-#define SIGMA (SQRT2 / 2.0)
-    
-    return exp (- x * x / (2 * SIGMA * SIGMA)) / (SIGMA * sqrt (2.0 * M_PI));
+    if (r < 0.5)
+	return
+	    gaussian_kernel (x * 2 - .5, r * 2) +
+	    gaussian_kernel (x * 2 + .5, r * 2);
+    return exp (- x * x) / sqrt (M_PI);
 }
 
+/* PIXMAN_KERNEL_LANCZOS2: lanczos windowed sinc function from -2 to
+ * +2. This is very close to the cubic filter that is often used
+ * by other software. Should be slightly higher quality.
+ */
+
 static double
 sinc (double x)
 {
@@ -79,60 +120,91 @@ sinc (double x)
 }
 
 static double
-lanczos (double x, int n)
+lanczos (double x, double r, double n)
 {
-    return sinc (x) * sinc (x * (1.0 / n));
+    if (r < 1.0)
+	return
+	    lanczos (x * 2 - .5, r * 2, n) +
+	    lanczos (x * 2 + .5, r * 2, n);
+    x /= r;
+    return fabs (x) < n ? sinc (x) * sinc (x * (1.0 / n)) : 0.0;
 }
 
 static double
-lanczos2_kernel (double x)
+lanczos2_kernel (double x, double r)
 {
-    return lanczos (x, 2);
+    return lanczos (x, r, 2.0);
 }
 
+/* PIXMAN_KERNEL_LANCZOS3: lanczos windowed sinc function from -3 to
+ * +3. Very popular with high-end software though I think any
+ * advantage over cubics is hidden by quantization and programming
+ * mistakes. You will see LANCZOS5 or even 7 sometimes.
+ */
 static double
-lanczos3_kernel (double x)
+lanczos3_kernel (double x, double r)
 {
-    return lanczos (x, 3);
+    return lanczos (x, r, 3.0);
 }
 
+/* PIXMAN_KERNEL_LANCZOS3_STRETCHED - The LANCZOS3 kernel widened by
+ * 4/3.  Recommended by Jim Blinn
+ * http://graphics.cs.cmu.edu/nsp/course/15-462/Fall07/462/papers/jaggy.pdf
+ */
 static double
-nice_kernel (double x)
+nice_kernel (double x, double r)
 {
-    return lanczos3_kernel (x * 0.75);
+    return lanczos3_kernel (x, r * (4.0/3));
 }
 
+/* Cubic functions described in the Mitchell-Netravali paper.
+ * http://mentallandscape.com/Papers_siggraph88.pdf. This describes
+ * all possible cubic functions that can be used for sampling.
+ */
 static double
-general_cubic (double x, double B, double C)
+general_cubic (double x, double r, double B, double C)
 {
-    double ax = fabs(x);
+    double ax;
+    if (r < 1.0)
+	return
+	    general_cubic(x * 2 - .5, r * 2, B, C) +
+	    general_cubic(x * 2 + .5, r * 2, B, C);
+
+    ax = fabs (x / r);
 
     if (ax < 1)
     {
-	return ((12 - 9 * B - 6 * C) * ax * ax * ax +
-		(-18 + 12 * B + 6 * C) * ax * ax + (6 - 2 * B)) / 6;
+	return (((12 - 9 * B - 6 * C) * ax +
+		 (-18 + 12 * B + 6 * C)) * ax * ax +
+		(6 - 2 * B)) / 6;
     }
-    else if (ax >= 1 && ax < 2)
+    else if (ax < 2)
     {
-	return ((-B - 6 * C) * ax * ax * ax +
-		(6 * B + 30 * C) * ax * ax + (-12 * B - 48 * C) *
-		ax + (8 * B + 24 * C)) / 6;
+	return ((((-B - 6 * C) * ax +
+		 (6 * B + 30 * C)) * ax +
+		(-12 * B - 48 * C)) * ax +
+		(8 * B + 24 * C)) / 6;
     }
     else
     {
-	return 0;
+	return 0.0;
     }
 }
 
+/* PIXMAN_KERNEL_CUBIC: Cubic recommended by the Mitchell-Netravali
+ * paper.  This has negative values and because the values at +/-1 are
+ * not zero it does not interpolate the pixels, meaning it will change
+ * an image even if there is no translation.
+ *
+ * Warning: this is not what a lot of other software calls "cubic".
+ * That often refers to the Catmull-Rom spline, or another interpolating
+ * function. (0.0, 0.5) would give us the Catmull-Rom spline, but
+ * that one seems indistinguishable from Lanczos2.
+ */
 static double
-cubic_kernel (double x)
+cubic_kernel (double x, double r)
 {
-    /* This is the Mitchell-Netravali filter.
-     *
-     * (0.0, 0.5) would give us the Catmull-Rom spline,
-     * but that one seems to be indistinguishable from Lanczos2.
-     */
-    return general_cubic (x, 1/3.0, 1/3.0);
+    return general_cubic (x, r, 1/3.0, 1/3.0);
 }
 
 static const filter_info_t filters[] =
@@ -140,163 +212,62 @@ static const filter_info_t filters[] =
     { PIXMAN_KERNEL_IMPULSE,	        impulse_kernel,   0.0 },
     { PIXMAN_KERNEL_BOX,	        box_kernel,       1.0 },
     { PIXMAN_KERNEL_LINEAR,	        linear_kernel,    2.0 },
-    { PIXMAN_KERNEL_CUBIC,		cubic_kernel,     4.0 },
-    { PIXMAN_KERNEL_GAUSSIAN,	        gaussian_kernel,  6 * SIGMA },
+    { PIXMAN_KERNEL_CUBIC,	        cubic_kernel,     4.0 },
+    { PIXMAN_KERNEL_GAUSSIAN,	        gaussian_kernel,  5.0 },
     { PIXMAN_KERNEL_LANCZOS2,	        lanczos2_kernel,  4.0 },
     { PIXMAN_KERNEL_LANCZOS3,	        lanczos3_kernel,  6.0 },
     { PIXMAN_KERNEL_LANCZOS3_STRETCHED, nice_kernel,      8.0 },
 };
 
-/* This function scales @kernel2 by @scale, then
- * aligns @x1 in @kernel1 with @x2 in @kernel2 and
- * and integrates the product of the kernels across @width.
- *
- * This function assumes that the intervals are within
- * the kernels in question. E.g., the caller must not
- * try to integrate a linear kernel ouside of [-1:1]
- */
-static double
-integral (pixman_kernel_t kernel1, double x1,
-	  pixman_kernel_t kernel2, double scale, double x2,
-	  double width)
-{
-    /* If the integration interval crosses zero, break it into
-     * two separate integrals. This ensures that filters such
-     * as LINEAR that are not differentiable at 0 will still
-     * integrate properly.
-     */
-    if (x1 < 0 && x1 + width > 0)
-    {
-	return
-	    integral (kernel1, x1, kernel2, scale, x2, - x1) +
-	    integral (kernel1, 0, kernel2, scale, x2 - x1, width + x1);
-    }
-    else if (x2 < 0 && x2 + width > 0)
-    {
-	return
-	    integral (kernel1, x1, kernel2, scale, x2, - x2) +
-	    integral (kernel1, x1 - x2, kernel2, scale, 0, width + x2);
-    }
-    else if (kernel1 == PIXMAN_KERNEL_IMPULSE)
-    {
-	assert (width == 0.0);
-	return filters[kernel2].func (x2 * scale);
-    }
-    else if (kernel2 == PIXMAN_KERNEL_IMPULSE)
-    {
-	assert (width == 0.0);
-	return filters[kernel1].func (x1);
-    }
-    else
-    {
-	/* Integration via Simpson's rule */
-#define N_SEGMENTS 128
-#define SAMPLE(a1, a2)							\
-	(filters[kernel1].func ((a1)) * filters[kernel2].func ((a2) * scale))
-	
-	double s = 0.0;
-	double h = width / (double)N_SEGMENTS;
-	int i;
-
-	s = SAMPLE (x1, x2);
-
-	for (i = 1; i < N_SEGMENTS; i += 2)
-	{
-	    double a1 = x1 + h * i;
-	    double a2 = x2 + h * i;
 
-	    s += 2 * SAMPLE (a1, a2);
-
-	    if (i >= 2 && i < N_SEGMENTS - 1)
-		s += 4 * SAMPLE (a1, a2);
-	}
-
-	s += SAMPLE (x1 + width, x2 + width);
-	
-	return h * s * (1.0 / 3.0);
-    }
-}
-
-static pixman_fixed_t *
-create_1d_filter (int             *width,
-		  pixman_kernel_t  reconstruct,
-		  pixman_kernel_t  sample,
-		  double           scale,
-		  int              n_phases)
+/* Fills in one dimension of the filter array */
+static void get_filter(pixman_kernel_t filter, double r,
+		       int width, int subsample,
+		       pixman_fixed_t* out)
 {
-    pixman_fixed_t *params, *p;
-    double step;
-    double size;
     int i;
+    pixman_fixed_t *p = out;
+    int n_phases = 1 << subsample;
+    double step = 1.0 / n_phases;
+    kernel_func_t func = filters[filter].func;
 
-    size = scale * filters[sample].width + filters[reconstruct].width;
-    *width = ceil (size);
-
-    p = params = malloc (*width * n_phases * sizeof (pixman_fixed_t));
-    if (!params)
-        return NULL;
-
-    step = 1.0 / n_phases;
+    /* special-case the impulse filter: */
+    if (width <= 1)
+    {
+	for (i = 0; i < n_phases; ++i)
+	    *p++ = pixman_fixed_1;
+	return;
+    }
 
     for (i = 0; i < n_phases; ++i)
     {
-        double frac = step / 2.0 + i * step;
-	pixman_fixed_t new_total;
-        int x, x1, x2;
-	double total;
-
-	/* Sample convolution of reconstruction and sampling
-	 * filter. See rounding.txt regarding the rounding
-	 * and sample positions.
-	 */
-
-	x1 = ceil (frac - *width / 2.0 - 0.5);
-        x2 = x1 + *width;
-
-	total = 0;
-        for (x = x1; x < x2; ++x)
-        {
-	    double pos = x + 0.5 - frac;
-	    double rlow = - filters[reconstruct].width / 2.0;
-	    double rhigh = rlow + filters[reconstruct].width;
-	    double slow = pos - scale * filters[sample].width / 2.0;
-	    double shigh = slow + scale * filters[sample].width;
-	    double c = 0.0;
-	    double ilow, ihigh;
-
-	    if (rhigh >= slow && rlow <= shigh)
-	    {
-		ilow = MAX (slow, rlow);
-		ihigh = MIN (shigh, rhigh);
-
-		c = integral (reconstruct, ilow,
-			      sample, 1.0 / scale, ilow - pos,
-			      ihigh - ilow);
-	    }
-
-	    total += c;
-            *p++ = (pixman_fixed_t)(c * 65536.0 + 0.5);
-        }
+	double frac = (i + .5) * step;
+	/* Center of left-most pixel: */
+	double x1 = ceil (frac - width / 2.0 - 0.5) - frac + 0.5;
+	double total = 0;
+	pixman_fixed_t new_total = 0;
+	int j;
+
+	for (j = 0; j < width; ++j)
+	{
+	    double v = func(x1 + j, r);
+	    total += v;
+	    p[j] = pixman_double_to_fixed (v);
+	}
 
 	/* Normalize */
-	p -= *width;
         total = 1 / total;
-        new_total = 0;
-	for (x = x1; x < x2; ++x)
-	{
-	    pixman_fixed_t t = (*p) * total + 0.5;
+	for (j = 0; j < width; ++j)
+	    new_total += (p[j] *= total);
 
-	    new_total += t;
-	    *p++ = t;
-	}
+	/* Put any error on center pixel */
+	p[width / 2] += (pixman_fixed_1 - new_total);
 
-	if (new_total != pixman_fixed_1)
-	    *(p - *width / 2) += (pixman_fixed_1 - new_total);
+	p += width;
     }
-
-    return params;
 }
 
+
 /* Create the parameter list for a SEPARABLE_CONVOLUTION filter
  * with the given kernels and scale parameters
  */
@@ -313,38 +284,63 @@ pixman_filter_create_separable_convolution (int             *n_values,
 {
     double sx = fabs (pixman_fixed_to_double (scale_x));
     double sy = fabs (pixman_fixed_to_double (scale_y));
-    pixman_fixed_t *horz = NULL, *vert = NULL, *params = NULL;
-    int subsample_x, subsample_y;
-    int width, height;
+    int width_x, width_y, size_x, size_y;
+    pixman_fixed_t *params;
 
-    subsample_x = (1 << subsample_bits_x);
-    subsample_y = (1 << subsample_bits_y);
+    /* Simulate older version of this function:
+     *
+     * Old version attempted to convolve two filters, the "reconstruct"
+     * filter at a scale of 1 and the main filter at the given scale.
+     *
+     * These filter generators simulate a BOX or IMPULSE
+     * reconstruction filter depending on the scale and other
+     * factors. If another filter is specified as the reconstruction
+     * filter, then it is used at scale = 1 if the scale <= 1 to
+     * simulate the previous results.  Since these filters are
+     * supposed to be low pass filters, the convolution is achieved by
+     * just choosing the larger one.
+     */
+    if (reconstruct_x > PIXMAN_KERNEL_BOX  &&  sx < 1.0)
+    {
+	sample_x = reconstruct_x;
+	sx = 1.0;
+    }
+    if (reconstruct_y > PIXMAN_KERNEL_BOX  &&  sy < 1.0)
+    {
+	sample_y = reconstruct_y;
+	sy = 1.0;
+    }
+
+    width_x = filters[sample_x] . width;
+    if (width_x < 1.0)
+        { width_x = 1.0; subsample_bits_x = 0; }
+    else if (width_x < 2.0)
+        width_x = sx < 1.0 ? 2 : ceil (sx + width_x);
+    else
+        width_x = MAX (2, ceil (sx * width_x));
+
+    width_y = filters[sample_y] . width;
+    if (width_y < 1.0)
+        { width_y = 1.0; subsample_bits_y = 0; }
+    else if (width_y < 2.0)
+        width_y = sy < 1.0 ? 2 : ceil (sy + width_y);
+    else
+        width_y = MAX (2, ceil (sy * width_y));
 
-    horz = create_1d_filter (&width, reconstruct_x, sample_x, sx, subsample_x);
-    vert = create_1d_filter (&height, reconstruct_y, sample_y, sy, subsample_y);
+    size_x = (1 << subsample_bits_x) * width_x;
+    size_y = (1 << subsample_bits_y) * width_y;
+    *n_values = 4 + size_x + size_y;
 
-    if (!horz || !vert)
-        goto out;
-    
-    *n_values = 4 + width * subsample_x + height * subsample_y;
-    
     params = malloc (*n_values * sizeof (pixman_fixed_t));
-    if (!params)
-        goto out;
+    if (!params) return 0;
 
-    params[0] = pixman_int_to_fixed (width);
-    params[1] = pixman_int_to_fixed (height);
+    params[0] = pixman_int_to_fixed (width_x);
+    params[1] = pixman_int_to_fixed (width_y);
     params[2] = pixman_int_to_fixed (subsample_bits_x);
     params[3] = pixman_int_to_fixed (subsample_bits_y);
 
-    memcpy (params + 4, horz,
-	    width * subsample_x * sizeof (pixman_fixed_t));
-    memcpy (params + 4 + width * subsample_x, vert,
-	    height * subsample_y * sizeof (pixman_fixed_t));
-
-out:
-    free (horz);
-    free (vert);
+    get_filter(sample_x, sx, width_x, subsample_bits_x, params + 4);
+    get_filter(sample_y, sy, width_y, subsample_bits_y, params + 4 + size_x);
 
     return params;
 }
-- 
1.7.9.5