[cairo] [PATCH 2/4] New pixman_filter_create_separable_convolution

Mon Jul 28 19:30:51 PDT 2014

Uses filter evaluators that know the size of a pixel, and thus can
integrate with reconstruction filters directly. This is much faster
and easier to control. Most of them sample the filter in the center
of the pixel unless the size is less than one, in which case they
numerically integrate 2^n samples.

For back-compatibility the "reconstruction" filter argument is used
at a size of 1 if the size is less than 1. If it is impulse or box
then it is ignored.

The meaning of some of the filters changes:

PIXMAN_KERNEL_LINEAR produces linear interpolation of the two nearest
pixels. Old one produced something different for sizes less than 1.

PIXMAN_KERNEL_GAUSSIAN produces a cubic notch filter. This is blurry
as well but more useful for image reconstruction.

PIXMAN_KERNEL_LANCZOS2 produces a catmull-rom filter. This is almost
identical.

The enumeration is updated in a different later commit to correct this.
---
 pixman/pixman-filter.c |  462 +++++++++++++++++++++++++-----------------------
 1 file changed, 245 insertions(+), 217 deletions(-)

diff --git a/pixman/pixman-filter.c b/pixman/pixman-filter.c
index b2bf53f..f75bbd1 100644
--- a/pixman/pixman-filter.c
+++ b/pixman/pixman-filter.c
@@ -33,272 +33,287 @@
 #endif
 #include "pixman-private.h"
 
-typedef double (* kernel_func_t) (double x);
+/* Produce contribution of a filter of size r for pixel centered on x.
+   For a typical low-pass function this evaluates the function at x/r.
+   If the frequency is higher than 1/2, such as when r is less than 1,
+   this may need to integrate several samples, see cubic for examples.
+*/
+typedef double (* kernel_func_t) (double x, double r);
+
+/* Return maximum number of pixels that will be non-zero. Except for
+   impluse this is the maximum of 2 and the width of the non-zero part
+   of the filter rounded up to the next integer.
+*/
+typedef int (* kernel_width_func_t) (double r);
 
 typedef struct
 {
     pixman_kernel_t	kernel;
     kernel_func_t	func;
-    double		width;
+    kernel_width_func_t	width;
 } filter_info_t;
 
-static double
-impulse_kernel (double x)
-{
-    return (x == 0.0)? 1.0 : 0.0;
-}
+/* PIXMAN_KERNEL_IMPULSE: Returns pixel nearest the center.  This
+   matches PIXMAN_FILTER_NEAREST. This is useful if you wish to
+   combine the result of nearest in one direction with another filter
+   in the other.
+*/
 
 static double
-box_kernel (double x)
+impulse_kernel (double x, double r)
 {
     return 1;
 }
 
-static double
-linear_kernel (double x)
+static int
+impulse_width (double r)
 {
-    return 1 - fabs (x);
+    return 1;
 }
 
-static double
-gaussian_kernel (double x)
-{
-#define SQRT2 (1.4142135623730950488016887242096980785696718753769480)
-#define SIGMA (SQRT2 / 2.0)
-    
-    return exp (- x * x / (2 * SIGMA * SIGMA)) / (SIGMA * sqrt (2.0 * M_PI));
-}
+/* PIXMAN_KERNEL_BOX: Intersection of a box of width r with square
+   pixels. This is the smallest possible filter such that the output
+   image contains an equal contribution from all the input
+   pixels. Lots of software uses this. The function is a trapazoid of
+   width r+1, not a box.
 
-static double
-sinc (double x)
-{
-    if (x == 0.0)
-	return 1.0;
-    else
-	return sin (M_PI * x) / (M_PI * x);
-}
+   When r == 1.0, PIXMAN_KERNEL_BOX, PIXMAN_KERNEL_LINEAR, and
+   PIXMAN_KERNEL_TENT all produce the same filter, allowing
+   them to be exchanged at this point.
+*/
 
 static double
-lanczos (double x, int n)
+box_kernel (double x, double r)
 {
-    return sinc (x) * sinc (x * (1.0 / n));
+    return MAX (0.0, MIN (MIN (r, 1.0),
+			  MIN ((r + 1) / 2 - x, (r + 1) / 2 + x)));
 }
 
-static double
-lanczos2_kernel (double x)
+static int
+box_width (double r)
 {
-    return lanczos (x, 2);
+    return r < 1.0 ? 2 : ceil(r + 1);
 }
 
+/* PIXMAN_KERNEL_LINEAR: Weighted sum of the two pixels nearest the
+   center, or a triangle of width 2. This matches
+   PIXMAN_FILTER_BILINEAR. This is useful if you wish to combine the
+   result of bilinear in one direction with another filter in the
+   other.  This is not a good filter if r > 1. You may actually want
+   PIXMAN_FILTER_TENT.
+
+   When r == 1.0, PIXMAN_KERNEL_BOX, PIXMAN_KERNEL_LINEAR, and
+   PIXMAN_KERNEL_TENT all produce the same filter, allowing
+   them to be exchanged at this point.
+*/
+
 static double
-lanczos3_kernel (double x)
+linear_kernel (double x, double r)
 {
-    return lanczos (x, 3);
+    return MAX (1.0 - fabs(x), 0.0);
 }
 
-static double
-nice_kernel (double x)
+static int
+linear_width (double r)
 {
-    return lanczos3_kernel (x * 0.75);
+    return 2;
 }
 
+/* Cubic functions described in the Mitchell-Netravali paper.
+   http://mentallandscape.com/Papers_siggraph88.pdf. This describes
+   all possible cubic functions that can be used for sampling.
+*/
+
 static double
-general_cubic (double x, double B, double C)
+general_cubic (double x, double r, double B, double C)
 {
-    double ax = fabs(x);
+    double ax;
+    if (r < 1.0)
+	return
+	    general_cubic(x * 2 - .5, r * 2, B, C) +
+	    general_cubic(x * 2 + .5, r * 2, B, C);
+
+    ax = fabs (x / r);
 
     if (ax < 1)
     {
-	return ((12 - 9 * B - 6 * C) * ax * ax * ax +
-		(-18 + 12 * B + 6 * C) * ax * ax + (6 - 2 * B)) / 6;
+	return (((12 - 9 * B - 6 * C) * ax +
+		 (-18 + 12 * B + 6 * C)) * ax * ax +
+		(6 - 2 * B)) / 6;
     }
-    else if (ax >= 1 && ax < 2)
+    else if (ax < 2)
     {
-	return ((-B - 6 * C) * ax * ax * ax +
-		(6 * B + 30 * C) * ax * ax + (-12 * B - 48 * C) *
-		ax + (8 * B + 24 * C)) / 6;
+	return ((((-B - 6 * C) * ax +
+		 (6 * B + 30 * C)) * ax +
+		(-12 * B - 48 * C)) * ax +
+		(8 * B + 24 * C)) / 6;
     }
     else
     {
-	return 0;
+	return 0.0;
     }
 }
 
+static int
+cubic_width (double r)
+{
+    return MAX (2, ceil (r * 4));
+}
+
+/* PIXMAN_KERNEL_CATMULL_ROM: Catmull-Rom interpolation. Often called
+   "cubic interpolation", "b-spline", or just "cubic" by other
+   software. This filter has negative values so it can produce ringing
+   and output pixels outside the range of input pixels. This is very
+   close to lanczos2 so there is no reason to supply that as well.
+*/
+
 static double
-cubic_kernel (double x)
+cubic_kernel (double x, double r)
 {
-    /* This is the Mitchell-Netravali filter.
-     *
-     * (0.0, 0.5) would give us the Catmull-Rom spline,
-     * but that one seems to be indistinguishable from Lanczos2.
-     */
-    return general_cubic (x, 1/3.0, 1/3.0);
+    return general_cubic (x, r, 0.0, 0.5);
 }
 
-static const filter_info_t filters[] =
+/* PIXMAN_KERNEL_MITCHELL: Cubic recommended by the Mitchell-Netravali
+   paper.  This has negative values and because the values at +/-1 are
+   not zero it does not interpolate the pixels, meaning it will change
+   an image even if there is no translation.
+*/
+
+static double
+mitchell_kernel (double x, double r)
 {
-    { PIXMAN_KERNEL_IMPULSE,	        impulse_kernel,   0.0 },
-    { PIXMAN_KERNEL_BOX,	        box_kernel,       1.0 },
-    { PIXMAN_KERNEL_LINEAR,	        linear_kernel,    2.0 },
-    { PIXMAN_KERNEL_CUBIC,		cubic_kernel,     4.0 },
-    { PIXMAN_KERNEL_GAUSSIAN,	        gaussian_kernel,  6 * SIGMA },
-    { PIXMAN_KERNEL_LANCZOS2,	        lanczos2_kernel,  4.0 },
-    { PIXMAN_KERNEL_LANCZOS3,	        lanczos3_kernel,  6.0 },
-    { PIXMAN_KERNEL_LANCZOS3_STRETCHED, nice_kernel,      8.0 },
-};
+    return general_cubic (x, r, 1/3.0, 1/3.0);
+}
+
+/* PIXMAN_KERNEL_NOTCH: Cubic recommended by the Mitchell-Netravali
+   paper to remove postaliasing artifacts. This does not remove
+   aliasing already present in the source image, though it may appear
+   to due to it's excessive blurriness. In any case this is more
+   useful than gaussian for image reconstruction.
+*/
 
-/* This function scales @kernel2 by @scale, then
- * aligns @x1 in @kernel1 with @x2 in @kernel2 and
- * and integrates the product of the kernels across @width.
- *
- * This function assumes that the intervals are within
- * the kernels in question. E.g., the caller must not
- * try to integrate a linear kernel ouside of [-1:1]
- */
 static double
-integral (pixman_kernel_t kernel1, double x1,
-	  pixman_kernel_t kernel2, double scale, double x2,
-	  double width)
+notch_kernel (double x, double r)
 {
-    /* If the integration interval crosses zero, break it into
-     * two separate integrals. This ensures that filters such
-     * as LINEAR that are not differentiable at 0 will still
-     * integrate properly.
-     */
-    if (x1 < 0 && x1 + width > 0)
-    {
-	return
-	    integral (kernel1, x1, kernel2, scale, x2, - x1) +
-	    integral (kernel1, 0, kernel2, scale, x2 - x1, width + x1);
-    }
-    else if (x2 < 0 && x2 + width > 0)
-    {
+    return general_cubic (x, r, 1.5, -0.25);
+}
+
+/* PIXMAN_KERNEL_LANCZOS3: lanczos windowed sinc function from -3 to
+   +3. Very popular with high-end software though I think any
+   advantage over cubics is hidden by quantization and programming
+   mistakes. You will see LANCZOS5 or even 7 sometimes.
+*/
+
+static double
+sinc (double x)
+{
+    return x ? sin (M_PI * x) / (M_PI * x) : 1.0;
+}
+
+static double
+lanczos (double x, double n)
+{
+    return fabs (x) < n ? sinc (x) * sinc (x * (1.0 / n)) : 0.0;
+}
+
+static double
+lanczos3_kernel (double x, double r)
+{
+    if (r < 1.0)
 	return
-	    integral (kernel1, x1, kernel2, scale, x2, - x2) +
-	    integral (kernel1, x1 - x2, kernel2, scale, 0, width + x2);
-    }
-    else if (kernel1 == PIXMAN_KERNEL_IMPULSE)
-    {
-	assert (width == 0.0);
-	return filters[kernel2].func (x2 * scale);
-    }
-    else if (kernel2 == PIXMAN_KERNEL_IMPULSE)
-    {
-	assert (width == 0.0);
-	return filters[kernel1].func (x1);
-    }
+	    lanczos3_kernel (x * 2 - .5, r * 2) +
+	    lanczos3_kernel (x * 2 + .5, r * 2);
     else
-    {
-	/* Integration via Simpson's rule */
-#define N_SEGMENTS 128
-#define SAMPLE(a1, a2)							\
-	(filters[kernel1].func ((a1)) * filters[kernel2].func ((a2) * scale))
-	
-	double s = 0.0;
-	double h = width / (double)N_SEGMENTS;
-	int i;
-
-	s = SAMPLE (x1, x2);
-
-	for (i = 1; i < N_SEGMENTS; i += 2)
-	{
-	    double a1 = x1 + h * i;
-	    double a2 = x2 + h * i;
+	return lanczos (x / r, 3.0);
+}
 
-	    s += 2 * SAMPLE (a1, a2);
+static int
+lanczos3_width (double r)
+{
+    return MAX (2, ceil (r * 6));
+}
 
-	    if (i >= 2 && i < N_SEGMENTS - 1)
-		s += 4 * SAMPLE (a1, a2);
-	}
+/* PIXMAN_KERNEL_LANCZOS3_STRETCHED - The LANCZOS3 kernel widened by
+   4/3.  Recommended by Jim Blinn
+   http://graphics.cs.cmu.edu/nsp/course/15-462/Fall07/462/papers/jaggy.pdf
+*/
 
-	s += SAMPLE (x1 + width, x2 + width);
-	
-	return h * s * (1.0 / 3.0);
-    }
+static double
+nice_kernel (double x, double r)
+{
+    return lanczos3_kernel (x, r * (4.0/3));
 }
 
-static pixman_fixed_t *
-create_1d_filter (int             *width,
-		  pixman_kernel_t  reconstruct,
-		  pixman_kernel_t  sample,
-		  double           scale,
-		  int              n_phases)
+static int
+nice_width (double r)
 {
-    pixman_fixed_t *params, *p;
-    double step;
-    double size;
-    int i;
+    return MAX (2.0, ceil (r * 8));
+}
 
-    size = scale * filters[sample].width + filters[reconstruct].width;
-    *width = ceil (size);
 
-    p = params = malloc (*width * n_phases * sizeof (pixman_fixed_t));
-    if (!params)
-        return NULL;
+static const filter_info_t filters[] =
+{
+    { PIXMAN_KERNEL_IMPULSE,	        impulse_kernel,   impulse_width },
+    { PIXMAN_KERNEL_BOX,	        box_kernel,       box_width },
+    { PIXMAN_KERNEL_LINEAR,	        linear_kernel,    linear_width },
+    { PIXMAN_KERNEL_CUBIC,		mitchell_kernel,  cubic_width },
+    { PIXMAN_KERNEL_GAUSSIAN,	        notch_kernel,     cubic_width },
+    { PIXMAN_KERNEL_LANCZOS2,	        cubic_kernel,     cubic_width },
+    { PIXMAN_KERNEL_LANCZOS3,	        lanczos3_kernel,  lanczos3_width },
+    { PIXMAN_KERNEL_LANCZOS3_STRETCHED, nice_kernel,      nice_width },
+};
 
-    step = 1.0 / n_phases;
+
+/* Fills in one dimension of the filter array */
+static void get_filter(pixman_kernel_t filter, double r,
+		       int width, int subsample,
+		       pixman_fixed_t* out)
+{
+    int i;
+    pixman_fixed_t *p = out;
+    int n_phases = 1 << subsample;
+    double step = 1.0 / n_phases;
+    kernel_func_t func = filters[filter].func;
+
+    /* special-case the impulse filter: */
+    if (width <= 1)
+    {
+	for (i = 0; i < n_phases; ++i)
+	    *p++ = pixman_fixed_1;
+	return;
+    }
 
     for (i = 0; i < n_phases; ++i)
     {
-        double frac = step / 2.0 + i * step;
-	pixman_fixed_t new_total;
-        int x, x1, x2;
-	double total;
-
-	/* Sample convolution of reconstruction and sampling
-	 * filter. See rounding.txt regarding the rounding
-	 * and sample positions.
-	 */
-
-	x1 = ceil (frac - *width / 2.0 - 0.5);
-        x2 = x1 + *width;
-
-	total = 0;
-        for (x = x1; x < x2; ++x)
-        {
-	    double pos = x + 0.5 - frac;
-	    double rlow = - filters[reconstruct].width / 2.0;
-	    double rhigh = rlow + filters[reconstruct].width;
-	    double slow = pos - scale * filters[sample].width / 2.0;
-	    double shigh = slow + scale * filters[sample].width;
-	    double c = 0.0;
-	    double ilow, ihigh;
-
-	    if (rhigh >= slow && rlow <= shigh)
-	    {
-		ilow = MAX (slow, rlow);
-		ihigh = MIN (shigh, rhigh);
-
-		c = integral (reconstruct, ilow,
-			      sample, 1.0 / scale, ilow - pos,
-			      ihigh - ilow);
-	    }
-
-	    total += c;
-            *p++ = (pixman_fixed_t)(c * 65536.0 + 0.5);
-        }
+	double frac = (i + .5) * step;
+	/* Center of left-most pixel: */
+	double x1 = ceil (frac - width / 2.0 - 0.5) - frac + 0.5;
+	double total = 0;
+	pixman_fixed_t new_total = 0;
+	int j;
+
+	for (j = 0; j < width; ++j)
+	{
+	    double v = func(x1 + j, r);
+	    total += v;
+	    p[j] = pixman_double_to_fixed (v);
+	}
 
 	/* Normalize */
-	p -= *width;
         total = 1 / total;
-        new_total = 0;
-	for (x = x1; x < x2; ++x)
-	{
-	    pixman_fixed_t t = (*p) * total + 0.5;
+	for (j = 0; j < width; ++j)
+	    new_total += (p[j] *= total);
 
-	    new_total += t;
-	    *p++ = t;
-	}
+	/* Put any error on center pixel */
+	p[width / 2] += (pixman_fixed_1 - new_total);
 
-	if (new_total != pixman_fixed_1)
-	    *(p - *width / 2) += (pixman_fixed_1 - new_total);
+	p += width;
     }
-
-    return params;
 }
 
+
 /* Create the parameter list for a SEPARABLE_CONVOLUTION filter
- * with the given kernels and scale parameters
+ * with the given kernels and scale parameters.
  */
 PIXMAN_EXPORT pixman_fixed_t *
 pixman_filter_create_separable_convolution (int             *n_values,
@@ -306,45 +321,58 @@ pixman_filter_create_separable_convolution (int             *n_values,
 					    pixman_fixed_t   scale_y,
 					    pixman_kernel_t  reconstruct_x,
 					    pixman_kernel_t  reconstruct_y,
-					    pixman_kernel_t  sample_x,
-					    pixman_kernel_t  sample_y,
+					    pixman_kernel_t  filter_x,
+					    pixman_kernel_t  filter_y,
 					    int              subsample_bits_x,
 					    int	             subsample_bits_y)
 {
     double sx = fabs (pixman_fixed_to_double (scale_x));
     double sy = fabs (pixman_fixed_to_double (scale_y));
-    pixman_fixed_t *horz = NULL, *vert = NULL, *params = NULL;
-    int subsample_x, subsample_y;
-    int width, height;
-
-    subsample_x = (1 << subsample_bits_x);
-    subsample_y = (1 << subsample_bits_y);
+    int width_x, width_y, size_x, size_y;
+    pixman_fixed_t *params;
+
+    /* Simulate older version of this function:
+
+       Old version attempted to convolve two filters, the "reconstruct"
+       filter at a scale of 1 and the main filter at the given scale.
+       Since these filters are supposed to be low pass filters, a
+       more useful convolution is achieved by just choosing the
+       larger one.
+
+       The old version did have a quirk that may be relied on and is
+       useful: If the reconstruct was BOX then the result was "boxy"
+       pixels, which is the result of a filter of scale < 1.  Both
+       this and IMPULSE (which was broken) produce this effect.  If
+       BOX reconstruct is really wanted, use LINEAR for it as it is
+       the same at scale=1.
+    */
+    if (reconstruct_x > PIXMAN_KERNEL_BOX  &&  sx < 1.0)
+    {
+	filter_x = reconstruct_x;
+	sx = 1.0;
+    }
+    if (reconstruct_y > PIXMAN_KERNEL_BOX  &&  sy < 1.0)
+    {
+	filter_y = reconstruct_y;
+	sy = 1.0;
+    }
 
-    horz = create_1d_filter (&width, reconstruct_x, sample_x, sx, subsample_x);
-    vert = create_1d_filter (&height, reconstruct_y, sample_y, sy, subsample_y);
+    width_x = filters[filter_x] . width (sx);
+    size_x = (1 << subsample_bits_x) * width_x;
+    width_y = filters[filter_y] . width (sy);
+    size_y = (1 << subsample_bits_y) * width_y;
 
-    if (!horz || !vert)
-        goto out;
-    
-    *n_values = 4 + width * subsample_x + height * subsample_y;
-    
+    *n_values = 4 + size_x + size_y;
     params = malloc (*n_values * sizeof (pixman_fixed_t));
-    if (!params)
-        goto out;
+    if (!params) return 0;
 
-    params[0] = pixman_int_to_fixed (width);
-    params[1] = pixman_int_to_fixed (height);
+    params[0] = pixman_int_to_fixed (width_x);
+    params[1] = pixman_int_to_fixed (width_y);
     params[2] = pixman_int_to_fixed (subsample_bits_x);
     params[3] = pixman_int_to_fixed (subsample_bits_y);
 
-    memcpy (params + 4, horz,
-	    width * subsample_x * sizeof (pixman_fixed_t));
-    memcpy (params + 4 + width * subsample_x, vert,
-	    height * subsample_y * sizeof (pixman_fixed_t));
-
-out:
-    free (horz);
-    free (vert);
+    get_filter(filter_x, sx, width_x, subsample_bits_x, params + 4);
+    get_filter(filter_y, sy, width_y, subsample_bits_y, params + 4 + size_x);
 
     return params;
 }
-- 
1.7.9.5