[cairo] [PATCH] pixman: high qualtiy downscaling

Tue Nov 17 03:04:06 PST 2009

Excerpts from Jeff Muizelaar's message of vie jul 31 22:56:13 +0200 2009:
> The following patch is a cleaned up version of my previous downscaling
> patches. It doesn't currently export any api, but I'd like to get it
> into the pixman repo to avoid any duplication of work while the proper
> api to expose it is worked out.

What's the status of this? any chance of getting this fixed in pixman
soon? Poppler has been affected by this bug[1] for a long time, and it's
the main reason why we still allow building poppler-glib frontend with
the splash backend. If there's anything I can do to help, please let
me know.

[1] http://bugs.freedesktop.org/show_bug.cgi?id=5589

Thanks, 

> -Jeff
> 
> 
> diff --git a/pixman/Makefile.am b/pixman/Makefile.am
> index e19fa6e..8292bfc 100644
> --- a/pixman/Makefile.am
> +++ b/pixman/Makefile.am
> @@ -33,6 +33,10 @@ libpixman_1_la_SOURCES =            \
>      pixman-edge-imp.h            \
>      pixman-trap.c                \
>      pixman-timer.c                \
> +    pixman-rescale.c            \
> +    pixman-rescale-box.c            \
> +    pixman-rescale-integer-box.c        \
> +    pixman-rescale-lanczos.c        \
>      pixman-matrix.c
>  
>  libpixmanincludedir = $(includedir)/pixman-1/
> diff --git a/pixman/pixman-rescale-box.c b/pixman/pixman-rescale-box.c
> new file mode 100644
> index 0000000..6d9a874
> --- /dev/null
> +++ b/pixman/pixman-rescale-box.c
> @@ -0,0 +1,439 @@
> +/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
> +/*
> + * Copyright © 2009 Mozilla Corporation
> + *
> + * Permission to use, copy, modify, distribute, and sell this software and its
> + * documentation for any purpose is hereby granted without fee, provided that
> + * the above copyright notice appear in all copies and that both that
> + * copyright notice and this permission notice appear in supporting
> + * documentation, and that the name of Mozilla Corporation not be used in
> + * advertising or publicity pertaining to distribution of the software without
> + * specific, written prior permission.  Mozilla Corporation makes no
> + * representations about the suitability of this software for any purpose.  It
> + * is provided "as is" without express or implied warranty.
> + *
> + * MOZILLA CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
> + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT
> + * SHALL MOZILLA CORPORATION BE LIABLE FOR ANY SPECIAL, INDIRECT OR
> + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
> + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
> + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
> + * OF THIS SOFTWARE.
> + *
> + * Author: Jeff Muizelaar, Mozilla Corp.
> + */
> +
> +/* This implements a box filter that supports non-integer box sizes */
> +
> +#ifdef HAVE_CONFIG_H
> +#include <config.h>
> +#endif
> +
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <assert.h>
> +#include <stdlib.h>
> +#include <math.h>
> +#include "pixman-private.h"
> +#include "pixman-rescale.h"
> +
> +/* we work in fixed point where 1. == 1 << 24 */
> +#define FIXED_SHIFT 24
> +
> +#if 0
> +static void downsample_row_box_filter_sse2(
> +        int start,
> +        int width,
> +        uint32_t *src, uint32_t *dest,
> +                int coverage[], int pixel_coverage)
> +{
> +    /* we need an array of the pixel contribution of each destination pixel on
> the boundaries.
> +     * we invert the value to get the value on the other size of the box */
> +    /*
> +
> +       value  = a * contribution * 1/box_size
> +       value += a * 1/box_size
> +       value += a * 1/box_size
> +       value += a * 1/box_size
> +       value += a * (1 - contribution) * 1/box_size
> +                a * (1/box_size - contribution * 1/box_size)
> +
> +        box size is constant
> +
> +
> +       value = a * contribtion_a * 1/box_size + b * contribution_b * 1/box_size
> +               contribution_b = (1 - contribution_a)
> +                              = (1 - contribution_a_next)
> +    */
> +
> +    /* box size = ceil(src_width/dest_width) */
> +    int x = 0;
> +    while (x < start) {
> +        int box = 1 << FIXED_SHIFT;
> +        int start_coverage = coverage[x];
> +        box -= start_coverage;
> +        src++;
> +        while (box >= pixel_coverage) {
> +            src++;
> +            box -= pixel_coverage;
> +        }
> +        x++;
> +    }
> +    __m128i accum = _mm_setzero_si128();
> +    __m128i zero = _mm_setzero_si128();
> +    while (x < start + width) {
> +        uint32_t a = 0;
> +        uint32_t r = 0;
> +        uint32_t g = 0;
> +        uint32_t b = 0;
> +        int box = 1 << FIXED_SHIFT;
> +        int start_coverage = coverage[x];
> +        __m128i v_src = _mm_cvtsi32_si128(*src);
> +        v_src = _mm_unpacklo_epi16(_mm_unpacklo_epi8(v_src, zero), zero);
> +        __m128i coeff = _mm_cvtsi32_si128(start_coverage);
> +        /* duplicate the filter coefficient */
> +        coeff = _mm_shuffle_epi32(coeff, _MM_SHUFFLE(0, 0, 0, 0));
> +        /* multiply and accumulate the result:
> +         * 0000vvvv_0000vvvv_0000vvvv_0000vvvv *
> 000000aa_000000rr_000000gg_000000bb */
> +        accum = _mm_madd_epi16(v_src, coeff);
> +        // we can't use a 32 bit mulitply in sse2
> +        // sse4.1 gives us pmulld but it is apparenlty slow (6 clocks on
> Nehalem)
> +        src++;
> +        x++;
> +        box -= start_coverage;
> +        while (box >= pixel_coverage) {
> +            a += ((*src >> 24) & 0xff) * pixel_coverage;
> +            r += ((*src >> 16) & 0xff) * pixel_coverage;
> +            g += ((*src >>  8) & 0xff) * pixel_coverage;
> +            b += ((*src >>  0) & 0xff) * pixel_coverage;
> +            src++;
> +
> +            box -= pixel_coverage;
> +        }
> +        /* multiply by whatever is leftover
> +         * this ensures that we don't bias down.
> +         * i.e. start_coverage + n*pixel_coverage + box == 1 << 24 */
> +        if (box > 0) {
> +            a += ((*src >> 24) & 0xff) * box;
> +            r += ((*src >> 16) & 0xff) * box;
> +            g += ((*src >>  8) & 0xff) * box;
> +            b += ((*src >>  0) & 0xff) * box;
> +        }
> +
> +        a >>= FIXED_SHIFT;
> +        r >>= FIXED_SHIFT;
> +        g >>= FIXED_SHIFT;
> +        b >>= FIXED_SHIFT;
> +
> +        *dest = (a << 24) | (r << 16) | (g << 8) | b;
> +        dest++;
> +    }
> +}
> +#endif
> +
> +static void downsample_row_box_filter (
> +        int start, int width,
> +        uint32_t *src, uint32_t *dest,
> +        int coverage[], int pixel_coverage)
> +{
> +    /* we need an array of the pixel contribution of each destination pixel on
> the boundaries.
> +     * we invert the value to get the value on the other size of the box */
> +    /*
> +
> +       value  = a * contribution * 1/box_size
> +       value += a * 1/box_size
> +       value += a * 1/box_size
> +       value += a * 1/box_size
> +       value += a * (1 - contribution) * 1/box_size
> +                a * (1/box_size - contribution * 1/box_size)
> +
> +        box size is constant
> +
> +
> +       value = a * contribtion_a * 1/box_size + b * contribution_b * 1/box_size
> +               contribution_b = (1 - contribution_a)
> +                              = (1 - contribution_a_next)
> +    */
> +
> +    /* box size = ceil(src_width/dest_width) */
> +    int x = 0;
> +
> +    /* skip to start */
> +    /* XXX: it might be possible to do this directly instead of iteratively,
> however
> +     * the iterative solution is simple */
> +    while (x < start)
> +    {
> +        int box = 1 << FIXED_SHIFT;
> +        int start_coverage = coverage[x];
> +        box -= start_coverage;
> +        src++;
> +        while (box >= pixel_coverage)
> +        {
> +            src++;
> +            box -= pixel_coverage;
> +        }
> +        x++;
> +    }
> +
> +    while (x < start + width)
> +    {
> +        uint32_t a = 0;
> +        uint32_t r = 0;
> +        uint32_t g = 0;
> +        uint32_t b = 0;
> +        int box = 1 << FIXED_SHIFT;
> +        int start_coverage = coverage[x];
> +
> +        a = ((*src >> 24) & 0xff) * start_coverage;
> +        r = ((*src >> 16) & 0xff) * start_coverage;
> +        g = ((*src >>  8) & 0xff) * start_coverage;
> +        b = ((*src >>  0) & 0xff) * start_coverage;
> +        src++;
> +        x++;
> +        box -= start_coverage;
> +
> +        while (box >= pixel_coverage)
> +        {
> +            a += ((*src >> 24) & 0xff) * pixel_coverage;
> +            r += ((*src >> 16) & 0xff) * pixel_coverage;
> +            g += ((*src >>  8) & 0xff) * pixel_coverage;
> +            b += ((*src >>  0) & 0xff) * pixel_coverage;
> +            src++;
> +
> +            box -= pixel_coverage;
> +        }
> +
> +        /* multiply by whatever is leftover
> +         * this ensures that we don't bias down.
> +         * i.e. start_coverage + n*pixel_coverage + box == 1 << 24 */
> +        if (box > 0)
> +        {
> +            a += ((*src >> 24) & 0xff) * box;
> +            r += ((*src >> 16) & 0xff) * box;
> +            g += ((*src >>  8) & 0xff) * box;
> +            b += ((*src >>  0) & 0xff) * box;
> +        }
> +
> +        a >>= FIXED_SHIFT;
> +        r >>= FIXED_SHIFT;
> +        g >>= FIXED_SHIFT;
> +        b >>= FIXED_SHIFT;
> +
> +        *dest = (a << 24) | (r << 16) | (g << 8) | b;
> +        dest++;
> +    }
> +}
> +
> +static void downsample_columns_box_filter (
> +        int n,
> +        int start_coverage,
> +        int pixel_coverage,
> +        uint32_t *src, uint32_t *dest)
> +{
> +    int stride = n;
> +    while (n--) {
> +        uint32_t a = 0;
> +        uint32_t r = 0;
> +        uint32_t g = 0;
> +        uint32_t b = 0;
> +        uint32_t *column_src = src;
> +        int box = 1 << FIXED_SHIFT;
> +
> +        a = ((*column_src >> 24) & 0xff) * start_coverage;
> +        r = ((*column_src >> 16) & 0xff) * start_coverage;
> +        g = ((*column_src >>  8) & 0xff) * start_coverage;
> +        b = ((*column_src >>  0) & 0xff) * start_coverage;
> +        column_src += stride;
> +        box -= start_coverage;
> +
> +        while (box >= pixel_coverage)
> +        {
> +            a += ((*column_src >> 24) & 0xff) * pixel_coverage;
> +            r += ((*column_src >> 16) & 0xff) * pixel_coverage;
> +            g += ((*column_src >>  8) & 0xff) * pixel_coverage;
> +            b += ((*column_src >>  0) & 0xff) * pixel_coverage;
> +            column_src += stride;
> +            box -= pixel_coverage;
> +        }
> +
> +        if (box > 0) {
> +            a += ((*column_src >> 24) & 0xff) * box;
> +            r += ((*column_src >> 16) & 0xff) * box;
> +            g += ((*column_src >>  8) & 0xff) * box;
> +            b += ((*column_src >>  0) & 0xff) * box;
> +        }
> +
> +        a >>= FIXED_SHIFT;
> +        r >>= FIXED_SHIFT;
> +        g >>= FIXED_SHIFT;
> +        b >>= FIXED_SHIFT;
> +
> +        *dest = (a << 24) | (r << 16) | (g << 8) | b;
> +        dest++;
> +        src++;
> +    }
> +}
> +
> +static int compute_coverage (int coverage[], int src_length, int dest_length)
> +{
> +    int i;
> +    /* num = src_length/dest_length
> +       total = sum(pixel) / num
> +
> +       pixel * 1/num == pixel * dest_length / src_length
> +    */
> +    /* the average contribution of each source pixel */
> +    int ratio = ((1 << 24)*(long long int)dest_length)/src_length;
> +    /* because ((1 << 24)*(long long int)dest_length) won't always be
> divisible by src_length
> +     * we'll need someplace to put the other bits.
> +     *
> +     * We want to ensure a + n*ratio < 1<<24
> +     *
> +     * 1<<24
> +     * */
> +
> +    double scale = (double)src_length/dest_length;
> +
> +    /* for each destination pixel compute the coverage of the left most pixel
> included in the box */
> +    /* I have a proof of this, which this margin is too narrow to contain */
> +    for (i=0; i<dest_length; i++)
> +    {
> +        float left_side = i*scale;
> +        float right_side = (i+1)*scale;
> +        float right_fract = right_side - floor (right_side);
> +        float left_fract = ceil (left_side) - left_side;
> +        int overage;
> +        /* find out how many source pixels will be used to fill the box */
> +        int count = floor (right_side) - ceil (left_side);
> +        /* what's the maximum value this expression can become?
> +           floor((i+1)*scale) - ceil(i*scale)
> +
> +           (i+1)*scale - i*scale == scale
> +
> +           since floor((i+1)*scale) <= (i+1)*scale
> +           and   ceil(i*scale)      >= i*scale
> +
> +           floor((i+1)*scale) - ceil(i*scale) <= scale
> +
> +           further since: floor((i+1)*scale) - ceil(i*scale) is an integer
> +
> +           therefore:
> +           floor((i+1)*scale) - ceil(i*scale) <= floor(scale)
> +        */
> +
> +        if (left_fract == 0.)
> +            count--;
> +
> +        /* compute how much the right-most pixel contributes */
> +        overage = ratio*(right_fract);
> +
> +        /* the remainder is the the amount that the left-most pixel
> +         * contributes */
> +        coverage[i] = (1<<24) - (count * ratio + overage);
> +    }
> +
> +    return ratio;
> +}
> +
> +PIXMAN_EXPORT
> +int downscale_box_filter(const struct rescale_fetcher *fetcher, void *closure,
> unsigned orig_width, unsigned orig_height,
> +        signed scaled_width, signed scaled_height,
> +        uint16_t start_column, uint16_t start_row,
> +        uint16_t width, uint16_t height,
> +        uint32_t *dest, int dst_stride)
> +{
> +    int pixel_coverage_x, pixel_coverage_y;
> +    int dest_y;
> +    int src_y = 0;
> +
> +    void (*fetch_scanline)(void *closure, int y, uint32_t **scanline) =
> fetcher->fetch_scanline;
> +    uint32_t *scanline = fetcher->init(closure);
> +
> +    int *x_coverage = pixman_malloc_abc (orig_width, 1, sizeof(int));
> +    int *y_coverage = pixman_malloc_abc (orig_height, 1, sizeof(int));
> +
> +    /* we need to allocate enough room for ceil(src_height/dest_height)+1
> +      Example:
> +      src_height = 140
> +      dest_height = 50
> +      src_height/dest_height = 2.8
> +
> +      |-------------|       2.8 pixels
> +      |----|----|----|----| 4 pixels
> +      need to sample 3 pixels
> +
> +         |-------------|       2.8 pixels
> +      |----|----|----|----| 4 pixels
> +      need to sample 4 pixels
> +    */
> +
> +    uint32_t *temp_buf = pixman_malloc_abc ((orig_height +
> scaled_height-1)/scaled_height+1, scaled_width, sizeof(uint32_t));
> +
> +    if (!x_coverage || !y_coverage || !scanline || !temp_buf) {
> +        return -1;
> +    }
> +
> +    pixel_coverage_x = compute_coverage (x_coverage, orig_width, scaled_width);
> +    pixel_coverage_y = compute_coverage (y_coverage, orig_height,
> scaled_height);
> +
> +    assert (width + start_column <= scaled_width);
> +
> +    /* skip the rows at the beginning */
> +    for (dest_y = 0; dest_y < start_row; dest_y++)
> +    {
> +        int box = 1 << FIXED_SHIFT;
> +        int start_coverage_y = y_coverage[dest_y];
> +        box -= start_coverage_y;
> +        src_y++;
> +        while (box >= pixel_coverage_y)
> +        {
> +            box -= pixel_coverage_y;
> +            src_y++;
> +        }
> +    }
> +
> +    for (; dest_y < start_row + height; dest_y++)
> +    {
> +        int columns = 0;
> +        int box = 1 << FIXED_SHIFT;
> +        int start_coverage_y = y_coverage[dest_y];
> +
> +        fetch_scanline (closure, src_y, &scanline);
> +        downsample_row_box_filter (start_column, width, scanline, temp_buf +
> width * columns, x_coverage, pixel_coverage_x);
> +        columns++;
> +        src_y++;
> +        box -= start_coverage_y;
> +
> +        while (box >= pixel_coverage_y)
> +        {
> +            fetch_scanline (closure, src_y, &scanline);
> +            downsample_row_box_filter (start_column, width, scanline, temp_buf
> + width * columns, x_coverage, pixel_coverage_x);
> +            columns++;
> +            src_y++;
> +            box -= pixel_coverage_y;
> +        }
> +
> +        /* downsample any leftovers */
> +        if (box > 0)
> +        {
> +            fetch_scanline (closure, src_y, &scanline);
> +            downsample_row_box_filter (start_column, width, scanline, temp_buf
> + width * columns, x_coverage, pixel_coverage_x);
> +            columns++;
> +        }
> +
> +        /* now scale the rows we just downsampled in the y direction */
> +        downsample_columns_box_filter (width, start_coverage_y,
> pixel_coverage_y, temp_buf, dest);
> +        dest += dst_stride/4;
> +
> +        assert(width*columns <= ((orig_height +
> scaled_height-1)/scaled_height+1) * width);
> +    }
> +
> +    assert (src_y<=orig_height);
> +
> +    fetcher->fini (closure, scanline);
> +    free (x_coverage);
> +    free (y_coverage);
> +    free (temp_buf);
> +
> +    return 0;
> +}
> diff --git a/pixman/pixman-rescale-integer-box.c
> b/pixman/pixman-rescale-integer-box.c
> new file mode 100644
> index 0000000..5142705
> --- /dev/null
> +++ b/pixman/pixman-rescale-integer-box.c
> @@ -0,0 +1,271 @@
> +/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
> +/*
> + * Copyright © 2009 Mozilla Corporation
> + *
> + * Permission to use, copy, modify, distribute, and sell this software and its
> + * documentation for any purpose is hereby granted without fee, provided that
> + * the above copyright notice appear in all copies and that both that
> + * copyright notice and this permission notice appear in supporting
> + * documentation, and that the name of Mozilla Corporation not be used in
> + * advertising or publicity pertaining to distribution of the software without
> + * specific, written prior permission.  Mozilla Corporation makes no
> + * representations about the suitability of this software for any purpose.  It
> + * is provided "as is" without express or implied warranty.
> + *
> + * MOZILLA CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
> + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT
> + * SHALL MOZILLA CORPORATION BE LIABLE FOR ANY SPECIAL, INDIRECT OR
> + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
> + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
> + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
> + * OF THIS SOFTWARE.
> + *
> + * Author: Jeff Muizelaar, Mozilla Corp.
> + */
> +
> +#ifdef HAVE_CONFIG_H
> +#include <config.h>
> +#endif
> +
> +#include <stdio.h>
> +#include <assert.h>
> +#include <stdlib.h>
> +#include "pixman-private.h"
> +#include "pixman-rescale.h"
> +
> +/*
> +When box filtering with an integer sized box we only ever have two box sizes.
> +Proof:
> +    Assume w1 > w2.
> +    r = w1/w2
> +
> +    The size of the two boxes is
> +    r1 = ceil(r), r2 = floor(r)
> +
> +    we want to find non-negative integers p and q such that:
> +    w1 = p*r1 + q*r2
> +
> +    if r1 == r2 then
> +        r is an integer and thus w2 evenly divides w1
> +        therefor p = w2 and q = 0
> +    otherwise r1 != r2 and
> +        r1 = r2 + 1
> +        w1 = p*(r2 + 1) + q * r2
> +           = p*r2 + q*r2 + p
> +           = (p + q)*r2 + p
> +
> +        we then choose a value of p such that:
> +            w1 = r2*w2 + p
> +            thus p = w1 mod w2
> +            and  q = w2 - p which is > 0 because
> +                            p = w1 mod w2
> +
> +            subing into:
> +            (p + q)*r2 + p
> +            gives:
> +            w1 = (p + w2 - p)*r2 + p
> +            w1 = w2*r2 + w1 mod w2
> +
> +*/
> +#define AVOID_DIVIDE
> +
> +/* downsample_row_box_filter is a Digital Differential Analyzer (like
> Bresenham's line algorithm)
> + * 'e' is the current fraction or error
> + */
> +void downsample_row_box_filter(
> +        int n,
> +        uint32_t *src, uint32_t *dest,
> +                long dx2, long e, long src_dx)
> +{
> +    /* the filter sums only two different counts of samples.
> +     * we compute the inverses for these two different counts
> +     * so that we can do multiplications instead of divisions */
> +    int div_inv[2];
> +    int div = src_dx/dx2;
> +    /* we can index on the bottom bit of the divisor because the two different
> counts
> +     * are +1 */
> +    div_inv[div & 1] = (1<<24)/div;
> +    div += 1;
> +    div_inv[div & 1] = (1<<24)/div;
> +
> +    while (n--) {
> +        uint32_t a, r, g, b;
> +        int div;
> +        int count = 1;
> +
> +        /* we always have at least one source pixel per destination pixel
> +         * because we are downscaling */
> +        a = (*src >> 24) & 0xff;
> +        r = (*src >> 16) & 0xff;
> +        g = (*src >>  8) & 0xff;
> +        b = (*src >>  0) & 0xff;
> +        e -= dx2;
> +        src++;
> +
> +        /* accumulate the rest of the source pixels */
> +        while (e > 0) {
> +            e -= dx2;
> +            a += (*src >> 24) & 0xff;
> +            r += (*src >> 16) & 0xff;
> +            g += (*src >>  8) & 0xff;
> +            b += (*src >>  0) & 0xff;
> +            count++;
> +            src++;
> +        }
> +
> +#ifdef AVOID_DIVIDE
> +        div = div_inv[count & 1];
> +
> +        a = (a * div + 0x10000) >> 24;
> +        r = (r * div + 0x10000) >> 24;
> +        g = (g * div + 0x10000) >> 24;
> +        b = (b * div + 0x10000) >> 24;
> +#else
> +        a /= count;
> +        r /= count;
> +        g /= count;
> +        b /= count;
> +#endif
> +
> +        *dest = (a << 24) | (r << 16) | (g << 8) | b;
> +        dest++;
> +
> +        e += src_dx;
> +    }
> +}
> +
> +void downsample_columns_box_filter(
> +        int n,
> +        int columns,
> +        uint32_t *src, uint32_t *dest)
> +{
> +    int stride = n;
> +    int div = (1<<24)/columns;
> +    while (n--)
> +    {
> +        uint32_t a, r, g, b;
> +        uint32_t *column_src = src;
> +        int h;
> +
> +        a = (*column_src >> 24) & 0xff;
> +        r = (*column_src >> 16) & 0xff;
> +        g = (*column_src >>  8) & 0xff;
> +        b = (*column_src >>  0) & 0xff;
> +
> +        for (h = 1; h < columns; h++)
> +        {
> +            a += (*column_src >> 24) & 0xff;
> +            r += (*column_src >> 16) & 0xff;
> +            g += (*column_src >>  8) & 0xff;
> +            b += (*column_src >>  0) & 0xff;
> +            column_src += stride;
> +        }
> +
> +#ifdef AVOID_DIVIDE
> +        a = (a * div + 0x10000) >> 24;
> +        r = (r * div + 0x10000) >> 24;
> +        g = (g * div + 0x10000) >> 24;
> +        b = (b * div + 0x10000) >> 24;
> +#else
> +        a /= columns;
> +        r /= columns;
> +        g /= columns ;
> +        b /= columns;
> +#endif
> +        *dest = (a << 24) | (r << 16) | (g << 8) | b;
> +        dest++;
> +        src++;
> +    }
> +}
> +
> +#define ROUND
> +
> +PIXMAN_EXPORT
> +int downscale_integer_box_filter (const struct rescale_fetcher *fetcher, void
> *closure, unsigned orig_width, unsigned orig_height,
> +        signed scaled_width, signed scaled_height,
> +        uint16_t start_column, uint16_t start_row,
> +        uint16_t width, uint16_t height,
> +        uint32_t *dest, int dst_stride)
> +{
> +    uint32_t *temp_buf;
> +    uint32_t *scanline;
> +    int src_offset = 0;
> +    int src_dy, e, dx2;
> +    int dest_dx, src_dx, ex, dx2x;
> +    int y = 0;
> +    int d;
> +
> +    /* setup our parameters for runing our DDA in the y direction */
> +    /* multiplying by 2 is for rounding purposes. i.e. to move the center */
> +    src_dy = orig_height * 2; /* dy *= 2 */
> +    e = (orig_height*2) - scaled_height; /* e = dy*2 - dx */
> +    dx2 = scaled_height*2; /* dx2 = dx*2 */
> +
> +    /* setup our parameters for doing DDA in the x direction */
> +    dest_dx = scaled_width;
> +    src_dx = orig_width;
> +    ex = (src_dx*2) - dest_dx;
> +    dx2x = dest_dx*2;
> +    src_dx *= 2;
> +
> +    /* Compute the src_offset and associated error value:
> +     * There are two ways to do this: */
> +    /* 1. Directly
> +    int nsrc_offset = (src_dx*startColumn + dest_dx)/(dest_dx*2);
> +    long nex = (src_dx*startColumn + dest_dx)%(dest_dx*2) - dest_dx*2 +
> src_dx; */
> +    /* 2. Iteratively */
> +    for (d = 0; d < start_column; d++) {
> +        while (ex > 0) {
> +            src_offset++;
> +            ex -= dx2x;
> +        }
> +        ex += src_dx;
> +    }
> +
> +    /* seek to the begining */
> +    for (d = 0; d < start_row; d++) {
> +        while (e > 0) {
> +            e -= dx2;
> +            y++;
> +        }
> +        e += src_dy;
> +    }
> +
> +    /* we need to allocate enough room for ceil(src_height/dest_height)
> scanlines */
> +    temp_buf = pixman_malloc_abc ((orig_height +
> scaled_height-1)/scaled_height, scaled_width, sizeof(uint32_t));
> +
> +    scanline = fetcher->init (closure);
> +
> +    if (!scanline || !temp_buf) {
> +        return -1;
> +    }
> +
> +    for (d = start_row; d < start_row + height; d++)
> +    {
> +        int columns = 0;
> +        while (e > 0)
> +        {
> +            fetcher->fetch_scanline (closure, y, &scanline);
> +
> +            /* XXX: We could turn this multiplication into addition. It
> +             * probably doesn't matter though  */
> +            downsample_row_box_filter (width, scanline + src_offset, temp_buf
> + width * columns,
> +                                       dx2x, ex, src_dx);
> +
> +            e -= dx2;
> +            columns++;
> +            y++;
> +        }
> +
> +        downsample_columns_box_filter (width, columns, temp_buf, dest);
> +
> +        dest += dst_stride/4;
> +        e += src_dy;
> +    }
> +
> +    fetcher->fini (closure, scanline);
> +    free (temp_buf);
> +
> +    return 0;
> +}
> +
> diff --git a/pixman/pixman-rescale-lanczos.c b/pixman/pixman-rescale-lanczos.c
> new file mode 100644
> index 0000000..864fd6d
> --- /dev/null
> +++ b/pixman/pixman-rescale-lanczos.c
> @@ -0,0 +1,483 @@
> +/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
> +/*
> + * Copyright © 2009 Mozilla Corporation
> + *
> + * Permission to use, copy, modify, distribute, and sell this software and its
> + * documentation for any purpose is hereby granted without fee, provided that
> + * the above copyright notice appear in all copies and that both that
> + * copyright notice and this permission notice appear in supporting
> + * documentation, and that the name of Mozilla Corporation not be used in
> + * advertising or publicity pertaining to distribution of the software without
> + * specific, written prior permission.  Mozilla Corporation makes no
> + * representations about the suitability of this software for any purpose.  It
> + * is provided "as is" without express or implied warranty.
> + *
> + * MOZILLA CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
> + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT
> + * SHALL MOZILLA CORPORATION BE LIABLE FOR ANY SPECIAL, INDIRECT OR
> + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
> + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
> + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
> + * OF THIS SOFTWARE.
> + *
> + * Author: Jeff Muizelaar, Mozilla Corp.
> + *
> + * Portions derived from Chromium:
> + *
> + * Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions are
> + * met:
> + *
> + *    * Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + *    * Redistributions in binary form must reproduce the above
> + * copyright notice, this list of conditions and the following disclaimer
> + * in the documentation and/or other materials provided with the
> + * distribution.
> + *    * Neither the name of Google Inc. nor the names of its
> + * contributors may be used to endorse or promote products derived from
> + * this software without specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#ifdef HAVE_CONFIG_H
> +#include <config.h>
> +#endif
> +
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <assert.h>
> +#include <stdlib.h>
> +#include <float.h>
> +#include <math.h>
> +#include "pixman-private.h"
> +#include "pixman-rescale.h"
> +
> +
> +#define FILTER_SHIFT 14
> +#define LANCZOS_LOBES 2
> +
> +/* contains the filter for each destination pixel */
> +struct filter {
> +    int count; /* filter size */
> +    int16_t *values; /* filter coefficients */
> +    int offset; /* filter start offset */
> +};
> +
> +static int clamp(int a)
> +{
> +    if (a > 255)
> +        return 255;
> +    if (a < 0)
> +        return 0;
> +    return a;
> +}
> +
> +/* from ffmpeg */
> +static inline uint8_t clip_uint8(int a)
> +{
> +    if (a&(~255)) /* check if any of the high bits are set */
> +        return (-a)>>31; /* clip to either 0 or 255 */
> +    else
> +        return a;
> +}
> +
> +#if 0
> +#include <pmmintrin.h>
> +#include <emmintrin.h>
> +static void
> +downsample_row_convolve_sse2(int n,
> +        uint32_t *src, uint32_t *dest,
> +                const struct filter *filter, int src_size)
> +{
> +    int pixel = 0;
> +    while (n--) {
> +        int i;
> +        __m128i accum = _mm_setzero_si128 ();
> +        __m128i zero  = _mm_setzero_si128 ();
> +        for (i=0; i<filter[pixel].count; i++) {
> +            __m128i v_src = _mm_cvtsi32_si128 (src[filter[pixel].offset + i]);
> +            v_src = _mm_unpacklo_epi16 (_mm_unpacklo_epi8 (v_src, zero), zero);
> +
> +            __m128i coeff = _mm_cvtsi32_si128 (filter[pixel].values[i]);
> +            /* duplicate the filter coefficient */
> +            coeff = _mm_shuffle_epi32 (coeff, _MM_SHUFFLE (0, 0, 0, 0));
> +
> +            /* multiply and accumulate the result:
> +             * 0000vvvv_0000vvvv_0000vvvv_0000vvvv *
> 000000aa_000000rr_000000gg_000000bb */
> +            __m128i result = _mm_madd_epi16 (v_src, coeff);
> +            accum = _mm_add_epi32 (accum, result);
> +        }
> +
> +        /* scale the accumulator down */
> +        accum = _mm_srai_epi32 (accum, FILTER_SHIFT);
> +
> +        /* pack 000000aa_000000rr_000000gg_000000bb ->
> 00000000_00000000_00000000_aarrggbb */
> +        accum = _mm_packs_epi32 (accum, accum);
> +
> +        //XXX: this should be need to saturate properly but doesn't seem to
> make a difference
> +        accum = _mm_max_epi16 (accum, zero);
> +
> +        accum = _mm_packus_epi16 (accum, accum);
> +
> +        *dest = _mm_cvtsi128_si32 (accum);
> +        dest++;
> +        pixel++;
> +    }
> +}
> +#endif
> +
> +static void
> +downsample_row_convolve(int n,
> +        uint32_t *src, uint32_t *dest,
> +                struct filter *filter, int src_size)
> +{
> +    int pixel = 0;
> +    while (n--)
> +    {
> +        int32_t a = 0;
> +        int32_t r = 0;
> +        int32_t g = 0;
> +        int32_t b = 0;
> +        int i;
> +
> +        for (i=0; i<filter[pixel].count; i++) {
> +            a += ((src[filter[pixel].offset + i] >> 24) & 0xff) *
> filter[pixel].values[i];
> +            r += ((src[filter[pixel].offset + i] >> 16) & 0xff) *
> filter[pixel].values[i];
> +            g += ((src[filter[pixel].offset + i] >>  8) & 0xff) *
> filter[pixel].values[i];
> +            b += ((src[filter[pixel].offset + i] >>  0) & 0xff) *
> filter[pixel].values[i];
> +        }
> +
> +        a >>= FILTER_SHIFT;
> +        r >>= FILTER_SHIFT;
> +        g >>= FILTER_SHIFT;
> +        b >>= FILTER_SHIFT;
> +
> +        a = clamp (a);
> +        r = clamp (r);
> +        g = clamp (g);
> +        b = clamp (b);
> +
> +        *dest = (a << 24) | (r << 16) | (g << 8) | b;
> +        dest++;
> +        pixel++;
> +    }
> +}
> +
> +/* src1 and src2 are two contiguous regions of samples. We need two to handle
> + * the ring buffer that is used by the caller
> + *
> + * instead of src1, src1_length, src2, filter_length we could pass:
> + * ring_buf, start_index, ring_buf_length, filter_length,
> + * this is implicitly coded in the parameters we do pass:
> + * src1 = ring_buf + start_index
> + * src1_length = min(ring_buf_length - start_index, filter_length)
> + * src2 = ring_buf */
> +static void
> +downsample_columns_convolve(int n,
> +        uint32_t *src1, int src1_length,
> +        uint32_t *src2,
> +        uint32_t *dest,
> +        int16_t *filter, int filter_length)
> +{
> +    int stride = n;
> +    while (n--)
> +    {
> +        int32_t a = 0;
> +        int32_t r = 0;
> +        int32_t g = 0;
> +        int32_t b = 0;
> +        int i;
> +        uint32_t *column_src;
> +
> +        /* we do the accumulation in two steps because the source lines are in
> a ring buffer
> +         * so won't be contiguous */
> +        column_src = src1;
> +        for (i=0; i<src1_length; i++) /* loop till the end of the ring buffer
> */
> +        {
> +            a += ((*column_src >> 24) & 0xff) * filter[i];
> +            r += ((*column_src >> 16) & 0xff) * filter[i];
> +            g += ((*column_src >>  8) & 0xff) * filter[i];
> +            b += ((*column_src >>  0) & 0xff) * filter[i];
> +            column_src += stride;
> +        }
> +
> +        /* accumulate the remaining samples starting at the begining of the
> ring buffer */
> +        column_src = src2;
> +        for (; i<filter_length; i++)
> +        {
> +            a += ((*column_src >> 24) & 0xff) * filter[i];
> +            r += ((*column_src >> 16) & 0xff) * filter[i];
> +            g += ((*column_src >>  8) & 0xff) * filter[i];
> +            b += ((*column_src >>  0) & 0xff) * filter[i];
> +            column_src += stride;
> +        }
> +
> +        a >>= FILTER_SHIFT;
> +        r >>= FILTER_SHIFT;
> +        g >>= FILTER_SHIFT;
> +        b >>= FILTER_SHIFT;
> +
> +        a = clamp (a);
> +        r = clamp (r);
> +        g = clamp (g);
> +        b = clamp (b);
> +
> +        *dest = (a << 24) | (r << 16) | (g << 8) | b;
> +        dest++;
> +        src1++;
> +        src2++;
> +    }
> +}
> +
> +
> +/* Evaluates the Lanczos filter of the given filter size window for the given
> + * position.
> + *
> + * |filter_size| is the width of the filter (the "window"), outside of which
> + * the value of the function is 0. Inside of the window, the value is the
> + * normalized sinc function:
> + *   lanczos(x) = sinc(x) * sinc(x / filter_size);
> + * where
> + *   sinc(x) = sin(pi*x) / (pi*x);
> + */
> +float eval_lanczos(int filter_size, float x)
> +{
> +  if (x <= -filter_size || x >= filter_size)
> +    return 0.0f;  /* Outside of the window. */
> +  if (x > -FLT_EPSILON &&
> +      x < FLT_EPSILON)
> +    return 1.0f;  /* Special case the discontinuity at the origin. */
> +  else {
> +      float xpi = x * (M_PI);
> +      return (sin(xpi) / xpi) *  /* sinc(x) */
> +          sin(xpi / filter_size) / (xpi / filter_size);  /*
> sinc(x/filter_size) */
> +  }
> +}
> +
> +/* dealing with the edges:
> +   some options:
> +   we could always have approximately the same number of samples in the filter
> and just pad the image out
> +   chromium seems to truncate the filter though...
> +   I don't really have a good reason to choose either approach...
> +   one way to get an idea is to see what other implementation do.
> +   - it looks like quartz pads
> +   - chromium truncates the filter
> +   - opera pads
> +*/
> +
> +int floor_int(float a)
> +{
> +    return floor(a);
> +}
> +
> +int ceil_int(float a)
> +{
> +    return ceil(a);
> +}
> +
> +int float_to_fixed(float a)
> +{
> +    return a * (1 << FILTER_SHIFT);
> +}
> +
> +/* this method does not do source clipping
> + * but will do dest clipping */
> +struct filter *compute_lanczos_filter(int dest_subset_lo, int
> dest_subset_size, int src_size, float scale)
> +{
> +    /* this is half the number of pixels that the filter uses in filter space
> */
> +    int dest_support = LANCZOS_LOBES;
> +    float src_support = dest_support / scale;
> +
> +    /* we need to compute a set of filters for each pixel */
> +    /* filter width */
> +    int i;
> +    struct filter *filter = malloc (dest_subset_size * sizeof(struct filter));
> +    int max_filter_size = ceil_int (src_support * 2) - floor_int (src_support
> * -2) + 1;
> +    float *filter_values = malloc (max_filter_size * sizeof(float)); /* this
> is a good candidate for stack allocation */
> +    int dest_subset_hi = dest_subset_lo + dest_subset_size; /* [lo, hi) */
> +
> +
> +    /* When we're doing a magnification, the scale will be larger than one.
> This
> +     * means the destination pixels are much smaller than the source pixels,
> and
> +     * that the range covered by the filter won't necessarily cover any source
> +     * pixel boundaries. Therefore, we use these clamped values (max of 1) for
> +     * some computations.
> +     */
> +    float clamped_scale = MIN (1., scale);
> +
> +    /* Speed up the divisions below by turning them into multiplies. */
> +    float inv_scale = 1. / scale;
> +
> +    /* Loop over all pixels in the output range. We will generate one set of
> +     * filter values for each one. Those values will tell us how to blend the
> +     * source pixels to compute the destination pixel.
> +     */
> +    int dest_subset_i;
> +    int pixel = 0;
> +    for (dest_subset_i = dest_subset_lo; dest_subset_i < dest_subset_hi;
> dest_subset_i++, pixel++) {
> +
> +        float src_pixel = dest_subset_i * inv_scale;
> +
> +        int src_begin = MAX (0, floor_int (src_pixel - src_support));
> +        int src_end = MIN (src_size - 1, ceil_int (src_pixel + src_support));
> +
> +        /* Compute the unnormalized filter value at each location of the source
> +         * it covers. */
> +        float filter_sum = 0.; /* sum of the filter values for normalizing */
> +        int filter_size = 0;
> +        int cur_filter_pixel;
> +        int j = 0;
> +
> +        assert(src_begin >= 0);
> +
> +        for (cur_filter_pixel = src_begin; cur_filter_pixel <= src_end;
> +                cur_filter_pixel++)
> +        {
> +            /* Distance from the center of the filter, this is the filter
> coordinate
> +             * in source space. */
> +            float src_filter_pos = cur_filter_pixel - src_pixel;
> +
> +            /* Since the filter really exists in dest space, map it there. */
> +            float dest_filter_pos = src_filter_pos * clamped_scale;
> +
> +            /* Compute the filter value at that location. */
> +            float filter_value = eval_lanczos(LANCZOS_LOBES, dest_filter_pos);
> +            filter_sum += filter_value;
> +            filter_values[j] = filter_value;
> +
> +            filter_size++;
> +            j++;
> +        }
> +
> +        assert(src_end >= src_begin);
> +        assert(filter_size <= max_filter_size);
> +
> +        {
> +            int16_t leftovers;
> +            /* XXX: eventually we should avoid doing malloc here and just
> malloc a big
> +             * chunk of max_filter_size * sizeof(int16_t) */
> +            int16_t *fixed_filter_values = malloc (filter_size *
> sizeof(int16_t));
> +
> +            /* the filter must be normalized so that we don't affect the
> brightness of
> +             * the image. Convert to normalized fixed point */
> +            /* XXX: It might be better if we didn't have to do this in a
> separate pass */
> +            int16_t fixed_sum = 0; /* XXX: should we use a regular int here? */
> +            for (i=0; i<filter_size; i++)
> +            {
> +                int16_t cur_fixed = float_to_fixed(filter_values[i] /
> filter_sum);
> +                fixed_sum += cur_fixed;
> +                fixed_filter_values[i] = cur_fixed;
> +            }
> +
> +            /* The conversion to fixed point will leave some rounding errors,
> which
> +             * we add back in to avoid affecting the brightness of the image.
> We
> +             * arbitrarily add this to the center of the filter array (this
> won't always
> +             * be the center of the filter function since it could get clipped
> on the
> +             * edges, but it doesn't matter enough to worry about that case).
> */
> +            leftovers = float_to_fixed(1.0f) - fixed_sum;
> +            fixed_filter_values[filter_size / 2] += leftovers;
> +
> +            filter[pixel].values = fixed_filter_values;
> +            filter[pixel].count = filter_size;
> +            filter[pixel].offset = src_begin;
> +            assert (filter[pixel].offset >= 0);
> +            assert (filter[pixel].offset + filter[pixel].count - 1 < src_size);
> +        }
> +    }
> +
> +    free (filter_values);
> +    return filter;
> +}
> +
> +/* start_column and start_row are in destination space */
> +PIXMAN_EXPORT
> +int downscale_lanczos_filter(const struct rescale_fetcher *fetcher, void
> *closure, unsigned orig_width, unsigned orig_height,
> +        signed scaled_width, signed scaled_height,
> +        uint16_t start_column, uint16_t start_row,
> +        uint16_t width, uint16_t height,
> +        uint32_t *dest, int dst_stride)
> +{
> +    struct filter *x_filter, *y_filter;
> +    int filter_index;
> +    int src_index = 0;
> +    int i;
> +
> +    /* XXX: this duplicates code in compute_lanczos_filter */
> +    int dest_support = LANCZOS_LOBES;
> +    float src_support = dest_support / ((float)scaled_height/orig_height);
> +
> +    int max_filter_size = ceil_int (src_support * 2) - floor_int (src_support
> * -2) + 1;
> +
> +    /* we use a ring buffer to store the downsampled rows because we want to
> +     * reuse the downsampled rows across different destination pixels */
> +    int ring_buf_size = max_filter_size;
> +    uint32_t *ring_buf = pixman_malloc_abc (ring_buf_size, scaled_width,
> sizeof(uint32_t));
> +
> +    uint32_t *scanline = fetcher->init (closure);
> +    void (*fetch_scanline)(void *closure, int y, uint32_t **scanline) =
> fetcher->fetch_scanline;
> +
> +    assert (start_column + width  <= scaled_width);
> +    assert (start_row    + height <= scaled_height);
> +
> +    x_filter = compute_lanczos_filter (start_column, width,  orig_width, 
> (float)scaled_width/orig_width);
> +    y_filter = compute_lanczos_filter (start_row,    height, orig_height,
> (float)scaled_height/orig_height);
> +
> +    if (!ring_buf || !scanline || !x_filter || !y_filter)
> +        return -1;
> +
> +    for (filter_index = 0; filter_index < height; filter_index++)
> +    {
> +        int filter_size   = y_filter[filter_index].count;
> +        int filter_offset = y_filter[filter_index].offset;
> +        int src1_index, src1_size;
> +        uint32_t *src1;
> +        assert(filter_offset >= 0);
> +
> +        /* read and downsample the rows needed to downsample the next column */
> +        while (src_index < filter_offset + filter_size)
> +        {
> +            fetch_scanline (closure, src_index, &scanline);
> +            /* downsample_row_convolve_sse2 (width, scanline, ring_buf + width
> * (src_index % ring_buf_size), x_filter, orig_width); */
> +            downsample_row_convolve(width, scanline, ring_buf + width *
> (src_index % ring_buf_size), x_filter, orig_width);
> +            src_index++;
> +        }
> +
> +        src1_index = filter_offset % ring_buf_size;
> +        assert (src1_index >= 0);
> +        assert (filter_size <= ring_buf_size);
> +        src1 = ring_buf + width * src1_index;
> +
> +        src1_size = MIN (ring_buf_size - src1_index, filter_size);
> +        assert (src1_size >= 0);
> +        assert (filter_size >= src1_size);
> +
> +        downsample_columns_convolve (width, src1, src1_size, ring_buf, dest,
> y_filter[filter_index].values, y_filter[filter_index].count);
> +        dest += dst_stride/4;
> +    }
> +
> +    free (ring_buf);
> +    fetcher->fini (closure, scanline);
> +
> +    for (i=0; i<width; i++)
> +        free (x_filter[i].values);
> +    for (i=0; i<height; i++)
> +        free (y_filter[i].values);
> +    free (x_filter);
> +    free (y_filter);
> +
> +    return 0;
> +}
> diff --git a/pixman/pixman-rescale-mult-old.c b/pixman/pixman-rescale-mult-old.c
> new file mode 100644
> index 0000000..861d9e7
> --- /dev/null
> +++ b/pixman/pixman-rescale-mult-old.c
> @@ -0,0 +1,290 @@
> +/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
> +/*
> + * Copyright © 2009 Mozilla Corporation
> + *
> + * Permission to use, copy, modify, distribute, and sell this software and its
> + * documentation for any purpose is hereby granted without fee, provided that
> + * the above copyright notice appear in all copies and that both that
> + * copyright notice and this permission notice appear in supporting
> + * documentation, and that the name of Mozilla Corporation not be used in
> + * advertising or publicity pertaining to distribution of the software without
> + * specific, written prior permission.  Mozilla Corporation makes no
> + * representations about the suitability of this software for any purpose.  It
> + * is provided "as is" without express or implied warranty.
> + *
> + * MOZILLA CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
> + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT
> + * SHALL MOZILLA CORPORATION BE LIABLE FOR ANY SPECIAL, INDIRECT OR
> + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
> + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
> + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
> + * OF THIS SOFTWARE.
> + *
> + * Author: Jeff Muizelaar, Mozilla Corp.
> + */
> +
> +#if 0
> +
> +/* This implements a box filter that supports non-integer box sizes */
> +
> +#ifdef HAVE_CONFIG_H
> +#include <config.h>
> +#endif
> +
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <assert.h>
> +#include <stdlib.h>
> +#include "pixman-private.h"
> +#include "pixman-rescale.h"
> +#define abs(a) (a) > 0 ? (a) : -(a)
> +#define sign(x) ((x)>0 ? 1:-1)
> +
> +static void fetch_scanline(void *closure, int y, uint32_t *scanline)
> +{
> +    pixman_image_t *pict = closure;
> +    bits_image_t *bits = &pict->bits;
> +    if (y > pict->bits.height-1)
> +        y = pict->bits.height-1;
> +    if (!pixman_image_has_source_clipping(pict)) {
> +        fetchProc32 fetch = ACCESS(pixman_fetchProcForPicture32)(bits);
> +        fetch(bits, 0, y, pict->bits.width, scanline);
> +    } else {
> +        int x;
> +        fetchPixelProc32 fetch =
> ACCESS(pixman_fetchPixelProcForPicture32)(bits);
> +        for (x=0; x<bits->width; x++) {
> +            if (pixman_region32_contains_point (bits->common.src_clip, x,
> y,NULL))
> +                scanline[x] = fetch (bits, x, y);
> +            else
> +                scanline[x] = 0;
> +        }
> +    }
> +}
> +
> +/* we work in fixed point where 1. == 1 << 24 */
> +#define FIXED_SHIFT 24
> +
> +static void downsample_row_box_filter(
> +        int n,
> +        uint32_t *src, uint32_t *dest,
> +                int coverage[], int pixel_coverage)
> +{
> +    /* we need an array of the pixel contribution of each destination pixel on
> the boundaries.
> +     * we invert the value to get the value on the other size of the box */
> +    /*
> +
> +       value  = a * contribution * 1/box_size
> +       value += a * 1/box_size
> +       value += a * 1/box_size
> +       value += a * 1/box_size
> +       value += a * (1 - contribution) * 1/box_size
> +                a * (1/box_size - contribution * 1/box_size)
> +
> +        box size is constant
> +
> +
> +       value = a * contribtion_a * 1/box_size + b * contribution_b * 1/box_size
> +               contribution_b = (1 - contribution_a)
> +                              = (1 - contribution_a_next)
> +    */
> +
> +    /* box size = ceil(src_width/dest_width) */
> +
> +    int x = 0;
> +    while (n--) {
> +        uint32_t a = 0;
> +        uint32_t r = 0;
> +        uint32_t g = 0;
> +        uint32_t b = 0;
> +        int box = 1 << FIXED_SHIFT;
> +        int start_coverage = coverage[x];
> +        a = ((*src >> 24) & 0xff) * start_coverage;
> +        r = ((*src >> 16) & 0xff) * start_coverage;
> +        g = ((*src >>  8) & 0xff) * start_coverage;
> +        b = ((*src >>  0) & 0xff) * start_coverage;
> +        src++;
> +        x++;
> +        box -= start_coverage;
> +        while (box >= pixel_coverage) {
> +            a += ((*src >> 24) & 0xff) * pixel_coverage;
> +            r += ((*src >> 16) & 0xff) * pixel_coverage;
> +            g += ((*src >>  8) & 0xff) * pixel_coverage;
> +            b += ((*src >>  0) & 0xff) * pixel_coverage;
> +            src++;
> +            x++;
> +
> +            box -= pixel_coverage;
> +        }
> +        /* multiply by whatever is leftover
> +         * this ensures that we don't bias down.
> +         * i.e. start_coverage + n*pixel_coverage + box == 1 << 24 */
> +        if (box > 0) {
> +            a += ((*src >> 24) & 0xff) * box;
> +            r += ((*src >> 16) & 0xff) * box;
> +            g += ((*src >>  8) & 0xff) * box;
> +            b += ((*src >>  0) & 0xff) * box;
> +        }
> +
> +        a >>= FIXED_SHIFT;
> +        r >>= FIXED_SHIFT;
> +        g >>= FIXED_SHIFT;
> +        b >>= FIXED_SHIFT;
> +
> +        *dest = (a << 24) | (r << 16) | (g << 8) | b;
> +        dest++;
> +    }
> +}
> +
> +static void downsample_columns_box_filter(
> +        int n,
> +        int start_coverage,
> +        int pixel_coverage,
> +        uint32_t *src, uint32_t *dest)
> +{
> +    int stride = n;
> +    while (n--) {
> +        uint32_t a = 0;
> +        uint32_t r = 0;
> +        uint32_t g = 0;
> +        uint32_t b = 0;
> +        uint32_t *column_src = src;
> +        int box = 1 << FIXED_SHIFT;
> +        a = ((*column_src >> 24) & 0xff) * start_coverage;
> +        r = ((*column_src >> 16) & 0xff) * start_coverage;
> +        g = ((*column_src >>  8) & 0xff) * start_coverage;
> +        b = ((*column_src >>  0) & 0xff) * start_coverage;
> +        column_src += stride;
> +        box -= start_coverage;
> +        while (box >= pixel_coverage) {
> +            a += ((*column_src >> 24) & 0xff) * pixel_coverage;
> +            r += ((*column_src >> 16) & 0xff) * pixel_coverage;
> +            g += ((*column_src >>  8) & 0xff) * pixel_coverage;
> +            b += ((*column_src >>  0) & 0xff) * pixel_coverage;
> +            column_src += stride;
> +            box -= pixel_coverage;
> +        }
> +        if (box > 0) {
> +            a += ((*column_src >> 24) & 0xff) * box;
> +            r += ((*column_src >> 16) & 0xff) * box;
> +            g += ((*column_src >>  8) & 0xff) * box;
> +            b += ((*column_src >>  0) & 0xff) * box;
> +        }
> +        a >>= FIXED_SHIFT;
> +        r >>= FIXED_SHIFT;
> +        g >>= FIXED_SHIFT;
> +        b >>= FIXED_SHIFT;
> +
> +        *dest = (a << 24) | (r << 16) | (g << 8) | b;
> +        dest++;
> +        src++;
> +    }
> +}
> +
> +#if 1
> +int compute_coverage(int coverage[], int src_length, int dest_length) {
> +    int i;
> +    /* num = src_length/dest_length
> +       total = sum(pixel) / num
> +
> +       pixel * 1/num == pixel * dest_length / src_length
> +    */
> +    int full = 1<<24;
> +    /* the average contribution of each source pixel */
> +    int ratio = ((1 << 24)*(long long int)dest_length)/src_length;
> +    int finished = ((1 << 24)*(long long int)dest_length) - (((1 << 24)*(long
> long int)dest_length)/src_length)*src_length;
> +    int p=0;
> +    int p2;
> +    int remainder = full;
> +    /* because ((1 << 24)*(long long int)dest_length) won't always be
> divisible by src_length
> +     * we'll need someplace to put the other bits.
> +     *
> +     * We want to ensure a + n*ratio < 1<<24
> +     *
> +     * 1<<24
> +     * */
> +    for (i=0; i<src_length; i++) {
> +        if (remainder < ratio) {
> +            p = ratio - remainder;
> +            remainder = full - p;
> +        } else {
> +            p = ratio;
> +            remainder -= ratio;
> +        }
> +
> +        if ((((i+1)*ratio) % full) < ratio) {
> +            p2 = (((i+1)*ratio) % full);
> +        } else {
> +            p2 = ratio;
> +        }
> +
> +        //printf("%d %d %d %d %d\n", i, p, remainder, (i+1)*ratio, p2);
> +        //assert(p == p2);
> +
> +        coverage[i] = p;
> +    }
> +    //assert(remainder == 0);
> +    return ratio;
> +}
> +#endif
> +
> +
> +PIXMAN_EXPORT
> +int downscale_box_mult_old_filter(pixman_image_t *pict, unsigned origWidth,
> unsigned origHeight,
> +        signed scaledWidth, signed scaledHeight,
> +        uint16_t startColumn, uint16_t startRow,
> +        uint16_t width, uint16_t height,
> +        uint32_t *src, int srcStride,
> +        uint32_t *dest, int dstStride)
> +{
> +   // printf("%d %d %d %d\n", scaledWidth, scaledHeight, origWidth,
> origHeight);
> +        int yd1 = 0;
> +        int d;
> +
> +        /* we need to allocate enough room for ceil(src_height/dest_height)
> scanlines */
> +        uint32_t *temp_buf = pixman_malloc_abc ((origHeight +
> scaledHeight-1)/scaledHeight, scaledWidth, sizeof(uint32_t));
> +    if (!temp_buf)
> +            return -1;
> +
> +    //XXX: I suppose we should check whether this will succeed
> +        uint32_t *scanline = pixman_malloc_abc (origWidth, 1,
> sizeof(uint32_t));
> +
> +        int *x_coverage = pixman_malloc_abc (origWidth, 1, sizeof(int));
> +        int *y_coverage = pixman_malloc_abc (origHeight, 1, sizeof(int));
> +        int pixel_coverage_x = compute_coverage(x_coverage, origWidth,
> scaledWidth);
> +        int pixel_coverage_y = compute_coverage(y_coverage, origHeight,
> scaledHeight);
> +
> +        int y = 0;
> +        for (d = 0; d < startRow + height; d++)
> +    {
> +            int columns = 0;
> +            int box = 1 << FIXED_SHIFT;
> +            int start_coverage_y = y_coverage[y];
> +            fetch_scanline(pict, y, scanline);
> +            downsample_row_box_filter(width, scanline, temp_buf + width *
> columns,
> +                    x_coverage, pixel_coverage_x);
> +            columns++;
> +            y++;
> +            box -= start_coverage_y;
> +
> +            while (box >= pixel_coverage_y) {
> +                fetch_scanline(pict, y, scanline);
> +                downsample_row_box_filter(width, scanline, temp_buf + width *
> columns,
> +                    x_coverage, pixel_coverage_x);
> +                columns++;
> +                y++;
> +                box -= pixel_coverage_y;
> +            }
> +            if (box > 0) {
> +                fetch_scanline(pict, y, scanline);
> +                downsample_row_box_filter(width, scanline, temp_buf + width *
> columns,
> +                    x_coverage, pixel_coverage_x);
> +                columns++;
> +            }
> +            downsample_columns_box_filter(width, start_coverage_y,
> pixel_coverage_y, temp_buf, dest + (yd1 - startRow)*dstStride/4);
> +            yd1++;
> +        }
> +        free(temp_buf);
> +        return 0;
> +}
> +
> +#endif
> diff --git a/pixman/pixman-rescale.c b/pixman/pixman-rescale.c
> new file mode 100644
> index 0000000..20e2e7c
> --- /dev/null
> +++ b/pixman/pixman-rescale.c
> @@ -0,0 +1,124 @@
> +#ifdef HAVE_CONFIG_H
> +#include <config.h>
> +#endif
> +
> +#include <stdlib.h>
> +
> +#include "pixman-private.h"
> +#include "pixman-rescale.h"
> +
> +/* Fetcher documentation:
> + * Goals: we need a generic interface to fetch lines of ARGB32
> + * - we want to support converting to argb32
> + * - we also want to support having the rescaler read directly from the buffer
> + *   if the buffer is of a format compatible with the rescaler */
> +
> +static void fetch_convert(void *closure, int y, uint32_t **scanline_ptr)
> +{
> +    pixman_image_t *pict = closure;
> +    uint32_t *scanline = *scanline_ptr;
> +    pict->bits.fetch_scanline_raw_32(pict, 0, y, pict->bits.width, scanline,
> NULL, 0);
> +
> +}
> +
> +static uint32_t *fetch_convert_init(void *closure)
> +{
> +    pixman_image_t *pict = closure;
> +    return pixman_malloc_abc (pict->bits.width, 1, sizeof(uint32_t));
> +}
> +
> +static void fetch_convert_fini(void *closure, uint32_t *scanline)
> +{
> +    free(scanline);
> +}
> +
> +
> +static void fetch_raw(void *closure, int y, uint32_t **scanline)
> +{
> +    pixman_image_t *pict = closure;
> +    bits_image_t *bits = &pict->bits;
> +    /* set the scanline to the begining of the yth row */
> +    *scanline = bits->bits + y*bits->rowstride;
> +}
> +
> +static uint32_t *fetch_raw_init(void *closure)
> +{
> +    return closure;
> +}
> +
> +static void fetch_raw_fini(void *closure, uint32_t *scanline)
> +{
> +}
> +
> +const struct rescale_fetcher fetcher_convert = {
> +    fetch_convert,
> +    fetch_convert_init,
> +    fetch_convert_fini
> +};
> +
> +const struct rescale_fetcher fetcher_raw = {
> +    fetch_raw,
> +    fetch_raw_init,
> +    fetch_raw_fini
> +};
> +
> +/* downscale an pixman_image_t into an ARGB32 buffer
> + *
> + * scaled_width / original_width is the scale ratio
> + * using scaled_width instead of just passing in the ratio avoids any rounding
> errors because
> + * the ratio can be specified exactly
> + *
> + * start_column, start_row, width, height
> + * specify a rectangle in the scaled source space
> + *
> + * dest, dst_stride, width, and height allow specifying
> + * a rectangle in the destination buffer
> + */
> +PIXMAN_EXPORT
> +int downscale_image(pixman_image_t *pict, enum downscale_type type,
> +        int scaled_width, int scaled_height,
> +        int start_column, int start_row,
> +        int width, int height,
> +        uint32_t *dest, int dst_stride)
> +{
> +    const struct rescale_fetcher *fetcher;
> +    int orig_height = pict->bits.height;
> +    int orig_width  = pict->bits.width;
> +
> +    /* choose a fetcher */
> +    if (PIXMAN_FORMAT_BPP(pict->bits.format) == 32 &&
> +            PIXMAN_FORMAT_R(pict->bits.format) == 8 &&
> +            PIXMAN_FORMAT_G(pict->bits.format) == 8 &&
> +            PIXMAN_FORMAT_B(pict->bits.format) == 8)
> +    {
> +        fetcher = &fetcher_raw;
> +    } else
> +    {
> +        fetcher = &fetcher_convert;
> +    }
> +
> +    /* choose and run downscaler */
> +    switch (type)
> +    {
> +        case LANCZOS:
> +            return downscale_lanczos_filter(fetcher, pict, orig_width,
> orig_height,
> +                scaled_width, scaled_height,
> +                start_column, start_row,
> +                width, height,
> +                dest, dst_stride);
> +        case BOX:
> +            return downscale_box_filter(fetcher, pict, orig_width, orig_height,
> +                scaled_width, scaled_height,
> +                start_column, start_row,
> +                width, height,
> +                dest, dst_stride);
> +        case INTEGER_BOX:
> +            return downscale_integer_box_filter(fetcher, pict, orig_width,
> orig_height,
> +                scaled_width, scaled_height,
> +                start_column, start_row,
> +                width, height,
> +                dest, dst_stride);
> +        default:
> +            return -1;
> +    }
> +}
> diff --git a/pixman/pixman-rescale.h b/pixman/pixman-rescale.h
> new file mode 100644
> index 0000000..9c8cf73
> --- /dev/null
> +++ b/pixman/pixman-rescale.h
> @@ -0,0 +1,44 @@
> +#include "pixman-private.h"
> +struct rescale_fetcher
> +{
> +    void (*fetch_scanline)(void *closure, int y, uint32_t **scanline_ptr);
> +    uint32_t *(*init)(void *closure);
> +    void (*fini)(void *closure, uint32_t *scanline);
> +};
> +
> +enum downscale_type
> +{
> +    INTEGER_BOX,
> +    BOX,
> +    LANCZOS
> +};
> +
> +int downscale_image(pixman_image_t *image, enum downscale_type type,
> +        int scaled_width, int scaled_height,
> +        int start_column, int start_row,
> +        int width, int height,
> +        uint32_t *dest, int dst_stride);
> +
> +int downscale_integer_box_filter(
> +        const struct rescale_fetcher *fetcher, void *closure,
> +        unsigned orig_width, unsigned orig_height,
> +        signed scaledWidth, signed scaled_height,
> +        uint16_t start_column, uint16_t start_row,
> +        uint16_t width, uint16_t height,
> +        uint32_t *dest, int dst_stride);
> +
> +int downscale_box_filter(
> +        const struct rescale_fetcher *fetcher, void *closure,
> +        unsigned orig_width, unsigned orig_height,
> +        signed scaled_width, signed scaled_height,
> +        uint16_t start_column, uint16_t start_row,
> +        uint16_t width, uint16_t height,
> +        uint32_t *dest, int dst_stride);
> +
> +int downscale_lanczos_filter(
> +        const struct rescale_fetcher *fetcher, void *closure,
> +        unsigned orig_width, unsigned orig_height,
> +        signed scaledWidth, signed scaled_height,
> +        uint16_t start_column, uint16_t start_row,
> +        uint16_t width, uint16_t height,
> +        uint32_t *dest, int dst_stride);
-- 
Carlos Garcia Campos
PGP key: http://pgp.mit.edu:11371/pks/lookup?op=get&search=0x523E6462
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 197 bytes
Desc: not available
Url : http://lists.cairographics.org/archives/cairo/attachments/20091117/1adc406e/attachment-0001.pgp