[cairo] pixman box filtering code prototype

Tue Nov 4 14:37:51 PST 2008

On Fri, Oct 24, 2008 at 11:09:34AM -0400, Jeff Muizelaar wrote:
> On Tue, Oct 14, 2008 at 03:00:21PM -0400, Jeff Muizelaar wrote:
> > For the default case, I think a better approach is to do a rectangular
> > box filter down to the scaled size and then do bilinear sampling from
> > this downscaled version as necessary. I think this should be more
> > performant than my box filtering code and should give the desired
> > quality for any affine matrix...
> 
> I've attached a patch that has a rough implementation of this.

Here's an update. It should be basically correct at this time (although
it doesn't filter masks yet...). It still needs cleanup and
optimization.

-Jeff

diff --git a/pixman/Makefile.am b/pixman/Makefile.am
index 339e01e..4aa5f6d 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -26,6 +26,7 @@ libpixman_1_la_SOURCES =		\
 	pixman-edge-imp.h		\
 	pixman-trap.c			\
 	pixman-compute-region.c		\
+	pixman-rescale-box.c		\
 	pixman-timer.c
 
 libpixmanincludedir = $(includedir)/pixman-1/
diff --git a/pixman/pixman-image.c b/pixman/pixman-image.c
index e80c479..3c7526d 100644
--- a/pixman/pixman-image.c
+++ b/pixman/pixman-image.c
@@ -518,6 +518,14 @@ pixman_image_set_source_clipping (pixman_image_t  *image,
 	common->src_clip = &common->full_region;
 }
 
+PIXMAN_EXPORT pixman_bool_t
+pixman_image_has_source_clipping (pixman_image_t *image)
+{
+    image_common_t *common = &image->common;
+
+    return common->src_clip != &common->full_region;
+}
+
 /* Unlike all the other property setters, this function does not
  * copy the content of indexed. Doing this copying is simply
  * way, way too expensive.
diff --git a/pixman/pixman-pict.c b/pixman/pixman-pict.c
index 070b190..0a73c13 100644
--- a/pixman/pixman-pict.c
+++ b/pixman/pixman-pict.c
@@ -36,6 +36,7 @@
 #include "pixman-sse2.h"
 #include "pixman-arm.h"
 #include "pixman-combine32.h"
+#include "pixman-rescale-box.h"
 
 #ifdef __GNUC__
 #   define inline __inline__ __attribute__ ((__always_inline__))
@@ -1770,6 +1771,56 @@ pixman_optimize_operator(pixman_op_t op, pixman_image_t *pSrc, pixman_image_t *p
 
 }
 
+pixman_image_t *create_downscaled_image(pixman_image_t *img, int scaled_width, int scaled_height)
+{
+    pixman_image_t *new_src = pixman_image_create_bits (PIXMAN_a8r8g8b8,
+		            scaled_width, scaled_height, NULL, scaled_width * sizeof(uint32_t*) // XXX: should this come from someplace else
+            );
+    if (!new_src)
+        goto fail;
+
+    if (downscale_box_filter(img, img->bits.width, img->bits.height,
+		scaled_width, scaled_height,
+		0, 0,
+		scaled_width, scaled_height,
+		img->bits.bits, img->bits.rowstride*sizeof(uint32_t),
+		new_src->bits.bits, new_src->bits.rowstride*sizeof(uint32_t)))
+        goto resize_fail;
+
+    pixman_image_set_filter (new_src, img->common.filter,
+		    img->common.filter_params,
+		    img->common.n_filter_params);
+    pixman_transform_t new_transform = *(img->common.transform);
+//#define unscaled_debug
+#ifdef unscaled_debug
+    printf("%x %x %x, %x %x %x\n", 
+             new_transform.matrix[0][0],
+             new_transform.matrix[0][1],
+             new_transform.matrix[0][2],
+             new_transform.matrix[1][0],
+             new_transform.matrix[1][1],
+             new_transform.matrix[1][2]);
+#endif
+    pixman_fixed_t scale_x, scale_y;
+    pixman_extract_scale (&new_transform, &scale_x, &scale_y);
+    pixman_transform_unscale (&new_transform, scale_x, scale_y);
+#if unscaled_debug
+    printf("%x %x %x, %x %x %x\n", 
+             new_transform.matrix[0][0],
+             new_transform.matrix[0][1],
+             new_transform.matrix[0][2],
+             new_transform.matrix[1][0],
+             new_transform.matrix[1][1],
+             new_transform.matrix[1][2]);
+#endif
+    pixman_image_set_transform (new_src, &new_transform);
+    return new_src;
+
+resize_fail:
+    pixman_image_unref(new_src);
+fail:
+    return NULL;
+}
 #if defined(USE_SSE2) && defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
 
 /*
@@ -1788,6 +1839,7 @@ pixman_optimize_operator(pixman_op_t op, pixman_image_t *pSrc, pixman_image_t *p
  * See https://bugs.freedesktop.org/show_bug.cgi?id=15693
  */
 
+
 __attribute__((__force_align_arg_pointer__))
 #endif
 PIXMAN_EXPORT void
@@ -1811,6 +1863,7 @@ pixman_image_composite (pixman_op_t      op,
     pixman_bool_t srcAlphaMap = pSrc->common.alpha_map != NULL;
     pixman_bool_t maskAlphaMap = FALSE;
     pixman_bool_t dstAlphaMap = pDst->common.alpha_map != NULL;
+    pixman_bool_t downscaled = FALSE;
     CompositeFunc func = NULL;
 
 #ifdef USE_MMX
@@ -1832,6 +1885,26 @@ pixman_image_composite (pixman_op_t      op,
 	srcTransform = FALSE;
     }
 
+
+    if (srcTransform && pSrc->type == BITS) {
+        int scaled_width;
+        int scaled_height;
+        pixman_get_scaled_size(pSrc->common.transform,
+                pSrc->bits.width, pSrc->bits.height,
+                &scaled_width, &scaled_height);
+        //printf("%d %d: %d %d\n", pSrc->bits.width, pSrc->bits.height, scaled_width, scaled_height);
+        if (scaled_width < pSrc->bits.width || scaled_height < pSrc->bits.height) {
+            if (pSrc->bits.format == PIXMAN_a8r8g8b8 && 1) {
+                //printf("downscale\n");
+                pSrc = create_downscaled_image(pSrc, scaled_width, scaled_height);
+                if (!pSrc)
+                    return;
+                srcTransform = pSrc->common.transform != NULL;
+                downscaled = TRUE;
+            }
+        }
+    }
+
     if (pMask && pMask->type == BITS)
     {
 	maskRepeat = pMask->common.repeat == PIXMAN_REPEAT_NORMAL;
@@ -1966,6 +2039,8 @@ pixman_image_composite (pixman_op_t      op,
     pixman_walk_composite_region (op, pSrc, pMask, pDst, xSrc, ySrc,
 				  xMask, yMask, xDst, yDst, width, height,
 				  srcRepeat, maskRepeat, func);
+    if (downscaled)
+        pixman_image_unref(pSrc);
 }
 
 
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index e26a0c3..ec5a6ba 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -709,6 +709,17 @@ pixman_rasterize_edges_accessors (pixman_image_t *image,
 pixman_bool_t
 pixman_image_is_opaque(pixman_image_t *image);
 
+void
+pixman_transform_unscale (pixman_transform_t *transform,
+			 int height, int width);
+void
+pixman_get_scaled_size (pixman_transform_t *transform,
+        int height, int width,
+        int *scaled_height, int *scaled_width);
+
+void
+pixman_extract_scale (pixman_transform_t *transform,
+        int *scale_x, int *scale_y);
 pixman_bool_t
 pixman_image_can_get_solid (pixman_image_t *image);
 
@@ -792,4 +803,45 @@ void pixman_timer_register (PixmanTimer *timer);
 
 #endif /* PIXMAN_TIMING */
 
+/**
+ * mul_fixed:
+ * @a: first number to multiply in fixed-point 16.16 format
+ * @b: second number to multiply in fixed-point 16.16 format
+ * 
+ * Author : Frederic Plourde 
+ **/
+static inline pixman_fixed_t
+mul_fixed (pixman_fixed_t a, pixman_fixed_t b)
+{
+    int64_t r = (int64_t)a * (int64_t)b;
+    return ( pixman_fixed_t )( r >> 16 );
+}
+
+/**
+ * div_fixed:
+ * @a: dividend in fixed-point 16.16 format
+ * @b: divisor  in fixed-point 16.16 format
+ *
+ * Author : Frederic Plourde 
+ **/
+static inline pixman_fixed_t
+div_fixed (pixman_fixed_t a, pixman_fixed_t b)
+{
+    int64_t div;
+
+    int64_t a_64 = ((int64_t)a) << 32 ;
+    int64_t b_64 = ((int64_t)b) << 16 ;
+
+    div = a_64 / b_64;
+
+    return (pixman_fixed_t)div;
+}
+
+static inline pixman_fixed_t
+square_fixed (pixman_fixed_t a)
+{
+    return mul_fixed(a,a);
+}
+
+
 #endif /* PIXMAN_PRIVATE_H */
diff --git a/pixman/pixman-rescale-box.c b/pixman/pixman-rescale-box.c
new file mode 100644
index 0000000..9bf2ab6
--- /dev/null
+++ b/pixman/pixman-rescale-box.c
@@ -0,0 +1,369 @@
+/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
+/*
+ * Copyright © 2008 Mozilla Corporation
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Mozilla Corporation not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission.  Mozilla Corporation makes no
+ * representations about the suitability of this software for any purpose.  It
+ * is provided "as is" without express or implied warranty.
+ *
+ * MOZILLA CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT
+ * SHALL MOZILLA CORPORATION BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THIS SOFTWARE.
+ *
+ * Author: Jeff Muizelaar, Mozilla Corp.
+ *
+ * Based on the code from "Fast Bitmap Strecthing" by Tomas Möller
+ *
+ * Scaling code from Graphics Gems book III
+ *  http://www.acm.org/pubs/tog/GraphicsGems/gemsiii/fastBitmap.c
+ *
+ *  License states - "All code here can be used without restrictions."
+ *  http://www.acm.org/pubs/tog/GraphicsGems/
+ *
+ * Also inspired by Mozilla's gfx/src/imgScaler.c by
+ * Tim Rowley <tor at cs.brown.edu>
+ */
+
+/* The approach is inspired by Bresenham's line drawing algorithm
+ * A similar approach is used by SDL in its blitting code
+ * although they do have some intrastructure for dynamically
+ * generating code for a particular scaling.
+ *
+ * A more clever approach is used in Evas. It creates table of row indices
+ * and a table of column indices that map the destination coordinates
+ * to source pixels. This approach avoids having an additional conditional
+ * branch inside of the inner loop at the cost of having to precompute and
+ * access these tables.
+ *
+ *   --  Jeff Muizelaar (July 2008) */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+
+
+#include <stdint.h>
+#include <stdio.h>
+#include <assert.h>
+#include <stdlib.h>
+#include "pixman-private.h"
+#include "pixman-rescale-box.h"
+#define abs(a) (a) > 0 ? (a) : -(a)
+#define sign(x) ((x)>0 ? 1:-1)
+
+void fetch_scanline(void *closure, int y, uint32_t *scanline)
+{
+    pixman_image_t *pict = closure;
+    if (!pixman_image_has_source_clipping(pict)) {
+        fetchProc32 fetch = ACCESS(pixman_fetchProcForPicture32)(pict);
+        fetch(pict, 0, y, pict->bits.width, scanline);
+    } else {
+        int x;
+        bits_image_t *bits = &pict->bits;
+        fetchPixelProc32 fetch = ACCESS(pixman_fetchPixelProcForPicture32)(bits);
+        for (x=0; x<bits->width; x++) {
+            if (pixman_region32_contains_point (bits->common.src_clip, x, y,NULL))
+                scanline[x] = fetch (bits, x, y);
+            else
+                scanline[x] = 0;
+        }
+    }
+}
+
+
+/*
+When box filtering with an integer sized box we only ever have two box sizes.
+Proof:
+    Assume w1 > w2.
+    r = w1/w2
+
+    The size of the two boxes is
+    r1 = ceil(r), r2 = floor(r)
+
+    we want to find non-negative integers p and q such that:
+    w1 = p*r1 + q*r2
+
+    if r1 == r2 then
+        r is an integer and thus w2 evenly divides w1
+        therefor p = w2 and q = 0
+    otherwise r1 != r2 and
+        r1 = r2 + 1
+        w1 = p*(r2 + 1) + q * r2
+           = p*r2 + q*r2 + p
+           = (p + q)*r2 + p
+
+        we then choose a value of p such that:
+            w1 = r2*w2 + p
+            thus p = w1 mod r2
+                   = w1 mod w2
+            and  q = w2 - p which is > 0 because
+                            p = w1 mod w2
+
+            subing into:
+            (p + q)*r2 + p
+            gives:
+            w1 = (p + w2 - p)*r2 + p
+            w1 = w2*r2 + w1 mod r2
+
+*/
+
+
+/* we can index on the bottom bit of the divisor.
+ * 
+ * we could also do a multiply per pixel and accumulate precision
+*/
+/**********************************************************
+ Stretches a horizontal source line onto a horizontal
+ destination line. Used by RectStretch.
+**********************************************************/
+void downsample_row_box_filter(
+		int n,
+		uint32_t *src, uint32_t *dest,
+                long dx2, long e, long src_dx)
+{
+    while (n--) {
+        uint32_t a = 0;
+        uint32_t r = 0;
+        uint32_t g = 0;
+        uint32_t b = 0;
+        int count = 1;
+        a = (*src >> 24) & 0xff;
+        r = (*src >> 16) & 0xff;
+        g = (*src >>  8) & 0xff;
+        b = (*src >>  0) & 0xff;
+        while (e > 0) {
+            e -= dx2;
+            a += (*src >> 24) & 0xff;
+            r += (*src >> 16) & 0xff;
+            g += (*src >>  8) & 0xff;
+            b += (*src >>  0) & 0xff;
+            count++;
+            src++;
+        }
+        //XXX counts seem high...
+        a /= count;
+        r /= count;
+        g /= count;
+        b /= count;
+        *dest = (a << 24) | (r << 16) | (g << 8) | b;
+        dest++;
+
+        e += src_dx;
+    }
+}
+
+void downsample_row_box_filter_sse2(
+		int n,
+		uint32_t *src, uint32_t *dest,
+                long dx2, long e, long src_dx)
+{
+#if 0
+    while (n--) {
+        uint32_t a = 0;
+        uint32_t r = 0;
+        uint32_t g = 0;
+        uint32_t b = 0;
+        int count = 1;
+        a = (*src >> 24) & 0xff;
+        r = (*src >> 16) & 0xff;
+        g = (*src >>  8) & 0xff;
+        b = (*src >>  0) & 0xff;
+        while (e > 0) {
+            e -= dx2;
+            a += (*src >> 24) & 0xff;
+            r += (*src >> 16) & 0xff;
+            g += (*src >>  8) & 0xff;
+            b += (*src >>  0) & 0xff;
+            count++;
+            src++;
+        }
+        //XXX counts seem high...
+        a /= count;
+        r /= count;
+        g /= count;
+        b /= count;
+        *dest = (a << 24) | (r << 16) | (g << 8) | b;
+        dest++;
+
+        e += src_dx;
+    }
+#endif
+
+}
+
+
+void downsample_columns_box_filter(
+        int n,
+        int columns,
+        uint32_t *src, uint32_t *dest)
+{
+    int stride = n;
+    while (n--) {
+        uint32_t a = 0;
+        uint32_t r = 0;
+        uint32_t g = 0;
+        uint32_t b = 0;
+        int h;
+        uint32_t *column_src = src;
+        a = (*column_src >> 24) & 0xff;
+        r = (*column_src >> 16) & 0xff;
+        g = (*column_src >>  8) & 0xff;
+        b = (*column_src >>  0) & 0xff;
+        for (h=1; h<columns; h++) {
+            a += (*column_src >> 24) & 0xff;
+            r += (*column_src >> 16) & 0xff;
+            g += (*column_src >>  8) & 0xff;
+            b += (*column_src >>  0) & 0xff;
+            column_src += stride;
+        }
+        a /= columns;
+        r /= columns;
+        g /= columns;
+        b /= columns;
+        *dest = (a << 24) | (r << 16) | (g << 8) | b;
+        dest++;
+        src++;
+    }
+}
+
+/**********************************************************
+ RectStretch enlarges or diminishes a source rectangle of
+ a bitmap to a destination rectangle. The source
+ rectangle is selected by the two points (xs1,ys1) and
+ (xs2,ys2), and the destination rectangle by (xd1,yd1) and
+ (xd2,yd2). Since readability of source-code is wanted,
+ some optimizations have been left out for the reader:
+ It's possible to read one line at a time, by first
+ stretching in x-direction and then stretching that bitmap
+ in y-direction.
+ Entry:
+	xs1,ys1 - first point of source rectangle
+	xs2,ys2 - second point of source rectangle
+	xd1,yd1 - first point of destination rectangle
+	xd2,yd2 - second point of destination rectangle
+**********************************************************/
+/* startColumn, startRow specify the index of the first source pixel from the source image.
+ * width and height specify the number of source pixels to be drawn
+ *
+ * origWidth, origHeight, scaledWidth and scaledHeight specify the scaling exactly
+ *
+ * x_center and y_center specify the offset within a pixel from which to start drawing
+ * they are, of course, specified in destination space
+ */
+#define ROUND
+//XXX:
+int downscale_box_filter(pixman_image_t *pict, unsigned origWidth, unsigned origHeight,
+		signed scaledWidth, signed scaledHeight,
+		uint16_t startColumn, uint16_t startRow,
+		uint16_t width, uint16_t height,
+		uint32_t *src, int srcStride,
+		uint32_t *dest, int dstStride)
+{
+   // printf("%d %d %d %d\n", scaledWidth, scaledHeight, origWidth, origHeight);
+        long xs2, ys2, xd2, yd2;
+        long yd1 = 0, ys1 = 0;
+        long xd1 = 0, xs1 = 0;
+	xs2 = origWidth ;
+	ys2 = origHeight ;
+	//xd2 = abs(scaledWidth) - 1;
+	xd2 = abs(scaledWidth) ;
+	yd2 = abs(scaledHeight) ;
+	long e,d,dx2;
+	short sy;
+	long dest_dy = abs(yd2);
+	long src_dy = abs(ys2);
+        if (scaledHeight < 0) {
+            ys1 = origHeight - ((src_dy<<1)+dest_dy)/(dest_dy << 1); // e = dy*2 - dx
+            sy = -1;
+        }
+	e = (src_dy<<1)-dest_dy; // e = dy*2 - dx
+	dx2 = dest_dy<<1; // dx2 = dx *2
+	src_dy <<= 1; // dy *= 2
+
+        long dest_dx,src_dx,ex,dx2x;
+	short src_sx;
+	dest_dx = abs(xd2);
+	src_dx = abs(xs2);
+        int src_direction = 1;
+#ifdef ROUND
+	ex = (src_dx<<1)-dest_dx;
+	dx2x = dest_dx<<1; // multiplying by 2 is for rounding purposes
+	src_dx <<= 1;
+#else
+        dx2x = dest_dx;
+        ex = src_dx - dest_dx;
+#endif
+
+        /* Compute the src_offset and associated error value:
+         * There are two ways to do this: */
+        /* 1: Directly */
+        int nsrc_offset = (src_dx*startColumn + dest_dx)*src_sx/(dest_dx*2);
+        long nex = (src_dx*startColumn + dest_dx)%(dest_dx*2) - dest_dx*2 + src_dx;
+	/* 2. Iteratively */
+        int src_offset = 0;
+	for (d = 0; d < startColumn; d++) {
+            //XXX: should be 'ex > 0'
+		while (ex > 0) {
+			src_offset += src_sx;
+			ex -= dx2x;
+		}
+		ex += src_dx;
+	}
+#if 0
+        if (nex != ex) {
+            printf("e: %d %d: %d %d %d\n", ex, nex, src_dx, startColumn, dest_dx);
+            assert(nex == ex);
+        }
+        if (nsrc_offset != src_offset) {
+            printf("off: %d %d: %d %d %d\n", src_offset, nsrc_offset, src_dx, startColumn, dest_dx);
+            assert(nsrc_offset == src_offset);
+        }
+
+#endif
+        //printf("src_offset %d %d %d\n", src_offset, nsrc_offset, ex);
+
+        src += src_offset;
+
+        /* we need to allocate enough room for ceil(src_height/dest_height) scanlines */
+	//XXX: I suppose we should check whether this will succeed
+        uint32_t *temp_buf = pixman_malloc_abc ((origHeight + scaledHeight-1)/scaledHeight, scaledWidth, sizeof(uint32_t));
+	if (!temp_buf)
+            return -1;
+
+        uint32_t *scanline = pixman_malloc_abc (origWidth, 1, sizeof(uint32_t));
+
+        int x = 0;
+        int y = 0;
+        for (d = 0; d < startRow + height; d++)
+	{
+            int columns = 0;
+            while (e > 0)
+            {
+                fetch_scanline(pict, y, scanline);
+                //XXX:  we could turn these multiplications into additions
+                downsample_row_box_filter(width, src + ys1 * srcStride/4, temp_buf + width * columns,
+                        dx2x, ex, src_dx);
+
+                ys1 ++;
+                e -= dx2;
+                columns++;
+            }
+            downsample_columns_box_filter(width, columns, temp_buf, dest + (yd1 - startRow)*dstStride/4);
+            yd1 += 1;
+            e += src_dy;
+        }
+        free(temp_buf);
+        return 0;
+}
+
diff --git a/pixman/pixman-transformed.c b/pixman/pixman-transformed.c
index fca1cdb..55cf4a8 100644
--- a/pixman/pixman-transformed.c
+++ b/pixman/pixman-transformed.c
@@ -555,11 +555,19 @@ ACCESS(fbFetchTransformed)(bits_image_t * pict, int x, int y, int width,
     /* when using convolution filters or PIXMAN_REPEAT_PAD one might get here without a transform */
     if (pict->common.transform)
     {
-        if (!pixman_transform_point_3d (pict->common.transform, &v))
+
+        pixman_transform_t transform = *pict->common.transform;
+#if 0
+        if (pict->downscaled) {
+            pict->downscale_width;
+            pict->downscale_height;
+        }
+#endif
+        if (!pixman_transform_point_3d (&transform, &v))
             return;
-        unit.vector[0] = pict->common.transform->matrix[0][0];
-        unit.vector[1] = pict->common.transform->matrix[1][0];
-        unit.vector[2] = pict->common.transform->matrix[2][0];
+        unit.vector[0] = transform.matrix[0][0];
+        unit.vector[1] = transform.matrix[1][0];
+        unit.vector[2] = transform.matrix[2][0];
         affine = v.vector[2] == pixman_fixed_1 && unit.vector[2] == 0;
     }
     else
diff --git a/pixman/pixman-utils.c b/pixman/pixman-utils.c
index 22f522b..0f9af59 100644
--- a/pixman/pixman-utils.c
+++ b/pixman/pixman-utils.c
@@ -26,6 +26,7 @@
 #endif
 
 #include <stdlib.h>
+#include <math.h>
 
 #include "pixman-private.h"
 #include "pixman-mmx.h"
@@ -63,6 +64,271 @@ pixman_transform_point_3d (pixman_transform_t *transform,
     return TRUE;
 }
 
+#if 0
+pixman_fixed_inverse(pixman_fixed_t a)
+{
+    /* are there any value that don't have an inverse?
+     * yes. lots */
+}
+#endif
+
+/**
+ * sqrt_fixed:
+ * @x: number to sqrt in fixed-point 16.16 format
+ * 
+ * This code is inspired by Ken Turkowski's fixed-point sqrt.
+ * http://www.worldserver.com/turk/computergraphics/FixedSqrt.pdf
+ *
+ * Author : Frederic Plourde 
+ **/
+pixman_fixed_t
+sqrt_fixed (pixman_fixed_t x)
+{
+    const pixman_fixed_t pixman_fixed_095    = 0x0000F333;
+    const pixman_fixed_t pixman_fixed_sqrt_2 = 0x00016A0A;
+    int32_t y = x;
+    int32_t a = 0;
+
+    if (y <= 0)
+    {
+	return 0;
+    }
+    else if (y > pixman_fixed_1)
+    {
+	while (y & 0xFFFF0000)
+	{
+	    y >>= 1;
+	    a++;
+	}
+    }
+    else
+    {
+	while (!(y & 0xFFFF0000))
+	{
+	    y <<= 1;
+	    a--;
+	}
+	y >>= 1;
+	a++;
+    }
+
+    y = (pixman_fixed_095 + y) >> 1;
+
+    if (a & 0x00000001) // a is odd
+    {
+	a = a >> 1;
+
+	if (a > 0)
+	{
+	    y = mul_fixed(mul_fixed(y, pixman_fixed_1 << a), pixman_fixed_sqrt_2);
+	}
+        else
+	{
+	    y = mul_fixed(mul_fixed(y, pixman_fixed_1 >> -a), pixman_fixed_sqrt_2);
+	}
+    }
+    else // a is even
+    {
+	a = a >> 1;
+
+	if (a > 0)
+	{
+	    y = mul_fixed(y, pixman_fixed_1 << a);
+	}
+	else
+	{
+	    y = mul_fixed(y, pixman_fixed_1 >> -a);
+	}
+    }
+
+    // Iterate two times to improve accuracy.
+    // One of the below two lines may be removed to
+    // increase speed but at the cost of reduced accuracy.
+    y = (y + div_fixed(x, y)) >> 1;
+    y = (y + div_fixed(x, y)) >> 1;
+
+    return y;
+}
+
+void
+pixman_transform_unscale (pixman_transform_t *transform,
+			 pixman_fixed_t xscale, pixman_fixed_t yscale)
+{
+    /*
+      we want to multiply the transform by the inverse of the following scaled
+      transform:
+
+    | xscale  0    0 |^-1     | 1/xscale    0      0 |
+    |   0   yscale 0 |    ==  |    0    1/yscale   0 |
+    |   0     0    1 |        |    0        0      1 |
+
+      multiplying with the transform matrix gives:
+
+    | 1/xscale    0      0 |   |  a  b  c  |     | a/xscale b/xscale c/xscale |
+    |    0    1/yscale   0 | * |  d  e  f  |  =  | d/yscale e/yscale f/yscale |
+    |    0        0      1 |   |  g  h  i  |     |     g        h        i    |
+
+       we perform this transformation directly to take advantage of the
+       increased accuracy */
+    transform->matrix[0][0] = div_fixed(transform->matrix[0][0], xscale);
+    transform->matrix[0][1] = div_fixed(transform->matrix[0][1], xscale);
+    transform->matrix[0][2] = div_fixed(transform->matrix[0][2], xscale);
+
+    transform->matrix[1][0] = div_fixed(transform->matrix[1][0], yscale);
+    transform->matrix[1][1] = div_fixed(transform->matrix[1][1], yscale);
+    transform->matrix[1][2] = div_fixed(transform->matrix[1][2], yscale);
+}
+
+void
+pixman_extract_scale (pixman_transform_t *transform,
+        int *scale_x, int *scale_y)
+{
+    pixman_fixed_t (*matrix)[3] = transform->matrix;
+    pixman_bool_t affine = matrix[2][0] == 0 && matrix[2][1] == 0 && matrix[2][2] == pixman_fixed_1;
+    if (affine) {
+#ifdef FIXED
+        *scale_x  = sqrt_fixed(square_fixed(width) *(square_fixed(matrix[0][0]) + square_fixed(matrix[1][0])));
+        *scale_y = sqrt_fixed(square_fixed(height)*(square_fixed(matrix[0][1]) + square_fixed(matrix[1][1])));
+#else
+        double a  = pixman_fixed_to_double(matrix[0][0]);
+        double b  = pixman_fixed_to_double(matrix[0][1]);
+        double c  = pixman_fixed_to_double(matrix[1][0]);
+        double d  = pixman_fixed_to_double(matrix[1][1]);
+
+        *scale_x = pixman_double_to_fixed(sqrt(a*a + c*c));
+        *scale_y = pixman_double_to_fixed(sqrt(b*b + d*d));
+
+#endif
+    } else {
+#if 0
+        /*
+                               /              2               2              2        \1/2
+                               |         width  ((d w  - y0 f)  (c w - y0 e))          |
+                               |-------------------------------------------------------|
+                               |                                                    2  |
+                               \ (a d w - a y0 f - c b w + c x0 f + e b y0 - e x0 d)   /
+
+                               /              2               2              2        \1/2
+                               |        height  ((b w  - x0 f)  (a w - x0 e))          |
+                               |-------------------------------------------------------|
+                               |                                                    2  |
+                               \ (a d w - a y0 f - c b w + c x0 f + e b y0 - e x0 d)   /
+
+
+        */
+        /* this isn't too nice and hasn't been tested at all. floating point is used to help readability */
+        double a  = matrix[0][0];
+        double b  = matrix[0][1];
+        double x0 = matrix[0][2];
+        double c  = matrix[1][0];
+        double d  = matrix[1][1];
+        double y0 = matrix[1][2];
+        double e  = matrix[2][0];
+        double f  = matrix[2][1];
+        double w  = matrix[2][2];
+
+        double det = (a*d*w - a*y0*f - c*b*w + c*x0*f + e*b*y0 - e*x0*d);
+        double topx = (pow(d*w - y0*f, 2.)+pow(c*w - y0*e, 2.))*width*width;
+        double topy = (pow(b*w - x0*f, 2.)+pow(a*w - x0*e, 2.))*height*height;
+        double distx = sqrt(topx/(det*det));
+        double disty = sqrt(topy/(det*det));
+        *scaled_width  = pixman_double_to_fixed(distx);
+        *scaled_height = pixman_double_to_fixed(disty);
+#endif
+    }
+}
+
+
+//XXX: could be called pixman_get_scaled_size()...
+void
+pixman_get_scaled_size (pixman_transform_t *transform,
+        int height, int width,
+        int *scaled_height, int *scaled_width)
+{
+    pixman_fixed_t (*matrix)[3] = transform->matrix;
+/*
+                            /     2   2    2 \1/2
+                            |width  (d  + c )|
+                distx :=    |----------------|
+                            |             2  |
+                            \  (a d - c b)   /
+
+                            /      2   2    2 \1/2
+                            |height  (b  + a )|
+                disty :=    |-----------------|
+                            |             2   |
+                            \  (a d - c b)    /
+                             /                  \1/2
+                            |width² (d² + c²)|
+                distx :=    |----------------|
+                            \  (a d - c b)²   /
+
+                            ⎛  height² (b² + a²) ⎞ ½
+                disty :=    ⎜ -------------------⎟
+                            ⎝    (a d - c b)²    ⎠
+*/
+    pixman_bool_t affine = matrix[2][0] == 0 && matrix[2][1] == 0 && matrix[2][2] == pixman_fixed_1;
+    if (affine) {
+#if fixed
+        pixman_fixed_t det = mul_fixed(matrix[0][0],matrix[1][1]) - mul_fixed(matrix[0][1],matrix[1][0]);
+
+        pixman_fixed_t topx = square_fixed(width) *(square_fixed(matrix[1][1]) + square_fixed(matrix[1][0]));
+        pixman_fixed_t topy = square_fixed(height)*(square_fixed(matrix[0][0]) + square_fixed(matrix[0][1]));
+
+        *scaled_width  = sqrt_fixed(div_fixed(topx, square_fixed(det)));
+        *scaled_height = sqrt_fixed(div_fixed(topy, square_fixed(det)));
+#else
+        double a  = pixman_fixed_to_double(matrix[0][0]);
+        double b  = pixman_fixed_to_double(matrix[0][1]);
+        double c  = pixman_fixed_to_double(matrix[1][0]);
+        double d  = pixman_fixed_to_double(matrix[1][1]);
+
+        double det = (a*d - c*b);
+        double topx = (d*d + c*c)*width*width;
+        double topy = (b*b + a*a)*height*height;
+        double distx = sqrt(topx/(det*det));
+        double disty = sqrt(topy/(det*det));
+        *scaled_width  = distx;
+        *scaled_height = disty;
+#endif
+
+    } else {
+        /*
+                               /              2               2              2        \1/2
+                               |         width  ((d w  - y0 f)  (c w - y0 e))          |
+                               |-------------------------------------------------------|
+                               |                                                    2  |
+                               \ (a d w - a y0 f - c b w + c x0 f + e b y0 - e x0 d)   /
+
+                               /              2               2              2        \1/2
+                               |        height  ((b w  - x0 f)  (a w - x0 e))          |
+                               |-------------------------------------------------------|
+                               |                                                    2  |
+                               \ (a d w - a y0 f - c b w + c x0 f + e b y0 - e x0 d)   /
+
+
+        */
+        /* this isn't too nice and hasn't been tested at all. floating point is used to help readability */
+        double a  = matrix[0][0];
+        double b  = matrix[0][1];
+        double x0 = matrix[0][2];
+        double c  = matrix[1][0];
+        double d  = matrix[1][1];
+        double y0 = matrix[1][2];
+        double e  = matrix[2][0];
+        double f  = matrix[2][1];
+        double w  = matrix[2][2];
+
+        double det = (a*d*w - a*y0*f - c*b*w + c*x0*f + e*b*y0 - e*x0*d);
+        double topx = (pow(d*w - y0*f, 2.)+pow(c*w - y0*e, 2.))*width*width;
+        double topy = (pow(b*w - x0*f, 2.)+pow(a*w - x0*e, 2.))*height*height;
+        double distx = sqrt(topx/(det*det));
+        double disty = sqrt(topy/(det*det));
+        *scaled_width  = pixman_double_to_fixed(distx);
+        *scaled_height = pixman_double_to_fixed(disty);
+    }
+}
+
 PIXMAN_EXPORT pixman_bool_t
 pixman_blt (uint32_t *src_bits,
 	    uint32_t *dst_bits,
diff --git a/pixman/pixman.h b/pixman/pixman.h
index 36d91a9..23a4f34 100644
--- a/pixman/pixman.h
+++ b/pixman/pixman.h
@@ -615,6 +615,7 @@ pixman_bool_t   pixman_image_set_filter              (pixman_image_t
 						      int                           n_filter_params);
 void		pixman_image_set_source_clipping     (pixman_image_t		   *image,
 						      pixman_bool_t                 source_clipping);
+pixman_bool_t	pixman_image_has_source_clipping     (pixman_image_t		   *image);
 void            pixman_image_set_alpha_map           (pixman_image_t               *image,
 						      pixman_image_t               *alpha_map,
 						      int16_t                       x,