[cairo] Image scaling with pixman

Billy Biggs vektor at dumbterm.net
Tue Aug 16 22:12:25 PDT 2005


  I was asked today about image scaling performance with pixman.

  The code from xserver that is now in HEAD should be much faster than
the old pixman code, as it is much better structured.  Unfortunately,
one of the bugfixes to the xserver code slowed down bilinear scaling a
wee bit, but that's about to get fixed.

  The attached patch speeds up both nearest neighbour and bilinear
scaling in the case where the transformation is not projective.  I went a
little further and unrolled the nasty loop I added to fix a precision
error, which gives significant savings.  This patch gives a further 2x
speed improvement over the already improved code.

  Comments?

  -Billy

-------------- next part --------------
Index: fbcompose.c
===================================================================
RCS file: /cvs/cairo/cairo/pixman/src/fbcompose.c,v
retrieving revision 1.1
diff -p -u -r1.1 fbcompose.c
--- fbcompose.c	11 Aug 2005 04:10:13 -0000	1.1
+++ fbcompose.c	17 Aug 2005 05:03:28 -0000
@@ -2906,6 +2906,7 @@ static void fbFetchTransformed(PicturePt
 #else
     miIndexedPtr indexed = 0;
 #endif
+    Bool projective = FALSE;
 
     fetch = fetchPixelProcForPicture(pict);
 
@@ -2929,6 +2930,7 @@ static void fbFetchTransformed(PicturePt
         unit.vector[1] = 0;
         unit.vector[2] = 0;
     }
+    projective = (unit.vector[2] != 0);
 
     if (pict->filter == PIXMAN_FILTER_NEAREST || pict->filter == PIXMAN_FILTER_FAST)
     {
@@ -2939,8 +2941,13 @@ static void fbFetchTransformed(PicturePt
                     if (!v.vector[2]) {
                         buffer[i] = 0;
                     } else {
-                        y = MOD(DIV(v.vector[1],v.vector[2]), pict->pDrawable->height);
-                        x = MOD(DIV(v.vector[0],v.vector[2]), pict->pDrawable->width);
+                        if (projective) {
+                            y = MOD(DIV(v.vector[1],v.vector[2]), pict->pDrawable->height);
+                            x = MOD(DIV(v.vector[0],v.vector[2]), pict->pDrawable->width);
+                        } else {
+                            y = MOD(v.vector[1]>>16, pict->pDrawable->height);
+                            x = MOD(v.vector[0]>>16, pict->pDrawable->width);
+                        }
                         buffer[i] = fetch(bits + (y + pict->pDrawable->y)*stride, x + pict->pDrawable->x, indexed);
                     }
                     v.vector[0] += unit.vector[0];
@@ -2952,8 +2959,13 @@ static void fbFetchTransformed(PicturePt
                     if (!v.vector[2]) {
                         buffer[i] = 0;
                     } else {
-                        y = MOD(DIV(v.vector[1],v.vector[2]), pict->pDrawable->height);
-                        x = MOD(DIV(v.vector[0],v.vector[2]), pict->pDrawable->width);
+                        if (projective) {
+                            y = MOD(DIV(v.vector[1],v.vector[2]), pict->pDrawable->height);
+                            x = MOD(DIV(v.vector[0],v.vector[2]), pict->pDrawable->width);
+                        } else {
+                            y = MOD(v.vector[1]>>16, pict->pDrawable->height);
+                            x = MOD(v.vector[0]>>16, pict->pDrawable->width);
+                        }
                         if (pixman_region_contains_point (pict->pCompositeClip, x, y, &box))
                             buffer[i] = fetch(bits + (y + pict->pDrawable->y)*stride, x + pict->pDrawable->x, indexed);
                         else
@@ -2971,8 +2983,13 @@ static void fbFetchTransformed(PicturePt
                     if (!v.vector[2]) {
                         buffer[i] = 0;
                     } else {
-                        y = DIV(v.vector[1],v.vector[2]);
-                        x = DIV(v.vector[0],v.vector[2]);
+                        if (projective) {
+                            y = DIV(v.vector[1],v.vector[2]);
+                            x = DIV(v.vector[0],v.vector[2]);
+                        } else {
+                            y = v.vector[1]>>16;
+                            x = v.vector[0]>>16;
+                        }
                         buffer[i] = ((x < box.x1) | (x >= box.x2) | (y < box.y1) | (y >= box.y2)) ?
                                     0 : fetch(bits + (y + pict->pDrawable->y)*stride, x + pict->pDrawable->x, indexed);
                     }
@@ -2985,8 +3002,13 @@ static void fbFetchTransformed(PicturePt
                     if (!v.vector[2]) {
                         buffer[i] = 0;
                     } else {
-                        y = DIV(v.vector[1],v.vector[2]);
-                        x = DIV(v.vector[0],v.vector[2]);
+                        if (projective) {
+                            y = DIV(v.vector[1],v.vector[2]);
+                            x = DIV(v.vector[0],v.vector[2]);
+                        } else {
+                            y = v.vector[1]>>16;
+                            x = v.vector[0]>>16;
+                        }
                         if (pixman_region_contains_point (pict->pCompositeClip, x, y, &box))
                             buffer[i] = fetch(bits + (y + pict->pDrawable->y)*stride, x + pict->pDrawable->x, indexed);
                         else
@@ -3009,16 +3031,24 @@ static void fbFetchTransformed(PicturePt
                         int x1, x2, y1, y2, distx, idistx, disty, idisty, k;
                         FbBits *b;
                         CARD32 tl, tr, bl, br, r;
-                        xFixed_48_16 div;
+                        CARD32 ft, fb;
 
-                        div = ((xFixed_48_16)v.vector[0] << 16)/v.vector[2];
-                        x1 = div >> 16;
-                        distx = ((xFixed)div >> 8) & 0xff;
-                        x2 = x1 + 1;
-                        div = ((xFixed_48_16)v.vector[1] << 16)/v.vector[2];
-                        y1 = div >> 16;
-                        y2 = y1 + 1;
-                        disty = ((xFixed)div >> 8) & 0xff;
+                        if (projective) {
+                            xFixed_48_16 div;
+                            div = ((xFixed_48_16)v.vector[0] << 16)/v.vector[2];
+                            x1 = div >> 16;
+                            distx = ((xFixed)div >> 8) & 0xff;
+                            x2 = x1 + 1;
+                            div = ((xFixed_48_16)v.vector[1] << 16)/v.vector[2];
+                            y1 = div >> 16;
+                            y2 = y1 + 1;
+                            disty = ((xFixed)div >> 8) & 0xff;
+                        } else {
+                            x1 = v.vector[0] >> 16;
+                            distx = (v.vector[0] >> 8) & 0xff;
+                            y1 = v.vector[1] >> 16;
+                            disty = (v.vector[1] >> 8) & 0xff;
+                        }
 
                         idistx = 256 - distx;
                         idisty = 256 - disty;
@@ -3036,13 +3066,18 @@ static void fbFetchTransformed(PicturePt
                         bl = fetch(b, x1 + pict->pDrawable->x, indexed);
                         br = fetch(b, x2 + pict->pDrawable->x, indexed);
 
-                        r = 0;
-                        for (k = 0; k < 32; k += 8) {
-                            CARD32 t, b;
-                            t = FbGet8(tl,k) * idistx + FbGet8(tr,k) * distx;
-                            b = FbGet8(bl,k) * idistx + FbGet8(br,k) * distx;
-                            r |= ((((t * idisty) + (b * disty)) >> 16) & 0xff) << k;
-                        }
+                        ft = FbGet8(tl,0) * idistx + FbGet8(tr,0) * distx;
+                        fb = FbGet8(bl,0) * idistx + FbGet8(br,0) * distx;
+                        r = (((ft * idisty + fb * disty) >> 16) & 0xff);
+                        ft = FbGet8(tl,8) * idistx + FbGet8(tr,8) * distx;
+                        fb = FbGet8(bl,8) * idistx + FbGet8(br,8) * distx;
+                        r |= (((ft * idisty + fb * disty) >> 8) & 0xff00);
+                        ft = FbGet8(tl,16) * idistx + FbGet8(tr,16) * distx;
+                        fb = FbGet8(bl,16) * idistx + FbGet8(br,16) * distx;
+                        r |= (((ft * idisty + fb * disty)) & 0xff0000);
+                        ft = FbGet8(tl,24) * idistx + FbGet8(tr,24) * distx;
+                        fb = FbGet8(bl,24) * idistx + FbGet8(br,24) * distx;
+                        r |= (((ft * idisty + fb * disty) << 8) & 0xff000000);
                         buffer[i] = r;
                     }
                     v.vector[0] += unit.vector[0];
@@ -3057,16 +3092,24 @@ static void fbFetchTransformed(PicturePt
                         int x1, x2, y1, y2, distx, idistx, disty, idisty, k;
                         FbBits *b;
                         CARD32 tl, tr, bl, br, r;
-                        xFixed_48_16 div;
+                        CARD32 ft, fb;
 
-                        div = ((xFixed_48_16)v.vector[0] << 16)/v.vector[2];
-                        x1 = div >> 16;
-                        distx = ((xFixed)div >> 8) & 0xff;
-                        x2 = x1 + 1;
-                        div = ((xFixed_48_16)v.vector[1] << 16)/v.vector[2];
-                        y1 = div >> 16;
-                        y2 = y1 + 1;
-                        disty = ((xFixed)div >> 8) & 0xff;
+                        if (projective) {
+                            xFixed_48_16 div;
+                            div = ((xFixed_48_16)v.vector[0] << 16)/v.vector[2];
+                            x1 = div >> 16;
+                            distx = ((xFixed)div >> 8) & 0xff;
+                            x2 = x1 + 1;
+                            div = ((xFixed_48_16)v.vector[1] << 16)/v.vector[2];
+                            y1 = div >> 16;
+                            y2 = y1 + 1;
+                            disty = ((xFixed)div >> 8) & 0xff;
+                        } else {
+                            x1 = v.vector[0] >> 16;
+                            distx = (v.vector[0] >> 8) & 0xff;
+                            y1 = v.vector[1] >> 16;
+                            disty = (v.vector[1] >> 8) & 0xff;
+                        }
 
                         idistx = 256 - distx;
                         idisty = 256 - disty;
@@ -3088,13 +3131,18 @@ static void fbFetchTransformed(PicturePt
                         br = pixman_region_contains_point(pict->pCompositeClip, x2, y2, &box)
                              ? fetch(b, x2 + pict->pDrawable->x, indexed) : 0;
 
-                        r = 0;
-                        for (k = 0; k < 32; k += 8) {
-                            CARD32 t, b;
-                            t = FbGet8(tl,k) * idistx + FbGet8(tr,k) * distx;
-                            b = FbGet8(bl,k) * idistx + FbGet8(br,k) * distx;
-                            r |= ((((t * idisty) + (b * disty)) >> 16) & 0xff) << k;
-                        }
+                        ft = FbGet8(tl,0) * idistx + FbGet8(tr,0) * distx;
+                        fb = FbGet8(bl,0) * idistx + FbGet8(br,0) * distx;
+                        r = (((ft * idisty + fb * disty) >> 16) & 0xff);
+                        ft = FbGet8(tl,8) * idistx + FbGet8(tr,8) * distx;
+                        fb = FbGet8(bl,8) * idistx + FbGet8(br,8) * distx;
+                        r |= (((ft * idisty + fb * disty) >> 8) & 0xff00);
+                        ft = FbGet8(tl,16) * idistx + FbGet8(tr,16) * distx;
+                        fb = FbGet8(bl,16) * idistx + FbGet8(br,16) * distx;
+                        r |= (((ft * idisty + fb * disty)) & 0xff0000);
+                        ft = FbGet8(tl,24) * idistx + FbGet8(tr,24) * distx;
+                        fb = FbGet8(bl,24) * idistx + FbGet8(br,24) * distx;
+                        r |= (((ft * idisty + fb * disty) << 8) & 0xff000000);
                         buffer[i] = r;
                     }
                     v.vector[0] += unit.vector[0];
@@ -3113,16 +3161,24 @@ static void fbFetchTransformed(PicturePt
                         FbBits *b;
                         CARD32 tl, tr, bl, br, r;
                         Bool x1_out, x2_out, y1_out, y2_out;
-                        xFixed_48_16 div;
+                        CARD32 ft, fb;
 
-                        div = ((xFixed_48_16)v.vector[0] << 16)/v.vector[2];
-                        x1 = div >> 16;
-                        distx = ((xFixed)div >> 8) & 0xff;
-                        x2 = x1 + 1;
-                        div = ((xFixed_48_16)v.vector[1] << 16)/v.vector[2];
-                        y1 = div >> 16;
-                        y2 = y1 + 1;
-                        disty = ((xFixed)div >> 8) & 0xff;
+                        if (projective) {
+                            xFixed_48_16 div;
+                            div = ((xFixed_48_16)v.vector[0] << 16)/v.vector[2];
+                            x1 = div >> 16;
+                            distx = ((xFixed)div >> 8) & 0xff;
+                            x2 = x1 + 1;
+                            div = ((xFixed_48_16)v.vector[1] << 16)/v.vector[2];
+                            y1 = div >> 16;
+                            y2 = y1 + 1;
+                            disty = ((xFixed)div >> 8) & 0xff;
+                        } else {
+                            x1 = v.vector[0] >> 16;
+                            distx = (v.vector[0] >> 8) & 0xff;
+                            y1 = v.vector[1] >> 16;
+                            disty = (v.vector[1] >> 8) & 0xff;
+                        }
 
                         idistx = 256 - distx;
                         idisty = 256 - disty;
@@ -3141,13 +3197,18 @@ static void fbFetchTransformed(PicturePt
                         bl = x1_out|y2_out ? 0 : fetch(b, x_off, indexed);
                         br = x2_out|y2_out ? 0 : fetch(b, x_off + 1, indexed);
 
-                        r = 0;
-                        for (k = 0; k < 32; k += 8) {
-                            CARD32 t, b;
-                            t = FbGet8(tl,k) * idistx + FbGet8(tr,k) * distx;
-                            b = FbGet8(bl,k) * idistx + FbGet8(br,k) * distx;
-                            r |= ((((t * idisty) + (b * disty)) >> 16) & 0xff) << k;
-                        }
+                        ft = FbGet8(tl,0) * idistx + FbGet8(tr,0) * distx;
+                        fb = FbGet8(bl,0) * idistx + FbGet8(br,0) * distx;
+                        r = (((ft * idisty + fb * disty) >> 16) & 0xff);
+                        ft = FbGet8(tl,8) * idistx + FbGet8(tr,8) * distx;
+                        fb = FbGet8(bl,8) * idistx + FbGet8(br,8) * distx;
+                        r |= (((ft * idisty + fb * disty) >> 8) & 0xff00);
+                        ft = FbGet8(tl,16) * idistx + FbGet8(tr,16) * distx;
+                        fb = FbGet8(bl,16) * idistx + FbGet8(br,16) * distx;
+                        r |= (((ft * idisty + fb * disty)) & 0xff0000);
+                        ft = FbGet8(tl,24) * idistx + FbGet8(tr,24) * distx;
+                        fb = FbGet8(bl,24) * idistx + FbGet8(br,24) * distx;
+                        r |= (((ft * idisty + fb * disty) << 8) & 0xff000000);
                         buffer[i] = r;
                     }
                     v.vector[0] += unit.vector[0];
@@ -3162,16 +3223,24 @@ static void fbFetchTransformed(PicturePt
                         int x1, x2, y1, y2, distx, idistx, disty, idisty, x_off, k;
                         FbBits *b;
                         CARD32 tl, tr, bl, br, r;
-                        xFixed_48_16 div;
+                        CARD32 ft, fb;
 
-                        div = ((xFixed_48_16)v.vector[0] << 16)/v.vector[2];
-                        x1 = div >> 16;
-                        distx = ((xFixed)div >> 8) & 0xff;
-                        x2 = x1 + 1;
-                        div = ((xFixed_48_16)v.vector[1] << 16)/v.vector[2];
-                        y1 = div >> 16;
-                        y2 = y1 + 1;
-                        disty = ((xFixed)div >> 8) & 0xff;
+                        if (projective) {
+                            xFixed_48_16 div;
+                            div = ((xFixed_48_16)v.vector[0] << 16)/v.vector[2];
+                            x1 = div >> 16;
+                            distx = ((xFixed)div >> 8) & 0xff;
+                            x2 = x1 + 1;
+                            div = ((xFixed_48_16)v.vector[1] << 16)/v.vector[2];
+                            y1 = div >> 16;
+                            y2 = y1 + 1;
+                            disty = ((xFixed)div >> 8) & 0xff;
+                        } else {
+                            x1 = v.vector[0] >> 16;
+                            distx = (v.vector[0] >> 8) & 0xff;
+                            y1 = v.vector[1] >> 16;
+                            disty = (v.vector[1] >> 8) & 0xff;
+                        }
 
                         idistx = 256 - distx;
                         idisty = 256 - disty;
@@ -3189,13 +3258,18 @@ static void fbFetchTransformed(PicturePt
                         br = pixman_region_contains_point(pict->pCompositeClip, x2, y2, &box)
                              ? fetch(b, x_off + 1, indexed) : 0;
 
-                        r = 0;
-                        for (k = 0; k < 32; k += 8) {
-                            CARD32 t, b;
-                            t = FbGet8(tl,k) * idistx + FbGet8(tr,k) * distx;
-                            b = FbGet8(bl,k) * idistx + FbGet8(br,k) * distx;
-                            r |= ((((t * idisty) + (b * disty)) >> 16) & 0xff) << k;
-                        }
+                        ft = FbGet8(tl,0) * idistx + FbGet8(tr,0) * distx;
+                        fb = FbGet8(bl,0) * idistx + FbGet8(br,0) * distx;
+                        r = (((ft * idisty + fb * disty) >> 16) & 0xff);
+                        ft = FbGet8(tl,8) * idistx + FbGet8(tr,8) * distx;
+                        fb = FbGet8(bl,8) * idistx + FbGet8(br,8) * distx;
+                        r |= (((ft * idisty + fb * disty) >> 8) & 0xff00);
+                        ft = FbGet8(tl,16) * idistx + FbGet8(tr,16) * distx;
+                        fb = FbGet8(bl,16) * idistx + FbGet8(br,16) * distx;
+                        r |= (((ft * idisty + fb * disty)) & 0xff0000);
+                        ft = FbGet8(tl,24) * idistx + FbGet8(tr,24) * distx;
+                        fb = FbGet8(bl,24) * idistx + FbGet8(br,24) * distx;
+                        r |= (((ft * idisty + fb * disty) << 8) & 0xff000000);
                         buffer[i] = r;
                     }
                     v.vector[0] += unit.vector[0];


More information about the cairo mailing list