[cairo] [PATCH] speed up fbOver

Jeff Muizelaar jeff at infidigm.net
Wed Jul 16 10:10:02 PDT 2008


The attached patch nearly doubles the speed of fbCompositeSrc_8888x8888
on my Core 2 Duo with gcc 4.0.1.

I'll look at fbOver24 and fbIn next.

-Jeff
-------------- next part --------------
commit 633a8f30f86285f2a1454bdfb74c43fcfd21a9ee
Author: Jeff Muizelaar <jmuizelaar at mozilla.com>
Date:   Wed Jul 16 12:22:32 2008 -0400

    Speed up fbOver
    
    Copies the implementation from FbByteMulAdd while adding some explanatory comments.

diff --git a/pixman/pixman-pict.c b/pixman/pixman-pict.c
index 7c88a65..e7b7416 100644
--- a/pixman/pixman-pict.c
+++ b/pixman/pixman-pict.c
@@ -35,6 +35,10 @@
 #include "pixman-vmx.h"
 #include "pixman-sse.h"
 
+#ifdef __GNUC__
+#   define inline __inline__ __attribute__ ((__always_inline__))
+#endif
+
 #define FbFullMask(n)   ((n) == 32 ? (uint32_t)-1 : ((((uint32_t) 1) << n) - 1))
 
 #undef READ
@@ -47,18 +51,45 @@ typedef void (* CompositeFunc) (pixman_op_t,
 				int16_t, int16_t, int16_t, int16_t, int16_t, int16_t,
 				uint16_t, uint16_t);
 
-uint32_t
-fbOver (uint32_t x, uint32_t y)
+inline uint32_t
+fbOver (uint32_t src, uint32_t dest)
 {
-    uint16_t  a = ~x >> 24;
-    uint16_t  t;
-    uint32_t  m,n,o,p;
+    // dest = (dest * (255 - alpha)) / 255 + src
 
-    m = FbOverU(x,y,0,a,t);
-    n = FbOverU(x,y,8,a,t);
-    o = FbOverU(x,y,16,a,t);
-    p = FbOverU(x,y,24,a,t);
-    return m|n|o|p;
+    uint32_t a = ~src >>24; // 255 - alpha == 255 + (~alpha + 1) == ~alpha
+
+    // do the computation two components at a time inside of a uint32_t
+
+    // basic formula: trunc((i + 128)*257/65536)
+    // multiply and divide
+    uint32_t t = ((dest & 0xff00ff) * a) + 0x800080;
+    t = (t + ((t >> 8) & 0xff00ff)) >> 8;
+    t &= 0xff00ff;
+
+    // add
+    t += src & 0xff00ff;
+
+    // saturate
+    t |= 0x1000100 - ((t >> 8) & 0xff00ff);
+    t &= 0xff00ff;
+
+    // multiply and divide
+    dest = (((dest >> 8) & 0xff00ff) * a) + 0x800080;
+    dest = (dest + ((dest >> 8) & 0xff00ff)) >> 8;
+    dest &= 0xff00ff;
+
+    // add
+    dest += (src >> 8) & 0xff00ff;
+
+    // saturate
+    dest |= 0x1000100 - ((dest >> 8) & 0xff00ff);
+    dest &= 0xff00ff;
+
+    // recombine
+    dest <<= 8;
+    dest += t;
+
+    return dest;
 }
 
 uint32_t


More information about the cairo mailing list