[cairo] [PATCH] speed up fbOver
Jeff Muizelaar
jeff at infidigm.net
Wed Jul 16 10:10:02 PDT 2008
The attached patch nearly doubles the speed of fbCompositeSrc_8888x8888
on my Core 2 Duo with GCC 4.0.1.
I'll look at fbOver24 and fbIn next.
-Jeff
-------------- next part --------------
commit 633a8f30f86285f2a1454bdfb74c43fcfd21a9ee
Author: Jeff Muizelaar <jmuizelaar at mozilla.com>
Date: Wed Jul 16 12:22:32 2008 -0400
Speed up fbOver
Copies the implementation from FbByteMulAdd while adding some explanatory comments.
diff --git a/pixman/pixman-pict.c b/pixman/pixman-pict.c
index 7c88a65..e7b7416 100644
--- a/pixman/pixman-pict.c
+++ b/pixman/pixman-pict.c
@@ -35,6 +35,10 @@
#include "pixman-vmx.h"
#include "pixman-sse.h"
+#ifdef __GNUC__
+# define inline __inline__ __attribute__ ((__always_inline__))
+#endif
+
#define FbFullMask(n) ((n) == 32 ? (uint32_t)-1 : ((((uint32_t) 1) << n) - 1))
#undef READ
@@ -47,18 +51,45 @@ typedef void (* CompositeFunc) (pixman_op_t,
int16_t, int16_t, int16_t, int16_t, int16_t, int16_t,
uint16_t, uint16_t);
-uint32_t
-fbOver (uint32_t x, uint32_t y)
+inline uint32_t
+fbOver (uint32_t src, uint32_t dest)
{
- uint16_t a = ~x >> 24;
- uint16_t t;
- uint32_t m,n,o,p;
+ // dest = (dest * (255 - alpha)) / 255 + src
- m = FbOverU(x,y,0,a,t);
- n = FbOverU(x,y,8,a,t);
- o = FbOverU(x,y,16,a,t);
- p = FbOverU(x,y,24,a,t);
- return m|n|o|p;
+ uint32_t a = ~src >>24; // 255 - alpha == 255 + (~alpha + 1) == ~alpha
+
+ // do the computation two components at a time inside of a uint32_t
+
+ // basic formula: trunc((i + 128)*257/65536)
+ // multiply and divide
+ uint32_t t = ((dest & 0xff00ff) * a) + 0x800080;
+ t = (t + ((t >> 8) & 0xff00ff)) >> 8;
+ t &= 0xff00ff;
+
+ // add
+ t += src & 0xff00ff;
+
+ // saturate
+ t |= 0x1000100 - ((t >> 8) & 0xff00ff);
+ t &= 0xff00ff;
+
+ // multiply and divide
+ dest = (((dest >> 8) & 0xff00ff) * a) + 0x800080;
+ dest = (dest + ((dest >> 8) & 0xff00ff)) >> 8;
+ dest &= 0xff00ff;
+
+ // add
+ dest += (src >> 8) & 0xff00ff;
+
+ // saturate
+ dest |= 0x1000100 - ((dest >> 8) & 0xff00ff);
+ dest &= 0xff00ff;
+
+ // recombine
+ dest <<= 8;
+ dest += t;
+
+ return dest;
}
uint32_t
More information about the cairo
mailing list