[cairo] [PATCH] speed up fbOver

Jeff Muizelaar jeff at infidigm.net
Thu Jul 17 10:11:23 PDT 2008


On Thu, Jul 17, 2008 at 06:22:07PM +0200, Soeren Sandmann wrote:
> Jeff Muizelaar <jeff at infidigm.net> writes:
> 
> > Regarding your comment on IRC about copying FbByteMulAdd: there was
> > no great reason. I was looking at doing an ARM assembler version and
> > wanted a more documented version of FbByteMulAdd to work with. Since I
> > had a documented version I figured it would be nice to include that
> > in the tree. This was a low impact way to do that. However, I'm not that
> > attached to either way and I'll change it if you'd prefer the use
> > of the FbByteMulAdd macro.
> 
> I'd prefer to add the comments to the macro and then use that.

Here you are.

-Jeff
-------------- next part --------------
Comment FbByteMulAdd

From: Jeff Muizelaar <jmuizelaar at mozilla.com>


---

 pixman/combine.h.inc |   12 ++++++++++++
 1 files changed, 12 insertions(+), 0 deletions(-)

diff --git a/pixman/combine.h.inc b/pixman/combine.h.inc
index 7dd97ae..8c70cb7 100644
--- a/pixman/combine.h.inc
+++ b/pixman/combine.h.inc
@@ -57,19 +57,31 @@
   x_c = (x_c * a) / 255 + y
 */
 #define FbByteMulAdd(x, a, y) do {                                      \
+        /* multiply and divide: trunc((i + 128)*257/65536) */           \
         comp4_t t = ((x & RB_MASK) * a) + RB_ONE_HALF;                  \
         t = (t + ((t >> COMPONENT_SIZE) & RB_MASK)) >> COMPONENT_SIZE;  \
         t &= RB_MASK;                                                   \
+                                                                        \
+        /* add */                                                       \
         t += y & RB_MASK;                                               \
+                                                                        \
+        /* saturate */                                                  \
         t |= RB_MASK_PLUS_ONE - ((t >> COMPONENT_SIZE) & RB_MASK);      \
         t &= RB_MASK;                                                   \
                                                                         \
+        /* multiply and divide */                                       \
         x = (((x >> COMPONENT_SIZE) & RB_MASK) * a) + RB_ONE_HALF;      \
         x = (x + ((x >> COMPONENT_SIZE) & RB_MASK)) >> COMPONENT_SIZE;  \
         x &= RB_MASK;                                                   \
+                                                                        \
+        /* add */                                                       \
         x += (y >> COMPONENT_SIZE) & RB_MASK;                           \
+                                                                        \
+        /* saturate */                                                  \
         x |= RB_MASK_PLUS_ONE - ((x >> COMPONENT_SIZE) & RB_MASK);      \
         x &= RB_MASK;                                                   \
+                                                                        \
+        /* recombine */                                                 \
         x <<= COMPONENT_SIZE;                                           \
         x += t;                                                         \
     } while (0)
-------------- next part --------------
Speed up fbOver

From: Jeff Muizelaar <jmuizelaar at mozilla.com>

Use FbByteMulAdd to operate on two components at a time and force the function
to be inlined.
---

 pixman/pixman-pict.c |   21 +++++++++++----------
 1 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/pixman/pixman-pict.c b/pixman/pixman-pict.c
index 7c88a65..142cf1f 100644
--- a/pixman/pixman-pict.c
+++ b/pixman/pixman-pict.c
@@ -34,6 +34,11 @@
 #include "pixman-mmx.h"
 #include "pixman-vmx.h"
 #include "pixman-sse.h"
+#include "pixman-combine32.h"
+
+#ifdef __GNUC__
+#   define inline __inline__ __attribute__ ((__always_inline__))
+#endif
 
 #define FbFullMask(n)   ((n) == 32 ? (uint32_t)-1 : ((((uint32_t) 1) << n) - 1))
 
@@ -47,18 +52,14 @@ typedef void (* CompositeFunc) (pixman_op_t,
 				int16_t, int16_t, int16_t, int16_t, int16_t, int16_t,
 				uint16_t, uint16_t);
 
-uint32_t
-fbOver (uint32_t x, uint32_t y)
+inline uint32_t
+fbOver (uint32_t src, uint32_t dest)
 {
-    uint16_t  a = ~x >> 24;
-    uint16_t  t;
-    uint32_t  m,n,o,p;
+    // dest = (dest * (255 - alpha)) / 255 + src
+    uint32_t a = ~src >> 24; // 255 - alpha == 255 + (~alpha + 1) == ~alpha
+    FbByteMulAdd(dest, a, src);
 
-    m = FbOverU(x,y,0,a,t);
-    n = FbOverU(x,y,8,a,t);
-    o = FbOverU(x,y,16,a,t);
-    p = FbOverU(x,y,24,a,t);
-    return m|n|o|p;
+    return dest;
 }
 
 uint32_t


More information about the cairo mailing list