[cairo] [PATCH] speed up fbOver
Jeff Muizelaar
jeff at infidigm.net
Thu Jul 17 10:11:23 PDT 2008
On Thu, Jul 17, 2008 at 06:22:07PM +0200, Soeren Sandmann wrote:
> Jeff Muizelaar <jeff at infidigm.net> writes:
>
> > Regarding your comment on IRC about copying FbByteMulAdd: there was
> > no great reason. I was looking at doing an ARM assembler version and
> > wanted a more documented version of FbByteMulAdd to work with. Since I
> > had a documented version I figured it would be nice to include that
> > in the tree. This was a low impact way to do that. However, I'm not that
> > attached to either way and I'll change it if you'd prefer the use
> > of the FbByteMulAdd macro.
>
> I'd prefer to add the comments to the macro and then use that.
Here you are.
-Jeff
-------------- next part --------------
Comment FbByteMulAdd
From: Jeff Muizelaar <jmuizelaar at mozilla.com>
---
pixman/combine.h.inc | 12 ++++++++++++
1 files changed, 12 insertions(+), 0 deletions(-)
diff --git a/pixman/combine.h.inc b/pixman/combine.h.inc
index 7dd97ae..8c70cb7 100644
--- a/pixman/combine.h.inc
+++ b/pixman/combine.h.inc
@@ -57,19 +57,31 @@
x_c = (x_c * a) / 255 + y
*/
#define FbByteMulAdd(x, a, y) do { \
+ /* multiply and divide: trunc((i + 128)*257/65536) */ \
comp4_t t = ((x & RB_MASK) * a) + RB_ONE_HALF; \
t = (t + ((t >> COMPONENT_SIZE) & RB_MASK)) >> COMPONENT_SIZE; \
t &= RB_MASK; \
+ \
+ /* add */ \
t += y & RB_MASK; \
+ \
+ /* saturate */ \
t |= RB_MASK_PLUS_ONE - ((t >> COMPONENT_SIZE) & RB_MASK); \
t &= RB_MASK; \
\
+ /* multiply and divide */ \
x = (((x >> COMPONENT_SIZE) & RB_MASK) * a) + RB_ONE_HALF; \
x = (x + ((x >> COMPONENT_SIZE) & RB_MASK)) >> COMPONENT_SIZE; \
x &= RB_MASK; \
+ \
+ /* add */ \
x += (y >> COMPONENT_SIZE) & RB_MASK; \
+ \
+ /* saturate */ \
x |= RB_MASK_PLUS_ONE - ((x >> COMPONENT_SIZE) & RB_MASK); \
x &= RB_MASK; \
+ \
+ /* recombine */ \
x <<= COMPONENT_SIZE; \
x += t; \
} while (0)
-------------- next part --------------
Speed up fbOver
From: Jeff Muizelaar <jmuizelaar at mozilla.com>
Use FbByteMulAdd to operate on two components at a time and force the function
to be inlined.
---
pixman/pixman-pict.c | 21 +++++++++++----------
1 files changed, 11 insertions(+), 10 deletions(-)
diff --git a/pixman/pixman-pict.c b/pixman/pixman-pict.c
index 7c88a65..142cf1f 100644
--- a/pixman/pixman-pict.c
+++ b/pixman/pixman-pict.c
@@ -34,6 +34,11 @@
#include "pixman-mmx.h"
#include "pixman-vmx.h"
#include "pixman-sse.h"
+#include "pixman-combine32.h"
+
+#ifdef __GNUC__
+# define inline __inline__ __attribute__ ((__always_inline__))
+#endif
#define FbFullMask(n) ((n) == 32 ? (uint32_t)-1 : ((((uint32_t) 1) << n) - 1))
@@ -47,18 +52,14 @@ typedef void (* CompositeFunc) (pixman_op_t,
int16_t, int16_t, int16_t, int16_t, int16_t, int16_t,
uint16_t, uint16_t);
-uint32_t
-fbOver (uint32_t x, uint32_t y)
+inline uint32_t
+fbOver (uint32_t src, uint32_t dest)
{
- uint16_t a = ~x >> 24;
- uint16_t t;
- uint32_t m,n,o,p;
+ // dest = (dest * (255 - alpha)) / 255 + src
+ uint32_t a = ~src >> 24; // 255 - alpha == 255 + (~alpha + 1) == ~alpha
+ FbByteMulAdd(dest, a, src);
- m = FbOverU(x,y,0,a,t);
- n = FbOverU(x,y,8,a,t);
- o = FbOverU(x,y,16,a,t);
- p = FbOverU(x,y,24,a,t);
- return m|n|o|p;
+ return dest;
}
uint32_t
More information about the cairo
mailing list