[cairo] Optimize spans in the trapezoid rasterizer

Billy Biggs vektor at dumbterm.net
Sun Jul 24 20:14:49 PDT 2005


  The attached patch modifies the 8-bit alpha trapezoid rasterizer in
libpixman's fbedge.c to specifically handle opaque spans in the middle
of a trapezoid.  It significantly speeds up large trapezoids without
slowing down small ones.  The code also allows for optimized
implementations of a saturated 8-bit add (MMX paddusb) and memset-to-255
to be easily dropped in.

  If accepted, this patch should probably also get applied to the copy
in Xorg.

  -Billy

-------------- next part --------------
Index: src/fbedge.c
===================================================================
RCS file: /cvs/cairo/libpixman/src/fbedge.c,v
retrieving revision 1.2
diff -p -u -r1.2 fbedge.c
--- src/fbedge.c	21 Jan 2005 18:26:28 -0000	1.2
+++ src/fbedge.c	25 Jul 2005 02:18:25 -0000
@@ -27,31 +27,6 @@
 #ifdef RENDER
 
 /*
- * 8 bit alpha
- */
-
-#define N_BITS	8
-#define rasterizeEdges   fbRasterizeEdges8
-
-#define DefineAlpha(line,x) \
-    CARD8	*__ap = (CARD8 *) line + (x)
-
-#define StepAlpha	__ap++
-
-#define AddAlpha(a) {				    \
-    CARD16 __a = a + *__ap;			    \
-    *__ap = ((CARD8) ((__a) | (0 - ((__a) >> 8)))); \
-}
-
-#include "fbedgeimp.h"
-
-#undef AddAlpha
-#undef StepAlpha
-#undef DefineAlpha
-#undef rasterizeEdges
-#undef N_BITS
-
-/*
  * 4 bit alpha
  */
 
@@ -100,6 +75,220 @@
 #undef rasterizeEdges
 #undef N_BITS
 
+/*
+ * 8 bit alpha
+ */
+
+#if defined (__GNUC__) && !defined (NO_INLINES)
+#define INLINE inline __attribute__ ((always_inline)) /* not const: helpers store via buf */
+#else
+#define INLINE /* no forced inlining without GCC or when NO_INLINES is set */
+#endif
+
+/* Clamp x to an upper bound of 255; smaller values are returned as-is. */
+static INLINE CARD8 clip255 (int x)
+{
+    const int limit = 255;
+    return (x > limit) ? limit : x;
+}
+
+/* Add value to each of the length bytes at buf, saturating at 255. */
+static INLINE void add_saturate_8(CARD8 *buf, int value, int length)
+{
+    CARD8 *p = buf;
+    for (; length-- != 0; p++)
+    {
+        *p = clip255 (*p + value);
+    }
+}
+
+/* Store 255 in each of the length bytes starting at buf. */
+static INLINE void memset_255(CARD8 *buf, int length)
+{
+    for (; length-- != 0; buf++) *buf = 255;
+}
+
+/*
+ * Add 8-bit coverage for the area between edges l and r, from y = t to
+ * y = b, into buf.  Pixels at the ends of each scanline are written at
+ * once; for a long middle span that keeps receiving the same value, only
+ * fill_size (the number of sub-pixel scanlines owed to it) is counted:
+ *                 +--------------------------+
+ *  fill_size  = |   \                      /
+ *                     +------------------+
+ *                      |================|
+ *                   fill_start       fill_end
+ */
+static void
+fbRasterizeEdges8 (FbBits	*buf,
+		   int		width,	/* spans are clipped to this many pixels */
+		   int		stride,	/* added to line to reach the next row */
+		   RenderEdge	*l,
+		   RenderEdge	*r,
+		   xFixed	t,	/* first scanline processed */
+		   xFixed	b)	/* last scanline processed (inclusive) */
+{
+    xFixed  y = t;
+    FbBits  *line;
+    int fill_start = -1, fill_end = -1; /* pending span [start, end); -1 = none yet */
+    int fill_size = 0; /* scanlines accumulated for the pending span */
+
+    line = buf + xFixedToInt (y) * stride;
+
+    for (;;)
+    {
+        CARD8 *ap = (CARD8 *) line; /* current row addressed as 8-bit alpha */
+	xFixed	lx, rx;
+	int	lxi, rxi;
+	
+	/* Clip the span to [0, width] in X */
+	lx = l->x;
+	if (lx < 0)
+	    lx = 0;
+	rx = r->x;
+	if (xFixedToInt (rx) >= width)
+	    rx = IntToxFixed (width);
+	
+	/* Skip empty (or backwards) sections */
+	if (rx > lx)
+	{
+            int lxs, rxs;
+
+	    /* Find pixel bounds for span. */
+	    lxi = xFixedToInt (lx);
+	    rxi = xFixedToInt (rx);
+
+            /* Sample coverage for edge pixels */
+            lxs = RenderSamplesX (lx, 8);
+            rxs = RenderSamplesX (rx, 8);
+
+            /* Add coverage across row */
+            if (lxi == rxi) /* span starts and ends in the same pixel */
+            {
+                ap[lxi] = clip255 (ap[lxi] + rxs - lxs);
+            }
+            else
+            {
+                ap[lxi] = clip255 (ap[lxi] + N_X_FRAC(8) - lxs);
+
+                /* Move forward so that [lxi, rxi) is the whole-pixel span */
+                lxi++;
+
+                /* Don't bother trying to optimize the fill unless
+                 * the span is longer than 4 pixels. */
+                if (rxi - lxi > 4)
+                {
+                    if (fill_start < 0) /* nothing pending: start a new span */
+                    {
+                        fill_start = lxi;
+                        fill_end = rxi;
+                        fill_size++;
+                    }
+                    else
+                    {
+                        if (lxi >= fill_end || rxi < fill_start)
+                        {
+                            /* No overlap with the pending span: flush it, start anew */
+                            add_saturate_8 (ap + fill_start,
+                                            fill_size * N_X_FRAC(8),
+                                            fill_end - fill_start);
+                            fill_start = lxi;
+                            fill_end = rxi;
+                            fill_size = 1;
+                        }
+                        else
+                        {
+                            /* Left side: flush pixels dropped from the span, or add this scanline to pixels outside it */
+                            if (lxi > fill_start)
+                            {
+                                add_saturate_8 (ap + fill_start,
+                                                fill_size * N_X_FRAC(8),
+                                                lxi - fill_start);
+                                fill_start = lxi;
+                            }
+                            else if (lxi < fill_start)
+                            {
+                                add_saturate_8 (ap + lxi, N_X_FRAC(8),
+                                                fill_start - lxi);
+                            }
+
+                            /* Right side: same treatment as the left side */
+                            if (rxi < fill_end)
+                            {
+                                add_saturate_8 (ap + rxi,
+                                                fill_size * N_X_FRAC(8),
+                                                fill_end - rxi);
+                                fill_end = rxi;
+                            }
+                            else if (fill_end < rxi)
+                            {
+                                add_saturate_8 (ap + fill_end,
+                                                N_X_FRAC(8),
+                                                rxi - fill_end);
+                            }
+                            fill_size++; /* this scanline also covers the pending span */
+                        }
+                    }
+                }
+                else
+                {
+                    add_saturate_8 (ap + lxi, N_X_FRAC(8), rxi - lxi);
+                }
+
+                /* Do not add in a 0 alpha here. This check is
+                 * necessary to avoid a buffer overrun, (when rx
+                 * is exactly on a pixel boundary). */
+                if (rxs)
+                    ap[rxi] = clip255 (ap[rxi] + rxs);
+            }
+	}
+
+	if (y == b) {
+            /* We're done, make sure we clean up any remaining fill. */
+            if (fill_start != fill_end) {
+                if (fill_size == N_Y_FRAC(8)) /* N_Y_FRAC(8) scanlines pending: store 255 directly */
+                {
+                    memset_255 (ap + fill_start, fill_end - fill_start);
+                }
+                else
+                {
+                    add_saturate_8 (ap + fill_start, fill_size * N_X_FRAC(8),
+                                    fill_end - fill_start);
+                }
+            }
+	    break;
+        }
+
+	if (xFixedFrac (y) != Y_FRAC_LAST(8)) /* small step: line is not advanced */
+	{
+	    RenderEdgeStepSmall (l);
+	    RenderEdgeStepSmall (r);
+	    y += STEP_Y_SMALL(8);
+	}
+	else
+	{
+	    RenderEdgeStepBig (l);
+	    RenderEdgeStepBig (r);
+	    y += STEP_Y_BIG(8);
+            if (fill_start != fill_end) /* flush into the row being left; ap still points at it */
+            {
+                if (fill_size == N_Y_FRAC(8)) /* N_Y_FRAC(8) scanlines pending: store 255 directly */
+                {
+                    memset_255 (ap + fill_start, fill_end - fill_start);
+                }
+                else
+                {
+                    add_saturate_8 (ap + fill_start, fill_size * N_X_FRAC(8),
+                                    fill_end - fill_start);
+                }
+                fill_start = fill_end = -1;
+                fill_size = 0;
+            }
+	    line += stride;
+	}
+    }
+}
+
 void
 fbRasterizeEdges (FbBits	*buf,
 		  int		bpp,


More information about the cairo mailing list