[cairo-commit] 4 commits - src/cairo-image-compositor.c src/cairo-spans-compositor.c test/reference

Chris Wilson ickle at kemper.freedesktop.org
Mon Mar 5 09:08:22 PST 2012


 src/cairo-image-compositor.c                    |  570 ++++++++++++++++++++++--
 src/cairo-spans-compositor.c                    |    4 
 test/reference/clip-operator.argb32.ref.png     |binary
 test/reference/clip-operator.rgb24.ref.png      |binary
 test/reference/fallback.argb32.ref.png          |binary
 test/reference/fallback.rgb24.ref.png           |binary
 test/reference/hatchings.ref.png                |binary
 test/reference/operator-source.argb32.ref.png   |binary
 test/reference/operator-source.rgb24.ref.png    |binary
 test/reference/overlapping-boxes.argb32.ref.png |binary
 test/reference/overlapping-boxes.rgb24.ref.png  |binary
 11 files changed, 549 insertions(+), 25 deletions(-)

New commits:
commit c65ed9c79dea9c906db5f34d074500c821ad3228
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Mon Mar 5 14:23:57 2012 +0000

    image: Try performing span composition a row at a time
    
    In theory, this should be more cache-efficient and allow us to trim the
    operation to the width of the row, shaving a few texel fetches. The cost is
    that we cause pixman to evaluate the composite operation per-row. This
    should only be a temporary solution until we can do something better
    through pixman...
    
    On an i5-2520m, ymmv,
    
      firefox-fishtank  64585.38 -> 56823.41:  1.14x speedup
      swfdec-fill-rate   1383.24 ->  1665.88:  1.20x slowdown
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
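
A minimal standalone sketch of the row-at-a-time idea (not cairo's
actual renderer plumbing: compose_row() and its parameters are invented
for this illustration, only the pixman entry points are real API):

    #include <stdint.h>
    #include <pixman.h>

    static void
    compose_row (pixman_op_t op,
                 pixman_image_t *src, pixman_image_t *dst,
                 uint8_t *coverage,  /* one scanline, 4-byte aligned */
                 int x, int y, int width)
    {
        /* Wrap one row of a8 coverage and composite it immediately,
         * trimming the operation to the covered width.  The commit
         * instead creates the mask once with height = bounded.height
         * and a rowstride of 0, so the single line repeats and the
         * same mask serves spans of any height. */
        pixman_image_t *mask =
            pixman_image_create_bits (PIXMAN_a8, width, 1,
                                      (uint32_t *) coverage,
                                      (width + 3) & ~3);

        pixman_image_composite32 (op, src, mask, dst,
                                  x, y,   /* src origin */
                                  0, 0,   /* mask origin */
                                  x, y,   /* dst origin */
                                  width, 1);
        pixman_image_unref (mask);
    }

The repeated composite calls are the per-row cost the message refers
to; reusing one mask image across rows, as the commit does, amortizes
most of the setup.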

diff --git a/src/cairo-image-compositor.c b/src/cairo-image-compositor.c
index 16b5a11..fdbe2de 100644
--- a/src/cairo-image-compositor.c
+++ b/src/cairo-image-compositor.c
@@ -1332,7 +1332,6 @@ typedef struct _cairo_image_span_renderer {
     cairo_span_renderer_t base;
 
     const cairo_composite_rectangles_t *composite;
-    cairo_rectangle_int_t extents;
 
     float opacity;
     uint8_t op;
@@ -1357,6 +1356,7 @@ typedef struct _cairo_image_span_renderer {
 	    int mask_x, mask_y;
 	} composite;
 	struct finish {
+	    cairo_rectangle_int_t extents;
 	    int src_x, src_y;
 	    int stride;
 	    uint8_t *data;
@@ -1379,8 +1379,8 @@ _cairo_image_spans (void *abstract_renderer,
     if (num_spans == 0)
 	return CAIRO_STATUS_SUCCESS;
 
-    mask = r->u.mask.data + (y - r->extents.y) * r->u.mask.stride;
-    mask += spans[0].x - r->extents.x;
+    mask = r->u.mask.data + (y - r->u.mask.extents.y) * r->u.mask.stride;
+    mask += spans[0].x - r->u.mask.extents.x;
     row = mask;
 
     do {
@@ -1415,21 +1415,21 @@ _cairo_image_spans_and_zero (void *abstract_renderer,
     int len;
 
     mask = r->u.mask.data;
-    if (y > r->extents.y) {
-	len = (y - r->extents.y) * r->u.mask.stride;
+    if (y > r->u.mask.extents.y) {
+	len = (y - r->u.mask.extents.y) * r->u.mask.stride;
 	memset (mask, 0, len);
 	mask += len;
     }
 
-    r->extents.y = y + height;
+    r->u.mask.extents.y = y + height;
     r->u.mask.data = mask + height * r->u.mask.stride;
     if (num_spans == 0) {
 	memset (mask, 0, height * r->u.mask.stride);
     } else {
 	uint8_t *row = mask;
 
-	if (spans[0].x != r->extents.x) {
-	    len = spans[0].x - r->extents.x;
+	if (spans[0].x != r->u.mask.extents.x) {
+	    len = spans[0].x - r->u.mask.extents.x;
 	    memset (row, 0, len);
 	    row += len;
 	}
@@ -1444,15 +1444,15 @@ _cairo_image_spans_and_zero (void *abstract_renderer,
 	    spans++;
 	} while (--num_spans > 1);
 
-	if (spans[0].x != r->extents.x + r->extents.width) {
-	    len = r->extents.x + r->extents.width - spans[0].x;
+	if (spans[0].x != r->u.mask.extents.x + r->u.mask.extents.width) {
+	    len = r->u.mask.extents.x + r->u.mask.extents.width - spans[0].x;
 	    memset (row, 0, len);
 	}
 
 	row = mask;
 	while (--height) {
 	    mask += r->u.mask.stride;
-	    memcpy (mask, row, r->extents.width);
+	    memcpy (mask, row, r->u.mask.extents.width);
 	}
     }
 
@@ -1464,8 +1464,8 @@ _cairo_image_finish_spans_and_zero (void *abstract_renderer)
 {
     cairo_image_span_renderer_t *r = abstract_renderer;
 
-    if (r->extents.y < r->extents.height)
-	memset (r->u.mask.data, 0, (r->extents.height - r->extents.y) * r->u.mask.stride);
+    if (r->u.mask.extents.y < r->u.mask.extents.height)
+	memset (r->u.mask.data, 0, (r->u.mask.extents.height - r->u.mask.extents.y) * r->u.mask.stride);
 
     return CAIRO_STATUS_SUCCESS;
 }
@@ -2134,6 +2134,52 @@ _blit_xrgb32_lerp_spans (void *abstract_renderer, int y, int h,
     return CAIRO_STATUS_SUCCESS;
 }
 
+static cairo_status_t
+_inplace_spans (void *abstract_renderer,
+		int y, int h,
+		const cairo_half_open_span_t *spans,
+		unsigned num_spans)
+{
+    cairo_image_span_renderer_t *r = abstract_renderer;
+    uint8_t *mask;
+    int x0, x1;
+
+    if (num_spans == 0)
+	return CAIRO_STATUS_SUCCESS;
+
+    if (num_spans == 2 && spans[0].coverage == 0xff) {
+	pixman_image_composite32 (r->op, r->src, NULL, r->u.composite.dst,
+				  spans[0].x + r->u.composite.src_x,
+				  y + r->u.composite.src_y,
+				  0, 0,
+				  spans[0].x, y,
+				  spans[1].x - spans[0].x, h);
+	return CAIRO_STATUS_SUCCESS;
+    }
+
+    mask = (uint8_t *)pixman_image_get_data (r->mask);
+    x0 = spans[0].x;
+    do {
+	int len = spans[1].x - spans[0].x;
+	*mask++ = spans[0].coverage;
+	if (len > 1) {
+	    memset (mask, spans[0].coverage, --len);
+	    mask += len;
+	}
+	x1 = spans[1].x;
+	spans++;
+    } while (--num_spans > 1);
+
+    pixman_image_composite32 (r->op, r->src, r->mask, r->u.composite.dst,
+			      x0 + r->u.composite.src_x,
+			      y + r->u.composite.src_y,
+			      0, 0,
+			      x0, y,
+			      x1 - x0, h);
+
+    return CAIRO_STATUS_SUCCESS;
+}
+
 static cairo_int_status_t
 inplace_renderer_init (cairo_image_span_renderer_t	*r,
 		       const cairo_composite_rectangles_t *composite,
@@ -2222,8 +2268,53 @@ inplace_renderer_init (cairo_image_span_renderer_t	*r,
 	    r->base.render_rows = _blit_xrgb32_lerp_spans;
 	}
     }
-    if (r->base.render_rows == NULL)
-	return CAIRO_INT_STATUS_UNSUPPORTED;
+    if (r->base.render_rows == NULL) {
+	unsigned int width;
+
+	if (r->op != 0xff)
+	    return CAIRO_INT_STATUS_UNSUPPORTED;
+
+	if (composite->is_bounded == 0)
+	    return CAIRO_INT_STATUS_UNSUPPORTED;
+
+	if (dst->base.is_clear &&
+	    (composite->op == CAIRO_OPERATOR_SOURCE ||
+	     composite->op == CAIRO_OPERATOR_OVER ||
+	     composite->op == CAIRO_OPERATOR_ADD)) {
+	    r->op = PIXMAN_OP_SRC;
+	} else {
+	    if (composite->op == CAIRO_OPERATOR_SOURCE ||
+		composite->op == CAIRO_OPERATOR_CLEAR)
+		return CAIRO_INT_STATUS_UNSUPPORTED;
+
+	    r->op = _pixman_operator (composite->op);
+	}
+
+	width = (composite->bounded.width + 3) & ~3;
+	if (width > sizeof (r->buf))
+	    return CAIRO_INT_STATUS_UNSUPPORTED;
+
+	r->src = _pixman_image_for_pattern (dst,
+					    &composite->source_pattern.base, FALSE,
+					    &composite->bounded,
+					    &composite->source_sample_area,
+					    &r->u.composite.src_x, &r->u.composite.src_y);
+	if (unlikely (r->src == NULL))
+	    return _cairo_error (CAIRO_STATUS_NO_MEMORY);
+
+	/* Create an effectively unbounded mask by repeating the single line */
+	r->mask = pixman_image_create_bits (PIXMAN_a8,
+					    composite->bounded.width,
+					    composite->bounded.height,
+					    (uint32_t *)r->buf, 0);
+	if (unlikely (r->mask == NULL)) {
+	    pixman_image_unref (r->src);
+	    return _cairo_error(CAIRO_STATUS_NO_MEMORY);
+	}
+
+	r->u.composite.dst = dst->pixman_image;
+	r->base.render_rows = _inplace_spans;
+    }
 
     r->base.finish = NULL;
     r->bpp = PIXMAN_FORMAT_BPP(dst->pixman_format);
@@ -2325,20 +2416,20 @@ span_renderer_init (cairo_abstract_span_renderer_t	*_r,
 	}
     }
 
-    r->extents = composite->unbounded;
-    r->u.mask.stride = (r->extents.width + 3) & ~3;
-    if (r->extents.height * r->u.mask.stride > (int)sizeof (r->buf)) {
+    r->u.mask.extents = composite->unbounded;
+    r->u.mask.stride = (r->u.mask.extents.width + 3) & ~3;
+    if (r->u.mask.extents.height * r->u.mask.stride > (int)sizeof (r->buf)) {
 	r->mask = pixman_image_create_bits (PIXMAN_a8,
-					    r->extents.width,
-					    r->extents.height,
+					    r->u.mask.extents.width,
+					    r->u.mask.extents.height,
 					    NULL, 0);
 
 	r->base.render_rows = _cairo_image_spans;
 	r->base.finish = NULL;
     } else {
 	r->mask = pixman_image_create_bits (PIXMAN_a8,
-					    r->extents.width,
-					    r->extents.height,
+					    r->u.mask.extents.width,
+					    r->u.mask.extents.height,
 					    (uint32_t *)r->buf, r->u.mask.stride);
 
 	r->base.render_rows = _cairo_image_spans_and_zero;
@@ -2350,7 +2441,7 @@ span_renderer_init (cairo_abstract_span_renderer_t	*_r,
     r->u.mask.data = (uint8_t *) pixman_image_get_data (r->mask);
     r->u.mask.stride = pixman_image_get_stride (r->mask);
 
-    r->extents.height += r->extents.y;
+    r->u.mask.extents.height += r->u.mask.extents.y;
     return CAIRO_STATUS_SUCCESS;
 }
 
commit c19bf1499a95bed5fb1be145eefd8e0c6ee2a634
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Mon Mar 5 10:06:49 2012 +0000

    image: Add a simple inplace blitter for spans
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
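
The blitter's hot loop reduces to a memcpy for fully-covered spans and
a per-pixel lerp otherwise.  A standalone sketch of that inner loop
(blit_span() and lerp_pixel() are stand-ins for illustration; the
patch's lerp8x4 computes the same blend two channels at a time with a
0x7f bias, while this scalar version uses a single 0x80 bias for
simplicity):

    #include <stdint.h>
    #include <string.h>

    /* Per-channel src*a + dst*(255 - a), with the usual shift-and-add
     * approximation of dividing by 255. */
    static inline uint32_t
    lerp_pixel (uint32_t s, uint8_t a, uint32_t d)
    {
        uint32_t out = 0;
        int shift;

        for (shift = 0; shift < 32; shift += 8) {
            uint32_t sc = (s >> shift) & 0xff;
            uint32_t dc = (d >> shift) & 0xff;
            uint32_t t  = sc * a + dc * (255 - a) + 0x80;
            out |= (((t + (t >> 8)) >> 8) & 0xff) << shift;
        }
        return out;
    }

    /* One span of one row; the real code first folds the constant
     * mask alpha into the coverage value (see mul8_8 in the patch
     * below). */
    static void
    blit_span (uint32_t *dst, const uint32_t *src, int len, uint8_t a)
    {
        if (a == 0xff) {
            memcpy (dst, src, len * sizeof (*dst));
        } else {
            while (len--) {
                *dst = lerp_pixel (*src, a, *dst);
                src++, dst++;
            }
        }
    }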

diff --git a/src/cairo-image-compositor.c b/src/cairo-image-compositor.c
index e0911df..16b5a11 100644
--- a/src/cairo-image-compositor.c
+++ b/src/cairo-image-compositor.c
@@ -1773,13 +1773,13 @@ mono_renderer_init (cairo_image_span_renderer_t	*r,
 						 &tx, &ty) &&
 	    composite->bounded.x + tx >= 0 &&
 	    composite->bounded.y + ty >= 0 &&
-	    composite->bounded.x + composite->bounded.width + tx <= src->width &&
+	    composite->bounded.x + composite->bounded.width +  tx <= src->width &&
 	    composite->bounded.y + composite->bounded.height + ty <= src->height) {
 
 	    r->u.blit.stride = dst->stride;
 	    r->u.blit.data = dst->data;
 	    r->u.blit.src_stride = src->stride;
-	    r->u.blit.src_data = src->data + src->stride * ty + tx * PIXMAN_FORMAT_BPP(src->format)/8;
+	    r->u.blit.src_data = src->data + src->stride * ty + tx * 4;
 	    r->base.render_rows = _blit_spans;
 	}
     }
@@ -1812,42 +1812,38 @@ mono_renderer_init (cairo_image_span_renderer_t	*r,
 #define RB_ONE_HALF 0x007f007f
 #define RB_MASK_PLUS_ONE 0x01000100
 #define G_SHIFT 8
-#define UNc_rb_MUL_UNc(x, a, t)						\
-    do {								\
-	t  = ((x) & RB_MASK) * (a);					\
-	t += RB_ONE_HALF;						\
-	x = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT;		\
-	x &= RB_MASK;							\
-    } while (0)
-#define UNc_rb_ADD_UNc_rb(x, y, t)					\
-    do {								\
-	t = ((x) + (y));						\
-	t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK);		\
-	x = (t & RB_MASK);						\
-    } while (0)
+static inline uint32_t
+mul8x2_8 (uint32_t a, uint8_t b)
+{
+    uint32_t t = (a & RB_MASK) * b + RB_ONE_HALF;
+    return ((t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT) & RB_MASK;
+}
+
+static inline uint32_t
+add8x2_8x2 (uint32_t a, uint32_t b)
+{
+    uint32_t t = a + b;
+    t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK);
+    return t & RB_MASK;
+}
+
 static inline uint8_t
-mul8 (uint8_t a, uint8_t b)
+mul8_8 (uint8_t a, uint8_t b)
 {
     uint16_t t = a * (uint16_t)b + ONE_HALF;
     return ((t >> G_SHIFT) + t) >> G_SHIFT;
 }
+
 static inline uint32_t
 lerp8x4 (uint32_t src, uint8_t a, uint32_t dst)
 {
     uint8_t ia = ~a;
-    uint32_t r1, r2, r3, t;
+    uint32_t r1, r2;
 
-    r1 = src;
-    r2 = dst;
-    UNc_rb_MUL_UNc (r1, a, t);
-    UNc_rb_MUL_UNc (r2, ia, t);
-    UNc_rb_ADD_UNc_rb (r1, r2, t);
-
-    r2 = src >> G_SHIFT;
-    r3 = dst >> G_SHIFT;
-    UNc_rb_MUL_UNc (r2, a, t);
-    UNc_rb_MUL_UNc (r3, ia, t);
-    UNc_rb_ADD_UNc_rb (r2, r3, t);
+    r1 = add8x2_8x2 (mul8x2_8 (src, a),
+		     mul8x2_8 (dst, ia));
+    r2 = add8x2_8x2 (mul8x2_8 (src >> G_SHIFT, a),
+		     mul8x2_8 (dst >> G_SHIFT, ia));
 
     return r1 | (r2 << G_SHIFT);
 }
@@ -1995,7 +1991,7 @@ _fill_a8_lerp_spans (void *abstract_renderer, int y, int h,
 
     if (likely(h == 1)) {
 	do {
-	    uint8_t a = mul8 (spans[0].coverage, r->op);
+	    uint8_t a = mul8_8 (spans[0].coverage, r->op);
 	    if (a) {
 		int len = spans[1].x - spans[0].x;
 		uint8_t *d = r->u.fill.data + r->u.fill.stride*y + spans[0].x;
@@ -2010,7 +2006,7 @@ _fill_a8_lerp_spans (void *abstract_renderer, int y, int h,
 	} while (--num_spans > 1);
     } else {
 	do {
-	    uint8_t a = mul8 (spans[0].coverage, r->op);
+	    uint8_t a = mul8_8 (spans[0].coverage, r->op);
 	    if (a) {
 		int yy = y, hh = h;
 		uint16_t p = (uint16_t)a * r->u.fill.pixel + 0x7f;
@@ -2043,7 +2039,7 @@ _fill_xrgb32_lerp_spans (void *abstract_renderer, int y, int h,
 
     if (likely(h == 1)) {
 	do {
-	    uint8_t a = mul8 (spans[0].coverage, r->op);
+	    uint8_t a = mul8_8 (spans[0].coverage, r->op);
 	    if (a) {
 		int len = spans[1].x - spans[0].x;
 		uint32_t *d = (uint32_t*)(r->u.fill.data + r->u.fill.stride*y + spans[0].x*4);
@@ -2056,7 +2052,7 @@ _fill_xrgb32_lerp_spans (void *abstract_renderer, int y, int h,
 	} while (--num_spans > 1);
     } else {
 	do {
-	    uint8_t a = mul8 (spans[0].coverage, r->op);
+	    uint8_t a = mul8_8 (spans[0].coverage, r->op);
 	    if (a) {
 		int yy = y, hh = h;
 		do {
@@ -2076,6 +2072,68 @@ _fill_xrgb32_lerp_spans (void *abstract_renderer, int y, int h,
     return CAIRO_STATUS_SUCCESS;
 }
 
+static cairo_status_t
+_blit_xrgb32_lerp_spans (void *abstract_renderer, int y, int h,
+			 const cairo_half_open_span_t *spans, unsigned num_spans)
+{
+    cairo_image_span_renderer_t *r = abstract_renderer;
+
+    if (num_spans == 0)
+	return CAIRO_STATUS_SUCCESS;
+
+    if (likely(h == 1)) {
+	uint8_t *src = r->u.blit.src_data + y*r->u.blit.src_stride;
+	uint8_t *dst = r->u.blit.data + y*r->u.blit.stride;
+	do {
+	    uint8_t a = mul8_8 (spans[0].coverage, r->op);
+	    if (a) {
+		uint32_t *s = (uint32_t*)src + spans[0].x;
+		uint32_t *d = (uint32_t*)dst + spans[0].x;
+		int len = spans[1].x - spans[0].x;
+		if (a == 0xff) {
+		    if (len == 1)
+			*d = *s;
+		    else
+			memcpy(d, s, len*4);
+		} else {
+		    while (len--) {
+			*d = lerp8x4 (*s, a, *d);
+			s++, d++;
+		    }
+		}
+	    }
+	    spans++;
+	} while (--num_spans > 1);
+    } else {
+	do {
+	    uint8_t a = mul8_8 (spans[0].coverage, r->op);
+	    if (a) {
+		int yy = y, hh = h;
+		do {
+		    uint32_t *s = (uint32_t *)(r->u.blit.src_data + yy*r->u.blit.src_stride + spans[0].x * 4);
+		    uint32_t *d = (uint32_t *)(r->u.blit.data + yy*r->u.blit.stride + spans[0].x * 4);
+		    int len = spans[1].x - spans[0].x;
+		    if (a == 0xff) {
+			if (len == 1)
+			    *d = *s;
+			else
+			    memcpy(d, s, len * 4);
+		    } else {
+			while (len--) {
+			    *d = lerp8x4 (*s, a, *d);
+			    s++, d++;
+			}
+		    }
+		    yy++;
+		} while (--hh);
+	    }
+	    spans++;
+	} while (--num_spans > 1);
+    }
+
+    return CAIRO_STATUS_SUCCESS;
+}
+
 static cairo_int_status_t
 inplace_renderer_init (cairo_image_span_renderer_t	*r,
 		       const cairo_composite_rectangles_t *composite,
@@ -2137,6 +2195,32 @@ inplace_renderer_init (cairo_image_span_renderer_t	*r,
 	    r->u.fill.data = dst->data;
 	    r->u.fill.stride = dst->stride;
 	}
+    } else if ((dst->format == CAIRO_FORMAT_ARGB32 || dst->format == CAIRO_FORMAT_RGB24) &&
+	       (composite->op == CAIRO_OPERATOR_SOURCE ||
+		(composite->op == CAIRO_OPERATOR_OVER &&
+		 (dst->base.is_clear || (dst->base.content & CAIRO_CONTENT_ALPHA) == 0))) &&
+	       composite->source_pattern.base.type == CAIRO_PATTERN_TYPE_SURFACE &&
+	       composite->source_pattern.surface.surface->backend->type == CAIRO_SURFACE_TYPE_IMAGE &&
+	       to_image_surface(composite->source_pattern.surface.surface)->format == dst->format)
+    {
+       cairo_image_surface_t *src =
+	   to_image_surface(composite->source_pattern.surface.surface);
+       int tx, ty;
+
+	if (_cairo_matrix_is_integer_translation(&composite->source_pattern.base.matrix,
+						 &tx, &ty) &&
+	    composite->bounded.x + tx >= 0 &&
+	    composite->bounded.y + ty >= 0 &&
+	    composite->bounded.x + composite->bounded.width + tx <= src->width &&
+	    composite->bounded.y + composite->bounded.height + ty <= src->height) {
+
+	    assert(PIXMAN_FORMAT_BPP(dst->pixman_format) == 32);
+	    r->u.blit.stride = dst->stride;
+	    r->u.blit.data = dst->data;
+	    r->u.blit.src_stride = src->stride;
+	    r->u.blit.src_data = src->data + src->stride * ty + tx * 4;
+	    r->base.render_rows = _blit_xrgb32_lerp_spans;
+	}
     }
     if (r->base.render_rows == NULL)
 	return CAIRO_INT_STATUS_UNSUPPORTED;
diff --git a/test/reference/recording-surface-extend-none.rgb24.ref.png b/test/reference/recording-surface-extend-none.rgb24.ref.png
index 3481673..bd84338 100644
Binary files a/test/reference/recording-surface-extend-none.rgb24.ref.png and b/test/reference/recording-surface-extend-none.rgb24.ref.png differ
diff --git a/test/reference/recording-surface-over.rgb24.ref.png b/test/reference/recording-surface-over.rgb24.ref.png
index 3481673..bd84338 100644
Binary files a/test/reference/recording-surface-over.rgb24.ref.png and b/test/reference/recording-surface-over.rgb24.ref.png differ
commit 9f83ac5c63684d6576fcbd6c902ee127c457f724
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Mon Mar 5 05:15:52 2012 +0000

    image: Perform the simple solid-fill spans inplace
    
    Reducing the number of passes causes the usual change in antialiasing
    side-effects, as well as the boon of being faster (and theoretically more
    accurate through reduced loss of dynamic range).
    
    On an i5-2520m:
        swfdec-giant-steps-full   3240.43  ->  2651.36:  1.22x speedup
                 grads-heat-map    166.84  ->   136.79:  1.22x speedup
             swfdec-giant-steps    940.19  ->   796.24:  1.18x speedup
                      ocitysmap    953.51  ->   831.96:  1.15x speedup
            webkit-canvas-alpha  13924.01  -> 13115.70:  1.06x speedup
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
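
All of these fills lean on the biased shift trick (t + (t >> 8)) >> 8
in place of dividing by 255.  A throwaway harness (not part of the
commit) that checks how the 0x7f-biased mul8 added here compares with
exact rounded division:

    #include <stdint.h>
    #include <stdio.h>

    /* mul8 as added by this commit: bias the product, then fold the
     * high byte back in instead of dividing by 255. */
    static inline uint8_t
    mul8 (uint8_t a, uint8_t b)
    {
        uint16_t t = a * (uint16_t)b + 0x7f;
        return ((t >> 8) + t) >> 8;
    }

    int
    main (void)
    {
        unsigned a, b, off = 0;

        for (a = 0; a <= 255; a++)
            for (b = 0; b <= 255; b++)
                off += mul8 (a, b) != (a * b + 127) / 255;
        printf ("inputs differing from exact rounding: %u / 65536\n",
                off);
        return 0;
    }

Any such deviation is at most one gray level; the accuracy claim in the
message is about the number of passes (compositing into a mask and then
applying the mask quantizes twice, blending in place quantizes once),
not about this approximation.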

diff --git a/src/cairo-image-compositor.c b/src/cairo-image-compositor.c
index 0f8142b..e0911df 100644
--- a/src/cairo-image-compositor.c
+++ b/src/cairo-image-compositor.c
@@ -1740,13 +1740,16 @@ mono_renderer_init (cairo_image_span_renderer_t	*r,
 	const cairo_color_t *color;
 
 	color = &composite->source_pattern.solid.color;
+	if (composite->op == CAIRO_OPERATOR_CLEAR)
+	    color = CAIRO_COLOR_TRANSPARENT;
+
 	if (fill_reduces_to_source (composite->op, color, dst) &&
 	    color_to_pixel (color, dst->pixman_format, &r->u.fill.pixel)) {
 	    /* Use plain C for the fill operations as the span length is
 	     * typically small, too small to payback the startup overheads of
 	     * using SSE2 etc.
 	     */
-	    switch (r->bpp) {
+	    switch (PIXMAN_FORMAT_BPP(dst->pixman_format)) {
 	    case 8: r->base.render_rows = _fill8_spans; break;
 	    case 16: r->base.render_rows = _fill16_spans; break;
 	    case 32: r->base.render_rows = _fill32_spans; break;
@@ -1804,6 +1807,346 @@ mono_renderer_init (cairo_image_span_renderer_t	*r,
     return CAIRO_INT_STATUS_SUCCESS;
 }
 
+#define ONE_HALF 0x7f
+#define RB_MASK 0x00ff00ff
+#define RB_ONE_HALF 0x007f007f
+#define RB_MASK_PLUS_ONE 0x01000100
+#define G_SHIFT 8
+#define UNc_rb_MUL_UNc(x, a, t)						\
+    do {								\
+	t  = ((x) & RB_MASK) * (a);					\
+	t += RB_ONE_HALF;						\
+	x = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT;		\
+	x &= RB_MASK;							\
+    } while (0)
+#define UNc_rb_ADD_UNc_rb(x, y, t)					\
+    do {								\
+	t = ((x) + (y));						\
+	t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK);		\
+	x = (t & RB_MASK);						\
+    } while (0)
+static inline uint8_t
+mul8 (uint8_t a, uint8_t b)
+{
+    uint16_t t = a * (uint16_t)b + ONE_HALF;
+    return ((t >> G_SHIFT) + t) >> G_SHIFT;
+}
+static inline uint32_t
+lerp8x4 (uint32_t src, uint8_t a, uint32_t dst)
+{
+    uint8_t ia = ~a;
+    uint32_t r1, r2, r3, t;
+
+    r1 = src;
+    r2 = dst;
+    UNc_rb_MUL_UNc (r1, a, t);
+    UNc_rb_MUL_UNc (r2, ia, t);
+    UNc_rb_ADD_UNc_rb (r1, r2, t);
+
+    r2 = src >> G_SHIFT;
+    r3 = dst >> G_SHIFT;
+    UNc_rb_MUL_UNc (r2, a, t);
+    UNc_rb_MUL_UNc (r3, ia, t);
+    UNc_rb_ADD_UNc_rb (r2, r3, t);
+
+    return r1 | (r2 << G_SHIFT);
+}
+
+static cairo_status_t
+_fill_a8_lerp_opaque_spans (void *abstract_renderer, int y, int h,
+			    const cairo_half_open_span_t *spans, unsigned num_spans)
+{
+    cairo_image_span_renderer_t *r = abstract_renderer;
+
+    if (num_spans == 0)
+	return CAIRO_STATUS_SUCCESS;
+
+    if (likely(h == 1)) {
+	uint8_t *d = r->u.fill.data + r->u.fill.stride*y;
+	do {
+	    uint8_t a = spans[0].coverage;
+	    if (a) {
+		int len = spans[1].x - spans[0].x;
+		if (a == 0xff) {
+		    memset(d + spans[0].x, r->u.fill.pixel, len);
+		} else {
+		    uint16_t p = (uint16_t)a * r->u.fill.pixel + 0x7f;
+		    uint16_t ia = ~a;
+		    uint8_t *dst = d + spans[0].x;
+		    while (len--) {
+			uint16_t t = *dst*ia + p;
+			*dst++ = (t + (t>>8)) >> 8;
+		    }
+		}
+	    }
+	    spans++;
+	} while (--num_spans > 1);
+    } else {
+	do {
+	    uint8_t a = spans[0].coverage;
+	    if (a) {
+		int yy = y, hh = h;
+		if (a == 0xff) {
+		    do {
+			int len = spans[1].x - spans[0].x;
+			uint8_t *d = r->u.fill.data + r->u.fill.stride*yy + spans[0].x;
+			memset(d, r->u.fill.pixel, len);
+			yy++;
+		    } while (--hh);
+		} else {
+		    uint16_t p = (uint16_t)a * r->u.fill.pixel + 0x7f;
+		    uint16_t ia = ~a;
+		    do {
+			int len = spans[1].x - spans[0].x;
+			uint8_t *d = r->u.fill.data + r->u.fill.stride*yy + spans[0].x;
+			while (len--) {
+			    uint16_t t = *d*ia + p;
+			    *d++ = (t + (t>>8)) >> 8;
+			}
+			yy++;
+		    } while (--hh);
+		}
+	    }
+	    spans++;
+	} while (--num_spans > 1);
+    }
+
+    return CAIRO_STATUS_SUCCESS;
+}
+
+static cairo_status_t
+_fill_xrgb32_lerp_opaque_spans (void *abstract_renderer, int y, int h,
+				const cairo_half_open_span_t *spans, unsigned num_spans)
+{
+    cairo_image_span_renderer_t *r = abstract_renderer;
+
+    if (num_spans == 0)
+	return CAIRO_STATUS_SUCCESS;
+
+    if (likely(h == 1)) {
+	do {
+	    uint8_t a = spans[0].coverage;
+	    if (a) {
+		int len = spans[1].x - spans[0].x;
+		uint32_t *d = (uint32_t*)(r->u.fill.data + r->u.fill.stride*y + spans[0].x*4);
+		if (a == 0xff) {
+		    if (len > 31) {
+			pixman_fill ((uint32_t *)r->u.fill.data, r->u.fill.stride / sizeof(uint32_t), r->bpp,
+				     spans[0].x, y, len, 1, r->u.fill.pixel);
+		    } else {
+			uint32_t *d = (uint32_t*)(r->u.fill.data + r->u.fill.stride*y + spans[0].x*4);
+			while (len--)
+			    *d++ = r->u.fill.pixel;
+		    }
+		} else while (len--) {
+		    *d = lerp8x4 (r->u.fill.pixel, a, *d);
+		    d++;
+		}
+	    }
+	    spans++;
+	} while (--num_spans > 1);
+    } else {
+	do {
+	    uint8_t a = spans[0].coverage;
+	    if (a) {
+		if (a == 0xff) {
+		    if (spans[1].x - spans[0].x > 16) {
+			pixman_fill ((uint32_t *)r->u.fill.data, r->u.fill.stride / sizeof(uint32_t), r->bpp,
+				     spans[0].x, y, spans[1].x - spans[0].x, h,
+				     r->u.fill.pixel);
+		    } else {
+			int yy = y, hh = h;
+			do {
+			    int len = spans[1].x - spans[0].x;
+			    uint32_t *d = (uint32_t*)(r->u.fill.data + r->u.fill.stride*yy + spans[0].x*4);
+			    while (len--)
+				*d++ = r->u.fill.pixel;
+			    yy++;
+			} while (--hh);
+		    }
+		} else {
+		    int yy = y, hh = h;
+		    do {
+			int len = spans[1].x - spans[0].x;
+			uint32_t *d = (uint32_t *)(r->u.fill.data + r->u.fill.stride*yy + spans[0].x*4);
+			while (len--) {
+			    *d = lerp8x4 (r->u.fill.pixel, a, *d);
+			    d++;
+			}
+			yy++;
+		    } while (--hh);
+		}
+	    }
+	    spans++;
+	} while (--num_spans > 1);
+    }
+
+    return CAIRO_STATUS_SUCCESS;
+}
+
+static cairo_status_t
+_fill_a8_lerp_spans (void *abstract_renderer, int y, int h,
+		     const cairo_half_open_span_t *spans, unsigned num_spans)
+{
+    cairo_image_span_renderer_t *r = abstract_renderer;
+
+    if (num_spans == 0)
+	return CAIRO_STATUS_SUCCESS;
+
+    if (likely(h == 1)) {
+	do {
+	    uint8_t a = mul8 (spans[0].coverage, r->op);
+	    if (a) {
+		int len = spans[1].x - spans[0].x;
+		uint8_t *d = r->u.fill.data + r->u.fill.stride*y + spans[0].x;
+		uint16_t p = (uint16_t)a * r->u.fill.pixel + 0x7f;
+		uint16_t ia = ~a;
+		while (len--) {
+		    uint16_t t = *d*ia + p;
+		    *d++ = (t + (t>>8)) >> 8;
+		}
+	    }
+	    spans++;
+	} while (--num_spans > 1);
+    } else {
+	do {
+	    uint8_t a = mul8 (spans[0].coverage, r->op);
+	    if (a) {
+		int yy = y, hh = h;
+		uint16_t p = (uint16_t)a * r->u.fill.pixel + 0x7f;
+		uint16_t ia = ~a;
+		do {
+		    int len = spans[1].x - spans[0].x;
+		    uint8_t *d = r->u.fill.data + r->u.fill.stride*yy + spans[0].x;
+		    while (len--) {
+			uint16_t t = *d*ia + p;
+			*d++ = (t + (t>>8)) >> 8;
+		    }
+		    yy++;
+		} while (--hh);
+	    }
+	    spans++;
+	} while (--num_spans > 1);
+    }
+
+    return CAIRO_STATUS_SUCCESS;
+}
+
+static cairo_status_t
+_fill_xrgb32_lerp_spans (void *abstract_renderer, int y, int h,
+			 const cairo_half_open_span_t *spans, unsigned num_spans)
+{
+    cairo_image_span_renderer_t *r = abstract_renderer;
+
+    if (num_spans == 0)
+	return CAIRO_STATUS_SUCCESS;
+
+    if (likely(h == 1)) {
+	do {
+	    uint8_t a = mul8 (spans[0].coverage, r->op);
+	    if (a) {
+		int len = spans[1].x - spans[0].x;
+		uint32_t *d = (uint32_t*)(r->u.fill.data + r->u.fill.stride*y + spans[0].x*4);
+		while (len--) {
+		    *d = lerp8x4 (r->u.fill.pixel, a, *d);
+		    d++;
+		}
+	    }
+	    spans++;
+	} while (--num_spans > 1);
+    } else {
+	do {
+	    uint8_t a = mul8 (spans[0].coverage, r->op);
+	    if (a) {
+		int yy = y, hh = h;
+		do {
+		    int len = spans[1].x - spans[0].x;
+		    uint32_t *d = (uint32_t *)(r->u.fill.data + r->u.fill.stride*yy + spans[0].x*4);
+		    while (len--) {
+			*d = lerp8x4 (r->u.fill.pixel, a, *d);
+			d++;
+		    }
+		    yy++;
+		} while (--hh);
+	    }
+	    spans++;
+	} while (--num_spans > 1);
+    }
+
+    return CAIRO_STATUS_SUCCESS;
+}
+
+static cairo_int_status_t
+inplace_renderer_init (cairo_image_span_renderer_t	*r,
+		       const cairo_composite_rectangles_t *composite,
+		       cairo_antialias_t		 antialias,
+		       cairo_bool_t			 needs_clip)
+{
+    cairo_image_surface_t *dst = (cairo_image_surface_t *)composite->surface;
+
+    if (composite->mask_pattern.base.type != CAIRO_PATTERN_TYPE_SOLID)
+	return CAIRO_INT_STATUS_UNSUPPORTED;
+
+    r->base.render_rows = NULL;
+    r->op = composite->mask_pattern.solid.color.alpha_short >> 8;
+
+    if (composite->source_pattern.base.type == CAIRO_PATTERN_TYPE_SOLID) {
+	const cairo_color_t *color;
+
+	color = &composite->source_pattern.solid.color;
+	if (composite->op == CAIRO_OPERATOR_CLEAR)
+	    color = CAIRO_COLOR_TRANSPARENT;
+
+	if (fill_reduces_to_source (composite->op, color, dst) &&
+	    color_to_pixel (color, dst->pixman_format, &r->u.fill.pixel)) {
+	    /* Use plain C for the fill operations as the span length is
+	     * typically small, too small to payback the startup overheads of
+	     * using SSE2 etc.
+	     */
+	    if (r->op == 0xff) {
+		switch (dst->format) {
+		case CAIRO_FORMAT_A8:
+		    r->base.render_rows = _fill_a8_lerp_opaque_spans;
+		    break;
+		case CAIRO_FORMAT_RGB24:
+		case CAIRO_FORMAT_ARGB32:
+		    r->base.render_rows = _fill_xrgb32_lerp_opaque_spans;
+		    break;
+		case CAIRO_FORMAT_A1:
+		case CAIRO_FORMAT_RGB16_565:
+		case CAIRO_FORMAT_RGB30:
+		case CAIRO_FORMAT_INVALID:
+		default: break;
+		}
+	    } else {
+		switch (dst->format) {
+		case CAIRO_FORMAT_A8:
+		    r->base.render_rows = _fill_a8_lerp_spans;
+		    break;
+		case CAIRO_FORMAT_RGB24:
+		case CAIRO_FORMAT_ARGB32:
+		    r->base.render_rows = _fill_xrgb32_lerp_spans;
+		    break;
+		case CAIRO_FORMAT_A1:
+		case CAIRO_FORMAT_RGB16_565:
+		case CAIRO_FORMAT_RGB30:
+		case CAIRO_FORMAT_INVALID:
+		default: break;
+		}
+	    }
+	    r->u.fill.data = dst->data;
+	    r->u.fill.stride = dst->stride;
+	}
+    }
+    if (r->base.render_rows == NULL)
+	return CAIRO_INT_STATUS_UNSUPPORTED;
+
+    r->base.finish = NULL;
+    r->bpp = PIXMAN_FORMAT_BPP(dst->pixman_format);
+
+    return CAIRO_INT_STATUS_SUCCESS;
+}
+
 static cairo_int_status_t
 span_renderer_init (cairo_abstract_span_renderer_t	*_r,
 		    const cairo_composite_rectangles_t *composite,
@@ -1829,6 +2172,10 @@ span_renderer_init (cairo_abstract_span_renderer_t	*_r,
     if (status != CAIRO_INT_STATUS_UNSUPPORTED)
 	return status;
 
+    status = inplace_renderer_init (r, composite, antialias, needs_clip);
+    if (status != CAIRO_INT_STATUS_UNSUPPORTED)
+	return status;
+
     r->bpp = 0;
 
     if (op == CAIRO_OPERATOR_CLEAR) {
diff --git a/test/reference/clip-operator.argb32.ref.png b/test/reference/clip-operator.argb32.ref.png
index 5ab9631..9c90984 100644
Binary files a/test/reference/clip-operator.argb32.ref.png and b/test/reference/clip-operator.argb32.ref.png differ
diff --git a/test/reference/clip-operator.rgb24.ref.png b/test/reference/clip-operator.rgb24.ref.png
index bcf474d..63a0758 100644
Binary files a/test/reference/clip-operator.rgb24.ref.png and b/test/reference/clip-operator.rgb24.ref.png differ
diff --git a/test/reference/fallback.argb32.ref.png b/test/reference/fallback.argb32.ref.png
index b7ce573..32386d5 100644
Binary files a/test/reference/fallback.argb32.ref.png and b/test/reference/fallback.argb32.ref.png differ
diff --git a/test/reference/fallback.rgb24.ref.png b/test/reference/fallback.rgb24.ref.png
index 16d3c14..6d728ab 100644
Binary files a/test/reference/fallback.rgb24.ref.png and b/test/reference/fallback.rgb24.ref.png differ
diff --git a/test/reference/hatchings.ref.png b/test/reference/hatchings.ref.png
index 7f367a1..d4c18b4 100644
Binary files a/test/reference/hatchings.ref.png and b/test/reference/hatchings.ref.png differ
diff --git a/test/reference/operator-source.argb32.ref.png b/test/reference/operator-source.argb32.ref.png
index 74ad1da..ca3d18c 100644
Binary files a/test/reference/operator-source.argb32.ref.png and b/test/reference/operator-source.argb32.ref.png differ
diff --git a/test/reference/operator-source.rgb24.ref.png b/test/reference/operator-source.rgb24.ref.png
index c003356..8109d9c 100644
Binary files a/test/reference/operator-source.rgb24.ref.png and b/test/reference/operator-source.rgb24.ref.png differ
diff --git a/test/reference/overlapping-boxes.argb32.ref.png b/test/reference/overlapping-boxes.argb32.ref.png
index 1c428e1..af2352a 100644
Binary files a/test/reference/overlapping-boxes.argb32.ref.png and b/test/reference/overlapping-boxes.argb32.ref.png differ
diff --git a/test/reference/overlapping-boxes.rgb24.ref.png b/test/reference/overlapping-boxes.rgb24.ref.png
index 58ec73c..76d9566 100644
Binary files a/test/reference/overlapping-boxes.rgb24.ref.png and b/test/reference/overlapping-boxes.rgb24.ref.png differ
diff --git a/test/reference/recording-surface-extend-none.rgb24.ref.png b/test/reference/recording-surface-extend-none.rgb24.ref.png
index bd84338..3481673 100644
Binary files a/test/reference/recording-surface-extend-none.rgb24.ref.png and b/test/reference/recording-surface-extend-none.rgb24.ref.png differ
diff --git a/test/reference/recording-surface-over.rgb24.ref.png b/test/reference/recording-surface-over.rgb24.ref.png
index bd84338..3481673 100644
Binary files a/test/reference/recording-surface-over.rgb24.ref.png and b/test/reference/recording-surface-over.rgb24.ref.png differ
commit e572ae253a9fe62ba5d61bc6e98c9efc502d7414
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Mon Mar 5 11:49:50 2012 +0000

    spans: Handle fallbacks from upload-boxes by reverting to the normal composite
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
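
The fix is a small control-flow change: composite_aligned_boxes used to
commit to the fill or upload path with an if/else, so an upload that
returned UNSUPPORTED never reached the generic composite code.  In
miniature (the helpers below are hypothetical stand-ins, not the real
static functions in the file):

    #include <stdio.h>

    typedef enum { STATUS_SUCCESS, STATUS_UNSUPPORTED } status_t;

    static status_t try_fill_boxes (int ok)
    { return ok ? STATUS_SUCCESS : STATUS_UNSUPPORTED; }
    static status_t try_upload_boxes (int ok)
    { return ok ? STATUS_SUCCESS : STATUS_UNSUPPORTED; }
    static status_t composite_generic (void)
    { return STATUS_SUCCESS; }

    static status_t
    composite_boxes (int solid, int inplace, int upload_ok)
    {
        /* Primed to UNSUPPORTED, as in the patch, so a skipped or
         * declined fast path drops through to the generic code. */
        status_t status = STATUS_UNSUPPORTED;

        if (solid)
            status = try_fill_boxes (1);
        else if (inplace)
            status = try_upload_boxes (upload_ok);

        if (status == STATUS_UNSUPPORTED)  /* previously a bare else */
            status = composite_generic ();

        return status;
    }

    int
    main (void)
    {
        /* The upload path declines, yet the operation still succeeds
         * via the fallback. */
        printf ("%d\n", composite_boxes (0, 1, 0) == STATUS_SUCCESS);
        return 0;
    }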

diff --git a/src/cairo-spans-compositor.c b/src/cairo-spans-compositor.c
index 86d9896..e9fedcd 100644
--- a/src/cairo-spans-compositor.c
+++ b/src/cairo-spans-compositor.c
@@ -538,6 +538,7 @@ composite_aligned_boxes (const cairo_spans_compositor_t		*compositor,
 	return status;
     }
 
+    status = CAIRO_INT_STATUS_UNSUPPORTED;
     if (! need_clip_mask && no_mask && source->type == CAIRO_PATTERN_TYPE_SOLID) {
 	const cairo_color_t *color;
 
@@ -547,7 +548,8 @@ composite_aligned_boxes (const cairo_spans_compositor_t		*compositor,
 	status = compositor->fill_boxes (dst, op, color, boxes);
     } else if (inplace && source->type == CAIRO_PATTERN_TYPE_SURFACE) {
 	status = upload_boxes (compositor, extents, boxes);
-    } else {
+    }
+    if (status == CAIRO_INT_STATUS_UNSUPPORTED) {
 	cairo_surface_t *src;
 	cairo_surface_t *mask = NULL;
 	int src_x, src_y;

