[cairo-commit] src/cairo-image-compositor.c src/cairo-spans-compositor-private.h

Chris Wilson ickle at kemper.freedesktop.org
Sat Jan 26 08:14:12 PST 2013


 src/cairo-image-compositor.c         |  190 +++++++++++++++++++++++++++++------
 src/cairo-spans-compositor-private.h |    2 
 2 files changed, 162 insertions(+), 30 deletions(-)

New commits:
commit c986a7310bb06582b7d8a566d5f007ba4e5e75bf
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Thu Jan 24 08:55:54 2013 +0000

    image: Enable inplace compositing with opacities for general routines
    
    On a SNB i5-2500:
    
    Speedups
    ========
             firefox-chalkboard  34284.16 -> 19637.40:  1.74x speedup
             swfdec-giant-steps    778.35 ->   665.37:  1.17x speedup
                      ocitysmap    485.64 ->   431.94:  1.12x speedup
    
    Slowdowns
    =========
               firefox-fishbowl  46878.98 -> 54407.14:  1.16x slowdown
    
    That slowdown is due to the overhead of the increased number of calls to
    pixman_image_composite32() (pixman_transform_point for analyzing the
    source extents in particular) outweighing any advantage gained by
    performing the rasterisation in a single pass and eliding gaps. The
    solution that has been floated in the past is for an interface into
    pixman to only perform the analysis once and then to return a kernel to
    use for all spans.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/cairo-image-compositor.c b/src/cairo-image-compositor.c
index 9715250..7e905ce 100644
--- a/src/cairo-image-compositor.c
+++ b/src/cairo-image-compositor.c
@@ -1547,7 +1547,8 @@ typedef struct _cairo_image_span_renderer {
 	    uint8_t *data;
 	} mask;
     } u;
-    uint8_t buf[sizeof(cairo_abstract_span_renderer_t)-128];
+    uint8_t _buf[0];
+#define SZ_BUF (sizeof (cairo_abstract_span_renderer_t) - sizeof (cairo_image_span_renderer_t))
 } cairo_image_span_renderer_t;
 COMPILE_TIME_ASSERT (sizeof (cairo_image_span_renderer_t) <= sizeof (cairo_abstract_span_renderer_t));
 
@@ -2251,7 +2252,7 @@ _fill_a8_lerp_spans (void *abstract_renderer, int y, int h,
 
     if (likely(h == 1)) {
 	do {
-	    uint8_t a = mul8_8 (spans[0].coverage, r->op);
+	    uint8_t a = mul8_8 (spans[0].coverage, r->bpp);
 	    if (a) {
 		int len = spans[1].x - spans[0].x;
 		uint8_t *d = r->u.fill.data + r->u.fill.stride*y + spans[0].x;
@@ -2266,7 +2267,7 @@ _fill_a8_lerp_spans (void *abstract_renderer, int y, int h,
 	} while (--num_spans > 1);
     } else {
 	do {
-	    uint8_t a = mul8_8 (spans[0].coverage, r->op);
+	    uint8_t a = mul8_8 (spans[0].coverage, r->bpp);
 	    if (a) {
 		int yy = y, hh = h;
 		uint16_t p = (uint16_t)a * r->u.fill.pixel + 0x7f;
@@ -2299,7 +2300,7 @@ _fill_xrgb32_lerp_spans (void *abstract_renderer, int y, int h,
 
     if (likely(h == 1)) {
 	do {
-	    uint8_t a = mul8_8 (spans[0].coverage, r->op);
+	    uint8_t a = mul8_8 (spans[0].coverage, r->bpp);
 	    if (a) {
 		int len = spans[1].x - spans[0].x;
 		uint32_t *d = (uint32_t*)(r->u.fill.data + r->u.fill.stride*y + spans[0].x*4);
@@ -2312,7 +2313,7 @@ _fill_xrgb32_lerp_spans (void *abstract_renderer, int y, int h,
 	} while (--num_spans > 1);
     } else {
 	do {
-	    uint8_t a = mul8_8 (spans[0].coverage, r->op);
+	    uint8_t a = mul8_8 (spans[0].coverage, r->bpp);
 	    if (a) {
 		int yy = y, hh = h;
 		do {
@@ -2345,7 +2346,7 @@ _blit_xrgb32_lerp_spans (void *abstract_renderer, int y, int h,
 	uint8_t *src = r->u.blit.src_data + y*r->u.blit.src_stride;
 	uint8_t *dst = r->u.blit.data + y*r->u.blit.stride;
 	do {
-	    uint8_t a = mul8_8 (spans[0].coverage, r->op);
+	    uint8_t a = mul8_8 (spans[0].coverage, r->bpp);
 	    if (a) {
 		uint32_t *s = (uint32_t*)src + spans[0].x;
 		uint32_t *d = (uint32_t*)dst + spans[0].x;
@@ -2366,7 +2367,7 @@ _blit_xrgb32_lerp_spans (void *abstract_renderer, int y, int h,
 	} while (--num_spans > 1);
     } else {
 	do {
-	    uint8_t a = mul8_8 (spans[0].coverage, r->op);
+	    uint8_t a = mul8_8 (spans[0].coverage, r->bpp);
 	    if (a) {
 		int yy = y, hh = h;
 		do {
@@ -2441,7 +2442,7 @@ _inplace_spans (void *abstract_renderer,
 		mask = (uint8_t *)pixman_image_get_data (r->mask);
 		x0 = spans[1].x;
 	    } else if (spans[0].coverage == 0x0) {
-		if (x1 != x0) {
+		if (x1 - x0 > r->u.composite.run_length) {
 		    pixman_image_composite32 (r->op, r->src, r->mask, r->u.composite.dst,
 					      x0 + r->u.composite.src_x,
 					      y + r->u.composite.src_y,
@@ -2473,8 +2474,58 @@ _inplace_spans (void *abstract_renderer,
 }
 
 static cairo_status_t
-_inplace_src_spans (void *abstract_renderer,
-		    int y, int h,
+_inplace_opacity_spans (void *abstract_renderer, int y, int h,
+			const cairo_half_open_span_t *spans,
+			unsigned num_spans)
+{
+    cairo_image_span_renderer_t *r = abstract_renderer;
+    uint8_t *mask;
+    int x0, x1;
+
+    if (num_spans == 0)
+	return CAIRO_STATUS_SUCCESS;
+
+    mask = (uint8_t *)pixman_image_get_data (r->mask);
+    x1 = x0 = spans[0].x;
+    do {
+	int len = spans[1].x - spans[0].x;
+	uint8_t m = mul8_8(spans[0].coverage, r->bpp);
+	*mask++ = m;
+	if (len > 1) {
+	    if (m == 0) {
+		if (x1 - x0 > r->u.composite.run_length) {
+		    pixman_image_composite32 (r->op, r->src, r->mask, r->u.composite.dst,
+					      x0 + r->u.composite.src_x,
+					      y + r->u.composite.src_y,
+					      0, 0,
+					      x0, y,
+					      x1 - x0, h);
+		}
+		mask = (uint8_t *)pixman_image_get_data (r->mask);
+		x0 = spans[1].x;
+	    }else {
+		memset (mask, m, --len);
+		mask += len;
+	    }
+	}
+	x1 = spans[1].x;
+	spans++;
+    } while (--num_spans > 1);
+
+    if (x1 != x0) {
+	pixman_image_composite32 (r->op, r->src, r->mask, r->u.composite.dst,
+				  x0 + r->u.composite.src_x,
+				  y + r->u.composite.src_y,
+				  0, 0,
+				  x0, y,
+				  x1 - x0, h);
+    }
+
+    return CAIRO_STATUS_SUCCESS;
+}
+
+static cairo_status_t
+_inplace_src_spans (void *abstract_renderer, int y, int h,
 		    const cairo_half_open_span_t *spans,
 		    unsigned num_spans)
 {
@@ -2486,7 +2537,7 @@ _inplace_src_spans (void *abstract_renderer,
 	return CAIRO_STATUS_SUCCESS;
 
     x0 = spans[0].x;
-    m = r->buf;
+    m = r->_buf;
     do {
 	int len = spans[1].x - spans[0].x;
 	if (len >= r->u.composite.run_length && spans[0].coverage == 0xff) {
@@ -2524,7 +2575,7 @@ _inplace_src_spans (void *abstract_renderer,
 				      spans[0].x, y,
 				      spans[1].x - spans[0].x, h);
 
-	    m = r->buf;
+	    m = r->_buf;
 	    x0 = spans[1].x;
 	} else if (spans[0].coverage == 0x0) {
 	    if (spans[0].x != x0) {
@@ -2553,7 +2604,7 @@ _inplace_src_spans (void *abstract_renderer,
 #endif
 	    }
 
-	    m = r->buf;
+	    m = r->_buf;
 	    x0 = spans[1].x;
 	} else {
 	    *m++ = spans[0].coverage;
@@ -2594,6 +2645,91 @@ _inplace_src_spans (void *abstract_renderer,
     return CAIRO_STATUS_SUCCESS;
 }
 
+static cairo_status_t
+_inplace_src_opacity_spans (void *abstract_renderer, int y, int h,
+			    const cairo_half_open_span_t *spans,
+			    unsigned num_spans)
+{
+    cairo_image_span_renderer_t *r = abstract_renderer;
+    uint8_t *mask;
+    int x0;
+
+    if (num_spans == 0)
+	return CAIRO_STATUS_SUCCESS;
+
+    x0 = spans[0].x;
+    mask = (uint8_t *)pixman_image_get_data (r->mask);
+    do {
+	int len = spans[1].x - spans[0].x;
+	uint8_t m = mul8_8(spans[0].coverage, r->bpp);
+	if (m == 0) {
+	    if (spans[0].x != x0) {
+#if PIXMAN_HAS_OP_LERP
+		pixman_image_composite32 (PIXMAN_OP_LERP_SRC,
+					  r->src, r->mask, r->u.composite.dst,
+					  x0 + r->u.composite.src_x,
+					  y + r->u.composite.src_y,
+					  0, 0,
+					  x0, y,
+					  spans[0].x - x0, h);
+#else
+		pixman_image_composite32 (PIXMAN_OP_OUT_REVERSE,
+					  r->mask, NULL, r->u.composite.dst,
+					  0, 0,
+					  0, 0,
+					  x0, y,
+					  spans[0].x - x0, h);
+		pixman_image_composite32 (PIXMAN_OP_ADD,
+					  r->src, r->mask, r->u.composite.dst,
+					  x0 + r->u.composite.src_x,
+					  y + r->u.composite.src_y,
+					  0, 0,
+					  x0, y,
+					  spans[0].x - x0, h);
+#endif
+	    }
+
+	    mask = (uint8_t *)pixman_image_get_data (r->mask);
+	    x0 = spans[1].x;
+	} else {
+	    *mask++ = m;
+	    if (len > 1) {
+		memset (mask, m, --len);
+		mask += len;
+	    }
+	}
+	spans++;
+    } while (--num_spans > 1);
+
+    if (spans[0].x != x0) {
+#if PIXMAN_HAS_OP_LERP
+	pixman_image_composite32 (PIXMAN_OP_LERP_SRC,
+				  r->src, r->mask, r->u.composite.dst,
+				  x0 + r->u.composite.src_x,
+				  y + r->u.composite.src_y,
+				  0, 0,
+				  x0, y,
+				  spans[0].x - x0, h);
+#else
+	pixman_image_composite32 (PIXMAN_OP_OUT_REVERSE,
+				  r->mask, NULL, r->u.composite.dst,
+				  0, 0,
+				  0, 0,
+				  x0, y,
+				  spans[0].x - x0, h);
+	pixman_image_composite32 (PIXMAN_OP_ADD,
+				  r->src, r->mask, r->u.composite.dst,
+				  x0 + r->u.composite.src_x,
+				  y + r->u.composite.src_y,
+				  0, 0,
+				  x0, y,
+				  spans[0].x - x0, h);
+#endif
+    }
+
+    return CAIRO_STATUS_SUCCESS;
+}
+
 static void free_pixels (pixman_image_t *image, void *data)
 {
 	free (data);
@@ -2612,7 +2748,7 @@ inplace_renderer_init (cairo_image_span_renderer_t	*r,
 	return CAIRO_INT_STATUS_UNSUPPORTED;
 
     r->base.render_rows = NULL;
-    r->op = composite->mask_pattern.solid.color.alpha_short >> 8;
+    r->bpp = composite->mask_pattern.solid.color.alpha_short >> 8;
 
     if (composite->source_pattern.base.type == CAIRO_PATTERN_TYPE_SOLID) {
 	const cairo_color_t *color;
@@ -2627,7 +2763,7 @@ inplace_renderer_init (cairo_image_span_renderer_t	*r,
 	     * typically small, too small to payback the startup overheads of
 	     * using SSE2 etc.
 	     */
-	    if (r->op == 0xff) {
+	    if (r->bpp == 0xff) {
 		switch (dst->format) {
 		case CAIRO_FORMAT_A8:
 		    r->base.render_rows = _fill_a8_lerp_opaque_spans;
@@ -2689,17 +2825,15 @@ inplace_renderer_init (cairo_image_span_renderer_t	*r,
 	}
     }
     if (r->base.render_rows == NULL) {
-	unsigned int width;
 	const cairo_pattern_t *src = &composite->source_pattern.base;
-
-	if (r->op != 0xff)
-	    return CAIRO_INT_STATUS_UNSUPPORTED;
+	unsigned int width;
 
 	if (composite->is_bounded == 0)
 	    return CAIRO_INT_STATUS_UNSUPPORTED;
 
+	r->base.render_rows = r->bpp == 0xff ? _inplace_spans : _inplace_opacity_spans;
 	width = (composite->bounded.width + 3) & ~3;
-	r->base.render_rows = _inplace_spans;
+
 	r->u.composite.run_length = 8;
 	if (src->type == CAIRO_PATTERN_TYPE_LINEAR ||
 	    src->type == CAIRO_PATTERN_TYPE_RADIAL)
@@ -2710,7 +2844,7 @@ inplace_renderer_init (cairo_image_span_renderer_t	*r,
 	     composite->op == CAIRO_OPERATOR_ADD)) {
 	    r->op = PIXMAN_OP_SRC;
 	} else if (composite->op == CAIRO_OPERATOR_SOURCE) {
-	    r->base.render_rows = _inplace_src_spans;
+	    r->base.render_rows = r->bpp == 0xff ? _inplace_src_spans : _inplace_src_opacity_spans;
 	    r->u.composite.mask_y = r->composite->unbounded.y;
 	    width = (composite->unbounded.width + 3) & ~3;
 	} else if (composite->op == CAIRO_OPERATOR_CLEAR) {
@@ -2728,8 +2862,8 @@ inplace_renderer_init (cairo_image_span_renderer_t	*r,
 	    return _cairo_error (CAIRO_STATUS_NO_MEMORY);
 
 	/* Create an effectively unbounded mask by repeating the single line */
-	buf = r->buf;
-	if (width > sizeof (r->buf)) {
+	buf = r->_buf;
+	if (width > SZ_BUF) {
 	    buf = malloc (width);
 	    if (unlikely (buf == NULL)) {
 		pixman_image_unref (r->src);
@@ -2741,19 +2875,17 @@ inplace_renderer_init (cairo_image_span_renderer_t	*r,
 					    (uint32_t *)buf, 0);
 	if (unlikely (r->mask == NULL)) {
 	    pixman_image_unref (r->src);
-	    if (buf != r->buf)
+	    if (buf != r->_buf)
 		free (buf);
 	    return _cairo_error(CAIRO_STATUS_NO_MEMORY);
 	}
 
-	if (buf != r->buf)
+	if (buf != r->_buf)
 	    pixman_image_set_destroy_function (r->mask, free_pixels, buf);
 
 	r->u.composite.dst = dst->pixman_image;
     }
 
-    r->bpp = PIXMAN_FORMAT_BPP(dst->pixman_format);
-
     return CAIRO_INT_STATUS_SUCCESS;
 }
 
@@ -2855,7 +2987,7 @@ span_renderer_init (cairo_abstract_span_renderer_t	*_r,
 
     r->u.mask.extents = composite->unbounded;
     r->u.mask.stride = (r->u.mask.extents.width + 3) & ~3;
-    if (r->u.mask.extents.height * r->u.mask.stride > (int)sizeof (r->buf)) {
+    if (r->u.mask.extents.height * r->u.mask.stride > (int)sizeof (r->_buf)) {
 	r->mask = pixman_image_create_bits (PIXMAN_a8,
 					    r->u.mask.extents.width,
 					    r->u.mask.extents.height,
@@ -2867,7 +2999,7 @@ span_renderer_init (cairo_abstract_span_renderer_t	*_r,
 	r->mask = pixman_image_create_bits (PIXMAN_a8,
 					    r->u.mask.extents.width,
 					    r->u.mask.extents.height,
-					    (uint32_t *)r->buf, r->u.mask.stride);
+					    (uint32_t *)r->_buf, r->u.mask.stride);
 
 	r->base.render_rows = _cairo_image_spans_and_zero;
 	r->base.finish = _cairo_image_finish_spans_and_zero;
diff --git a/src/cairo-spans-compositor-private.h b/src/cairo-spans-compositor-private.h
index d8b94fb..0babebd 100644
--- a/src/cairo-spans-compositor-private.h
+++ b/src/cairo-spans-compositor-private.h
@@ -46,7 +46,7 @@ CAIRO_BEGIN_DECLS
 
 typedef struct _cairo_abstract_span_renderer {
     cairo_span_renderer_t base;
-    char data[2048];
+    char data[4096];
 } cairo_abstract_span_renderer_t;
 
 struct cairo_spans_compositor {


More information about the cairo-commit mailing list