[cairo-commit] 2 commits - perf/cairo-perf.c perf/cairo-perf.h perf/Makefile.am perf/unaligned-clip.c pixman/src

Carl Worth cworth at kemper.freedesktop.org
Tue Jan 9 15:47:30 PST 2007


 perf/Makefile.am      |    9 +
 perf/cairo-perf.c     |    1 
 perf/cairo-perf.h     |    1 
 perf/unaligned-clip.c |   66 ++++++++++++++
 pixman/src/fbmmx.c    |  226 ++++++++++++++++++++++++++++++++++++++++++++++++++
 pixman/src/fbmmx.h    |   43 +++++++++
 pixman/src/fbpict.c   |   98 +++++++++++++++++++++
 7 files changed, 439 insertions(+), 5 deletions(-)

New commits:
diff-tree cf1d95e714d0814e52910c4306d090ef6d989093 (from d5531c4f506caa9ad66fbeef1822a7036d4dd528)
Author: Soeren Sandmann <sandmann at daimi.au.dk>
Date:   Tue Jan 9 15:05:29 2007 -0800

    Add SRC and IN implementations to avoid CompositeGeneral in some cases hit by PDF rendering
    
    The patch implements a few more operations with special-case MMX
    code. On my laptop, applying the patch to cairo speeds up the
    benchmark (rendering page 14 of a PDF file[*]) from 20.9 seconds
    to 14.9 seconds, which is an improvement of 28.6%.
    
    [*] http://people.redhat.com/jakub/prelink.pdf
    
    This also benefits the recently added unaligned_clip perf case:
    
    image-rgb  unaligned_clip-100 0.11 -> 0.06: 1.65x speedup
    ▋
    image-rgba unaligned_clip-100 0.11 -> 0.06: 1.64x speedup
    ▋

diff --git a/pixman/src/fbmmx.c b/pixman/src/fbmmx.c
index a99168c..f6f512f 100644
--- a/pixman/src/fbmmx.c
+++ b/pixman/src/fbmmx.c
@@ -2135,6 +2135,232 @@ fbCompositeSolidMask_nx8888x0565Cmmx (pi
 }
 
 void
+fbCompositeIn_nx8x8mmx (pixman_operator_t	op,
+			PicturePtr pSrc,
+			PicturePtr pMask,
+			PicturePtr pDst,
+			INT16      xSrc,
+			INT16      ySrc,
+			INT16      xMask,
+			INT16      yMask,
+			INT16      xDst,
+			INT16      yDst,
+			CARD16     width,
+			CARD16     height)
+{
+    /* Solid source IN a8 mask onto an a8 destination: each destination
+     * byte becomes FbInU (FbInU (sa, mask), dst), with sa the top byte
+     * of the value fetched by fbComposeGetSolid.  op, xSrc and ySrc are
+     * unused. */
+    CARD8	*dstLine, *dst;
+    CARD8	*maskLine, *mask;
+    FbStride	dstStride, maskStride;
+    CARD16	w;
+    CARD32	src;
+    CARD8	sa;
+    __m64	vsrc, vsrca;
+
+    fbComposeGetStart (pDst, xDst, yDst, CARD8, dstStride, dstLine, 1);
+    fbComposeGetStart (pMask, xMask, yMask, CARD8, maskStride, maskLine, 1);
+
+    fbComposeGetSolid(pSrc, pDst, src);
+
+    /* sa == 0 must not return early: IN then yields 0 for every pixel and
+     * the destination still has to be overwritten (done by the loops). */
+    sa = src >> 24;
+
+    vsrc = load8888(src);
+    vsrca = expand_alpha(vsrc);
+
+    while (height--)
+    {
+	dst = dstLine;
+	dstLine += dstStride;
+	mask = maskLine;
+	maskLine += maskStride;
+	w = width;
+
+	/* The 32-bit accesses need the pixel pointers of this row to be
+	 * 4-byte aligned; the Picture structs' addresses are irrelevant. */
+	if ((((unsigned long)dst & 3) == 0) &&
+	    (((unsigned long)mask & 3) == 0))
+	{
+	    while (w >= 4)
+	    {
+		__m64 vmask = load8888 (*(CARD32 *)mask);
+		__m64 vdest = load8888 (*(CARD32 *)dst);
+
+		*(CARD32 *)dst = store8888 (in (in (vsrca, vmask), vdest));
+
+		dst += 4;
+		mask += 4;
+		w -= 4;
+	    }
+	}
+
+	while (w--)
+	{
+	    CARD16	tmp;
+	    CARD8	a;
+	    CARD32	m, d;
+	    CARD32	r;
+
+	    a = *mask++;
+	    d = *dst;
+
+	    m = FbInU (sa, 0, a, tmp);
+	    r = FbInU (m, 0, d, tmp);
+
+	    *dst++ = r;
+	}
+    }
+
+    _mm_empty();
+}
+
+void
+fbCompositeIn_8x8mmx (pixman_operator_t	op,
+		      PicturePtr pSrc,
+		      PicturePtr pMask,
+		      PicturePtr pDst,
+		      INT16      xSrc,
+		      INT16      ySrc,
+		      INT16      xMask,
+		      INT16      yMask,
+		      INT16      xDst,
+		      INT16      yDst,
+		      CARD16     width,
+		      CARD16     height)
+{
+    /* a8 source IN a8 destination: each destination byte becomes
+     * FbInU (src, dst).  The vector loop handles four pixels per 32-bit
+     * word, the scalar loop the rest.  op and the mask arguments are unused. */
+    CARD8	*dstLine, *dst;
+    CARD8	*srcLine, *src;
+    FbStride	srcStride, dstStride;
+    CARD16	w;
+
+    fbComposeGetStart (pDst, xDst, yDst, CARD8, dstStride, dstLine, 1);
+    fbComposeGetStart (pSrc, xSrc, ySrc, CARD8, srcStride, srcLine, 1);
+
+    while (height--)
+    {
+	dst = dstLine;
+	dstLine += dstStride;
+	src = srcLine;
+	srcLine += srcStride;
+	w = width;
+
+	/* Test the row's pixel pointers, not the Picture structs, before
+	 * reading and writing them as 32-bit words. */
+	if ((((unsigned long)dst & 3) == 0) &&
+	    (((unsigned long)src & 3) == 0))
+	{
+	    while (w >= 4)
+	    {
+		CARD32 *s = (CARD32 *)src;
+		CARD32 *d = (CARD32 *)dst;
+
+		*d = store8888 (in (load8888 (*s), load8888 (*d)));
+
+		w -= 4;
+		dst += 4;
+		src += 4;
+	    }
+	}
+
+	while (w--)
+	{
+	    CARD8 s = *src++;
+	    CARD8 d = *dst;
+	    CARD16 tmp;
+
+	    *dst++ = FbInU (s, 0, d, tmp);
+	}
+    }
+
+    _mm_empty ();
+}
+
+void
+fbCompositeSrcAdd_8888x8x8mmx (pixman_operator_t   op,
+			       PicturePtr pSrc,
+			       PicturePtr pMask,
+			       PicturePtr pDst,
+			       INT16      xSrc,
+			       INT16      ySrc,
+			       INT16      xMask,
+			       INT16      yMask,
+			       INT16      xDst,
+			       INT16      yDst,
+			       CARD16     width,
+			       CARD16     height)
+{
+    /* ADD of a solid source through an a8 mask onto an a8 destination:
+     * dst = FbAdd (FbInU (sa, mask), dst), sa being the top byte of src. */
+    CARD8	*dstLine, *dst;
+    CARD8	*maskLine, *mask;
+    FbStride	dstStride, maskStride;
+    CARD16	w;
+    CARD32	src;
+    CARD8	sa;
+    __m64	vsrc, vsrca;
+
+    fbComposeGetStart (pDst, xDst, yDst, CARD8, dstStride, dstLine, 1);
+    fbComposeGetStart (pMask, xMask, yMask, CARD8, maskStride, maskLine, 1);
+
+    fbComposeGetSolid(pSrc, pDst, src);
+
+    sa = src >> 24;
+    if (sa == 0)
+	return;	/* zero alpha: nothing to add */
+
+    vsrc = load8888(src);
+    vsrca = expand_alpha(vsrc);
+
+    while (height--)
+    {
+	dst = dstLine;
+	dstLine += dstStride;
+	mask = maskLine;
+	maskLine += maskStride;
+	w = width;
+
+	/* Alignment matters for the row pointers that are accessed as
+	 * 32-bit words below, not for the Picture structs themselves. */
+	if ((((unsigned long)mask & 3) == 0) &&
+	    (((unsigned long)dst  & 3) == 0))
+	{
+	    while (w >= 4)
+	    {
+		__m64 vmask = load8888 (*(CARD32 *)mask);
+		__m64 vdest = load8888 (*(CARD32 *)dst);
+
+		*(CARD32 *)dst = store8888 (_mm_adds_pu8 (in (vsrca, vmask), vdest));
+
+		w -= 4;
+		dst += 4;
+		mask += 4;
+	    }
+	}
+
+	while (w--)
+	{
+	    CARD16	tmp;
+	    CARD16	a = *mask++;
+	    CARD32	d = *dst;
+	    CARD32	m, r;
+
+	    m = FbInU (sa, 0, a, tmp);
+	    r = FbAdd (m, d, 0, tmp);
+	    *dst++ = r;
+	}
+    }
+
+    _mm_empty();
+}
+
+void
 fbCompositeSrcAdd_8000x8000mmx (pixman_operator_t	op,
 				PicturePtr pSrc,
 				PicturePtr pMask,
diff --git a/pixman/src/fbmmx.h b/pixman/src/fbmmx.h
index 531bcba..5c08180 100644
--- a/pixman/src/fbmmx.h
+++ b/pixman/src/fbmmx.h
@@ -45,6 +45,20 @@ pixman_private
 void fbComposeSetupMMX(void);
 
 pixman_private
+void fbCompositeIn_nx8x8mmx (pixman_operator_t	op,
+			     PicturePtr pSrc,
+			     PicturePtr pMask,
+			     PicturePtr pDst,
+			     INT16      xSrc,
+			     INT16      ySrc,
+			     INT16      xMask,
+			     INT16      yMask,
+			     INT16      xDst,
+			     INT16      yDst,
+			     CARD16     width,
+			     CARD16     height);
+
+pixman_private
 void fbCompositeSolidMask_nx8888x0565Cmmx (pixman_operator_t      op,
 					   PicturePtr pSrc,
 					   PicturePtr pMask,
@@ -109,6 +123,35 @@ void fbCompositeSolidMaskSrc_nx8x8888mmx
 					  INT16      yDst,
 					  CARD16     width,
 					  CARD16     height);
+
+pixman_private
+void fbCompositeSrcAdd_8888x8x8mmx (pixman_operator_t   op,
+				    PicturePtr pSrc,
+				    PicturePtr pMask,
+				    PicturePtr pDst,
+				    INT16      xSrc,
+				    INT16      ySrc,
+				    INT16      xMask,
+				    INT16      yMask,
+				    INT16      xDst,
+				    INT16      yDst,
+				    CARD16     width,
+				    CARD16     height);
+
+pixman_private
+void fbCompositeIn_8x8mmx (pixman_operator_t	op,
+			   PicturePtr pSrc,
+			   PicturePtr pMask,
+			   PicturePtr pDst,
+			   INT16      xSrc,
+			   INT16      ySrc,
+			   INT16      xMask,
+			   INT16      yMask,
+			   INT16      xDst,
+			   INT16      yDst,
+			   CARD16     width,
+			   CARD16     height);
+
 pixman_private
 void fbCompositeSrcAdd_8000x8000mmx (pixman_operator_t	op,
 				     PicturePtr pSrc,
diff --git a/pixman/src/fbpict.c b/pixman/src/fbpict.c
index 0cdec3f..2019cbc 100644
--- a/pixman/src/fbpict.c
+++ b/pixman/src/fbpict.c
@@ -844,6 +844,58 @@ fbCompositeSrcAdd_8888x8888 (pixman_oper
 }
 
 static void
+fbCompositeSrcAdd_8888x8x8 (pixman_operator_t   op,
+			    PicturePtr pSrc,
+			    PicturePtr pMask,
+			    PicturePtr pDst,
+			    INT16      xSrc,
+			    INT16      ySrc,
+			    INT16      xMask,
+			    INT16      yMask,
+			    INT16      xDst,
+			    INT16      yDst,
+			    CARD16     width,
+			    CARD16     height)
+{
+    /* Plain C path: for every pixel, feed the top byte of the solid
+     * source value and the a8 mask byte to FbInU, then combine the
+     * result with the a8 destination byte through FbAdd. */
+    CARD8	*dstRow, *dp;
+    CARD8	*maskRow, *mp;
+    FbStride	dstPitch, maskPitch;
+    CARD16	remaining;
+    CARD32	solid;
+    CARD8	alpha;
+
+    fbComposeGetStart (pDst, xDst, yDst, CARD8, dstPitch, dstRow, 1);
+    fbComposeGetStart (pMask, xMask, yMask, CARD8, maskPitch, maskRow, 1);
+    fbComposeGetSolid (pSrc, pDst, solid);
+    alpha = (CARD8) (solid >> 24);
+
+    for (; height != 0; height--)
+    {
+	/* Latch this row's start, then step the row pointers for the
+	 * next iteration. */
+	dp = dstRow;
+	mp = maskRow;
+	dstRow += dstPitch;
+	maskRow += maskPitch;
+
+	for (remaining = width; remaining != 0; remaining--)
+	{
+	    CARD16	scratch;
+	    CARD16	coverage = *mp++;
+	    CARD32	before = *dp;
+	    CARD32	scaled, after;
+
+	    scaled = FbInU (alpha, 0, coverage, scratch);
+	    after = FbAdd (scaled, before, 0, scratch);
+	    *dp++ = after;
+	}
+    }
+}
+
+static void
 fbCompositeSrcAdd_1000x1000 (pixman_operator_t   op,
 			     PicturePtr pSrc,
 			     PicturePtr pMask,
@@ -1759,6 +1811,26 @@ pixman_composite (pixman_operator_t	op,
 		break;
 	    }
 	}
+	else
+	{
+	    if ((pSrc->format_code == PICT_a8r8g8b8	||
+		 pSrc->format_code == PICT_a8b8g8r8) &&
+		srcRepeat			     &&
+		pMask->format_code == PICT_a8	     &&
+		pDst->format_code == PICT_a8)
+	    {
+#ifdef USE_MMX
+		if (fbHaveMMX())
+		{
+		    srcRepeat = FALSE;
+
+		    func = fbCompositeSrcAdd_8888x8x8mmx;
+		}
+		else
+#endif
+		    func = fbCompositeSrcAdd_8888x8x8;
+	    }
+	}
 	break;
     case PIXMAN_OPERATOR_SRC:
 	if (pMask)
@@ -1798,10 +1870,34 @@ pixman_composite (pixman_operator_t	op,
 	    }
 	}
 	break;
+    case PIXMAN_OPERATOR_IN:
+#ifdef USE_MMX
+	if (pSrc->format_code == PICT_a8 &&
+	    pDst->format_code == PICT_a8 &&
+	    !pMask)
+	{
+	    if (fbHaveMMX())
+		func = fbCompositeIn_8x8mmx;
+	}
+	else if (srcRepeat && pMask && !pMask->componentAlpha &&
+		 (pSrc->format_code == PICT_a8r8g8b8 ||
+		  pSrc->format_code == PICT_a8b8g8r8)   &&
+		 (pMask->format_code == PICT_a8)	&&
+		 pDst->format_code == PICT_a8)
+	{
+	    if (fbHaveMMX())
+	    {
+		srcRepeat = FALSE;
+		func = fbCompositeIn_nx8x8mmx;
+	    }
+	}
+#else
+	func = NULL;
+#endif
+	break;
     case PIXMAN_OPERATOR_CLEAR:
     case PIXMAN_OPERATOR_DST:
     case PIXMAN_OPERATOR_OVER_REVERSE:
-    case PIXMAN_OPERATOR_IN:
     case PIXMAN_OPERATOR_IN_REVERSE:
     case PIXMAN_OPERATOR_OUT:
     case PIXMAN_OPERATOR_OUT_REVERSE:
diff-tree d5531c4f506caa9ad66fbeef1822a7036d4dd528 (from 504cbdae37232d65f5c1f8913e55ac63397ad4f0)
Author: Carl Worth <cworth at cworth.org>
Date:   Tue Jan 9 14:27:32 2007 -0800

    Add unaligned_clip perf case courtesy of Jeff Muizelaar
    
    Conflicts:
    
    	perf/Makefile.am
    	perf/cairo-perf.c
    	perf/cairo-perf.h

diff --git a/perf/Makefile.am b/perf/Makefile.am
index 0d5f244..1ed4c81 100644
--- a/perf/Makefile.am
+++ b/perf/Makefile.am
@@ -22,18 +22,19 @@ cairo_perf_SOURCES =		\
 	box-outline.c		\
 	fill.c			\
 	long-lines.c		\
+	mosaic.c		\
+	mosaic.h		\
 	paint.c			\
+	pattern_create_radial.c \
 	stroke.c		\
 	subimage_copy.c		\
 	tessellate.c		\
-	pattern_create_radial.c \
+	unaligned-clip.c	\
 	text.c			\
 	world-map.c		\
 	world-map.h		\
 	zrusin.c		\
-	zrusin-another.h	\
-	mosaic.c		\
-	mosaic.h
+	zrusin-another.h
 
 if CAIRO_HAS_WIN32_SURFACE
 cairo_perf_SOURCES += cairo-perf-win32.c
diff --git a/perf/cairo-perf.c b/perf/cairo-perf.c
index 0708ed9..18cc652 100644
--- a/perf/cairo-perf.c
+++ b/perf/cairo-perf.c
@@ -328,5 +328,6 @@ cairo_perf_case_t perf_cases[] = {
     { box_outline, 100, 100},
     { mosaic, 800, 800 },
     { long_lines, 100, 100},
+    { unaligned_clip, 100, 100},
     { NULL }
 };
diff --git a/perf/cairo-perf.h b/perf/cairo-perf.h
index 2bfd665..1ef9600 100644
--- a/perf/cairo-perf.h
+++ b/perf/cairo-perf.h
@@ -104,5 +104,6 @@ CAIRO_PERF_DECL (world_map);
 CAIRO_PERF_DECL (box_outline);
 CAIRO_PERF_DECL (mosaic);
 CAIRO_PERF_DECL (long_lines);
+CAIRO_PERF_DECL (unaligned_clip);
 
 #endif
diff --git a/perf/unaligned-clip.c b/perf/unaligned-clip.c
new file mode 100644
index 0000000..c7b9d21
--- /dev/null
+++ b/perf/unaligned-clip.c
@@ -0,0 +1,66 @@
+/*
+ * Copyright © 2006 Jeff Muizelaar <jeff at infidigm.net>
+ * Copyright © 2006 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Jeff Muizelaar <jeff at infidigm.net>
+ *          Carl Worth <cworth at cworth.org>
+ */
+
+#include "cairo-perf.h"
+
+static cairo_perf_ticks_t
+do_unaligned_clip (cairo_t *cr, int width, int height)
+{
+    /* Times setting two clips that miss pixel boundaries; returns the
+     * elapsed ticks.  width and height are unused. */
+    cairo_save (cr);
+    cairo_perf_timer_start ();
+
+    /* First a triangular clip that obviously isn't along device-pixel
+     * boundaries. */
+    cairo_move_to (cr, 50, 50);
+    cairo_line_to (cr, 50, 90);
+    cairo_line_to (cr, 90, 90);
+    cairo_close_path (cr);
+    cairo_clip (cr);
+
+    /* Then a rectangle that would be pixel-aligned but for the
+     * non-integer scaling. */
+    cairo_scale (cr, 1.1, 1.1);
+    cairo_move_to (cr, 55, 55);
+    cairo_line_to (cr, 90, 55);
+    cairo_line_to (cr, 90, 90);
+    cairo_line_to (cr, 55, 90);
+    cairo_close_path (cr);
+    cairo_clip (cr);
+    cairo_perf_timer_stop ();
+    cairo_restore (cr);
+
+    return cairo_perf_timer_elapsed ();
+}
+
+void
+unaligned_clip (cairo_perf_t *perf, cairo_t *cr, int width, int height)
+{   /* Perf-case entry point: hands do_unaligned_clip to cairo_perf_run as "unaligned_clip". */
+    cairo_perf_run (perf, "unaligned_clip", do_unaligned_clip);
+}


More information about the cairo-commit mailing list