[cairo] MMX Makefiles for PIXMAN

Frédéric Plourde frederic.plourde at polymtl.ca
Tue Feb 26 12:49:20 PST 2008


Hi !

Here are 2 patches that enable MMX compilation on win32 platforms for 
PIXMAN.
Note the addition of an MMX option on the command line for the 
Makefile.win32 files.

Care should be taken, though: there are still many issues to be resolved 
inside the pixman-mmx.c fast paths, as I noticed significant performance 
drops when using MMX (with the alpha_ perf cases) !!

** Also, many similar changes have been made in pixman-mmx.c to allow 
Visual Studio to compile it correctly. These changes concern all the "cast 
to union type" operations from subtypes to the __m64 union type, which is a 
valid cast under GCC, but NOT with cl under Visual Studio 
(arrghh!) ;-)   I had to cast pointers to those variables instead of 
casting the objects directly. **  I'd like the reviewers' input on that 
point... Is that in line with the Cairo/Pixman philosophy? **

Finally, this patch REQUIRES that cairo_MMX_win32_makefile.patch (posted 
next) also be applied for cairo to link correctly.

thanx.
_fred_


diff --git a/pixman/Makefile.win32 b/pixman/Makefile.win32
index e0a1828..dc5f03e 100644
--- a/pixman/Makefile.win32
+++ b/pixman/Makefile.win32
@@ -1,31 +1,102 @@
-LIBRARY = pixman-1
+LIBRARY     = pixman-1
+MMX_LIBRARY = pixman-mmx
 
-CC = cl
+CC   = cl
 LINK = link
 
-CFLAGS = -MD -Zi -nologo -O2 -D_CRT_SECURE_NO_DEPRECATE 
-D_CRT_NONSTDC_NO_DEPRECATE -I../pixman/src -I. -DPACKAGE=$(LIBRARY) 
-DPACKAGE_VERSION="" -DPACKAGE_BUGREPORT=""
+ifeq ($(CFG),)
+CFG=release
+endif
+
+ifeq ($(MMX),)
+MMX=0
+endif
+
+CFLAGS     = -MD -nologo -D_CRT_SECURE_NO_DEPRECATE 
-D_CRT_NONSTDC_NO_DEPRECATE -I../pixman/src -I. -DPACKAGE=$(LIBRARY) 
-DPACKAGE_VERSION="" -DPACKAGE_BUGREPORT=""
+MMX_CFLAGS = -DUSE_MMX -w14710 -w14714
+
+# optimization flags
+ifeq ($(CFG),debug)
+CFLAGS += -Od -Zi
+else
+CFLAGS += -O2
+endif
+
+# MMX compilation flags
+ifeq ($(MMX),1)
+CFLAGS += $(MMX_CFLAGS)
+endif
 
 SOURCES = \
-    pixman-region.c        \
-    pixman-image.c        \
-    pixman-compose.c    \
+    pixman-region.c                \
+    pixman-image.c                    \
+    pixman-compose.c                \
     pixman-compose-accessors.c    \
-    pixman-pict.c        \
-    pixman-utils.c        \
-    pixman-edge.c        \
+    pixman-pict.c                    \
+    pixman-utils.c                    \
+    pixman-edge.c                    \
     pixman-edge-accessors.c        \
-    pixman-trap.c        \
-    pixman-compute-region.c \
-    pixman-timer.c        \
+    pixman-trap.c                    \
+    pixman-compute-region.c        \
+    pixman-timer.c                    \
     $(NULL)
 
-OBJECTS = $(subst .c,.obj,$(SOURCES))
+# mmx code
+MMX_SOURCES =                \
+        pixman-mmx.c        \
+        $(NULL)
+
+OBJECTS     = $(patsubst %.c, $(CFG)/%.obj, $(SOURCES))
+MMX_OBJECTS = $(patsubst %.c, $(CFG)/%.obj, $(MMX_SOURCES))
+
+# targets
+all: inform informMMX $(CFG)/$(LIBRARY).lib $(CFG)/$(MMX_LIBRARY).lib   
+    @exit 0
+clean: inform clean_r
+    @exit 0
+pixman: inform informMMX $(CFG)/$(LIBRARY).lib
+    @exit 0
+pixman-mmx: inform informMMX $(CFG)/$(MMX_LIBRARY).lib   
+    @exit 0
+
+inform:
+ifneq ($(CFG),release)
+ifneq ($(CFG),debug)
+    @echo "Invalid specified configuration option : "$(CFG)"."
+    @echo
+    @echo -n "Possible choices for configuration are "
+    @echo "'release' and 'debug'"
+    @echo ""
+    @exit 1
+endif
+endif
+
+informMMX:
+ifneq ($(MMX),0)
+ifneq ($(MMX),1)
+    @echo "Invalid specified MMX option : "$(MMX)"."
+    @echo
+    @echo -n "Possible choices for MMX are 0 or 1"
+    @echo ""
+    @exit 1
+endif
+endif
+
+# pixman compilation and linking
+$(CFG)/%.obj: %.c
+    @mkdir -p $(CFG)
+    @$(CC) -c $(CFLAGS) -Fo"$@" $<
+
+$(CFG)/$(LIBRARY).lib: $(OBJECTS)
+    lib -NOLOGO -OUT:$@ $(OBJECTS) || exit 0
 
-%.obj: %.c
+# pixman-mmx compilation and linking
+$(CFG)/pixman-mmx.obj: pixman-mmx.c
+    @mkdir -p $(CFG)
     @$(CC) -c $(CFLAGS) -Fo"$@" $<
 
-$(LIBRARY).lib: $(OBJECTS)
-    lib -NOLOGO -OUT:$@ $(OBJECTS)
+$(CFG)/$(MMX_LIBRARY).lib: $(MMX_OBJECTS)
+    lib -NOLOGO -OUT:$@ $(MMX_OBJECTS) || exit 0
 
-clean:
-    @rm -f *.obj *.lib *.pdb *.ilk || exit 0
+clean_r:
+    @rm -f $(CFG)/*.obj $(CFG)/*.lib $(CFG)/*.pdb $(CFG)/*.ilk || exit 0
\ No newline at end of file
diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index 23835e4..e3ca5f5 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -1040,9 +1040,9 @@ fbCompositeSolid_nx0565mmx (pixman_op_t op,
     while (w && (unsigned long)dst & 7)
     {
         ullong d = *dst;
-        __m64 vdest = expand565 ((__m64)d, 0);
-        vdest = pack565(over(vsrc, vsrca, vdest), vdest, 0);
-        *dst = (ullong)vdest;
+        __m64 vdest = expand565 (*(__m64 *)&d, 0);
+        vdest = pack565 (over(vsrc, vsrca, vdest), vdest, 0);
+        *dst = *(ullong*)&vdest;
 
         w--;
         dst++;
@@ -1070,9 +1070,9 @@ fbCompositeSolid_nx0565mmx (pixman_op_t op,
     while (w)
     {
         ullong d = *dst;
-        __m64 vdest = expand565 ((__m64)d, 0);
+        __m64 vdest = expand565 (*(__m64*)&d, 0);
         vdest = pack565(over(vsrc, vsrca, vdest), vdest, 0);
-        *dst = (ullong)vdest;
+        *dst = *(ullong*)&vdest;
 
         w--;
         dst++;
@@ -1498,11 +1498,11 @@ fbCompositeSrc_8888x0565mmx (pixman_op_t op,
     {
         __m64 vsrc = load8888 (*src);
         ullong d = *dst;
-        __m64 vdest = expand565 ((__m64)d, 0);
+        __m64 vdest = expand565 (*(__m64*)&d, 0);
 
         vdest = pack565(over(vsrc, expand_alpha(vsrc), vdest), vdest, 0);
 
-        *dst = (ullong)vdest;
+        *dst = *(ullong*)&vdest;
 
         w--;
         dst++;
@@ -1541,11 +1541,11 @@ fbCompositeSrc_8888x0565mmx (pixman_op_t op,
     {
         __m64 vsrc = load8888 (*src);
         ullong d = *dst;
-        __m64 vdest = expand565 ((__m64)d, 0);
+        __m64 vdest = expand565 (*(__m64*)&d, 0);
 
         vdest = pack565(over(vsrc, expand_alpha(vsrc), vdest), vdest, 0);
 
-        *dst = (ullong)vdest;
+        *dst = *(ullong*)&vdest;
 
         w--;
         dst++;
@@ -1610,7 +1610,7 @@ fbCompositeSolidMask_nx8x8888mmx (pixman_op_t op,
 
         if (m)
         {
-        __m64 vdest = in_over(vsrc, vsrca, expand_alpha_rev ((__m64)m), 
load8888(*dst));
+        __m64 vdest = in_over(vsrc, vsrca, expand_alpha_rev 
(*(__m64*)&m), load8888(*dst));
         *dst = store8888(vdest);
         }
 
@@ -1638,8 +1638,8 @@ fbCompositeSolidMask_nx8x8888mmx (pixman_op_t op,
 
         vdest = *(__m64 *)dst;
 
-        dest0 = in_over(vsrc, vsrca, expand_alpha_rev ((__m64)m0), 
expand8888(vdest, 0));
-        dest1 = in_over(vsrc, vsrca, expand_alpha_rev ((__m64)m1), 
expand8888(vdest, 1));
+        dest0 = in_over(vsrc, vsrca, expand_alpha_rev (*(__m64*)&m0), 
expand8888(vdest, 0));
+        dest1 = in_over(vsrc, vsrca, expand_alpha_rev (*(__m64*)&m1), 
expand8888(vdest, 1));
 
         *(__m64 *)dst = pack8888(dest0, dest1);
         }
@@ -1658,7 +1658,7 @@ fbCompositeSolidMask_nx8x8888mmx (pixman_op_t op,
         if (m)
         {
         __m64 vdest = load8888(*dst);
-        vdest = in_over(vsrc, vsrca, expand_alpha_rev ((__m64)m), vdest);
+        vdest = in_over(vsrc, vsrca, expand_alpha_rev (*(__m64*)&m), 
vdest);
         *dst = store8888(vdest);
         }
 
@@ -1711,7 +1711,7 @@ pixman_fill_mmx (uint32_t *bits,
     }
 
     fill = ((ullong)xor << 32) | xor;
-    vfill = (__m64)fill;
+    vfill = *(__m64*)&fill;
 
 #ifdef __GNUC__
     __asm__ (
@@ -1857,7 +1857,7 @@ fbCompositeSolidMaskSrc_nx8x8888mmx (pixman_op_t op,
 
         if (m)
         {
-        __m64 vdest = in(vsrc, expand_alpha_rev ((__m64)m));
+        __m64 vdest = in(vsrc, expand_alpha_rev (*(__m64*)&m));
         *dst = store8888(vdest);
         }
         else
@@ -1889,8 +1889,8 @@ fbCompositeSolidMaskSrc_nx8x8888mmx (pixman_op_t op,
 
         vdest = *(__m64 *)dst;
 
-        dest0 = in(vsrc, expand_alpha_rev ((__m64)m0));
-        dest1 = in(vsrc, expand_alpha_rev ((__m64)m1));
+        dest0 = in(vsrc, expand_alpha_rev (*(__m64*)&m0));
+        dest1 = in(vsrc, expand_alpha_rev (*(__m64*)&m1));
 
         *(__m64 *)dst = pack8888(dest0, dest1);
         }
@@ -1913,7 +1913,7 @@ fbCompositeSolidMaskSrc_nx8x8888mmx (pixman_op_t op,
         if (m)
         {
         __m64 vdest = load8888(*dst);
-        vdest = in(vsrc, expand_alpha_rev ((__m64)m));
+        vdest = in(vsrc, expand_alpha_rev (*(__m64*)&m));
         *dst = store8888(vdest);
         }
         else
@@ -1950,6 +1950,7 @@ fbCompositeSolidMask_nx8x0565mmx (pixman_op_t op,
     int    dstStride, maskStride;
     uint16_t    w;
     __m64    vsrc, vsrca;
+    __m64 src16m64;
     unsigned long long srcsrcsrcsrc, src16;
 
     CHECKPOINT();
@@ -1966,7 +1967,8 @@ fbCompositeSolidMask_nx8x0565mmx (pixman_op_t op,
     vsrc = load8888 (src);
     vsrca = expand_alpha (vsrc);
 
-    src16 = (ullong)pack565(vsrc, _mm_setzero_si64(), 0);
+    src16m64 = (pack565(vsrc, _mm_setzero_si64(), 0));
+    src16    = *(ullong*) &src16m64;
 
     srcsrcsrcsrc = (ullong)src16 << 48 | (ullong)src16 << 32 |
     (ullong)src16 << 16 | (ullong)src16;
@@ -1988,9 +1990,11 @@ fbCompositeSolidMask_nx8x0565mmx (pixman_op_t op,
         if (m)
         {
         ullong d = *dst;
-        __m64 vd = (__m64)d;
-        __m64 vdest = in_over(vsrc, vsrca, expand_alpha_rev ((__m64)m), 
expand565(vd, 0));
-        *dst = (ullong)pack565(vdest, _mm_setzero_si64(), 0);
+        __m64 vd = *(__m64*)&d;
+        __m64 vdest = in_over(vsrc, vsrca, expand_alpha_rev 
(*(__m64*)&m), expand565(vd, 0));
+      __m64 dstm64;
+      dstm64 = pack565(vdest, _mm_setzero_si64(), 0);
+        *dst   = *(ullong*)&dstm64;
         }
 
         w--;
@@ -2019,13 +2023,13 @@ fbCompositeSolidMask_nx8x0565mmx (pixman_op_t op,
 
         vdest = *(__m64 *)dst;
 
-        vm0 = (__m64)m0;
+        vm0 = *(__m64*)&m0;
         vdest = pack565(in_over(vsrc, vsrca, expand_alpha_rev(vm0), 
expand565(vdest, 0)), vdest, 0);
-        vm1 = (__m64)m1;
+        vm1 = *(__m64*)&m1;
         vdest = pack565(in_over(vsrc, vsrca, expand_alpha_rev(vm1), 
expand565(vdest, 1)), vdest, 1);
-        vm2 = (__m64)m2;
+        vm2 = *(__m64*)&m2;
         vdest = pack565(in_over(vsrc, vsrca, expand_alpha_rev(vm2), 
expand565(vdest, 2)), vdest, 2);
-        vm3 = (__m64)m3;
+        vm3 = *(__m64*)&m3;
         vdest = pack565(in_over(vsrc, vsrca, expand_alpha_rev(vm3), 
expand565(vdest, 3)), vdest, 3);
 
         *(__m64 *)dst = vdest;
@@ -2045,9 +2049,11 @@ fbCompositeSolidMask_nx8x0565mmx (pixman_op_t op,
         if (m)
         {
         ullong d = *dst;
-        __m64 vd = (__m64)d;
-        __m64 vdest = in_over(vsrc, vsrca, expand_alpha_rev ((__m64)m), 
expand565(vd, 0));
-        *dst = (ullong)pack565(vdest, _mm_setzero_si64(), 0);
+        __m64 vd = *(__m64*)&d;
+        __m64 vdest = in_over(vsrc, vsrca, expand_alpha_rev 
(*(__m64*)&m), expand565(vd, 0));
+      __m64 dstm64;
+      dstm64 = pack565(vdest, _mm_setzero_si64(), 0);
+        *dst = *(ullong*)&dstm64;
         }
 
         w--;
@@ -2102,11 +2108,13 @@ fbCompositeSrc_8888RevNPx0565mmx (pixman_op_t op,
     {
         __m64 vsrc = load8888 (*src);
         ullong d = *dst;
-        __m64 vdest = expand565 ((__m64)d, 0);
+        __m64 vdest = expand565 (*(__m64*)&d, 0);
+       __m64 dstm64;
 
         vdest = pack565(over_rev_non_pre(vsrc, vdest), vdest, 0);
-
-        *dst = (ullong)vdest;
+    
+       dstm64 = vdest;
+        *dst = *(ullong*)&dstm64;
 
         w--;
         dst++;
@@ -2163,11 +2171,13 @@ fbCompositeSrc_8888RevNPx0565mmx (pixman_op_t op,
     {
         __m64 vsrc = load8888 (*src);
         ullong d = *dst;
-        __m64 vdest = expand565 ((__m64)d, 0);
+        __m64 vdest = expand565 (*(__m64*)&d, 0);
+       __m64 dstm64;
 
         vdest = pack565(over_rev_non_pre(vsrc, vdest), vdest, 0);
 
-        *dst = (ullong)vdest;
+       dstm64 = vdest;
+        *dst = *(ullong*)&dstm64;
 
         w--;
         dst++;
@@ -2326,9 +2336,10 @@ fbCompositeSolidMask_nx8888x0565Cmmx (pixman_op_t op,
         if (m)
         {
         ullong d = *q;
-        __m64 vdest = expand565 ((__m64)d, 0);
+        __m64 vdest = expand565 (*(__m64*)&d, 0);
+      __m64 qm64;
         vdest = pack565 (in_over (vsrc, vsrca, load8888 (m), vdest), 
vdest, 0);
-        *q = (ullong)vdest;
+        *q = *(ullong*)&vdest;
         }
 
         twidth--;
@@ -2369,9 +2380,9 @@ fbCompositeSolidMask_nx8888x0565Cmmx (pixman_op_t op,
         if (m)
         {
         ullong d = *q;
-        __m64 vdest = expand565((__m64)d, 0);
+        __m64 vdest = expand565(*(__m64*)&d, 0);
         vdest = pack565 (in_over(vsrc, vsrca, load8888(m), vdest), 
vdest, 0);
-        *q = (ullong)vdest;
+        *q = *(ullong*)&vdest;
         }
 
         twidth--;
@@ -2727,7 +2738,9 @@ fbCompositeSrcAdd_8888x8888mmx (pixman_op_t     op,
 
     while (w >= 2)
     {
-        *(ullong*)dst = (ullong) _mm_adds_pu8(*(__m64*)src, *(__m64*)dst);
+      __m64 dstm64;
+      dstm64 = _mm_adds_pu8(*(__m64*)src, *(__m64*)dst);
+        *(ullong*)dst = *(ullong*)&dstm64;
         dst += 2;
         src += 2;
         w -= 2;
@@ -2953,7 +2966,7 @@ fbCompositeOver_x888x8x8888mmx (pixman_op_t      op,
         else
         {
             __m64 sa = expand_alpha (s);
-            __m64 vm = expand_alpha_rev ((__m64)m);
+            __m64 vm = expand_alpha_rev (*(__m64*)&m);
             __m64 vdest = in_over(s, sa, vm, load8888 (*dst));
 
             *dst = store8888 (vdest);




-------------- next part --------------
An embedded and charset-unspecified text was scrubbed...
Name: pixman_MMX_win32_makefile.patch
Url: http://lists.cairographics.org/archives/cairo/attachments/20080226/72b6884d/attachment-0001.txt 


More information about the cairo mailing list