[cairo] MMX Makefile for PIXMAN
Frédéric Plourde
frederic.plourde at polymtl.ca
Tue Feb 26 14:37:27 PST 2008
Hi !
Here are 2 patches that enable MMX compilation under win32 platforms for
PIXMAN.
Notice the addition of the MMX option at command line for the
Makefile.win32 files.
Care should be taken, though: there are still many issues to be resolved
in the pixman-mmx.c fast paths, as I noticed significant performance drops
when using MMX (with the alpha_ perf cases) !!
** Also, many similar changes have been made in pixman-mmx.c to allow
Visual Studio to compile it correctly. These changes concern all the "cast
to union type" operations from subtypes to the __m64 union type, which are
valid casts under GCC, but NOT under cl with Visual Studio
(arrghh!) ;-) I had to cast pointers to those variables and dereference
them instead of casting the objects directly. ** I'd like the reviewers'
input on that point... Is that in line with the Cairo/Pixman philosophy? **
Finally, this patch REQUIRES that cairo_MMX_win32_makefile.patch (posted
next) be applied along with it for correct cairo linking.
thanx.
_fred_
diff --git a/pixman/Makefile.win32 b/pixman/Makefile.win32
index e0a1828..dc5f03e 100644
--- a/pixman/Makefile.win32
+++ b/pixman/Makefile.win32
@@ -1,31 +1,102 @@
-LIBRARY = pixman-1
+LIBRARY = pixman-1
+MMX_LIBRARY = pixman-mmx
-CC = cl
+CC = cl
LINK = link
-CFLAGS = -MD -Zi -nologo -O2 -D_CRT_SECURE_NO_DEPRECATE
-D_CRT_NONSTDC_NO_DEPRECATE -I../pixman/src -I. -DPACKAGE=$(LIBRARY)
-DPACKAGE_VERSION="" -DPACKAGE_BUGREPORT=""
+ifeq ($(CFG),)
+CFG=release
+endif
+
+ifeq ($(MMX),)
+MMX=0
+endif
+
+CFLAGS = -MD -nologo -D_CRT_SECURE_NO_DEPRECATE
-D_CRT_NONSTDC_NO_DEPRECATE -I../pixman/src -I. -DPACKAGE=$(LIBRARY)
-DPACKAGE_VERSION="" -DPACKAGE_BUGREPORT=""
+MMX_CFLAGS = -DUSE_MMX -w14710 -w14714
+
+# optimization flags
+ifeq ($(CFG),debug)
+CFLAGS += -Od -Zi
+else
+CFLAGS += -O2
+endif
+
+# MMX compilation flags
+ifeq ($(MMX),1)
+CFLAGS += $(MMX_CFLAGS)
+endif
SOURCES = \
- pixman-region.c \
- pixman-image.c \
- pixman-compose.c \
+ pixman-region.c \
+ pixman-image.c \
+ pixman-compose.c \
pixman-compose-accessors.c \
- pixman-pict.c \
- pixman-utils.c \
- pixman-edge.c \
+ pixman-pict.c \
+ pixman-utils.c \
+ pixman-edge.c \
pixman-edge-accessors.c \
- pixman-trap.c \
- pixman-compute-region.c \
- pixman-timer.c \
+ pixman-trap.c \
+ pixman-compute-region.c \
+ pixman-timer.c \
$(NULL)
-OBJECTS = $(subst .c,.obj,$(SOURCES))
+# mmx code
+MMX_SOURCES = \
+ pixman-mmx.c \
+ $(NULL)
+
+OBJECTS = $(patsubst %.c, $(CFG)/%.obj, $(SOURCES))
+MMX_OBJECTS = $(patsubst %.c, $(CFG)/%.obj, $(MMX_SOURCES))
+
+# targets
+all: inform informMMX $(CFG)/$(LIBRARY).lib $(CFG)/$(MMX_LIBRARY).lib
+ @exit 0
+clean: inform clean_r
+ @exit 0
+pixman: inform informMMX $(CFG)/$(LIBRARY).lib
+ @exit 0
+pixman-mmx: inform informMMX $(CFG)/$(MMX_LIBRARY).lib
+ @exit 0
+
+inform:
+ifneq ($(CFG),release)
+ifneq ($(CFG),debug)
+ @echo "Invalid specified configuration option : "$(CFG)"."
+ @echo
+ @echo -n "Possible choices for configuration are "
+ @echo "'release' and 'debug'"
+ @echo ""
+ @exit 1
+endif
+endif
+
+informMMX:
+ifneq ($(MMX),0)
+ifneq ($(MMX),1)
+ @echo "Invalid specified MMX option : "$(MMX)"."
+ @echo
+ @echo -n "Possible choices for MMX are 0 or 1"
+ @echo ""
+ @exit 1
+endif
+endif
+
+# pixman compilation and linking
+$(CFG)/%.obj: %.c
+ @mkdir -p $(CFG)
+ @$(CC) -c $(CFLAGS) -Fo"$@" $<
+
+$(CFG)/$(LIBRARY).lib: $(OBJECTS)
+ lib -NOLOGO -OUT:$@ $(OBJECTS) || exit 0
-%.obj: %.c
+# pixman-mmx compilation and linking
+$(CFG)/pixman-mmx.obj: pixman-mmx.c
+ @mkdir -p $(CFG)
@$(CC) -c $(CFLAGS) -Fo"$@" $<
-$(LIBRARY).lib: $(OBJECTS)
- lib -NOLOGO -OUT:$@ $(OBJECTS)
+$(CFG)/$(MMX_LIBRARY).lib: $(MMX_OBJECTS)
+ lib -NOLOGO -OUT:$@ $(MMX_OBJECTS) || exit 0
-clean:
- @rm -f *.obj *.lib *.pdb *.ilk || exit 0
+clean_r:
+ @rm -f $(CFG)/*.obj $(CFG)/*.lib $(CFG)/*.pdb $(CFG)/*.ilk || exit 0
\ No newline at end of file
diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index 23835e4..e3ca5f5 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -1040,9 +1040,9 @@ fbCompositeSolid_nx0565mmx (pixman_op_t op,
while (w && (unsigned long)dst & 7)
{
ullong d = *dst;
- __m64 vdest = expand565 ((__m64)d, 0);
- vdest = pack565(over(vsrc, vsrca, vdest), vdest, 0);
- *dst = (ullong)vdest;
+ __m64 vdest = expand565 (*(__m64 *)&d, 0);
+ vdest = pack565 (over(vsrc, vsrca, vdest), vdest, 0);
+ *dst = *(ullong*)&vdest;
w--;
dst++;
@@ -1070,9 +1070,9 @@ fbCompositeSolid_nx0565mmx (pixman_op_t op,
while (w)
{
ullong d = *dst;
- __m64 vdest = expand565 ((__m64)d, 0);
+ __m64 vdest = expand565 (*(__m64*)&d, 0);
vdest = pack565(over(vsrc, vsrca, vdest), vdest, 0);
- *dst = (ullong)vdest;
+ *dst = *(ullong*)&vdest;
w--;
dst++;
@@ -1498,11 +1498,11 @@ fbCompositeSrc_8888x0565mmx (pixman_op_t op,
{
__m64 vsrc = load8888 (*src);
ullong d = *dst;
- __m64 vdest = expand565 ((__m64)d, 0);
+ __m64 vdest = expand565 (*(__m64*)&d, 0);
vdest = pack565(over(vsrc, expand_alpha(vsrc), vdest), vdest, 0);
- *dst = (ullong)vdest;
+ *dst = *(ullong*)&vdest;
w--;
dst++;
@@ -1541,11 +1541,11 @@ fbCompositeSrc_8888x0565mmx (pixman_op_t op,
{
__m64 vsrc = load8888 (*src);
ullong d = *dst;
- __m64 vdest = expand565 ((__m64)d, 0);
+ __m64 vdest = expand565 (*(__m64*)&d, 0);
vdest = pack565(over(vsrc, expand_alpha(vsrc), vdest), vdest, 0);
- *dst = (ullong)vdest;
+ *dst = *(ullong*)&vdest;
w--;
dst++;
@@ -1610,7 +1610,7 @@ fbCompositeSolidMask_nx8x8888mmx (pixman_op_t op,
if (m)
{
- __m64 vdest = in_over(vsrc, vsrca, expand_alpha_rev ((__m64)m),
load8888(*dst));
+ __m64 vdest = in_over(vsrc, vsrca, expand_alpha_rev
(*(__m64*)&m), load8888(*dst));
*dst = store8888(vdest);
}
@@ -1638,8 +1638,8 @@ fbCompositeSolidMask_nx8x8888mmx (pixman_op_t op,
vdest = *(__m64 *)dst;
- dest0 = in_over(vsrc, vsrca, expand_alpha_rev ((__m64)m0),
expand8888(vdest, 0));
- dest1 = in_over(vsrc, vsrca, expand_alpha_rev ((__m64)m1),
expand8888(vdest, 1));
+ dest0 = in_over(vsrc, vsrca, expand_alpha_rev (*(__m64*)&m0),
expand8888(vdest, 0));
+ dest1 = in_over(vsrc, vsrca, expand_alpha_rev (*(__m64*)&m1),
expand8888(vdest, 1));
*(__m64 *)dst = pack8888(dest0, dest1);
}
@@ -1658,7 +1658,7 @@ fbCompositeSolidMask_nx8x8888mmx (pixman_op_t op,
if (m)
{
__m64 vdest = load8888(*dst);
- vdest = in_over(vsrc, vsrca, expand_alpha_rev ((__m64)m), vdest);
+ vdest = in_over(vsrc, vsrca, expand_alpha_rev (*(__m64*)&m),
vdest);
*dst = store8888(vdest);
}
@@ -1711,7 +1711,7 @@ pixman_fill_mmx (uint32_t *bits,
}
fill = ((ullong)xor << 32) | xor;
- vfill = (__m64)fill;
+ vfill = *(__m64*)&fill;
#ifdef __GNUC__
__asm__ (
@@ -1857,7 +1857,7 @@ fbCompositeSolidMaskSrc_nx8x8888mmx (pixman_op_t op,
if (m)
{
- __m64 vdest = in(vsrc, expand_alpha_rev ((__m64)m));
+ __m64 vdest = in(vsrc, expand_alpha_rev (*(__m64*)&m));
*dst = store8888(vdest);
}
else
@@ -1889,8 +1889,8 @@ fbCompositeSolidMaskSrc_nx8x8888mmx (pixman_op_t op,
vdest = *(__m64 *)dst;
- dest0 = in(vsrc, expand_alpha_rev ((__m64)m0));
- dest1 = in(vsrc, expand_alpha_rev ((__m64)m1));
+ dest0 = in(vsrc, expand_alpha_rev (*(__m64*)&m0));
+ dest1 = in(vsrc, expand_alpha_rev (*(__m64*)&m1));
*(__m64 *)dst = pack8888(dest0, dest1);
}
@@ -1913,7 +1913,7 @@ fbCompositeSolidMaskSrc_nx8x8888mmx (pixman_op_t op,
if (m)
{
__m64 vdest = load8888(*dst);
- vdest = in(vsrc, expand_alpha_rev ((__m64)m));
+ vdest = in(vsrc, expand_alpha_rev (*(__m64*)&m));
*dst = store8888(vdest);
}
else
@@ -1950,6 +1950,7 @@ fbCompositeSolidMask_nx8x0565mmx (pixman_op_t op,
int dstStride, maskStride;
uint16_t w;
__m64 vsrc, vsrca;
+ __m64 src16m64;
unsigned long long srcsrcsrcsrc, src16;
CHECKPOINT();
@@ -1966,7 +1967,8 @@ fbCompositeSolidMask_nx8x0565mmx (pixman_op_t op,
vsrc = load8888 (src);
vsrca = expand_alpha (vsrc);
- src16 = (ullong)pack565(vsrc, _mm_setzero_si64(), 0);
+ src16m64 = (pack565(vsrc, _mm_setzero_si64(), 0));
+ src16 = *(ullong*) &src16m64;
srcsrcsrcsrc = (ullong)src16 << 48 | (ullong)src16 << 32 |
(ullong)src16 << 16 | (ullong)src16;
@@ -1988,9 +1990,11 @@ fbCompositeSolidMask_nx8x0565mmx (pixman_op_t op,
if (m)
{
ullong d = *dst;
- __m64 vd = (__m64)d;
- __m64 vdest = in_over(vsrc, vsrca, expand_alpha_rev ((__m64)m),
expand565(vd, 0));
- *dst = (ullong)pack565(vdest, _mm_setzero_si64(), 0);
+ __m64 vd = *(__m64*)&d;
+ __m64 vdest = in_over(vsrc, vsrca, expand_alpha_rev
(*(__m64*)&m), expand565(vd, 0));
+ __m64 dstm64;
+ dstm64 = pack565(vdest, _mm_setzero_si64(), 0);
+ *dst = *(ullong*)&dstm64;
}
w--;
@@ -2019,13 +2023,13 @@ fbCompositeSolidMask_nx8x0565mmx (pixman_op_t op,
vdest = *(__m64 *)dst;
- vm0 = (__m64)m0;
+ vm0 = *(__m64*)&m0;
vdest = pack565(in_over(vsrc, vsrca, expand_alpha_rev(vm0),
expand565(vdest, 0)), vdest, 0);
- vm1 = (__m64)m1;
+ vm1 = *(__m64*)&m1;
vdest = pack565(in_over(vsrc, vsrca, expand_alpha_rev(vm1),
expand565(vdest, 1)), vdest, 1);
- vm2 = (__m64)m2;
+ vm2 = *(__m64*)&m2;
vdest = pack565(in_over(vsrc, vsrca, expand_alpha_rev(vm2),
expand565(vdest, 2)), vdest, 2);
- vm3 = (__m64)m3;
+ vm3 = *(__m64*)&m3;
vdest = pack565(in_over(vsrc, vsrca, expand_alpha_rev(vm3),
expand565(vdest, 3)), vdest, 3);
*(__m64 *)dst = vdest;
@@ -2045,9 +2049,11 @@ fbCompositeSolidMask_nx8x0565mmx (pixman_op_t op,
if (m)
{
ullong d = *dst;
- __m64 vd = (__m64)d;
- __m64 vdest = in_over(vsrc, vsrca, expand_alpha_rev ((__m64)m),
expand565(vd, 0));
- *dst = (ullong)pack565(vdest, _mm_setzero_si64(), 0);
+ __m64 vd = *(__m64*)&d;
+ __m64 vdest = in_over(vsrc, vsrca, expand_alpha_rev
(*(__m64*)&m), expand565(vd, 0));
+ __m64 dstm64;
+ dstm64 = pack565(vdest, _mm_setzero_si64(), 0);
+ *dst = *(ullong*)&dstm64;
}
w--;
@@ -2102,11 +2108,13 @@ fbCompositeSrc_8888RevNPx0565mmx (pixman_op_t op,
{
__m64 vsrc = load8888 (*src);
ullong d = *dst;
- __m64 vdest = expand565 ((__m64)d, 0);
+ __m64 vdest = expand565 (*(__m64*)&d, 0);
+ __m64 dstm64;
vdest = pack565(over_rev_non_pre(vsrc, vdest), vdest, 0);
-
- *dst = (ullong)vdest;
+
+ dstm64 = vdest;
+ *dst = *(ullong*)&dstm64;
w--;
dst++;
@@ -2163,11 +2171,13 @@ fbCompositeSrc_8888RevNPx0565mmx (pixman_op_t op,
{
__m64 vsrc = load8888 (*src);
ullong d = *dst;
- __m64 vdest = expand565 ((__m64)d, 0);
+ __m64 vdest = expand565 (*(__m64*)&d, 0);
+ __m64 dstm64;
vdest = pack565(over_rev_non_pre(vsrc, vdest), vdest, 0);
- *dst = (ullong)vdest;
+ dstm64 = vdest;
+ *dst = *(ullong*)&dstm64;
w--;
dst++;
@@ -2326,9 +2336,10 @@ fbCompositeSolidMask_nx8888x0565Cmmx (pixman_op_t
op,
if (m)
{
ullong d = *q;
- __m64 vdest = expand565 ((__m64)d, 0);
+ __m64 vdest = expand565 (*(__m64*)&d, 0);
+ __m64 qm64;
vdest = pack565 (in_over (vsrc, vsrca, load8888 (m), vdest),
vdest, 0);
- *q = (ullong)vdest;
+ *q = *(ullong*)&vdest;
}
twidth--;
@@ -2369,9 +2380,9 @@ fbCompositeSolidMask_nx8888x0565Cmmx (pixman_op_t op,
if (m)
{
ullong d = *q;
- __m64 vdest = expand565((__m64)d, 0);
+ __m64 vdest = expand565(*(__m64*)&d, 0);
vdest = pack565 (in_over(vsrc, vsrca, load8888(m), vdest),
vdest, 0);
- *q = (ullong)vdest;
+ *q = *(ullong*)&vdest;
}
twidth--;
@@ -2727,7 +2738,9 @@ fbCompositeSrcAdd_8888x8888mmx (pixman_op_t op,
while (w >= 2)
{
- *(ullong*)dst = (ullong) _mm_adds_pu8(*(__m64*)src, *(__m64*)dst);
+ __m64 dstm64;
+ dstm64 = _mm_adds_pu8(*(__m64*)src, *(__m64*)dst);
+ *(ullong*)dst = *(ullong*)&dstm64;
dst += 2;
src += 2;
w -= 2;
@@ -2953,7 +2966,7 @@ fbCompositeOver_x888x8x8888mmx (pixman_op_t op,
else
{
__m64 sa = expand_alpha (s);
- __m64 vm = expand_alpha_rev ((__m64)m);
+ __m64 vm = expand_alpha_rev (*(__m64*)&m);
__m64 vdest = in_over(s, sa, vm, load8888 (*dst));
*dst = store8888 (vdest);
-------------- next part --------------
An embedded and charset-unspecified text was scrubbed...
Name: pixman_MMX_win32_makefile.patch
Url: http://lists.cairographics.org/archives/cairo/attachments/20080226/1ad0d725/attachment.ksh
More information about the cairo
mailing list