[cairo] [PATCH] Preliminary altivec support
Luca Barbato
lu_zero at gentoo.org
Fri Feb 2 14:28:23 PST 2007
This is an updated patch; there are still some problems to be solved.
Please check the regression test and point me to the rounding mistake,
or to the corner cases I'm not handling correctly.
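
For reference, the per-channel math the VMX paths are meant to match is
the usual rounded divide-by-255 approximation, i.e. the same
"t = x*a + 0x80; (t + (t >> 8)) >> 8" idiom that FbByteMul() uses per
channel and that pix_multiply() in fbvmx.c below implements. A minimal
scalar sketch (the helper names are only for illustration, they are not
part of pixman):

    /* approximate (x * a) / 255 with rounding */
    static inline unsigned int
    mul_div_255 (unsigned int x, unsigned int a)
    {
        unsigned int t = x * a + 0x80;
        return (t + (t >> 8)) >> 8;
    }

    /* one channel of OVER: dst = src + dst * (255 - srca) / 255
     * (the vector path uses a saturating add, vec_adds, for the sum) */
    static inline unsigned int
    over_channel (unsigned int src, unsigned int srca, unsigned int dst)
    {
        return src + mul_div_255 (dst, 255 - srca);
    }

Comparing the vector results against this scalar reference should make
it easier to spot where the rounding goes wrong.
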
lu
--
Luca Barbato
Gentoo/linux Gentoo/PPC
http://dev.gentoo.org/~lu_zero
-------------- next part --------------
diff --git a/configure.in b/configure.in
index ecb124b..99be080 100644
--- a/configure.in
+++ b/configure.in
@@ -705,6 +705,41 @@ AC_SUBST(MMX_CFLAGS)
AM_CONDITIONAL(USE_MMX, test $have_mmx_intrinsics = yes)
dnl ===========================================================================
+dnl Check for VMX/Altivec
+if test -n "`$CC -v 2>&1 | grep version | grep Apple`"; then
+ VMX_CFLAGS="-faltivec"
+else
+ VMX_CFLAGS="-maltivec -mabi=altivec"
+fi
+
+have_vmx_intrinsics=no
+AC_MSG_CHECKING(For VMX/Altivec intrinsics in the compiler)
+xserver_save_CFLAGS=$CFLAGS
+CFLAGS="$CFLAGS $VMX_CFLAGS"
+AC_COMPILE_IFELSE([
+#if defined(__GNUC__) && (__GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 4))
+#error "Need GCC >= 3.4 for sane altivec support"
+#endif
+#include <altivec.h>
+int main () {
+ vector unsigned int v = vec_splat_u32 (1);
+ v = vec_sub (v, v);
+ return 0;
+}], have_vmx_intrinsics=yes)
+CFLAGS=$xserver_save_CFLAGS
+AC_MSG_RESULT($have_vmx_intrinsics)
+
+if test $have_vmx_intrinsics = yes ; then
+ AC_DEFINE(USE_VMX, 1, [use VMX compiler intrinsics])
+else
+ VMX_CFLAGS=
+fi
+AC_SUBST(VMX_CFLAGS)
+
+AM_CONDITIONAL(USE_VMX, test $have_vmx_intrinsics = yes)
+
+dnl ===========================================================================
+
AC_ARG_ENABLE(gcov,
AS_HELP_STRING([--enable-gcov],
diff --git a/pixman/configure.in b/pixman/configure.in
index c9cdae8..9a91697 100644
--- a/pixman/configure.in
+++ b/pixman/configure.in
@@ -83,6 +83,40 @@ AC_SUBST(MMX_CFLAGS)
AM_CONDITIONAL(USE_MMX, test $have_mmx_intrinsics = yes)
dnl ===========================================================================
+dnl Check for VMX/Altivec
+if test -n "`$CC -v 2>&1 | grep version | grep Apple`"; then
+ VMX_CFLAGS="-faltivec"
+else
+ VMX_CFLAGS="-maltivec -mabi=altivec"
+fi
+
+have_vmx_intrinsics=no
+AC_MSG_CHECKING(For VMX/Altivec intrinsics in the compiler)
+xserver_save_CFLAGS=$CFLAGS
+CFLAGS="$CFLAGS $VMX_CFLAGS"
+AC_COMPILE_IFELSE([
+#if defined(__GNUC__) && (__GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 4))
+#error "Need GCC >= 3.4 for sane altivec support"
+#endif
+#include <altivec.h>
+int main () {
+ vector unsigned int v = vec_splat_u32 (1);
+ v = vec_sub (v, v);
+ return 0;
+}], have_vmx_intrinsics=yes)
+CFLAGS=$xserver_save_CFLAGS
+AC_MSG_RESULT($have_vmx_intrinsics)
+
+if test $have_vmx_intrinsics = yes ; then
+ AC_DEFINE(USE_VMX, 1, [use VMX compiler intrinsics])
+else
+ VMX_CFLAGS=
+fi
+AC_SUBST(VMX_CFLAGS)
+
+AM_CONDITIONAL(USE_VMX, test $have_vmx_intrinsics = yes)
+
+dnl ===========================================================================
AC_OUTPUT([
libpixman.pc
diff --git a/pixman/src/Makefile.am b/pixman/src/Makefile.am
index aa04c31..0987fb5 100644
--- a/pixman/src/Makefile.am
+++ b/pixman/src/Makefile.am
@@ -39,4 +39,13 @@ libpixman_mmx_la_CFLAGS = @MMX_CFLAGS@ $
libpixman_la_LIBADD = libpixman-mmx.la
endif
+if USE_VMX
+noinst_LTLIBRARIES += libpixman-vmx.la
+libpixman_vmx_la_SOURCES = \
+ fbvmx.c \
+ fbvmx.h
+libpixman_vmx_la_CFLAGS = @VMX_CFLAGS@ $(WARN_CFLAGS)
+libpixman_la_LIBADD = libpixman-vmx.la
+endif
+
INCLUDES = -I$(top_srcdir) -I$(srcdir) @WARN_CFLAGS@
diff --git a/pixman/src/fbpict.c b/pixman/src/fbpict.c
index 63b1cbc..0e30fc8 100644
--- a/pixman/src/fbpict.c
+++ b/pixman/src/fbpict.c
@@ -30,6 +30,7 @@
#include "fbpict.h"
#include "fbmmx.h"
+#include "fbvmx.h"
static CARD32
fbOver (CARD32 x, CARD32 y)
@@ -1411,6 +1412,14 @@ pixman_composite (pixman_operator_t op,
}
#endif
+#ifdef USE_VMX
+ static Bool vmx_setup = FALSE;
+ if (!vmx_setup) {
+ fbComposeSetupVMX();
+ vmx_setup = TRUE;
+ }
+#endif
+
xDst += pDst->pDrawable->x;
yDst += pDst->pDrawable->y;
if (pSrc->pDrawable) {
@@ -2010,6 +2019,42 @@ pixman_composite (pixman_operator_t op,
}
/* The CPU detection code needs to be in a file not compiled with
+ * "-maltivec -mabi=altivec", as gcc would try to save vector register
+ * across function calls causing SIGILL on cpus without Altivec/vmx.
+ */
+#ifdef USE_VMX
+
+#include <signal.h>
+#include <setjmp.h>
+
+static sigjmp_buf jmp;
+static volatile sig_atomic_t in_test = 0;
+
+static void vmx_test (int sig) {
+ if (!in_test) {
+ signal(sig, SIG_DFL);
+ raise (sig);
+ }
+ in_test = 0;
+ siglongjmp (jmp, 1);
+}
+
+pixman_private
+Bool fbHaveVMX(void) {
+ signal (SIGILL, vmx_test);
+ if (sigsetjmp (jmp, 1)) {
+ signal (SIGILL, SIG_DFL);
+ } else {
+ in_test = 1;
+ asm volatile ( "vor 0, 0, 0" );
+ signal (SIGILL, SIG_DFL);
+ return 1;
+ }
+ return 0;
+}
+#endif /* USE_VMX */
+
+/* The CPU detection code needs to be in a file not compiled with
* "-mmmx -msse", as gcc would generate CMOV instructions otherwise
* that would lead to SIGILL instructions on old CPUs that don't have
* it.
diff --git a/pixman/src/fbvmx.c b/pixman/src/fbvmx.c
new file mode 100644
index 0000000..63c7139
--- /dev/null
+++ b/pixman/src/fbvmx.c
@@ -0,0 +1,769 @@
+/*
+ * Copyright © 2006 Luca Barbato
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Red Hat not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission. Red Hat makes no representations about the
+ * suitability of this software for any purpose. It is provided "as is"
+ * without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ *
+ * Author: Luca Barbato (lu_zero at gentoo.org)
+ *
+ * Based on fbmmx.c by Owen Taylor, Søren Sandmann and Nicholas Miell
+ */
+
+#include "fbpict.h"
+#include "fbvmx.h"
+#include <altivec.h>
+//#include <stdio.h>
+
+static __inline__ vector unsigned int
+splat_alpha(vector unsigned int pix) {
+ return vec_perm(pix, pix,
+ (vector unsigned char)AVV(0x00,0x00,0x00,0x00, 0x04,0x04,0x04,0x04,
+ 0x08,0x08,0x08,0x08, 0x0C,0x0C,0x0C,0x0C));
+}
+
+static __inline__ vector unsigned int
+pix_multiply(vector unsigned int p, vector unsigned int a)
+{
+ vector unsigned short hi, lo, mod;
+ /* unpack to short */
+ hi = (vector unsigned short)
+ vec_mergeh((vector unsigned char)AVV(0),
+ (vector unsigned char)p);
+ mod = (vector unsigned short)
+ vec_mergeh((vector unsigned char)AVV(0),
+ (vector unsigned char)a);
+
+ hi = vec_mladd(hi, mod, (vector unsigned short)
+ AVV(0x0080,0x0080,0x0080,0x0080,
+ 0x0080,0x0080,0x0080,0x0080));
+
+ hi = vec_adds(hi, vec_sr(hi, vec_splat_u16(8)));
+
+ hi = vec_sr(hi, vec_splat_u16(8));
+
+ /* unpack to short */
+ lo = (vector unsigned short)
+ vec_mergel((vector unsigned char)AVV(0),
+ (vector unsigned char)p);
+ mod = (vector unsigned short)
+ vec_mergel((vector unsigned char)AVV(0),
+ (vector unsigned char)a);
+
+ lo = vec_mladd(lo, mod, (vector unsigned short)
+ AVV(0x0080,0x0080,0x0080,0x0080,
+ 0x0080,0x0080,0x0080,0x0080));
+
+ lo = vec_adds(lo, vec_sr(lo, vec_splat_u16(8)));
+
+ lo = vec_sr(lo, vec_splat_u16(8));
+
+ return vec_packsu(hi, lo);
+}
+
+static __inline__ vector unsigned int
+pix_add (vector unsigned int a, vector unsigned int b)
+{
+ return (vector unsigned int)vec_adds ((vector unsigned char)a,
+ (vector unsigned char)b);
+}
+
+static __inline__ vector unsigned int
+pix_add_mul (vector unsigned int x, vector unsigned int a,
+ vector unsigned int y, vector unsigned int b)
+{
+ vector unsigned short hi, lo, mod, hiy, loy, mody;
+
+ hi = (vector unsigned short)
+ vec_mergeh((vector unsigned char)AVV(0),
+ (vector unsigned char)x);
+ mod = (vector unsigned short)
+ vec_mergeh((vector unsigned char)AVV(0),
+ (vector unsigned char)a);
+ hiy = (vector unsigned short)
+ vec_mergeh((vector unsigned char)AVV(0),
+ (vector unsigned char)y);
+ mody = (vector unsigned short)
+ vec_mergeh((vector unsigned char)AVV(0),
+ (vector unsigned char)b);
+
+ hi = vec_mladd(hi, mod, (vector unsigned short)
+ AVV(0x0080,0x0080,0x0080,0x0080,
+ 0x0080,0x0080,0x0080,0x0080));
+
+ hi = vec_mladd(hiy, mody, hi);
+
+ hi = vec_adds(hi, vec_sr(hi, vec_splat_u16(8)));
+
+ hi = vec_sr(hi, vec_splat_u16(8));
+
+ lo = (vector unsigned short)
+ vec_mergel((vector unsigned char)AVV(0),
+ (vector unsigned char)x);
+ mod = (vector unsigned short)
+ vec_mergel((vector unsigned char)AVV(0),
+ (vector unsigned char)a);
+
+ loy = (vector unsigned short)
+ vec_mergel((vector unsigned char)AVV(0),
+ (vector unsigned char)y);
+ mody = (vector unsigned short)
+ vec_mergel((vector unsigned char)AVV(0),
+ (vector unsigned char)b);
+
+ lo = vec_mladd(lo, mod, (vector unsigned short)
+ AVV(0x0080,0x0080,0x0080,0x0080,
+ 0x0080,0x0080,0x0080,0x0080));
+
+ lo = vec_mladd(loy, mody, lo);
+
+ lo = vec_adds(lo, vec_sr(lo, vec_splat_u16(8)));
+
+ lo = vec_sr(lo, vec_splat_u16(8));
+
+ return vec_packsu(hi, lo);
+}
+
+static __inline__ vector unsigned int
+negate (vector unsigned int src)
+{
+ return vec_nor (src, src);
+}
+
+static __inline__ vector unsigned int
+over (vector unsigned int src, vector unsigned int srca,
+ vector unsigned int dest)
+{
+ vector unsigned char tmp = (vector unsigned char)pix_multiply(dest, negate (srca));
+ tmp = vec_adds((vector unsigned char)src, tmp);
+ return (vector unsigned int)tmp;
+}
+
+// in == pix_multiply
+#define in_over(src, srca, mask, dest) over(pix_multiply(src, mask),\
+ pix_multiply(srca, mask), dest)
+
+
+#define COMPUTE_SHIFT_MASK(source) \
+ source ## _mask = vec_lvsl(0, source);
+
+#define COMPUTE_SHIFT_MASKS(dest, source) \
+ dest ## _mask = vec_lvsl(0, dest); \
+ source ## _mask = vec_lvsl(0, source); \
+ store_mask = vec_lvsr(0, dest);
+
+#define COMPUTE_SHIFT_MASKC(dest, source, mask) \
+ mask ## _mask = vec_lvsl(0, mask); \
+ dest ## _mask = vec_lvsl(0, dest); \
+ source ## _mask = vec_lvsl(0, source); \
+ store_mask = vec_lvsr(0, dest);
+
+// notice you have to declare temp vars...
+// Note: tmp3 and tmp4 must remain untouched!
+#define LOAD_VECTOR(source) \
+ tmp1 = vec_ld(0, source); \
+ tmp2 = vec_ld(15, source); \
+ v ## source = vec_perm(tmp1, tmp2, source ## _mask);
+
+#define LOAD_VECTORS(dest, source) \
+ tmp1 = vec_ld(0, source); \
+ tmp2 = vec_ld(15, source); \
+ tmp3 = vec_ld(0, dest); \
+ v ## source = vec_perm(tmp1, tmp2, source ## _mask); \
+ tmp4 = vec_ld(15, dest); \
+ v ## dest = vec_perm(tmp3, tmp4, dest ## _mask);
+
+#define LOAD_VECTORSC(dest, source, mask) \
+ tmp1 = vec_ld(0, source); \
+ tmp2 = vec_ld(15, source); \
+ tmp3 = vec_ld(0, dest); \
+ v ## source = vec_perm(tmp1, tmp2, source ## _mask); \
+ tmp4 = vec_ld(15, dest); \
+ tmp1 = vec_ld(0, mask); \
+ v ## dest = vec_perm(tmp3, tmp4, dest ## _mask); \
+ tmp2 = vec_ld(15, mask); \
+ v ## mask = vec_perm(tmp1, tmp2, mask ## _mask);
+
+#define STORE_VECTOR(dest) \
+ edges = vec_perm(tmp4, tmp3, dest ## _mask); \
+ tmp3 = vec_perm((vector unsigned char)v ## dest, edges, store_mask); \
+ tmp1 = vec_perm(edges, (vector unsigned char)v ## dest, store_mask); \
+ vec_st((vector unsigned int) tmp3, 15, dest ); \
+ vec_st((vector unsigned int) tmp1, 0, dest );
+
+static FASTCALL void
+vmxCombineMaskU (CARD32 *src, const CARD32 *msk, int width)
+{
+ int i;
+ vector unsigned int vsrc, vmsk;
+ vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+ src_mask, msk_mask, store_mask;
+
+ COMPUTE_SHIFT_MASKS(src, msk)
+
+ //printf("%s\n",__PRETTY_FUNCTION__);
+ for (i = width/4; i > 0; i--) {
+
+ LOAD_VECTORS(src, msk)
+
+ vsrc = pix_multiply(vsrc, splat_alpha(vmsk));
+
+ STORE_VECTOR(src)
+
+ msk+=4;
+ src+=4;
+ }
+
+ for (i = width%4; --i >= 0;) {
+ CARD32 a = msk[i] >> 24;
+ CARD32 s = src[i];
+ FbByteMul(s, a);
+ src[i] = s;
+ }
+}
+
+static FASTCALL void
+vmxCombineOverU (CARD32 *dest, const CARD32 *src, int width)
+{
+ int i;
+ vector unsigned int vdest, vsrc;
+ vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+ dest_mask, src_mask, store_mask;
+
+ COMPUTE_SHIFT_MASKS(dest, src)
+
+ //printf("%s\n",__PRETTY_FUNCTION__);
+ for (i = width/4; i > 0; i--) {
+
+ LOAD_VECTORS(dest, src)
+
+ vdest = over(vsrc, splat_alpha(vsrc), vdest);
+
+ STORE_VECTOR(dest)
+
+ src+=4;
+ dest+=4;
+ }
+
+ for (i = width%4; --i >=0;) {
+ CARD32 s = src[i];
+ CARD32 d = dest[i];
+ CARD32 ia = Alpha(~s);
+
+ FbByteMulAdd(d, ia, s);
+ dest[i] = d;
+ }
+}
+
+
+static FASTCALL void
+vmxCombineOverReverseU (CARD32 *dest, const CARD32 *src, int width)
+{
+ int i;
+ vector unsigned int vdest, vsrc;
+ vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+ dest_mask, src_mask, store_mask;
+
+ COMPUTE_SHIFT_MASKS(dest, src)
+
+ //printf("%s\n",__PRETTY_FUNCTION__);
+ for (i = width/4; i > 0; i--) {
+
+ LOAD_VECTORS(dest, src)
+
+ vdest = over(vdest, splat_alpha(vdest) , vsrc);
+
+ STORE_VECTOR(dest)
+
+ src+=4;
+ dest+=4;
+ }
+
+ for (i = width%4; --i >=0;) {
+ CARD32 s = src[i];
+ CARD32 d = dest[i];
+ CARD32 ia = Alpha(~s);
+
+ FbByteMulAdd(d, ia, s);
+ dest[i] = d;
+ }
+}
+
+static FASTCALL void
+vmxCombineInU (CARD32 *dest, const CARD32 *src, int width)
+{
+ int i;
+ vector unsigned int vdest, vsrc;
+ vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+ dest_mask, src_mask, store_mask;
+
+ COMPUTE_SHIFT_MASKS(dest, src)
+
+ //printf("%s\n",__PRETTY_FUNCTION__);
+ for (i = width/4; i > 0; i--) {
+
+ LOAD_VECTORS(dest, src)
+
+ vdest = pix_multiply(vsrc, splat_alpha(vdest));
+
+ STORE_VECTOR(dest)
+
+ src+=4;
+ dest+=4;
+ }
+
+ for (i = width%4; --i >=0;) {
+
+ CARD32 s = src[i];
+ CARD32 a = Alpha(dest[i]);
+ FbByteMul(s, a);
+ dest[i] = s;
+ }
+}
+
+static FASTCALL void
+vmxCombineInReverseU (CARD32 *dest, const CARD32 *src, int width)
+{
+ int i;
+ vector unsigned int vdest, vsrc;
+ vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+ dest_mask, src_mask, store_mask;
+
+ COMPUTE_SHIFT_MASKS(dest, src)
+
+ //printf("%s\n",__PRETTY_FUNCTION__);
+ for (i = width/4; i > 0; i--) {
+
+ LOAD_VECTORS(dest, src)
+
+ vdest = pix_multiply(vdest, splat_alpha(vsrc));
+
+ STORE_VECTOR(dest)
+
+ src+=4;
+ dest+=4;
+ }
+
+ for (i = width%4; --i >=0;) {
+ CARD32 s = dest[i];
+ CARD32 a = Alpha(src[i]);
+ FbByteMul(s, a);
+ dest[i] = s;
+ }
+}
+
+static FASTCALL void
+vmxCombineOutU (CARD32 *dest, const CARD32 *src, int width)
+{
+ int i;
+ vector unsigned int vdest, vsrc;
+ vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+ dest_mask, src_mask, store_mask;
+
+ COMPUTE_SHIFT_MASKS(dest, src)
+
+ //printf("%s\n",__PRETTY_FUNCTION__);
+ for (i = width/4; i > 0; i--) {
+
+ LOAD_VECTORS(dest, src)
+
+ vdest = pix_multiply(vsrc, splat_alpha(negate(vdest)));
+
+ STORE_VECTOR(dest)
+
+ src+=4;
+ dest+=4;
+ }
+
+ for (i = width%4; --i >=0;) {
+ CARD32 s = src[i];
+ CARD32 a = Alpha(~dest[i]);
+ FbByteMul(s, a);
+ dest[i] = s;
+ }
+}
+
+static FASTCALL void
+vmxCombineOutReverseU (CARD32 *dest, const CARD32 *src, int width)
+{
+ int i;
+ vector unsigned int vdest, vsrc;
+ vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+ dest_mask, src_mask, store_mask;
+
+ COMPUTE_SHIFT_MASKS(dest, src)
+
+ //printf("%s\n",__PRETTY_FUNCTION__);
+ for (i = width/4; i > 0; i--) {
+
+ LOAD_VECTORS(dest, src)
+
+ vdest = pix_multiply(vdest, splat_alpha(negate(vsrc)));
+
+ STORE_VECTOR(dest)
+
+ src+=4;
+ dest+=4;
+ }
+
+ for (i = width%4; --i >=0;) {
+ CARD32 s = src[i];
+ CARD32 a = Alpha(~dest[i]);
+ FbByteMul(s, a);
+ dest[i] = s;
+ }
+}
+
+static FASTCALL void
+vmxCombineAtopU (CARD32 *dest, const CARD32 *src, int width)
+{
+ int i;
+ vector unsigned int vdest, vsrc;
+ vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+ dest_mask, src_mask, store_mask;
+
+ COMPUTE_SHIFT_MASKS(dest, src)
+
+ //printf("%s\n",__PRETTY_FUNCTION__);
+ for (i = width/4; i > 0; i--) {
+
+ LOAD_VECTORS(dest, src)
+
+ vdest = pix_add_mul(vsrc, splat_alpha(vdest),
+ vdest, splat_alpha(negate(vsrc)));
+
+ STORE_VECTOR(dest)
+
+ src+=4;
+ dest+=4;
+ }
+
+ for (i = width%4; --i >=0;) {
+ CARD32 s = src[i];
+ CARD32 d = dest[i];
+ CARD32 dest_a = Alpha(d);
+ CARD32 src_ia = Alpha(~s);
+
+ FbByteAddMul(s, dest_a, d, src_ia);
+ dest[i] = s;
+ }
+}
+
+static FASTCALL void
+vmxCombineAtopReverseU (CARD32 *dest, const CARD32 *src, int width)
+{
+ int i;
+ vector unsigned int vdest, vsrc;
+ vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+ dest_mask, src_mask, store_mask;
+
+ COMPUTE_SHIFT_MASKS(dest, src)
+
+ //printf("%s\n",__PRETTY_FUNCTION__);
+ for (i = width/4; i > 0; i--) {
+
+ LOAD_VECTORS(dest, src)
+
+ vdest = pix_add_mul(vdest, splat_alpha(vsrc),
+ vsrc, splat_alpha(negate(vdest)));
+
+ STORE_VECTOR(dest)
+
+ src+=4;
+ dest+=4;
+ }
+
+ for (i = width%4; --i >=0;) {
+ CARD32 s = src[i];
+ CARD32 d = dest[i];
+ CARD32 src_a = Alpha(s);
+ CARD32 dest_ia = Alpha(~d);
+
+ FbByteAddMul(s, dest_ia, d, src_a);
+ dest[i] = s;
+ }
+}
+
+static FASTCALL void
+vmxCombineXorU (CARD32 *dest, const CARD32 *src, int width)
+{
+ int i;
+ vector unsigned int vdest, vsrc;
+ vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+ dest_mask, src_mask, store_mask;
+
+ COMPUTE_SHIFT_MASKS(dest, src)
+
+ //printf("%s\n",__PRETTY_FUNCTION__);
+ for (i = width/4; i > 0; i--) {
+
+ LOAD_VECTORS(dest, src)
+
+ vdest = pix_add_mul(vsrc, splat_alpha(negate(vdest)),
+ vdest, splat_alpha(negate(vsrc)));
+
+ STORE_VECTOR(dest)
+
+ src+=4;
+ dest+=4;
+ }
+
+ for (i = width%4; --i >=0;) {
+ CARD32 s = src[i];
+ CARD32 d = dest[i];
+ CARD32 src_ia = Alpha(~s);
+ CARD32 dest_ia = Alpha(~d);
+
+ FbByteAddMul(s, dest_ia, d, src_ia);
+ dest[i] = s;
+ }
+}
+
+static FASTCALL void
+vmxCombineAddU (CARD32 *dest, const CARD32 *src, int width)
+{
+ int i;
+ vector unsigned int vdest, vsrc;
+ vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+ dest_mask, src_mask, store_mask;
+
+ COMPUTE_SHIFT_MASKS(dest, src)
+ //printf("%s\n",__PRETTY_FUNCTION__);
+ for (i = width/4; i > 0; i--) {
+
+ LOAD_VECTORS(dest, src)
+
+ vdest = pix_add(vsrc, vdest);
+
+ STORE_VECTOR(dest)
+
+ src+=4;
+ dest+=4;
+ }
+
+ for (i = width%4; --i >=0;) {
+ CARD32 s = src[i];
+ CARD32 d = dest[i];
+ FbByteAdd(d, s);
+ dest[i] = d;
+ }
+}
+
+static FASTCALL void
+vmxCombineSrcC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
+{
+ int i;
+ vector unsigned int vdest, vsrc, vmask;
+ vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+ dest_mask, mask_mask, src_mask, store_mask;
+
+ COMPUTE_SHIFT_MASKC(dest, src, mask);
+ //printf("%s\n",__PRETTY_FUNCTION__);
+ for (i = width/4; i > 0; i--) {
+
+ LOAD_VECTORSC(dest, src, mask)
+
+ vdest = pix_multiply(vsrc, vmask);
+
+ STORE_VECTOR(dest)
+
+ mask+=4;
+ src+=4;
+ dest+=4;
+ }
+
+ for (i = width%4; --i >=0;) {
+ CARD32 a = mask[i];
+ CARD32 s = src[i];
+ FbByteMulC(s, a);
+ dest[i] = s;
+ }
+}
+
+static FASTCALL void
+vmxCombineOverC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
+{
+ int i;
+ vector unsigned int vdest, vsrc, vmask;
+ vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+ dest_mask, mask_mask, src_mask, store_mask;
+
+ COMPUTE_SHIFT_MASKC(dest, src, mask);
+ //printf("%s\n",__PRETTY_FUNCTION__);
+ for (i = width/4; i > 0; i--) {
+
+ LOAD_VECTORSC(dest, src, mask)
+
+ vdest = in_over(vsrc, splat_alpha(vsrc), vmask, vdest);
+
+ STORE_VECTOR(dest)
+
+ mask+=4;
+ src+=4;
+ dest+=4;
+ }
+
+ for (i = width%4; --i >=0;) {
+ CARD32 a = mask[i];
+ CARD32 s = src[i];
+ CARD32 d = dest[i];
+ FbByteMulC(s, a);
+ FbByteMulAddC(d, ~a, s);
+ dest[i] = d;
+ }
+}
+
+
+
+
+#if 0
+void
+fbCompositeSolid_nx8888vmx (pixman_operator_t op,
+ PicturePtr pSrc,
+ PicturePtr pMask,
+ PicturePtr pDst,
+ INT16 xSrc,
+ INT16 ySrc,
+ INT16 xMask,
+ INT16 yMask,
+ INT16 xDst,
+ INT16 yDst,
+ CARD16 width,
+ CARD16 height)
+{
+ CARD32 src;
+ CARD32 *dstLine, *dst;
+ FbStride dstStride;
+
+ fbComposeGetSolid(pSrc, pDst, src);
+
+ if (src >> 24 == 0)
+ return;
+
+ fbComposeGetStart (pDst, xDst, yDst, CARD32, dstStride, dstLine, 1);
+
+ while (height--)
+ {
+ dst = dstLine;
+ dstLine += dstStride;
+// vmxCombineOverU(dst, src, width);
+ }
+}
+
+void
+fbCompositeSolid_nx0565mmx (pixman_operator_t op,
+ PicturePtr pSrc,
+ PicturePtr pMask,
+ PicturePtr pDst,
+ INT16 xSrc,
+ INT16 ySrc,
+ INT16 xMask,
+ INT16 yMask,
+ INT16 xDst,
+ INT16 yDst,
+ CARD16 width,
+ CARD16 height)
+{
+ CARD32 src;
+ CARD16 *dstLine, *dst;
+ CARD16 w;
+ FbStride dstStride;
+
+ fbComposeGetSolid(pSrc, pDst, src);
+
+ if (src >> 24 == 0)
+ return;
+
+ fbComposeGetStart (pDst, xDst, yDst, CARD16, dstStride, dstLine, 1);
+
+ while (height--)
+ {
+ dst = dstLine;
+ dstLine += dstStride;
+ vmxCombineOverU565(dst, src, width);
+ }
+}
+
+#endif
+
+extern FbComposeFunctions composeFunctions;
+
+void fbComposeSetupVMX(void)
+{
+ /* check if we have VMX support and initialize accordingly */
+ if (fbHaveVMX()) {
+ composeFunctions.combineU[PIXMAN_OPERATOR_OVER] = vmxCombineOverU;
+ composeFunctions.combineU[PIXMAN_OPERATOR_OVER_REVERSE] = vmxCombineOverReverseU;
+ composeFunctions.combineU[PIXMAN_OPERATOR_IN] = vmxCombineInU;
+ composeFunctions.combineU[PIXMAN_OPERATOR_IN_REVERSE] = vmxCombineInReverseU;
+ composeFunctions.combineU[PIXMAN_OPERATOR_OUT] = vmxCombineOutU;
+ composeFunctions.combineU[PIXMAN_OPERATOR_OUT_REVERSE] = vmxCombineOutReverseU;
+ composeFunctions.combineU[PIXMAN_OPERATOR_ATOP] = vmxCombineAtopU;
+ composeFunctions.combineU[PIXMAN_OPERATOR_ATOP_REVERSE] = vmxCombineAtopReverseU;
+ composeFunctions.combineU[PIXMAN_OPERATOR_XOR] = vmxCombineXorU;
+ composeFunctions.combineU[PIXMAN_OPERATOR_ADD] = vmxCombineAddU;
+
+ composeFunctions.combineC[PIXMAN_OPERATOR_SRC] = vmxCombineSrcC;
+ composeFunctions.combineC[PIXMAN_OPERATOR_OVER] = vmxCombineOverC;
+/* composeFunctions.combineC[PIXMAN_OPERATOR_OVER_REVERSE] = vmxCombineOverReverseC;
+ composeFunctions.combineC[PIXMAN_OPERATOR_IN] = vmxCombineInC;
+ composeFunctions.combineC[PIXMAN_OPERATOR_IN_REVERSE] = vmxCombineInReverseC;
+ composeFunctions.combineC[PIXMAN_OPERATOR_OUT] = vmxCombineOutC;
+ composeFunctions.combineC[PIXMAN_OPERATOR_OUT_REVERSE] = vmxCombineOutReverseC;
+ composeFunctions.combineC[PIXMAN_OPERATOR_ATOP] = vmxCombineAtopC;
+ composeFunctions.combineC[PIXMAN_OPERATOR_ATOP_REVERSE] = vmxCombineAtopReverseC;
+ composeFunctions.combineC[PIXMAN_OPERATOR_XOR] = vmxCombineXorC;
+ composeFunctions.combineC[PIXMAN_OPERATOR_ADD] = vmxCombineAddC;
+*/
+ composeFunctions.combineMaskU = vmxCombineMaskU;
+
+ }
+}
+
+
+
+
+#if 0
+int main (int argc, char** argv)
+{
+
+ int data[5][10245] __attribute__((aligned(16)));
+ int i;
+ //input data
+ for (i = 0; i<10240; i++) data[0][i] = data[1][i] = data[2][i] =
+ data[3][i] = (i&0xff) * 0x01010101;
+ //
+ for (i = 0; i<10240; i++) data[4][i] = (i&0xff) * 0x01010101;
+
+ for (i = 0; i<10240; i++)
+ if (data[0][i] != data[1][i]) {
+ //printf ("wrong byte %d : %d != %d\n",i , data[0][i], data[1][i]);
+ }
+
+ //printf ("combine \n");
+ fbCombineSrcC (data[0], data[2], data[4], 1024);
+ vmxCombineSrcC (data[1], data[3], data[4], 1024);
+
+ for (i = 0; i<10240; i++)
+ if (data[0][i] != data[1][i]) {
+ //printf ("wrong byte %0d : %0x != %0x\n",i , data[0][i], data[1][i]);
+ }
+ return 0;
+}
+
+#endif
diff --git a/pixman/src/fbvmx.h b/pixman/src/fbvmx.h
new file mode 100644
index 0000000..e690dbf
--- /dev/null
+++ b/pixman/src/fbvmx.h
@@ -0,0 +1,316 @@
+/*
+ * Copyright 2006 Luca Barbato
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Red Hat not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission. Red Hat makes no representations about the
+ * suitability of this software for any purpose. It is provided "as is"
+ * without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ *
+ * Author: Luca Barbato (lu_zero at gentoo.org)
+ *
+ * Based on work by Owen Taylor, Søren Sandmann and Lars Knoll
+ */
+#ifdef USE_VMX
+
+pixman_private
+Bool fbHaveVMX(void);
+
+#else
+#define fbHaveVMX() FALSE
+#endif
+
+#ifdef USE_VMX
+
+#define AVV(x...) {x}
+
+
+pixman_private
+void fbComposeSetupVMX(void);
+
+
+#if 0
+pixman_private
+void fbCompositeIn_nx8x8vmx (pixman_operator_t op,
+ PicturePtr pSrc,
+ PicturePtr pMask,
+ PicturePtr pDst,
+ INT16 xSrc,
+ INT16 ySrc,
+ INT16 xMask,
+ INT16 yMask,
+ INT16 xDst,
+ INT16 yDst,
+ CARD16 width,
+ CARD16 height);
+
+pixman_private
+void fbCompositeSolidMask_nx8888x0565Cvmx (pixman_operator_t op,
+ PicturePtr pSrc,
+ PicturePtr pMask,
+ PicturePtr pDst,
+ INT16 xSrc,
+ INT16 ySrc,
+ INT16 xMask,
+ INT16 yMask,
+ INT16 xDst,
+ INT16 yDst,
+ CARD16 width,
+ CARD16 height);
+pixman_private
+void fbCompositeSrcAdd_8888x8888vmx (pixman_operator_t op,
+ PicturePtr pSrc,
+ PicturePtr pMask,
+ PicturePtr pDst,
+ INT16 xSrc,
+ INT16 ySrc,
+ INT16 xMask,
+ INT16 yMask,
+ INT16 xDst,
+ INT16 yDst,
+ CARD16 width,
+ CARD16 height);
+pixman_private
+void fbCompositeSolidMask_nx8888x8888Cvmx (pixman_operator_t op,
+ PicturePtr pSrc,
+ PicturePtr pMask,
+ PicturePtr pDst,
+ INT16 xSrc,
+ INT16 ySrc,
+ INT16 xMask,
+ INT16 yMask,
+ INT16 xDst,
+ INT16 yDst,
+ CARD16 width,
+ CARD16 height);
+pixman_private
+void fbCompositeSolidMask_nx8x8888vmx (pixman_operator_t op,
+ PicturePtr pSrc,
+ PicturePtr pMask,
+ PicturePtr pDst,
+ INT16 xSrc,
+ INT16 ySrc,
+ INT16 xMask,
+ INT16 yMask,
+ INT16 xDst,
+ INT16 yDst,
+ CARD16 width,
+ CARD16 height);
+pixman_private
+void fbCompositeSolidMaskSrc_nx8x8888vmx (pixman_operator_t op,
+ PicturePtr pSrc,
+ PicturePtr pMask,
+ PicturePtr pDst,
+ INT16 xSrc,
+ INT16 ySrc,
+ INT16 xMask,
+ INT16 yMask,
+ INT16 xDst,
+ INT16 yDst,
+ CARD16 width,
+ CARD16 height);
+
+pixman_private
+void fbCompositeSrcAdd_8888x8x8vmx (pixman_operator_t op,
+ PicturePtr pSrc,
+ PicturePtr pMask,
+ PicturePtr pDst,
+ INT16 xSrc,
+ INT16 ySrc,
+ INT16 xMask,
+ INT16 yMask,
+ INT16 xDst,
+ INT16 yDst,
+ CARD16 width,
+ CARD16 height);
+
+pixman_private
+void fbCompositeIn_8x8vmx (pixman_operator_t op,
+ PicturePtr pSrc,
+ PicturePtr pMask,
+ PicturePtr pDst,
+ INT16 xSrc,
+ INT16 ySrc,
+ INT16 xMask,
+ INT16 yMask,
+ INT16 xDst,
+ INT16 yDst,
+ CARD16 width,
+ CARD16 height);
+
+pixman_private
+void fbCompositeSrcAdd_8000x8000vmx (pixman_operator_t op,
+ PicturePtr pSrc,
+ PicturePtr pMask,
+ PicturePtr pDst,
+ INT16 xSrc,
+ INT16 ySrc,
+ INT16 xMask,
+ INT16 yMask,
+ INT16 xDst,
+ INT16 yDst,
+ CARD16 width,
+ CARD16 height);
+pixman_private
+void fbCompositeSrc_8888RevNPx8888vmx (pixman_operator_t op,
+ PicturePtr pSrc,
+ PicturePtr pMask,
+ PicturePtr pDst,
+ INT16 xSrc,
+ INT16 ySrc,
+ INT16 xMask,
+ INT16 yMask,
+ INT16 xDst,
+ INT16 yDst,
+ CARD16 width,
+ CARD16 height);
+pixman_private
+void fbCompositeSrc_8888x0565vmx (pixman_operator_t op,
+ PicturePtr pSrc,
+ PicturePtr pMask,
+ PicturePtr pDst,
+ INT16 xSrc,
+ INT16 ySrc,
+ INT16 xMask,
+ INT16 yMask,
+ INT16 xDst,
+ INT16 yDst,
+ CARD16 width,
+ CARD16 height);
+pixman_private
+void fbCompositeSrc_8888RevNPx0565vmx (pixman_operator_t op,
+ PicturePtr pSrc,
+ PicturePtr pMask,
+ PicturePtr pDst,
+ INT16 xSrc,
+ INT16 ySrc,
+ INT16 xMask,
+ INT16 yMask,
+ INT16 xDst,
+ INT16 yDst,
+ CARD16 width,
+ CARD16 height);
+pixman_private
+void fbCompositeSolid_nx8888vmx (pixman_operator_t op,
+ PicturePtr pSrc,
+ PicturePtr pMask,
+ PicturePtr pDst,
+ INT16 xSrc,
+ INT16 ySrc,
+ INT16 xMask,
+ INT16 yMask,
+ INT16 xDst,
+ INT16 yDst,
+ CARD16 width,
+ CARD16 height);
+pixman_private
+void fbCompositeSolid_nx0565vmx (pixman_operator_t op,
+ PicturePtr pSrc,
+ PicturePtr pMask,
+ PicturePtr pDst,
+ INT16 xSrc,
+ INT16 ySrc,
+ INT16 xMask,
+ INT16 yMask,
+ INT16 xDst,
+ INT16 yDst,
+ CARD16 width,
+ CARD16 height);
+pixman_private
+void fbCompositeSolidMask_nx8x0565vmx (pixman_operator_t op,
+ PicturePtr pSrc,
+ PicturePtr pMask,
+ PicturePtr pDst,
+ INT16 xSrc,
+ INT16 ySrc,
+ INT16 xMask,
+ INT16 yMask,
+ INT16 xDst,
+ INT16 yDst,
+ CARD16 width,
+ CARD16 height);
+pixman_private
+void fbCompositeSrc_x888x8x8888vmx (pixman_operator_t op,
+ PicturePtr pSrc,
+ PicturePtr pMask,
+ PicturePtr pDst,
+ INT16 xSrc,
+ INT16 ySrc,
+ INT16 xMask,
+ INT16 yMask,
+ INT16 xDst,
+ INT16 yDst,
+ CARD16 width,
+ CARD16 height);
+pixman_private
+void fbCompositeSrc_8888x8x8888vmx (pixman_operator_t op,
+ PicturePtr pSrc,
+ PicturePtr pMask,
+ PicturePtr pDst,
+ INT16 xSrc,
+ INT16 ySrc,
+ INT16 xMask,
+ INT16 yMask,
+ INT16 xDst,
+ INT16 yDst,
+ CARD16 width,
+ CARD16 height);
+pixman_private
+void fbCompositeSrc_8888x8888vmx (pixman_operator_t op,
+ PicturePtr pSrc,
+ PicturePtr pMask,
+ PicturePtr pDst,
+ INT16 xSrc,
+ INT16 ySrc,
+ INT16 xMask,
+ INT16 yMask,
+ INT16 xDst,
+ INT16 yDst,
+ CARD16 width,
+ CARD16 height);
+pixman_private
+Bool fbCopyAreavmx (FbPixels *pSrc,
+ FbPixels *pDst,
+ int src_x,
+ int src_y,
+ int dst_x,
+ int dst_y,
+ int width,
+ int height);
+
+pixman_private
+void fbCompositeCopyAreavmx (pixman_operator_t op,
+ PicturePtr pSrc,
+ PicturePtr pMask,
+ PicturePtr pDst,
+ INT16 xSrc,
+ INT16 ySrc,
+ INT16 xMask,
+ INT16 yMask,
+ INT16 xDst,
+ INT16 yDst,
+ CARD16 width,
+ CARD16 height);
+
+pixman_private
+Bool fbSolidFillvmx (FbPixels *pDraw,
+ int x,
+ int y,
+ int width,
+ int height,
+ FbBits xor);
+#endif
+#endif /* USE_VMX */