[PATCH] [sse2] Fix rounding bug in conversion from 565 to 8888

Søren Sandmann Pedersen  sandmann at daimi.au.dk
Sun Sep 14 12:41:12 PDT 2008

Date: Sun, 14 Sep 2008 14:58:00 -0400
Subject: [PATCH] [sse2] Fix rounding bug in conversion from 565 to 8888

When converting from 565 to 8888, replicate the topmost bits instead
of appending zeros.
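
Appending zeros means a fully saturated 5-bit channel (0x1f) only reaches
0xf8 after conversion, so pure white in 565 comes out slightly darker in
8888. Replicating the top bits into the freed low bits maps 0x1f to 0xff.
The new Mask565FixRB/Mask565FixG masks below pick out those top bits so
they can be OR'ed back in. As a scalar sketch (for illustration only, not
part of the patch; the helper name is made up), the per-pixel conversion is:

    #include <stdint.h>

    /* Hypothetical scalar helper: widen each 565 channel to 8 bits by
     * copying its top bits into the freed low bits. */
    static uint32_t
    convert_0565_to_8888 (uint16_t p)
    {
        uint32_t r = (p >> 11) & 0x1f;   /* 5-bit red   */
        uint32_t g = (p >> 5)  & 0x3f;   /* 6-bit green */
        uint32_t b =  p        & 0x1f;   /* 5-bit blue  */

        r = (r << 3) | (r >> 2);         /* replicate top 3 bits */
        g = (g << 2) | (g >> 4);         /* replicate top 2 bits */
        b = (b << 3) | (b >> 2);         /* replicate top 3 bits */

        /* alpha forced opaque here; the SSE2 path handles alpha separately */
        return 0xff000000 | (r << 16) | (g << 8) | b;
    }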
---
 pixman/pixman-sse2.c |   52 ++++++++++++++++++++++++++++++++++------------------
 1 files changed, 34 insertions(+), 18 deletions(-)

diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index cc08189..0c671ed 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -1,3 +1,4 @@
+#include <stdio.h>
 /*
  * Copyright © 2008 Rodrigo Kumpera
  * Copyright © 2008 André Tupinambá
@@ -73,6 +74,9 @@ static __m128i MaskRed;
 static __m128i MaskGreen;
 static __m128i MaskBlue;
 
+static __m128i Mask565FixRB;
+static __m128i Mask565FixG;
+
 /* -------------------------------------------------------------------------------------------------
  * SSE2 Inlines
  */
@@ -89,26 +93,37 @@ unpack_128_2x128 (__m128i data, __m128i* dataLo, __m128i* dataHi)
     *dataHi = _mm_unpackhi_epi8 (data, _mm_setzero_si128 ());
 }
 
+static inline __m128i
+unpack565to8888 (__m128i lo)
+{
+    __m128i r, g, b, rb, t;
+
+    r = _mm_and_si128 (_mm_slli_epi32 (lo, 8), MaskRed);
+    g = _mm_and_si128 (_mm_slli_epi32 (lo, 5), MaskGreen);
+    b = _mm_and_si128 (_mm_slli_epi32 (lo, 3), MaskBlue);
+
+    rb = _mm_or_si128 (r, b);
+    t  = _mm_and_si128 (rb, Mask565FixRB);
+    t  = _mm_srli_epi32 (t, 5);
+    rb = _mm_or_si128 (rb, t);
+
+    t  = _mm_and_si128 (g, Mask565FixG);
+    t  = _mm_srli_epi32 (t, 6);
+    g  = _mm_or_si128 (g, t);
+
+    return _mm_or_si128 (rb, g);
+}
+
 static inline void
 unpack565_128_4x128 (__m128i data, __m128i* data0, __m128i* data1, __m128i* data2, __m128i* data3)
 {
     __m128i lo, hi;
-    __m128i r, g, b;
 
     lo = _mm_unpacklo_epi16 (data, _mm_setzero_si128 ());
     hi = _mm_unpackhi_epi16 (data, _mm_setzero_si128 ());
 
-    r = _mm_and_si128 (_mm_slli_epi32 (lo, 8), MaskRed);
-    g = _mm_and_si128 (_mm_slli_epi32 (lo, 5), MaskGreen);
-    b = _mm_and_si128 (_mm_slli_epi32 (lo, 3), MaskBlue);
-
-    lo = _mm_or_si128 (_mm_or_si128 (r, g), b);
-
-    r = _mm_and_si128 (_mm_slli_epi32 (hi, 8), MaskRed);
-    g = _mm_and_si128 (_mm_slli_epi32 (hi, 5), MaskGreen);
-    b = _mm_and_si128 (_mm_slli_epi32 (hi, 3), MaskBlue);
-
-    hi = _mm_or_si128 (_mm_or_si128 (r, g), b);
+    lo = unpack565to8888 (lo);
+    hi = unpack565to8888 (hi);
 
     unpack_128_2x128 (lo, data0, data1);
     unpack_128_2x128 (hi, data2, data3);
@@ -2297,7 +2312,8 @@ fbComposeSetupSSE2(void)
         MaskRed   = createMask_2x32_128 (0x00f80000, 0x00f80000);
         MaskGreen = createMask_2x32_128 (0x0000fc00, 0x0000fc00);
         MaskBlue  = createMask_2x32_128 (0x000000f8, 0x000000f8);
-
+	Mask565FixRB = createMask_2x32_128 (0x00e000e0, 0x00e000e0);
+	Mask565FixG  = createMask_2x32_128 (0x0000c000, 0x0000c000);
         Mask0080 =3D createMask_16_128 (0x0080);
         Mask00ff =3D createMask_16_128 (0x00ff);
         Mask0101 =3D createMask_16_128 (0x0101);
@@ -2484,6 +2500,7 @@ fbCompositeSolid_nx0565sse2 (pixman_op_t op,
         while (w && (unsigned long)dst & 15)
         {
             d = *dst;
+
             *dst++ = pack565_32_16 (pack_1x64_32 (over_1x64 (_mm_movepi64_pi64 (xmmSrc),
                                                              _mm_movepi64_pi64 (xmmAlpha),
                                                              expand565_16_1x64 (d))));
@@ -2498,15 +2515,14 @@ fbCompositeSolid_nx0565sse2 (pixman_op_t op,
             /* fill cache line with next memory */
             cachePrefetchNext ((__m128i*)dst);
 
-            xmmDst = load128Aligned ((__m128i*)dst);
-
-            unpack565_128_4x128 (xmmDst, &xmmDst0, &xmmDst1, &xmmDst2, &xmmDst3);
-
+	    xmmDst = load128Aligned ((__m128i*)dst);
+
+	    unpack565_128_4x128 (xmmDst, &xmmDst0, &xmmDst1, &xmmDst2, &xmmDst3);
+
             over_2x128 (&xmmSrc, &xmmSrc, &xmmAlpha, &xmmAlpha, &xmmDst0, &xmmDst1);
             over_2x128 (&xmmSrc, &xmmSrc, &xmmAlpha, &xmmAlpha, &xmmDst2, &xmmDst3);
 
             xmmDst = pack565_4x128_128 (&xmmDst0, &xmmDst1, &xmmDst2, &xmmDst3);
-
             save128Aligned ((__m128i*)dst, xmmDst);
 
             dst += 8;
-- 
1.6.0.1

