[cairo-commit] libpixman/src ic.c,1.30,1.31
Jeff Muizelaar
commit at pdx.freedesktop.org
Sat Jul 30 08:48:27 PDT 2005
Committed by: jrmuizel
Update of /cvs/cairo/libpixman/src
In directory gabe:/tmp/cvs-serv12223/src
Modified Files:
ic.c
Log Message:
2005-07-30 Jeff Muizelaar <jeff at infidigm.net>
* src/ic.c: (fbCompositeSolidMask_nx8x0888),
(fbCompositeSolidMask_nx8x0565), (fbCompositeTrans_0565xnx0565),
(fbCompositeTrans_0888xnx0888), (fbCompositeSrcSrc_nxn),
(pixman_composite):
Add some optimizations from jaymz. Also adds some compile warnings
that will hopefully go away as we continue merging.
Index: ic.c
===================================================================
RCS file: /cvs/cairo/libpixman/src/ic.c,v
retrieving revision 1.30
retrieving revision 1.31
diff -u -d -r1.30 -r1.31
--- ic.c 16 Jul 2005 18:27:32 -0000 1.30
+++ ic.c 30 Jul 2005 15:48:25 -0000 1.31
@@ -150,6 +150,51 @@
(line) = ((type *) __bits__) + (stride) * ((y) + __yoff__) + (mul) * ((x) + __xoff__); \
}
+#define genericCombine24(a,b,c,d) (((a)*(c)+(b)*(d)))
+
+#define fastcombine32(alpha, source, destval, destptr, dstrb, dstag, drb, dag) \
+ dstrb=destval&0xFF00FF; dstag=(destval>>8)&0xFF00FF; \
+ drb=((source&0xFF00FF)-dstrb)*alpha; dag=(((source>>8)&0xFF00FF)-dstag)*alpha; \
+ *destptr++=((((drb>>8) + dstrb) & 0x00FF00FF) | ((((dag>>8) + dstag) << 8) & 0xFF00FF00)); \
+
+#define fastcombine32(alpha, source, destval, destptr, dstrb, dstag, drb, dag) \
+ dstrb=destval&0xFF00FF; dstag=(destval>>8)&0xFF00FF; \
+ drb=((source&0xFF00FF)-dstrb)*alpha; dag=(((source>>8)&0xFF00FF)-dstag)*alpha; \
+ *destptr++=((((drb>>8) + dstrb) & 0x00FF00FF) | ((((dag>>8) + dstag) << 8) & 0xFF00FF00)); \
+
+// Note: this macro expects 6 bits of alpha, not 8!
+#define fastCombine0565(alpha, source, destval, destptr) { \
+ CARD16 dstrb = destval & 0xf81f; CARD16 dstg = destval & 0x7e0; \
+ CARD32 drb = ((source&0xf81f)-dstrb)*alpha; CARD32 dg=((source & 0x7e0)-dstg)*alpha; \
+ destptr= ((((drb>>6) + dstrb)&0xf81f) | (((dg>>6) + dstg) & 0x7e0)); \
+ }
+
+#if IMAGE_BYTE_ORDER == LSBFirst
+ #define setupPackedReader(count,temp,where,workingWhere,workingVal) count=(int)where; \
+ temp=count&3; \
+ where-=temp; \
+ workingWhere=(CARD32 *)where; \
+ workingVal=*workingWhere++; \
+ count=4-temp; \
+ workingVal>>=(8*temp)
+ #define readPacked(where,x,y,z) {if(!(x)) { (x)=4; y=*z++; } where=(y)&0xff; (y)>>=8; (x)--;}
+ #define readPackedSource(where) readPacked(where,ws,workingSource,wsrc)
+ #define readPackedDest(where) readPacked(where,wd,workingiDest,widst)
+ #define writePacked(what) workingoDest>>=8; workingoDest|=(what<<24); ww--; if(!ww) { ww=4; *wodst++=workingoDest; }
+#else
+ #warning "I havn't tested fbCompositeTrans_0888xnx0888() on big endian yet!"
+ #define setupPackedReader(count,temp,where,workingWhere,workingVal) count=(int)where; \
+ temp=count&3; \
+ where-=temp; \
+ workingWhere=(CARD32 *)where; \
+ workingVal=*workingWhere++; \
+ count=4-temp; \
+ workingVal<<=(8*temp)
+ #define readPacked(where,x,y,z) {if(!(x)) { (x)=4; y=*z++; } where=(y)>>24; (y)<<=8; (x)--;}
+ #define readPackedSource(where) readPacked(where,ws,workingSource,wsrc)
+ #define readPackedDest(where) readPacked(where,wd,workingiDest,widst)
+ #define writePacked(what) workingoDest<<=8; workingoDest|=what; ww--; if(!ww) { ww=4; *wodst++=workingoDest; }
+#endif
/*
* Naming convention:
*
@@ -287,6 +332,7 @@
}
}
+#define srcAlphaCombine24(a,b) genericCombine24(a,b,srca,srcia)
static void
fbCompositeSolidMask_nx8x0888 (pixman_operator_t op,
PicturePtr pSrc,
@@ -301,52 +347,86 @@
CARD16 width,
CARD16 height)
{
- CARD32 src, srca;
- CARD8 *dstLine, *dst;
+ CARD32 src, srca, srcia;
+ CARD8 *dstLine, *dst, *edst;
CARD32 d;
CARD8 *maskLine, *mask, m;
FbStride dstStride, maskStride;
CARD16 w;
+ CARD32 rs,gs,bs,rd,gd,bd;
fbComposeGetSolid(pSrc, src);
srca = src >> 24;
+ srcia = 255-srca;
if (src == 0)
return;
+
+ rs=src&0xff;
+ gs=(src>>8)&0xff;
+ bs=(src>>16)&0xff;
fbComposeGetStart (pDst, xDst, yDst, CARD8, dstStride, dstLine, 3);
fbComposeGetStart (pMask, xMask, yMask, CARD8, maskStride, maskLine, 1);
-
- while (height--)
- {
- dst = dstLine;
- dstLine += dstStride;
- mask = maskLine;
- maskLine += maskStride;
- w = width;
- while (w--)
+ while (height--)
{
- m = *mask++;
- if (m == 0xff)
- {
- if (srca == 0xff)
- d = src;
- else
+ // fixme: cleanup unused
+ unsigned int wt,wd;
+ CARD32 workingiDest;
+ CARD32 *widst;
+
+ edst=dst = dstLine;
+ dstLine += dstStride;
+ mask = maskLine;
+ maskLine += maskStride;
+ w = width;
+
+#ifndef NO_MASKED_PACKED_READ
+ setupPackedReader(wd,wt,edst,widst,workingiDest);
+#endif
+
+ while (w--)
{
- d = Fetch24(dst);
- d = fbOver24 (src, d);
+#ifndef NO_MASKED_PACKED_READ
+ readPackedDest(rd);
+ readPackedDest(gd);
+ readPackedDest(bd);
+#else
+ rd= *edst++;
+ gd= *edst++;
+ bd= *edst++;
+#endif
+ m = *mask++;
+ if (m == 0xff)
+ {
+ if (srca == 0xff)
+ {
+ *dst++=rs;
+ *dst++=gs;
+ *dst++=bs;
+ }
+ else
+ {
+ *dst++=(srcAlphaCombine24(rs, rd)>>8);
+ *dst++=(srcAlphaCombine24(gs, gd)>>8);
+ *dst++=(srcAlphaCombine24(bs, bd)>>8);
+ }
+ }
+ else if (m)
+ {
+ int na=(srca*(int)m)>>8;
+ int nia=255-na;
+ *dst++=(genericCombine24(rs, rd, na, nia)>>8);
+ *dst++=(genericCombine24(gs, gd, na, nia)>>8);
+ *dst++=(genericCombine24(bs, bd, na, nia)>>8);
+ }
+ else
+ {
+ dst+=3;
+ }
}
- Store24(dst,d);
- }
- else if (m)
- {
- d = fbOver24 (fbIn(src,m), Fetch24(dst));
- Store24(dst,d);
- }
- dst += 3;
}
- }
}
static void
@@ -363,55 +443,60 @@
CARD16 width,
CARD16 height)
{
- CARD32 src, srca;
+ CARD32 src, srca,na, rsrca;
CARD16 *dstLine, *dst;
- CARD32 d;
+ CARD16 d;
CARD8 *maskLine, *mask, m;
FbStride dstStride, maskStride;
- CARD16 w;
+ CARD16 w,src16;
fbComposeGetSolid(pSrc, src);
+ src16 = cvt8888to0565(src);
- srca = src >> 24;
+ rsrca = src >> 24;
+ srca=rsrca>>2;
if (src == 0)
- return;
+ return;
fbComposeGetStart (pDst, xDst, yDst, CARD16, dstStride, dstLine, 1);
fbComposeGetStart (pMask, xMask, yMask, CARD8, maskStride, maskLine, 1);
-
- while (height--)
- {
- dst = dstLine;
- dstLine += dstStride;
- mask = maskLine;
- maskLine += maskStride;
- w = width;
-
- while (w--)
+
+ while (height--)
{
- m = *mask++;
- if (m == 0xff)
- {
- if (srca == 0xff)
- d = src;
- else
+ dst = dstLine;
+ dstLine += dstStride;
+ mask = maskLine;
+ maskLine += maskStride;
+ w = width;
+
+ while (w--)
{
- d = *dst;
- d = fbOver24 (src, cvt0565to8888(d));
+ m = *mask++;
+ if (m == 0xff)
+ {
+ if (srca == 0xff)
+ {
+ *dst=src16;
+ }
+ else
+ {
+ d = *dst;
+ fastCombine0565(srca, src16, d, *dst++);
+ }
+ }
+ else if (m)
+ {
+ na=(rsrca*(int)m)>>10;
+ d = *dst;
+ fastCombine0565(na, src16, d, *dst++);
+ }
+ else
+ dst++;
}
- *dst = cvt8888to0565(d);
- }
- else if (m)
- {
- d = *dst;
- d = fbOver24 (fbIn(src,m), cvt0565to8888(d));
- *dst = cvt8888to0565(d);
- }
- dst++;
}
- }
}
+
static void
fbCompositeSolidMask_nx8888x0565C (pixman_operator_t op,
PicturePtr pSrc,
@@ -910,7 +995,7 @@
CARD32 s_32, d_32, i_32, r_32;
fbComposeGetSolid (pMask, mask);
- maskAlpha = mask >> 24;
+ maskAlpha = mask >> 26;
if (!maskAlpha)
return;
@@ -926,26 +1011,272 @@
fbComposeGetStart (pDst, xDst, yDst, CARD16, dstStride, dstLine, 1);
while (height--)
- {
- dst = dstLine;
- dstLine += dstStride;
- src = srcLine;
- srcLine += srcStride;
- w = width;
+ {
+ CARD32 *isrc;
+ dst = dstLine;
+ dstLine += dstStride;
+ src = srcLine;
+ srcLine += srcStride;
+ w = width;
+
+ if(((int)src&1)==1)
+ {
+ s_16 = *src++;
+ d_16 = *dst;
+ fastCombine0565(maskAlpha, s_16, d_16, *dst++);
+ w--;
+ }
+ isrc=(CARD32 *)src;
+ while (w>1)
+ {
+ s_32=*isrc++;
+#if IMAGE_BYTE_ORDER == LSBFirst
+ s_16=s_32&0xffff;
+#else
+ s_16=s_32>>16;
+#endif
+ d_16 = *dst;
+ fastCombine0565(maskAlpha, s_16, d_16, *dst++);
+#if IMAGE_BYTE_ORDER == LSBFirst
+ s_16=s_32>>16;
+#else
+ s_16=s_32&0xffff;
+#endif
+ d_16 = *dst;
+ fastCombine0565(maskAlpha, s_16, d_16, *dst++);
+ w-=2;
+ }
+ src=(CARD16 *)isrc;
+ if(w!=0)
+ {
+ s_16 = *src;
+ d_16 = *dst;
+ fastCombine0565(maskAlpha, s_16, d_16, *dst);
+ }
+ }
+}
+
+
+
+// macros for "i can't believe it's not fast" packed pixel handling
+#define alphamaskCombine24(a,b) genericCombine24(a,b,maskAlpha,maskiAlpha)
+static void
+fbCompositeTrans_0888xnx0888(pixman_operator_t op,
+ PicturePtr pSrc,
+ PicturePtr pMask,
+ PicturePtr pDst,
+ INT16 xSrc,
+ INT16 ySrc,
+ INT16 xMask,
+ INT16 yMask,
+ INT16 xDst,
+ INT16 yDst,
+ CARD16 width,
+ CARD16 height)
+{
+ CARD8 *dstLine, *dst,*idst;
+ CARD8 *srcLine, *src;
+ FbStride dstStride, srcStride;
+ CARD16 w;
+ FbBits mask;
+ CARD16 maskAlpha,maskiAlpha;
+
+ fbComposeGetSolid (pMask, mask);
+ maskAlpha = mask >> 24;
+ maskiAlpha= 255-maskAlpha;
+
+ if (!maskAlpha)
+ return;
+ //if (maskAlpha == 0xff)
+ //{
+ //fbCompositeSrc_0888x0888 (op, pSrc, pMask, pDst,
+ // xSrc, ySrc, xMask, yMask, xDst, yDst,
+ // width, height);
+ //return;
+ //}
+
+ fbComposeGetStart (pSrc, xSrc, ySrc, CARD8, srcStride, srcLine, 3);
+ fbComposeGetStart (pDst, xDst, yDst, CARD8, dstStride, dstLine, 3);
- while (w--)
{
- s_16 = *src++;
- s_32 = cvt0565to8888(s_16);
- d_16 = *dst;
- d_32 = cvt0565to8888(d_16);
-
- i_32 = fbIn24 (s_32, maskAlpha);
- r_32 = fbOver24 (i_32, d_32);
- r_16 = cvt8888to0565(r_32);
- *dst++ = r_16;
+ unsigned int ws,wt,wd,ww;
+ CARD32 workingSource;
+ CARD32 *wsrc;
+ CARD32 rs,gs,bs;
+ CARD32 rd,gd,bd;
+
+ CARD32 workingiDest,workingoDest;
+ CARD32 *widst,*wodst;
+
+
+ // are xSrc and xDst at the same alignment? if not, we need to be complicated :)
+ //if(0==0)
+ if( (((xSrc*3)&3)!=((xDst*3)&3)) || (srcStride&3)!=0 || (dstStride&3)!=0)
+ {
+ while (height--)
+ {
+ idst=dst = dstLine;
+ dstLine += dstStride;
+ src = srcLine;
+ srcLine += srcStride;
+ w = width*3;
+
+ setupPackedReader(wd,wt,idst,widst,workingiDest);
+ ww=(int)dst;
+ wt=ww&3;
+ dst-=wt;
+ wodst=(CARD32 *)dst;
+ workingoDest=*wodst;
+ ww=4-wt;
+#if IMAGE_BYTE_ORDER == LSBFirst
+ workingoDest<<=(8*(ww+1));
+#else
+ workingoDest>>=(8*(ww+1));
+#endif
+
+ // get to word aligned
+ switch(!(int)src&3)
+ {
+ case 1:
+ readPackedDest(rd);
+ rd=alphamaskCombine24(*src++, rd)>>8;
+ writePacked(rd);
+ w--; if(w==0) break;
+ case 2:
+ readPackedDest(rd);
+ rd=alphamaskCombine24(*src++, rd)>>8;
+ writePacked(rd);
+ w--; if(w==0) break;
+ case 3:
+ readPackedDest(rd);
+ rd=alphamaskCombine24(*src++, rd)>>8;
+ writePacked(rd);
+ w--; if(w==0) break;
+ }
+ wsrc=(CARD32 *)src;
+ while (w>3)
+ {
+ rs=*wsrc++;
+ // FIXME: write a version of readPackedDest() which
+ // can collect 4 bytes at once if we're on a boundary (which we're
+ // actually guaranteed not to be in this version, but do it anyhow), and can
+ // collect as 2 16bit words on a 2byte boundary, and then use the 32bit combine here
+#if IMAGE_BYTE_ORDER == LSBFirst
+ readPackedDest(rd);
+ rd=alphamaskCombine24(rs&0xff, rd)>>8;
+ writePacked(rd);
+
+ readPackedDest(rd);
+ rd=alphamaskCombine24((rs>>8)&0xff, rd)>>8;
+ writePacked(rd);
+
+ readPackedDest(rd);
+ rd=alphamaskCombine24((rs>>16)&0xff, rd)>>8;
+ writePacked(rd);
+
+ readPackedDest(rd);
+ rd=alphamaskCombine24(rs>>24, rd)>>8;
+ writePacked(rd);
+#else
+ readPackedDest(rd);
+ rd=alphamaskCombine24(rs>>24, rd)>>8;
+ writePacked(rd);
+
+ readPackedDest(rd);
+ rd=alphamaskCombine24((rs>>16)&0xff, rd)>>8;
+ writePacked(rd);
+
+ readPackedDest(rd);
+ rd=alphamaskCombine24((rs>>8)&0xff, rd)>>8;
+ writePacked(rd);
+
+ readPackedDest(rd);
+ rd=alphamaskCombine24(rs&0xff, rd)>>8;
+ writePacked(rd);
+#endif
+ w-=4;
+ }
+ src=(CARD8 *)wsrc;
+ switch(w)
+ {
+ case 3:
+ readPackedDest(rd);
+ rd=alphamaskCombine24(*src++, rd)>>8;
+ writePacked(rd);
+ case 2:
+ readPackedDest(rd);
+ rd=alphamaskCombine24(*src++, rd)>>8;
+ writePacked(rd);
+ case 1:
+ readPackedDest(rd);
+ rd=alphamaskCombine24(*src++, rd)>>8;
+ writePacked(rd);
+ }
+ dst=(CARD8 *)wodst;
+ switch(ww)
+ {
+ case 1:
+ dst[2]=(workingoDest>>8)&0xff;
+ case 2:
+ dst[1]=(workingoDest>>16)&0xff;
+ case 3:
+ dst[0]=workingoDest>>24;
+ }
+ }
+ }
+ else
+ {
+ while (height--)
+ {
+ idst=dst = dstLine;
+ dstLine += dstStride;
+ src = srcLine;
+ srcLine += srcStride;
+ w = width*3;
+ // get to word aligned
+ switch(!(int)src&3)
+ {
+ case 1:
+ rd=alphamaskCombine24(*src++, *dst)>>8;
+ *dst++=rd;
+ w--; if(w==0) break;
+ case 2:
+ rd=alphamaskCombine24(*src++, *dst)>>8;
+ *dst++=rd;
+ w--; if(w==0) break;
+ case 3:
+ rd=alphamaskCombine24(*src++, *dst)>>8;
+ *dst++=rd;
+ w--; if(w==0) break;
+ }
+ wsrc=(CARD32 *)src;
+ widst=(CARD32 *)dst;
+
+ register CARD32 t1, t2, t3, t4;
+ while(w>3)
+ {
+ rs = *wsrc++;
+ rd = *widst;
+ fastcombine32(maskAlpha, rs, rd, widst, t1, t2, t3, t4);
+ w-=4;
+ }
+ src=(CARD8 *)wsrc;
+ dst=(CARD8 *)widst;
+ switch(w)
+ {
+ case 3:
+ rd=alphamaskCombine24(*src++, *dst)>>8;
+ *dst++=rd;
+ case 2:
+ rd=alphamaskCombine24(*src++, *dst)>>8;
+ *dst++=rd;
+ case 1:
+ rd=alphamaskCombine24(*src++, *dst)>>8;
+ *dst++=rd;
+ }
+ }
+ }
}
- }
}
/*
@@ -973,29 +1304,80 @@
int dstXoff, dstYoff;
int srcBpp;
int dstBpp;
+ // these need to be signed now!
+ int iwidth=width;
+ int iheight=height;
Bool reverse = FALSE;
Bool upsidedown = FALSE;
-
- FbGetPixels(pSrc->pixels,src,srcStride,srcBpp,srcXoff,srcYoff);
- FbGetPixels(pDst->pixels,dst,dstStride,dstBpp,dstXoff,dstYoff);
+ int initialWidth=width;
+ int initialX=xDst;
- fbBlt (src + (ySrc + srcYoff) * srcStride,
- srcStride,
- (xSrc + srcXoff) * srcBpp,
+ // FIXME: this is possibly the worst piece of code I've ever written.
+ // My main objection to it is that it is incredibly slow in a few cases, due to the
+ // call-per-repeat structure of it - the *correct* solution is to implement
+ // repeat into fbBlt(), but that's a nontrivial job, and it's far more
+ // important to get the "requireRepeat" stuff implemented functionally
+ // first, *then* make it fast.
+ // -- jj
+ Bool srcRepeat=pSrc->repeat;
+ CARD32 srcHeight=pSrc->pDrawable->height;
+ CARD32 srcWidth=pSrc->pDrawable->width;
- dst + (yDst + dstYoff) * dstStride,
- dstStride,
- (xDst + dstXoff) * dstBpp,
+ FbGetPixels(pSrc->pixels,src,srcStride,srcBpp,srcXoff,srcYoff);
+ FbGetPixels(pDst->pixels,dst,dstStride,dstBpp,dstXoff,dstYoff);
- (width) * dstBpp,
- (height),
+ if(srcRepeat)
+ {
+ xSrc%=srcWidth;
+ ySrc%=srcHeight;
+ }
+
+ while(iheight>0)
+ {
+ int wheight=iheight;
+ if(wheight>(srcHeight-ySrc))
+ wheight=(srcHeight-ySrc);
+ iwidth=initialWidth;
+ xDst=initialX;
+ while(iwidth>0)
+ {
+ int wwidth=iwidth;
+ if(wwidth>(srcWidth-xSrc))
+ wwidth=(srcWidth-xSrc);
- GXcopy,
- FB_ALLONES,
- dstBpp,
+ fbBlt (src + (ySrc + srcYoff) * srcStride,
+ srcStride,
+ (xSrc + srcXoff) * srcBpp,
- reverse,
- upsidedown);
+ dst + (yDst + dstYoff) * dstStride,
+ dstStride,
+ (xDst + dstXoff) * dstBpp,
+
+ (wwidth) * dstBpp,
+ (wheight),
+
+ GXcopy,
+ FB_ALLONES,
+ dstBpp,
+
+ reverse,
+ upsidedown);
+ if(!srcRepeat)
+ iwidth=0;
+ else
+ {
+ xDst+=wwidth;
+ iwidth-=wwidth;
+ }
+ }
+ if(!srcRepeat)
+ iheight=0;
+ else
+ {
+ yDst+=wheight;
+ iheight-=wheight;
+ }
+ }
}
/*
@@ -1164,7 +1546,13 @@
if (pDst->format_code == pSrc->format_code)
func = fbCompositeTrans_0565xnx0565;
break;
+ case PICT_r8g8b8:
+ case PICT_b8g8r8:
+ if (pDst->format_code == pSrc->format_code)
+ func = fbCompositeTrans_0888xnx0888;
+ break;
}
+
if (func != pixman_compositeGeneral)
maskRepeat = FALSE;
}
@@ -1272,6 +1660,13 @@
n = pixman_region_num_rects (region);
pbox = pixman_region_rects (region);
+ // FIXME: this is basically a "white list" of composites that work
+ // with repeat until they are all implemented. Once that's done, we
+ // remove the checks below entirely
+ if(func==fbCompositeSrcSrc_nxn)
+ {
+ srcRepeat=maskRepeat=FALSE;
+ }
while (n--)
{
h = pbox->y2 - pbox->y1;
More information about the cairo-commit
mailing list