[cairo] Cairo/pixman behavior with large translations

Thu Dec 6 06:09:40 PST 2012

On Thu, 06 Dec 2012 14:02:11 +0100
sandmann at cs.au.dk (Søren Sandmann) wrote:

> Søren Sandmann <sandmann at cs.au.dk> writes:
> 
> > Intermediate results from matrix and vector multiplications fit in 64
> > bits, and
> 
> That is, as long as the final result doesn't overflow the 31.16 type,
> the intermediate values fit in 64 bits.

We are fine if we don't mind dealing with two intermediate 64-bit
variables for doing multiplication and accumulation ('lo' and 'hi'
variables in the example below):

/*******************************************************************/

#include <stdint.h>
#include <stdlib.h>
#include <assert.h>
#include <math.h>

/* a reference implementation (needs 128-bit int types) */
static uint64_t
ref_mul_16_16_by_31_16_to_48_16 (int32_t fix_16_16, int64_t fix_31_16)
{
    return ((__int128_t)fix_16_16 * fix_31_16 + 0x8000) >> 16;
}

/* this is a reference floating point implementation for comparison */
static __float128
float_mul_16_16_by_31_16_to_48_16 (int32_t fix_16_16, int64_t fix_31_16)
{
    __float128 a = (__float128)fix_16_16 / 65536.;
    __float128 b = (__float128)fix_31_16 / 65536.;
    return a * b;
}

/* this implementation does not need 128-bit data types */
static uint64_t
mul_16_16_by_31_16_to_48_16 (int32_t fix_16_16, int64_t fix_31_16)
{
    int64_t hi = (int64_t)fix_16_16 * (fix_31_16 >> 16);
    int64_t lo = (int64_t)fix_16_16 * (fix_31_16 & 0xFFFF);
    return hi + ((lo + 0x8000) >> 16);
}

void fillrand (void *p, int size)
{
    uint8_t *bytep = (uint8_t *)p;
    while (size--)
        *bytep++ = rand() & 0xFF;
}

int main (void)
{
    int i;
    for (i = 0; i < 100000000; i++)
    {
        int32_t fix_16_16;
        int64_t fix_31_16;
        int64_t ref_res, res;
        __float128 float_res; /* 64-bit double data type is not enough */

        fillrand (&fix_16_16, sizeof(fix_16_16));
        fillrand (&fix_31_16, sizeof(fix_31_16));

        /* in fact this will be 33.16 random number after this shift,
         * but in the matrix by vector multiplication code we want to
         * be able to add together 3 multiplication results without
         * overflow, hence the fixed 31.16 data type is interesting
         * for the extra 2 bits headroom */
        fix_31_16 >>= 15; 

        /* test the correctness of mul_16_16_by_31_16_to_48_16 by comparing
         * it with the other implementations */
        ref_res   = ref_mul_16_16_by_31_16_to_48_16 (fix_16_16, fix_31_16);
        float_res = float_mul_16_16_by_31_16_to_48_16 (fix_16_16, fix_31_16);
        res       = mul_16_16_by_31_16_to_48_16 (fix_16_16, fix_31_16);

        assert(ref_res == res);
        assert(fabs(float_res * 65536. - (__float128)res) < 0.51);
    }
    return 0;
}

/*******************************************************************/

-- 
Best regards,
Siarhei Siamashka