[cairo-commit] cairo/src cairo_unicode.c,NONE,1.1
Owen Taylor
commit at pdx.freedesktop.org
Tue Feb 1 16:29:19 PST 2005
- Previous message: [cairo-commit]
cairo/src Makefile.am, 1.36, 1.37 cairo-win32.h, 1.1,
1.2 cairo.c, 1.49, 1.50 cairo.h, 1.70, 1.71 cairo_ft_font.c,
1.39, 1.40 cairo_gstate.c, 1.78, 1.79 cairo_traps.c, 1.20,
1.21 cairo_win32_font.c, 1.1, 1.2 cairo_win32_surface.c, 1.1,
1.2 cairoint.h, 1.93, 1.94
- Next message: [cairo-commit] cairo-demo/cairo_snippets ChangeLog,1.24,1.25
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
Committed by: otaylor
Update of /cvs/cairo/cairo/src
In directory gabe:/tmp/cvs-serv18108/src
Added Files:
cairo_unicode.c
Log Message:
Commit earlier missed new file
2005-02-01 Owen Taylor <otaylor at redhat.com>
* src/cairo_unicode.c src/cairoint.h src/Makefile.am: Add
_cairo_utf8_to_utf16(), _cairo_utf8_to_ucs4() based on code from GLib.
--- NEW FILE: cairo_unicode.c ---
/* cairo_unicode.c: Unicode conversion routines
*
* The code in this file is derived from GLib's gutf8.c and
* ultimately from libunicode. It is relicensed under the
* dual LGPL/MPL with permission of the original authors.
*
* Copyright © 1999 Tom Tromey
* Copyright © 2005 Red Hat, Inc
*
* This library is free software; you can redistribute it and/or
* modify it either under the terms of the GNU Lesser General Public
* License version 2.1 as published by the Free Software Foundation
* (the "LGPL") or, at your option, under the terms of the Mozilla
* Public License Version 1.1 (the "MPL"). If you do not alter this
* notice, a recipient may use your version of this file under either
* the MPL or the LGPL.
*
* You should have received a copy of the LGPL along with this library
* in the file COPYING-LGPL-2.1; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
* You should have received a copy of the MPL along with this library
* in the file COPYING-MPL-1.1
*
* The contents of this file are subject to the Mozilla Public License
* Version 1.1 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY
* OF ANY KIND, either express or implied. See the LGPL or the MPL for
* the specific language governing rights and limitations.
*
* The Original Code is cairo_unicode.c as distributed with the
* cairo graphics library.
*
* The Initial Developer of the Original Code is Tom Tromey.
* and Red Hat, Inc.
*
* Contributor(s):
* Owen Taylor <otaylor at redhat.com>
*/
#include <limits.h>
#include <cairoint.h>
#define UTF8_COMPUTE(Char, Mask, Len) \
if (Char < 128) \
{ \
Len = 1; \
Mask = 0x7f; \
} \
else if ((Char & 0xe0) == 0xc0) \
{ \
Len = 2; \
Mask = 0x1f; \
} \
else if ((Char & 0xf0) == 0xe0) \
{ \
Len = 3; \
Mask = 0x0f; \
} \
else if ((Char & 0xf8) == 0xf0) \
{ \
Len = 4; \
Mask = 0x07; \
} \
else if ((Char & 0xfc) == 0xf8) \
{ \
Len = 5; \
Mask = 0x03; \
} \
else if ((Char & 0xfe) == 0xfc) \
{ \
Len = 6; \
Mask = 0x01; \
} \
else \
Len = -1;
#define UTF8_LENGTH(Char) \
((Char) < 0x80 ? 1 : \
((Char) < 0x800 ? 2 : \
((Char) < 0x10000 ? 3 : \
((Char) < 0x200000 ? 4 : \
((Char) < 0x4000000 ? 5 : 6)))))
#define UTF8_GET(Result, Chars, Count, Mask, Len) \
(Result) = (Chars)[0] & (Mask); \
for ((Count) = 1; (Count) < (Len); ++(Count)) \
{ \
if (((Chars)[(Count)] & 0xc0) != 0x80) \
{ \
(Result) = -1; \
break; \
} \
(Result) <<= 6; \
(Result) |= ((Chars)[(Count)] & 0x3f); \
}
#define UNICODE_VALID(Char) \
((Char) < 0x110000 && \
(((Char) & 0xFFFFF800) != 0xD800) && \
((Char) < 0xFDD0 || (Char) > 0xFDEF) && \
((Char) & 0xFFFE) != 0xFFFE)
static const char utf8_skip_data[256] = {
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1
};
#define UTF8_NEXT_CHAR(p) (char *)((p) + utf8_skip_data[*(unsigned char *)(p)])
/* Converts a sequence of bytes encoded as UTF-8 to a Unicode character.
* If @p does not point to a valid UTF-8 encoded character, results are
* undefined.
**/
static uint32_t
_utf8_get_char (const char *p)
{
int i, mask = 0, len;
uint32_t result;
unsigned char c = (unsigned char) *p;
UTF8_COMPUTE (c, mask, len);
if (len == -1)
return (uint32_t)-1;
UTF8_GET (result, p, i, mask, len);
return result;
}
/* Like _utf8_get_char, but take a maximum length
* and return (uint32_t)-2 on incomplete trailing character
*/
static uint32_t
_utf8_get_char_extended (const char *p,
long max_len)
{
int i, len;
uint32_t wc = (unsigned char) *p;
if (wc < 0x80) {
return wc;
} else if (wc < 0xc0) {
return (uint32_t)-1;
} else if (wc < 0xe0) {
len = 2;
wc &= 0x1f;
} else if (wc < 0xf0) {
len = 3;
wc &= 0x0f;
} else if (wc < 0xf8) {
len = 4;
wc &= 0x07;
} else if (wc < 0xfc) {
len = 5;
wc &= 0x03;
} else if (wc < 0xfe) {
len = 6;
wc &= 0x01;
} else {
return (uint32_t)-1;
}
if (max_len >= 0 && len > max_len) {
for (i = 1; i < max_len; i++) {
if ((((unsigned char *)p)[i] & 0xc0) != 0x80)
return (uint32_t)-1;
}
return (uint32_t)-2;
}
for (i = 1; i < len; ++i) {
uint32_t ch = ((unsigned char *)p)[i];
if ((ch & 0xc0) != 0x80) {
if (ch)
return (uint32_t)-1;
else
return (uint32_t)-2;
}
wc <<= 6;
wc |= (ch & 0x3f);
}
if (UTF8_LENGTH(wc) != len)
return (uint32_t)-1;
return wc;
}
/**
* _cairo_utf8_to_utf32:
* @str: an UTF-8 string
* @len: length of @str in bytes, or -1 if it is nul-terminated.
* If @len is supplied and the string has an embedded nul
* byte, only the portion before the nul byte is converted.
* @result: location to store a pointer to a newly allocated UTF-32
* string (always native endian). Free with free(). A 0
* word will be written after the last character.
* @items_written: location to store number of 32-bit words
* written. (Not including the trailing 0)
*
* Converts a UTF-8 string to UCS-4. UCS-4 is an encoding of Unicode
* with 1 32-bit word per character. The string is validated to
* consist entirely of valid Unicode characters.
*
* Return value: %CAIRO_STATUS_SUCCESS if the entire string was
* succesfully converted. %CAIRO_STATUS_INVALID_STRING if an
* an invalid sequence was found.
**/
cairo_status_t
_cairo_utf8_to_ucs4 (const char *str,
int len,
uint32_t **result,
int *items_written)
{
uint32_t *str32 = NULL;
int n_chars, i;
const char *in;
in = str;
n_chars = 0;
while ((len < 0 || str + len - in > 0) && *in)
{
uint32_t wc = _utf8_get_char_extended (in, str + len - in);
if (wc & 0x80000000 || !UNICODE_VALID (wc))
return CAIRO_STATUS_INVALID_STRING;
n_chars++;
if (n_chars == INT_MAX)
return CAIRO_STATUS_INVALID_STRING;
in = UTF8_NEXT_CHAR (in);
}
str32 = malloc (sizeof (uint32_t) * (n_chars + 1));
if (!str32)
return CAIRO_STATUS_NO_MEMORY;
in = str;
for (i=0; i < n_chars; i++) {
str32[i] = _utf8_get_char (in);
in = UTF8_NEXT_CHAR (in);
}
str32[i] = 0;
*result = str32;
if (items_written)
*items_written = n_chars;
return CAIRO_STATUS_SUCCESS;
}
/**
* _cairo_utf8_to_utf16:
* @str: an UTF-8 string
* @len: length of @str in bytes, or -1 if it is nul-terminated.
* If @len is supplied and the string has an embedded nul
* byte, only the portion before the nul byte is converted.
* @result: location to store a pointer to a newly allocated UTF-16
* string (always native endian). Free with free(). A 0
* word will be written after the last character.
* @items_written: location to store number of 16-bit words
* written. (Not including the trailing 0)
*
* Converts a UTF-8 string to UTF-16. UTF-16 is an encoding of Unicode
* where characters are represented either as a single 16-bit word, or
* as a pair of 16-bit "surrogates". The string is validated to
* consist entirely of valid Unicode characters.
*
* Return value: %CAIRO_STATUS_SUCCESS if the entire string was
* succesfully converted. %CAIRO_STATUS_INVALID_STRING if an
* an invalid sequence was found.
**/
cairo_status_t
_cairo_utf8_to_utf16 (const char *str,
int len,
uint16_t **result,
int *items_written)
{
uint16_t *str16 = NULL;
int n16, i;
const char *in;
in = str;
n16 = 0;
while ((len < 0 || str + len - in > 0) && *in) {
uint32_t wc = _utf8_get_char_extended (in, str + len - in);
if (wc & 0x80000000 || !UNICODE_VALID (wc))
return CAIRO_STATUS_INVALID_STRING;
if (wc < 0x10000)
n16 += 1;
else
n16 += 2;
if (n16 == INT_MAX - 1 || n16 == INT_MAX)
return CAIRO_STATUS_INVALID_STRING;
in = UTF8_NEXT_CHAR (in);
}
str16 = malloc (sizeof (uint16_t) * (n16 + 1));
if (!str16)
return CAIRO_STATUS_NO_MEMORY;
in = str;
for (i = 0; i < n16;) {
uint32_t wc = _utf8_get_char (in);
if (wc < 0x10000) {
str16[i++] = wc;
} else {
str16[i++] = (wc - 0x10000) / 0x400 + 0xd800;
str16[i++] = (wc - 0x10000) % 0x400 + 0xdc00;
}
in = UTF8_NEXT_CHAR (in);
}
str16[i] = 0;
*result = str16;
if (items_written)
*items_written = n16;
return CAIRO_STATUS_SUCCESS;
}
- Previous message: [cairo-commit]
cairo/src Makefile.am, 1.36, 1.37 cairo-win32.h, 1.1,
1.2 cairo.c, 1.49, 1.50 cairo.h, 1.70, 1.71 cairo_ft_font.c,
1.39, 1.40 cairo_gstate.c, 1.78, 1.79 cairo_traps.c, 1.20,
1.21 cairo_win32_font.c, 1.1, 1.2 cairo_win32_surface.c, 1.1,
1.2 cairoint.h, 1.93, 1.94
- Next message: [cairo-commit] cairo-demo/cairo_snippets ChangeLog,1.24,1.25
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
More information about the cairo-commit
mailing list