[cairo-commit] src/cairo-cff-subset.c src/cairo-pdf-surface.c src/cairo-scaled-font-subsets-private.h src/cairo-truetype-subset.c

Adrian Johnson ajohnson at kemper.freedesktop.org
Thu Sep 15 05:36:23 PDT 2011


 src/cairo-cff-subset.c                  |   12 -
 src/cairo-pdf-surface.c                 |   81 ++++++++++-
 src/cairo-scaled-font-subsets-private.h |    4 
 src/cairo-truetype-subset.c             |  225 ++++++++++++++++++++++++--------
 4 files changed, 253 insertions(+), 69 deletions(-)

New commits:
commit 47e16d0e565fcb579148285394bafd45b53ecaff
Author: Adrian Johnson <ajohnson at redneon.com>
Date:   Thu Sep 15 21:52:26 2011 +0930

    subsetting: Support unicode fontnames
    
    Most fonts use Windows platform-specific encoded font names since they
    allow Unicode names.
    
    - Make _cairo_truetype_read_font_name() read the Windows platform
      names first. If this fails, fall back to reading the Mac platform
      MacRoman-encoded name.
    
    - Use the PDF method of encoding non-ASCII PS font names. Poppler will
      correctly extract the Unicode name.
    
    - Make PDF embed the font family name as AsciiHex if the name is not ASCII.

diff --git a/src/cairo-cff-subset.c b/src/cairo-cff-subset.c
index 3ee3ef3..c011cfc 100644
--- a/src/cairo-cff-subset.c
+++ b/src/cairo-cff-subset.c
@@ -2837,13 +2837,13 @@ _cairo_cff_subset_init (cairo_cff_subset_t          *cff_subset,
     }
 
     if (font->font_name) {
-	cff_subset->font_name = strdup (font->font_name);
-	if (cff_subset->font_name == NULL) {
+	cff_subset->family_name_utf8 = strdup (font->font_name);
+	if (cff_subset->family_name_utf8 == NULL) {
 	    status = _cairo_error (CAIRO_STATUS_NO_MEMORY);
 	    goto fail2;
 	}
     } else {
-	cff_subset->font_name = NULL;
+	cff_subset->family_name_utf8 = NULL;
     }
 
     cff_subset->widths = calloc (sizeof (double), font->scaled_font_subset->num_glyphs);
@@ -2877,7 +2877,7 @@ _cairo_cff_subset_init (cairo_cff_subset_t          *cff_subset,
  fail4:
     free (cff_subset->widths);
  fail3:
-    free (cff_subset->font_name);
+    free (cff_subset->family_name_utf8);
  fail2:
     free (cff_subset->ps_name);
  fail1:
@@ -2890,7 +2890,7 @@ void
 _cairo_cff_subset_fini (cairo_cff_subset_t *subset)
 {
     free (subset->ps_name);
-    free (subset->font_name);
+    free (subset->family_name_utf8);
     free (subset->widths);
     free (subset->data);
 }
@@ -3249,7 +3249,7 @@ _cairo_cff_fallback_init (cairo_cff_subset_t          *cff_subset,
     if (unlikely (status))
 	goto fail2;
 
-    cff_subset->font_name = NULL;
+    cff_subset->family_name_utf8 = NULL;
     cff_subset->ps_name = strdup (font->ps_name);
     if (unlikely (cff_subset->ps_name == NULL)) {
 	status = _cairo_error (CAIRO_STATUS_NO_MEMORY);
diff --git a/src/cairo-pdf-surface.c b/src/cairo-pdf-surface.c
index 7336a7d..16e4576 100644
--- a/src/cairo-pdf-surface.c
+++ b/src/cairo-pdf-surface.c
@@ -3960,6 +3960,61 @@ _cairo_pdf_surface_write_pages (cairo_pdf_surface_t *surface)
 }
 
 static cairo_status_t
+_utf8_to_pdf_string (const char *utf8, char **str_out)
+{
+    int i;
+    int len;
+    cairo_bool_t ascii;
+    char *str;
+    cairo_status_t status = CAIRO_STATUS_SUCCESS;
+
+    ascii = TRUE;
+    len = strlen (utf8);
+    for (i = 0; i < len; i++) {
+	unsigned c = utf8[i];
+	if (c < 32 || c > 126 || c == '(' || c == ')' || c == '\\') {
+	    ascii = FALSE;
+	    break;
+	}
+    }
+
+    if (ascii) {
+	str = malloc (len + 3);
+	if (str == NULL)
+	    return _cairo_error (CAIRO_STATUS_NO_MEMORY);
+
+	str[0] = '(';
+	for (i = 0; i < len; i++)
+	    str[i+1] = utf8[i];
+	str[i+1] = ')';
+	str[i+2] = 0;
+    } else {
+	uint16_t *utf16 = NULL;
+	int utf16_len = 0;
+
+	status = _cairo_utf8_to_utf16 (utf8, -1, &utf16, &utf16_len);
+	if (unlikely (status))
+	    return status;
+
+	str = malloc (utf16_len*4 + 7);
+	if (str == NULL) {
+	    free (utf16);
+	    return _cairo_error (CAIRO_STATUS_NO_MEMORY);
+	}
+
+	strcpy (str, "<FEFF");
+	for (i = 0; i < utf16_len; i++)
+	    snprintf (str + 4*i + 5, 5, "%04X", utf16[i]);
+
+	strcat (str, ">");
+	free (utf16);
+    }
+    *str_out = str;
+
+    return status;
+}
+
+static cairo_status_t
 _cairo_pdf_surface_emit_unicode_for_glyph (cairo_pdf_surface_t	*surface,
 					   const char 		*utf8)
 {
@@ -4243,10 +4298,17 @@ _cairo_pdf_surface_emit_cff_font (cairo_pdf_surface_t		*surface,
 				 tag,
 				 subset->ps_name);
 
-    if (subset->font_name) {
+    if (subset->family_name_utf8) {
+	char *pdf_str;
+
+	status = _utf8_to_pdf_string (subset->family_name_utf8, &pdf_str);
+	if (unlikely (status))
+	    return status;
+
 	_cairo_output_stream_printf (surface->output,
-				     "   /FontFamily (%s)\n",
-				     subset->font_name);
+				     "   /FontFamily %s\n",
+				     pdf_str);
+	free (pdf_str);
     }
 
     _cairo_output_stream_printf (surface->output,
@@ -4681,10 +4743,17 @@ _cairo_pdf_surface_emit_truetype_font_subset (cairo_pdf_surface_t		*surface,
 				 tag,
 				 subset.ps_name);
 
-    if (subset.font_name) {
+    if (subset.family_name_utf8) {
+	char *pdf_str;
+
+	status = _utf8_to_pdf_string (subset.family_name_utf8, &pdf_str);
+	if (unlikely (status))
+	    return status;
+
 	_cairo_output_stream_printf (surface->output,
-				     "   /FontFamily (%s)\n",
-				     subset.font_name);
+				     "   /FontFamily %s\n",
+				     pdf_str);
+	free (pdf_str);
     }
 
     _cairo_output_stream_printf (surface->output,
diff --git a/src/cairo-scaled-font-subsets-private.h b/src/cairo-scaled-font-subsets-private.h
index 92f0020..2edf770 100644
--- a/src/cairo-scaled-font-subsets-private.h
+++ b/src/cairo-scaled-font-subsets-private.h
@@ -345,7 +345,7 @@ cairo_private cairo_int_status_t
 _cairo_scaled_font_subset_create_glyph_names (cairo_scaled_font_subset_t *subset);
 
 typedef struct _cairo_cff_subset {
-    char *font_name;
+    char *family_name_utf8;
     char *ps_name;
     double *widths;
     double x_min, y_min, x_max, y_max;
@@ -427,7 +427,7 @@ cairo_private void
 _cairo_cff_fallback_fini (cairo_cff_subset_t *cff_subset);
 
 typedef struct _cairo_truetype_subset {
-    char *font_name;
+    char *family_name_utf8;
     char *ps_name;
     double *widths;
     double x_min, y_min, x_max, y_max;
diff --git a/src/cairo-truetype-subset.c b/src/cairo-truetype-subset.c
index d0d6ab7..1afdf3a 100644
--- a/src/cairo-truetype-subset.c
+++ b/src/cairo-truetype-subset.c
@@ -1137,13 +1137,13 @@ cairo_truetype_subset_init_internal (cairo_truetype_subset_t     *truetype_subse
     }
 
     if (font->base.font_name != NULL) {
-	truetype_subset->font_name = strdup (font->base.font_name);
-	if (unlikely (truetype_subset->font_name == NULL)) {
+	truetype_subset->family_name_utf8 = strdup (font->base.font_name);
+	if (unlikely (truetype_subset->family_name_utf8 == NULL)) {
 	    status = _cairo_error (CAIRO_STATUS_NO_MEMORY);
 	    goto fail2;
 	}
     } else {
-	truetype_subset->font_name = NULL;
+	truetype_subset->family_name_utf8 = NULL;
     }
 
     /* The widths array returned must contain only widths for the
@@ -1201,7 +1201,7 @@ cairo_truetype_subset_init_internal (cairo_truetype_subset_t     *truetype_subse
  fail4:
     free (truetype_subset->widths);
  fail3:
-    free (truetype_subset->font_name);
+    free (truetype_subset->family_name_utf8);
  fail2:
     free (truetype_subset->ps_name);
  fail1:
@@ -1228,7 +1228,7 @@ void
 _cairo_truetype_subset_fini (cairo_truetype_subset_t *subset)
 {
     free (subset->ps_name);
-    free (subset->font_name);
+    free (subset->family_name_utf8);
     free (subset->widths);
     free (subset->data);
     free (subset->string_offsets);
@@ -1395,6 +1395,107 @@ cleanup:
     return status;
 }
 
+static cairo_status_t
+find_name (tt_name_t *name, int name_id, int platform, int encoding, int language, char **str_out)
+{
+    tt_name_record_t *record;
+    int i, len;
+    char *str;
+    char *p;
+    cairo_bool_t has_tag;
+    cairo_status_t status;
+
+    str = NULL;
+    for (i = 0; i < be16_to_cpu (name->num_records); i++) {
+        record = &(name->records[i]);
+	if (be16_to_cpu (record->name) == name_id &&
+	    be16_to_cpu (record->platform) == platform &&
+            be16_to_cpu (record->encoding) == encoding &&
+	    (language == -1 || be16_to_cpu (record->language) == language)) {
+
+	    str = malloc (be16_to_cpu (record->length) + 1);
+	    if (str == NULL)
+		return _cairo_error (CAIRO_STATUS_NO_MEMORY);
+
+	    len = be16_to_cpu (record->length);
+	    memcpy (str,
+		    ((char*)name) + be16_to_cpu (name->strings_offset) + be16_to_cpu (record->offset),
+		    len);
+	    str[be16_to_cpu (record->length)] = 0;
+	    break;
+	}
+    }
+    if (str == NULL) {
+	*str_out = NULL;
+	return CAIRO_STATUS_SUCCESS;
+    }
+
+    if (platform == 3) { /* Win platform, unicode encoding */
+	/* convert to utf8 */
+	int size = 0;
+	char *utf8;
+	uint16_t *u = (uint16_t *) str;
+	int u_len = len/2;
+
+	for (i = 0; i < u_len; i++)
+	    size += _cairo_ucs4_to_utf8 (be16_to_cpu(u[i]), NULL);
+
+	utf8 = malloc (size + 1);
+	if (utf8 == NULL) {
+	    status =_cairo_error (CAIRO_STATUS_NO_MEMORY);
+	    goto fail;
+	}
+	p = utf8;
+	for (i = 0; i < u_len; i++)
+	    p += _cairo_ucs4_to_utf8 (be16_to_cpu(u[i]), p);
+	*p = 0;
+	free (str);
+	str = utf8;
+    } else if (platform == 1) { /* Mac platform, Mac Roman encoding */
+	/* Replace characters above 127 with underscores. We could use
+	 * a lookup table to convert to unicode but since most fonts
+	 * include a unicode name this is just a rarely used fallback. */
+	for (i = 0; i < len; i++) {
+	    if ((unsigned char)str[i] > 127)
+		str[i] = '_';
+	}
+    }
+
+    /* If font name is prefixed with a PDF subset tag, strip it off. */
+    p = str;
+    len = strlen (str);
+    has_tag = FALSE;
+    if (len > 7 && p[6] == '+') {
+	has_tag = TRUE;
+	for (i = 0; i < 6; i++) {
+	    if (p[i] < 'A' || p[i] > 'Z') {
+		has_tag = FALSE;
+		break;
+	    }
+	}
+    }
+    if (has_tag) {
+	p = malloc (len - 6);
+	if (unlikely (p == NULL)) {
+	    status =_cairo_error (CAIRO_STATUS_NO_MEMORY);
+	    goto fail;
+	}
+	memcpy (p, str + 7, len - 7);
+	p[len-7] = 0;
+	free (str);
+	str = p;
+    }
+
+    *str_out = str;
+
+    return CAIRO_STATUS_SUCCESS;
+
+  fail:
+    free (str);
+
+    return status;
+}
+
 cairo_int_status_t
 _cairo_truetype_read_font_name (cairo_scaled_font_t  	 *scaled_font,
 				char 	       		**ps_name_out,
@@ -1403,11 +1504,9 @@ _cairo_truetype_read_font_name (cairo_scaled_font_t  	 *scaled_font,
     cairo_status_t status;
     const cairo_scaled_font_backend_t *backend;
     tt_name_t *name;
-    tt_name_record_t *record;
     unsigned long size;
-    int i, j;
     char *ps_name = NULL;
-    char *font_name = NULL;
+    char *family_name = NULL;
 
     backend = scaled_font->backend;
     if (!backend->load_truetype_table)
@@ -1425,76 +1524,92 @@ _cairo_truetype_read_font_name (cairo_scaled_font_t  	 *scaled_font,
     if (name == NULL)
         return _cairo_error (CAIRO_STATUS_NO_MEMORY);
 
-   status = backend->load_truetype_table (scaled_font,
+    status = backend->load_truetype_table (scaled_font,
 					   TT_TAG_name, 0,
 					   (unsigned char *) name,
 					   &size);
     if (status)
 	goto fail;
 
-    /* Extract the font name and PS name from the name table. At
-     * present this just looks for the Mac platform/Roman encoded font
-     * name. It should be extended to use any suitable font name in
-     * the name table.
-     */
-    for (i = 0; i < be16_to_cpu(name->num_records); i++) {
-        record = &(name->records[i]);
-        if ((be16_to_cpu (record->platform) == 1) &&
-            (be16_to_cpu (record->encoding) == 0)) {
-
-	    if (be16_to_cpu (record->name) == 4) {
-		font_name = malloc (be16_to_cpu(record->length) + 1);
-		if (font_name == NULL) {
-		    status = _cairo_error (CAIRO_STATUS_NO_MEMORY);
-		    goto fail;
-		}
-		strncpy(font_name,
-			((char*)name) + be16_to_cpu (name->strings_offset) + be16_to_cpu (record->offset),
-			be16_to_cpu (record->length));
-		font_name[be16_to_cpu (record->length)] = 0;
-	    }
+    /* Find PS Name (name_id = 6). OT spec says PS name must be one of
+     * the following two encodings */
+    status = find_name (name, 6, 3, 1, 0x409, &ps_name); /* win, unicode, english-us */
+    if (unlikely(status))
+	goto fail;
 
-	    if (be16_to_cpu (record->name) == 6) {
-		ps_name = malloc (be16_to_cpu(record->length) + 1);
-		if (ps_name == NULL) {
-		    status = _cairo_error (CAIRO_STATUS_NO_MEMORY);
-		    goto fail;
-		}
-		strncpy(ps_name,
-			((char*)name) + be16_to_cpu (name->strings_offset) + be16_to_cpu (record->offset),
-			be16_to_cpu (record->length));
-		ps_name[be16_to_cpu (record->length)] = 0;
-	    }
+    if (!ps_name) {
+	status = find_name (name, 6, 1, 0, 0, &ps_name); /* mac, roman, english */
+	if (unlikely(status))
+	    goto fail;
+    }
 
-	    if (font_name && ps_name)
-		break;
-        }
+    /* Find Family name (name_id = 1) */
+    status = find_name (name, 1, 3, 1, 0x409, &family_name); /* win, unicode, english-us */
+    if (unlikely(status))
+	goto fail;
+
+    if (!family_name) {
+	status = find_name (name, 1, 3, 0, 0x409, &family_name); /* win, symbol, english-us */
+	if (unlikely(status))
+	    goto fail;
+    }
+
+    if (!family_name) {
+	status = find_name (name, 1, 1, 0, 0, &family_name); /* mac, roman, english */
+	if (unlikely(status))
+	    goto fail;
+    }
+
+    if (!family_name) {
+	status = find_name (name, 1, 3, 1, -1, &family_name); /* win, unicode, any language */
+	if (unlikely(status))
+	    goto fail;
     }
 
     free (name);
 
-    /* Ensure PS name does not contain any spaces */
+    /* Ensure PS name is a valid PDF/PS name object. In PDF names are
+     * treated as UTF8 and non ASCII bytes, ' ', and '#' are encoded
+     * as '#' followed by 2 hex digits that encode the byte. By also
+     * encoding the characters in the reserved string we ensure the
+     * name is also PS compatible. */
     if (ps_name) {
-	for (i = 0, j = 0; ps_name[j]; j++) {
-	    if (ps_name[j] == ' ')
-		continue;
-	    ps_name[i++] = ps_name[j];
+	static const char *reserved = "()<>[]{}/%#\\";
+	char buf[128]; /* max name length is 127 bytes */
+	char *src = ps_name;
+	char *dst = buf;
+
+	while (*src && dst < buf + 127) {
+	    unsigned char c = *src;
+	    if (c < 0x21 || c > 0x7e || strchr (reserved, c)) {
+		if (dst + 4 > buf + 127)
+		    break;
+
+		snprintf (dst, 4, "#%02X", c);
+		src++;
+		dst += 3;
+	    } else {
+		*dst++ = *src++;
+	    }
+	}
+	*dst = 0;
+	free (ps_name);
+	ps_name = strdup (buf);
+	if (ps_name == NULL) {
+	    status = _cairo_error (CAIRO_STATUS_NO_MEMORY);
+	    goto fail;
 	}
-	ps_name[i] = '\0';
     }
 
     *ps_name_out = ps_name;
-    *font_name_out = font_name;
+    *font_name_out = family_name;
 
     return CAIRO_STATUS_SUCCESS;
 
 fail:
     free (name);
-
     free (ps_name);
-
-    free (font_name);
-
+    free (family_name);
     *ps_name_out = NULL;
     *font_name_out = NULL;
 


More information about the cairo-commit mailing list