[cairo-commit] 3 commits - src/cairo-pdf.h src/cairo-pdf-interchange.c src/cairo-pdf-surface.c test/pdf-tagged-text.c

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Sun Feb 27 03:51:52 UTC 2022


 src/cairo-pdf-interchange.c |  118 +++++++++++++++++++++++++++++++++++++++++---
 src/cairo-pdf-surface.c     |   44 ++++++++++++----
 src/cairo-pdf.h             |    6 +-
 test/pdf-tagged-text.c      |    6 ++
 4 files changed, 156 insertions(+), 18 deletions(-)

New commits:
commit 1a799577b2c7bcd3757211adcc4e46abb3bdfc97
Merge: b0b294b82 895b249b0
Author: Adrian Johnson <ajohnson at redneon.com>
Date:   Sun Feb 27 03:51:50 2022 +0000

    Merge branch 'issue-526' into 'master'
    
    pdf: ensure urls and filename strings are correctly encoded
    
    Closes #526
    
    See merge request cairo/cairo!266

commit 895b249b0ac6d2c45f47423f6529359991a66c3c
Author: Adrian Johnson <ajohnson at redneon.com>
Date:   Sun Jan 9 17:55:11 2022 +1030

    pdf: ensure filenames are correctly encoded

diff --git a/src/cairo-pdf-interchange.c b/src/cairo-pdf-interchange.c
index d48a2d1f1..cb80151d1 100644
--- a/src/cairo-pdf-interchange.c
+++ b/src/cairo-pdf-interchange.c
@@ -437,6 +437,71 @@ cairo_pdf_interchange_write_dest (cairo_pdf_surface_t *surface,
     return status;
 }
 
+/* Encode the NUL-terminated string @utf8 as a PDF string object without
+ * transcoding. Input made only of bytes 32..126 becomes a literal string
+ * "(...)" with '(', ')' and '\' backslash-escaped; anything else becomes a
+ * hex string "<...>" of the raw bytes, each '\' byte being written twice.
+ *
+ * On success *@str_out receives the newly allocated result, which the
+ * caller releases with free(). Fails only with CAIRO_STATUS_NO_MEMORY. */
+static cairo_int_status_t
+_cairo_utf8_to_pdf_utf8_hexstring (const char *utf8, char **str_out)
+{
+    const unsigned char *src;
+    cairo_bool_t printable = TRUE;
+    char *out;
+    int body_len = 0;
+    int alloc_len;
+    int pos = 0;
+
+    /* Classify the input and measure the escaped literal-string body. */
+    for (src = (const unsigned char *) utf8; *src; src++) {
+	if (*src < 32 || *src > 126)
+	    printable = FALSE;
+	body_len += (*src == '(' || *src == ')' || *src == '\\') ? 2 : 1;
+    }
+
+    /* Both forms add two delimiters and a NUL; the hex form spends two
+     * digits on every counted byte. */
+    alloc_len = (printable ? body_len : body_len*2) + 3;
+    out = _cairo_malloc (alloc_len);
+    if (out == NULL)
+	return _cairo_error (CAIRO_STATUS_NO_MEMORY);
+
+    if (printable) {
+	out[pos++] = '(';
+	for (src = (const unsigned char *) utf8; *src; src++) {
+	    switch (*src) {
+	    case '(':
+	    case ')':
+	    case '\\':
+		out[pos++] = '\\';
+		break;
+	    default:
+		break;
+	    }
+	    out[pos++] = *src;
+	}
+	out[pos++] = ')';
+    } else {
+	out[pos++] = '<';
+	for (src = (const unsigned char *) utf8; *src; src++) {
+	    /* A backslash byte is emitted twice, every other byte once. */
+	    int copies = (*src == '\\') ? 2 : 1;
+
+	    while (copies-- > 0) {
+		snprintf (out + pos, 3, "%02x", *src);
+		pos += 2;
+	    }
+	}
+	out[pos++] = '>';
+    }
+    out[pos] = 0;
+
+    *str_out = out;
+    return CAIRO_STATUS_SUCCESS;
+}
+
 static cairo_int_status_t
 cairo_pdf_interchange_write_link_action (cairo_pdf_surface_t   *surface,
 					 cairo_link_attrs_t    *link_attrs)
@@ -469,12 +534,42 @@ cairo_pdf_interchange_write_link_action (cairo_pdf_surface_t   *surface,
 				     dest);
 	free (dest);
     } else if (link_attrs->link_type == TAG_LINK_FILE) {
+	/* According to "Developing with PDF", Leonard Rosenthol, 2013,
+	 * The F key is encoded in the "standard encoding for the
+	 * platform on which the document is being viewed. For most
+	 * modern operating systems, that's UTF-8"
+	 *
+	 * As we don't know the target platform, we assume UTF-8. The
+	 * F key may contain multi-byte encodings using the hex
+	 * encoding.
+	 *
+	 * For PDF 1.7 we also include the UF key which uses the
+	 * standard PDF UTF-16BE strings.
+	 */
+	status = _cairo_utf8_to_pdf_utf8_hexstring (link_attrs->file, &dest);
+	if (unlikely (status))
+	    return status;
+
 	_cairo_output_stream_printf (surface->object_stream.stream,
 				     "   /A <<\n"
 				     "      /Type /Action\n"
 				     "      /S /GoToR\n"
-				     "      /F (%s)\n",
-				     link_attrs->file);
+				     "      /F %s\n",
+				     dest);
+	free (dest);
+
+	if (surface->pdf_version >= CAIRO_PDF_VERSION_1_7)
+	{
+	    status = _cairo_utf8_to_pdf_string (link_attrs->file, &dest);
+	    if (unlikely (status))
+		return status;
+
+	    _cairo_output_stream_printf (surface->object_stream.stream,
+				     "      /UF %s\n",
+				     dest);
+	    free (dest);
+	}
+
 	if (link_attrs->dest) {
 	    status = _cairo_utf8_to_pdf_string (link_attrs->dest, &dest);
 	    if (unlikely (status))
@@ -487,13 +582,13 @@ cairo_pdf_interchange_write_link_action (cairo_pdf_surface_t   *surface,
 	} else {
 	    if (link_attrs->has_pos) {
 		_cairo_output_stream_printf (surface->object_stream.stream,
-					     "      /D [%d %f %f 0]\n",
+					     "      /D [%d /XYZ %f %f 0]\n",
 					     link_attrs->page,
 					     link_attrs->pos.x,
 					     link_attrs->pos.y);
 	    } else {
 		_cairo_output_stream_printf (surface->object_stream.stream,
-					     "      /D [%d null null 0]\n",
+					     "      /D [%d /XYZ null null 0]\n",
 					     link_attrs->page);
 	    }
 	}
diff --git a/src/cairo-pdf-surface.c b/src/cairo-pdf-surface.c
index 8cc290ade..d8ef47db8 100644
--- a/src/cairo-pdf-surface.c
+++ b/src/cairo-pdf-surface.c
@@ -209,7 +209,9 @@ _cairo_pdf_surface_get_extents (void		        *abstract_surface,
 static const cairo_pdf_version_t _cairo_pdf_versions[] =
 {
     CAIRO_PDF_VERSION_1_4,
-    CAIRO_PDF_VERSION_1_5
+    CAIRO_PDF_VERSION_1_5,
+    CAIRO_PDF_VERSION_1_6,
+    CAIRO_PDF_VERSION_1_7
 };
 
 #define CAIRO_PDF_VERSION_LAST ARRAY_LENGTH (_cairo_pdf_versions)
@@ -218,6 +220,8 @@ static const char * _cairo_pdf_version_strings[CAIRO_PDF_VERSION_LAST] =
 {
     "PDF 1.4",
-    "PDF 1.5"
+    "PDF 1.5",
+    "PDF 1.6",
+    "PDF 1.7"
 };
 
 static const char *_cairo_pdf_supported_mime_types[] =
@@ -497,7 +501,7 @@ _cairo_pdf_surface_create_for_stream_internal (cairo_output_stream_t	*output,
     }
 
     surface->struct_tree_root.id = 0;
-    surface->pdf_version = CAIRO_PDF_VERSION_1_5;
+    surface->pdf_version = CAIRO_PDF_VERSION_1_7;
     surface->compress_streams = TRUE;
     surface->pdf_stream.active = FALSE;
     surface->pdf_stream.old_output = NULL;
@@ -2635,10 +2639,16 @@ _cairo_pdf_surface_start_page (void *abstract_surface)
 	case CAIRO_PDF_VERSION_1_4:
 	    version = "1.4";
 	    break;
-	default:
 	case CAIRO_PDF_VERSION_1_5:
 	    version = "1.5";
 	    break;
+	case CAIRO_PDF_VERSION_1_6:
+	    version = "1.6";
+	    break;
+	default:
+	case CAIRO_PDF_VERSION_1_7:
+	    version = "1.7";
+	    break;
 	}
 
 	_cairo_output_stream_printf (surface->output,
diff --git a/src/cairo-pdf.h b/src/cairo-pdf.h
index 49afb687e..7e8025be3 100644
--- a/src/cairo-pdf.h
+++ b/src/cairo-pdf.h
@@ -47,6 +47,8 @@ CAIRO_BEGIN_DECLS
  * cairo_pdf_version_t:
  * @CAIRO_PDF_VERSION_1_4: The version 1.4 of the PDF specification. (Since 1.10)
  * @CAIRO_PDF_VERSION_1_5: The version 1.5 of the PDF specification. (Since 1.10)
+ * @CAIRO_PDF_VERSION_1_6: The version 1.6 of the PDF specification. (Since 1.18)
+ * @CAIRO_PDF_VERSION_1_7: The version 1.7 of the PDF specification. (Since 1.18)
  *
  * #cairo_pdf_version_t is used to describe the version number of the PDF
  * specification that a generated PDF file will conform to.
@@ -55,7 +57,9 @@ CAIRO_BEGIN_DECLS
  **/
 typedef enum _cairo_pdf_version {
     CAIRO_PDF_VERSION_1_4,
-    CAIRO_PDF_VERSION_1_5
+    CAIRO_PDF_VERSION_1_5,
+    CAIRO_PDF_VERSION_1_6,
+    CAIRO_PDF_VERSION_1_7
 } cairo_pdf_version_t;
 
 cairo_public cairo_surface_t *
diff --git a/test/pdf-tagged-text.c b/test/pdf-tagged-text.c
index 1e5abcae8..378e6a920 100644
--- a/test/pdf-tagged-text.c
+++ b/test/pdf-tagged-text.c
@@ -358,6 +358,12 @@ draw_cover (cairo_surface_t *surface, cairo_t *cr)
     cairo_show_text (cr, "link to page section 3.3");
     cairo_tag_end (cr, CAIRO_TAG_LINK);
 
+    /* Create link to external file */
+    cairo_tag_begin (cr, CAIRO_TAG_LINK, "file='foo.pdf' page=1");
+    cairo_move_to (cr, PAGE_WIDTH/3, 4.4*PAGE_HEIGHT/5);
+    cairo_show_text (cr, "link file 'foo.pdf'");
+    cairo_tag_end (cr, CAIRO_TAG_LINK);
+
     draw_page_num (surface, cr, "cover", 0);
 }
 
commit a4d8eb98ba8d05fd0026f9c2f9433bdfedde1d06
Author: Adrian Johnson <ajohnson at redneon.com>
Date:   Sun Jan 9 15:13:34 2022 +1030

    pdf: ensure url strings are correctly encoded
    
    Fixes #526

diff --git a/src/cairo-pdf-interchange.c b/src/cairo-pdf-interchange.c
index 38bec977f..d48a2d1f1 100644
--- a/src/cairo-pdf-interchange.c
+++ b/src/cairo-pdf-interchange.c
@@ -450,13 +450,24 @@ cairo_pdf_interchange_write_link_action (cairo_pdf_surface_t   *surface,
 	    return status;
 
     } else if (link_attrs->link_type == TAG_LINK_URI) {
+	status = _cairo_utf8_to_pdf_string (link_attrs->uri, &dest);
+	if (unlikely (status))
+	    return status;
+
+	if (dest[0] != '(') {
+	    free (dest);
+	    return _cairo_tag_error ("Link attribute: \"url=%s\" URI may only contain ASCII characters",
+				     link_attrs->uri);
+	}
+
 	_cairo_output_stream_printf (surface->object_stream.stream,
 				     "   /A <<\n"
 				     "      /Type /Action\n"
 				     "      /S /URI\n"
-				     "      /URI (%s)\n"
+				     "      /URI %s\n"
 				     "   >>\n",
-				     link_attrs->uri);
+				     dest);
+	free (dest);
     } else if (link_attrs->link_type == TAG_LINK_FILE) {
 	_cairo_output_stream_printf (surface->object_stream.stream,
 				     "   /A <<\n"
diff --git a/src/cairo-pdf-surface.c b/src/cairo-pdf-surface.c
index 07fe5dffc..8cc290ade 100644
--- a/src/cairo-pdf-surface.c
+++ b/src/cairo-pdf-surface.c
@@ -5448,18 +5448,24 @@ _cairo_utf8_to_pdf_string (const char *utf8, char **str_out)
 {
     int i;
     int len;
+    unsigned char *p;
     cairo_bool_t ascii;
     char *str;
     cairo_int_status_t status = CAIRO_STATUS_SUCCESS;
 
     ascii = TRUE;
-    len = strlen (utf8);
-    for (i = 0; i < len; i++) {
-	unsigned c = utf8[i];
-	if (c < 32 || c > 126 || c == '(' || c == ')' || c == '\\') {
+    p = (unsigned char *)utf8;
+    len = 0;
+    while (*p) {
+	if (*p < 32 || *p > 126) {
 	    ascii = FALSE;
 	    break;
 	}
+	if (*p == '(' || *p == ')' || *p == '\\')
+	    len += 2;
+	else
+	    len++;
+	p++;
     }
 
     if (ascii) {
@@ -5468,10 +5474,16 @@ _cairo_utf8_to_pdf_string (const char *utf8, char **str_out)
 	    return _cairo_error (CAIRO_STATUS_NO_MEMORY);
 
 	str[0] = '(';
-	for (i = 0; i < len; i++)
-	    str[i+1] = utf8[i];
-	str[i+1] = ')';
-	str[i+2] = 0;
+	p = (unsigned char *)utf8;
+	i = 1;
+	while (*p) {
+	    if (*p == '(' || *p == ')' || *p == '\\')
+		str[i++] = '\\';
+	    str[i++] = *p;
+	    p++;
+	}
+	str[i++] = ')';
+	str[i++] = 0;
     } else {
 	uint16_t *utf16 = NULL;
 	int utf16_len = 0;


More information about the cairo-commit mailing list