[pkgsrc/trunk]: pkgsrc/print/py-Pdf pyPdf: fallback if non-UTF16 strings start with UTF16 BOM

To: pkgsrc-changes-hg%NetBSD.org@localhost
Subject: [pkgsrc/trunk]: pkgsrc/print/py-Pdf pyPdf: fallback if non-UTF16 strings start with UTF16 BOM
From: joerg <joerg%pkgsrc.org@localhost>
Date: Tue, 06 Apr 2021 21:07:10 +0000

details:   https://anonhg.NetBSD.org/pkgsrc/rev/691fba72d533
branches:  trunk
changeset: 449850:691fba72d533
user:      joerg <joerg%pkgsrc.org@localhost>
date:      Tue Apr 06 18:57:10 2021 +0000

description:
pyPdf: fall back if non-UTF16 strings start with a UTF16 BOM

This can be seen with ReportLab, which embeds a digest as binary data.
Bump revision.

diffstat:

 print/py-Pdf/Makefile                       |   4 +-
 print/py-Pdf/distinfo                       |   4 +-
 print/py-Pdf/patches/patch-pyPdf_generic.py |  46 +++++++++++++++++++++++++---
 3 files changed, 44 insertions(+), 10 deletions(-)

diffs (117 lines):

diff -r de363dac5a26 -r 691fba72d533 print/py-Pdf/Makefile
--- a/print/py-Pdf/Makefile     Tue Apr 06 18:55:05 2021 +0000
+++ b/print/py-Pdf/Makefile     Tue Apr 06 18:57:10 2021 +0000
@@ -1,8 +1,8 @@
-# $NetBSD: Makefile,v 1.8 2019/07/29 20:39:39 joerg Exp $
+# $NetBSD: Makefile,v 1.9 2021/04/06 18:57:10 joerg Exp $
 
 DISTNAME=      pyPdf-1.13
 PKGNAME=       ${PYPKGPREFIX}-Pdf-1.13
-PKGREVISION=   3
+PKGREVISION=   4
 CATEGORIES=    print python
 MASTER_SITES=  http://pybrary.net/pyPdf/
 
diff -r de363dac5a26 -r 691fba72d533 print/py-Pdf/distinfo
--- a/print/py-Pdf/distinfo     Tue Apr 06 18:55:05 2021 +0000
+++ b/print/py-Pdf/distinfo     Tue Apr 06 18:57:10 2021 +0000
@@ -1,9 +1,9 @@
-$NetBSD: distinfo,v 1.7 2019/07/29 20:39:39 joerg Exp $
+$NetBSD: distinfo,v 1.8 2021/04/06 18:57:10 joerg Exp $
 
 SHA1 (pyPdf-1.13.tar.gz) = ba7aed11cf21a2c218df2e3979be5eb90992dcbe
 RMD160 (pyPdf-1.13.tar.gz) = 0669d4a93c20fcd899d15ff42cb1b8b908f28e3b
 SHA512 (pyPdf-1.13.tar.gz) = 1c00a5a6658054671a396e7b334cbeb9e26dc1e3ad9668e212d05a9483f164931d8bd14fb9ab4d083d7ca3999fee7a2e9eea55b604a06c7d5d0632f0791b7598
 Size (pyPdf-1.13.tar.gz) = 35699 bytes
-SHA1 (patch-pyPdf_generic.py) = c661b2fd4cea8679ac551733ea25b7efa9fbb37d
+SHA1 (patch-pyPdf_generic.py) = 17a919f234f5493db803fb39d7aae4c4ffd06a05
 SHA1 (patch-pyPdf_pdf.py) = 3359ec14fb25fedc86d27d23c052d1891c8c9e4d
 SHA1 (patch-pyPdf_utils.py) = 11b14d0c3bdbdac04312f1a4f8eb9f27c6d09009
diff -r de363dac5a26 -r 691fba72d533 print/py-Pdf/patches/patch-pyPdf_generic.py
--- a/print/py-Pdf/patches/patch-pyPdf_generic.py       Tue Apr 06 18:55:05 2021 +0000
+++ b/print/py-Pdf/patches/patch-pyPdf_generic.py       Tue Apr 06 18:57:10 2021 +0000
@@ -1,4 +1,4 @@
-$NetBSD: patch-pyPdf_generic.py,v 1.1 2014/01/23 14:38:42 wiz Exp $
+$NetBSD: patch-pyPdf_generic.py,v 1.2 2021/04/06 18:57:10 joerg Exp $
 
 python-3.x compatibility.
 
@@ -22,7 +22,41 @@
          while True:
              # skip leading whitespace
              tok = stream.read(1)
-@@ -425,7 +425,7 @@ class NameObject(str, PdfObject):
+@@ -245,20 +245,22 @@ def createStringObject(string):
+         return TextStringObject(string)
+     elif isinstance(string, str):
+         if string.startswith(codecs.BOM_UTF16_BE):
+-            retval = TextStringObject(string.decode("utf-16"))
+-            retval.autodetect_utf16 = True
+-            return retval
+-        else:
+-            # This is probably a big performance hit here, but we need to
+-            # convert string objects into the text/unicode-aware version if
+-            # possible... and the only way to check if that's possible is
+-            # to try.  Some strings are strings, some are just byte arrays.
+             try:
+-                retval = TextStringObject(decode_pdfdocencoding(string))
+-                retval.autodetect_pdfdocencoding = True
++                retval = TextStringObject(string.decode("utf-16"))
++                retval.autodetect_utf16 = True
+                 return retval
+             except UnicodeDecodeError:
+-                return ByteStringObject(string)
++                pass
++        # This is probably a big performance hit here, but we need to
++        # convert string objects into the text/unicode-aware version if
++        # possible... and the only way to check if that's possible is
++        # to try.  Some strings are strings, some are just byte arrays.
++        try:
++            retval = TextStringObject(decode_pdfdocencoding(string))
++            retval.autodetect_pdfdocencoding = True
++            return retval
++        except UnicodeDecodeError:
++            return ByteStringObject(string)
+     else:
+         raise TypeError("createStringObject should have str or unicode arg")
+ 
+@@ -425,7 +427,7 @@ class NameObject(str, PdfObject):
      def readFromStream(stream):
          name = stream.read(1)
          if name != "/":
@@ -31,7 +65,7 @@
          while True:
              tok = stream.read(1)
              if tok.isspace() or tok in NameObject.delimiterCharacters:
-@@ -517,7 +517,7 @@ class DictionaryObject(dict, PdfObject):
+@@ -517,7 +519,7 @@ class DictionaryObject(dict, PdfObject):
      def readFromStream(stream, pdf):
          tmp = stream.read(2)
          if tmp != "<<":
@@ -40,7 +74,7 @@
          data = {}
          while True:
              tok = readNonWhitespace(stream)
-@@ -531,7 +531,7 @@ class DictionaryObject(dict, PdfObject):
+@@ -531,7 +533,7 @@ class DictionaryObject(dict, PdfObject):
              value = readObject(stream, pdf)
              if data.has_key(key):
                  # multiple definitions of key not permitted
@@ -49,7 +83,7 @@
              data[key] = value
          pos = stream.tell()
          s = readNonWhitespace(stream)
-@@ -570,7 +570,7 @@ class DictionaryObject(dict, PdfObject):
+@@ -570,7 +572,7 @@ class DictionaryObject(dict, PdfObject):
                      data["__streamdata__"] = data["__streamdata__"][:-1]
                  else:
                      stream.seek(pos, 0)
@@ -58,7 +92,7 @@
          else:
              stream.seek(pos, 0)
          if data.has_key("__streamdata__"):
-@@ -655,7 +655,7 @@ class EncodedStreamObject(StreamObject):
+@@ -655,7 +657,7 @@ class EncodedStreamObject(StreamObject):
              return decoded._data
  
      def setData(self, data):

Prev by Date: [pkgsrc/trunk]: pkgsrc/mk/pkgformat/pkg Preserve stdin for the bootstrap and ...
Next by Date: [pkgsrc/trunk]: pkgsrc/www/py-django-modelcluster Update py-djangocluster to ...
Previous by Thread: [pkgsrc/trunk]: pkgsrc/mk/pkgformat/pkg Preserve stdin for the bootstrap and ...
Next by Thread: [pkgsrc/trunk]: pkgsrc/www/py-django-modelcluster Update py-djangocluster to ...
Indexes:

Home | Main Index | Thread Index | Old Index