pkgsrc-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[pkgsrc/trunk]: pkgsrc/print/py-Pdf pyPdf: fallback if non-UTF16 strings start with UTF16 BOM
details: https://anonhg.NetBSD.org/pkgsrc/rev/691fba72d533
branches: trunk
changeset: 449850:691fba72d533
user: joerg <joerg%pkgsrc.org@localhost>
date: Tue Apr 06 18:57:10 2021 +0000
description:
pyPdf: fallback if non-UTF16 strings start with UTF16 BOM
This can be seen with Reportlab, which is embedding a digest as binary.
Bump revision.
diffstat:
print/py-Pdf/Makefile | 4 +-
print/py-Pdf/distinfo | 4 +-
print/py-Pdf/patches/patch-pyPdf_generic.py | 46 +++++++++++++++++++++++++---
3 files changed, 44 insertions(+), 10 deletions(-)
diffs (117 lines):
diff -r de363dac5a26 -r 691fba72d533 print/py-Pdf/Makefile
--- a/print/py-Pdf/Makefile Tue Apr 06 18:55:05 2021 +0000
+++ b/print/py-Pdf/Makefile Tue Apr 06 18:57:10 2021 +0000
@@ -1,8 +1,8 @@
-# $NetBSD: Makefile,v 1.8 2019/07/29 20:39:39 joerg Exp $
+# $NetBSD: Makefile,v 1.9 2021/04/06 18:57:10 joerg Exp $
DISTNAME= pyPdf-1.13
PKGNAME= ${PYPKGPREFIX}-Pdf-1.13
-PKGREVISION= 3
+PKGREVISION= 4
CATEGORIES= print python
MASTER_SITES= http://pybrary.net/pyPdf/
diff -r de363dac5a26 -r 691fba72d533 print/py-Pdf/distinfo
--- a/print/py-Pdf/distinfo Tue Apr 06 18:55:05 2021 +0000
+++ b/print/py-Pdf/distinfo Tue Apr 06 18:57:10 2021 +0000
@@ -1,9 +1,9 @@
-$NetBSD: distinfo,v 1.7 2019/07/29 20:39:39 joerg Exp $
+$NetBSD: distinfo,v 1.8 2021/04/06 18:57:10 joerg Exp $
SHA1 (pyPdf-1.13.tar.gz) = ba7aed11cf21a2c218df2e3979be5eb90992dcbe
RMD160 (pyPdf-1.13.tar.gz) = 0669d4a93c20fcd899d15ff42cb1b8b908f28e3b
SHA512 (pyPdf-1.13.tar.gz) = 1c00a5a6658054671a396e7b334cbeb9e26dc1e3ad9668e212d05a9483f164931d8bd14fb9ab4d083d7ca3999fee7a2e9eea55b604a06c7d5d0632f0791b7598
Size (pyPdf-1.13.tar.gz) = 35699 bytes
-SHA1 (patch-pyPdf_generic.py) = c661b2fd4cea8679ac551733ea25b7efa9fbb37d
+SHA1 (patch-pyPdf_generic.py) = 17a919f234f5493db803fb39d7aae4c4ffd06a05
SHA1 (patch-pyPdf_pdf.py) = 3359ec14fb25fedc86d27d23c052d1891c8c9e4d
SHA1 (patch-pyPdf_utils.py) = 11b14d0c3bdbdac04312f1a4f8eb9f27c6d09009
diff -r de363dac5a26 -r 691fba72d533 print/py-Pdf/patches/patch-pyPdf_generic.py
--- a/print/py-Pdf/patches/patch-pyPdf_generic.py Tue Apr 06 18:55:05 2021 +0000
+++ b/print/py-Pdf/patches/patch-pyPdf_generic.py Tue Apr 06 18:57:10 2021 +0000
@@ -1,4 +1,4 @@
-$NetBSD: patch-pyPdf_generic.py,v 1.1 2014/01/23 14:38:42 wiz Exp $
+$NetBSD: patch-pyPdf_generic.py,v 1.2 2021/04/06 18:57:10 joerg Exp $
python-3.x compatibility.
@@ -22,7 +22,41 @@
while True:
# skip leading whitespace
tok = stream.read(1)
-@@ -425,7 +425,7 @@ class NameObject(str, PdfObject):
+@@ -245,20 +245,22 @@ def createStringObject(string):
+ return TextStringObject(string)
+ elif isinstance(string, str):
+ if string.startswith(codecs.BOM_UTF16_BE):
+- retval = TextStringObject(string.decode("utf-16"))
+- retval.autodetect_utf16 = True
+- return retval
+- else:
+- # This is probably a big performance hit here, but we need to
+- # convert string objects into the text/unicode-aware version if
+- # possible... and the only way to check if that's possible is
+- # to try. Some strings are strings, some are just byte arrays.
+ try:
+- retval = TextStringObject(decode_pdfdocencoding(string))
+- retval.autodetect_pdfdocencoding = True
++ retval = TextStringObject(string.decode("utf-16"))
++ retval.autodetect_utf16 = True
+ return retval
+ except UnicodeDecodeError:
+- return ByteStringObject(string)
++ pass
++ # This is probably a big performance hit here, but we need to
++ # convert string objects into the text/unicode-aware version if
++ # possible... and the only way to check if that's possible is
++ # to try. Some strings are strings, some are just byte arrays.
++ try:
++ retval = TextStringObject(decode_pdfdocencoding(string))
++ retval.autodetect_pdfdocencoding = True
++ return retval
++ except UnicodeDecodeError:
++ return ByteStringObject(string)
+ else:
+ raise TypeError("createStringObject should have str or unicode arg")
+
+@@ -425,7 +427,7 @@ class NameObject(str, PdfObject):
def readFromStream(stream):
name = stream.read(1)
if name != "/":
@@ -31,7 +65,7 @@
while True:
tok = stream.read(1)
if tok.isspace() or tok in NameObject.delimiterCharacters:
-@@ -517,7 +517,7 @@ class DictionaryObject(dict, PdfObject):
+@@ -517,7 +519,7 @@ class DictionaryObject(dict, PdfObject):
def readFromStream(stream, pdf):
tmp = stream.read(2)
if tmp != "<<":
@@ -40,7 +74,7 @@
data = {}
while True:
tok = readNonWhitespace(stream)
-@@ -531,7 +531,7 @@ class DictionaryObject(dict, PdfObject):
+@@ -531,7 +533,7 @@ class DictionaryObject(dict, PdfObject):
value = readObject(stream, pdf)
if data.has_key(key):
# multiple definitions of key not permitted
@@ -49,7 +83,7 @@
data[key] = value
pos = stream.tell()
s = readNonWhitespace(stream)
-@@ -570,7 +570,7 @@ class DictionaryObject(dict, PdfObject):
+@@ -570,7 +572,7 @@ class DictionaryObject(dict, PdfObject):
data["__streamdata__"] = data["__streamdata__"][:-1]
else:
stream.seek(pos, 0)
@@ -58,7 +92,7 @@
else:
stream.seek(pos, 0)
if data.has_key("__streamdata__"):
-@@ -655,7 +655,7 @@ class EncodedStreamObject(StreamObject):
+@@ -655,7 +657,7 @@ class EncodedStreamObject(StreamObject):
return decoded._data
def setData(self, data):
Home |
Main Index |
Thread Index |
Old Index