Project

Profile

Help

Issue #3370 » encoding_issue.patch

sbernhard, 03/13/2018 12:42 PM

View differences:

/usr/lib/python2.7/site-packages/debpkgr/debpkg.py 2018-03-13 12:35:46.859920320 +0100
from __future__ import division
from __future__ import unicode_literals
import chardet
import logging
import sys
import inspect
......
return results
def dump(self, fd=None, encoding=None, text_mode=False):
    """Dump the contents in the original format.

    If fd is None, returns a unicode object. Otherwise, fd is assumed to
    be a file-like object, and this method will write the data to it
    instead of returning a unicode object (the method then returns None).

    If fd is not None and text_mode is False, the data will be encoded
    to a byte string before writing to the file. The encoding used is
    chosen via the encoding parameter; None means to use the encoding the
    object was initialized with (utf-8 by default). This will raise
    UnicodeEncodeError if the encoding can't support all the characters in
    the Deb822Dict values.

    Keys may be byte strings or text: byte-string keys are decoded with
    the selected encoding, text keys are written unchanged.
    """
    # Ideally this would never try to encode (that should be up to the
    # caller when opening the file), but we may still have users who rely
    # on the binary mode encoding. But...might it be better to break them
    # than to introduce yet another parameter relating to encoding?
    from io import StringIO

    return_string = fd is None
    if return_string:
        fd = StringIO()

    if encoding is None:
        # Use the encoding we've been using to decode strings with if none
        # was explicitly specified
        encoding = self.encoding

    # Only a caller-supplied, non-text file object receives encoded bytes.
    encode_output = not return_string and not text_mode

    for key in self:
        value = self.get_as_string(key)
        # Decode only real byte strings. Calling .decode() on a text key
        # fails outright on Python 3 and, on Python 2, goes through an
        # implicit ASCII encode that breaks on non-ASCII names.
        key_u = key.decode(encoding) if isinstance(key, bytes) else key
        if not value or value[0] == '\n':
            # Avoid trailing whitespace after "Field:" if it's on its own
            # line or the value is empty. We don't have to worry about the
            # case where value == '\n', since we ensure that is not the
            # case in __setitem__.
            entry = '%s:%s\n' % (key_u, value)
        else:
            entry = '%s: %s\n' % (key_u, value)
        if encode_output:
            fd.write(entry.encode(encoding))
        else:
            fd.write(entry)

    if return_string:
        return fd.getvalue()
    return None
class DebPkgRequires(object):
__slots__ = ('depends', 'pre_depends', 'recommends',
......
__slots__ = ("_c", "_h", "_md5", "_deps", "_version", "_scripts")
def __init__(self, control, hashes, md5sums, scripts={}):
def __init__(self, control, hashes, md5sums, scripts={}, encoding='utf-8'):
if isinstance(control, dict):
control = deb822.Deb822(control)
self._c = control
......
if isinstance(md5sums, DebPkgMD5sums):
self._md5 = md5sums
else:
self._md5 = DebPkgMD5sums(md5sums)
self._md5 = DebPkgMD5sums(md5sums, encoding=encoding)
# Debug representation: formats self.nevra into the string 'DebPkg(<nevra>)'.
def __repr__(self):
return 'DebPkg(%s)' % self.nevra
......
# Thin wrapper: returns whatever deb_hash_file(path) returns for the given path.
def make_hashes(path):
return deb_hash_file(path)
@staticmethod
def get_encoding(md5sums):
    """Guess the text encoding used by the keys of md5sums.

    Runs chardet.detect() on every key and returns the encoding reported
    for the last key that was detected as something other than ASCII or
    UTF-8. Returns 'utf-8' when no key yields such a result (including
    when md5sums is empty).

    Keys for which chardet cannot make a guess are ignored, so the
    returned value is always an encoding name and never None.
    """
    encoding = 'utf-8'
    for key in md5sums:
        detected = chardet.detect(key)['encoding']
        if detected is None:
            # chardet could not identify this key; keep the current guess
            # instead of overwriting it with None.
            continue
        # Compare case-insensitively so differently-cased names for the
        # same encoding are still treated as ASCII/UTF-8.
        if detected.lower() not in ('ascii', 'utf-8'):
            encoding = detected
    return encoding
@classmethod
def from_file(cls, path, **kwargs):
"""
......
using keyword arguments.
"""
debpkg = debfile.DebFile(filename=path)
# existance of md5sums in control part is optional
encoding = 'utf-8'
# existance of md5sums in control part is optional
try:
md5sums = debpkg.md5sums(encoding='utf-8')
md5sums = debpkg.md5sums(encoding)
except UnicodeDecodeError:
# Try to detect encoding
md5sums = debpkg.md5sums()
encoding = DebPkg.get_encoding(md5sums)
md5sums = debpkg.md5sums(encoding)
except debfile.DebError as err:
log.warn('While processing %s: %s', path, err.args[0])
md5sums = None
......
scripts = debpkg.control.scripts()
hashes = cls.make_hashes(path)
control.update(kwargs)
return cls(control, hashes, md5sums, scripts=scripts)
return cls(control, hashes, md5sums, scripts=scripts, encoding=encoding)
# Delegates to self.package.dump(path) and returns whatever that call returns.
def dump(self, path):
return self.package.dump(path)
(3-3/5)