Issue #3370 » encoding_issue.patch
/usr/lib/python2.7/site-packages/debpkgr/debpkg.py 2018-03-13 12:35:46.859920320 +0100 | ||
---|---|---|
from __future__ import division
|
||
from __future__ import unicode_literals
|
||
import chardet
|
||
import logging
|
||
import sys
|
||
import inspect
|
||
... | ... | |
return results
|
||
def dump(self, fd=None, encoding=None, text_mode=False):
    """Dump the contents in the original format

    If fd is None, returns a unicode object.  Otherwise, fd is assumed to
    be a file-like object, and this method will write the data to it
    instead of returning a unicode object.

    If fd is not None and text_mode is False, the data will be encoded
    to a byte string before writing to the file.  The encoding used is
    chosen via the encoding parameter; None means to use the encoding the
    object was initialized with (utf-8 by default).  This will raise
    UnicodeEncodeError if the encoding can't support all the characters in
    the Deb822Dict values.

    Keys that are byte strings are decoded with the same encoding before
    being written; keys that are already text are used unchanged.
    """
    # Ideally this would never try to encode (that should be up to the
    # caller when opening the file), but we may still have users who rely
    # on the binary mode encoding.  But...might it be better to break them
    # than to introduce yet another parameter relating to encoding?
    from io import StringIO

    if fd is None:
        fd = StringIO()
        return_string = True
    else:
        return_string = False

    if encoding is None:
        # Use the encoding we've been using to decode strings with if none
        # was explicitly specified
        encoding = self.encoding

    for key in self:
        value = self.get_as_string(key)
        if isinstance(key, bytes):
            # Only byte-string keys need decoding.  Calling .decode() on a
            # text key fails outright on Python 3 and, on Python 2, goes
            # through an implicit ASCII encode that breaks non-ASCII keys.
            key_u = key.decode(encoding)
        else:
            key_u = '%s' % key
        if not value or value[0] == '\n':
            # Avoid trailing whitespace after "Field:" if it's on its own
            # line or the value is empty.  We don't have to worry about the
            # case where value == '\n', since we ensure that is not the
            # case in __setitem__.
            entry = '%s:%s\n' % (key_u, value)
        else:
            entry = '%s: %s\n' % (key_u, value)
        if not return_string and not text_mode:
            fd.write(entry.encode(encoding))
        else:
            fd.write(entry)

    if return_string:
        return fd.getvalue()
|
||
class DebPkgRequires(object):
|
||
__slots__ = ('depends', 'pre_depends', 'recommends',
|
||
... | ... | |
__slots__ = ("_c", "_h", "_md5", "_deps", "_version", "_scripts")
|
||
def __init__(self, control, hashes, md5sums, scripts={}):
|
||
def __init__(self, control, hashes, md5sums, scripts={}, encoding='utf-8'):
|
||
if isinstance(control, dict):
|
||
control = deb822.Deb822(control)
|
||
self._c = control
|
||
... | ... | |
if isinstance(md5sums, DebPkgMD5sums):
|
||
self._md5 = md5sums
|
||
else:
|
||
self._md5 = DebPkgMD5sums(md5sums)
|
||
self._md5 = DebPkgMD5sums(md5sums, encoding=encoding)
|
||
def __repr__(self):
    """Return ``DebPkg(<nevra>)``, where ``<nevra>`` is ``self.nevra``."""
    label = self.nevra
    return 'DebPkg(%s)' % label
|
||
... | ... | |
def make_hashes(path):
    """Return whatever ``deb_hash_file`` produces for the file at *path*."""
    hashes = deb_hash_file(path)
    return hashes
|
||
@staticmethod
def get_encoding(md5sums):
    """Guess the text encoding used by the keys of *md5sums*.

    Every key is passed to ``chardet.detect``.  The encoding reported for
    the last key that is neither ASCII nor UTF-8 is returned; when no key
    yields such an encoding, ``'utf-8'`` is returned.

    :param md5sums: iterable of keys to inspect (``from_file`` passes the
        result of ``debpkg.md5sums()`` called without an encoding)
    :return: name of the guessed encoding
    """
    encoding = "utf-8"
    for key in md5sums:
        detected = chardet.detect(key)['encoding']
        if detected is None:
            # chardet could not determine an encoding for this key; keep
            # the current guess instead of returning None to the caller.
            continue
        # Compare case-insensitively so spellings such as 'UTF-8' are
        # still treated as the default encoding.
        if detected.lower() not in ('ascii', 'utf-8'):
            encoding = detected
    return encoding
|
||
@classmethod
|
||
def from_file(cls, path, **kwargs):
|
||
"""
|
||
... | ... | |
using keyword arguments.
|
||
"""
|
||
debpkg = debfile.DebFile(filename=path)
|
||
# existence of md5sums in control part is optional
|
||
encoding = 'utf-8'
|
||
# existence of md5sums in control part is optional
|
||
try:
|
||
md5sums = debpkg.md5sums(encoding='utf-8')
|
||
md5sums = debpkg.md5sums(encoding)
|
||
except UnicodeDecodeError:
|
||
# Try to detect encoding
|
||
md5sums = debpkg.md5sums()
|
||
encoding = DebPkg.get_encoding(md5sums)
|
||
md5sums = debpkg.md5sums(encoding)
|
||
except debfile.DebError as err:
|
||
log.warn('While processing %s: %s', path, err.args[0])
|
||
md5sums = None
|
||
... | ... | |
scripts = debpkg.control.scripts()
|
||
hashes = cls.make_hashes(path)
|
||
control.update(kwargs)
|
||
return cls(control, hashes, md5sums, scripts=scripts)
|
||
return cls(control, hashes, md5sums, scripts=scripts, encoding=encoding)
|
||
def dump(self, path):
    """Forward *path* to ``self.package.dump`` and return its result."""
    package = self.package
    return package.dump(path)
|