[Python-apps-team] Bug#737498: [PATCH RFC] patch: when importing from email, RFC2047-decode From/Subject headers
Julien Cristau
julien.cristau at logilab.fr
Thu Mar 3 17:55:16 UTC 2016
# HG changeset patch
# User Julien Cristau <julien.cristau at logilab.fr>
# Date 1457026459 -3600
# Thu Mar 03 18:34:19 2016 +0100
# Node ID 6c153cbad4a032861417dbba9d1d90332964ab5f
# Parent 549ff28a345f595cad7e06fb08c2ac6973e2f030
patch: when importing from email, RFC2047-decode From/Subject headers
I'm not too sure about the Subject part: it should be possible to use
the charset information from the email (RFC2047 encoding and the
Content-Type header), but mercurial seems to use its own encoding
instead (in the test, that means the commit message ends up as "????"
if the import is done without --encoding utf-8). Advice welcome.
Reported at https://bugs.debian.org/737498
diff --git a/mercurial/patch.py b/mercurial/patch.py
--- a/mercurial/patch.py
+++ b/mercurial/patch.py
@@ -201,19 +201,28 @@ def extract(ui, fileobj):
# (this heuristic is borrowed from quilt)
diffre = re.compile(r'^(?:Index:[ \t]|diff[ \t]|RCS file: |'
r'retrieving revision [0-9]+(\.[0-9]+)*$|'
r'---[ \t].*?^\+\+\+[ \t]|'
r'\*\*\*[ \t].*?^---[ \t])', re.MULTILINE|re.DOTALL)
+ def decode_header(header):
+ if header is None:
+ return None
+ parts = []
+ for part, charset in email.Header.decode_header(header):
+ if charset is None:
+ charset = 'ascii'
+ parts.append(part.decode(charset))
+ return encoding.tolocal(u' '.join(parts).encode('utf-8'))
data = {}
fd, tmpname = tempfile.mkstemp(prefix='hg-patch-')
tmpfp = os.fdopen(fd, 'w')
try:
msg = email.Parser.Parser().parse(fileobj)
- subject = msg['Subject']
- data['user'] = msg['From']
+ subject = decode_header(msg['Subject'])
+ data['user'] = decode_header(msg['From'])
if not subject and not data['user']:
# Not an email, restore parsed headers if any
subject = '\n'.join(': '.join(h) for h in msg.items()) + '\n'
# should try to parse msg['Date']
diff --git a/tests/test-import-git.t b/tests/test-import-git.t
--- a/tests/test-import-git.t
+++ b/tests/test-import-git.t
@@ -820,6 +820,29 @@ Test corner case involving copies and mu
> a
> +b
> EOF
applying patch from stdin
+Test email metadata
+
+ $ hg revert -qa
+ $ hg --encoding utf-8 import - <<EOF
+ > From: =?UTF-8?q?Rapha=C3=ABl=20Hertzog?= <hertzog at debian.org>
+ > Subject: [PATCH] =?UTF-8?q?=C5=A7=E2=82=AC=C3=9F=E1=B9=AA?=
+ >
+ > diff --git a/a b/a
+ > --- a/a
+ > +++ b/a
+ > @@ -1,1 +1,2 @@
+ > a
+ > +a
+ > EOF
+ applying patch from stdin
+ $ hg --encoding utf-8 log -r .
+ changeset: 2:* (glob)
+ tag: tip
+ user: Rapha\xc3\xabl Hertzog <hertzog at debian.org> (esc)
+ date: * (glob)
+ summary: \xc5\xa7\xe2\x82\xac\xc3\x9f\xe1\xb9\xaa (esc)
+
+
$ cd ..
More information about the Python-apps-team
mailing list