Bug#898022: diffoscope: Traceback when comparing paths with invalid unicode characters

Chris Lamb lamby at debian.org
Sun May 6 01:38:58 BST 2018


Package: diffoscope
Version: 93
Severity: important

Hi,

This was originally reported via
<https://github.com/lamby/trydiffoscope/issues/35>, but I think the bug is
in diffoscope itself.

So, given the following test:

  import os
  import pytest
  import subprocess
    
  def test_invalid_filename(capsys, tmpdir):
      base = str(tmpdir.mkdir('src')).encode('utf-8')
  
      a = os.path.join(base, b'\xf0\x28\x8c\x28')
      b = os.path.join(base, b'\xf0\x28\x8c\x29')
  
      with open(a, 'w'), open(b, 'w'):
          pass
  
      subprocess.check_call(('bin/diffoscope', a, b))

I get:

  ____________________________ test_invalid_filename _____________________________
  
  capsys = <_pytest.capture.CaptureFixture object at 0x7f25bd267710>
  tmpdir = local('/tmp/pytest-of-lamby/pytest-43/test_invalid_filename0')
  
      def test_invalid_filename(capsys, tmpdir):
          base = str(tmpdir.mkdir('src')).encode('utf-8')
      
          a = os.path.join(base, b'\xf0\x28\x8c\x28')
          b = os.path.join(base, b'\xf0\x28\x8c\x29')
      
          with open(a, 'w'), open(b, 'w'):
              pass
      
  >       subprocess.check_call(('bin/diffoscope', a, b))
  
  tests/test_filenames.py:34: 
  _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
  
  popenargs = (('bin/diffoscope', b'/tmp/pytest-of-lamby/pytest-43/test_invalid_filename0/src/\xf0(\x8c(', b'/tmp/pytest-of-lamby/pytest-43/test_invalid_filename0/src/\xf0(\x8c)'),)
  kwargs = {}, retcode = 2
  cmd = ('bin/diffoscope', b'/tmp/pytest-of-lamby/pytest-43/test_invalid_filename0/src/\xf0(\x8c(', b'/tmp/pytest-of-lamby/pytest-43/test_invalid_filename0/src/\xf0(\x8c)')
  
      def check_call(*popenargs, **kwargs):
          """Run command with arguments.  Wait for command to complete.  If
          the exit code was zero then return, otherwise raise
          CalledProcessError.  The CalledProcessError object will have the
          return code in the returncode attribute.
      
          The arguments are the same as for the call function.  Example:
      
          check_call(["ls", "-l"])
          """
          retcode = call(*popenargs, **kwargs)
          if retcode:
              cmd = kwargs.get("args")
              if cmd is None:
                  cmd = popenargs[0]
  >           raise CalledProcessError(retcode, cmd)
  E           subprocess.CalledProcessError: Command '('bin/diffoscope', b'/tmp/pytest-of-lamby/pytest-43/test_invalid_filename0/src/\xf0(\x8c(', b'/tmp/pytest-of-lamby/pytest-43/test_invalid_filename0/src/\xf0(\x8c)')' returned non-zero exit status 2.
  
  /usr/lib/python3.6/subprocess.py:291: CalledProcessError
  ------------------------------ Captured log setup ------------------------------
  locale.py                   33 DEBUG    Normalising locale, timezone, etc.
  __init__.py                128 DEBUG    Loaded 66 comparator classes
  __init__.py                128 DEBUG    Loaded 66 comparator classes
  ----------------------------- Captured stderr call -----------------------------
  Traceback (most recent call last):
    File "/home/lamby/git/debian/reproducible/diffoscope/diffoscope/main.py", line 448, in main
      sys.exit(run_diffoscope(parsed_args))
    File "/home/lamby/git/debian/reproducible/diffoscope/diffoscope/main.py", line 420, in run_diffoscope
      difference = compare_root_paths(path1, path2)
    File "/home/lamby/git/debian/reproducible/diffoscope/diffoscope/comparators/utils/compare.py", line 65, in compare_root_paths
      file1 = specialize(FilesystemFile(path1, container=container1))
    File "/home/lamby/git/debian/reproducible/diffoscope/diffoscope/comparators/utils/specialize.py", line 49, in specialize
      if try_recognize(file, cls, cls.recognizes):
    File "/home/lamby/git/debian/reproducible/diffoscope/diffoscope/comparators/utils/specialize.py", line 36, in try_recognize
      if not recognizes(file):
    File "/home/lamby/git/debian/reproducible/diffoscope/diffoscope/comparators/debian.py", line 169, in recognizes
      if not super().recognizes(file):
    File "/home/lamby/git/debian/reproducible/diffoscope/diffoscope/comparators/utils/file.py", line 141, in recognizes
      lambda m, t: t.search(m), file.magic_file_type),
    File "/home/lamby/git/debian/reproducible/diffoscope/diffoscope/comparators/utils/file.py", line 227, in magic_file_type
      self._magic_file_type = File.guess_file_type(self.path)
    File "/home/lamby/git/debian/reproducible/diffoscope/diffoscope/comparators/utils/file.py", line 71, in guess_file_type
      return self._mimedb.file(path)
    File "/usr/lib/python3/dist-packages/magic/compat.py", line 148, in file
      return Magic.__tostr(_file(self._magic_t, Magic.__tobytes(filename)))
    File "/usr/lib/python3/dist-packages/magic/compat.py", line 138, in __tobytes
      return bytes(b, 'utf-8')
  UnicodeEncodeError: 'utf-8' codec can't encode character '\udcf0' in position 58: surrogates not allowed
  =========================== 1 failed in 0.75 seconds ===========================


However, I can't seem to reproduce this minimally with file (via the Python
"magic" module) by itself:

  import magic
  filename = b'\xf0\x28\x8c\x28'
  with open(filename, 'w'):
      pass
  m = magic.open(magic.NONE)
  m.load()
  m.file(filename)


Regards,

-- 
      ,''`.
     : :'  :     Chris Lamb
     `. `'`      lamby at debian.org / chris-lamb.co.uk
       `-



More information about the Reproducible-builds mailing list