[Secure-testing-commits] r1939 - bin lib/python

Florian Weimer fw at costa.debian.org
Mon Sep 12 20:08:46 UTC 2005


Author: fw
Date: 2005-09-12 20:08:46 +0000 (Mon, 12 Sep 2005)
New Revision: 1939

Added:
   bin/apt-update-file
Modified:
   lib/python/debian_support.py
Log:
lib/python/debian_support.py:
  Add support for downloading package file diffs.

bin/apt-update-file:
  Driver script for the new functionality.

(I will use this functionality to implement package database
replication.  The goal is to keep a local copy of all the interesting
data, so that we no longer need to consult madison etc.)


Added: bin/apt-update-file
===================================================================
--- bin/apt-update-file	2005-09-12 18:53:42 UTC (rev 1938)
+++ bin/apt-update-file	2005-09-12 20:08:46 UTC (rev 1939)
@@ -0,0 +1,30 @@
+#!/usr/bin/python
+
+# This script is mainly used to demo the updateFile function.
+
+import os
+import os.path
+import string
+import sys
+
+def setup_paths():
+    check_file = 'lib/python/debian_support.py'
+    path = os.getcwd()
+    while 1:
+        if os.path.exists("%s/%s" % (path, check_file)):
+            sys.path = [path + '/lib/python'] + sys.path
+            return path
+        idx = string.rfind(path, '/')
+        if idx == -1:
+            raise ImportError, "could not setup paths"
+        path = path[0:idx]
+root_path = setup_paths()
+
+import bugs
+import debian_support
+
+if len(sys.argv) <> 3:
+    sys.stderr.write("usage: apt-update-file REMOTE LOCAL\n")
+    sys.exit(1)
+
+debian_support.updateFile(sys.argv[1], sys.argv[2], verbose=True)


Property changes on: bin/apt-update-file
___________________________________________________________________
Name: svn:executable
   + *

Modified: lib/python/debian_support.py
===================================================================
--- lib/python/debian_support.py	2005-09-12 18:53:42 UTC (rev 1938)
+++ lib/python/debian_support.py	2005-09-12 20:08:46 UTC (rev 1939)
@@ -17,7 +17,9 @@
 
 """This module implements facilities to deal with Debian-specific metadata."""
 
+import os
 import re
+import sha
 import types
 
 class ParseError(Exception):
@@ -103,7 +105,7 @@
     Objects of this class can be used to read Debian's Source and
     Packages files."""
 
-    re_field = re.compile(r'^([A-Za-z][A-Za-z0-9-]+):\s+(.*?)\s*$')
+    re_field = re.compile(r'^([A-Za-z][A-Za-z0-9-]+):(?:\s+(.*?))?\s*$')
     re_continuation = re.compile(r'^\s+(?:\.|(\S.*?)\s*)$')
 
     def __init__(self, name, fileObj=None):
@@ -137,6 +139,7 @@
             if not match:
                 self.raiseSyntaxError("expected package field")
             (name, contents) = match.groups()
+            contents = contents or ''
 
             while True:
                 line = self.file.readline()
@@ -150,6 +153,8 @@
                 else:
                     break
             pkg.append((name, contents))
+        if pkg:
+            yield pkg
 
     def raiseSyntaxError(self, msg, lineno=None):
         if lineno is None:
@@ -186,6 +191,188 @@
         return None
 del listReleases
 
+def readLinesSHA1(lines):
+    m = sha.new()
+    for l in lines:
+        m.update(l)
+    return m.hexdigest()
+
+def patchesFromEdScript(source,
+                        re_cmd=re.compile(r'^(\d+)(?:,(\d+))?([acd])$')):
+    """Converts source to a stream of patches.
+
+    Patches are triples of line indexes:
+
+    - first line to be replaced
+    - one past the last line being replaced
+    - list of line replacements
+
+    This is enough to model arbitrary additions, deletions and
+    replacements.
+    """
+
+    i = iter(source)
+    
+    for line in i:
+        match = re_cmd.match(line)
+        if match is None:
+            raise ValueError, "invalid patch command: " + `line`
+
+        (first, last, cmd) = match.groups()
+        first = int(first)
+        if last is not None:
+            last = int(last)
+
+        if cmd == 'd':
+            first = first - 1
+            if last is None:
+                last = first + 1
+            yield (first, last, [])
+            continue
+
+        if cmd == 'a':
+            if last is not None:
+                raise ValueError, "invalid patch argument: " + `line`
+            last = first
+        else:                           # cmd == c
+            first = first - 1
+            if last is None:
+                last = first + 1
+
+        lines = []
+        for l in i:
+            if l == '':
+                raise ValueError, "end of stream in command: " + `line`
+            if l == '.\n' or l == '.':
+                break
+            lines.append(l)
+        yield (first, last, lines)
+
+def patchLines(lines, patches):
+    """Applies patches to lines.  Updates lines in place."""
+    for (first, last, args) in patches:
+        lines[first:last] = args
+
+def replaceFile(lines, local):
+    new_file = file(local + '.new', 'w+')
+    for l in lines:
+        new_file.write(l)
+    new_file.close()
+    os.rename(local + '.new', local)
+
+def downloadGunzipLines(remote):
+    """Downloads a file from a remote location and gunzips it.
+
+    Returns the lines in the file."""
+
+    # The implementation is rather crude, but it seems that the gzip
+    # module needs a real file for input.
+
+    import gzip
+    import tempfile
+    import urllib
+
+    (handle, fname) = tempfile.mkstemp()
+    try:
+        os.close(handle)
+        (filename, headers) = urllib.urlretrieve(remote, fname)
+        gfile = gzip.GzipFile(filename)
+        lines = gfile.readlines()
+        gfile.close()
+    finally:
+        os.unlink(fname)
+    return lines
+        
+def downloadFile(remote, local):
+    """Copies a gzipped remote file to the local system.
+
+    remote - URL, without the .gz suffix
+    local - name of the local file
+    """
+    
+    lines = downloadGunzipLines(remote + '.gz')
+    replaceFile(lines, local)
+    return lines
+
+def updateFile(remote, local, verbose=None):
+    """Updates the local file by downloading a remote patch.
+
+    Returns a list of lines in the local file.
+    """
+
+    try:
+        local_file = file(local)
+    except OSError:
+        return downloadFile(remote, local)
+
+    lines = local_file.readlines()
+    local_file.close()
+    local_hash = readLinesSHA1(lines)
+    patches_to_apply = []
+    patch_hashes = {}
+    
+    import urllib
+    index_name = remote + '.diff/Index'
+
+    re_whitespace=re.compile('\s+')
+    
+    for fields in PackageFile(index_name, urllib.urlopen(index_name)):
+        for (field, value) in fields:
+            if field == 'SHA1-Current':
+                (remote_hash, remote_size) = value.split(' ')
+                if local_hash == remote_hash:
+                    if verbose:
+                        print "updateFile: local file is up-to-date"
+                    return lines
+                continue
+
+            if field =='SHA1-History':
+                for entry in value.splitlines():
+                    if entry == '':
+                        continue
+                    (hist_hash, hist_size, patch_name) \
+                                = re_whitespace.split(entry)
+
+                    # After the first patch, we have to apply all
+                    # remaining patches.
+                    if patches_to_apply or  hist_hash == local_hash:
+                        patches_to_apply.append(patch_name)
+                        
+                continue
+            
+            if field == 'SHA1-Patches':
+                for entry in value.splitlines():
+                    if entry == '':
+                        continue
+                    (patch_hash, patch_size, patch_name) \
+                                 = re_whitespace.split(entry)
+                    patch_hashes[patch_name] = patch_hash
+                continue
+            
+            if verbose:
+                print "updateFile: field %s ignored" % `field`
+        
+    if not patches_to_apply:
+        if verbose:
+            print "updateFile: could not find historic entry", local_hash
+        return downloadFile(remote, local)
+
+    for patch_name in patches_to_apply:
+        print "updateFile: downloading patch " + `patch_name`
+        patch_contents = downloadGunzipLines(remote + '.diff/' + patch_name
+                                          + '.gz')
+        if readLinesSHA1(patch_contents ) <> patch_hashes[patch_name]:
+            raise ValueError, "patch %s was garbled" % `patch_name`
+        patchLines(lines, patchesFromEdScript(patch_contents))
+        
+    new_hash = readLinesSHA1(lines)
+    if new_hash <> remote_hash:
+        raise ValueError, ("patch failed, got %s instead of %s"
+                           % (new_hash, remote_hash))
+
+    replaceFile(lines, local)
+    return lines
+
 def test():
     # Version
     assert Version('0') < Version('a')
@@ -210,5 +397,20 @@
     # for p in PackageFile('../../data/packages/sarge/Packages.i386'):
     #     assert p[0][0] == 'Package'
 
+    # Helper routines
+    assert readLinesSHA1([]) == 'da39a3ee5e6b4b0d3255bfef95601890afd80709'
+    assert readLinesSHA1(['1\n', '23\n']) \
+           == '14293c9bd646a15dc656eaf8fba95124020dfada'
+
+    file_a = map(lambda x: "%d\n" % x, range(1, 18))
+    file_b = ['0\n', '1\n', '<2>\n', '<3>\n', '4\n', '5\n', '7\n', '8\n',
+              '11\n', '12\n', '<13>\n', '14\n', '15\n', 'A\n', 'B\n', 'C\n',
+              '16\n', '17\n',]
+    patch = ['15a\n', 'A\n', 'B\n', 'C\n', '.\n', '13c\n', '<13>\n', '.\n',
+             '9,10d\n', '6d\n', '2,3c\n', '<2>\n', '<3>\n', '.\n', '0a\n',
+             '0\n', '.\n']
+    patchLines(file_a, patchesFromEdScript(patch))
+    assert ''.join(file_b) == ''.join(file_a)
+
 if __name__ == "__main__":
     test()




More information about the Secure-testing-commits mailing list