Bug#863880: diffoscope: Document feeders.
Daniel Shahaf
danielsh at apache.org
Mon May 29 15:14:54 UTC 2017
Source: diffoscope
Version: 82
Severity: wishlist
Tags: patch
Dear Maintainer,
Please find attached a patch documenting the internal "feeder" concept.
The patch was developed against 00f6b7d53d8f7f30701e51dce29942145406c369, and diff.py
has changed since then, but AFAICT not in ways that affect this patch.
Cheers,
Daniel
[[[
From 64819ae8000a2677f4b1d7e599787becae2ac963 Mon Sep 17 00:00:00 2001
From: Daniel Shahaf <danielsh at apache.org>
Date: Mon, 29 May 2017 15:14:54 +0000
Subject: [PATCH 2/2] diffoscope.diff: Document feeders.
---
diffoscope/diff.py | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 58 insertions(+)
diff --git a/diffoscope/diff.py b/diffoscope/diff.py
index a300217..bd6e2d8 100644
--- a/diffoscope/diff.py
+++ b/diffoscope/diff.py
@@ -235,6 +235,64 @@ class FIFOFeeder(threading.Thread):
if self._exception is not None:
raise self._exception
+class _Feeder:
+ """A 'feeder' is a specialized writer.
+
+ A 'feeder' is a callable that takes as argument a writeable file, and writes
+ to it. Feeders can transform the written data, truncate it, checksum it,
+ and so on. The callable must return True to represent that the data had
+ a terminating newline, and False otherwise.
+
+ Feeders are created by the functions make_feeder_from_raw_reader() and
+ empty_file_feeder(). The returned objects are closures, and are not
+ (currently?) instances of any particular class.
+ """
+ pass
+
+def empty_file_feeder():
+ """Returns a feeder that simulates an empty file.
+
+ See _Feeder for feeders.
+ """
+ def feeder(f):
+ return False
+ return feeder
+
+def make_feeder_from_raw_reader(in_file, filter=None):
+ """Create a feeder that checksums, truncates, and transcodes the data.
+ The optional argument FILTER is a callable that gets passed each line,
+ and returns the line that should be used in its stead. (There is no
+ facility for FILTER to discard a line entirely.)
+
+ See _Feeder for feeders.
+ """
+ def feeder(out_file):
+ h = None
+ end_nl = False
+ max_lines = Config().max_diff_input_lines
+ line_count = 0
+
+ if max_lines < float("inf"):
+ h = hashlib.sha1()
+
+ for buf in in_file:
+ line_count += 1
+ out = filter(buf) if filter else buf
+ if h:
+ h.update(out)
+ if line_count < max_lines:
+ out_file.write(out)
+ end_nl = buf[-1] == '\n'
+
+ if h and line_count >= max_lines:
+ out_file.write("[ Too much input for diff (SHA1: {}) ]\n".format(
+ h.hexdigest(),
+ ).encode('utf-8'))
+ end_nl = True
+
+ return end_nl
+ return feeder
+
def diff(feeder1, feeder2):
tmpdir = get_temporary_directory().name
]]]
More information about the Reproducible-builds
mailing list