[med-svn] [Git][med-team/mypy][master] 6 commits: routine-update: Standards-Version: 4.5.1

Andreas Tille gitlab at salsa.debian.org
Tue Dec 8 21:16:25 GMT 2020



Andreas Tille pushed to branch master at Debian Med / mypy


Commits:
78ee7973 by Andreas Tille at 2020-12-08T11:08:09+01:00
routine-update: Standards-Version: 4.5.1

- - - - -
05913f0c by Andreas Tille at 2020-12-08T11:08:17+01:00
routine-update: Remove trailing whitespace in debian/rules

- - - - -
c3c6a0e4 by Andreas Tille at 2020-12-08T11:08:26+01:00
routine-update: watch file standard 4

- - - - -
a017471d by Andreas Tille at 2020-12-08T12:47:43+01:00
Do not hardcode Python3.8

- - - - -
443144d6 by Andreas Tille at 2020-12-08T21:39:51+01:00
Remove code from the test suite that is no longer valid on Python 3.9 and is no longer maintained upstream

- - - - -
96cfb5a4 by Andreas Tille at 2020-12-08T21:57:59+01:00
Upload to unstable

- - - - -


6 changed files:

- debian/changelog
- debian/control
- + debian/patches/015190f1fb2ce024c1ae022a7665c382b27167bf.patch
- debian/patches/series
- debian/rules
- debian/watch


Changes:

=====================================
debian/changelog
=====================================
@@ -1,3 +1,14 @@
+mypy (0.790-3) unstable; urgency=medium
+
+  * Team upload.
+  * Standards-Version: 4.5.1 (routine-update)
+  * Remove trailing whitespace in debian/rules (routine-update)
+  * watch file standard 4 (routine-update)
+  * Do not hardcode Python3.8
+    Closes: #976779
+
+ -- Andreas Tille <tille at debian.org>  Tue, 08 Dec 2020 21:40:01 +0100
+
 mypy (0.790-2) unstable; urgency=medium
 
   * debian/patches/py39: from upstream to support Python 3.9


=====================================
debian/control
=====================================
@@ -22,7 +22,7 @@ Build-Depends: debhelper-compat (= 13),
                python3-typing-extensions,
                python3-virtualenv <!nocheck>,
                python3-all-dev
-Standards-Version: 4.5.0
+Standards-Version: 4.5.1
 Vcs-Browser: https://salsa.debian.org/med-team/mypy
 Vcs-Git: https://salsa.debian.org/med-team/mypy.git
 Homepage: http://www.mypy-lang.org/


=====================================
debian/patches/015190f1fb2ce024c1ae022a7665c382b27167bf.patch
=====================================
@@ -0,0 +1,882 @@
+From 015190f1fb2ce024c1ae022a7665c382b27167bf Mon Sep 17 00:00:00 2001
+From: hauntsaninja <>
+Date: Sat, 29 Aug 2020 15:24:32 -0700
+Subject: [PATCH] samples: remove crawl.py
+
+This is no longer valid code on Python 3.9, so py39 CI fails.
+We've talked about not investing in keeping these samples up to date:
+https://github.com/python/mypy/pull/8838#issuecomment-630070909
+---
+ test-data/samples/crawl.py | 863 -------------------------------------
+ 1 file changed, 863 deletions(-)
+ delete mode 100644 test-data/samples/crawl.py
+
+diff --git a/test-data/samples/crawl.py b/test-data/samples/crawl.py
+deleted file mode 100644
+index 2caf631e0c..0000000000
+--- a/test-data/samples/crawl.py
++++ /dev/null
+@@ -1,863 +0,0 @@
+-#!/usr/bin/env python3.4
+-
+-"""A simple web crawler."""
+-
+-# This is cloned from <asyncio>/examples/crawl.py,
+-# with type annotations added (PEP 484).
+-#
+-# TODO: convert to `async def` + `await` (PEP 492).
+-
+-import argparse
+-import asyncio
+-import cgi
+-from http.client import BadStatusLine
+-import logging
+-import re
+-import sys
+-import time
+-import urllib.parse
+-from typing import Any, Generator, IO, Optional, Sequence, Set, Tuple, List, Dict
+-
+-
+-ARGS = argparse.ArgumentParser(description="Web crawler")
+-ARGS.add_argument(
+-    '--iocp', action='store_true', dest='iocp',
+-    default=False, help='Use IOCP event loop (Windows only)')
+-ARGS.add_argument(
+-    '--select', action='store_true', dest='select',
+-    default=False, help='Use Select event loop instead of default')
+-ARGS.add_argument(
+-    'roots', nargs='*',
+-    default=[], help='Root URL (may be repeated)')
+-ARGS.add_argument(
+-    '--max_redirect', action='store', type=int, metavar='N',
+-    default=10, help='Limit redirection chains (for 301, 302 etc.)')
+-ARGS.add_argument(
+-    '--max_tries', action='store', type=int, metavar='N',
+-    default=4, help='Limit retries on network errors')
+-ARGS.add_argument(
+-    '--max_tasks', action='store', type=int, metavar='N',
+-    default=100, help='Limit concurrent connections')
+-ARGS.add_argument(
+-    '--max_pool', action='store', type=int, metavar='N',
+-    default=100, help='Limit connection pool size')
+-ARGS.add_argument(
+-    '--exclude', action='store', metavar='REGEX',
+-    help='Exclude matching URLs')
+-ARGS.add_argument(
+-    '--strict', action='store_true',
+-    default=True, help='Strict host matching (default)')
+-ARGS.add_argument(
+-    '--lenient', action='store_false', dest='strict',
+-    default=False, help='Lenient host matching')
+-ARGS.add_argument(
+-    '-v', '--verbose', action='count', dest='level',
+-    default=1, help='Verbose logging (repeat for more verbose)')
+-ARGS.add_argument(
+-    '-q', '--quiet', action='store_const', const=0, dest='level',
+-    default=1, help='Quiet logging (opposite of --verbose)')
+-
+-
+-ESCAPES = [('quot', '"'),
+-           ('gt', '>'),
+-           ('lt', '<'),
+-           ('amp', '&')  # Must be last.
+-           ]
+-
+-
+-def unescape(url: str) -> str:
+-    """Turn & into &, and so on.
+-
+-    This is the inverse of cgi.escape().
+-    """
+-    for name, char in ESCAPES:
+-        url = url.replace('&' + name + ';', char)
+-    return url
+-
+-
+-def fix_url(url: str) -> str:
+-    """Prefix a schema-less URL with http://."""
+-    if '://' not in url:
+-        url = 'http://' + url
+-    return url
+-
+-
+-class Logger:
+-
+-    def __init__(self, level: int) -> None:
+-        self.level = level
+-
+-    def _log(self, n: int, args: Sequence[Any]) -> None:
+-        if self.level >= n:
+-            print(*args, file=sys.stderr, flush=True)
+-
+-    def log(self, n: int, *args: Any) -> None:
+-        self._log(n, args)
+-
+-    def __call__(self, n: int, *args: Any) -> None:
+-        self._log(n, args)
+-
+-
+-KeyTuple = Tuple[str, int, bool]
+-
+-
+-class ConnectionPool:
+-    """A connection pool.
+-
+-    To open a connection, use reserve().  To recycle it, use unreserve().
+-
+-    The pool is mostly just a mapping from (host, port, ssl) tuples to
+-    lists of Connections.  The currently active connections are *not*
+-    in the data structure; get_connection() takes the connection out,
+-    and recycle_connection() puts it back in.  To recycle a
+-    connection, call conn.close(recycle=True).
+-
+-    There are limits to both the overall pool and the per-key pool.
+-    """
+-
+-    def __init__(self, log: Logger, max_pool: int = 10, max_tasks: int = 5) -> None:
+-        self.log = log
+-        self.max_pool = max_pool  # Overall limit.
+-        self.max_tasks = max_tasks  # Per-key limit.
+-        self.loop = asyncio.get_event_loop()
+-        self.connections = {}  # type: Dict[KeyTuple, List[Connection]]
+-        self.queue = []  # type: List[Connection]
+-
+-    def close(self) -> None:
+-        """Close all connections available for reuse."""
+-        for conns in self.connections.values():
+-            for conn in conns:
+-                conn.close()
+-        self.connections.clear()
+-        self.queue.clear()
+-
+-    @asyncio.coroutine
+-    def get_connection(self, host: str, port: int, ssl: bool) -> Generator[Any, None, 'Connection']:
+-        """Create or reuse a connection."""
+-        port = port or (443 if ssl else 80)
+-        try:
+-            ipaddrs = yield from self.loop.getaddrinfo(host, port)
+-        except Exception as exc:
+-            self.log(0, 'Exception %r for (%r, %r)' % (exc, host, port))
+-            raise
+-        self.log(1, '* %s resolves to %s' %
+-                    (host, ', '.join(ip[4][0] for ip in ipaddrs)))
+-
+-        # Look for a reusable connection.
+-        for _1, _2, _3, _4, (h, p, *_5) in ipaddrs:
+-            key = h, p, ssl
+-            conn = None
+-            conns = self.connections.get(key)
+-            while conns:
+-                conn = conns.pop(0)
+-                self.queue.remove(conn)
+-                if not conns:
+-                    del self.connections[key]
+-                if conn.stale():
+-                    self.log(1, 'closing stale connection for', key)
+-                    conn.close()  # Just in case.
+-                else:
+-                    self.log(1, '* Reusing pooled connection', key,
+-                                'FD =', conn.fileno())
+-                    return conn
+-
+-        # Create a new connection.
+-        conn = Connection(self.log, self, host, port, ssl)
+-        yield from conn.connect()
+-        self.log(1, '* New connection', conn.key, 'FD =', conn.fileno())
+-        return conn
+-
+-    def recycle_connection(self, conn: 'Connection') -> None:
+-        """Make a connection available for reuse.
+-
+-        This also prunes the pool if it exceeds the size limits.
+-        """
+-        if conn.stale():
+-            conn.close()
+-            return
+-
+-        key = conn.key
+-        conns = self.connections.setdefault(key, [])
+-        conns.append(conn)
+-        self.queue.append(conn)
+-
+-        if len(conns) <= self.max_tasks and len(self.queue) <= self.max_pool:
+-            return
+-
+-        # Prune the queue.
+-
+-        # Close stale connections for this key first.
+-        stale = [conn for conn in conns if conn.stale()]
+-        if stale:
+-            for conn in stale:
+-                conns.remove(conn)
+-                self.queue.remove(conn)
+-                self.log(1, 'closing stale connection for', key)
+-                conn.close()
+-            if not conns:
+-                del self.connections[key]
+-
+-        # Close oldest connection(s) for this key if limit reached.
+-        while len(conns) > self.max_tasks:
+-            conn = conns.pop(0)
+-            self.queue.remove(conn)
+-            self.log(1, 'closing oldest connection for', key)
+-            conn.close()
+-
+-        if len(self.queue) <= self.max_pool:
+-            return
+-
+-        # Close overall stale connections.
+-        stale = [conn for conn in self.queue if conn.stale()]
+-        if stale:
+-            for conn in stale:
+-                conns = self.connections.get(conn.key)
+-                conns.remove(conn)
+-                self.queue.remove(conn)
+-                self.log(1, 'closing stale connection for', key)
+-                conn.close()
+-
+-        # Close oldest overall connection(s) if limit reached.
+-        while len(self.queue) > self.max_pool:
+-            conn = self.queue.pop(0)
+-            conns = self.connections.get(conn.key)
+-            c = conns.pop(0)
+-            assert conn == c, (conn.key, conn, c, conns)
+-            self.log(1, 'closing overall oldest connection for', conn.key)
+-            conn.close()
+-
+-
+-class Connection:
+-
+-    def __init__(self, log: Logger, pool: ConnectionPool, host: str, port: int, ssl: bool) -> None:
+-        self.log = log
+-        self.pool = pool
+-        self.host = host
+-        self.port = port
+-        self.ssl = ssl
+-        self.reader = None  # type: asyncio.StreamReader
+-        self.writer = None  # type: asyncio.StreamWriter
+-        self.key = None  # type: KeyTuple
+-
+-    def stale(self) -> bool:
+-        return self.reader is None or self.reader.at_eof()
+-
+-    def fileno(self) -> Optional[int]:
+-        writer = self.writer
+-        if writer is not None:
+-            transport = writer.transport
+-            if transport is not None:
+-                sock = transport.get_extra_info('socket')
+-                if sock is not None:
+-                    return sock.fileno()
+-        return None
+-
+-    @asyncio.coroutine
+-    def connect(self) -> Generator[Any, None, None]:
+-        self.reader, self.writer = yield from asyncio.open_connection(
+-            self.host, self.port, ssl=self.ssl)
+-        peername = self.writer.get_extra_info('peername')
+-        if peername:
+-            self.host, self.port = peername[:2]
+-        else:
+-            self.log(1, 'NO PEERNAME???', self.host, self.port, self.ssl)
+-        self.key = self.host, self.port, self.ssl
+-
+-    def close(self, recycle: bool = False) -> None:
+-        if recycle and not self.stale():
+-            self.pool.recycle_connection(self)
+-        else:
+-            self.writer.close()
+-            self.pool = self.reader = self.writer = None
+-
+-
+-class Request:
+-    """HTTP request.
+-
+-    Use connect() to open a connection; send_request() to send the
+-    request; get_response() to receive the response headers.
+-    """
+-
+-    def __init__(self, log: Logger, url: str, pool: ConnectionPool) -> None:
+-        self.log = log
+-        self.url = url
+-        self.pool = pool
+-        self.parts = urllib.parse.urlparse(self.url)
+-        self.scheme = self.parts.scheme
+-        assert self.scheme in ('http', 'https'), repr(url)
+-        self.ssl = self.parts.scheme == 'https'
+-        self.netloc = self.parts.netloc
+-        self.hostname = self.parts.hostname
+-        self.port = self.parts.port or (443 if self.ssl else 80)
+-        self.path = (self.parts.path or '/')
+-        self.query = self.parts.query
+-        if self.query:
+-            self.full_path = '%s?%s' % (self.path, self.query)
+-        else:
+-            self.full_path = self.path
+-        self.http_version = 'HTTP/1.1'
+-        self.method = 'GET'
+-        self.headers = []  # type: List[Tuple[str, str]]
+-        self.conn = None  # type: Connection
+-
+-    @asyncio.coroutine
+-    def connect(self) -> Generator[Any, None, None]:
+-        """Open a connection to the server."""
+-        self.log(1, '* Connecting to %s:%s using %s for %s' %
+-                    (self.hostname, self.port,
+-                     'ssl' if self.ssl else 'tcp',
+-                     self.url))
+-        self.conn = yield from self.pool.get_connection(self.hostname,
+-                                                        self.port, self.ssl)
+-
+-    def close(self, recycle: bool = False) -> None:
+-        """Close the connection, recycle if requested."""
+-        if self.conn is not None:
+-            if not recycle:
+-                self.log(1, 'closing connection for', self.conn.key)
+-            self.conn.close(recycle)
+-            self.conn = None
+-
+-    @asyncio.coroutine
+-    def putline(self, line: str) -> None:
+-        """Write a line to the connection.
+-
+-        Used for the request line and headers.
+-        """
+-        self.log(2, '>', line)
+-        self.conn.writer.write(line.encode('latin-1') + b'\r\n')
+-
+-    @asyncio.coroutine
+-    def send_request(self) -> Generator[Any, None, None]:
+-        """Send the request."""
+-        request_line = '%s %s %s' % (self.method, self.full_path,
+-                                     self.http_version)
+-        yield from self.putline(request_line)
+-        # TODO: What if a header is already set?
+-        self.headers.append(('User-Agent', 'asyncio-example-crawl/0.0'))
+-        self.headers.append(('Host', self.netloc))
+-        self.headers.append(('Accept', '*/*'))
+-        # self.headers.append(('Accept-Encoding', 'gzip'))
+-        for key, value in self.headers:
+-            line = '%s: %s' % (key, value)
+-            yield from self.putline(line)
+-        yield from self.putline('')
+-
+-    @asyncio.coroutine
+-    def get_response(self) -> Generator[Any, None, 'Response']:
+-        """Receive the response."""
+-        response = Response(self.log, self.conn.reader)
+-        yield from response.read_headers()
+-        return response
+-
+-
+-class Response:
+-    """HTTP response.
+-
+-    Call read_headers() to receive the request headers.  Then check
+-    the status attribute and call get_header() to inspect the headers.
+-    Finally call read() to receive the body.
+-    """
+-
+-    def __init__(self, log: Logger, reader: asyncio.StreamReader) -> None:
+-        self.log = log
+-        self.reader = reader
+-        self.http_version = None  # type: str  # 'HTTP/1.1'
+-        self.status = None  # type: int  # 200
+-        self.reason = None  # type: str  # 'Ok'
+-        self.headers = []  # type: List[Tuple[str, str]]  # [('Content-Type', 'text/html')]
+-
+-    @asyncio.coroutine
+-    def getline(self) -> Generator[Any, None, str]:
+-        """Read one line from the connection."""
+-        line = (yield from self.reader.readline()).decode('latin-1').rstrip()
+-        self.log(2, '<', line)
+-        return line
+-
+-    @asyncio.coroutine
+-    def read_headers(self) -> Generator[Any, None, None]:
+-        """Read the response status and the request headers."""
+-        status_line = yield from self.getline()
+-        status_parts = status_line.split(None, 2)
+-        if len(status_parts) != 3:
+-            self.log(0, 'bad status_line', repr(status_line))
+-            raise BadStatusLine(status_line)
+-        self.http_version, status, self.reason = status_parts
+-        self.status = int(status)
+-        while True:
+-            header_line = yield from self.getline()
+-            if not header_line:
+-                break
+-            # TODO: Continuation lines.
+-            key, value = header_line.split(':', 1)
+-            self.headers.append((key, value.strip()))
+-
+-    def get_redirect_url(self, default: str = '') -> str:
+-        """Inspect the status and return the redirect url if appropriate."""
+-        if self.status not in (300, 301, 302, 303, 307):
+-            return default
+-        return self.get_header('Location', default)
+-
+-    def get_header(self, key: str, default: str = '') -> str:
+-        """Get one header value, using a case insensitive header name."""
+-        key = key.lower()
+-        for k, v in self.headers:
+-            if k.lower() == key:
+-                return v
+-        return default
+-
+-    @asyncio.coroutine
+-    def read(self) -> Generator[Any, None, bytes]:
+-        """Read the response body.
+-
+-        This honors Content-Length and Transfer-Encoding: chunked.
+-        """
+-        nbytes = None
+-        for key, value in self.headers:
+-            if key.lower() == 'content-length':
+-                nbytes = int(value)
+-                break
+-        if nbytes is None:
+-            if self.get_header('transfer-encoding').lower() == 'chunked':
+-                self.log(2, 'parsing chunked response')
+-                blocks = []
+-                while True:
+-                    size_header = yield from self.reader.readline()
+-                    if not size_header:
+-                        self.log(0, 'premature end of chunked response')
+-                        break
+-                    self.log(3, 'size_header =', repr(size_header))
+-                    parts = size_header.split(b';')
+-                    size = int(parts[0], 16)
+-                    if size:
+-                        self.log(3, 'reading chunk of', size, 'bytes')
+-                        block = yield from self.reader.readexactly(size)
+-                        assert len(block) == size, (len(block), size)
+-                        blocks.append(block)
+-                    crlf = yield from self.reader.readline()
+-                    assert crlf == b'\r\n', repr(crlf)
+-                    if not size:
+-                        break
+-                body = b''.join(blocks)
+-                self.log(1, 'chunked response had', len(body),
+-                            'bytes in', len(blocks), 'blocks')
+-            else:
+-                self.log(3, 'reading until EOF')
+-                body = yield from self.reader.read()
+-                # TODO: Should make sure not to recycle the connection
+-                # in this case.
+-        else:
+-            body = yield from self.reader.readexactly(nbytes)
+-        return body
+-
+-
+-class Fetcher:
+-    """Logic and state for one URL.
+-
+-    When found in crawler.busy, this represents a URL to be fetched or
+-    in the process of being fetched; when found in crawler.done, this
+-    holds the results from fetching it.
+-
+-    This is usually associated with a task.  This references the
+-    crawler for the connection pool and to add more URLs to its todo
+-    list.
+-
+-    Call fetch() to do the fetching, then report() to print the results.
+-    """
+-
+-    def __init__(self, log: Logger, url: str, crawler: 'Crawler',
+-                 max_redirect: int = 10, max_tries: int = 4) -> None:
+-        self.log = log
+-        self.url = url
+-        self.crawler = crawler
+-        # We don't loop resolving redirects here -- we just use this
+-        # to decide whether to add the redirect URL to crawler.todo.
+-        self.max_redirect = max_redirect
+-        # But we do loop to retry on errors a few times.
+-        self.max_tries = max_tries
+-        # Everything we collect from the response goes here.
+-        self.task = None  # type: asyncio.Task
+-        self.exceptions = []  # type: List[Exception]
+-        self.tries = 0
+-        self.request = None  # type: Request
+-        self.response = None  # type: Response
+-        self.body = None  # type: bytes
+-        self.next_url = None  # type: str
+-        self.ctype = None  # type: str
+-        self.pdict = None  # type: Dict[str, str]
+-        self.encoding = None  # type: str
+-        self.urls = None  # type: Set[str]
+-        self.new_urls = None  # type: Set[str]
+-
+-    @asyncio.coroutine
+-    def fetch(self) -> Generator[Any, None, None]:
+-        """Attempt to fetch the contents of the URL.
+-
+-        If successful, and the data is HTML, extract further links and
+-        add them to the crawler.  Redirects are also added back there.
+-        """
+-        while self.tries < self.max_tries:
+-            self.tries += 1
+-            self.request = None
+-            try:
+-                self.request = Request(self.log, self.url, self.crawler.pool)
+-                yield from self.request.connect()
+-                yield from self.request.send_request()
+-                self.response = yield from self.request.get_response()
+-                self.body = yield from self.response.read()
+-                h_conn = self.response.get_header('connection').lower()
+-                if h_conn != 'close':
+-                    self.request.close(recycle=True)
+-                    self.request = None
+-                if self.tries > 1:
+-                    self.log(1, 'try', self.tries, 'for', self.url, 'success')
+-                break
+-            except (BadStatusLine, OSError) as exc:
+-                self.exceptions.append(exc)
+-                self.log(1, 'try', self.tries, 'for', self.url,
+-                            'raised', repr(exc))
+-                # import pdb; pdb.set_trace()
+-                # Don't reuse the connection in this case.
+-            finally:
+-                if self.request is not None:
+-                    self.request.close()
+-        else:
+-            # We never broke out of the while loop, i.e. all tries failed.
+-            self.log(0, 'no success for', self.url,
+-                        'in', self.max_tries, 'tries')
+-            return
+-        next_url = self.response.get_redirect_url()
+-        if next_url:
+-            self.next_url = urllib.parse.urljoin(self.url, next_url)
+-            if self.max_redirect > 0:
+-                self.log(1, 'redirect to', self.next_url, 'from', self.url)
+-                self.crawler.add_url(self.next_url, self.max_redirect - 1)
+-            else:
+-                self.log(0, 'redirect limit reached for', self.next_url,
+-                            'from', self.url)
+-        else:
+-            if self.response.status == 200:
+-                self.ctype = self.response.get_header('content-type')
+-                self.pdict = {}
+-                if self.ctype:
+-                    self.ctype, self.pdict = cgi.parse_header(self.ctype)
+-                self.encoding = self.pdict.get('charset', 'utf-8')
+-                if self.ctype == 'text/html':
+-                    body = self.body.decode(self.encoding, 'replace')
+-                    # Replace href with (?:href|src) to follow image links.
+-                    self.urls = set(re.findall(r'(?i)href=["\']?([^\s"\'<>]+)',
+-                                               body))
+-                    if self.urls:
+-                        self.log(1, 'got', len(self.urls),
+-                                    'distinct urls from', self.url)
+-                    self.new_urls = set()
+-                    for url in self.urls:
+-                        url = unescape(url)
+-                        url = urllib.parse.urljoin(self.url, url)
+-                        url, frag = urllib.parse.urldefrag(url)
+-                        if self.crawler.add_url(url):
+-                            self.new_urls.add(url)
+-
+-    def report(self, stats: 'Stats', file: IO[str] = None) -> None:
+-        """Print a report on the state for this URL.
+-
+-        Also update the Stats instance.
+-        """
+-        if self.task is not None:
+-            if not self.task.done():
+-                stats.add('pending')
+-                print(self.url, 'pending', file=file)
+-                return
+-            elif self.task.cancelled():
+-                stats.add('cancelled')
+-                print(self.url, 'cancelled', file=file)
+-                return
+-            elif self.task.exception():
+-                stats.add('exception')
+-                exc = self.task.exception()
+-                stats.add('exception_' + exc.__class__.__name__)
+-                print(self.url, exc, file=file)
+-                return
+-        if len(self.exceptions) == self.tries:
+-            stats.add('fail')
+-            exc = self.exceptions[-1]
+-            stats.add('fail_' + str(exc.__class__.__name__))
+-            print(self.url, 'error', exc, file=file)
+-        elif self.next_url:
+-            stats.add('redirect')
+-            print(self.url, self.response.status, 'redirect', self.next_url,
+-                  file=file)
+-        elif self.ctype == 'text/html':
+-            stats.add('html')
+-            size = len(self.body or b'')
+-            stats.add('html_bytes', size)
+-            if self.log.level:
+-                print(self.url, self.response.status,
+-                      self.ctype, self.encoding,
+-                      size,
+-                      '%d/%d' % (len(self.new_urls or ()), len(self.urls or ())),
+-                      file=file)
+-        elif self.response is None:
+-            print(self.url, 'no response object')
+-        else:
+-            size = len(self.body or b'')
+-            if self.response.status == 200:
+-                stats.add('other')
+-                stats.add('other_bytes', size)
+-            else:
+-                stats.add('error')
+-                stats.add('error_bytes', size)
+-                stats.add('status_%s' % self.response.status)
+-            print(self.url, self.response.status,
+-                  self.ctype, self.encoding,
+-                  size,
+-                  file=file)
+-
+-
+-class Stats:
+-    """Record stats of various sorts."""
+-
+-    def __init__(self) -> None:
+-        self.stats = {}  # type: Dict[str, int]
+-
+-    def add(self, key: str, count: int = 1) -> None:
+-        self.stats[key] = self.stats.get(key, 0) + count
+-
+-    def report(self, file: IO[str] = None) -> None:
+-        for key, count in sorted(self.stats.items()):
+-            print('%10d' % count, key, file=file)
+-
+-
+-class Crawler:
+-    """Crawl a set of URLs.
+-
+-    This manages three disjoint sets of URLs (todo, busy, done).  The
+-    data structures actually store dicts -- the values in todo give
+-    the redirect limit, while the values in busy and done are Fetcher
+-    instances.
+-    """
+-    def __init__(self, log: Logger,
+-                 roots: Set[str], exclude: str = None, strict: bool = True,  # What to crawl.
+-                 max_redirect: int = 10, max_tries: int = 4,  # Per-url limits.
+-                 max_tasks: int = 10, max_pool: int = 10,  # Global limits.
+-                 ) -> None:
+-        self.log = log
+-        self.roots = roots
+-        self.exclude = exclude
+-        self.strict = strict
+-        self.max_redirect = max_redirect
+-        self.max_tries = max_tries
+-        self.max_tasks = max_tasks
+-        self.max_pool = max_pool
+-        self.todo = {}  # type: Dict[str, int]
+-        self.busy = {}  # type: Dict[str, Fetcher]
+-        self.done = {}  # type: Dict[str, Fetcher]
+-        self.pool = ConnectionPool(self.log, max_pool, max_tasks)
+-        self.root_domains = set()  # type: Set[str]
+-        for root in roots:
+-            host = urllib.parse.urlparse(root).hostname
+-            if not host:
+-                continue
+-            if re.match(r'\A[\d\.]*\Z', host):
+-                self.root_domains.add(host)
+-            else:
+-                host = host.lower()
+-                if self.strict:
+-                    self.root_domains.add(host)
+-                    if host.startswith('www.'):
+-                        self.root_domains.add(host[4:])
+-                    else:
+-                        self.root_domains.add('www.' + host)
+-                else:
+-                    parts = host.split('.')
+-                    if len(parts) > 2:
+-                        host = '.'.join(parts[-2:])
+-                    self.root_domains.add(host)
+-        for root in roots:
+-            self.add_url(root)
+-        self.governor = asyncio.Semaphore(max_tasks)
+-        self.termination = asyncio.Condition()
+-        self.t0 = time.time()
+-        self.t1 = None  # type: Optional[float]
+-
+-    def close(self) -> None:
+-        """Close resources (currently only the pool)."""
+-        self.pool.close()
+-
+-    def host_okay(self, host: str) -> bool:
+-        """Check if a host should be crawled.
+-
+-        A literal match (after lowercasing) is always good.  For hosts
+-        that don't look like IP addresses, some approximate matches
+-        are okay depending on the strict flag.
+-        """
+-        host = host.lower()
+-        if host in self.root_domains:
+-            return True
+-        if re.match(r'\A[\d\.]*\Z', host):
+-            return False
+-        if self.strict:
+-            return self._host_okay_strictish(host)
+-        else:
+-            return self._host_okay_lenient(host)
+-
+-    def _host_okay_strictish(self, host: str) -> bool:
+-        """Check if a host should be crawled, strict-ish version.
+-
+-        This checks for equality modulo an initial 'www.' component.
+-         """
+-        if host.startswith('www.'):
+-            if host[4:] in self.root_domains:
+-                return True
+-        else:
+-            if 'www.' + host in self.root_domains:
+-                return True
+-        return False
+-
+-    def _host_okay_lenient(self, host: str) -> bool:
+-        """Check if a host should be crawled, lenient version.
+-
+-        This compares the last two components of the host.
+-        """
+-        parts = host.split('.')
+-        if len(parts) > 2:
+-            host = '.'.join(parts[-2:])
+-        return host in self.root_domains
+-
+-    def add_url(self, url: str, max_redirect: int = None) -> bool:
+-        """Add a URL to the todo list if not seen before."""
+-        if self.exclude and re.search(self.exclude, url):
+-            return False
+-        parsed = urllib.parse.urlparse(url)
+-        if parsed.scheme not in ('http', 'https'):
+-            self.log(2, 'skipping non-http scheme in', url)
+-            return False
+-        host = parsed.hostname
+-        if not self.host_okay(host):
+-            self.log(2, 'skipping non-root host in', url)
+-            return False
+-        if max_redirect is None:
+-            max_redirect = self.max_redirect
+-        if url in self.todo or url in self.busy or url in self.done:
+-            return False
+-        self.log(1, 'adding', url, max_redirect)
+-        self.todo[url] = max_redirect
+-        return True
+-
+-    @asyncio.coroutine
+-    def crawl(self) -> Generator[Any, None, None]:
+-        """Run the crawler until all finished."""
+-        with (yield from self.termination):
+-            while self.todo or self.busy:
+-                if self.todo:
+-                    url, max_redirect = self.todo.popitem()
+-                    fetcher = Fetcher(self.log, url,
+-                                      crawler=self,
+-                                      max_redirect=max_redirect,
+-                                      max_tries=self.max_tries,
+-                                      )
+-                    self.busy[url] = fetcher
+-                    fetcher.task = asyncio.Task(self.fetch(fetcher))
+-                else:
+-                    yield from self.termination.wait()
+-        self.t1 = time.time()
+-
+-    @asyncio.coroutine
+-    def fetch(self, fetcher: Fetcher) -> Generator[Any, None, None]:
+-        """Call the Fetcher's fetch(), with a limit on concurrency.
+-
+-        Once this returns, move the fetcher from busy to done.
+-        """
+-        url = fetcher.url
+-        with (yield from self.governor):
+-            try:
+-                yield from fetcher.fetch()  # Fetcher gonna fetch.
+-            finally:
+-                # Force GC of the task, so the error is logged.
+-                fetcher.task = None
+-        with (yield from self.termination):
+-            self.done[url] = fetcher
+-            del self.busy[url]
+-            self.termination.notify()
+-
+-    def report(self, file: IO[str] = None) -> None:
+-        """Print a report on all completed URLs."""
+-        if self.t1 is None:
+-            self.t1 = time.time()
+-        dt = self.t1 - self.t0
+-        if dt and self.max_tasks:
+-            speed = len(self.done) / dt / self.max_tasks
+-        else:
+-            speed = 0
+-        stats = Stats()
+-        print('*** Report ***', file=file)
+-        try:
+-            show = []  # type: List[Tuple[str, Fetcher]]
+-            show.extend(self.done.items())
+-            show.extend(self.busy.items())
+-            show.sort()
+-            for url, fetcher in show:
+-                fetcher.report(stats, file=file)
+-        except KeyboardInterrupt:
+-            print('\nInterrupted', file=file)
+-        print('Finished', len(self.done),
+-              'urls in %.3f secs' % dt,
+-              '(max_tasks=%d)' % self.max_tasks,
+-              '(%.3f urls/sec/task)' % speed,
+-              file=file)
+-        stats.report(file=file)
+-        print('Todo:', len(self.todo), file=file)
+-        print('Busy:', len(self.busy), file=file)
+-        print('Done:', len(self.done), file=file)
+-        print('Date:', time.ctime(), 'local time', file=file)
+-
+-
+-def main() -> None:
+-    """Main program.
+-
+-    Parse arguments, set up event loop, run crawler, print report.
+-    """
+-    args = ARGS.parse_args()
+-    if not args.roots:
+-        print('Use --help for command line help')
+-        return
+-
+-    log = Logger(args.level)
+-
+-    if args.iocp:
+-        if sys.platform == 'win32':
+-            from asyncio import ProactorEventLoop
+-            loop = ProactorEventLoop()  # type: ignore
+-            asyncio.set_event_loop(loop)
+-        else:
+-            assert False
+-    elif args.select:
+-        loop = asyncio.SelectorEventLoop()  # type: ignore
+-        asyncio.set_event_loop(loop)
+-    else:
+-        loop = asyncio.get_event_loop()  # type: ignore
+-
+-    roots = {fix_url(root) for root in args.roots}
+-
+-    crawler = Crawler(log,
+-                      roots, exclude=args.exclude,
+-                      strict=args.strict,
+-                      max_redirect=args.max_redirect,
+-                      max_tries=args.max_tries,
+-                      max_tasks=args.max_tasks,
+-                      max_pool=args.max_pool,
+-                      )
+-    try:
+-        loop.run_until_complete(crawler.crawl())  # Crawler gonna crawl.
+-    except KeyboardInterrupt:
+-        sys.stderr.flush()
+-        print('\nInterrupted\n')
+-    finally:
+-        crawler.report()
+-        crawler.close()
+-        loop.close()
+-
+-
+-if __name__ == '__main__':
+-    logging.basicConfig(level=logging.INFO) # type: ignore
+-    main()


=====================================
debian/patches/series
=====================================
@@ -3,3 +3,4 @@ older_pytest
 mypyc_analysis
 ignore_mypyc
 verbose
+015190f1fb2ce024c1ae022a7665c382b27167bf.patch


=====================================
debian/rules
=====================================
@@ -75,8 +75,7 @@ ifeq (,$(filter nocheck,$(DEB_BUILD_PROFILES)))
 	# 	--config-file {dir}/mypy_self_check.ini -p mypy" dh_auto_test
 	# ^^^ requires pytest > 6.0.0
 	dh_auto_install
-	#set -e; for v in $(PY3VERS); do 
-	set -e; for v in 3.8; do \
+	set -e; for v in $(PY3VERS); do \
 		PATH=$$PATH:$(CURDIR)/debian/mypy/usr/bin/ python$$v -m pytest -n auto \
 		-o testpaths=mypy/test -o python_files=test*.py \
 		-k "not StubtestMiscUnit" \


=====================================
debian/watch
=====================================
@@ -1,3 +1,3 @@
-version=3
+version=4
 opts=uversionmangle=s/(rc|a|b|c)/~$1/ \
 https://pypi.debian.net/mypy/mypy-(.+)\.(?:zip|tgz|tbz|txz|(?:tar\.(?:gz|bz2|xz)))
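
The debian/watch hunk above only bumps the declared format from version=3 to
version=4; the uversionmangle option and the pypi.debian.net URL are kept.
A quick local check that the updated watch file still matches upstream
releases (a hypothetical invocation, assuming devscripts is installed):

    # Parse debian/watch and report the newest matching upstream tarball
    # without actually downloading it.
    uscan --no-download --verbose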



View it on GitLab: https://salsa.debian.org/med-team/mypy/-/compare/af00356f4b834627dc9e8604f9df61fb6b2ae5c1...96cfb5a427333ff3c4d70113f063916ac2af829d


