[Qa-jenkins-scm] [jenkins.debian.net] 01/01: reproducible debian: switch python database backend to SQLAlchemy

Holger Levsen holger at layer-acht.org
Fri Aug 19 08:41:40 UTC 2016


This is an automated email from the git hooks/post-receive script.

holger pushed a commit to branch master
in repository jenkins.debian.net.

commit b1b64d98e5971437abb3fe793f6fb58eab1ef4fd
Author: Valerie R Young <spectranaut at riseup.net>
Date:   Thu Aug 11 09:17:07 2016 -0400

    reproducible debian: switch python database backend to SQLAlchemy
    
    Signed-off-by: Holger Levsen <holger at layer-acht.org>
---
 bin/reproducible_common.py           |  48 ++++++++++++---
 bin/reproducible_notes.py            |  87 ++++++++++++++++----------
 bin/reproducible_remote_scheduler.py |  62 ++++++++++++++-----
 bin/reproducible_scheduler.py        | 116 ++++++++++++++++++++---------------
 bin/reproducible_setup_notify.py     |  15 ++---
 update_jdn.sh                        |   5 +-
 6 files changed, 221 insertions(+), 112 deletions(-)

diff --git a/bin/reproducible_common.py b/bin/reproducible_common.py
index eeb4ffb..dcbe05e 100755
--- a/bin/reproducible_common.py
+++ b/bin/reproducible_common.py
@@ -29,6 +29,7 @@ from traceback import print_exception
 from subprocess import call, check_call
 from tempfile import NamedTemporaryFile
 from datetime import datetime, timedelta
+from sqlalchemy import MetaData, Table, sql, create_engine
 
 DEBUG = False
 QUIET = False
@@ -92,6 +93,11 @@ with open(os.path.join(BIN_PATH, './meta_pkgset.csv'), newline='') as f:
     for line in csv.reader(f):
         META_PKGSET[int(line[0])] = (line[2], line[1])
 
+# init the database data and connection
+DB_ENGINE = create_engine("sqlite:///" + REPRODUCIBLE_DB)
+DB_METADATA = MetaData(DB_ENGINE)  # Get all table definitions
+conn_db = DB_ENGINE.connect()  # the local sqlite3 reproducible db
+
 parser = argparse.ArgumentParser()
 group = parser.add_mutually_exclusive_group()
 group.add_argument("-d", "--debug", action="store_true")
@@ -349,18 +355,46 @@ def write_html_page(title, body, destfile, no_header=False, style_note=False,
     with open(destfile, 'w', encoding='UTF-8') as fd:
         fd.write(html)
 
-def start_db_connection():
-    return sqlite3.connect(REPRODUCIBLE_DB, timeout=60)
+
+def db_table(table_name):
+    """Returns a SQLAlchemy Table object to be used in queries
+    using SQLAlchemy's Expressive Language.
+
+    Arguments:
+        table_name: a string corresponding to an existing table name
+    """
+    try:
+        return Table(table_name, DB_METADATA, autoload=True)
+    except sqlalchemy.exc.NoSuchTableError:
+        log.error("Table %s does not exist or schema for %s could not be loaded",
+                  table_name, REPRODUCIBLE_DB)
+        raise
+
 
 def query_db(query):
-    cursor = conn_db.cursor()
+    """Executes a raw SQL query. Return depends on query type.
+
+    Returns:
+        select:
+            list of tuples
+        update or delete:
+            the number of rows affected
+        insert:
+            None
+    """
     try:
-        cursor.execute(query)
+        result = conn_db.execute(query)
     except:
         print_critical_message('Error executing this query:\n' + query)
         raise
-    conn_db.commit()
-    return cursor.fetchall()
+
+    if result.returns_rows:
+        return result.fetchall()
+    elif result.supports_sane_rowcount() and result.rowcount > -1:
+        return result.rowcount
+    else:
+        return None
+
 
 def start_udd_connection():
     username = "public-udd-mirror"
@@ -797,8 +831,6 @@ class Package:
             return False
 
 
-# init the databases connections
-conn_db = start_db_connection()  # the local sqlite3 reproducible db
 # get_bugs() is the only user of this, let it initialize the connection itself,
 # during it's first call to speed up things when unneeded
 # also "share" the bugs, to avoid collecting them multiple times per run
diff --git a/bin/reproducible_notes.py b/bin/reproducible_notes.py
index 85c32ce..5528d93 100755
--- a/bin/reproducible_notes.py
+++ b/bin/reproducible_notes.py
@@ -8,12 +8,14 @@
 #
 # Import the content of the notes.git repository into the reproducible database
 
+from reproducible_common import *
+
+import os
 import apt
-import json
 import yaml
+import json
+from sqlalchemy import sql
 from apt_pkg import version_compare
-from reproducible_common import *
-import os
 
 NOTES = 'packages.yml'
 ISSUES = 'issues.yml'
@@ -100,53 +102,74 @@ def load_issues():
 
 
 def store_issues():
-    query = 'REPLACE INTO issues (name, url, description) ' + \
-            'VALUES (?, ?, ?)'
-    cursor = conn_db.cursor()
-    to_add = []
-    for issue in sorted(issues):
-        name = issue
+    issues_table = db_table('issues')
+    # Get existing issues
+    results = conn_db.execute(sql.select([issues_table.c.name]))
+    existing_issues = set([row[0] for row in results])
+    to_insert = []
+    to_update = []
+    for name in issues:
         url = issues[name]['url'] if 'url' in issues[name] else ''
         desc = issues[name]['description']
-        to_add.append((name, url, desc))
-    cursor.executemany(query, to_add)
-    conn_db.commit()
-    log.debug('Issues saved in the database')
+        if name in existing_issues:
+            to_update.append({
+                'issuename': name,
+                'url': url,
+                'description': desc
+            })
+            # remove this issue from the set, to know which to delete later
+            existing_issues.remove(name)
+        else:
+            to_insert.append({
+                'name': name,
+                'url': url,
+                'description': desc
+            })
 
+    if to_update:
+        update_query = issues_table.update().\
+                  where(issues_table.c.name == sql.bindparam('issuename'))
+        conn_db.execute(update_query, to_update)
+        log.debug('Issues updated in the database')
+    if to_insert:
+        conn_db.execute(issues_table.insert(), to_insert)
+        log.debug('Issues added to the database')
 
-def drop_old_issues():
-    old = [x[0] for x in query_db('SELECT name FROM issues')]
-    to_drop = [x for x in old if x not in issues]
-    if to_drop:
-        log.info("I'm about to remove the following issues: " + str(to_drop))
-    for issue in to_drop:
-        query_db('DELETE FROM issues WHERE name="{}"'.format(issue))
+    # if there are any existing issues left, delete them.
+    if existing_issues:
+        to_delete = [{'issuename': name} for name in existing_issues]
+        delete_query = issues_table.delete().\
+                  where(issues_table.c.name == sql.bindparam('issuename'))
+        conn_db.execute(delete_query, to_delete)
+        log.info("Removed the following issues: " + str(existing_issues))
 
 
 def store_notes():
     log.debug('Removing all notes')
-    query_db('DELETE FROM notes')
-    query = 'REPLACE INTO notes ' + \
-            '(package_id, version, issues, bugs, comments) ' + \
-            'VALUES (?, ?, ?, ?, ?)'
-    to_add = []
+    notes_table = db_table('notes')
+    conn_db.execute(notes_table.delete())
+    to_insert = []
     for entry in [x for y in sorted(notes) for x in notes[y]]:
         pkg_id = entry['id']
         pkg_version = entry['version']
         pkg_issues = json.dumps(entry['issues'])
         pkg_bugs = json.dumps(entry['bugs'])
         pkg_comments = entry['comments']
-        pkg = (pkg_id, pkg_version, pkg_issues, pkg_bugs, pkg_comments)
-        to_add.append(pkg)
-    cursor = conn_db.cursor()
-    cursor.executemany(query, to_add)
-    conn_db.commit()
-    log.info('Saved ' + str(len(to_add)) + ' notes in the database')
+        to_insert.append({
+            'id': pkg_id,
+            'version': pkg_version,
+            'issues': pkg_issues,
+            'bugs': pkg_bugs,
+            'comments': pkg_comments
+        })
+
+    if (len(to_insert)):
+        conn_db.execute(notes_table.insert(), to_insert)
+        log.info('Saved ' + str(len(to_insert)) + ' notes in the database')
 
 
 if __name__ == '__main__':
     notes = load_notes()
     issues = load_issues()
     store_issues()
-    drop_old_issues()
     store_notes()
diff --git a/bin/reproducible_remote_scheduler.py b/bin/reproducible_remote_scheduler.py
index a7179f6..7f846c8 100755
--- a/bin/reproducible_remote_scheduler.py
+++ b/bin/reproducible_remote_scheduler.py
@@ -11,7 +11,7 @@
 import sys
 import time
 import argparse
-
+from sqlalchemy import sql
 
 parser = argparse.ArgumentParser(
     description='Reschedule packages to re-test their reproducibility',
@@ -229,7 +229,8 @@ if amount + len(ids) > 200 and not local:
 
 
 # do the actual scheduling
-to_schedule = []
+add_to_schedule = []
+update_schedule = []
 save_schedule = []
 artifacts_value = 1 if artifacts else 0
 if notify_on_start:
@@ -238,23 +239,54 @@ elif notify or artifacts:
     do_notify = 1
 else:
     do_notify = 0
+
+schedule_table = db_table('schedule')
+existing_pkg_ids = dict(query_db(sql.select([
+        schedule_table.c.package_id,
+        schedule_table.c.id,
+    ]).where(schedule_table.c.package_id.in_(ids))))
+
 for id in ids:
-    to_schedule.append((id, date, artifacts_value, str(do_notify), requester,
-                        None))
-    save_schedule.append((id, requester, epoch))
-log.debug('Packages about to be scheduled: ' + str(to_schedule))
+    if id in existing_pkg_ids:
+        update_schedule.append({
+            'update_id': existing_pkg_ids[id],
+            'package_id': id,
+            'date_scheduled': date,
+            'save_artifacts': artifacts_value,
+            'notify': str(do_notify),
+            'scheduler': requester,
+        })
+    else:
+        add_to_schedule.append({
+            'package_id': id,
+            'date_scheduled': date,
+            'save_artifacts': artifacts_value,
+            'notify': str(do_notify),
+            'scheduler': requester,
+        })
+
+    save_schedule.append({
+        'package_id': id,
+        'requester': requester,
+        'date_request': epoch,
+    })
+
+log.debug('Packages about to be scheduled: ' + str(add_to_schedule)
+          + str(update_schedule))
 
-query1 = '''REPLACE INTO schedule
-    (package_id, date_scheduled, save_artifacts, notify, scheduler, message)
-    VALUES (?, ?, ?, ?, ?, ?)'''
-query2 = '''INSERT INTO manual_scheduler
-    (package_id, requester, date_request) VALUES (?, ?, ?)'''
+update_schedule_query = schedule_table.update().\
+                        where(schedule_table.c.id == sql.bindparam('update_id'))
+insert_schedule_query = schedule_table.insert()
+insert_manual_query = db_table('manual_scheduler').insert()
 
 if not dry_run:
-    cursor = conn_db.cursor()
-    cursor.executemany(query1, to_schedule)
-    cursor.executemany(query2, save_schedule)
-    conn_db.commit()
+    transaction = conn_db.begin()
+    if len(add_to_schedule):
+        conn_db.execute(insert_schedule_query, add_to_schedule)
+    if len(update_schedule):
+        conn_db.execute(update_schedule_query, update_schedule)
+    conn_db.execute(insert_manual_query, save_schedule)
+    transaction.commit()
 else:
     log.info('Ran with --dry-run, scheduled nothing')
 
diff --git a/bin/reproducible_scheduler.py b/bin/reproducible_scheduler.py
index 557c002..29769d6 100755
--- a/bin/reproducible_scheduler.py
+++ b/bin/reproducible_scheduler.py
@@ -18,6 +18,7 @@ import random
 from subprocess import call
 from apt_pkg import version_compare
 from urllib.request import urlopen
+from sqlalchemy import sql
 
 from reproducible_common import *
 from reproducible_html_live_status import generate_schedule
@@ -228,8 +229,8 @@ def update_sources_db(suite, arch, sources):
     pkgs_to_add = []
     updated_pkgs = []
     different_pkgs = [x for x in new_pkgs if x not in cur_pkgs]
-    log.debug('Packages different in the archive and in the db: ' +
-              str(different_pkgs))
+    log.debug('Packages different in the archive and in the db: %s',
+              different_pkgs)
     for pkg in different_pkgs:
         # pkg: (name, version, suite, arch)
         query = 'SELECT id, version, notify_maintainer FROM sources ' + \
@@ -238,7 +239,12 @@ def update_sources_db(suite, arch, sources):
         try:
             result = query_db(query)[0]
         except IndexError:  # new package
-            pkgs_to_add.append(pkg)
+            pkgs_to_add.append({
+                'name': pkg[0],
+                'version': pkg[1],
+                'suite': pkg[2],
+                'architecture': pkg[3],
+            })
             continue
         pkg_id = result[0]
         old_version = result[1]
@@ -246,53 +252,71 @@ def update_sources_db(suite, arch, sources):
         if version_compare(pkg[1], old_version) > 0:
             log.debug('New version: ' + str(pkg) + ' (we had  ' +
                       old_version + ')')
-            updated_pkgs.append(
-                (pkg_id, pkg[0], pkg[1], pkg[2], pkg[3], notify_maint))
+            updated_pkgs.append({
+                'update_id': pkg_id,
+                'name': pkg[0],
+                'version': pkg[1],
+                'suite': pkg[2],
+                'architecture': pkg[3],
+                'notify_maintainer': notify_maint,
+            })
     # Now actually update the database:
-    cursor = conn_db.cursor()
+    sources_table = db_table('sources')
     # updated packages
     log.info('Pushing ' + str(len(updated_pkgs)) +
              ' updated packages to the database...')
-    cursor.executemany(
-        'REPLACE INTO sources ' +
-        '(id, name, version, suite, architecture, notify_maintainer) ' +
-        'VALUES (?, ?, ?, ?, ?, ?)',
-        updated_pkgs)
-    conn_db.commit()
+    if updated_pkgs:
+        transaction = conn_db.begin()
+        update_query = sources_table.update().\
+                       where(sources_table.c.id == sql.bindparam('update_id'))
+        conn_db.execute(update_query, updated_pkgs)
+        transaction.commit()
+
     # new packages
-    log.info('Now inserting ' + str(len(pkgs_to_add)) +
-             ' new sources in the database: ' +
-             str(pkgs_to_add))
-    cursor.executemany('INSERT INTO sources ' +
-                       '(name, version, suite, architecture) ' +
-                       'VALUES (?, ?, ?, ?)', pkgs_to_add)
-    conn_db.commit()
+    if pkgs_to_add:
+        log.info('Now inserting %i new sources in the database: %s',
+                 len(pkgs_to_add), pkgs_to_add)
+        transaction = conn_db.begin()
+        conn_db.execute(sources_table.insert(), pkgs_to_add)
+        transaction.commit()
+
     # RM'ed packages
     cur_pkgs_name = [x[0] for x in cur_pkgs]
     new_pkgs_name = [x[0] for x in new_pkgs]
     rmed_pkgs = [x for x in cur_pkgs_name if x not in new_pkgs_name]
-    log.info('Now deleting ' + str(len(rmed_pkgs)) +
-             ' removed packages: ' + str(rmed_pkgs))
+    log.info('Now deleting %i removed packages: %s', len(rmed_pkgs),
+             rmed_pkgs)
     rmed_pkgs_id = []
     pkgs_to_rm = []
     query = 'SELECT id FROM sources WHERE name="{}" AND suite="{}" ' + \
             'AND architecture="{}"'
     for pkg in rmed_pkgs:
         result = query_db(query.format(pkg, suite, arch))
-        rmed_pkgs_id.extend(result)
-        pkgs_to_rm.append((pkg, suite, arch))
-    log.debug('removed packages ID: ' + str([str(x[0]) for x in rmed_pkgs_id]))
-    log.debug('removed packages: ' + str(pkgs_to_rm))
-    cursor.executemany('DELETE FROM sources '
-                       'WHERE id=?', rmed_pkgs_id)
-    cursor.executemany('DELETE FROM results '
-                       'WHERE package_id=?', rmed_pkgs_id)
-    cursor.executemany('DELETE FROM schedule '
-                       'WHERE package_id=?', rmed_pkgs_id)
-    cursor.executemany('INSERT INTO removed_packages '
-                       '(name, suite, architecture) '
-                       'VALUES (?, ?, ?)', pkgs_to_rm)
-    conn_db.commit()
+        rmed_pkgs_id.append({'deleteid': result[0][0]})
+        pkgs_to_rm.append({'name': pkg, 'suite': suite, 'architecture': arch})
+    log.debug('removed packages ID: %s',
+              [str(x['deleteid']) for x in rmed_pkgs_id])
+    log.debug('removed packages: %s', pkgs_to_rm)
+
+    if rmed_pkgs_id:
+        transaction = conn_db.begin()
+        results_table = db_table('results')
+        schedule_table = db_table('schedule')
+        removed_packages_table = db_table('removed_packages')
+
+        delete_sources_query = sources_table.delete().\
+            where(sources_table.c.id == sql.bindparam('deleteid'))
+        delete_results_query = results_table.delete().\
+            where(results_table.c.package_id == sql.bindparam('deleteid'))
+        delete_schedule_query = schedule_table.delete().\
+            where(schedule_table.c.package_id == sql.bindparam('deleteid'))
+
+        conn_db.execute(delete_sources_query, rmed_pkgs_id)
+        conn_db.execute(delete_results_query, rmed_pkgs_id)
+        conn_db.execute(delete_schedule_query, rmed_pkgs_id)
+        conn_db.execute(removed_packages_table.insert(), pkgs_to_rm)
+        transaction.commit()
+
     # finally check whether the db has the correct number of packages
     query = 'SELECT count(*) FROM sources WHERE suite="{}" ' + \
             'AND architecture="{}"'
@@ -301,14 +325,14 @@ def update_sources_db(suite, arch, sources):
     if int(pkgs_end[0][0]) != count_new_pkgs:
         print_critical_message('AH! The number of source in the Sources file' +
                                ' is different than the one in the DB!')
-        log.critical('source in the debian archive for the ' + suite +
-                     ' suite:' + str(count_new_pkgs))
-        log.critical('source in the reproducible db for the ' + suite +
-                     ' suite:' + str(pkgs_end[0][0]))
+        log.critical('source in the debian archive for the %s suite: %s',
+                     suite, str(count_new_pkgs))
+        log.critical('source in the reproducible db for the %s suite: %s',
+                     suite, str(pkgs_end[0][0]))
         sys.exit(1)
     if pkgs_to_add:
         log.info('Building pages for the new packages')
-        gen_packages_html([Package(x[0]) for x in pkgs_to_add], no_clean=True)
+        gen_packages_html([Package(x['name']) for x in pkgs_to_add], no_clean=True)
 
 
 def print_schedule_result(suite, arch, criteria, packages):
@@ -334,14 +358,10 @@ def queue_packages(all_pkgs, packages, date):
 
 
 def schedule_packages(packages):
-    pkgs = ((x, packages[x]) for x in packages)
-    log.debug('IDs about to be scheduled: ' + str(packages.keys()))
-    query = 'INSERT INTO schedule ' + \
-            '(package_id, date_scheduled) ' + \
-            'VALUES (?, ?)'
-    cursor = conn_db.cursor()
-    cursor.executemany(query, pkgs)
-    conn_db.commit()
+    pkgs = [{'package_id': x, 'date_scheduled': packages[x]} for x in packages.keys()]
+    log.debug('IDs about to be scheduled: %s', packages.keys())
+    if pkgs:
+        conn_db.execute(db_table('schedule').insert(), pkgs)
 
 
 def add_up_numbers(packages, arch):
diff --git a/bin/reproducible_setup_notify.py b/bin/reproducible_setup_notify.py
index 8daee9e..f7d4be9 100755
--- a/bin/reproducible_setup_notify.py
+++ b/bin/reproducible_setup_notify.py
@@ -29,7 +29,6 @@ from reproducible_common import *
 from reproducible_html_packages import gen_packages_html
 from reproducible_html_indexes import build_page
 
-
 class bcolors:
     BOLD = '\033[1m'
     UNDERLINE = '\033[4m'
@@ -51,14 +50,18 @@ def _good(text):
 
 def process_pkg(package, deactivate):
     if deactivate:
-        _good('Deactovating notification for package ' + str(package))
+        _good('Deactivating notification for package ' + str(package))
         flag = 0
     else:
         _good('Activating notification for package ' + str(package))
         flag = 1
-    rows = c.execute(('UPDATE OR FAIL sources SET notify_maintainer="{}" ' +
-                     'WHERE name="{}"').format(flag, package)).rowcount
-    conn_db.commit()
+
+    sources_table = db_table('sources')
+    update_query = sources_table.update().\
+                   where(sources_table.c.name == package).\
+                   values(notify_maintainer=flag)
+    rows = conn_db.execute(update_query).rowcount
+
     if rows == 0:
         log.error(bcolors.FAIL + str(package) + ' does not exists')
         sys.exit(1)
@@ -86,8 +89,6 @@ if maintainer:
     log.info('\t' + ', '.join(pkgs))
     packages.extend(pkgs)
 
-
-c = conn_db.cursor()
 for package in packages:
     process_pkg(package, local_args.deactivate)
 
diff --git a/update_jdn.sh b/update_jdn.sh
index b7a87d8..841b19c 100755
--- a/update_jdn.sh
+++ b/update_jdn.sh
@@ -388,9 +388,10 @@ if [ -f /etc/debian_version ] ; then
 				poxml 
 				procmail 
 				python3-debian 
-				python3-xdg
-				python3-yaml 
 				python3-pystache
+				python3-sqlalchemy
+				python3-xdg
+				python3-yaml
 				python-arpy 
 				python-hachoir-metadata 
 				python-imaging 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/qa/jenkins.debian.net.git



More information about the Qa-jenkins-scm mailing list