[med-svn] r22173 - trunk/community/edam
Steffen Möller
moeller at moszumanska.debian.org
Wed Jun 22 19:50:52 UTC 2016
Author: moeller
Date: 2016-06-22 19:50:52 +0000 (Wed, 22 Jun 2016)
New Revision: 22173
Added:
trunk/community/edam/debian2edam
Modified:
trunk/community/edam/registry-tool.py
Log:
Synchronisation with past Debian Sprint
Added: trunk/community/edam/debian2edam
===================================================================
--- trunk/community/edam/debian2edam (rev 0)
+++ trunk/community/edam/debian2edam 2016-06-22 19:50:52 UTC (rev 22173)
@@ -0,0 +1,259 @@
+#!/bin/bash -e
+
+
+# Forwards everything read from standard input to STDERR, so that diagnostics
+# stay out of the regular output written to STDOUT.
+function STDERR () {
+  cat >&2
+}
+
+# echoindent writes a series of blanks to STDOUT, without a trailing newline.
+# Arguments:
+#   $1 - number of blanks; values below 1 print nothing. When omitted or
+#        empty, a single blank is written, which is what callers passing no
+#        argument have always received.
+#   $2 - optional text that is echoed, followed by a newline, after those
+#        blanks.
+function echoindent () {
+  local count=${1:-1}
+  local i
+  for ((i = 0; i < count; i++)); do
+    printf ' '
+  done
+  # The text is quoted so that embedded whitespace is kept and glob
+  # characters such as '*' are not expanded against the current directory.
+  if [ -n "$2" ]; then
+    printf '%s\n' "$2"
+  fi
+}
+
+# Current nesting depth, used for indenting the output.
+level=0
+# Helper to properly close an open parenthesis: steps one level out, prints
+# the indented "}" and, if a first argument is given, appends it as a
+# trailing "# ..." remark on the same line.
+function closeParenthesis () {
+  level=$((level - 1))
+  echoindent $level
+  if [ -z "$1" ]; then
+    echo "}"
+  else
+    echo "}# $1"
+  fi
+}
+
+
+# Prints one indented {"uri", "term"} entry for the URI given as $1.
+# The term is a fixed placeholder. Note that the global level is lowered by
+# one before the entry is indented.
+function echoTerm(){
+  level=$((level - 1))
+  local entry="{\"uri\": \"$1\", \"term\": \"Pippi Langstrumpf\"}"
+  echoindent $level
+  echo "$entry"
+}
+
+# Key argument indicating the debian directory from which to retrieve all the
+# information
+pathToDebian=$1
+#verbose="yes"
+verbose=""
+
+# Variable keeping usage information
+USAGE=<<EOUSAGE
+debian2edam [--upload] <path to 'debian' directory>
+
+server=https://
+Environment variables:
+elixir_cat_username
+elixir_cat_password
+
+EOUSAGE
+
+filename=$(basename "$pathToDebian")
+if [ "edam" = "$filename" ]; then
+ pathToDebian=$(dirname "$pathToDebian") # upstream
+ pathToDebian=$(dirname "$pathToDebian") # debian
+fi
+
+if [ -z "$pathToDebian" ]; then
+ echo "$USAGE" | STDERR
+ echo "E: Please specify debian directory in which to find EDAM annotation." | STDERR
+ exit -1
+fi
+
+if [ ! -d "$pathToDebian" ]; then
+ echo "$USAGE" | STDERR
+ echo "E: Could not find directory '$pathToDebian'" | STDERR
+ exit -1
+fi
+
+if [ ! -r "$pathToDebian/changelog" ]; then
+ echo "$USAGE" | STDERR
+ echo "E: Could not find a changelog file expected at '$pathToDebian/changelog'" | STDERR
+ exit -1
+fi
+
+cd $(dirname "$pathToDebian")
+
+edamfile="debian/upstream/edam"
+if [ ! -r "$edamfile" ]; then
+ echo "$USAGE" | STDERR
+ echo "E: Could not access file '$edamfile' from $(pwd)" | STDERR
+ exit -1
+fi
+
+# Source package name as reported by dpkg-parsechangelog. The earlier sed
+# expression carried a stray backtick and therefore never removed the
+# "Source: " prefix.
+sourcepackage=$(dpkg-parsechangelog | sed -n -e 's/^Source: //p')
+# Version without the Debian revision. The revision is whatever follows the
+# last hyphen, so the match is anchored at the end of the version string.
+version=$(dpkg-parsechangelog | sed -n -e 's/^Version: //p' | sed -e 's/-[^-]*$//')
+
+declare -a descriptions
+declare -a packages
+
+# debug is not assigned anywhere in this script; it only has an effect when
+# it is provided through the environment.
+if [ -n "$debug" ]; then cat debian/control; fi
+
+# Collect the name of every binary package listed in debian/control ...
+while read -r pack; do
+  p=$(echo "$pack" | sed -e 's/^[^:]*: *//')
+  echo "Package: $p"
+  packages+=("$p")
+done < <(grep "^Package:" debian/control)
+
+# ... and the short description (first line only) of each of them.
+while read -r desc; do
+  d=$(echo "$desc" | sed -e 's/^[^:]*: *//')
+  echo "Description: $d"
+  descriptions+=("$d")
+done < <(grep "^Description:" debian/control)
+
+#echo "DESCRIPTIONS: ${descriptions[*]}"
+#echo "PACKAGES: ${packages[*]}"
+
+# Both arrays are addressed with the same index, so their sizes have to agree.
+if [ "${#packages[*]}" != "${#descriptions[*]}" ]; then
+  echo "E: Internal error - expected same number of packages (${#packages[*]}) as for their descriptions (${#descriptions[*]})" | STDERR
+  exit -1
+fi
+
+(
+if [ -n "$verbose" ]; then
+  # Valid indices run from 0 to the number of entries minus one.
+  for ((packageno = 0; packageno < ${#descriptions[*]}; packageno++)); do
+    echo "# $packageno"
+    echo "Packages[$packageno]: ${packages[$packageno]}"
+    echo "Descriptions[$packageno]: ${descriptions[$packageno]}"
+  done
+fi
+) | STDERR
+
+# State carried from one line of the EDAM file to the next.
+prevstate="start"
+previndent=0
+currentscope=""
+currenttopic=""
+opentopic=0
+openfunction=0
+openscope=0
+indentlen=0
+
+# Core part of the program
+# It reads every line of the EDAM file (see end of loop for the redirection)
+# and decides what to print to STDOUT. The types handled are scope, topic,
+# function, input and output; anything else is reported and ignored.
+
+while IFS='' read -r line
+do
+  if [ -z "$line" ]; then
+    # A diagnostic like the others, so it is kept out of the generated output.
+    echo "Read empty line" | STDERR
+    continue
+  fi
+
+  if [ -n "$verbose" ]; then
+    echo "line: '$line'" | STDERR
+  fi
+
+  # retrieve different parts of the description:
+  # leading blanks, the type in front of the colon, the first word after it
+  blanks=$(echo "$line"|sed -e 's/^\( *\)\([^ :]\+\): *\([^ ]\+\).*$/\1/')
+  type=$(echo "$line"|sed -e 's/^\( *\)\([^ :]\+\): *\([^ ]\+\).*$/\2/')
+  val=$(echo "$line"|sed -e 's/^\( *\)\([^ :]\+\): *\([^ ]\+\).*$/\3/')
+
+  if echo "$val" | grep -q : ; then
+    echo "W: found colon in ID of line '$line' - transscribing to underscore" | STDERR
+    val=$(echo "$val"|tr ":" "_")
+  fi
+
+  # Only the scopes "*" and "summary" are processed; lines belonging to any
+  # other scope are skipped until the next scope line shows up.
+  if [[ -n "$currentscope" && "*" != "$currentscope" && "summary" != "$currentscope" && "scope" != "$type" ]]; then
+    echo "I: Wrong scope ($currentscope) - ignored '$line'" | STDERR
+    continue
+  fi
+  indentlen=${#blanks}
+
+  if [ "scope" = "$type" ]; then
+    if [ "$openfunction" -gt 0 ]; then
+      closeParenthesis "openfunction($openfunction) in scope"
+      # Reset as in the topic and function branches, otherwise the same
+      # function would be closed a second time later on.
+      openfunction=0
+    fi
+    currentscope="$val"
+    resourcename=$sourcepackage
+    # The operators need blanks around them; without them each comparison
+    # is a single non-empty word and the test is always true.
+    if [[ "*" != "$val" && "summary" != "$val" ]]; then
+      resourcename=$val
+    fi
+
+    if [ "summary" != "$val" -a "*" != "$val" ]; then
+      echo "I: treatment of multiple scopes not yet implemented" | STDERR
+    else
+      echo "{"
+      # Some decent comparison of package names with scope is not implemented
+      level=$((level+1))
+      echoindent $level
+      echo "Package $resourcename"
+      echoindent $level
+      echo "\"version\": \"$version\","
+      echoindent $level
+      echo "\"description\": \"${descriptions[0]}\","
+      echoindent $level
+      # NOTE(review): this prints the topic wrapped in literal braces;
+      # possibly "${currenttopic}" was intended - confirm before changing.
+      echo "\"topic\": \"{$currenttopic}\""
+      openscope=1
+    fi
+  elif [ "topic" = "$type" ]; then
+    if [ "$openfunction" -gt 0 ]; then
+      closeParenthesis "openfunction($openfunction) in topic"
+      openfunction=0
+    fi
+    if [ "$openscope" -gt 0 ]; then
+      closeParenthesis "openscope($openscope) after loop"
+      openscope=0
+    fi
+    if [ "start" != "$prevstate" ]; then
+      closeParenthesis "topic with prior state - weird"
+    fi
+    currenttopic="$val"
+    # at some later implementation point, bits generated here would be cached
+    # and then distributed to various lower-level scopes
+  elif [ "function" = "$type" ]; then
+    if [ "$openfunction" -gt 0 ]; then
+      closeParenthesis "openfunction($openfunction) in function"
+      openfunction=0
+    fi
+    echoindent $level
+    echo "{function: [ { \"functionName\": ["
+    # echoTerm lowers level by one; it is raised again right below.
+    echoTerm "$val"
+    echo "] }],"
+    level=$((level+1))
+    openfunction=1
+  elif [ "input" = "$type" -o "output" = "$type" ]; then
+    if [ "$prevstate" = "$type" ]; then
+      echo "},{"
+    fi
+    if [ "$prevstate" = 'function' ]; then
+      echo "\"$type\": [{"
+    fi
+    echoindent $level
+    echo "($type $val)"
+  else
+    echo "W: unknown type '$type' - ignored" | STDERR
+  fi
+  prevstate=$type
+done < "$edamfile"
+
+# Close whatever is still open once the complete file has been read.
+if [ "$openfunction" -gt 0 ]; then
+  closeParenthesis "openfunction($openfunction) after loop"
+  openfunction=0
+fi
+
+if [ "$openscope" -gt 0 ]; then
+  #echo "I: treatment of multiple scopes not yet implemented"|STDERR
+  closeParenthesis "openscope($openscope) after loop"
+  openscope=0
+fi
+
+# opentopic is never raised above 0 in this script, so nothing is printed here.
+if [ "$opentopic" -gt 0 ]; then
+  opentopic=0
+fi
+
+#for i in $(seq $(($indentlen-$openfunction-$openscope-$opentopic)) -1 1)
+#do
+# closeParenthesis "indent $i"
+#done
Modified: trunk/community/edam/registry-tool.py
===================================================================
--- trunk/community/edam/registry-tool.py 2016-06-22 14:34:40 UTC (rev 22172)
+++ trunk/community/edam/registry-tool.py 2016-06-22 19:50:52 UTC (rev 22173)
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
import json
import yaml
import argparse
@@ -5,18 +6,33 @@
import os.path
import getpass
import re
-import random # for uri2term, to be removed
-def uri2term(uri):
- """The routine is meant to retrieve the human-readable term name for a URI provided.
+from lxml import etree
- The current implementation merely produces a combination of the first and
- last name of Pipi Longstocking as named in different languages.
- """
- pipi= [["Pippi","Langstrumpf"],["Pippi","Longstocking"],["Inger","Nilsson"],["Fifi","Brindacier"],
- ["Pippi","Långstrump"],["Pippi","Langstrømpe"],["Pippi","Calcesllargues"],["Pipi","Ŝtrumpolonga"],["Pippi","Uzunçorap"]]
- return(random.choice(pipi)[0]+" "+random.choice(pipi)[1])
+#parsing and declaring namespaces...
+EDAM_NS = {'owl' : 'http://www.w3.org/2002/07/owl#',
+ 'rdf':"http://www.w3.org/1999/02/22-rdf-syntax-ns#",
+ 'rdfs':"http://www.w3.org/2000/01/rdf-schema#",
+ 'oboInOwl': "http://www.geneontology.org/formats/oboInOwl#"}
+EDAM_DOC = doc = etree.parse("/home/hmenager/edamontology/EDAM_1.13_dev.owl")
+
+def check_id(label, axis):
+ xpath_query = "//owl:Class[translate(rdfs:label/text(),'abcdefghijklmnopqrstuvwxyz','ABCDEFGHIJKLMNOPQRSTUVWXYZ')=translate('" + label\
+ + "','abcdefghijklmnopqrstuvwxyz','ABCDEFGHIJKLMNOPQRSTUVWXYZ') and starts-with(@rdf:about, 'http://edamontology.org/" + axis + "')]/@rdf:about"
+ matching_terms = EDAM_DOC.xpath(xpath_query, namespaces=EDAM_NS)
+ if len(matching_terms)==0:
+ print("ERROR - No matching " + axis + " term for label " + label + "!")
+ print(xpath_query)
+ elif len(matching_terms)>1:
+ print("ERROR - More than one " + axis + " term for label " + label + "!")
+ else:
+ term_id = matching_terms[0]
+ if len(EDAM_DOC.xpath("//owl:Class[@rdf:about='"+ term_id +"' and owl:deprecated='true']", namespaces=EDAM_NS))>0:
+ print("ERROR - Term " + term_id + " term for label " + label + " is deprecated!")
+ else:
+ return term_id
+
def doc_to_dict(pack_dir):
debian_path = os.path.join(pack_dir, 'debian')
control_path = os.path.join(debian_path, 'control')
@@ -31,14 +47,13 @@
version_upstream = m.groups()[m.lastindex-1]
edam = yaml.load(open(edam_path))
metadata = yaml.load(open(metadata_path))
-
resource = {'name': control.get('Source'),
'homepage': control.get('Homepage'),
'version': version_debian,
'collection': 'debian',
'interface': {}, #TODO
'description': control.get('Description'),
- 'topic': [{'uri':uri,'term':uri2term(el['data'])} for uri in edam.get('topic')],
+ 'topic': [{'uri':check_id(topic_label,'topic')} for topic_label in edam.get('topic')],
'sourceRegistry': '',
'publications': [{'publicationsOtherID': [i['DOI'] for i in metadata['Reference']]}],
'function': []
@@ -46,68 +61,32 @@
for scope in edam['scopes']:
function = {}
function['functionHandle'] = scope['name']
- function['functionName'] = [{'uri':uri,'term':uri2term(el['data'])} for uri in scope.get('function')]
+ function['functionName'] = [{'uri':check_id(function_label,'operation')} for function_label in scope.get('function')]
function['input'] = []
for el in scope.get('inputs'):
function['input'].append({
- 'dataType': {'uri':el['data'],'term':uri2term(el['data'])},
- 'dataFormat' : [{'uri':format_el,'term':uri2term(el['data'])} for format_el in el['formats']]
+ 'dataType': {'uri':check_id(el['data'],'data')},
+ 'dataFormat' : [{'uri':check_id(format_el,'format')} for format_el in el['formats']]
})
function['output'] = []
for el in scope.get('outputs'):
function['output'].append({
- 'dataType': {'uri':el['data'],'term':uri2term(el['data'])},
- 'dataFormat' : [{'uri':format_el,'term':uri2term(el['data'])} for format_el in el['formats']]
+ 'dataType': {'uri':check_id(el['data'],'data')},
+ 'dataFormat' : [{'uri':check_id(format_el,'format')} for format_el in el['formats']]
})
resource['function'].append(function)
return resource
-def auth(login):
- password = getpass.getpass()
- resp = requests.post('https://elixir-registry.cbs.dtu.dk/api/auth/login','{"username": "%s","password": "%s"}' % (login, password), headers={'Accept':'application/json', 'Content-type':'application/json'}).text
- return json.loads(resp)['token']
if __name__ == '__main__':
- # 1. Import XML files from a Mobyle server or from a folder containing XML files
- # 2. Convert to BTR XML
- # 3. Convert to BTR JSON
- # 4. Register to Elixir BTR
parser = argparse.ArgumentParser(
description='ELIXIR registry tool for Debian Med packages')
- group = parser.add_mutually_exclusive_group()
- parser.add_argument('--package_dirs', help="Debian package directory", nargs='+')
- parser.add_argument('--json_dir', help="target directory for JSON files")
- parser.add_argument('--login', help="registry login")
+ parser.add_argument('package_dirs', help="Debian package directory", nargs='+')
args = parser.parse_args()
if args.package_dirs:
package_dirs = args.package_dirs
- params = {'mobyle_root':"'http://mobyle.pasteur.fr'",
- 'mobyle_contact':"'mobyle at pasteur.fr'"}
- if args.login:
- print "authenticating..."
- token = auth(args.login)
- print "authentication ok"
- ok_cnt = 0
- ko_cnt = 0
- #print "attempting to delete all registered services..."
- #resp = requests.delete('https://elixir-registry.cbs.dtu.dk/api/tool/%s' % args.login, headers={'Accept':'application/json', 'Content-type':'application/json', 'Authorization': 'Token %s' % token})
- #print resp
for package_dir in package_dirs:
- print "processing %s..." % package_dirs
+ print "processing %s..." % package_dir
res = doc_to_dict(package_dir)
print json.dumps(res, indent=True)
- resource_name = res['name']
- if args.json_dir:
- json_path = os.path.join(args.json_dir, resource_name + '.json')
- json.dump(res, open(json_path, 'w'), indent=True)
- if args.login and args:
- resp = requests.post('https://elixir-registry.cbs.dtu.dk/api/tool', json.dumps(res), headers={'Accept':'application/json', 'Content-type':'application/json', 'Authorization': 'Token %s' % token})
- #print resp.status_code
- if resp.status_code==201:
- print "%s ok" % resource_name
- ok_cnt += 1
- else:
- print "%s ko, error: %s" % (resource_name, resp.text)
- ko_cnt += 1
- if args.login:
- print "import finished, ok=%s, ko=%s" % (ok_cnt, ko_cnt)
+ print "done processing %s..." % package_dir
More information about the debian-med-commit
mailing list