[med-svn] r23627 - trunk/community/edam
Steffen Möller
moeller at moszumanska.debian.org
Tue Jan 17 22:52:04 UTC 2017
Author: moeller
Date: 2017-01-17 22:52:03 +0000 (Tue, 17 Jan 2017)
New Revision: 23627
Modified:
trunk/community/edam/registry-tool-iterator.sh
trunk/community/edam/registry-tool.py
Log:
EDAM rewriting - Series of bug fixes / increased flexibility
Modified: trunk/community/edam/registry-tool-iterator.sh
===================================================================
--- trunk/community/edam/registry-tool-iterator.sh 2017-01-17 18:09:39 UTC (rev 23626)
+++ trunk/community/edam/registry-tool-iterator.sh 2017-01-17 22:52:03 UTC (rev 23627)
@@ -5,54 +5,117 @@
exit 1
fi
+DONTUPDATE=true
+DONTOVERWRITE=true
+DONTCLONE=true
+
TOOLDIR=$(realpath $(dirname $0))
set -e
-EDAMPACKAGESINGIT="mummer fastaq barrnap muscle fastqc uc-echo arden artemis sra-sdk bowtie2 rna-star trimmomatic fastx-toolkit mothur jalview snpomatic condetri picard-tools dindel"
+#EDAMPACKAGESINGIT="mummer fastaq barrnap muscle fastqc uc-echo arden artemis sra-sdk bowtie2 rna-star trimmomatic fastx-toolkit mothur jalview snpomatic condetri picard-tools dindel"
+
# And also:
# filo
GITDIR=/home/moeller/git/debian-med
+
JSONBUFFERDIR=/home/moeller/git/json-buffer
+JSONBUFFERSUBDIR=records
if [ ! -d "$GITDIR" ]; then
echo "E: Directory '$GITDIR' is not existing. Expected a whole range of git repositories from Debian Med here. Please check."
exit -1
fi
-cd "$GITDIR"
+if [ ! -d "$JSONBUFFERDIR" ]; then
+ echo "E: The diretory destined to hold the generated records is not existing. $JSONBUFFERDIR'"
+ exit -1
+fi
-for p in $EDAMPACKAGESINGIT
-do
- echo -n "I: Preparing package '$p'"
- origin="https://anonscm.debian.org/git/debian-med/$p.git"
- if [ -d "$GITDIR"/"$p" ]; then
- echo " is existing, will pull latest version from Debian Med git repository '$origin'"
+dest="$JSONBUFFERDIR"/"$JSONBUFFERSUBDIR"
+if [ ! -d "$JSONBUFFERDIR"/"$JSONBUFFERSUBDIR" ]; then
+ echo "W: Creating directory '$dest'"
+ mkdir "$dest"
+else
+ echo "I: Found destination directory '$dest'"
+fi
+unset dest
+
+if [ ! -r "$TOOLDIR/packages.list.txt" ]; then
+ echo "E: Expected list of packages to work on in '$GITDIR/packages.list.txt'. Fie not found/readable."
+ exit 1
+fi
+
+if ! $DONTCLONE; then
+ #for p in $EDAMPACKAGESINGIT
+ cat "$TOOLDIR/packages.list.txt" | while read p
+ do
+ cd "$GITDIR" # We may have moved into a subdir
+ echo -n "I: Preparing package '$p'"
+
+ if $DONTOVERWRITE && [ -r "$JSONBUFFERDIR"/"$JSONBUFFERSUBDIR"/"$p".json ] ; then
+ echo " not overwriting exiting '$JSONBUFFERDIR/$JSONBUFFERSUBDIR/$p.json'"
+ continue
+ fi
+
+ origin="https://anonscm.debian.org/git/debian-med/$p.git"
+ #origin="ssh://anonscm.debian.org/git/debian-med/$p.git"
+ if [ -d "$GITDIR"/"$p" ]; then
+ if $DONTUPDATE; then
+ echo " is existing, will not check for any later version"
+ else
+ echo " is existing, will pull latest version from Debian Med git repository '$origin'"
+ cd "$GITDIR"/"$p"
+ if ! git pull; then
+ echo
+ echo "E: Could not pull latest revision for '$p' from $origin - skipped"
+ continue
+ fi
+ fi
+ else
+ echo " is not existing, will clone from Debian Med git repository '$origin'"
+ if ! git clone --quiet --branch=master --single-branch $origin; then
+ echo
+ echo "E: Could not clone package '$p' from $origin - skipped"
+ continue
+ fi
+ fi
+
cd "$GITDIR"/"$p"
- git pull
- else
- echo " is not existing, will clone from Debian Med git repository '$origin'"
- git clone $origin
- fi
+ git checkout master
- if [ ! -r debian/upstream/edam ]; then
- echo "W: The package '$p' suprisingly does not feature an EDAM annotation file"
- continue
- fi
+ if [ ! -r debian/upstream/edam ]; then
+ echo "W: The package '$p' suprisingly does not feature an EDAM annotation file"
+ continue
+ fi
- if ! yamllint debian/upstream/edam; then
- echo
- echo "E: The package '$p' has a problem with its EDAM annotation. Please fix."
- exit 1
- fi
+ if ! yamllint debian/upstream/edam; then
+ echo
+ echo "E: The package '$p' has a problem with its EDAM annotation. Please fix."
+ exit 1
+ fi
+ done
+fi
-done
-for p in $EDAMPACKAGESINGIT
+#for p in $EDAMPACKAGESINGIT
+cat "$TOOLDIR/packages.list.txt" | grep -v ^# | while read p
do
echo -n "I: Package '$p'"
+ if [ ! -d "$GITDIR"/"$p" ]; then
+ echo " not existing in '$GITDIR/$p' - skipped"
+ continue
+ fi
+ if [ ! -r "$GITDIR"/"$p"/debian/control ]; then
+ echo " with incomplete local repository, searched for $GITDIR/$p/debian/control - skipped"
+ continue
+ fi
- python "$TOOLDIR"/registry-tool.py "$GITDIR"/"$p" > "$JSONBUFFERDIR"/"$p".json
-
+ dest="$JSONBUFFERDIR"/"$JSONBUFFERSUBDIR"/"$p".json
+ echo -n " creating $dest"
+ cd "$GITDIR"/"$p"
+ #git checkout master
+ python "$TOOLDIR"/registry-tool.py "$GITDIR"/"$p" > $dest
echo " [OK]"
+ unset dest
done
Modified: trunk/community/edam/registry-tool.py
===================================================================
--- trunk/community/edam/registry-tool.py 2017-01-17 18:09:39 UTC (rev 23626)
+++ trunk/community/edam/registry-tool.py 2017-01-17 22:52:03 UTC (rev 23627)
@@ -28,14 +28,14 @@
xpath_query = "//owl:Class[rdfs:label/text()='"+label+"' and starts-with(@rdf:about, 'http://edamontology.org/" + axis + "')]/@rdf:about"
matching_terms = EDAM_DOC.xpath(xpath_query, namespaces=EDAM_NS)
if len(matching_terms)==0:
- print("ERROR - No matching " + axis + " term for label " + label + "!")
- print(xpath_query)
+ sys.stderr.write("\nE: No matching " + axis + " term for label " + label + "!"+"\n")
+ # print(xpath_query)
elif len(matching_terms)>1:
- print("ERROR - More than one " + axis + " term for label " + label + "!")
+ sys.stderr.write("\nE: More than one " + axis + " term for label " + label + "!"+"\n")
else:
term_id = matching_terms[0]
if len(EDAM_DOC.xpath("//owl:Class[@rdf:about='"+ term_id +"' and owl:deprecated='true']", namespaces=EDAM_NS))>0:
- print("ERROR - Term " + term_id + " term for label " + label + " is deprecated!")
+ sys.stderr.write("\nE: Term " + term_id + " term for label " + label + " is deprecated!\n")
else:
return term_id
@@ -72,59 +72,104 @@
version_line = open(changelog_path).readline()
version_debian = re.split('[()]', version_line)[1]
m = re.match('^([0-9]+:)?(.*)-[^-]+$', version_debian)
+ if (m is None):
+ sys.stderr.write("E: Bad version in "+changelog_path+"\n")
+ sys.exit(1)
version_upstream = m.groups()[m.lastindex-1]
- edam = yaml.load(open(edam_path))
resource_name=getUpstreamName(copyright_path)
if "" == resource_name:
resource_name=control_source
+
resource = {'name': resource_name,
'homepage': control_homepage,
'version': version_debian,
'collection': 'DebianMed',
- 'interface': {}, #TODO
+ #'interface': {}, #TODO
'description': control_description,
- 'topic': [{'uri':check_id(topic_label,'topic')} for topic_label in edam.get('topic')],
'sourceRegistry': '',
'function': []
}
- metadata = yaml.load(open(metadata_path))
resource['publications'] = {}
try:
- resource['publications']['publicationsPrimaryID'] = metadata['Reference']['DOI'],
+ metadata = yaml.load(open(metadata_path))
+ #print metadata
+ try:
+ resource['publications']['publicationsPrimaryID'] = metadata['Reference']['DOI'],
+ except KeyError:
+ resource['publications']['publicationsPrimaryID'] = metadata['Reference']['doi'],
+
except TypeError:
- print "TypeError 1"
- resource['publications']['publicationsPrimaryID'] = metadata['Reference'][0]['DOI'],
+ #sys.stderr.write("W: " + resource_name + "shows TypeError for DOI - presumed harmless")
+ try:
+ resource['publications']['publicationsPrimaryID'] = metadata['Reference'][0]['DOI'],
+ except KeyError:
+ resource['publications']['publicationsPrimaryID'] = metadata['Reference'][0]['doi'],
if len( metadata['Reference'])>1:
resource['publications']['publicationsOtherID'] = []
for pos in range(1,len(metadata['Reference'])):
try:
resource['publications']['publicationsOtherID'] = metadata['Reference'][pos]['DOI']
except KeyError:
- print "No DOI at pos %d\n" % pos
+ try:
+ resource['publications']['publicationsOtherID'] = metadata['Reference'][pos]['doi']
+ except KeyError:
+ sys.stderr.write("\nW: No DOI at pos %d in '%s'\n" % (pos,metadata_path))
except KeyError:
# already done - assignment of none to publication
resource['publications']['publicationsPrimaryID'] = "None"
+ except IOError:
+ sys.stderr.write("\nW: No metadata file found (looked for "+metadata_path+")\n")
+ resource['publications']['publicationsPrimaryID'] = "None"
- for scope in edam['scopes']:
- function = {}
- function['functionHandle'] = scope['name']
- function['functionName'] = [{'uri':check_id(function_label,'operation')} for function_label in scope.get('function')]
- function['input'] = []
- for el in scope.get('inputs'):
- function['input'].append({
- 'dataType': {'uri':check_id(el['data'],'data')},
- 'dataFormat' : [{'uri':check_id(format_el,'format')} for format_el in el['formats']]
- })
- function['output'] = []
- for el in scope.get('outputs'):
- function['output'].append({
- 'dataType': {'uri':check_id(el['data'],'data')},
- 'dataFormat' : [{'uri':check_id(format_el,'format')} for format_el in el['formats']]
- })
- resource['function'].append(function)
+
+
+ try:
+ edam = yaml.load(open(edam_path))
+ #print(edam)
+ topicORtopics = []
+ try:
+ topicORtopics = edam['topics']
+ except KeyError:
+ topicORtopics = edam['topic']
+
+ resource['topic']=[{'uri':check_id(topic_label,'topic')} for topic_label in topicORtopics]
+
+ scopeORscopes = []
+ try:
+ scopeORscopes = edam['scopes']
+ except KeyError:
+ scopeORscopes = edam['scope']
+
+ for scope in scopeORscopes:
+ function = {}
+ function['functionHandle'] = scope['name']
+ function['functionName'] = [{'uri':check_id(function_label,'operation')} for function_label in scope.get('function')]
+ function['input'] = []
+ if not scope.get('inputs') is None:
+ for el in scope.get('inputs'):
+ v={}
+ v['dataType']={'uri':check_id(el['data'],'data')}
+ if 'formats' in el:
+ v['dataFormat']=[{'uri':check_id(format_el,'format')} for format_el in el['formats']]
+ elif 'format' in el:
+ v['dataFormat']=[{'uri':check_id(format_el,'format')} for format_el in el['format']]
+ function['input'].append(v)
+ function['output'] = []
+ if not scope.get('outputs') is None:
+ for el in scope.get('outputs'):
+ v={}
+ v['dataType'] = {'uri':check_id(el['data'],'data')}
+ if 'formats' in el:
+ v['dataFormat'] = [{'uri':check_id(format_el,'format')} for format_el in el['formats']]
+ elif 'format' in el:
+ v['dataFormat'] = [{'uri':check_id(format_el,'format')} for format_el in el['format']]
+ function['output'].append(v)
+ resource['function'].append(function)
+ except IOError:
+ sys.stderr.write("\nW: No EDAM file found (looked for "+edam_path+")\n")
return resource
@@ -136,7 +181,7 @@
if args.package_dirs:
package_dirs = args.package_dirs
for package_dir in package_dirs:
- print "processing %s..." % package_dir
+ #print "processing %s..." % package_dir
res = doc_to_dict(package_dir)
print json.dumps(res, indent=True)
- print "done processing %s..." % package_dir
+ #print "done processing %s..." % package_dir
More information about the debian-med-commit
mailing list