[med-svn] r23627 - trunk/community/edam

Steffen Möller moeller at moszumanska.debian.org
Tue Jan 17 22:52:04 UTC 2017


Author: moeller
Date: 2017-01-17 22:52:03 +0000 (Tue, 17 Jan 2017)
New Revision: 23627

Modified:
   trunk/community/edam/registry-tool-iterator.sh
   trunk/community/edam/registry-tool.py
Log:
EDAM rewriting - Series of bug fixes / increased flexibility



Modified: trunk/community/edam/registry-tool-iterator.sh
===================================================================
--- trunk/community/edam/registry-tool-iterator.sh	2017-01-17 18:09:39 UTC (rev 23626)
+++ trunk/community/edam/registry-tool-iterator.sh	2017-01-17 22:52:03 UTC (rev 23627)
@@ -5,54 +5,117 @@
 	exit 1
 fi
 
+DONTUPDATE=true
+DONTOVERWRITE=true
+DONTCLONE=true
+
 TOOLDIR=$(realpath $(dirname $0))
 
 set -e
 
-EDAMPACKAGESINGIT="mummer fastaq barrnap muscle fastqc uc-echo arden artemis sra-sdk bowtie2 rna-star trimmomatic fastx-toolkit mothur jalview snpomatic condetri picard-tools dindel"
+#EDAMPACKAGESINGIT="mummer fastaq barrnap muscle fastqc uc-echo arden artemis sra-sdk bowtie2 rna-star trimmomatic fastx-toolkit mothur jalview snpomatic condetri picard-tools dindel"
+
 # And also:
 # filo 
 GITDIR=/home/moeller/git/debian-med
+
 JSONBUFFERDIR=/home/moeller/git/json-buffer
+JSONBUFFERSUBDIR=records
 
 if [ ! -d "$GITDIR" ]; then
 	echo "E: Directory '$GITDIR' is not existing. Expected a whole range of git repositories from Debian Med here. Please check."
 	exit -1
 fi
 
-cd "$GITDIR"
+if [ ! -d "$JSONBUFFERDIR" ]; then
+	echo "E: The diretory destined to hold the generated records is not existing. $JSONBUFFERDIR'" 
+	exit -1
+fi
 
-for p in $EDAMPACKAGESINGIT
-do
-	echo -n "I: Preparing package '$p'"
-	origin="https://anonscm.debian.org/git/debian-med/$p.git"
-	if [ -d "$GITDIR"/"$p" ]; then
-		echo " is existing, will pull latest version from Debian Med git repository '$origin'"
+dest="$JSONBUFFERDIR"/"$JSONBUFFERSUBDIR" 
+if [ ! -d "$JSONBUFFERDIR"/"$JSONBUFFERSUBDIR" ]; then
+	echo "W: Creating directory '$dest'"
+	mkdir "$dest"
+else
+	echo "I: Found destination directory '$dest'"
+fi
+unset dest
+
+if [ ! -r "$TOOLDIR/packages.list.txt" ]; then
+	echo "E: Expected list of packages to work on in '$GITDIR/packages.list.txt'. Fie not found/readable."
+	exit 1
+fi
+
+if ! $DONTCLONE; then
+	#for p in $EDAMPACKAGESINGIT
+	cat "$TOOLDIR/packages.list.txt" | while read p
+	do
+		cd "$GITDIR"  # We may have moved into a subdir
+		echo -n "I: Preparing package '$p'"
+
+		if $DONTOVERWRITE && [ -r "$JSONBUFFERDIR"/"$JSONBUFFERSUBDIR"/"$p".json ] ; then
+			echo " not overwriting exiting '$JSONBUFFERDIR/$JSONBUFFERSUBDIR/$p.json'"
+			continue
+		fi
+
+		origin="https://anonscm.debian.org/git/debian-med/$p.git"
+		#origin="ssh://anonscm.debian.org/git/debian-med/$p.git"
+		if [ -d "$GITDIR"/"$p" ]; then
+			if $DONTUPDATE; then
+				echo " is existing, will not check for any later version"
+			else
+				echo " is existing, will pull latest version from Debian Med git repository '$origin'"
+				cd "$GITDIR"/"$p"
+				if ! git pull; then
+					echo
+					echo "E: Could not pull latest revision for '$p' from $origin - skipped"
+					continue
+				fi
+			fi
+		else
+			echo " is not existing, will clone from Debian Med git repository '$origin'"
+			if ! git clone --quiet --branch=master --single-branch $origin; then
+				echo
+				echo "E: Could not clone package '$p' from $origin - skipped"
+				continue
+			fi
+		fi
+
 		cd "$GITDIR"/"$p"
-		git pull
-	else
-		echo " is not existing, will clone from Debian Med git repository '$origin'"
-		git clone $origin
-	fi
+		git checkout master
 
-	if [ ! -r debian/upstream/edam ]; then
-		echo "W: The package '$p' suprisingly does not feature an EDAM annotation file"
-		continue
-	fi
+		if [ ! -r debian/upstream/edam ]; then
+			echo "W: The package '$p' suprisingly does not feature an EDAM annotation file"
+			continue
+		fi
 
-	if ! yamllint debian/upstream/edam; then
-		echo
-		echo "E: The package '$p' has a problem with its EDAM annotation. Please fix."
-		exit 1
-	fi
+		if ! yamllint debian/upstream/edam; then
+			echo
+			echo "E: The package '$p' has a problem with its EDAM annotation. Please fix."
+			exit 1
+		fi
+	done
+fi
 
-done
 
-for p in $EDAMPACKAGESINGIT
+#for p in $EDAMPACKAGESINGIT
+cat "$TOOLDIR/packages.list.txt" | grep -v ^# | while read p
 do
 	echo -n "I: Package '$p'"
+	if [ ! -d "$GITDIR"/"$p" ]; then
+		echo " not existing in '$GITDIR/$p' - skipped"
+		continue
+	fi
+	if [ ! -r "$GITDIR"/"$p"/debian/control ]; then
+		echo " with incomplete local repository, searched for $GITDIR/$p/debian/control  - skipped"
+		continue
+	fi
 
-	python "$TOOLDIR"/registry-tool.py "$GITDIR"/"$p" > "$JSONBUFFERDIR"/"$p".json
-
+	dest="$JSONBUFFERDIR"/"$JSONBUFFERSUBDIR"/"$p".json
+	echo -n " creating $dest"
+	cd "$GITDIR"/"$p"
+	#git checkout master
+	python "$TOOLDIR"/registry-tool.py "$GITDIR"/"$p" > $dest
 	echo " [OK]"
+	unset dest
 done

Modified: trunk/community/edam/registry-tool.py
===================================================================
--- trunk/community/edam/registry-tool.py	2017-01-17 18:09:39 UTC (rev 23626)
+++ trunk/community/edam/registry-tool.py	2017-01-17 22:52:03 UTC (rev 23627)
@@ -28,14 +28,14 @@
     xpath_query = "//owl:Class[rdfs:label/text()='"+label+"' and starts-with(@rdf:about, 'http://edamontology.org/" + axis + "')]/@rdf:about"
     matching_terms = EDAM_DOC.xpath(xpath_query, namespaces=EDAM_NS)
     if len(matching_terms)==0:
-        print("ERROR - No matching " + axis + " term for label " + label + "!")
-        print(xpath_query)
+        sys.stderr.write("\nE: No matching " + axis + " term for label " + label + "!"+"\n")
+        # print(xpath_query)
     elif len(matching_terms)>1:
-        print("ERROR - More than one " + axis + " term for label " + label + "!")
+        sys.stderr.write("\nE: More than one " + axis + " term for label " + label + "!"+"\n")
     else:
         term_id = matching_terms[0]
         if len(EDAM_DOC.xpath("//owl:Class[@rdf:about='"+ term_id +"' and owl:deprecated='true']", namespaces=EDAM_NS))>0:
-            print("ERROR - Term " + term_id + " term for label " + label + " is deprecated!")
+            sys.stderr.write("\nE: Term " + term_id + " term for label " + label + " is deprecated!\n")
         else:
             return term_id            
 
@@ -72,59 +72,104 @@
     version_line = open(changelog_path).readline()
     version_debian = re.split('[()]', version_line)[1]
     m = re.match('^([0-9]+:)?(.*)-[^-]+$', version_debian)
+    if (m is None):
+        sys.stderr.write("E: Bad version in "+changelog_path+"\n")
+	sys.exit(1)
     version_upstream = m.groups()[m.lastindex-1]
-    edam = yaml.load(open(edam_path))
 
     resource_name=getUpstreamName(copyright_path)
     if "" == resource_name:
 	resource_name=control_source
+
     
     resource = {'name': resource_name,
                 'homepage': control_homepage,
                 'version': version_debian,
                 'collection': 'DebianMed',
-                'interface': {}, #TODO
+                #'interface': {}, #TODO
                 'description': control_description,
-                'topic': [{'uri':check_id(topic_label,'topic')} for topic_label in edam.get('topic')],
                 'sourceRegistry': '',
                 'function': []
                }
-    metadata = yaml.load(open(metadata_path))
     resource['publications'] = {}
 
     try:
-        resource['publications']['publicationsPrimaryID'] = metadata['Reference']['DOI'],
+        metadata = yaml.load(open(metadata_path))
+        #print metadata       
+        try:
+            resource['publications']['publicationsPrimaryID'] = metadata['Reference']['DOI'],
+        except KeyError:
+            resource['publications']['publicationsPrimaryID'] = metadata['Reference']['doi'],
+
     except TypeError:
-        print "TypeError 1"
-        resource['publications']['publicationsPrimaryID'] = metadata['Reference'][0]['DOI'],
+        #sys.stderr.write("W: " + resource_name + "shows TypeError for DOI - presumed harmless")
+        try:
+            resource['publications']['publicationsPrimaryID'] = metadata['Reference'][0]['DOI'],
+        except KeyError:
+            resource['publications']['publicationsPrimaryID'] = metadata['Reference'][0]['doi'],
 	if len( metadata['Reference'])>1:
             resource['publications']['publicationsOtherID'] = []
             for pos in range(1,len(metadata['Reference'])):
                 try:
                     resource['publications']['publicationsOtherID'] = metadata['Reference'][pos]['DOI']
                 except KeyError:
-                    print "No DOI at pos %d\n" % pos
+                    try:
+                       resource['publications']['publicationsOtherID'] = metadata['Reference'][pos]['doi']
+                    except KeyError:
+                       sys.stderr.write("\nW: No DOI at pos %d in '%s'\n" % (pos,metadata_path))
     except KeyError:
         # already done - assignment of none to publication
         resource['publications']['publicationsPrimaryID'] = "None"
+    except IOError:
+        sys.stderr.write("\nW: No metadata file found (looked for "+metadata_path+")\n")
+        resource['publications']['publicationsPrimaryID'] = "None"
 
-    for scope in edam['scopes']:
-        function = {}
-        function['functionHandle'] = scope['name']
-        function['functionName'] = [{'uri':check_id(function_label,'operation')} for function_label in scope.get('function')]
-        function['input'] = []
-        for el in scope.get('inputs'):
-            function['input'].append({
-                                      'dataType': {'uri':check_id(el['data'],'data')},
-                                      'dataFormat' : [{'uri':check_id(format_el,'format')} for format_el in el['formats']]
-                                     })
-        function['output'] = []
-        for el in scope.get('outputs'):
-            function['output'].append({
-                                      'dataType': {'uri':check_id(el['data'],'data')},
-                                      'dataFormat' : [{'uri':check_id(format_el,'format')} for format_el in el['formats']]
-                                     })
-        resource['function'].append(function)
+
+
+    try:
+        edam = yaml.load(open(edam_path))
+	#print(edam)
+        topicORtopics = []
+        try:
+            topicORtopics = edam['topics']
+        except KeyError:
+            topicORtopics = edam['topic']
+    
+        resource['topic']=[{'uri':check_id(topic_label,'topic')} for topic_label in topicORtopics]
+
+        scopeORscopes = []
+        try:
+            scopeORscopes = edam['scopes']
+        except KeyError:
+            scopeORscopes = edam['scope']
+    
+        for scope in scopeORscopes:
+            function = {}
+            function['functionHandle'] = scope['name']
+            function['functionName'] = [{'uri':check_id(function_label,'operation')} for function_label in scope.get('function')]
+            function['input'] = []
+            if not scope.get('inputs') is None:
+               for el in scope.get('inputs'):
+                   v={}
+                   v['dataType']={'uri':check_id(el['data'],'data')}
+                   if 'formats' in el:
+                       v['dataFormat']=[{'uri':check_id(format_el,'format')} for format_el in el['formats']]
+                   elif 'format' in el:
+                       v['dataFormat']=[{'uri':check_id(format_el,'format')} for format_el in el['format']]
+                   function['input'].append(v)
+            function['output'] = []
+            if not scope.get('outputs') is None:
+               for el in scope.get('outputs'):
+                   v={}
+                   v['dataType'] = {'uri':check_id(el['data'],'data')}
+                   if 'formats' in el:
+                       v['dataFormat'] = [{'uri':check_id(format_el,'format')} for format_el in el['formats']]
+                   elif 'format' in el:
+                       v['dataFormat'] = [{'uri':check_id(format_el,'format')} for format_el in el['format']]
+                   function['output'].append(v)
+            resource['function'].append(function)
+    except IOError:
+        sys.stderr.write("\nW: No EDAM file found (looked for "+edam_path+")\n")
     return resource
  
 
@@ -136,7 +181,7 @@
     if args.package_dirs:
         package_dirs = args.package_dirs
     for package_dir in package_dirs:
-        print "processing %s..." % package_dir
+        #print "processing %s..." % package_dir
         res = doc_to_dict(package_dir)
         print json.dumps(res, indent=True)
-        print "done processing %s..." % package_dir
+        #print "done processing %s..." % package_dir




More information about the debian-med-commit mailing list