[med-svn] r23601 - in trunk/community/edam: . test/test_muscle/debian/upstream

Steffen Möller moeller at moszumanska.debian.org
Sun Jan 15 19:35:08 UTC 2017


Author: moeller
Date: 2017-01-15 19:35:08 +0000 (Sun, 15 Jan 2017)
New Revision: 23601

Added:
   trunk/community/edam/registry-tool-iterator.sh
Modified:
   trunk/community/edam/registry-tool.py
   trunk/community/edam/test/test_muscle/debian/upstream/edam
Log:
First series of fixes for the EDAM annotation transfer

Added: trunk/community/edam/registry-tool-iterator.sh
===================================================================
--- trunk/community/edam/registry-tool-iterator.sh	                        (rev 0)
+++ trunk/community/edam/registry-tool-iterator.sh	2017-01-15 19:35:08 UTC (rev 23601)
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+if [ ! -x /usr/bin/realpath ]; then
+	echo "E: Please install realpath"
+	exit 1
+fi
+
+TOOLDIR=$(realpath $(dirname $0))
+
+set -e
+
+EDAMPACKAGESINGIT="mummer fastaq barrnap muscle fastqc uc-echo arden artemis sra-sdk bowtie2 rna-star trimmomatic fastx-toolkit mothur jalview snpomatic condetri picard-tools dindel"
+# And also:
+# filo 
+GITDIR=/home/moeller/git/debian-med
+JSONBUFFERDIR=/home/moeller/git/json-buffer
+
+if [ ! -d "$GITDIR" ]; then
+	echo "E: Directory '$GITDIR' is not existing. Expected a whole range of git repositories from Debian Med here. Please check."
+	exit -1
+fi
+
+cd "$GITDIR"
+
+for p in $EDAMPACKAGESINGIT
+do
+	echo -n "I: Preparing package '$p'"
+	origin="https://anonscm.debian.org/git/debian-med/$p.git"
+	if [ -d "$GITDIR"/"$p" ]; then
+		echo " is existing, will pull latest version from Debian Med git repository '$origin'"
+		cd "$GITDIR"/"$p"
+		git pull
+	else
+		echo " is not existing, will clone from Debian Med git repository '$origin'"
+		git clone $origin
+	fi
+
+	if [ ! -r debian/upstream/edam ]; then
+		echo "W: The package '$p' suprisingly does not feature an EDAM annotation file"
+		continue
+	fi
+
+	if ! yamllint debian/upstream/edam; then
+		echo
+		echo "E: The package '$p' has a problem with its EDAM annotation. Please fix."
+		exit 1
+	fi
+
+done
+
+for p in $EDAMPACKAGESINGIT
+do
+	echo -n "I: Package '$p'"
+
+	python "$TOOLDIR"/registry-tool.py "$GITDIR"/"$p" > "$JSONBUFFERDIR"/"$p".json
+
+	echo " [OK]"
+done


Property changes on: trunk/community/edam/registry-tool-iterator.sh
___________________________________________________________________
Added: svn:executable
   + *

Modified: trunk/community/edam/registry-tool.py
===================================================================
--- trunk/community/edam/registry-tool.py	2017-01-15 17:54:27 UTC (rev 23600)
+++ trunk/community/edam/registry-tool.py	2017-01-15 19:35:08 UTC (rev 23601)
@@ -9,6 +9,8 @@
 
 from lxml import etree
 
+from debian import deb822
+
 #parsing and declaring namespaces...
 EDAM_NS = {'owl' : 'http://www.w3.org/2002/07/owl#',
            'rdf':"http://www.w3.org/1999/02/22-rdf-syntax-ns#",
@@ -16,11 +18,14 @@
            'oboInOwl': "http://www.geneontology.org/formats/oboInOwl#"}
 
 #EDAM_DOC = doc = etree.parse("/home/hmenager/edamontology/EDAM_1.13_dev.owl")
-EDAM_DOC = doc = etree.parse("EDAM.owl")
+#EDAM_DOC = doc = etree.parse("EDAM.owl")
+doc = etree.parse("/home/moeller/debian-med/community/edam/EDAM.owl")
+EDAM_DOC = doc.getroot()
 
 def check_id(label, axis):
-    xpath_query = "//owl:Class[translate(rdfs:label/text(),'abcdefghijklmnopqrstuvwxyz','ABCDEFGHIJKLMNOPQRSTUVWXYZ')=translate('" + label\
-          + "','abcdefghijklmnopqrstuvwxyz','ABCDEFGHIJKLMNOPQRSTUVWXYZ') and starts-with(@rdf:about, 'http://edamontology.org/" + axis + "')]/@rdf:about"
+    #xpath_query = "//owl:Class[translate(rdfs:label/text(),'abcdefghijklmnopqrstuvwxyz','ABCDEFGHIJKLMNOPQRSTUVWXYZ')=translate('" + label\
+    #      + "','abcdefghijklmnopqrstuvwxyz','ABCDEFGHIJKLMNOPQRSTUVWXYZ') and starts-with(@rdf:about, 'http://edamontology.org/" + axis + "')]/@rdf:about"
+    xpath_query = "//owl:Class[rdfs:label/text()='"+label+"' and starts-with(@rdf:about, 'http://edamontology.org/" + axis + "')]/@rdf:about"
     matching_terms = EDAM_DOC.xpath(xpath_query, namespaces=EDAM_NS)
     if len(matching_terms)==0:
         print("ERROR - No matching " + axis + " term for label " + label + "!")
@@ -33,26 +38,53 @@
             print("ERROR - Term " + term_id + " term for label " + label + " is deprecated!")
         else:
             return term_id            
+
+import re, sys
+def getUpstreamName(changelogFile):
+    p = re.compile("Upstream-Name: *([A-Za-z0-9]+) *")
+    for l in open(changelogFile):
+        m = p.findall(l)
+        if m:
+            return(m[0])
+    return("")
+
         
 def doc_to_dict(pack_dir):
     debian_path = os.path.join(pack_dir, 'debian')
     control_path = os.path.join(debian_path, 'control')
     changelog_path = os.path.join(debian_path, 'changelog')
+    copyright_path = os.path.join(debian_path, 'copyright')
     edam_path = os.path.join(debian_path, 'upstream', 'edam')
     metadata_path = os.path.join(debian_path, 'upstream', 'metadata')
-    control = yaml.load(open(control_path))
+    control_iterator = deb822.Packages.iter_paragraphs(open(control_path))
+    control_description=""
+    control_homepage=""
+    control_name=""
+    for p in control_iterator:
+        if p.has_key("Source"):
+            control_source=p.get("Source")
+        if p.has_key("Homepage"):
+            control_homepage=p.get("Homepage")
+        if p.has_key("Description"):
+            control_description=p.get("Description")
+            break;
 
     version_line = open(changelog_path).readline()
     version_debian = re.split('[()]', version_line)[1]
     m = re.match('^([0-9]+:)?(.*)-[^-]+$', version_debian)
     version_upstream = m.groups()[m.lastindex-1]
     edam = yaml.load(open(edam_path))
-    resource = {'name': control.get('Source'),
-                'homepage': control.get('Homepage'),
+
+    resource_name=getUpstreamName(copyright_path)
+    if "" == resource_name:
+	resource_name=control_source
+    
+    resource = {'name': resource_name,
+                'homepage': control_homepage,
                 'version': version_debian,
                 'collection': 'DebianMed',
                 'interface': {}, #TODO
-                'description': control.get('Description'),
+                'description': control_description,
                 'topic': [{'uri':check_id(topic_label,'topic')} for topic_label in edam.get('topic')],
                 'sourceRegistry': '',
                 'function': []

Modified: trunk/community/edam/test/test_muscle/debian/upstream/edam
===================================================================
--- trunk/community/edam/test/test_muscle/debian/upstream/edam	2017-01-15 17:54:27 UTC (rev 23600)
+++ trunk/community/edam/test/test_muscle/debian/upstream/edam	2017-01-15 19:35:08 UTC (rev 23601)
@@ -1,15 +1,16 @@
-topic: 
-  - topic_0182 #Sequence alignment
-scopes: 
+ontology: EDAM (1.12)
+topic:
+  - Sequence analysis
+scopes:
   - name: summary
-    function: 
-     - operation_0492 #Multiple Sequence Alignment
-    inputs: 
-     - data:   data_2044 #Sequence
-       formats: [format_1929] #FASTA
+    function:
+     - Multiple Sequence Alignment
+    inputs:
+     - data:   Sequence
+       formats: [FASTA]
     outputs:
-     - data: data_1916 #Alignment
-       formats: 
-       - format_1984 #FASTA-aln
-       - format_1982 #Clustalw format
-       - format_1997 #Phylip format
+     - data: Alignment
+       formats:
+       - FASTA-aln
+       - Clustalw format
+       - Phylip format




More information about the debian-med-commit mailing list