[med-svn] r23666 - in trunk/packages/metastudent/trunk/debian: . patches
Tatiana Malygina
latticetower-guest at moszumanska.debian.org
Sun Feb 5 11:21:34 UTC 2017
Author: latticetower-guest
Date: 2017-02-05 11:21:33 +0000 (Sun, 05 Feb 2017)
New Revision: 23666
Added:
trunk/packages/metastudent/trunk/debian/patches/
trunk/packages/metastudent/trunk/debian/patches/01_fix_blastp.patch
trunk/packages/metastudent/trunk/debian/patches/series
Modified:
trunk/packages/metastudent/trunk/debian/changelog
Log:
add patch to parse blast+ output correctly (closes: #848903)
Modified: trunk/packages/metastudent/trunk/debian/changelog
===================================================================
--- trunk/packages/metastudent/trunk/debian/changelog 2017-02-05 07:36:49 UTC (rev 23665)
+++ trunk/packages/metastudent/trunk/debian/changelog 2017-02-05 11:21:33 UTC (rev 23666)
@@ -5,6 +5,7 @@
* add details on package testsuite failure to README.Debian,
add to README.test how to test manually (currently metastudent supports
legacy blast only and fails on blast+ output parsing).
+ * add patch to parse blast+ output correctly (closes: #848903).
-- Tatiana Malygina <merlettaia at gmail.com> Sat, 16 Jul 2016 09:24:57 +0300
Added: trunk/packages/metastudent/trunk/debian/patches/01_fix_blastp.patch
===================================================================
--- trunk/packages/metastudent/trunk/debian/patches/01_fix_blastp.patch (rev 0)
+++ trunk/packages/metastudent/trunk/debian/patches/01_fix_blastp.patch 2017-02-05 11:21:33 UTC (rev 23666)
@@ -0,0 +1,229 @@
+Author: Tatiana Malygina <merlettaia at gmail.com>
+Last-Update: 2017-02-05
+Description: metastudent supports only the legacy version of BLAST.
+ This patch makes it work with blast+, which is now the main tool associated
+ with the blast2 package.
+ Metastudent's temporary files were inspected and compared between blast+ and
+ legacy blast. Because the stages of the metastudent pipeline each parse blast
+ output separately, any of them could produce incorrect output. This patch
+ fixes the parsing in the affected stages of metastudent.
+Bug-Debian: https://bugs.debian.org/848903
+
+--- a/metastudentPkg/lib/groupA/GOSSIP/src/GOSSIPSTarter.java
++++ b/metastudentPkg/lib/groupA/GOSSIP/src/GOSSIPSTarter.java
+@@ -62,9 +62,23 @@
+ try
+ {
+ scanner = new Scanner(new FileInputStream(path));
++ boolean usePSIBlast = false;
++ boolean prefixSkipped = false;
+ while (scanner.hasNextLine())
+ {
+ String currLine = scanner.nextLine() + NL;
++ if (currLine.startsWith("PSIBLAST"))
++ usePSIBlast = true;
++ if (usePSIBlast && currLine.startsWith("Results from round 1"))
++ {
++ if (prefixSkipped &&
++ text.toString().replaceAll("\\s", "").length() > 0)
++ {
++ indivdResults.add(text.toString());
++ text = new StringBuilder();
++ }
++ prefixSkipped = true;
++ }
+ if(currLine.startsWith("BLASTP") && text.toString().replaceAll("\\s","").length() > 0)
+ {
+ // System.out.println(text.toString());
+@@ -75,7 +89,8 @@
+ }
+ text.append(currLine);
+ }
+- indivdResults.add(text.toString());
++ if (!(usePSIBlast) || prefixSkipped)
++ indivdResults.add(text.toString());
+ }
+ catch (FileNotFoundException e)
+ {
+--- a/metastudentPkg/lib/groupA/GOSSIP/src/Predictor.java
++++ b/metastudentPkg/lib/groupA/GOSSIP/src/Predictor.java
+@@ -128,7 +128,7 @@
+ String actGo = "";
+ for(int i = 0; i < lines.length; i++)
+ {
+- if( (Constants.NUM_BLAST_ITERATIONS==1 && lines[i].startsWith("Searching")) || (Constants.NUM_BLAST_ITERATIONS>1 && lines[i].startsWith("Results from round ")) )
++ if( (Constants.NUM_BLAST_ITERATIONS==1 && (lines[i].startsWith("Searching") || lines[i].replaceAll("\\s", "").startsWith("Query="))) || (lines[i].startsWith("Results from round ")) )
+ {
+ selector.gos.clear();
+ selector.length.clear();
+@@ -156,7 +156,7 @@
+ go = false;
+ actGo = actGo.replaceAll("\\s+", "");
+ selector.gos.add(actGo);
+- selector.length.add(Integer.parseInt(lines[i].split(" = ")[1]));
++ selector.length.add(Integer.parseInt(lines[i].replaceAll("\\s", "").split("=")[1]));
+ i++;
+ //System.out.println(line);
+ i++;
+@@ -187,4 +187,3 @@
+
+ }
+ }
+-
+
+
+--- a/metastudentPkg/lib/groupB/java/BlastFileParser.java
++++ b/metastudentPkg/lib/groupB/java/BlastFileParser.java
+@@ -36,7 +36,18 @@
+ File file = new File(dir);
+ result_per_blast = new ArrayList<String>();
+ BufferedReader in = new BufferedReader(new FileReader(file));
++ boolean prefixSkipped = false;
+ while((line = in.readLine()) != null) {
++ if (line.startsWith("Results from round 1")) {
++ if (prefixSkipped) {
++ if (!result_per_blast.isEmpty()) {
++ parse(result_per_blast);
++ }
++ }
++ result_per_blast.clear();
++ result_per_blast.add(line);
++ prefixSkipped = true;
++ }
+ if(Pattern.matches("BLASTP.*",line)) {
+ if(!result_per_blast.isEmpty()) {
+ parse(result_per_blast);
+@@ -60,12 +71,12 @@
+ String target = "";
+ int round = 0;
+ ArrayList<TabulatorFormat> result = new ArrayList<TabulatorFormat>();
+-
+ for(int i = 0; i < result_per_blast.size();i++) {
+ line = result_per_blast.get(i);
+ result.add(new TabulatorFormat());
+ if(Pattern.matches("Results\\sfrom\\sround\\s\\d+",line)) {
+- moreThanOneRound = true;
++ if (Pattern.matches("Results\\sfrom\\sround\\s2", line))
++ moreThanOneRound = true;
+ result.add(new TabulatorFormat());
+ round++;
+ result.get(round).addRound(round);
+@@ -75,15 +86,27 @@
+ matcher = pattern.matcher(line);
+ matcher.find();
+ target = matcher.group(1);
++ i++;
++ line = result_per_blast.get(i);
++ while (Pattern.matches("\\S+", line) &&
++ line.length() > 0 &&
++ !line.replaceAll("\\s", "").startsWith("Length") &&
++ !line.startsWith(">") &&
++ i < result_per_blast.size() - 1) {
++ target += line;
++ i++;
++ line = result_per_blast.get(i);
++ }
++ System.out.println(target);
+ }
+- if(Pattern.matches(">\\S*GO\\S+\\s*",line)) {
++ if(Pattern.matches(">\\s*\\S*GO\\S+\\s*",line)) {
+ QueryResult queryResult = new QueryResult();
+- if(Pattern.matches(">GO.*", line)) {
+- pattern = Pattern.compile(">(GO\\S+)\\s*");
++ if(Pattern.matches(">\\s*GO.*", line)) {
++ pattern = Pattern.compile(">\\s*(GO\\S+)\\s*");
+ matcher = pattern.matcher(line);
+ matcher.find();
+ goTerms = matcher.group(1);
+- while(!Pattern.matches("\\s*Length\\s=\\s\\d+.*", line) && i < result_per_blast.size()-1) {
++ while(!Pattern.matches("\\s*Length\\s*=\\s*\\d+.*", line) && i < result_per_blast.size()-1) {
+ if(Pattern.matches("\\s*GO\\S*\\s*", line)) {
+ pattern = Pattern.compile("\\s*(GO\\S+)\\s*");
+ matcher = pattern.matcher(line);
+@@ -94,13 +117,13 @@
+ line = result_per_blast.get(i);
+ }
+ queryResult.setName(goTerms);
+- } else if (Pattern.matches(">\\S+\\|\\S+.*",line)) {
+- pattern = Pattern.compile(">(\\S+)\\|(\\S+)\\s*");
++ } else if (Pattern.matches(">\\s*\\S+\\|\\S+.*",line)) {
++ pattern = Pattern.compile(">\\s*(\\S+)\\|(\\S+)\\s*");
+ matcher = pattern.matcher(line);
+ matcher.find();
+ name = matcher.group(1);
+ goTerms = matcher.group(2);
+- while(!Pattern.matches("\\s*Length\\s=\\s\\d+.*", line) && i < result_per_blast.size()-1) {
++ while(!Pattern.matches("\\s*Length\\s*=\\s*\\d+.*", line) && i < result_per_blast.size()-1) {
+ if(Pattern.matches("\\s*GO\\S*\\s*", line)) {
+ pattern = Pattern.compile("\\s*(GO\\S+)\\s*");
+ matcher = pattern.matcher(line);
+@@ -113,15 +136,15 @@
+ queryResult.setName(name);
+ }
+ queryResult.addGoTerms(goTerms);
+- pattern = Pattern.compile("\\s+Length\\s*=\\s*(\\d+)\\s*");
++ pattern = Pattern.compile("\\s*Length\\s*=\\s*(\\d+)\\s*");
+ matcher = pattern.matcher(line);
+ matcher.find();
+ queryResult.addLength("" + matcher.group(1));
+- while(!Pattern.matches("\\s*Score\\s=\\s*\\S+\\sbits\\s\\(\\d+\\),\\sExpect\\s=\\s\\S+.*", line) && i < result_per_blast.size()-1) {
++ while(!Pattern.matches("\\s*Score\\s=\\s*\\S+\\sbits\\s\\(\\d+\\),\\s*Expect\\s=\\s\\S+.*", line) && i < result_per_blast.size()-1) {
+ i++;
+ line = result_per_blast.get(i);
+ }
+- pattern = Pattern.compile("\\s*Score\\s=\\s*(\\S+)\\sbits\\s\\((\\d+)\\),\\sExpect\\s=\\s(\\S+),.*");
++ pattern = Pattern.compile("\\s*Score\\s=\\s*(\\S+)\\sbits\\s\\((\\d+)\\),\\s*Expect\\s=\\s(\\S+),.*");
+ matcher = pattern.matcher(line);
+ matcher.find();
+ queryResult.addScore(Double.parseDouble(matcher.group(1)));
+@@ -185,4 +208,3 @@
+ }
+
+ }
+-
+
+
+--- a/metastudentPkg/lib/groupC/exercise3.pl
++++ b/metastudentPkg/lib/groupC/exercise3.pl
+@@ -58,11 +58,25 @@
+ # }
+ }
+
+- if(!/^>.*?:(.+)$|^\s+(Score)|(^Searching)|^Query=\s(.*)|^\s(Identities)|^(Database)/)
++ if(!/^>.*?:(.+)$|^\s+(Score)|(^Searching|Results\sfrom\sround\s1)|^Query=\s(.*)|^\s(Identities)|^(Database)/)
+ {
+ if($parseId)
+ {
+- $target .= $_;
++ if (!/\S+/) {
++ $target =~ s:\r?\n::g;
++ $target =~ s:\s*::g;
++ if ($target ne "") {
++ push (@out,"Target: $target\n");
++ $targetid=$target;
++ $targets{$targetid}="no prediction";
++ }
++ undef $target;
++ $true=1;
++ $parseId = 0;
++ } else {
++ $target .= $_;
++ }
++ #$target .= $_;
+ }
+ next;
+ }
+@@ -124,10 +138,12 @@
+ $target.= $_;
+ $target =~ s:\r?\n::g;
+ $target =~ s:\s*::g;
+- $target =~ s/\(.*letters\)Database:.*//g;
++ $target =~ s/(\(.*letters\))?Database:.*//g;
++ if ($target ne "") { # string test; numeric != would coerce a non-numeric id to 0
+ push (@out,"Target: $target\n");
+ $targetid=$target;
+ $targets{$targetid}="no prediction";
++ }
+ $true=0;
+ undef $target;
+
Added: trunk/packages/metastudent/trunk/debian/patches/series
===================================================================
--- trunk/packages/metastudent/trunk/debian/patches/series (rev 0)
+++ trunk/packages/metastudent/trunk/debian/patches/series 2017-02-05 11:21:33 UTC (rev 23666)
@@ -0,0 +1 @@
+01_fix_blastp.patch
More information about the debian-med-commit
mailing list