[med-svn] [giira] 01/04: Imported Upstream version 0.0.20140210
Andreas Tille
tille at debian.org
Mon Feb 10 12:50:09 UTC 2014
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository giira.
commit a3156724f34e26c479ebfb3d027872a9b96d7486
Author: Andreas Tille <tille at debian.org>
Date: Mon Feb 10 13:45:16 2014 +0100
Imported Upstream version 0.0.20140210
---
src/geneFinder/BWA_Call.java | 14 +-
src/geneFinder/CalculateScores.java | 10 +-
src/geneFinder/CigarParser.java | 177 ++++++++++
src/geneFinder/CleanAfterAmbiOpti.java | 6 +-
src/geneFinder/GeneFinder.java | 18 +-
src/geneFinder/Giira.java | 14 +-
src/geneFinder/Operon_LP.java | 8 +-
src/geneFinder/OptimizeAmbis.java | 10 +-
src/geneFinder/OptimizeAmbis_GLPK.java | 427 ++++++++++++++++++++++++
src/geneFinder/PrepareMapping_GF.java | 4 +-
src/geneFinder/ProkaryoteExtraction.java | 13 +-
src/geneFinder/ReadInParameters_GeneFinder.java | 20 +-
src/geneFinder/SamParser.java | 18 +-
src/geneFinder/TopHat_Call.java | 13 +-
src/geneFinder/WriteOutput.java | 24 +-
15 files changed, 703 insertions(+), 73 deletions(-)
diff --git a/src/geneFinder/BWA_Call.java b/src/geneFinder/BWA_Call.java
index a9dc084..b07c9e3 100755
--- a/src/geneFinder/BWA_Call.java
+++ b/src/geneFinder/BWA_Call.java
@@ -29,7 +29,7 @@ public class BWA_Call {
// more than one read file, so concatenate them to one big file
- rnaFile = new File(GeneFinder.pathOut+"resultsRun/mergedReadFiles.fastq");
+ rnaFile = new File(GeneFinder.pathOut+"mergedReadFiles.fastq");
callConcatenateReadFiles(); // perform the concatenation
}else{
@@ -63,24 +63,24 @@ public class BWA_Call {
System.out.print("Indexing.... "); // first step
String firstExe = "python " + GeneFinder.pathToHelpFiles+"bwa_index.py " + refFile + " " + nameIndexTool; // first index reference file
- Giira.callAndHandleOutput(firstExe);
+ Giira.callAndHandleOutput(firstExe,true);
System.out.println("\nPerform alignment.... ");
if(!GeneFinder.useBWAsw){
- String saiOut = GeneFinder.pathOut+"resultsRun/aln_sa.sai";
+ String saiOut = GeneFinder.pathOut+"aln_sa.sai";
String secondExe = "python "+GeneFinder.pathToHelpFiles+"bwa_aln.py " + optionString_aln + " " + refFile + " " + rnaFile + " " + saiOut; // perform alignment
- Giira.callAndHandleOutput(secondExe);
+ Giira.callAndHandleOutput(secondExe,true);
System.out.println("\nWrite to SAM format....");
String thirdExe = "python "+GeneFinder.pathToHelpFiles+"bwa_samse.py " + optionString_samse + " " + refFile + " " + saiOut + " " + rnaFile + " " + nameOutSamFile; //output: sam file
- Giira.callAndHandleOutput(thirdExe);
+ Giira.callAndHandleOutput(thirdExe,true);
}else{
String secondExe = "python "+GeneFinder.pathToHelpFiles+"bwa_sw.py " + optionString_samse + " " + refFile + " " + rnaFile + " " + nameOutSamFile; //output: sam file
- Giira.callAndHandleOutput(secondExe);
+ Giira.callAndHandleOutput(secondExe,true);
}
System.out.println("\nDone.");
@@ -110,7 +110,7 @@ public class BWA_Call {
}
try { // call python helper script
- String exe = "python "+GeneFinder.pathToHelpFiles+"callCat_RnaFile.py " + allRnaNames + " " + GeneFinder.pathOut+"resultsRun/mergedReadFiles.fastq";
+ String exe = "python "+GeneFinder.pathToHelpFiles+"callCat_RnaFile.py " + allRnaNames + " " + GeneFinder.pathOut+"mergedReadFiles.fastq";
firstExe = prepRef.exec(exe);
firstExe.waitFor();
} catch (IOException e) {
diff --git a/src/geneFinder/CalculateScores.java b/src/geneFinder/CalculateScores.java
index b71c1dd..ab51f13 100755
--- a/src/geneFinder/CalculateScores.java
+++ b/src/geneFinder/CalculateScores.java
@@ -77,7 +77,11 @@ public class CalculateScores {
cluster.exonsOfGene.clear();
cluster.exonLength = 0;
FindExonsOfGene.findExonsForGene(cluster); // TODO: also check if exons are there!
- OptimizeAmbis.sumUpExonLengths(cluster, cluster.exonsOfGene);
+ if(GeneFinder.useCPLEX){
+ OptimizeAmbis.sumUpExonLengths(cluster, cluster.exonsOfGene);
+ }else{
+ OptimizeAmbis_GLPK.sumUpExonLengths(cluster, cluster.exonsOfGene);
+ }
exonLength = cluster.exonLength;
@@ -161,13 +165,13 @@ public class CalculateScores {
}
}
- WriteOutput.writeToOtherFile(GeneFinder.pathOut+"resultsRun/reassignedReads.sam",(String)minInfo[1]);
+ WriteOutput.writeToOtherFile(GeneFinder.pathOut+"reassignedReads.sam",(String)minInfo[1]);
}else if(rna.contigsMappedOn.size() == 1){
String rnaInfo = "";
Object[] info = rna.contigsMappedOn.get(0);
rnaInfo += rna.rnaID + "\t0\t"+ contigName + "\t" + ((Integer)info[1]) + "\t0\t" + ((String)info[2]) + "\t*\t*\t*\t*\t*\tNH:i:1\n";
- WriteOutput.writeToOtherFile(GeneFinder.pathOut+"resultsRun/reassignedReads.sam",rnaInfo);
+ WriteOutput.writeToOtherFile(GeneFinder.pathOut+"reassignedReads.sam",rnaInfo);
}
}
}
diff --git a/src/geneFinder/CigarParser.java b/src/geneFinder/CigarParser.java
new file mode 100755
index 0000000..5120456
--- /dev/null
+++ b/src/geneFinder/CigarParser.java
@@ -0,0 +1,177 @@
+package geneFinder;
+
+import java.util.Vector;
+
+/**
+ * parse cigar, MD and sequence to extract all indels and mismatches
+ * @author zickmannf
+ *
+ */
+
+public class CigarParser {
+
+ public static void main(String[] args){ // main is only for debugging
+ String cigar = "3M2I1M1D4M";
+ String mdTag = "4^A2C1";
+ String rnaSeq = "AAATTATTGT";
+
+ Object[] returnArr = extractAllAlignDiffs(cigar, mdTag, rnaSeq);
+ }
+
+ /*
+ * extract all alignment differences
+ * note: positions are reference-based, insert bases and mismatch bases are read-based (necessary for transcript reconstruction)
+ * note: no clipped alignments are supported
+ */
+
+ public static Object[] extractAllAlignDiffs(String cigar, String mdTag, String readSeq){
+
+ Vector<int[]> inserts = new Vector<int[]>();
+ Vector<int[]> dels = new Vector<int[]>();
+ Vector<int[]> mms = new Vector<int[]>();
+
+ int[] refToReadPos = new int[readSeq.length()*2];
+ for(int pos = 0;pos<refToReadPos.length;++pos){
+ refToReadPos[pos] = pos;
+ }
+
+ /////// first: insertions ///////
+
+ if(cigar.contains("I")){
+
+ int globalPos = 0; // in case of more than one intron
+ String[] cigarArr_I = cigar.split("I");
+
+ for(int pos = 0; pos<(cigarArr_I.length-1);++pos){
+ String i_part = cigarArr_I[pos];
+ String[] iPart_all = i_part.split("[DIMXN]");
+
+ int insertSize = Integer.parseInt(iPart_all[iPart_all.length - 1]);
+
+ int pos_local = 0;
+ String[] iPart_all2 = i_part.split("[DIMX]");
+
+ for(int pos2 = 0;pos2 < iPart_all2.length-1;++pos2){
+ String partSmall = iPart_all2[pos2];
+ if(!partSmall.contains("N")){
+ pos_local = pos_local + Integer.parseInt(partSmall);
+ }else{
+ String[] partSmall_n = partSmall.split("N"); // do not count N positions
+ pos_local = pos_local + Integer.parseInt(partSmall_n[partSmall_n.length - 1]);
+ }
+ }
+
+ globalPos = globalPos + pos_local;
+
+ int[] insertTemp = new int[2+insertSize]; // 0=position on ref at that insert is started; 1=insertSize,2etc=base codes for inserted bases
+ insertTemp[0] = globalPos;
+ insertTemp[1] = insertSize;
+
+ for(int posMap = globalPos;posMap < refToReadPos.length;++posMap){
+ refToReadPos[posMap] = refToReadPos[posMap] + insertSize;
+ }
+
+ inserts.add(insertTemp);
+ }
+ }
+
+ /////// second: deletions ///////
+
+ if(cigar.contains("D")){
+
+ int globalPos = 0;
+ String[] cigarArr_D = cigar.split("D");
+ for(int pos = 0; pos<(cigarArr_D.length-1);++pos){
+ String d_part = cigarArr_D[pos];
+ String[] dPart_all = d_part.split("[DIMXN]");
+
+ int delSize = Integer.parseInt(dPart_all[dPart_all.length - 1]);
+
+ int pos_local = 0;
+ String[] dPart_all2 = d_part.split("[DMX]");
+
+ for(int pos2 = 0;pos2 < dPart_all2.length-1;++pos2){
+ String partSmall = dPart_all2[pos2];
+ if(!partSmall.contains("N") && !partSmall.contains("I")){
+ pos_local = pos_local + Integer.parseInt(partSmall);
+ }else{
+ String[] partSmall_split = partSmall.split("[NI]"); // do not count N or I positions
+ pos_local = pos_local + Integer.parseInt(partSmall_split[partSmall_split.length - 1]);
+ }
+ }
+
+ globalPos = globalPos + pos_local;
+
+ int[] delTemp = new int[2]; // 0=position on ref at that deletion is started; 1=deletionSize
+ delTemp[0] = globalPos;
+ delTemp[1] = delSize;
+
+ /*for(int posMap = globalPos;posMap < Math.min(refToReadPos.length,globalPos + delSize);++posMap){
+ refToReadPos[posMap] = -1;
+ }*/
+
+ for(int posMap = globalPos + delSize;posMap < refToReadPos.length;++posMap){
+ refToReadPos[posMap] = refToReadPos[posMap] - delSize;
+ }
+
+ dels.add(delTemp);
+ globalPos = globalPos + delSize;
+ }
+ }
+
+ /////// third: complete inserts ///////
+
+ for(int[] insertTemp : inserts){
+ for(int pos = 0;pos<insertTemp[1];++pos){
+ String base = readSeq.substring((refToReadPos[insertTemp[0]+pos]-insertTemp[1]),(refToReadPos[insertTemp[0]+pos]-insertTemp[1]) + 1);
+ insertTemp[2+pos] = SamParser.returnBaseCode(base);
+ }
+ }
+
+ /////// fourth: extract mismatches ///////
+
+ String[] mdParts = mdTag.split("");
+
+ int posOnRef = 0;
+
+ boolean inDeletion = false;
+ StringBuffer numString = new StringBuffer("");
+
+ for(int posArr = 1; posArr<mdParts.length;++posArr){
+ String mdChar = mdParts[posArr];
+ if(mdChar.contains("^")){
+ inDeletion = true;
+ if(numString.length() != 0){
+ posOnRef = posOnRef + Integer.parseInt(numString.toString()); // +1 for current position
+ }
+ numString = new StringBuffer("");
+ }else if(mdChar.contains("A") || mdChar.contains("C") || mdChar.contains("G") || mdChar.contains("T") || mdChar.contains("N")){
+ if(!inDeletion){
+ // mismatch
+ //posOnRef++;
+ if(numString.length() != 0){
+ posOnRef = posOnRef + Integer.parseInt(numString.toString()) + 1; // +1 for current position
+ }else{
+ posOnRef++;
+ }
+ numString = new StringBuffer("");
+ String base = readSeq.substring(refToReadPos[posOnRef-1],refToReadPos[posOnRef-1]+1);
+ int[] tempMM = new int[2];
+ tempMM[0] = posOnRef-1;
+ tempMM[1] = SamParser.returnBaseCode(base);
+ mms.add(tempMM);
+ }else{
+ posOnRef++;
+ }
+ }else{
+ inDeletion = false;
+ numString.append(mdChar);
+ //posOnRef = posOnRef + Integer.parseInt(mdChar);
+ }
+ }
+
+ return new Object[] {inserts,dels,mms};
+
+ }
+
+}
diff --git a/src/geneFinder/CleanAfterAmbiOpti.java b/src/geneFinder/CleanAfterAmbiOpti.java
index 639babc..0ff4e56 100755
--- a/src/geneFinder/CleanAfterAmbiOpti.java
+++ b/src/geneFinder/CleanAfterAmbiOpti.java
@@ -44,7 +44,7 @@ public class CleanAfterAmbiOpti {
int counterVars = 0;
try{
- BufferedReader br = new BufferedReader(new FileReader(GeneFinder.pathOut+"resultsRun/solutionCPLEX_it" + GeneFinder.iteration + ".sol"));
+ BufferedReader br = new BufferedReader(new FileReader(GeneFinder.pathOut+"solutionCPLEX_it" + GeneFinder.iteration + ".sol"));
System.out.println("Parsing solution of cplex and clean ambiguous reads... ");
System.out.print("Processed: ");
@@ -148,7 +148,7 @@ public class CleanAfterAmbiOpti {
int processedNum = 0; // for progress reporting
try{
- BufferedReader br = new BufferedReader(new FileReader(GeneFinder.pathOut+"resultsRun/solutionGLPK_out_it" + GeneFinder.iteration + ".out"));
+ BufferedReader br = new BufferedReader(new FileReader(GeneFinder.pathOut+"solutionGLPK_out_it" + GeneFinder.iteration + ".out"));
System.out.println("Parsing solution of glpk and clean ambiguous reads... ");
System.out.print("Processed: ");
@@ -200,6 +200,8 @@ public class CleanAfterAmbiOpti {
r.gc();
r.gc(); // to keep the memory requirements down as much as possible
}
+ }else if(line.contains("x__") && (line.contains("__f"))){
+ line = br.readLine();
}
line = br.readLine();
diff --git a/src/geneFinder/GeneFinder.java b/src/geneFinder/GeneFinder.java
index bf48ac4..5dba17a 100755
--- a/src/geneFinder/GeneFinder.java
+++ b/src/geneFinder/GeneFinder.java
@@ -106,8 +106,8 @@ public class GeneFinder {
inputFileAlign = new File(haveSam);
}else{
- inputFileAlign = new File(pathOut+"resultsRun/accepted_hits.sam");
- GeneFinder.haveSam = pathOut+"resultsRun/accepted_hits.sam";
+ inputFileAlign = new File(pathOut+"accepted_hits.sam");
+ GeneFinder.haveSam = pathOut+"accepted_hits.sam";
// first test if not already created in earlier rounds
@@ -128,8 +128,8 @@ public class GeneFinder {
inputFileAlign = new File(haveSam);
}else{
- inputFileAlign = new File(pathOut+"resultsRun/aln_BWA.sam");
- GeneFinder.haveSam = pathOut+"resultsRun/aln_BWA.sam";
+ inputFileAlign = new File(pathOut+"aln_BWA.sam");
+ GeneFinder.haveSam = pathOut+"aln_BWA.sam";
// first test if not already created in earlier rounds
@@ -140,7 +140,7 @@ public class GeneFinder {
}catch (IOException e) {
BWA_Call bwaStart = new BWA_Call();
- bwaStart.callBWA(new File(nameRef+".fasta"), pathOut+"resultsRun/aln_BWA.sam");
+ bwaStart.callBWA(new File(nameRef+".fasta"), pathOut+"aln_BWA.sam");
}
@@ -197,8 +197,12 @@ public class GeneFinder {
}
if(!GeneFinder.noAmbiOpti){
- OptimizeAmbis.maxFlowLP();
-
+ if(useCPLEX){
+ OptimizeAmbis.maxFlowLP();
+ }else{
+ OptimizeAmbis_GLPK.maxFlowLP();
+ }
+
}
double[] minMax = CalculateScores.assignGeneScores(false);
diff --git a/src/geneFinder/Giira.java b/src/geneFinder/Giira.java
index c381279..4f264b5 100755
--- a/src/geneFinder/Giira.java
+++ b/src/geneFinder/Giira.java
@@ -77,7 +77,7 @@ public class Giira {
String sysCall_1 = "java -Xmx"+ mem[0] +"m -cp " + classPath + ":" + decodedPath + " geneFinder.GeneFinder " + argString + "-iter " + itNum + " -solverOn n -splitRunAndOpti n -scripts " + scriptPath + "scripts/";
System.out.println("Call part 1:");
- callAndHandleOutput(sysCall_1);
+ callAndHandleOutput(sysCall_1,true);
// optimization
@@ -89,13 +89,13 @@ public class Giira {
}
System.out.println("Optimization: " + sysCall_Opti);
- callAndHandleOutput(sysCall_Opti);
+ callAndHandleOutput(sysCall_Opti,true);
// run 2
String sysCall_2 = "java -Xmx"+ mem[0] +"m -cp " + classPath + ":" + decodedPath + " geneFinder.GeneFinder " + argString + "-iter " + itNum + " -solverOn n -splitRunAndOpti n -secondPart y -scripts " + scriptPath + "scripts/";
System.out.println("Call part 2:");
- callAndHandleOutput(sysCall_2);
+ callAndHandleOutput(sysCall_2,true);
}else{
@@ -108,7 +108,7 @@ public class Giira {
}
System.out.println("Call: ");
- callAndHandleOutput(sysCall);
+ callAndHandleOutput(sysCall,true);
}
@@ -203,7 +203,7 @@ public class Giira {
* call GIIRA and handle the output streams
*/
- public static void callAndHandleOutput(String sysCall){
+ public static void callAndHandleOutput(String sysCall,boolean printAll){
try{
@@ -219,7 +219,9 @@ public class Giira {
String lineExe = "";
while((lineExe = bExe.readLine()) != null){
- System.out.println(lineExe);
+ if(printAll){
+ System.out.println(lineExe);
+ }
}
while((lineExe = bErr.readLine()) != null){
System.out.println(lineExe);
diff --git a/src/geneFinder/Operon_LP.java b/src/geneFinder/Operon_LP.java
index bb668c9..b752ad4 100755
--- a/src/geneFinder/Operon_LP.java
+++ b/src/geneFinder/Operon_LP.java
@@ -51,7 +51,7 @@ public class Operon_LP {
try {
- BufferedWriter bw = new BufferedWriter(new FileWriter(GeneFinder.pathOut+"resultsRun/input_operonLP.lp")); // name of lp file
+ BufferedWriter bw = new BufferedWriter(new FileWriter(GeneFinder.pathOut+"input_operonLP.lp")); // name of lp file
bw.write("Maximize\n");
@@ -260,12 +260,12 @@ public class Operon_LP {
try {
IloCplex cplex = new IloCplex();
cplex.setOut(null);
- cplex.importModel(GeneFinder.pathOut+"resultsRun/input_operonLP.lp");
+ cplex.importModel(GeneFinder.pathOut+"input_operonLP.lp");
cplex.setParam(IloCplex.IntParam.Threads,GeneFinder.numberThreads);
cplex.solve();
- cplex.writeSolution(GeneFinder.pathOut+"resultsRun/solution_operonLP.sol");
+ cplex.writeSolution(GeneFinder.pathOut+"solution_operonLP.sol");
} catch (IloException e2) {
@@ -284,7 +284,7 @@ public class Operon_LP {
double objective = -Double.MAX_VALUE;
try{
- BufferedReader br = new BufferedReader(new FileReader(GeneFinder.pathOut+"resultsRun/solution_operonLP.sol"));
+ BufferedReader br = new BufferedReader(new FileReader(GeneFinder.pathOut+"solution_operonLP.sol"));
String line = "";
diff --git a/src/geneFinder/OptimizeAmbis.java b/src/geneFinder/OptimizeAmbis.java
index 05c1e16..b922969 100755
--- a/src/geneFinder/OptimizeAmbis.java
+++ b/src/geneFinder/OptimizeAmbis.java
@@ -53,7 +53,7 @@ public class OptimizeAmbis {
try {
- BufferedWriter bw = new BufferedWriter(new FileWriter(GeneFinder.pathOut+"resultsRun/input_it" + GeneFinder.iteration + ".lp")); // name of lp file
+ BufferedWriter bw = new BufferedWriter(new FileWriter(GeneFinder.pathOut+"input_it" + GeneFinder.iteration + ".lp")); // name of lp file
bw.write("Maximize\n");
@@ -381,7 +381,7 @@ public class OptimizeAmbis {
System.out.println("Start cplex solve...");
IloCplex cplex = new IloCplex();
- cplex.importModel(GeneFinder.pathOut+"resultsRun/input_it" + GeneFinder.iteration + ".lp");
+ cplex.importModel(GeneFinder.pathOut+"input_it" + GeneFinder.iteration + ".lp");
cplex.setParam(IloCplex.IntParam.RootAlg,IloCplex.Algorithm.Network);
cplex.setParam(IloCplex.DoubleParam.EpGap,0.01);
@@ -393,7 +393,7 @@ public class OptimizeAmbis {
cplex.setParam(IloCplex.DoubleParam.WorkMem,GeneFinder.memForCplex);
}
- cplex.setParam(IloCplex.StringParam.WorkDir,GeneFinder.pathOut+"resultsRun/");
+ cplex.setParam(IloCplex.StringParam.WorkDir,GeneFinder.pathOut);
System.out.println("Directory: " + cplex.getParam(IloCplex.StringParam.WorkDir));
cplex.setParam(IloCplex.DoubleParam.PolishTime,1000.0);
@@ -411,7 +411,7 @@ public class OptimizeAmbis {
cplex.solve();
- cplex.writeSolution(GeneFinder.pathOut+"resultsRun/solutionCPLEX_it" + GeneFinder.iteration + ".sol");
+ cplex.writeSolution(GeneFinder.pathOut+"solutionCPLEX_it" + GeneFinder.iteration + ".sol");
} catch (IloException e2) {
@@ -430,7 +430,7 @@ public class OptimizeAmbis {
System.out.println("Start glpk solve...");
Runtime rt = Runtime.getRuntime();
- Process firstExe = rt.exec("glpsol --lp " + GeneFinder.pathOut + "resultsRun/input_it" + GeneFinder.iteration + ".lp --output " + GeneFinder.pathOut + "resultsRun/solutionGLPK_out_it" + GeneFinder.iteration + ".out");
+ Process firstExe = rt.exec("glpsol --lp " + GeneFinder.pathOut + "input_it" + GeneFinder.iteration + ".lp --output " + GeneFinder.pathOut + "solutionGLPK_out_it" + GeneFinder.iteration + ".out");
firstExe.waitFor();
} catch (InterruptedException e) {
diff --git a/src/geneFinder/OptimizeAmbis_GLPK.java b/src/geneFinder/OptimizeAmbis_GLPK.java
new file mode 100755
index 0000000..d049fc8
--- /dev/null
+++ b/src/geneFinder/OptimizeAmbis_GLPK.java
@@ -0,0 +1,427 @@
+package geneFinder;
+
+import java.io.*;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Vector;
+
+import types.*;
+
+/**
+* manage the assignment of ambiguous hits to their final position, using a maximum flow formulation
+* current methods: GLPK solver, CPLEX linear program
+* Copyright (c) 2013,
+* Franziska Zickmann,
+* ZickmannF at rki.de, Robert Koch-Institute, Berlin, Germany
+* Distributed under the GNU Lesser General Public License, version 3.0
+*
+*/
+
+
+ public class OptimizeAmbis_GLPK {
+
+
+ public static Map<String,Object[]> multiRnas = new HashMap<String,Object[]>(); // store all ambiguous rnas
+ public static Map<Gene,Vector<String>> multiRnas_eachCluster = new HashMap<Gene,Vector<String>>(); // stores all variables that belong to one gene
+ public static Map<Gene,Object[]> clustNeighbors = new HashMap<Gene,Object[]>(); // first entry: gene vector, second entry: multiRnas, note: if too memory intensive, sacrifice running time and use associated rnas
+
+
+ /*
+ * set up the maximum flow formulation
+ */
+
+ public static void maxFlowLP(){
+
+ multiRnas.clear();
+ multiRnas_eachCluster.clear();
+ clustNeighbors.clear();
+
+ long timeBef = System.currentTimeMillis();
+
+ // the following 3 variables are required for output messages
+ int numMulti = 0; // count all ambiguous reads
+ int numShared = 0; // count all reads of twin candidate genes
+ int processCounter = 0; // for progress report
+
+ int fVar_counter = 0; // necessary to parse cplex solution
+
+ StringBuffer binaryVars = new StringBuffer(""); // will include all variables
+
+ boolean noMultis = false;
+
+ try {
+
+ BufferedWriter bw = new BufferedWriter(new FileWriter(GeneFinder.pathOut+"input_it" + GeneFinder.iteration + ".lp")); // name of lp file
+
+ bw.write("Maximize\n");
+
+ StringBuffer targetFunction = new StringBuffer(""); // will include the target function
+
+ for(String contigName : GeneFinder.mappedContigs.keySet()){
+
+ Contig contig = GeneFinder.mappedContigs.get(contigName);
+
+ for(int posGene = contig.allGenes.size() -1; posGene >= 0; posGene--){
+
+ Gene cluster = contig.allGenes.get(posGene);
+
+ boolean hasMultis = false; // hasMultis and hasTwin indicate if we have to deal with another gene candidate
+ boolean hasTwin = false;
+
+ cluster.totalCov = 0.0;
+
+ if(!FindExonsOfGene.findExonsForGene(cluster)){ // define the exons as they are at the moment, necessary for exon length calculation
+ contig.allGenes.remove(cluster);
+ }else{
+ sumUpExonLengths(cluster, cluster.exonsOfGene);
+
+ if(cluster.twinNode != null){
+ // we have to resolve the twin as well
+
+ cluster.twinNode.totalCov = 0.0;
+ if(!FindExonsOfGene.findExonsForGene(cluster.twinNode)){
+ cluster.twinNode = null;
+ }else{
+ sumUpExonLengths(cluster.twinNode, cluster.twinNode.exonsOfGene);
+
+ hasTwin = true;
+ }
+ }
+
+
+ int multiNumNode = 0; // count the ambiguous reads of each candidate
+
+ for(String rnaKey : cluster.idTOassociatedRnas.keySet()){
+
+ Rna rna = ((Rna)cluster.idTOassociatedRnas.get(rnaKey)[0]);
+
+ String varName = "x__"+rna.rnaID;
+
+ if(rna.isMulti == 1 || (rna.isSharedBy.contains(cluster.geneID))){
+
+ if(rna.isMulti == 1){ // to determine the right unique coverage
+
+ multiNumNode++;
+ numMulti++;
+
+ cluster.totalCov += (double) (((1.0)/((double) rna.hitNum)) * (double)GeneFinder.readLength); // ambiguous reads have fewer weight, according to the number of their hits
+
+ }else{
+ cluster.totalCov += (double) GeneFinder.readLength;
+ }
+
+ if(rna.assignedNum > 1){
+ hasMultis = true;
+
+ if(((multiNumNode % 50) == 0) && (targetFunction.length() > 10)){
+ bw.write(targetFunction.substring(2));
+ targetFunction = new StringBuffer(" ");
+ }else{
+ targetFunction.append(" + x__"+rna.rnaID + "__" + cluster.geneID + "__f" + "\n"); // include the variable for each readTOgene connection in the target fkt.
+ }
+
+ fVar_counter++;
+ initialize_MaxFlow(cluster,varName); // perform the necessary updates of the maps
+ }
+
+ }else{
+ cluster.totalCov += (double)GeneFinder.readLength;
+ }
+
+ }
+
+ cluster.totalCov = ((double) cluster.totalCov)/((double) cluster.exonLength);
+
+ if(hasMultis){
+
+ cluster.numOfMultis = multiNumNode;
+ cluster.uniqueCov = (((double)((cluster.idTOassociatedRnas.keySet().size() - multiNumNode) * GeneFinder.readLength))/((double)cluster.exonLength));
+
+ }
+
+ if(hasTwin){ // now perform the same stuff for the twin candidate
+
+ for(String rnaKey : cluster.twinNode.idTOassociatedRnas.keySet()){
+
+ Rna rna = ((Rna)cluster.twinNode.idTOassociatedRnas.get(rnaKey)[0]);
+
+ String varName = "x__"+rna.rnaID;
+
+ if(rna.isMulti == 1 || rna.isSharedBy.contains(cluster.twinNode.geneID)){
+
+ if(!rna.isSharedBy.contains(cluster.twinNode.geneID)){
+ // this is non-shared multiple read, so count it
+ //cluster.twinNode.numOfMultis++;
+ numMulti++;
+ }
+ if(rna.isMulti == 1){
+ cluster.twinNode.numOfMultis++;
+
+ cluster.twinNode.totalCov += (double) (((1.0)/((double) rna.hitNum)) * (double)GeneFinder.readLength);
+ }
+
+ if(rna.isSharedBy.contains(cluster.twinNode.geneID)){
+ numShared++; // at this point, update the shared rna number
+ }
+
+ if(rna.assignedNum > 1){
+
+ if(((cluster.twinNode.numOfMultis % 50) == 0) && (targetFunction.length() > 10)){
+ bw.write(targetFunction.substring(2));
+ targetFunction = new StringBuffer(" ");
+ }else{
+ targetFunction.append(" + x__"+rna.rnaID + "__" + cluster.twinNode.geneID + "__f" + "\n");
+ }
+
+ fVar_counter++;
+ initialize_MaxFlow(cluster.twinNode,varName);
+ }
+
+
+ }else{
+ cluster.twinNode.totalCov += (double)GeneFinder.readLength;
+ }
+ }
+
+ cluster.twinNode.uniqueCov = (((double)((cluster.twinNode.idTOassociatedRnas.keySet().size() - cluster.twinNode.numOfMultis) * GeneFinder.readLength))/((double)cluster.twinNode.exonLength));
+ cluster.twinNode.totalCov = ((double) cluster.twinNode.totalCov)/((double) cluster.twinNode.exonLength);
+ }
+
+ processCounter++;
+ if((processCounter % 2000) == 0){
+
+ if((processCounter % 10000) == 0){
+ long timeAfterTmp = System.currentTimeMillis();
+ if(!GeneFinder.secondPart){
+ System.out.println("Processed " + processCounter + " candidates in " + (double) (timeAfterTmp-timeBef)/1000.0 +"s.");
+ }
+ }
+
+ if(!(numMulti == 0 && numShared == 0 && multiRnas.keySet().size() == 0)){
+ if((targetFunction.length() > 10)){
+ bw.write(targetFunction.substring(2));
+ targetFunction = new StringBuffer(" ");
+ }
+ }
+ }else{
+ if((targetFunction.length() > 10) && (fVar_counter % 1000) == 0){
+ bw.write(targetFunction.substring(2));
+ targetFunction = new StringBuffer(" ");
+ }
+ }
+ }
+ }
+ }
+
+ // log messages:
+
+ if(!GeneFinder.secondPart){
+ System.out.println("Processed " + processCounter + " candidates.");
+ System.out.println("Number of multiple rnas: " + numMulti);
+ System.out.println("Number of shared rnas: " + numShared);
+ System.out.println("Number constraints: " + multiRnas.keySet().size());
+ WriteOutput.writeToLogFile("Processed " + processCounter + " candidates. \n" + "Number of multiple rnas: " + numMulti + " \nNumber of shared rnas: " + numShared + " \nNumber constraints: " + multiRnas.keySet().size() + "\n");
+
+ long timeAfter1 = System.currentTimeMillis();
+ System.out.println("Time needed for max flow initialization: "+(double) (timeAfter1-timeBef)/1000.0 +"s.");
+ WriteOutput.writeToLogFile("Time needed for max flow initialization: "+(double) (timeAfter1-timeBef)/1000.0 +"s.\n");
+ }
+
+ if(numMulti == 0 && numShared == 0 && multiRnas.keySet().size() == 0){
+ System.out.println("No multiple rnas, no optimization necessary!");
+ WriteOutput.writeToLogFile("No multiple rnas, no optimization necessary! \n");
+ noMultis = true;
+ GeneFinder.noAmbiOpti = true;
+ }else{
+ // write out the lp
+ if(targetFunction.length() > 3){
+ bw.write(targetFunction.substring(2));
+ targetFunction = new StringBuffer("");
+ }
+
+ bw.write("Subject To \n");
+
+ // now constraints
+
+ for(String varName : multiRnas.keySet()){
+
+ StringBuffer constraint = new StringBuffer(""); // three different types of necessary constraints
+ StringBuffer constraintVar_f = new StringBuffer("");
+ StringBuffer constraintVar_Diff = new StringBuffer("");
+
+ for(String var : (Vector<String>) multiRnas.get(varName)[0]){ // set up the different constraints
+
+ constraint.append(" + " + var + "\n");
+ binaryVars.append(" " + var + "\n");
+ constraintVar_f.append(var+"__f >= 0 \n");
+ //constraintVar_Diff.append(var+"__f - " + GeneFinder.readLength + " " + var + " <= 0 \n");
+ constraintVar_Diff.append(var+"__f - " + var + " <= 0 \n");
+ }
+
+ constraint.append(" = 1");
+
+ bw.write(constraintVar_f.toString() + constraintVar_Diff.toString() + constraint.substring(2) + "\n");
+
+ }
+
+ // now add the weight-constraints
+
+ for(Gene clust : clustNeighbors.keySet()){
+
+ double covSum = 0.0;
+ double sum_exonL = clust.exonLength;
+
+ StringBuffer constraintF = new StringBuffer("");
+
+ for(Gene neighbor : (Vector<Gene>) clustNeighbors.get(clust)[0]){
+ covSum += neighbor.uniqueCov;// + ((1.0/100000.0)); // the ((1.0/100000.0)) ensures that also candidates without unique hits can be chosen
+ sum_exonL = sum_exonL + neighbor.exonLength;
+ }
+
+ for(String var : (Vector<String>) clustNeighbors.get(clust)[1]){
+ constraintF.append(" + " + var + "\n");
+ }
+
+ // 1) no additional read length factor
+ // 2) multiple rnas have a penalty according to their number of ambiguous hits, already included in totalCov
+ // 3) competing candidates have a direct influence on the weight due to covSum
+
+ double weight = 0;
+
+ covSum = Math.pow((covSum+1),2);
+ weight = ((clust.totalCov/covSum) * ((double)clust.exonLength/(double)sum_exonL));
+
+ constraintF.append(" <= " + Math.pow((weight+1),2)); // to the power of two leads to a more "the winner takes it all fashion"
+
+ bw.write(constraintF.substring(2) + "\n");
+
+ }
+
+ //now declare normal variables as binary
+
+ bw.write("Binary \n");
+
+ int posBin = 0;
+
+ for(posBin = 0;posBin<binaryVars.length();){
+ int posBin2 = posBin + 500000;
+ if(posBin2 < binaryVars.length()){
+ bw.write(binaryVars.substring(posBin,posBin2)); // extracted: posBin - (posBin2-1)
+ }
+ posBin = posBin + 500000;
+ }
+
+ if((posBin-500000) >= 0){
+ bw.write(binaryVars.substring(posBin-500000));
+ }
+
+ bw.write("END");
+ }
+
+ bw.close();
+
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+
+ long timeAfter = System.currentTimeMillis();
+ if(!GeneFinder.secondPart){
+ System.out.println("Time needed to set up lp file: "+ (double) (timeAfter-timeBef)/1000.0 +"s.");
+ WriteOutput.writeToLogFile("Time needed to set up lp file: "+ (double) (timeAfter-timeBef)/1000.0 +"s.\n");
+ System.out.println();
+ }
+
+ if(!noMultis){ // now solve the lp
+
+ long timeBefSolve = System.currentTimeMillis();
+
+ // use glpk
+
+ if(GeneFinder.optiSolve){
+ solveMaxFlowWithGLPK();
+ long timeAfterSolve = System.currentTimeMillis();
+ System.out.println("Time needed to solve max flow: "+ (double) (timeAfterSolve-timeBefSolve)/1000.0 +"s.");
+ WriteOutput.writeToLogFile("Time needed to solve max flow: "+ (double) (timeAfterSolve-timeBefSolve)/1000.0 +"s.\n");
+ }
+
+ // parse solution file
+ CleanAfterAmbiOpti.parse_solution_and_clean_GLPK(multiRnas);
+
+ }
+
+ }
+
+
+ /*
+ * solves MaxFlow with the glpsol of the GLPK optimizer
+ */
+
+ public static void solveMaxFlowWithGLPK(){
+
+ System.out.println("Start glpk solve...");
+
+ String firstExe = "python " + GeneFinder.pathToHelpFiles+"lpCall.py " + GeneFinder.pathOut + "input_it" + GeneFinder.iteration + ".lp " + GeneFinder.pathOut + "solutionGLPK_out_it" + GeneFinder.iteration + ".out";
+ Giira.callAndHandleOutput(firstExe,true);
+
+ }
+
+
+ /*
+ * do the initialization for the max flow optimization
+ * fill the required maps
+ */
+
+ public static void initialize_MaxFlow(Gene cluster, String varName){
+
+ if(!clustNeighbors.keySet().contains(cluster)){
+ Vector<Gene> geneTmp = new Vector<Gene>(); // candidate is not contained so far, so add to map
+ Vector<String> varTmp = new Vector<String>();
+ varTmp.add(varName+ "__" + cluster.geneID + "__f");
+ geneTmp.add(cluster);
+ Object[] geneObj = {geneTmp,varTmp};
+ clustNeighbors.put(cluster,geneObj);
+ }else{
+ ( (Vector<String>) clustNeighbors.get(cluster)[1]).add(varName+ "__" + cluster.geneID + "__f"); // add this variable to reads of the candidate
+ }
+
+
+ if(multiRnas.keySet().contains(varName)){ // add this candidate to the list of genes connected to this read
+ ((Vector<String>) multiRnas.get(varName)[0]).add(varName+"__"+cluster.geneID);
+
+ ((Vector<Gene>) multiRnas.get(varName)[1]).add(cluster);
+
+ Vector<Gene> clustGenes = (Vector<Gene>) clustNeighbors.get(cluster)[0];
+ for(Gene clust : (Vector<Gene>) multiRnas.get(varName)[1]){ // add this candidate to the list of competing genes for all those genes already connected to this read
+ if(!clustGenes.contains(clust)){
+ ( (Vector<Gene>) clustNeighbors.get(cluster)[0]).add(clust);
+ }
+
+ if(!((Vector<Gene>) clustNeighbors.get(clust)[0]).contains(cluster)){
+ ((Vector<Gene>) clustNeighbors.get(clust)[0]).add(cluster);
+ }
+ }
+
+ }else{
+ Vector<String> vecTmp = new Vector<String>(); // first time ambiguous read was looked at, so add to map
+ Vector<Gene> vecGeneTmp = new Vector<Gene>();
+ vecGeneTmp.add(cluster);
+ vecTmp.add(varName+"__"+cluster.geneID);
+ Object[] objTmp = {vecTmp,vecGeneTmp};
+ multiRnas.put(varName,objTmp);
+ }
+ }
+
+
+ /*
+ * go through all exons and sum up their lengths do determine the total exon length
+ */
+
+ public static void sumUpExonLengths(Gene gene, Vector<int[]> exons){
+
+ for(int[] exon :exons){
+ gene.exonLength += (exon[1] - exon[0] + 1);
+ }
+ }
+
+ }
+
diff --git a/src/geneFinder/PrepareMapping_GF.java b/src/geneFinder/PrepareMapping_GF.java
index 03b0c8c..63143ff 100755
--- a/src/geneFinder/PrepareMapping_GF.java
+++ b/src/geneFinder/PrepareMapping_GF.java
@@ -22,7 +22,7 @@ public class PrepareMapping_GF {
public String prepareRefFile_GF(){
- String nameRef = GeneFinder.pathOut+"resultsRun/concaRefFile";
+ String nameRef = GeneFinder.pathOut+"concaRefFile";
Runtime prepRef = Runtime.getRuntime();
Process firstExe;
@@ -50,7 +50,7 @@ public class PrepareMapping_GF {
e.printStackTrace();
}
- return (GeneFinder.pathOut+"resultsRun/concaRefFile");
+ return (GeneFinder.pathOut+"concaRefFile");
}
}
diff --git a/src/geneFinder/ProkaryoteExtraction.java b/src/geneFinder/ProkaryoteExtraction.java
index 77292a6..65c12a4 100755
--- a/src/geneFinder/ProkaryoteExtraction.java
+++ b/src/geneFinder/ProkaryoteExtraction.java
@@ -432,6 +432,12 @@ public class ProkaryoteExtraction {
Object[] returnArr = Prokaryote_Specials.define_OrfsInOperon(clustBef.sequence,clustBef);
clustBef.operonOrfs = (Vector<int[]>) returnArr[0]; // first entry is always 1-d array indicating whether normal gene sequence follows (1) or operon sequence (-1)
clustBef.operonDirectionIsForward = (Boolean) returnArr[1];
+ if(clustBef.operonDirectionIsForward){
+ clustBef.onRevStrand = false;
+ }else{
+ clustBef.onRevStrand = true;
+ }
+
}
}
@@ -454,6 +460,11 @@ public class ProkaryoteExtraction {
Object[] returnArr = Prokaryote_Specials.define_OrfsInOperon(clustBef.sequence, clustBef);
clustBef.operonOrfs = (Vector<int[]>) returnArr[0]; // first entry is always 1-d array indicating whether normal gene sequence follows (1) or operon sequence (-1)
clustBef.operonDirectionIsForward = (Boolean) returnArr[1];
+ if(clustBef.operonDirectionIsForward){
+ clustBef.onRevStrand = false;
+ }else{
+ clustBef.onRevStrand = true;
+ }
}
}
@@ -610,7 +621,7 @@ public class ProkaryoteExtraction {
cluster.geneID = id++;
cluster.onRevStrand = false;
cluster.startPos = startPos;
- cluster.stopPos = currentPos+basesToAdd-1; // note: without "-1" stopPos would be the position necessary for seq-extraction = actual stop + 1
+ cluster.stopPos = Math.min(currentPos+basesToAdd-1,contigSeq.length()-2); // note: without "-1" stopPos would be the position necessary for seq-extraction = actual stop + 1
cluster.coreSeq = (cluster.stopPos-cluster.startPos);
cluster.realDirectionNotKnown = true;
diff --git a/src/geneFinder/ReadInParameters_GeneFinder.java b/src/geneFinder/ReadInParameters_GeneFinder.java
index 70ae2dd..9313070 100755
--- a/src/geneFinder/ReadInParameters_GeneFinder.java
+++ b/src/geneFinder/ReadInParameters_GeneFinder.java
@@ -410,13 +410,19 @@ public class ReadInParameters_GeneFinder {
GeneFinder.inprogeaCall = false;
}
- Runtime rtAlign = Runtime.getRuntime();
+ GeneFinder.logFile = new File(GeneFinder.pathOut+"log_it" + GeneFinder.iteration + ".txt");
+ if(!GeneFinder.secondPart){
+ System.out.println(inputText);
+ WriteOutput.writeToLogFile(inputText);
+ }
+
+ /*Runtime rtAlign = Runtime.getRuntime();
Process firstExe;
try {
- String exe = "mkdir "+GeneFinder.pathOut+"resultsRun";
- GeneFinder.logFile = new File(GeneFinder.pathOut+"resultsRun/log_it" + GeneFinder.iteration + ".txt");
- firstExe = rtAlign.exec(exe);
- firstExe.waitFor();
+ //String exe = "mkdir "+GeneFinder.pathOut+"resultsRun";
+ GeneFinder.logFile = new File(GeneFinder.pathOut+"log_it" + GeneFinder.iteration + ".txt");
+ //firstExe = rtAlign.exec(exe);
+ //firstExe.waitFor();
if(!GeneFinder.secondPart){
System.out.println(inputText);
@@ -427,9 +433,7 @@ public class ReadInParameters_GeneFinder {
e.printStackTrace();
} catch (InterruptedException e) {
e.printStackTrace();
- }
-
-
+ }*/
}
diff --git a/src/geneFinder/SamParser.java b/src/geneFinder/SamParser.java
index 7e8eda5..291befe 100755
--- a/src/geneFinder/SamParser.java
+++ b/src/geneFinder/SamParser.java
@@ -80,7 +80,7 @@ public class SamParser {
String lineReaSam = "";
if(GeneFinder.iteration == 2){
- br = new BufferedReader(new FileReader(new File(GeneFinder.pathOut+"resultsRun/reassignedReads_sorted.sam")));
+ br = new BufferedReader(new FileReader(new File(GeneFinder.pathOut+"reassignedReads_sorted.sam")));
while((lineReaSam = br.readLine()) != null){
String[] parts1 = lineReaSam.split(" ");
if(!parts1[0].startsWith("@") && parts1.length >=4){
@@ -494,7 +494,7 @@ public class SamParser {
}
}else{
if(GeneFinder.iteration == 1){
- WriteOutput.writeToOtherFile(GeneFinder.pathOut+"resultsRun/reassignedReads.sam",line+"\n");
+ WriteOutput.writeToOtherFile(GeneFinder.pathOut+"reassignedReads.sam",line+"\n");
}
}
}
@@ -530,13 +530,13 @@ public class SamParser {
System.out.println((multiCount) + " of them are ambiguous reads.");
System.out.println("The total number of hits is " + (totalHitCount) + ".");
System.out.println("Total number of multiple hits: " + multiTotalCount + ".");
- System.out.println("Time needed to parse SAM file: "+ (double) (timeAfter-timebef)/1000.0 +"s.");
+ System.out.println("Time required to parse SAM file: "+ (double) (timeAfter-timebef)/1000.0 +"s.");
GeneFinder.ambiProportion = (double) ((double)multiTotalCount/(double)totalHitCount);
System.out.println("Proportion of ambiguous reads: " + GeneFinder.ambiProportion);
System.out.println();
- WriteOutput.writeToLogFile("Done.\nTime needed to parse SAM file: "+ (double) (timeAfter-timebef)/1000.0 +"s.\n" + rnaCount + " rnas have been mapped to the reference. \n" + (multiCount) + " of them are ambiguous reads. \nThe total number of hits is " + (totalHitCount) + ".\nTotal number of multiple hits: " + multiTotalCount + ".\nProportion of ambiguous reads: " + GeneFinder.ambiProportion + ".\n\n");
+ WriteOutput.writeToLogFile("Done.\nTime required to parse SAM file: "+ (double) (timeAfter-timebef)/1000.0 +"s.\n" + rnaCount + " rnas have been mapped to the reference. \n" + (multiCount) + " of them are ambiguous reads. \nThe total number of hits is " + (totalHitCount) + ".\nTotal number of multiple hits: " + multiTotalCount + ".\nProportion of ambiguous reads: " + GeneFinder.ambiProportion + ".\n\n");
}
@@ -774,19 +774,19 @@ public class SamParser {
if(GeneFinder.haveSam != null){
nameSam = GeneFinder.haveSam;
}else{
- nameSam = GeneFinder.pathOut+"resultsRun/accepted_hits.sam";
+ nameSam = GeneFinder.pathOut+"accepted_hits.sam";
}
}else{
if(GeneFinder.haveSam != null){
nameSam = GeneFinder.haveSam;
}else{
- nameSam = GeneFinder.pathOut+"resultsRun/aln_BWA.sam";
+ nameSam = GeneFinder.pathOut+"aln_BWA.sam";
}
}
- String nameOutfile = GeneFinder.pathOut+"resultsRun/covMean.txt";
+ String nameOutfile = GeneFinder.pathOut+"covMean.txt";
try {
@@ -807,7 +807,7 @@ public class SamParser {
// no coverage file provided, so create a new one
String firstExe = "python " + GeneFinder.pathToHelpFiles+"getMeanCov.py " + nameSam + " " + nameOutfile;
- Giira.callAndHandleOutput(firstExe);
+ Giira.callAndHandleOutput(firstExe,false);
timeAfter = System.currentTimeMillis();
@@ -1139,7 +1139,7 @@ public class SamParser {
}else{
if(GeneFinder.iteration == 1){
- WriteOutput.writeToOtherFile(GeneFinder.pathOut+"resultsRun/reassignedReads.sam",line+"\n");
+ WriteOutput.writeToOtherFile(GeneFinder.pathOut+"reassignedReads.sam",line+"\n");
}
}
}
diff --git a/src/geneFinder/TopHat_Call.java b/src/geneFinder/TopHat_Call.java
index 91e7465..8f9b840 100755
--- a/src/geneFinder/TopHat_Call.java
+++ b/src/geneFinder/TopHat_Call.java
@@ -35,7 +35,7 @@ public class TopHat_Call {
System.out.print("Indexing with Bowtie.... ");
String firstExe = "bowtie2-build " + refFile + " " + nameRefFile;
- Giira.callAndHandleOutput(firstExe);
+ Giira.callAndHandleOutput(firstExe,true);
String optionString = ""; // get the options for the alignment
if(GeneFinder.settingMapper != null){
@@ -57,9 +57,8 @@ public class TopHat_Call {
}
// now call topHat, note that we do use the bam format as an intermediate format to ensure the right ordering of reads, final output is in sam format
- // output directory shall be resultsRun
- String out_dir = GeneFinder.pathOut+"resultsRun";
+ String out_dir = GeneFinder.pathOut;
String fileNames = new String();
@@ -78,19 +77,19 @@ public class TopHat_Call {
if(GeneFinder.rnaFilesWithNames.keySet().size() == 1){ // make the right call depending on how many read files are provided
secondExe = "tophat2 --no-sort-bam " + optionString + "-o " + out_dir + " " + nameRefFile + " " + rnaFile;
- Giira.callAndHandleOutput(secondExe);
+ Giira.callAndHandleOutput(secondExe,true);
}else{
secondExe = "tophat2 --no-sort-bam " + optionString + "-o " + out_dir + " " + nameRefFile + " " + fileNames;
- Giira.callAndHandleOutput(secondExe);
+ Giira.callAndHandleOutput(secondExe,true);
}
// the following is necessary to ensure that the reads in the resulting sam file will be in the necessary order
String thirdExe = "samtools sort -n " + out_dir + "/accepted_hits.bam " + out_dir + "/accepted_hits_sorted"; // sort and view guarantees that sam file is sorted correctly
String fourthExe = "samtools view -h -o " + out_dir + "/accepted_hits.sam " + out_dir + "/accepted_hits_sorted.bam"; // accepted_hits.sam is the file for the further analysis
- Giira.callAndHandleOutput(thirdExe);
- Giira.callAndHandleOutput(fourthExe);
+ Giira.callAndHandleOutput(thirdExe,true);
+ Giira.callAndHandleOutput(fourthExe,true);
// log messages
System.out.println("Done.");
diff --git a/src/geneFinder/WriteOutput.java b/src/geneFinder/WriteOutput.java
index d6ed517..f742825 100755
--- a/src/geneFinder/WriteOutput.java
+++ b/src/geneFinder/WriteOutput.java
@@ -63,7 +63,7 @@ public class WriteOutput {
public static void sortReassignSamFile(){
String exe = "python " + GeneFinder.pathToHelpFiles+"sortReaSam.py " + GeneFinder.pathOut;
- Giira.callAndHandleOutput(exe);
+ Giira.callAndHandleOutput(exe,true);
}
@@ -75,9 +75,9 @@ public class WriteOutput {
try {
Runtime removeFile = Runtime.getRuntime();
- Process exe3 = removeFile.exec("rm " + GeneFinder.pathOut+"resultsRun/reassignedReads.bam");
+ Process exe3 = removeFile.exec("rm " + GeneFinder.pathOut+"reassignedReads.bam");
exe3.waitFor();
- Process exe4 = removeFile.exec("rm " + GeneFinder.pathOut+"resultsRun/reassignedReads_sorted.bam");
+ Process exe4 = removeFile.exec("rm " + GeneFinder.pathOut+"reassignedReads_sorted.bam");
exe4.waitFor();
} catch (InterruptedException e) {
e.printStackTrace();
@@ -97,9 +97,9 @@ public class WriteOutput {
BufferedWriter bwGTF = null;
if(GeneFinder.iteration == 2){
- bwGTF = new BufferedWriter(new FileWriter(new File(GeneFinder.pathOut+"resultsRun/stats"+ GeneFinder.outputName + "_final.gtf")));
+ bwGTF = new BufferedWriter(new FileWriter(new File(GeneFinder.pathOut+"stats"+ GeneFinder.outputName + "_final.gtf")));
}else{
- bwGTF = new BufferedWriter(new FileWriter(new File(GeneFinder.pathOut+"resultsRun/stats"+ GeneFinder.outputName + namePartOut + ".gtf")));
+ bwGTF = new BufferedWriter(new FileWriter(new File(GeneFinder.pathOut+"stats"+ GeneFinder.outputName + namePartOut + ".gtf")));
}
int exonLength_covered_total = 0;
@@ -256,8 +256,8 @@ public class WriteOutput {
BufferedWriter bwGTF_ccAna = null;
BufferedWriter bwGTF_prok = null;
- bwGTF_ccAna = new BufferedWriter(new FileWriter(new File(GeneFinder.pathOut+"resultsRun/stats"+ GeneFinder.outputName + namePartOut +"_ccAna.gtf")));
- bwGTF_prok = new BufferedWriter(new FileWriter(new File(GeneFinder.pathOut+"resultsRun/stats"+ GeneFinder.outputName + namePartOut +"_prok.gtf")));
+ bwGTF_ccAna = new BufferedWriter(new FileWriter(new File(GeneFinder.pathOut+"stats"+ GeneFinder.outputName + namePartOut +"_ccAna.gtf")));
+ bwGTF_prok = new BufferedWriter(new FileWriter(new File(GeneFinder.pathOut+"stats"+ GeneFinder.outputName + namePartOut +"_prok.gtf")));
int exonLength_covered_total = 0;
geneNumTotal = 0;
@@ -460,12 +460,12 @@ public class WriteOutput {
//System.out.println("Number of prokaryotic genes with more than one cistron: " + moreThanOneCistron);
//System.out.println("Number of prokaryotic genes with more than two cistrons: " + moreThanTwoCistrons);
//System.out.println("Number of identified genes with support below threshold: " + numGenesWithSupportBelowThreshold);
- System.out.println("Number of identified genes with total coverage below threshold: " + numGenesWithTotalSupportBelowThreshold);
- System.out.println("Number of identified genes with only ambiguous support: " + numGenesWithOnlyMulti);
- System.out.println("Number of identified genes on reference: " + geneRefNonZero);
- System.out.println("More than one operon in transcript: " + moreOperons + " with average split number: " + operonSplitAv);
+ System.out.println("Number of identified transcripts with total coverage below threshold: " + numGenesWithTotalSupportBelowThreshold);
+ System.out.println("Number of identified transcripts with only ambiguous support: " + numGenesWithOnlyMulti);
+ System.out.println("Number of identified transcripts on reference: " + geneRefNonZero);
+ System.out.println("More than one operon in " + moreOperons + " transcripts (with average operon number: " + operonSplitAv + ").");
- writeToLogFile("\n\nNumber of identified genes on reference: " + geneRefNonZero + " \nNumber of identified genes with total coverage below threshold: " + numGenesWithTotalSupportBelowThreshold + "\nNumber of identified genes with only ambiguous support: " + numGenesWithOnlyMulti + "\n");
+ writeToLogFile("\n\nNumber of identified transcripts on reference: " + geneRefNonZero + " \nNumber of identified transcripts with total coverage below threshold: " + numGenesWithTotalSupportBelowThreshold + "\nNumber of identified transcripts with only ambiguous support: " + numGenesWithOnlyMulti + "\n");
bwGTF_ccAna.close();
bwGTF_prok.close();
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/giira.git
More information about the debian-med-commit
mailing list