[med-svn] [giira] 01/04: Imported Upstream version 0.0.20140210

Andreas Tille tille at debian.org
Mon Feb 10 12:50:09 UTC 2014


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository giira.

commit a3156724f34e26c479ebfb3d027872a9b96d7486
Author: Andreas Tille <tille at debian.org>
Date:   Mon Feb 10 13:45:16 2014 +0100

    Imported Upstream version 0.0.20140210
---
 src/geneFinder/BWA_Call.java                    |  14 +-
 src/geneFinder/CalculateScores.java             |  10 +-
 src/geneFinder/CigarParser.java                 | 177 ++++++++++
 src/geneFinder/CleanAfterAmbiOpti.java          |   6 +-
 src/geneFinder/GeneFinder.java                  |  18 +-
 src/geneFinder/Giira.java                       |  14 +-
 src/geneFinder/Operon_LP.java                   |   8 +-
 src/geneFinder/OptimizeAmbis.java               |  10 +-
 src/geneFinder/OptimizeAmbis_GLPK.java          | 427 ++++++++++++++++++++++++
 src/geneFinder/PrepareMapping_GF.java           |   4 +-
 src/geneFinder/ProkaryoteExtraction.java        |  13 +-
 src/geneFinder/ReadInParameters_GeneFinder.java |  20 +-
 src/geneFinder/SamParser.java                   |  18 +-
 src/geneFinder/TopHat_Call.java                 |  13 +-
 src/geneFinder/WriteOutput.java                 |  24 +-
 15 files changed, 703 insertions(+), 73 deletions(-)

diff --git a/src/geneFinder/BWA_Call.java b/src/geneFinder/BWA_Call.java
index a9dc084..b07c9e3 100755
--- a/src/geneFinder/BWA_Call.java
+++ b/src/geneFinder/BWA_Call.java
@@ -29,7 +29,7 @@ public class BWA_Call {
 
 			// more than one read file, so concatenate them to one big file
 
-			rnaFile = new File(GeneFinder.pathOut+"resultsRun/mergedReadFiles.fastq");
+			rnaFile = new File(GeneFinder.pathOut+"mergedReadFiles.fastq");
 			callConcatenateReadFiles(); // perform the concatenation
 
 		}else{
@@ -63,24 +63,24 @@ public class BWA_Call {
 
 		System.out.print("Indexing.... ");	// first step
 		String firstExe = "python " + GeneFinder.pathToHelpFiles+"bwa_index.py " + refFile + " " + nameIndexTool;    // first index reference file		
-		Giira.callAndHandleOutput(firstExe);
+		Giira.callAndHandleOutput(firstExe,true);
 
 		System.out.println("\nPerform alignment.... ");
 
 		if(!GeneFinder.useBWAsw){
-			String saiOut = GeneFinder.pathOut+"resultsRun/aln_sa.sai";
+			String saiOut = GeneFinder.pathOut+"aln_sa.sai";
 
 			String secondExe = "python "+GeneFinder.pathToHelpFiles+"bwa_aln.py " + optionString_aln + " " + refFile + " " + rnaFile + " " + saiOut;  // perform alignment				
-			Giira.callAndHandleOutput(secondExe);
+			Giira.callAndHandleOutput(secondExe,true);
 
 			System.out.println("\nWrite to SAM format....");
 
 			String thirdExe = "python "+GeneFinder.pathToHelpFiles+"bwa_samse.py " + optionString_samse + " " + refFile + " " + saiOut + " " + rnaFile + " " + nameOutSamFile;  //output: sam file
-			Giira.callAndHandleOutput(thirdExe);
+			Giira.callAndHandleOutput(thirdExe,true);
 
 		}else{
 			String secondExe = "python "+GeneFinder.pathToHelpFiles+"bwa_sw.py " + optionString_samse + " " + refFile +  " " + rnaFile + " " + nameOutSamFile;  //output: sam file
-			Giira.callAndHandleOutput(secondExe);
+			Giira.callAndHandleOutput(secondExe,true);
 		}
 
 		System.out.println("\nDone.");
@@ -110,7 +110,7 @@ public class BWA_Call {
 		}
 
 		try { // call python helper script
-			String exe = "python "+GeneFinder.pathToHelpFiles+"callCat_RnaFile.py " + allRnaNames + " " + GeneFinder.pathOut+"resultsRun/mergedReadFiles.fastq";
+			String exe = "python "+GeneFinder.pathToHelpFiles+"callCat_RnaFile.py " + allRnaNames + " " + GeneFinder.pathOut+"mergedReadFiles.fastq";
 			firstExe = prepRef.exec(exe);
 			firstExe.waitFor();
 		} catch (IOException e) {
diff --git a/src/geneFinder/CalculateScores.java b/src/geneFinder/CalculateScores.java
index b71c1dd..ab51f13 100755
--- a/src/geneFinder/CalculateScores.java
+++ b/src/geneFinder/CalculateScores.java
@@ -77,7 +77,11 @@ public class CalculateScores {
 		cluster.exonsOfGene.clear();
 		cluster.exonLength = 0;
 		FindExonsOfGene.findExonsForGene(cluster); // TODO: also check if exons are there!
-		OptimizeAmbis.sumUpExonLengths(cluster, cluster.exonsOfGene);
+		if(GeneFinder.useCPLEX){
+			OptimizeAmbis.sumUpExonLengths(cluster, cluster.exonsOfGene);
+		}else{
+			OptimizeAmbis_GLPK.sumUpExonLengths(cluster, cluster.exonsOfGene);
+		}
 
 		exonLength = cluster.exonLength;
 
@@ -161,13 +165,13 @@ public class CalculateScores {
 				}
 			}
 			
-			WriteOutput.writeToOtherFile(GeneFinder.pathOut+"resultsRun/reassignedReads.sam",(String)minInfo[1]);
+			WriteOutput.writeToOtherFile(GeneFinder.pathOut+"reassignedReads.sam",(String)minInfo[1]);
 			
 		}else if(rna.contigsMappedOn.size() == 1){
 			String rnaInfo = "";
 			Object[] info = rna.contigsMappedOn.get(0);
 			rnaInfo += rna.rnaID + "\t0\t"+ contigName + "\t" + ((Integer)info[1]) + "\t0\t" + ((String)info[2]) + "\t*\t*\t*\t*\t*\tNH:i:1\n"; 
-			WriteOutput.writeToOtherFile(GeneFinder.pathOut+"resultsRun/reassignedReads.sam",rnaInfo);
+			WriteOutput.writeToOtherFile(GeneFinder.pathOut+"reassignedReads.sam",rnaInfo);
 		}
 	}
 }
diff --git a/src/geneFinder/CigarParser.java b/src/geneFinder/CigarParser.java
new file mode 100755
index 0000000..5120456
--- /dev/null
+++ b/src/geneFinder/CigarParser.java
@@ -0,0 +1,177 @@
+package geneFinder;
+
+import java.util.Vector;
+
+/**
+ * parse cigar, MD and sequence to extract all indels and mismatches
+ * @author zickmannf
+ *
+ */
+
+public class CigarParser {
+
+	public static void main(String[] args){  // main is only for debugging
+		String cigar = "3M2I1M1D4M";
+		String mdTag = "4^A2C1";
+		String rnaSeq = "AAATTATTGT";
+		
+		Object[] returnArr = extractAllAlignDiffs(cigar, mdTag, rnaSeq);
+	}
+	
+	/*
+	 * extract all alignment differences
+	 * note: positions are reference-based, insert bases and mismatch bases are read-based (necessary for transcript reconstruction)
+	 * note: no clipped alignments are supported
+	 */
+	
+	public static Object[] extractAllAlignDiffs(String cigar, String mdTag, String readSeq){
+		
+		Vector<int[]> inserts = new Vector<int[]>();
+		Vector<int[]> dels = new Vector<int[]>();
+		Vector<int[]> mms = new Vector<int[]>();
+		
+		int[] refToReadPos = new int[readSeq.length()*2];
+		for(int pos = 0;pos<refToReadPos.length;++pos){
+			refToReadPos[pos] = pos;
+		}
+		
+		/////// first: insertions ///////
+		
+		if(cigar.contains("I")){
+			
+			int globalPos = 0;		// in case of more than one intron
+			String[] cigarArr_I = cigar.split("I");
+			
+			for(int pos = 0; pos<(cigarArr_I.length-1);++pos){
+				String i_part = cigarArr_I[pos];
+				String[] iPart_all = i_part.split("[DIMXN]");
+				
+				int insertSize = Integer.parseInt(iPart_all[iPart_all.length - 1]);
+				
+				int pos_local = 0;
+				String[] iPart_all2 = i_part.split("[DIMX]");
+				
+				for(int pos2 = 0;pos2 < iPart_all2.length-1;++pos2){
+					String partSmall = iPart_all2[pos2];
+					if(!partSmall.contains("N")){
+						pos_local = pos_local + Integer.parseInt(partSmall);
+					}else{
+						String[] partSmall_n = partSmall.split("N");		// do not count N positions
+						pos_local = pos_local + Integer.parseInt(partSmall_n[partSmall_n.length - 1]);
+					}
+				}
+				
+				globalPos = globalPos + pos_local;
+				
+				int[] insertTemp = new int[2+insertSize];	// 0=position on ref at that insert is started; 1=insertSize,2etc=base codes for inserted bases
+				insertTemp[0] = globalPos;
+				insertTemp[1] = insertSize;
+				
+				for(int posMap = globalPos;posMap < refToReadPos.length;++posMap){
+					refToReadPos[posMap] = refToReadPos[posMap] + insertSize;
+				}
+				
+				inserts.add(insertTemp);
+			}
+		}
+		
+		/////// second: deletions ///////
+		
+		if(cigar.contains("D")){
+			
+			int globalPos = 0;
+			String[] cigarArr_D = cigar.split("D");
+			for(int pos = 0; pos<(cigarArr_D.length-1);++pos){
+				String d_part = cigarArr_D[pos];
+				String[] dPart_all = d_part.split("[DIMXN]");
+				
+				int delSize = Integer.parseInt(dPart_all[dPart_all.length - 1]);
+				
+				int pos_local = 0;
+				String[] dPart_all2 = d_part.split("[DMX]");
+				
+				for(int pos2 = 0;pos2 < dPart_all2.length-1;++pos2){
+					String partSmall = dPart_all2[pos2];
+					if(!partSmall.contains("N") && !partSmall.contains("I")){
+						pos_local = pos_local + Integer.parseInt(partSmall);
+					}else{
+						String[] partSmall_split = partSmall.split("[NI]");		// do not count N or I positions
+						pos_local = pos_local + Integer.parseInt(partSmall_split[partSmall_split.length - 1]);
+					}
+				}
+				
+				globalPos = globalPos + pos_local;
+				
+				int[] delTemp = new int[2];	// 0=position on ref at that deletion is started; 1=deletionSize
+				delTemp[0] = globalPos;
+				delTemp[1] = delSize;
+				
+				/*for(int posMap = globalPos;posMap < Math.min(refToReadPos.length,globalPos + delSize);++posMap){
+					refToReadPos[posMap] = -1;
+				}*/
+				
+				for(int posMap = globalPos + delSize;posMap < refToReadPos.length;++posMap){
+					refToReadPos[posMap] = refToReadPos[posMap] - delSize;
+				}
+				
+				dels.add(delTemp);
+				globalPos = globalPos + delSize;
+			}
+		}
+		
+		/////// third: complete inserts ///////
+		
+		for(int[] insertTemp : inserts){
+			for(int pos = 0;pos<insertTemp[1];++pos){
+				String base = readSeq.substring((refToReadPos[insertTemp[0]+pos]-insertTemp[1]),(refToReadPos[insertTemp[0]+pos]-insertTemp[1]) + 1);
+				insertTemp[2+pos] = SamParser.returnBaseCode(base);
+			}
+		}
+		
+		/////// fourth: extract mismatches ///////
+		
+		String[] mdParts = mdTag.split("");
+	
+		int posOnRef = 0;
+		
+		boolean inDeletion = false;
+		StringBuffer numString = new StringBuffer("");
+		
+		for(int posArr = 1; posArr<mdParts.length;++posArr){
+			String mdChar = mdParts[posArr];
+			if(mdChar.contains("^")){
+				inDeletion = true;
+				if(numString.length() != 0){
+					posOnRef = posOnRef + Integer.parseInt(numString.toString());  // +1 for current position
+				}
+				numString = new StringBuffer("");
+			}else if(mdChar.contains("A") || mdChar.contains("C") || mdChar.contains("G") || mdChar.contains("T") || mdChar.contains("N")){
+				if(!inDeletion){
+					// mismatch
+					//posOnRef++;		
+					if(numString.length() != 0){
+						posOnRef = posOnRef + Integer.parseInt(numString.toString()) + 1;  // +1 for current position
+					}else{
+						posOnRef++;
+					}
+					numString = new StringBuffer("");
+					String base = readSeq.substring(refToReadPos[posOnRef-1],refToReadPos[posOnRef-1]+1);
+					int[] tempMM = new int[2];
+					tempMM[0] = posOnRef-1;
+					tempMM[1] = SamParser.returnBaseCode(base);
+					mms.add(tempMM);
+				}else{
+					posOnRef++;
+				}
+			}else{
+				inDeletion = false;
+				numString.append(mdChar);
+				//posOnRef = posOnRef + Integer.parseInt(mdChar);
+			}
+		}
+		
+		return new Object[] {inserts,dels,mms};
+		
+	}
+
+}
diff --git a/src/geneFinder/CleanAfterAmbiOpti.java b/src/geneFinder/CleanAfterAmbiOpti.java
index 639babc..0ff4e56 100755
--- a/src/geneFinder/CleanAfterAmbiOpti.java
+++ b/src/geneFinder/CleanAfterAmbiOpti.java
@@ -44,7 +44,7 @@ public class CleanAfterAmbiOpti {
 		
 		int counterVars = 0;
 		try{
-			BufferedReader br = new BufferedReader(new FileReader(GeneFinder.pathOut+"resultsRun/solutionCPLEX_it" + GeneFinder.iteration + ".sol"));
+			BufferedReader br = new BufferedReader(new FileReader(GeneFinder.pathOut+"solutionCPLEX_it" + GeneFinder.iteration + ".sol"));
 			
 			System.out.println("Parsing solution of cplex and clean ambiguous reads... ");
 			System.out.print("Processed: ");
@@ -148,7 +148,7 @@ public class CleanAfterAmbiOpti {
 		int processedNum = 0;   // for progress reporting
 
 		try{
-			BufferedReader br = new BufferedReader(new FileReader(GeneFinder.pathOut+"resultsRun/solutionGLPK_out_it" + GeneFinder.iteration + ".out"));
+			BufferedReader br = new BufferedReader(new FileReader(GeneFinder.pathOut+"solutionGLPK_out_it" + GeneFinder.iteration + ".out"));
 
 			System.out.println("Parsing solution of glpk and clean ambiguous reads... ");
 			System.out.print("Processed: ");
@@ -200,6 +200,8 @@ public class CleanAfterAmbiOpti {
 						r.gc();
 						r.gc();	// to keep the memory requirements down as much as possible
 					}
+				}else if(line.contains("x__") && (line.contains("__f"))){
+					line = br.readLine();
 				}
 				
 				line = br.readLine();
diff --git a/src/geneFinder/GeneFinder.java b/src/geneFinder/GeneFinder.java
index bf48ac4..5dba17a 100755
--- a/src/geneFinder/GeneFinder.java
+++ b/src/geneFinder/GeneFinder.java
@@ -106,8 +106,8 @@ public class GeneFinder {
 				inputFileAlign = new File(haveSam);
 			}else{
 				
-				inputFileAlign = new File(pathOut+"resultsRun/accepted_hits.sam");
-				GeneFinder.haveSam = pathOut+"resultsRun/accepted_hits.sam";
+				inputFileAlign = new File(pathOut+"accepted_hits.sam");
+				GeneFinder.haveSam = pathOut+"accepted_hits.sam";
 				
 				// first test if not already created in earlier rounds
 				
@@ -128,8 +128,8 @@ public class GeneFinder {
 				inputFileAlign = new File(haveSam);				
 			}else{
 				
-				inputFileAlign = new File(pathOut+"resultsRun/aln_BWA.sam");
-				GeneFinder.haveSam = pathOut+"resultsRun/aln_BWA.sam";
+				inputFileAlign = new File(pathOut+"aln_BWA.sam");
+				GeneFinder.haveSam = pathOut+"aln_BWA.sam";
 				
 				// first test if not already created in earlier rounds
 				
@@ -140,7 +140,7 @@ public class GeneFinder {
 					
 				}catch (IOException e) {
 					BWA_Call bwaStart = new BWA_Call();
-					bwaStart.callBWA(new File(nameRef+".fasta"), pathOut+"resultsRun/aln_BWA.sam"); 
+					bwaStart.callBWA(new File(nameRef+".fasta"), pathOut+"aln_BWA.sam"); 
 					
 				}
 	
@@ -197,8 +197,12 @@ public class GeneFinder {
 		}
 		
 		if(!GeneFinder.noAmbiOpti){
-			OptimizeAmbis.maxFlowLP();
-			 
+			if(useCPLEX){
+				OptimizeAmbis.maxFlowLP();
+			}else{
+				OptimizeAmbis_GLPK.maxFlowLP();
+			}
+					 
 		}
 			
 		double[] minMax = CalculateScores.assignGeneScores(false);
diff --git a/src/geneFinder/Giira.java b/src/geneFinder/Giira.java
index c381279..4f264b5 100755
--- a/src/geneFinder/Giira.java
+++ b/src/geneFinder/Giira.java
@@ -77,7 +77,7 @@ public class Giira {
 
 					String sysCall_1 = "java -Xmx"+ mem[0] +"m -cp " + classPath + ":" + decodedPath + " geneFinder.GeneFinder " + argString + "-iter " + itNum + " -solverOn n -splitRunAndOpti n -scripts " + scriptPath + "scripts/";				
 					System.out.println("Call part 1:");
-					callAndHandleOutput(sysCall_1);
+					callAndHandleOutput(sysCall_1,true);
 					
 					// optimization
 
@@ -89,13 +89,13 @@ public class Giira {
 					}
 					
 					System.out.println("Optimization: " + sysCall_Opti);
-					callAndHandleOutput(sysCall_Opti);			
+					callAndHandleOutput(sysCall_Opti,true);			
 
 					// run 2
 					
 					String sysCall_2 = "java -Xmx"+ mem[0] +"m -cp " + classPath + ":" + decodedPath + " geneFinder.GeneFinder " + argString + "-iter " + itNum + " -solverOn n -splitRunAndOpti n -secondPart y -scripts " + scriptPath + "scripts/";
 					System.out.println("Call part 2:");
-					callAndHandleOutput(sysCall_2);
+					callAndHandleOutput(sysCall_2,true);
 					
 				}else{
 					
@@ -108,7 +108,7 @@ public class Giira {
 					}
 					
 					System.out.println("Call: ");
-					callAndHandleOutput(sysCall);
+					callAndHandleOutput(sysCall,true);
 					
 				}
 
@@ -203,7 +203,7 @@ public class Giira {
 	 * call GIIRA and handle the output streams
 	 */
 
-	public static void callAndHandleOutput(String sysCall){
+	public static void callAndHandleOutput(String sysCall,boolean printAll){
 		
 		try{
 			
@@ -219,7 +219,9 @@ public class Giira {
 			String lineExe = "";
 			
 			while((lineExe = bExe.readLine()) != null){
-				System.out.println(lineExe);
+				if(printAll){
+					System.out.println(lineExe);
+				}			
 			}
 			while((lineExe = bErr.readLine()) != null){
 				System.out.println(lineExe);
diff --git a/src/geneFinder/Operon_LP.java b/src/geneFinder/Operon_LP.java
index bb668c9..b752ad4 100755
--- a/src/geneFinder/Operon_LP.java
+++ b/src/geneFinder/Operon_LP.java
@@ -51,7 +51,7 @@ public class Operon_LP {
 
 		try {
 
-			BufferedWriter bw = new BufferedWriter(new FileWriter(GeneFinder.pathOut+"resultsRun/input_operonLP.lp"));	// name of lp file
+			BufferedWriter bw = new BufferedWriter(new FileWriter(GeneFinder.pathOut+"input_operonLP.lp"));	// name of lp file
 
 			bw.write("Maximize\n");
 
@@ -260,12 +260,12 @@ public class Operon_LP {
 		try {
 			IloCplex cplex = new IloCplex();
 			cplex.setOut(null);
-			cplex.importModel(GeneFinder.pathOut+"resultsRun/input_operonLP.lp");
+			cplex.importModel(GeneFinder.pathOut+"input_operonLP.lp");
 			cplex.setParam(IloCplex.IntParam.Threads,GeneFinder.numberThreads);
 
 			cplex.solve();
 
-			cplex.writeSolution(GeneFinder.pathOut+"resultsRun/solution_operonLP.sol");
+			cplex.writeSolution(GeneFinder.pathOut+"solution_operonLP.sol");
 
 
 		} catch (IloException e2) {
@@ -284,7 +284,7 @@ public class Operon_LP {
 		double objective = -Double.MAX_VALUE;
 		
 		try{
-			BufferedReader br = new BufferedReader(new FileReader(GeneFinder.pathOut+"resultsRun/solution_operonLP.sol"));
+			BufferedReader br = new BufferedReader(new FileReader(GeneFinder.pathOut+"solution_operonLP.sol"));
 
 			String line = "";
 
diff --git a/src/geneFinder/OptimizeAmbis.java b/src/geneFinder/OptimizeAmbis.java
index 05c1e16..b922969 100755
--- a/src/geneFinder/OptimizeAmbis.java
+++ b/src/geneFinder/OptimizeAmbis.java
@@ -53,7 +53,7 @@ public class OptimizeAmbis {
 
 		try {
 
-			BufferedWriter bw = new BufferedWriter(new FileWriter(GeneFinder.pathOut+"resultsRun/input_it" + GeneFinder.iteration + ".lp"));	// name of lp file
+			BufferedWriter bw = new BufferedWriter(new FileWriter(GeneFinder.pathOut+"input_it" + GeneFinder.iteration + ".lp"));	// name of lp file
 
 			bw.write("Maximize\n");
 			
@@ -381,7 +381,7 @@ public class OptimizeAmbis {
 			System.out.println("Start cplex solve...");
 			IloCplex cplex = new IloCplex();
 
-			cplex.importModel(GeneFinder.pathOut+"resultsRun/input_it" + GeneFinder.iteration + ".lp");
+			cplex.importModel(GeneFinder.pathOut+"input_it" + GeneFinder.iteration + ".lp");
 			
 			cplex.setParam(IloCplex.IntParam.RootAlg,IloCplex.Algorithm.Network);
 			cplex.setParam(IloCplex.DoubleParam.EpGap,0.01);
@@ -393,7 +393,7 @@ public class OptimizeAmbis {
 					cplex.setParam(IloCplex.DoubleParam.WorkMem,GeneFinder.memForCplex);
 				}
 				
-				cplex.setParam(IloCplex.StringParam.WorkDir,GeneFinder.pathOut+"resultsRun/");
+				cplex.setParam(IloCplex.StringParam.WorkDir,GeneFinder.pathOut);
 				System.out.println("Directory: " + cplex.getParam(IloCplex.StringParam.WorkDir));
 				
 				cplex.setParam(IloCplex.DoubleParam.PolishTime,1000.0);
@@ -411,7 +411,7 @@ public class OptimizeAmbis {
 			
 			cplex.solve();
 
-			cplex.writeSolution(GeneFinder.pathOut+"resultsRun/solutionCPLEX_it" + GeneFinder.iteration + ".sol");
+			cplex.writeSolution(GeneFinder.pathOut+"solutionCPLEX_it" + GeneFinder.iteration + ".sol");
 
 
 		} catch (IloException e2) {
@@ -430,7 +430,7 @@ public class OptimizeAmbis {
 			System.out.println("Start glpk solve...");
 			
 			Runtime rt = Runtime.getRuntime();
-			Process firstExe = rt.exec("glpsol --lp " + GeneFinder.pathOut + "resultsRun/input_it" + GeneFinder.iteration + ".lp --output " + GeneFinder.pathOut + "resultsRun/solutionGLPK_out_it" + GeneFinder.iteration + ".out");
+			Process firstExe = rt.exec("glpsol --lp " + GeneFinder.pathOut + "input_it" + GeneFinder.iteration + ".lp --output " + GeneFinder.pathOut + "solutionGLPK_out_it" + GeneFinder.iteration + ".out");
 			firstExe.waitFor();
 
 		} catch (InterruptedException e) {
diff --git a/src/geneFinder/OptimizeAmbis_GLPK.java b/src/geneFinder/OptimizeAmbis_GLPK.java
new file mode 100755
index 0000000..d049fc8
--- /dev/null
+++ b/src/geneFinder/OptimizeAmbis_GLPK.java
@@ -0,0 +1,427 @@
+package geneFinder;
+
+import java.io.*;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Vector;
+
+import types.*;
+
+/**
+* manage the assignment of ambiguous hits to their final position, using a maximum flow formulation
+* current methods: GLPK solver, CPLEX linear program
+* Copyright (c) 2013,
+* Franziska Zickmann, 
+* ZickmannF at rki.de, Robert Koch-Institute, Berlin, Germany
+* Distributed under the GNU Lesser General Public License, version 3.0
+*
+*/
+
+
+	public class OptimizeAmbis_GLPK {
+
+
+		public static Map<String,Object[]> multiRnas = new HashMap<String,Object[]>();						// store all ambiguous rnas
+		public static Map<Gene,Vector<String>> multiRnas_eachCluster = new HashMap<Gene,Vector<String>>();  // stores all variables that belong to one gene
+		public static Map<Gene,Object[]> clustNeighbors = new HashMap<Gene,Object[]>();  // first entry: gene vector, second entry: multiRnas, note: if too memory intensive, sacrifice running time and use associated rnas
+		
+		
+		/*
+		 * set up the maximum flow formulation
+		 */
+		
+		public static void maxFlowLP(){
+
+			multiRnas.clear();
+			multiRnas_eachCluster.clear();
+			clustNeighbors.clear();
+
+			long timeBef = System.currentTimeMillis();	
+
+			// the following 3 variables are required for output messages
+			int numMulti = 0;	// count all ambiguous reads 
+			int numShared = 0;	// count all reads of twin candidate genes
+			int processCounter = 0;	// for progress report
+			
+			int fVar_counter = 0; // necessary to parse cplex solution
+			
+			StringBuffer binaryVars = new StringBuffer("");			// will include all variables
+
+			boolean noMultis = false;
+
+			try {
+
+				BufferedWriter bw = new BufferedWriter(new FileWriter(GeneFinder.pathOut+"input_it" + GeneFinder.iteration + ".lp"));	// name of lp file
+
+				bw.write("Maximize\n");
+				
+				StringBuffer targetFunction = new StringBuffer("");		// will include the target function
+				
+				for(String contigName : GeneFinder.mappedContigs.keySet()){
+					
+					Contig contig = GeneFinder.mappedContigs.get(contigName);
+
+					for(int posGene = contig.allGenes.size() -1; posGene >= 0; posGene--){
+
+						Gene cluster = contig.allGenes.get(posGene);
+						
+						boolean hasMultis = false;		// hasMultis and hasTwin indicate if we have to deal with another gene candidate
+						boolean hasTwin = false;
+						
+						cluster.totalCov = 0.0;
+						
+						if(!FindExonsOfGene.findExonsForGene(cluster)){	// define the exons as they are at the moment, necessary for exon length calculation
+							contig.allGenes.remove(cluster);
+						}else{
+							sumUpExonLengths(cluster, cluster.exonsOfGene);
+
+							if(cluster.twinNode != null){
+								// we have to resolve the twin as well					
+
+								cluster.twinNode.totalCov = 0.0;
+								if(!FindExonsOfGene.findExonsForGene(cluster.twinNode)){
+									cluster.twinNode = null;
+								}else{
+									sumUpExonLengths(cluster.twinNode, cluster.twinNode.exonsOfGene);
+														
+									hasTwin = true;
+								}
+							}
+
+
+							int multiNumNode = 0;		// count the ambiguous reads of each candidate
+
+							for(String rnaKey : cluster.idTOassociatedRnas.keySet()){
+
+								Rna rna = ((Rna)cluster.idTOassociatedRnas.get(rnaKey)[0]);
+
+								String varName = "x__"+rna.rnaID; 
+
+								if(rna.isMulti == 1 || (rna.isSharedBy.contains(cluster.geneID))){
+
+									if(rna.isMulti == 1){  // to determine the right unique coverage
+
+										multiNumNode++;
+										numMulti++;
+
+										cluster.totalCov += (double) (((1.0)/((double) rna.hitNum)) * (double)GeneFinder.readLength);  // ambiguous reads have fewer weight, according to the number of their hits
+
+									}else{
+										cluster.totalCov += (double) GeneFinder.readLength;
+									}
+
+									if(rna.assignedNum > 1){
+										hasMultis = true;
+
+										if(((multiNumNode % 50) == 0) && (targetFunction.length() > 10)){
+											bw.write(targetFunction.substring(2));
+											targetFunction = new StringBuffer("  ");
+										}else{
+											targetFunction.append(" + x__"+rna.rnaID + "__" + cluster.geneID + "__f" + "\n");  // include the variable for each readTOgene connection in the target fkt.									
+										}
+										
+										fVar_counter++;
+										initialize_MaxFlow(cluster,varName);	// perform the necessary updates of the maps
+									}								
+
+								}else{
+									cluster.totalCov += (double)GeneFinder.readLength;
+								}
+
+							}
+
+							cluster.totalCov = ((double) cluster.totalCov)/((double) cluster.exonLength);
+
+							if(hasMultis){
+
+								cluster.numOfMultis = multiNumNode;
+								cluster.uniqueCov = (((double)((cluster.idTOassociatedRnas.keySet().size() - multiNumNode) * GeneFinder.readLength))/((double)cluster.exonLength));
+
+							}
+
+							if(hasTwin){ // now perform the same stuff for the twin candidate
+
+								for(String rnaKey : cluster.twinNode.idTOassociatedRnas.keySet()){
+
+									Rna rna = ((Rna)cluster.twinNode.idTOassociatedRnas.get(rnaKey)[0]);
+
+									String varName = "x__"+rna.rnaID; 
+
+									if(rna.isMulti == 1 || rna.isSharedBy.contains(cluster.twinNode.geneID)){							
+
+										if(!rna.isSharedBy.contains(cluster.twinNode.geneID)){
+											// this is non-shared multiple read, so count it
+											//cluster.twinNode.numOfMultis++;
+											numMulti++;	
+										}
+										if(rna.isMulti == 1){											
+											cluster.twinNode.numOfMultis++;	
+											
+											cluster.twinNode.totalCov += (double) (((1.0)/((double) rna.hitNum)) * (double)GeneFinder.readLength);
+										}
+
+										if(rna.isSharedBy.contains(cluster.twinNode.geneID)){
+											numShared++;	// at this point, update the shared rna number								
+										}
+										
+										if(rna.assignedNum > 1){
+											
+											if(((cluster.twinNode.numOfMultis % 50) == 0) && (targetFunction.length() > 10)){
+												bw.write(targetFunction.substring(2));
+												targetFunction = new StringBuffer("  ");
+											}else{
+												targetFunction.append(" + x__"+rna.rnaID + "__" + cluster.twinNode.geneID + "__f" + "\n");
+											}
+											
+											fVar_counter++;
+											initialize_MaxFlow(cluster.twinNode,varName);
+										}
+										
+
+									}else{
+										cluster.twinNode.totalCov += (double)GeneFinder.readLength;
+									}
+								}
+
+								cluster.twinNode.uniqueCov = (((double)((cluster.twinNode.idTOassociatedRnas.keySet().size() - cluster.twinNode.numOfMultis) * GeneFinder.readLength))/((double)cluster.twinNode.exonLength));
+								cluster.twinNode.totalCov = ((double) cluster.twinNode.totalCov)/((double) cluster.twinNode.exonLength);
+							}
+
+							processCounter++;
+							if((processCounter % 2000) == 0){
+								
+								if((processCounter % 10000) == 0){
+									long timeAfterTmp = System.currentTimeMillis();
+									if(!GeneFinder.secondPart){
+										System.out.println("Processed " + processCounter + " candidates in " + (double) (timeAfterTmp-timeBef)/1000.0 +"s.");
+									}
+								}
+								
+								if(!(numMulti == 0 && numShared == 0 &&  multiRnas.keySet().size() == 0)){
+									if((targetFunction.length() > 10)){
+										bw.write(targetFunction.substring(2));
+										targetFunction = new StringBuffer("  ");
+									}
+								}
+							}else{
+								if((targetFunction.length() > 10) && (fVar_counter % 1000) == 0){
+									bw.write(targetFunction.substring(2));
+									targetFunction = new StringBuffer("  ");
+								}
+							}
+						}
+					}
+				}
+
+				// log messages:
+				
+				if(!GeneFinder.secondPart){
+					System.out.println("Processed " + processCounter + " candidates.");
+					System.out.println("Number of multiple rnas: " + numMulti);
+					System.out.println("Number of shared rnas: " + numShared);
+					System.out.println("Number constraints: " + multiRnas.keySet().size());
+					WriteOutput.writeToLogFile("Processed " + processCounter + " candidates. \n" + "Number of multiple rnas: " + numMulti + " \nNumber of shared rnas: " + numShared + " \nNumber constraints: " + multiRnas.keySet().size() + "\n");
+
+					long timeAfter1 = System.currentTimeMillis();
+					System.out.println("Time needed for max flow initialization: "+(double) (timeAfter1-timeBef)/1000.0 +"s.");
+					WriteOutput.writeToLogFile("Time needed for max flow initialization: "+(double) (timeAfter1-timeBef)/1000.0 +"s.\n");
+				}
+				
+				if(numMulti == 0 && numShared == 0 &&  multiRnas.keySet().size() == 0){
+					System.out.println("No multiple rnas, no optimization necessary!");
+					WriteOutput.writeToLogFile("No multiple rnas, no optimization necessary! \n");
+					noMultis = true;
+					GeneFinder.noAmbiOpti = true;
+				}else{
+					// write out the lp
+					if(targetFunction.length() > 3){
+						bw.write(targetFunction.substring(2));
+						targetFunction = new StringBuffer("");
+					}
+							
+					bw.write("Subject To \n");
+					
+					// now constraints
+
+					for(String varName : multiRnas.keySet()){
+
+						StringBuffer constraint = new StringBuffer("");			// three different types of necessary constraints
+						StringBuffer constraintVar_f = new StringBuffer("");
+						StringBuffer constraintVar_Diff = new StringBuffer("");
+
+						for(String var : (Vector<String>) multiRnas.get(varName)[0]){  // set up the different constraints
+							
+							constraint.append(" + " + var + "\n");
+							binaryVars.append(" " + var + "\n");
+							constraintVar_f.append(var+"__f >= 0 \n");
+							//constraintVar_Diff.append(var+"__f - " + GeneFinder.readLength + " " + var + " <= 0 \n");
+							constraintVar_Diff.append(var+"__f - " + var + " <= 0 \n");
+						}
+						
+						constraint.append(" = 1");
+						
+						bw.write(constraintVar_f.toString() + constraintVar_Diff.toString() + constraint.substring(2) + "\n");
+
+					}
+
+					// now add the weight-constraints
+
+					for(Gene clust : clustNeighbors.keySet()){
+						
+						double covSum = 0.0;
+						double sum_exonL = clust.exonLength;
+
+						StringBuffer constraintF = new StringBuffer("");
+
+						for(Gene neighbor : (Vector<Gene>) clustNeighbors.get(clust)[0]){
+							covSum += neighbor.uniqueCov;// + ((1.0/100000.0));   // the ((1.0/100000.0)) ensures that also candidates without unique hits can be chosen
+							sum_exonL = sum_exonL + neighbor.exonLength;
+						}
+
+						for(String var : (Vector<String>) clustNeighbors.get(clust)[1]){
+							constraintF.append(" + " + var + "\n");
+						}
+						
+						// 1) no additional read length factor
+						// 2) multiple rnas have a penalty according to their number of ambiguous hits, already included in totalCov
+						// 3) competing candidates have a direct influence on the weight due to covSum
+						
+						double weight = 0; 
+						
+						covSum = Math.pow((covSum+1),2);
+						weight = ((clust.totalCov/covSum) * ((double)clust.exonLength/(double)sum_exonL));
+						
+						constraintF.append(" <= " + Math.pow((weight+1),2));  // to the power of two leads to a more "the winner takes it all fashion"
+						
+						bw.write(constraintF.substring(2) + "\n");
+						
+					}
+
+					//now declare normal variables as binary
+				
+					bw.write("Binary \n");
+					
+					int posBin = 0;
+					
+					for(posBin = 0;posBin<binaryVars.length();){
+						int posBin2 = posBin + 500000;
+						if(posBin2 < binaryVars.length()){
+							bw.write(binaryVars.substring(posBin,posBin2));	// extracted: posBin - (posBin2-1)
+						}
+						posBin = posBin + 500000;
+					}
+					
+					if((posBin-500000) >= 0){
+						bw.write(binaryVars.substring(posBin-500000));
+					}
+					
+					bw.write("END");				
+				}
+				
+				bw.close();
+
+			} catch (IOException e) {
+				e.printStackTrace();
+			}
+
+			long timeAfter = System.currentTimeMillis();
+			if(!GeneFinder.secondPart){
+				System.out.println("Time needed to set up lp file: "+ (double) (timeAfter-timeBef)/1000.0 +"s.");
+				WriteOutput.writeToLogFile("Time needed to set up lp file: "+ (double) (timeAfter-timeBef)/1000.0 +"s.\n");
+				System.out.println();
+			}
+			
+			if(!noMultis){  // now solve the lp
+				
+				long timeBefSolve = System.currentTimeMillis();
+				
+				// use glpk	
+					
+				if(GeneFinder.optiSolve){
+					solveMaxFlowWithGLPK();
+					long timeAfterSolve = System.currentTimeMillis();
+					System.out.println("Time needed to solve max flow: "+ (double) (timeAfterSolve-timeBefSolve)/1000.0 +"s.");
+					WriteOutput.writeToLogFile("Time needed to solve max flow: "+ (double) (timeAfterSolve-timeBefSolve)/1000.0 +"s.\n");
+				}
+
+				// parse solution file
+				CleanAfterAmbiOpti.parse_solution_and_clean_GLPK(multiRnas);
+						
+			}
+
+		}
+		
+		
+		/*
+		 * solves MaxFlow with the glpsol of the GLPK optimizer
+		 */
+		
+		public static void solveMaxFlowWithGLPK(){		
+		
+			System.out.println("Start glpk solve...");
+				
+			String firstExe = "python " + GeneFinder.pathToHelpFiles+"lpCall.py " + GeneFinder.pathOut + "input_it" + GeneFinder.iteration + ".lp " + GeneFinder.pathOut + "solutionGLPK_out_it" + GeneFinder.iteration + ".out"; 	
+			Giira.callAndHandleOutput(firstExe,true);
+			
+		}
+		
+		
+		/*
+		 * do the initialization for the max flow optimization
+		 * fill the required maps
+		 */
+		
+		public static void initialize_MaxFlow(Gene cluster, String varName){
+			
+			if(!clustNeighbors.keySet().contains(cluster)){
+				Vector<Gene> geneTmp = new Vector<Gene>();	// candidate is not contained so far, so add to map
+				Vector<String> varTmp = new Vector<String>();
+				varTmp.add(varName+ "__" + cluster.geneID + "__f");
+				geneTmp.add(cluster);
+				Object[] geneObj = {geneTmp,varTmp};
+				clustNeighbors.put(cluster,geneObj);
+			}else{
+				( (Vector<String>) clustNeighbors.get(cluster)[1]).add(varName+ "__" + cluster.geneID + "__f");  // add this variable to reads of the candidate
+			}
+
+			
+			if(multiRnas.keySet().contains(varName)){   // add this candidate to the list of genes connected to this read
+				((Vector<String>) multiRnas.get(varName)[0]).add(varName+"__"+cluster.geneID);
+				
+				((Vector<Gene>) multiRnas.get(varName)[1]).add(cluster);
+				
+				Vector<Gene> clustGenes = (Vector<Gene>) clustNeighbors.get(cluster)[0];
+				for(Gene clust : (Vector<Gene>) multiRnas.get(varName)[1]){			// add this candidate to the list of competing genes for all those genes already connected to this read
+					if(!clustGenes.contains(clust)){
+						( (Vector<Gene>) clustNeighbors.get(cluster)[0]).add(clust);
+					}
+					
+					if(!((Vector<Gene>) clustNeighbors.get(clust)[0]).contains(cluster)){
+						((Vector<Gene>) clustNeighbors.get(clust)[0]).add(cluster);
+					}
+				}
+				
+			}else{
+				Vector<String> vecTmp = new Vector<String>();  // first time ambiguous read was looked at, so add to map
+				Vector<Gene> vecGeneTmp = new Vector<Gene>();
+				vecGeneTmp.add(cluster);
+				vecTmp.add(varName+"__"+cluster.geneID);
+				Object[] objTmp = {vecTmp,vecGeneTmp};			
+				multiRnas.put(varName,objTmp);															
+			}
+		}
+		
+		
+		/*
+		 * go through all exons and sum up their lengths do determine the total exon length 
+		 */
+		
+		public static void sumUpExonLengths(Gene gene, Vector<int[]> exons){
+			
+			for(int[] exon :exons){
+				gene.exonLength += (exon[1] - exon[0] + 1);
+			}
+		}
+		
+	}
+
diff --git a/src/geneFinder/PrepareMapping_GF.java b/src/geneFinder/PrepareMapping_GF.java
index 03b0c8c..63143ff 100755
--- a/src/geneFinder/PrepareMapping_GF.java
+++ b/src/geneFinder/PrepareMapping_GF.java
@@ -22,7 +22,7 @@ public class PrepareMapping_GF {
 	
 	public String prepareRefFile_GF(){
 
-		String nameRef = GeneFinder.pathOut+"resultsRun/concaRefFile";
+		String nameRef = GeneFinder.pathOut+"concaRefFile";
 
 		Runtime prepRef = Runtime.getRuntime();
 		Process firstExe;
@@ -50,7 +50,7 @@ public class PrepareMapping_GF {
 			e.printStackTrace();
 		}
 
-		return (GeneFinder.pathOut+"resultsRun/concaRefFile");
+		return (GeneFinder.pathOut+"concaRefFile");
 	}
 	
 }
diff --git a/src/geneFinder/ProkaryoteExtraction.java b/src/geneFinder/ProkaryoteExtraction.java
index 77292a6..65c12a4 100755
--- a/src/geneFinder/ProkaryoteExtraction.java
+++ b/src/geneFinder/ProkaryoteExtraction.java
@@ -432,6 +432,12 @@ public class ProkaryoteExtraction {
 					Object[] returnArr = Prokaryote_Specials.define_OrfsInOperon(clustBef.sequence,clustBef);
 					clustBef.operonOrfs = (Vector<int[]>) returnArr[0];  // first entry is always 1-d array indicating whether normal gene sequence follows (1) or operon sequence (-1)
 					clustBef.operonDirectionIsForward = (Boolean) returnArr[1];
+					if(clustBef.operonDirectionIsForward){
+						clustBef.onRevStrand = false;
+					}else{
+						clustBef.onRevStrand = true;
+					}
+					
 				}			
 
 			}
@@ -454,6 +460,11 @@ public class ProkaryoteExtraction {
 				Object[] returnArr = Prokaryote_Specials.define_OrfsInOperon(clustBef.sequence, clustBef);
 				clustBef.operonOrfs = (Vector<int[]>) returnArr[0]; // first entry is always 1-d array indicating whether normal gene sequence follows (1) or operon sequence (-1)
 				clustBef.operonDirectionIsForward = (Boolean) returnArr[1];
+				if(clustBef.operonDirectionIsForward){
+					clustBef.onRevStrand = false;
+				}else{
+					clustBef.onRevStrand = true;
+				}
 			}
 
 		}	
@@ -610,7 +621,7 @@ public class ProkaryoteExtraction {
 		cluster.geneID = id++;
 		cluster.onRevStrand = false;
 		cluster.startPos = startPos;		
-		cluster.stopPos = currentPos+basesToAdd-1; // note: without "-1" stopPos would be the position necessary for seq-extraction = actual stop + 1 
+		cluster.stopPos = Math.min(currentPos+basesToAdd-1,contigSeq.length()-2); // note: without "-1" stopPos would be the position necessary for seq-extraction = actual stop + 1 
 		
 		cluster.coreSeq = (cluster.stopPos-cluster.startPos);
 		cluster.realDirectionNotKnown = true;	
diff --git a/src/geneFinder/ReadInParameters_GeneFinder.java b/src/geneFinder/ReadInParameters_GeneFinder.java
index 70ae2dd..9313070 100755
--- a/src/geneFinder/ReadInParameters_GeneFinder.java
+++ b/src/geneFinder/ReadInParameters_GeneFinder.java
@@ -410,13 +410,19 @@ public class ReadInParameters_GeneFinder {
 			GeneFinder.inprogeaCall = false;
 		}
 		
-		Runtime rtAlign = Runtime.getRuntime();
+		GeneFinder.logFile = new File(GeneFinder.pathOut+"log_it" + GeneFinder.iteration + ".txt");
+		if(!GeneFinder.secondPart){
+			System.out.println(inputText);
+			WriteOutput.writeToLogFile(inputText);
+		}
+		
+		/*Runtime rtAlign = Runtime.getRuntime();
 		Process firstExe;
 		try {
-			String exe = "mkdir "+GeneFinder.pathOut+"resultsRun";
-			GeneFinder.logFile = new File(GeneFinder.pathOut+"resultsRun/log_it" + GeneFinder.iteration + ".txt");
-			firstExe = rtAlign.exec(exe);
-			firstExe.waitFor();
+			//String exe = "mkdir "+GeneFinder.pathOut+"resultsRun";
+			GeneFinder.logFile = new File(GeneFinder.pathOut+"log_it" + GeneFinder.iteration + ".txt");
+			//firstExe = rtAlign.exec(exe);
+			//firstExe.waitFor();
 			
 			if(!GeneFinder.secondPart){
 				System.out.println(inputText);
@@ -427,9 +433,7 @@ public class ReadInParameters_GeneFinder {
 			e.printStackTrace();
 		} catch (InterruptedException e) {
 			e.printStackTrace();
-		}	
-		
-		
+		}*/
 
 	}	
 	
diff --git a/src/geneFinder/SamParser.java b/src/geneFinder/SamParser.java
index 7e8eda5..291befe 100755
--- a/src/geneFinder/SamParser.java
+++ b/src/geneFinder/SamParser.java
@@ -80,7 +80,7 @@ public class SamParser {
 			String lineReaSam = "";
 			
 			if(GeneFinder.iteration == 2){
-				br = new BufferedReader(new FileReader(new File(GeneFinder.pathOut+"resultsRun/reassignedReads_sorted.sam")));
+				br = new BufferedReader(new FileReader(new File(GeneFinder.pathOut+"reassignedReads_sorted.sam")));
 				while((lineReaSam = br.readLine()) != null){
 					String[] parts1 = lineReaSam.split("	");
 					if(!parts1[0].startsWith("@") && parts1.length >=4){
@@ -494,7 +494,7 @@ public class SamParser {
 					}
 				}else{
 					if(GeneFinder.iteration == 1){
-						WriteOutput.writeToOtherFile(GeneFinder.pathOut+"resultsRun/reassignedReads.sam",line+"\n");
+						WriteOutput.writeToOtherFile(GeneFinder.pathOut+"reassignedReads.sam",line+"\n");
 					}					
 				}
 			}
@@ -530,13 +530,13 @@ public class SamParser {
 			System.out.println((multiCount) + " of them are ambiguous reads.");
 			System.out.println("The total number of hits is " + (totalHitCount) + ".");		
 			System.out.println("Total number of multiple hits: " + multiTotalCount + ".");
-			System.out.println("Time needed to parse SAM file: "+ (double) (timeAfter-timebef)/1000.0 +"s.");
+			System.out.println("Time required to parse SAM file: "+ (double) (timeAfter-timebef)/1000.0 +"s.");
 			
 			GeneFinder.ambiProportion = (double) ((double)multiTotalCount/(double)totalHitCount);
 			System.out.println("Proportion of ambiguous reads: " + GeneFinder.ambiProportion);
 			
 			System.out.println();
-			WriteOutput.writeToLogFile("Done.\nTime needed to parse SAM file: "+ (double) (timeAfter-timebef)/1000.0 +"s.\n" + rnaCount + " rnas have been mapped to the reference. \n" + (multiCount) + " of them are ambiguous reads. \nThe total number of hits is " + (totalHitCount) + ".\nTotal number of multiple hits: " + multiTotalCount + ".\nProportion of ambiguous reads: " + GeneFinder.ambiProportion + ".\n\n");
+			WriteOutput.writeToLogFile("Done.\nTime required to parse SAM file: "+ (double) (timeAfter-timebef)/1000.0 +"s.\n" + rnaCount + " rnas have been mapped to the reference. \n" + (multiCount) + " of them are ambiguous reads. \nThe total number of hits is " + (totalHitCount) + ".\nTotal number of multiple hits: " + multiTotalCount + ".\nProportion of ambiguous reads: " + GeneFinder.ambiProportion + ".\n\n");
 		
 		}
 		
@@ -774,19 +774,19 @@ public class SamParser {
 				if(GeneFinder.haveSam != null){
 					nameSam = GeneFinder.haveSam;
 				}else{
-					nameSam = GeneFinder.pathOut+"resultsRun/accepted_hits.sam";
+					nameSam = GeneFinder.pathOut+"accepted_hits.sam";
 				}
 				
 			}else{
 				if(GeneFinder.haveSam != null){
 					nameSam = GeneFinder.haveSam;				
 				}else{
-					nameSam = GeneFinder.pathOut+"resultsRun/aln_BWA.sam";
+					nameSam = GeneFinder.pathOut+"aln_BWA.sam";
 				}
 				
 			}
 			
-			String nameOutfile = GeneFinder.pathOut+"resultsRun/covMean.txt";
+			String nameOutfile = GeneFinder.pathOut+"covMean.txt";
 			
 			try {
 				
@@ -807,7 +807,7 @@ public class SamParser {
 				// no coverage file provided, so create a new one
 
 				String firstExe = "python " + GeneFinder.pathToHelpFiles+"getMeanCov.py " + nameSam + " " + nameOutfile; 	
-				Giira.callAndHandleOutput(firstExe);
+				Giira.callAndHandleOutput(firstExe,false);
 				
 				timeAfter = System.currentTimeMillis();
 				
@@ -1139,7 +1139,7 @@ public class SamParser {
 										
 				}else{
 					if(GeneFinder.iteration == 1){
-						WriteOutput.writeToOtherFile(GeneFinder.pathOut+"resultsRun/reassignedReads.sam",line+"\n");
+						WriteOutput.writeToOtherFile(GeneFinder.pathOut+"reassignedReads.sam",line+"\n");
 					}					
 				}
 			}
diff --git a/src/geneFinder/TopHat_Call.java b/src/geneFinder/TopHat_Call.java
index 91e7465..8f9b840 100755
--- a/src/geneFinder/TopHat_Call.java
+++ b/src/geneFinder/TopHat_Call.java
@@ -35,7 +35,7 @@ public class TopHat_Call {
 
 		System.out.print("Indexing with Bowtie.... ");
 		String firstExe = "bowtie2-build " + refFile + " " + nameRefFile;
-		Giira.callAndHandleOutput(firstExe);
+		Giira.callAndHandleOutput(firstExe,true);
 
 		String optionString = "";   // get the options for the alignment
 		if(GeneFinder.settingMapper != null){
@@ -57,9 +57,8 @@ public class TopHat_Call {
 		}
 
 		// now call topHat, note that we do use the bam format as an intermediate format to ensure the right ordering of reads, final output is in sam format
-		// output directory shall be resultsRun
 
-		String out_dir = GeneFinder.pathOut+"resultsRun";
+		String out_dir = GeneFinder.pathOut;
 
 		String fileNames = new String();
 
@@ -78,19 +77,19 @@ public class TopHat_Call {
 
 		if(GeneFinder.rnaFilesWithNames.keySet().size() == 1){	// make the right call depending on how many read files are provided
 			secondExe = "tophat2 --no-sort-bam " + optionString + "-o " + out_dir + " " + nameRefFile + " " + rnaFile;
-			Giira.callAndHandleOutput(secondExe);
+			Giira.callAndHandleOutput(secondExe,true);
 
 		}else{
 			secondExe = "tophat2 --no-sort-bam " + optionString + "-o " + out_dir + " " + nameRefFile + " " + fileNames;
-			Giira.callAndHandleOutput(secondExe);			
+			Giira.callAndHandleOutput(secondExe,true);			
 		}
 
 		// the following is necessary to ensure that the reads in the resulting sam file will be in the necessary order
 		String thirdExe = "samtools sort -n " + out_dir + "/accepted_hits.bam " + out_dir + "/accepted_hits_sorted";  // sort and view guarantees that sam file is sorted correctly
 		String fourthExe = "samtools view -h -o " + out_dir + "/accepted_hits.sam " + out_dir + "/accepted_hits_sorted.bam";  // accepted_hits.sam is the file for the further analysis
 
-		Giira.callAndHandleOutput(thirdExe);
-		Giira.callAndHandleOutput(fourthExe);
+		Giira.callAndHandleOutput(thirdExe,true);
+		Giira.callAndHandleOutput(fourthExe,true);
 
 		// log messages
 		System.out.println("Done.");	    	
diff --git a/src/geneFinder/WriteOutput.java b/src/geneFinder/WriteOutput.java
index d6ed517..f742825 100755
--- a/src/geneFinder/WriteOutput.java
+++ b/src/geneFinder/WriteOutput.java
@@ -63,7 +63,7 @@ public class WriteOutput {
 	public static void sortReassignSamFile(){
 
 		String exe = "python " + GeneFinder.pathToHelpFiles+"sortReaSam.py " + GeneFinder.pathOut;
-		Giira.callAndHandleOutput(exe);
+		Giira.callAndHandleOutput(exe,true);
 
 	}
 
@@ -75,9 +75,9 @@ public class WriteOutput {
 
 		try {		
 			Runtime removeFile = Runtime.getRuntime();
-			Process exe3 = removeFile.exec("rm " + GeneFinder.pathOut+"resultsRun/reassignedReads.bam");
+			Process exe3 = removeFile.exec("rm " + GeneFinder.pathOut+"reassignedReads.bam");
 			exe3.waitFor();	
-			Process exe4 = removeFile.exec("rm " + GeneFinder.pathOut+"resultsRun/reassignedReads_sorted.bam");
+			Process exe4 = removeFile.exec("rm " + GeneFinder.pathOut+"reassignedReads_sorted.bam");
 			exe4.waitFor();
 		} catch (InterruptedException e) {			
 			e.printStackTrace();
@@ -97,9 +97,9 @@ public class WriteOutput {
 			BufferedWriter bwGTF = null;
 
 			if(GeneFinder.iteration == 2){
-				bwGTF = new BufferedWriter(new FileWriter(new File(GeneFinder.pathOut+"resultsRun/stats"+ GeneFinder.outputName + "_final.gtf")));		
+				bwGTF = new BufferedWriter(new FileWriter(new File(GeneFinder.pathOut+"stats"+ GeneFinder.outputName + "_final.gtf")));		
 			}else{				
-				bwGTF = new BufferedWriter(new FileWriter(new File(GeneFinder.pathOut+"resultsRun/stats"+ GeneFinder.outputName + namePartOut + ".gtf")));
+				bwGTF = new BufferedWriter(new FileWriter(new File(GeneFinder.pathOut+"stats"+ GeneFinder.outputName + namePartOut + ".gtf")));
 			}
 
 			int exonLength_covered_total = 0;
@@ -256,8 +256,8 @@ public class WriteOutput {
 			BufferedWriter bwGTF_ccAna = null;
 			BufferedWriter bwGTF_prok = null;
 
-			bwGTF_ccAna = new BufferedWriter(new FileWriter(new File(GeneFinder.pathOut+"resultsRun/stats"+ GeneFinder.outputName + namePartOut +"_ccAna.gtf")));
-			bwGTF_prok = new BufferedWriter(new FileWriter(new File(GeneFinder.pathOut+"resultsRun/stats"+ GeneFinder.outputName + namePartOut +"_prok.gtf")));
+			bwGTF_ccAna = new BufferedWriter(new FileWriter(new File(GeneFinder.pathOut+"stats"+ GeneFinder.outputName + namePartOut +"_ccAna.gtf")));
+			bwGTF_prok = new BufferedWriter(new FileWriter(new File(GeneFinder.pathOut+"stats"+ GeneFinder.outputName + namePartOut +"_prok.gtf")));
 
 			int exonLength_covered_total = 0;
 			geneNumTotal = 0;
@@ -460,12 +460,12 @@ public class WriteOutput {
 			//System.out.println("Number of prokaryotic genes with more than one cistron: " +  moreThanOneCistron);
 			//System.out.println("Number of prokaryotic genes with more than two cistrons: " +  moreThanTwoCistrons);
 			//System.out.println("Number of identified genes with support below threshold: " +  numGenesWithSupportBelowThreshold);
-			System.out.println("Number of identified genes with total coverage below threshold: " +  numGenesWithTotalSupportBelowThreshold);
-			System.out.println("Number of identified genes with only ambiguous support: " +  numGenesWithOnlyMulti);
-			System.out.println("Number of identified genes on reference: " +  geneRefNonZero);
-			System.out.println("More than one operon in transcript: " +  moreOperons + " with average split number: " + operonSplitAv);
+			System.out.println("Number of identified transcripts with total coverage below threshold: " +  numGenesWithTotalSupportBelowThreshold);
+			System.out.println("Number of identified transcripts with only ambiguous support: " +  numGenesWithOnlyMulti);
+			System.out.println("Number of identified transcripts on reference: " +  geneRefNonZero);
+			System.out.println("More than one operon in " +  moreOperons + " transcripts (with average operon number: " + operonSplitAv + ").");
 		
-			writeToLogFile("\n\nNumber of identified genes on reference: " +  geneRefNonZero + " \nNumber of identified genes with total coverage below threshold: " +  numGenesWithTotalSupportBelowThreshold + "\nNumber of identified genes with only ambiguous support: " +  numGenesWithOnlyMulti + "\n");
+			writeToLogFile("\n\nNumber of identified transcripts on reference: " +  geneRefNonZero + " \nNumber of identified transcripts with total coverage below threshold: " +  numGenesWithTotalSupportBelowThreshold + "\nNumber of identified transcripts with only ambiguous support: " +  numGenesWithOnlyMulti + "\n");
 
 			bwGTF_ccAna.close();
 			bwGTF_prok.close();

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/giira.git



More information about the debian-med-commit mailing list