[med-svn] [Git][med-team/chromhmm][upstream] New upstream version 1.23+dfsg

Nilesh Patra (@nilesh) gitlab at salsa.debian.org
Tue Sep 7 13:14:59 BST 2021



Nilesh Patra pushed to branch upstream at Debian Med / chromhmm


Commits:
bd371feb by Nilesh Patra at 2021-09-07T17:35:44+05:30
New upstream version 1.23+dfsg
- - - - -


7 changed files:

- README.md
- edu/mit/compbio/ChromHMM/BrowserOutput.java
- edu/mit/compbio/ChromHMM/ChromHMM.java
- edu/mit/compbio/ChromHMM/ConvertGeneTable.java
- edu/mit/compbio/ChromHMM/NestedEliminateInitialize.java
- edu/mit/compbio/ChromHMM/Preprocessing.java
- edu/mit/compbio/ChromHMM/StateAnalysis.java


Changes:

=====================================
README.md
=====================================
@@ -1,2 +1,2 @@
-See http://compbio.mit.edu/ChromHMM/ or https://ernstlab.biolchem.ucla.edu/ChromHMM/ for more information on ChromHMM.
+See https://ernstlab.biolchem.ucla.edu/ChromHMM/ for more information on ChromHMM.
 ========


=====================================
edu/mit/compbio/ChromHMM/BrowserOutput.java
=====================================
@@ -159,8 +159,8 @@ public class BrowserOutput
           while ((szLine = brcolor.readLine())!=null)
           {
              StringTokenizer st = new StringTokenizer(szLine,"\t");
-	     String szID = st.nextToken();
-	     String szColor = st.nextToken();
+	     String szID = st.nextToken().trim();
+	     String szColor = st.nextToken().trim();
              hmcolor.put(szID, szColor);
 	  }
 	  brcolor.close();
@@ -347,8 +347,8 @@ public class BrowserOutput
           while ((szLine = bridlabel.readLine())!=null)
           {
              StringTokenizer st = new StringTokenizer(szLine,"\t");
-	     String szID = st.nextToken();
-	     String szLabelExtend = st.nextToken();
+	     String szID = st.nextToken().trim();
+	     String szLabelExtend = st.nextToken().trim();
 	     hmlabelExtend.put(szID,szLabelExtend);
 	  }	  
 	  bridlabel.close();
@@ -378,10 +378,10 @@ public class BrowserOutput
           while ((szLine =brsegment.readLine())!=null)
           {
 	     StringTokenizer st = new StringTokenizer(szLine,"\t");
-	     String szcurrchrom = st.nextToken();
-	     int nbegin = Integer.parseInt(st.nextToken());
-	     int nend = Integer.parseInt(st.nextToken());
-	     String szFullID = st.nextToken();
+	     String szcurrchrom = st.nextToken().trim();
+	     int nbegin = Integer.parseInt(st.nextToken().trim());
+	     int nend = Integer.parseInt(st.nextToken().trim());
+	     String szFullID = st.nextToken().trim();
 	     String szID = szFullID.substring(1); //this removes ordering type
 	     if (bfirst)
 	     {
@@ -426,10 +426,10 @@ public class BrowserOutput
           while ((szLine =brsegment.readLine())!=null)
           {
 	     StringTokenizer st = new StringTokenizer(szLine,"\t");
-	     String szcurrchrom = st.nextToken();
-	     int nbegin = Integer.parseInt(st.nextToken());
-	     int nend = Integer.parseInt(st.nextToken());
-	     String szFullID = st.nextToken();
+	     String szcurrchrom = st.nextToken().trim();
+	     int nbegin = Integer.parseInt(st.nextToken().trim());
+	     int nend = Integer.parseInt(st.nextToken().trim());
+	     String szFullID = st.nextToken().trim();
 	     String szID = szFullID.substring(1); //this removes ordering type
 	     if (bfirst)
 	     {
@@ -496,10 +496,10 @@ public class BrowserOutput
        while ((szLine = brsegment.readLine())!=null)
        {
 	   StringTokenizer st = new StringTokenizer(szLine,"\t");
-	   String szchrom = st.nextToken();
-	   int nbegin = Integer.parseInt(st.nextToken());
-	   int nend = Integer.parseInt(st.nextToken());
-	   szLabelFull = st.nextToken();
+	   String szchrom = st.nextToken().trim();
+	   int nbegin = Integer.parseInt(st.nextToken().trim());
+	   int nend = Integer.parseInt(st.nextToken().trim());
+	   szLabelFull = st.nextToken().trim();
 	   String szLabel = szLabelFull.substring(1);
 
 	   hmlabelToFull.put(szLabel, szLabelFull);
@@ -740,10 +740,10 @@ public class BrowserOutput
        while ((szLine = brsegment.readLine())!=null)
        {
 	   StringTokenizer st = new StringTokenizer(szLine,"\t");
-	   String szchrom = st.nextToken();
-	   int nbegin = Integer.parseInt(st.nextToken());
-	   int nend = Integer.parseInt(st.nextToken());
-	   szLabelFull = st.nextToken();
+	   String szchrom = st.nextToken().trim();
+	   int nbegin = Integer.parseInt(st.nextToken().trim());
+	   int nend = Integer.parseInt(st.nextToken().trim());
+	   szLabelFull = st.nextToken().trim();
 	   String szLabel = szLabelFull.substring(1);
 
 	   hmlabelToFull.put(szLabel, szLabelFull);
@@ -827,12 +827,12 @@ public class BrowserOutput
               while ((szLine = brsegment.readLine())!=null)
               {
 	         StringTokenizer st = new StringTokenizer(szLine,"\t");
-		 String szchrom = st.nextToken();
+		 String szchrom = st.nextToken().trim();
 	         if (szchrom.equals(szcurrchrom))
 	         {
-	            int nbegin = Integer.parseInt(st.nextToken());
-	            int nend = Integer.parseInt(st.nextToken());
-       	            szLabelFull = st.nextToken();
+	            int nbegin = Integer.parseInt(st.nextToken().trim());
+	            int nend = Integer.parseInt(st.nextToken().trim());
+       	            szLabelFull = st.nextToken().trim();
 	            String szLabel = szLabelFull.substring(1);
 
 		    ArrayList alRecs = (ArrayList) hmcoords.get(szLabel);
@@ -930,12 +930,12 @@ public class BrowserOutput
               while ((szLine = brsegment.readLine())!=null)
               {
 		  StringTokenizer st = new StringTokenizer(szLine,"\t");
-		  String szchrom = st.nextToken();
+		  String szchrom = st.nextToken().trim();
 	          if (szchrom.equals(szcurrchrom))
 	          {
-		     int nbegin = Integer.parseInt(st.nextToken());
-		     int nend = Integer.parseInt(st.nextToken());
-		     szLabelFull = st.nextToken();
+		     int nbegin = Integer.parseInt(st.nextToken().trim());
+		     int nend = Integer.parseInt(st.nextToken().trim());
+		     szLabelFull = st.nextToken().trim();
 		     String szLabel = szLabelFull.substring(1);
 
 		     ArrayList alRecs = (ArrayList) hmcoords.get(szLabel);


=====================================
edu/mit/compbio/ChromHMM/ChromHMM.java
=====================================
@@ -814,8 +814,8 @@ public class ChromHMM
 	    while ((szLine = bridlabel.readLine())!=null)
 	    {
 		StringTokenizer st = new StringTokenizer(szLine,"\t");
-		String szID = st.nextToken();
-		String szLabelExtend = st.nextToken();
+		String szID = st.nextToken().trim();
+		String szLabelExtend = st.nextToken().trim();
 		hmlabelExtend.put(szID,szLabelExtend);
 	    }
 	    bridlabel.close();
@@ -1072,7 +1072,7 @@ public class ChromHMM
 	   String szLine;
 	   while ((szLine = brstate.readLine())!=null)
 	   {
-	       StringTokenizer st = new StringTokenizer(szLine,"\t");
+	       StringTokenizer st = new StringTokenizer(szLine,"\t ");
 	       int nold = Integer.parseInt(st.nextToken())-1;
 	       int nnew = Integer.parseInt(st.nextToken())-1;
 	       stateordering[nnew] = nold;
@@ -1156,8 +1156,8 @@ public class ChromHMM
 		    pwzip.write(btformat,0,btformat.length);
 		    continue;
 		}
-		StringBuffer sb = new StringBuffer(st.nextToken()+"\t"+st.nextToken()+"\t"+st.nextToken());
-		String szinstate = st.nextToken();
+		StringBuffer sb = new StringBuffer(st.nextToken().trim()+"\t"+st.nextToken().trim()+"\t"+st.nextToken().trim());
+		String szinstate = st.nextToken().trim();
 		String szoutstate;
 
 		if (!Character.isDigit(szinstate.charAt(0)))
@@ -1172,7 +1172,7 @@ public class ChromHMM
 
 		while (st.hasMoreTokens())
 		{
-		    sb.append("\t"+st.nextToken());
+		    sb.append("\t"+st.nextToken().trim());
 		}
 		sb.append("\n");
 
@@ -1199,8 +1199,8 @@ public class ChromHMM
 		    pw.println(szLine);
 		    continue;
 		}
-		pw.print(st.nextToken()+"\t"+st.nextToken()+"\t"+st.nextToken());
-		String szinstate = st.nextToken();
+		pw.print(st.nextToken().trim()+"\t"+st.nextToken().trim()+"\t"+st.nextToken().trim());
+		String szinstate = st.nextToken().trim();
 		String szoutstate;
 		if (!Character.isDigit(szinstate.charAt(0)))
 		{
@@ -1214,7 +1214,7 @@ public class ChromHMM
 
 		while (st.hasMoreTokens())
 		{
-		    pw.print("\t"+st.nextToken());
+		    pw.print("\t"+st.nextToken().trim());
 		}
 		pw.println();
 	    }
@@ -1902,7 +1902,7 @@ public class ChromHMM
 	       ArrayList aldata = new ArrayList();
 	       while ((szLine = br.readLine())!=null)
 	       {
-	          StringTokenizer st = new StringTokenizer(szLine,"\t");
+	          StringTokenizer st = new StringTokenizer(szLine,"\t ");
       	          StringBuffer sb = new StringBuffer();
 		
 	          for (int ncol = 0; ncol < numdatasets; ncol++)
@@ -1912,7 +1912,7 @@ public class ChromHMM
 	                throw new IllegalArgumentException("Found line without "+numdatasets+" values in file "+chromfiles[nseq]);
 		     }
 
-		     String sztoken = st.nextToken();
+		     String sztoken = st.nextToken();//.trim();
 		    
 		     if (sztoken.equals("0"))
 	             {
@@ -2903,13 +2903,13 @@ public class ChromHMM
         StringTokenizer st = new StringTokenizer(szLoadHeader,"\t");
 
 	//first token of the first line of the model file is assume to give the number of states
-	numstates = Integer.parseInt(st.nextToken());
-	numdatasets = Integer.parseInt(st.nextToken());
+	numstates = Integer.parseInt(st.nextToken().trim());
+	numdatasets = Integer.parseInt(st.nextToken().trim());
 	if (datasets ==null)
 	{
 	    datasets = new String[numdatasets];
 	}
-	chorder = st.nextToken().charAt(0);
+	chorder = st.nextToken().trim().charAt(0);
 
 	if ((nstateorder != ChromHMM.STATEORDER_TRANSITION)&&(nstateorder != ChromHMM.STATEORDER_EMISSION)&&(nstateorder != ChromHMM.STATEORDER_FIXED))
 	{
@@ -2964,20 +2964,20 @@ public class ChromHMM
 	while ((szLine = br.readLine())!=null)
 	{
 	    st = new StringTokenizer(szLine,"\t");
-	    String szvartype = st.nextToken();
+	    String szvartype = st.nextToken().trim();
 
 	    if (szvartype.equalsIgnoreCase("probinit"))
 	    {
 		//reading an inital probability
-		int nstate = Integer.parseInt(st.nextToken())-1;
-		double dprob = Double.parseDouble(st.nextToken());
+		int nstate = Integer.parseInt(st.nextToken().trim())-1;
+		double dprob = Double.parseDouble(st.nextToken().trim());
 		probinit[nstate] = dprob;
 	    }
 	    else if (szvartype.equalsIgnoreCase("transitionprobs"))
 	    {
-		int nfrom = Integer.parseInt(st.nextToken())-1;
-		int nto = Integer.parseInt(st.nextToken())-1;
-		double dprob = Double.parseDouble(st.nextToken());
+		int nfrom = Integer.parseInt(st.nextToken().trim())-1;
+		int nto = Integer.parseInt(st.nextToken().trim())-1;
+		double dprob = Double.parseDouble(st.nextToken().trim());
 		//this smooths the transition probability if dproceduresmmothtransition>0 using a weighted average with uniform 
 		transitionprobs[nfrom][nto] = dproceduresmoothtransition/((double) transitionprobs.length)+(1-dproceduresmoothtransition)*dprob;
 		if (transitionprobs[nfrom][nto] == 0)
@@ -2989,20 +2989,20 @@ public class ChromHMM
 	    }
 	    else if (szvartype.equalsIgnoreCase("emissionprobs"))
 	    {
-		int nstate = Integer.parseInt(st.nextToken())-1;
-		int nmod = Integer.parseInt(st.nextToken());
-		String szdataset = st.nextToken();
+		int nstate = Integer.parseInt(st.nextToken().trim())-1;
+		int nmod = Integer.parseInt(st.nextToken().trim());
+		String szdataset = st.nextToken().trim();
 		if (datasets[nmod]==null)
 		{
-		    datasets[nmod] = szdataset;//st.nextToken();
+		    datasets[nmod] = szdataset;//st.nextToken().trim();
 		}
 		else if (!szdataset.equals(datasets[nmod]))
 		{
 		    throw new IllegalArgumentException("For mark "+nmod+" in model file found "+szdataset+", but expecting "+datasets[nmod]+"."
                                                       +" This might be because columns of binarized file are in a different order from model learning.");
 		}
-		int nval = Integer.parseInt(st.nextToken());
-		double dprob = Double.parseDouble(st.nextToken());
+		int nval = Integer.parseInt(st.nextToken().trim());
+		double dprob = Double.parseDouble(st.nextToken().trim());
 		//smooths the emission probability if dproceduresmoothemission>0 using a weighted average with uniform
 		emissionprobs[nstate][nmod][nval] = dproceduresmoothemission/((double) numbuckets)+(1-dproceduresmoothemission)*dprob;
 	    }
@@ -3229,7 +3229,7 @@ public class ChromHMM
 	     int nline = 0;
 	     while ((szLinePosterior = brprobs.readLine())!=null)
 	     {
-		StringTokenizer stposterior = new StringTokenizer(szLinePosterior,"\t");
+		StringTokenizer stposterior = new StringTokenizer(szLinePosterior,"\t ");
 	        for (int nstate = 0; nstate < numstates; nstate++)
 	        {
 		   fullposterior[nline][nstate] = Double.parseDouble(stposterior.nextToken());
@@ -3288,13 +3288,13 @@ public class ChromHMM
 	     while ((szLineMax = brbed.readLine())!=null)
 	     {
 		 StringTokenizer stchrom = new StringTokenizer(szLineMax,"\t");
-		 String szchrom = stchrom.nextToken();
+		 String szchrom = stchrom.nextToken().trim();
 
 		 if (szchrom.equals(szcurrchrom))
 	         {
-		     int nbegin = Integer.parseInt(stchrom.nextToken())/nbinsize;
-		     int nend = (Integer.parseInt(stchrom.nextToken())-1)/nbinsize;
-		     int nstate = Integer.parseInt(stchrom.nextToken().substring(1))-1;		        		
+		     int nbegin = Integer.parseInt(stchrom.nextToken().trim())/nbinsize;
+		     int nend = (Integer.parseInt(stchrom.nextToken().trim())-1)/nbinsize;
+		     int nstate = Integer.parseInt(stchrom.nextToken().trim().substring(1))-1;		        		
 		     for (int nj = nbegin; nj <= nend; nj++)
 		     {
 			 fullmax[nj] = nstate;
@@ -3990,7 +3990,7 @@ public class ChromHMM
 	  ArrayList aldata = new ArrayList();
 	  while ((szLine = br.readLine())!=null)
 	  {
-	      StringTokenizer st = new StringTokenizer(szLine,"\t");
+	      StringTokenizer st = new StringTokenizer(szLine,"\t ");
 	      StringBuffer sb = new StringBuffer();
 
 	      for (int ncol = 0; ncol < numdatasets; ncol++)
@@ -4000,7 +4000,7 @@ public class ChromHMM
 		    throw new IllegalArgumentException("Found line without "+numdatasets+" values in file "+chromfiles[nordered_nseq]);
 		 }
 
-		 String sztoken = st.nextToken();
+		 String sztoken = st.nextToken();//.trim();
 
 		 if (sztoken.equals("0"))
 		 {
@@ -4138,7 +4138,7 @@ public class ChromHMM
 	     int nline = 0;
 	     while ((szLinePosterior = brprobs.readLine())!=null)
 	     {
-		StringTokenizer stposterior = new StringTokenizer(szLinePosterior,"\t");
+		StringTokenizer stposterior = new StringTokenizer(szLinePosterior,"\t ");
 	        for (int nstate = 0; nstate < numstates; nstate++)
 	        {
 		   fullposterior[nline][nstate] = Double.parseDouble(stposterior.nextToken());
@@ -4197,13 +4197,13 @@ public class ChromHMM
 	     while ((szLineMax = brbed.readLine())!=null)
 	     {
 		 StringTokenizer stchrom = new StringTokenizer(szLineMax,"\t");
-		 String szchrom = stchrom.nextToken();
+		 String szchrom = stchrom.nextToken().trim();
 
 		 if (szchrom.equals(szcurrchrom))
 	         {
-		     int nbegin = Integer.parseInt(stchrom.nextToken())/nbinsize;
-		     int nend = (Integer.parseInt(stchrom.nextToken())-1)/nbinsize;
-		     int nstate = Integer.parseInt(stchrom.nextToken().substring(1))-1;		        		
+		     int nbegin = Integer.parseInt(stchrom.nextToken().trim())/nbinsize;
+		     int nend = (Integer.parseInt(stchrom.nextToken().trim())-1)/nbinsize;
+		     int nstate = Integer.parseInt(stchrom.nextToken().trim().substring(1))-1;		        		
 		     for (int nj = nbegin; nj <= nend; nj++)
 		     {
 			 fullmax[nj] = nstate;
@@ -4983,7 +4983,7 @@ public class ChromHMM
 	   ArrayList aldata = new ArrayList();
 	   while ((szLine = br.readLine())!=null)
 	   {
-	      StringTokenizer st = new StringTokenizer(szLine,"\t");
+	      StringTokenizer st = new StringTokenizer(szLine,"\t ");
       	      StringBuffer sb = new StringBuffer();
 		
 	      for (int ncol = 0; ncol < numdatasets; ncol++)
@@ -5168,12 +5168,12 @@ public class ChromHMM
 
 		    String szout = cellSeq[nordered_nseq]+"\t"+szactualchrom+"\n";
                     byte[] btformat = szout.getBytes();
-                    pwprobszip.write(btformat,0,btformat.length);
+                    pwmaxzip.write(btformat,0,btformat.length); //fix 1.23
 	            //pwmax.println(cellSeq[nordered_nseq]+"\t"+chromSeq[nordered_nseq]);
 
 		    szout = "MaxState "+chorder+"\n";
 		    btformat = szout.getBytes();
-                    pwprobszip.write(btformat,0,btformat.length);
+                    pwmaxzip.write(btformat,0,btformat.length); //fix 1.23
 	            //pwmax.println("MaxState "+chorder);
 		 }
 
@@ -7703,7 +7703,7 @@ public class ChromHMM
 	      ArrayList aldata = new ArrayList();
 	      while ((szLine = br.readLine())!=null)
 	      {
-	         StringTokenizer st = new StringTokenizer(szLine,"\t");
+	         StringTokenizer st = new StringTokenizer(szLine,"\t ");
 		 StringBuffer sb = new StringBuffer();
 		
 		 for (int ncol = 0; ncol < numdatasets; ncol++)
@@ -7713,7 +7713,7 @@ public class ChromHMM
 	      	       throw new IllegalArgumentException("Found line without "+numdatasets+" values in file "+chromfiles[nseq]);
 		    }
 
-		    String sztoken = st.nextToken();
+		    String sztoken = st.nextToken();//.trim();
 		    
 		    if (sztoken.equals("0"))
 		    {
@@ -10265,7 +10265,7 @@ public class ChromHMM
 	      ArrayList aldata = new ArrayList();
 	      while ((szLine = br.readLine())!=null)
 	      {
-	         StringTokenizer st = new StringTokenizer(szLine,"\t");
+	         StringTokenizer st = new StringTokenizer(szLine,"\t ");
 		 StringBuffer sb = new StringBuffer();
 		
 		 for (int ncol = 0; ncol < numdatasets; ncol++)
@@ -10275,7 +10275,7 @@ public class ChromHMM
 	      	       throw new IllegalArgumentException("Found line without "+numdatasets+" values in file "+chromfiles_nseq);
 		    }
 
-		    String sztoken = st.nextToken();
+		    String sztoken = st.nextToken();//.trim();
 		    
 		    if (sztoken.equals("0"))
 		    {
@@ -12286,12 +12286,12 @@ public class ChromHMM
 	       throw new IllegalArgumentException("First line must contain cell type and chromosome. No entries found.");
 	    }
 
-	    cellSeq[nfile] = st.nextToken();
+	    cellSeq[nfile] = st.nextToken().trim();//added trim in version 1.23
 	    if (!st.hasMoreTokens())
 	    {
 		throw new IllegalArgumentException("First line must contain cell type and chromosome. Only one entry found.");
 	    }
-	    chromSeq[nfile] = st.nextToken();
+	    chromSeq[nfile] = st.nextToken().trim();//added trim in version 1.23
 
 	    if (st.hasMoreTokens())
 	    {
@@ -12312,7 +12312,7 @@ public class ChromHMM
 	       int ntoken = 0;
 	       while (st.hasMoreTokens())
 	       {
-		  datasets[ntoken] = st.nextToken();
+		  datasets[ntoken] = st.nextToken().trim();
 	          ntoken++;
 	       }
  	       //numdatasets is the number of marks we are integrating
@@ -12330,7 +12330,7 @@ public class ChromHMM
 		//Gives warning if a header column does not match
 	        while (st.hasMoreTokens())
 	        {
-		   String sztoken = st.nextToken();
+		   String sztoken = st.nextToken().trim();
 	           if (!datasets[ntoken].equals(sztoken))
 		   {
 		       System.out.println("WARNING headers do not match between "+chromfiles[nfile]+" and "+chromfiles[0]);
@@ -12632,12 +12632,12 @@ public class ChromHMM
 	       throw new IllegalArgumentException("First line must contain cell type and chromosome. No entries found.");
 	    }
 
-	    cellSeq[nfile] = st.nextToken();
+	    cellSeq[nfile] = st.nextToken().trim(); //added trim in version 1.23
 	    if (!st.hasMoreTokens())
 	    {
 		throw new IllegalArgumentException("First line must contain cell type and chromosome. Only one entry found.");
 	    }
-	    chromSeq[nfile] = st.nextToken();
+	    chromSeq[nfile] = st.nextToken().trim(); //added trim in version 1.23
 
 	    if (st.hasMoreTokens())
 	    {
@@ -12658,7 +12658,7 @@ public class ChromHMM
 	       int ntoken = 0;
 	       while (st.hasMoreTokens())
 	       {
-		  datasets[ntoken] = st.nextToken();
+		  datasets[ntoken] = st.nextToken().trim();
 		  ntoken++;
 	       }
 	    }
@@ -12674,7 +12674,7 @@ public class ChromHMM
 		//Gives warning if a header column does not match
 	        while (st.hasMoreTokens())
 	        {
-		   String sztoken = st.nextToken();
+		   String sztoken = st.nextToken().trim();
 	           if (!datasets[ntoken].equals(sztoken))
 		   {
 		       System.out.println("WARNING headers do not match between "+chromfiles[nfile]+" and "+chromfiles[0]);
@@ -12689,7 +12689,7 @@ public class ChromHMM
 	    ArrayList aldata = new ArrayList();
 	    while ((szLine = br.readLine())!=null)
 	    {
-		st = new StringTokenizer(szLine,"\t");
+		st = new StringTokenizer(szLine,"\t ");
 		StringBuffer sb = new StringBuffer();
 		
 		for (int ncol = 0; ncol < numdatasets; ncol++)
@@ -12700,7 +12700,7 @@ public class ChromHMM
 			throw new IllegalArgumentException("Found line without "+numdatasets+" values in file "+chromfiles[nfile]);
 		    }
 
-		    String sztoken = st.nextToken();
+		    String sztoken = st.nextToken();//.trim();
 		    
 		    if (sztoken.equals("0"))
 		    {
@@ -12848,13 +12848,14 @@ public class ChromHMM
 
 	if (szcommand.equalsIgnoreCase("Version"))
 	{
-	    System.out.println("This is Version 1.22 of ChromHMM (c) Copyright 2008-2012 Massachusetts Institute of Technology");
+	    System.out.println("This is Version 1.23 of ChromHMM (c) Copyright 2008-2012 Massachusetts Institute of Technology");
 	}
         else if ((szcommand.equals("BinarizeBam"))||(szcommand.equalsIgnoreCase("BinarizeBed")))
 	{
 	    boolean bstacked = false;
 	    boolean bgzip = false;
 	    boolean bpairend = false;
+	    boolean bmixed = false;
 	    boolean bsplit = false;
 	    String szcontroldir=null;
 	    int nflankwidthcontrol = 5;
@@ -13012,6 +13013,10 @@ public class ChromHMM
 		     {
 			 bpairend = true;
 		     }
+		     else if ((args[nargindex].equals("-mixed"))&&(szcommand.equalsIgnoreCase("BinarizeBam")))
+		     {
+			 bmixed = true;
+		     }
 		     else if (args[nargindex].equals("-w"))
 		     {
 		        nflankwidthcontrol = Integer.parseInt(args[++nargindex]);
@@ -13030,7 +13035,7 @@ public class ChromHMM
 	       } 	       
 	    }
 
-	    if ((bpairend) && (bcenterinterval||bshift||bpeaks))
+	    if ((bpairend) && (bmixed||bcenterinterval||bshift||bpeaks))
 	    {
 		bok = false;
 	    }
@@ -13214,7 +13219,7 @@ public class ChromHMM
 						szoutputbinarydirsub,szoutputcontroldir,
 					        dpoissonthresh,dfoldthresh,bcontainsthresh,
 					        npseudocountcontrol,nbinsize,szcolfields,bpeaks, dcountthresh,szcommand.equalsIgnoreCase("BinarizeBam"),
-					       bpairend, bgzip, bsplit, numsplitbins, bsplitcols,nnummarksplit,nmarksplitindex, bstacked);	   	        
+							     bpairend, bgzip, bsplit, numsplitbins, bsplitcols,nnummarksplit,nmarksplitindex, bstacked, bmixed);	   	        
 		      }
 		  }
 		  else
@@ -13267,7 +13272,7 @@ public class ChromHMM
 						szoutputbinarydir,szoutputcontroldir,
 					        dpoissonthresh,dfoldthresh,bcontainsthresh,
 					        npseudocountcontrol,nbinsize,szcolfields,bpeaks, dcountthresh,szcommand.equalsIgnoreCase("BinarizeBam"),
-					        bpairend, bgzip, bsplit, numsplitbins, bsplitcols,nnummarksplit,nmarksplitindex,bstacked);	   	          
+							 bpairend, bgzip, bsplit, numsplitbins, bsplitcols,nnummarksplit,nmarksplitindex,bstacked,bmixed);	   	          
 		   
 		  }
 	       }
@@ -13291,7 +13296,7 @@ public class ChromHMM
 	       else
 	       {
 		   System.out.println("usage BinarizeBam [-b binsize][-c controldir][-e offsetend][-f foldthresh]"+
-                                  "[-g signalthresh][-gzip][[-o outputcontroldir][-p poissonthresh][-paired|[-center][-n shift][-peaks [-i splitindex]]"+
+                                  "[-g signalthresh][-gzip][[-o outputcontroldir][-p poissonthresh][-paired|[-mixed][-center][-n shift][-peaks [-i splitindex]]"+
                                   "[-s offsetstart][-splitcols [-k splitcolindex][-m numsplitcols]][-splitrows [-j numsplitbins]][-stacked][-strictthresh][-t outputsignaldir]"+
                                   "[-u pseudocountcontrol][-w flankwidthcontrol] "+
 				      "chromosomelengthfile inputbamdir cellmarkfiletable outputbinarydir");
@@ -13541,9 +13546,9 @@ public class ChromHMM
 		        StringTokenizer stcolor = new StringTokenizer(szcolor,",");
 		        if (stcolor.countTokens()==3)
 		        {
-		           nr = Integer.parseInt(stcolor.nextToken());
-			   ng = Integer.parseInt(stcolor.nextToken());
-		           nb = Integer.parseInt(stcolor.nextToken());
+		           nr = Integer.parseInt(stcolor.nextToken().trim());
+			   ng = Integer.parseInt(stcolor.nextToken().trim());
+		           nb = Integer.parseInt(stcolor.nextToken().trim());
 			}
 		        else
 		        {
@@ -13659,9 +13664,9 @@ public class ChromHMM
 		     StringTokenizer stcolor = new StringTokenizer(szcolor,",");
 		     if (stcolor.countTokens()==3)
 		     {
-		        nr = Integer.parseInt(stcolor.nextToken());
-		        ng = Integer.parseInt(stcolor.nextToken());
-		        nb = Integer.parseInt(stcolor.nextToken());
+		        nr = Integer.parseInt(stcolor.nextToken().trim());
+		        ng = Integer.parseInt(stcolor.nextToken().trim());
+		        nb = Integer.parseInt(stcolor.nextToken().trim());
 		     }
 		     else
 		     {
@@ -14039,9 +14044,9 @@ public class ChromHMM
 		     StringTokenizer stcolor = new StringTokenizer(szcolor,",");
 		     if (stcolor.countTokens()==3)
 		     {
-			nr = Integer.parseInt(stcolor.nextToken());
-			ng = Integer.parseInt(stcolor.nextToken());
-			nb = Integer.parseInt(stcolor.nextToken());
+			nr = Integer.parseInt(stcolor.nextToken().trim());
+			ng = Integer.parseInt(stcolor.nextToken().trim());
+			nb = Integer.parseInt(stcolor.nextToken().trim());
 		     }
 		     else
 		     {
@@ -14211,9 +14216,9 @@ public class ChromHMM
 		     StringTokenizer stcolor = new StringTokenizer(szcolor,",");
 		     if (stcolor.countTokens()==3)
 		     {
-			nr = Integer.parseInt(stcolor.nextToken());
-			ng = Integer.parseInt(stcolor.nextToken());
-			nb = Integer.parseInt(stcolor.nextToken());
+			nr = Integer.parseInt(stcolor.nextToken().trim());
+			ng = Integer.parseInt(stcolor.nextToken().trim());
+			nb = Integer.parseInt(stcolor.nextToken().trim());
 		     }
 		     else
 		     {
@@ -14404,9 +14409,9 @@ public class ChromHMM
 		     StringTokenizer stcolor = new StringTokenizer(szcolor,",");
 		     if (stcolor.countTokens()==3)
 		     {
-		        nr = Integer.parseInt(stcolor.nextToken());
-		        ng = Integer.parseInt(stcolor.nextToken());
-		        nb = Integer.parseInt(stcolor.nextToken());
+		        nr = Integer.parseInt(stcolor.nextToken().trim());
+		        ng = Integer.parseInt(stcolor.nextToken().trim());
+		        nb = Integer.parseInt(stcolor.nextToken().trim());
 		     }
 		     else
 		     {
@@ -14996,9 +15001,9 @@ public class ChromHMM
 		     StringTokenizer stcolor = new StringTokenizer(szcolor,",");
 		     if (stcolor.countTokens()==3)
 		     {
-		        nr = Integer.parseInt(stcolor.nextToken());
-		        ng = Integer.parseInt(stcolor.nextToken());
-		        nb = Integer.parseInt(stcolor.nextToken());
+		        nr = Integer.parseInt(stcolor.nextToken().trim());
+		        ng = Integer.parseInt(stcolor.nextToken().trim());
+		        nb = Integer.parseInt(stcolor.nextToken().trim());
 		     }
 		     else
 		     {


=====================================
edu/mit/compbio/ChromHMM/ConvertGeneTable.java
=====================================
@@ -140,37 +140,37 @@ public class ConvertGeneTable
 	while ((szLine = br.readLine())!=null)
 	{
 	    StringTokenizer st = new StringTokenizer(szLine,"\t",true);
-	    String szbin = st.nextToken();
+	    String szbin = st.nextToken().trim();
 	    if (!szbin.equals("\t"))
-		st.nextToken();
-	    String szname = st.nextToken();
+		st.nextToken().trim();
+	    String szname = st.nextToken().trim();
 	    if (!szname.equals("\t"))
-		st.nextToken();
-	    String szchrom = st.nextToken();
+		st.nextToken().trim();
+	    String szchrom = st.nextToken().trim();
 	    if (!szchrom.equals("\t"))
-		st.nextToken();
-	    String szstrand = st.nextToken();
+		st.nextToken().trim();
+	    String szstrand = st.nextToken().trim();
 	    if (!szstrand.equals("\t"))
-		st.nextToken();
-	    String sztxStart = st.nextToken();
+		st.nextToken().trim();
+	    String sztxStart = st.nextToken().trim();
 	    if (!sztxStart.equals("\t"))
-		st.nextToken();
-	    String sztxEnd = st.nextToken();
+		st.nextToken().trim();
+	    String sztxEnd = st.nextToken().trim();
 	    if (!sztxEnd.equals("\t"))
-		st.nextToken();
-	    String szcdsStart = st.nextToken();
+		st.nextToken().trim();
+	    String szcdsStart = st.nextToken().trim();
 	    if (!szcdsStart.equals("\t"))
-		st.nextToken();
-	    String szcdsEnd = st.nextToken();
+		st.nextToken().trim();
+	    String szcdsEnd = st.nextToken().trim();
 	    if (!szcdsEnd.equals("\t"))
-		st.nextToken();
-	    String szexonCount = st.nextToken();
+		st.nextToken().trim();
+	    String szexonCount = st.nextToken().trim();
 	    if (!szexonCount.equals("\t"))
-		st.nextToken();
-	    String szexonStarts = st.nextToken();
+		st.nextToken().trim();
+	    String szexonStarts = st.nextToken().trim();
 	    if (!szexonStarts.equals("\t"))
-		st.nextToken();
-	    String szexonEnds = st.nextToken();
+		st.nextToken().trim();
+	    String szexonEnds = st.nextToken().trim();
 
 	    //removed 1.22
 	    //if (!szexonEnds.equals("\t"))
@@ -288,7 +288,7 @@ public class ConvertGeneTable
 
 	       while (stexonStarts.hasMoreTokens())
 	       {
-		  String szexonOut = szchrom+"\t"+stexonStarts.nextToken()+"\t"+stexonEnds.nextToken()+"\n";
+		  String szexonOut = szchrom+"\t"+stexonStarts.nextToken().trim()+"\t"+stexonEnds.nextToken().trim()+"\n";
 		  if (!hsexon.contains(szexonOut))
 		  {
                      byte[] btformat = szexonOut.getBytes();
@@ -340,7 +340,7 @@ public class ConvertGeneTable
 
 	       while (stexonStarts.hasMoreTokens())
 	       {
-		  String szexonOut = szchrom+"\t"+stexonStarts.nextToken()+"\t"+stexonEnds.nextToken();
+		  String szexonOut = szchrom+"\t"+stexonStarts.nextToken().trim()+"\t"+stexonEnds.nextToken().trim();
 
                   if (!hsexon.contains(szexonOut))
 		  {


=====================================
edu/mit/compbio/ChromHMM/NestedEliminateInitialize.java
=====================================
@@ -80,10 +80,10 @@ public class NestedEliminateInitialize
 		      if (szLine.startsWith("emission"))
 		      {
 			  StringTokenizer stemiss = new StringTokenizer(szLine,"\t");
-			  stemiss.nextToken();
-			  stemiss.nextToken();
-			  stemiss.nextToken();
-			  String szmark = stemiss.nextToken();
+			  stemiss.nextToken().trim();
+			  stemiss.nextToken().trim();
+			  stemiss.nextToken().trim();
+			  String szmark = stemiss.nextToken().trim();
 			  
 			  Object objInt = hmNameToID.get(szmark);
 			  if (objInt ==null)
@@ -140,10 +140,10 @@ public class NestedEliminateInitialize
 	          if (szLine.startsWith("emissionprobs"))
 		  {
 		      st = new StringTokenizer(szLine,"\t");
-		      st.nextToken();
-		      int nstate = Integer.parseInt(st.nextToken())-1;
-		      st.nextToken();
-		      String szmark = st.nextToken();
+		      st.nextToken().trim();
+		      int nstate = Integer.parseInt(st.nextToken().trim())-1;
+		      st.nextToken().trim();
+		      String szmark = st.nextToken().trim();
 		      Integer intMark = (Integer) hmNameToID.get(szmark);
 		      int nmark = -1;
 		      if (intMark == null)
@@ -154,10 +154,10 @@ public class NestedEliminateInitialize
 		      {
 			  nmark = ((Integer) intMark).intValue();
 		      }
-		      String szbucket = st.nextToken();
+		      String szbucket = st.nextToken().trim();
 		      if (szbucket.equals("1"))
 		      {
-		         modelemissions[nmodel][nstate][nmark] = Double.parseDouble(st.nextToken());
+		         modelemissions[nmodel][nstate][nmark] = Double.parseDouble(st.nextToken().trim());
 			 ncount++;
 		      }
 		  }
@@ -197,9 +197,9 @@ public class NestedEliminateInitialize
 	       throw new IllegalArgumentException(szinputdir+"/"+szmainfile+" is missing lines!");
 	   }
 	   StringTokenizer st = new StringTokenizer(szinitline,"\t");
-           st.nextToken();
-           st.nextToken();
-           bestprobinit[ni] = Double.parseDouble(st.nextToken());
+           st.nextToken().trim();
+           st.nextToken().trim();
+           bestprobinit[ni] = Double.parseDouble(st.nextToken().trim());
         }
 
 	for (int ni = 0; ni < nbestnumstates; ni++)
@@ -212,10 +212,10 @@ public class NestedEliminateInitialize
 		  throw new IllegalArgumentException(szinputdir+"/"+szmainfile+" is missing lines!");
 	      }
 	      StringTokenizer st = new StringTokenizer(sztransitionline,"\t");
-	      st.nextToken();
-	      st.nextToken();
-	      st.nextToken();
-	      besttransitionprobs[ni][nj] = Double.parseDouble(st.nextToken());
+	      st.nextToken().trim();
+	      st.nextToken().trim();
+	      st.nextToken().trim();
+	      besttransitionprobs[ni][nj] = Double.parseDouble(st.nextToken().trim());
 	   }
 	}
 
@@ -228,9 +228,9 @@ public class NestedEliminateInitialize
         while ((szLine = brfile.readLine())!=null)
 	{
 	   StringTokenizer st = new StringTokenizer(szLine,"\t");
-           st.nextToken();
-	   int nj = Integer.parseInt(st.nextToken())-1;
-           emissionsline[nj].add(st.nextToken()+"\t"+st.nextToken()+"\t"+st.nextToken()+"\t"+st.nextToken());
+           st.nextToken().trim();
+	   int nj = Integer.parseInt(st.nextToken().trim())-1;
+           emissionsline[nj].add(st.nextToken().trim()+"\t"+st.nextToken().trim()+"\t"+st.nextToken().trim()+"\t"+st.nextToken().trim());
 	}
         brfile.close();
 


=====================================
edu/mit/compbio/ChromHMM/Preprocessing.java
=====================================
@@ -1,4 +1,3 @@
-
 /**
  * ChromHMM - automating chromatin state discovery and characterization
  * Copyright (C) 2008-2012 Massachusetts Institute of Technology
@@ -89,7 +88,8 @@ public class Preprocessing
     private static void loadGrid(int[][][] grid,boolean[] bpresent, boolean[] bpresentmarks, String[] marks,int nshift, int nbinsize,
                                  boolean bcenterinterval,int noffsetleft,
 				 int noffsetright,HashMap hmfiles, String szcell, String szmarkdir,HashMap hmchrom, 
-                                 int ninitval, String szcolfields,boolean bpeaks,boolean bcontrol, boolean bbinarizebam, boolean bpairend) throws IOException
+                                 int ninitval, String szcolfields,boolean bpeaks,boolean bcontrol, boolean bbinarizebam, 
+                                 boolean bpairend,boolean bmixed) throws IOException
     {
 	int nummarks = grid[0][0].length;
 	//initalizes all values in grid to ninitval
@@ -122,12 +122,12 @@ public class Preprocessing
           {
              throw new IllegalArgumentException(" invalid number of column fields in "+szcolfields+" expecting 3 or 4 integers");
           }
-          nchromcol = Integer.parseInt(stcolfields.nextToken());
-          nbegincol = Integer.parseInt(stcolfields.nextToken());
-          nendcol = Integer.parseInt(stcolfields.nextToken());
+          nchromcol = Integer.parseInt(stcolfields.nextToken().trim());
+          nbegincol = Integer.parseInt(stcolfields.nextToken().trim());
+          nendcol = Integer.parseInt(stcolfields.nextToken().trim());
           if (!bcenterinterval)
           {
-             nstrandcol = Integer.parseInt(stcolfields.nextToken());
+             nstrandcol = Integer.parseInt(stcolfields.nextToken().trim());
           }
 
 	  nmaxindex = Math.max(nchromcol,Math.max(nbegincol,Math.max(nendcol, nstrandcol)));
@@ -195,7 +195,133 @@ public class Preprocessing
 		     SamReader samReader = srf.open(new File(szmarkdir+"/"+szfile));
 		     SAMRecordIterator iter = samReader.iterator();
 
-		     if (bpairend)
+		     if (bmixed)
+		     {
+			 //added in v1.23 to handle mixed
+		        while (iter.hasNext())
+		        {
+			   SAMRecord rec= iter.next();
+
+			   if (rec.getReadPairedFlag())
+			   {
+
+			      if ((rec.getProperPairFlag()) && rec.getFirstOfPairFlag()&&(!rec.getReadUnmappedFlag()))
+			      {
+			         int nstartorig  = rec.getAlignmentStart()-1;
+			         int nendorig = rec.getAlignmentEnd();
+			         String szchrom = rec.getReferenceName();
+			         boolean bnegstrand = rec.getReadNegativeStrandFlag();
+			         int ninsertsize = rec.getInferredInsertSize();
+
+		                 Integer objInt = (Integer) hmchrom.get(szchrom);
+
+		                 //if we don't have the chromosome for the read will ignore it
+	                         if (objInt != null)
+	                         {
+		                    int nchrom = objInt.intValue();
+			            int nbin;
+
+				    //no center mode in paired end
+
+		                    if (bnegstrand) 
+		                    {		   
+				       //"-"   
+				       //was nshift
+		                       nbin = (nendorig-noffsetright+ninsertsize/2)/nbinsize;	
+				       //ninsertsize can be negative so adding
+                                       //removed one from here may need it for backwards consistency		      		       		      	    
+				    }
+				    else
+				    {
+				       //"+"
+				       //was nshift
+		                       nbin = (nstartorig-noffsetleft+ninsertsize/2)/nbinsize; 
+				    }
+				     //}
+		   
+		                    if ((nbin>=0)&&(nbin < grid[nchrom].length))
+	                            {
+		                       //increment bin count if falls into valid interval
+	                               grid[nchrom][nbin][nmark]++;
+		                       //we do have this chromosome
+	                               bpresent[nchrom] = true;			    
+			               bdatafound = true;
+				    }
+				 }
+			      }
+			   }
+			   else
+			   {
+			      //treat it as single end
+			      int nstartorig  = rec.getAlignmentStart()-1;
+		      	      int nendorig = rec.getAlignmentEnd();
+			      String szchrom = rec.getReferenceName();
+			      boolean bnegstrand = rec.getReadNegativeStrandFlag();
+			      boolean bunmapped = rec.getReadUnmappedFlag();
+
+			      if (bunmapped)
+			      {
+			         continue;
+			      }
+
+		              Integer objInt = (Integer) hmchrom.get(szchrom);
+
+			      //if we don't have the chromosome for the read will ignore it
+	                      if (objInt != null)
+	                      {
+		                 int nchrom = objInt.intValue();
+			         int nbin;
+
+		                 if (bpeaks)
+		                 {
+			            int nstart = Math.max(0,(nstartorig-noffsetleft)/nbinsize); 
+			            int nend = Math.min(grid[nchrom].length-1, (nendorig-noffsetright)/nbinsize);		      
+
+			            for (nbin = nstart; nbin <= nend; nbin++)
+	                            {
+		                       //increment bin count if falls into valid interval
+	                               grid[nchrom][nbin][nmark]++;
+		                       //we do have this chromosome
+	                               bpresent[nchrom] = true;			    
+			               bdatafound = true;
+				    }
+				 }
+		                 else
+		                 {
+		                    if (bcenterinterval)
+		                    {
+		                       //uses the center of the interval which is useful if read is already extended
+		                       nbin = (nstartorig-noffsetleft+nendorig-noffsetright)/(2*nbinsize);
+				    }
+		                    else
+		                    {
+		                       if (bnegstrand) 
+		                       {		   
+				          //"-"   
+		                          nbin = (nendorig-noffsetright-nshift)/nbinsize;	
+                                          //removed one from here may need it for backwards consistency		      		       		     	    
+				       }
+				       else
+				       {
+			                  //"+"
+		                          nbin = (nstartorig-noffsetleft+nshift)/nbinsize; 
+				       }
+				    }
+		   
+		                    if ((nbin>=0)&&(nbin < grid[nchrom].length))
+	                            {
+		                       //increment bin count if falls into valid interval
+	                               grid[nchrom][nbin][nmark]++;
+		                       //we do have this chromosome
+	                               bpresent[nchrom] = true;			    
+			               bdatafound = true;
+				    }
+				 }
+			      }
+			   }
+			}		       
+		     }
+		     else if (bpairend)
 		     {
 		        while (iter.hasNext())
 		        {
@@ -411,7 +537,7 @@ public class Preprocessing
 		          {
 		             throw new IllegalArgumentException("Empty line found in "+szmarkdir+"/"+szfile);
 			  }
-		          String szchrom = st.nextToken();
+		          String szchrom = st.nextToken();//.trim();
 		          Integer objInt = (Integer) hmchrom.get(szchrom);
 
 		          //if we don't have the chromosome for the read will ignore it
@@ -423,13 +549,13 @@ public class Preprocessing
 		                throw new IllegalArgumentException("Missing begin coordinate in "+szmarkdir+"/"+szfile);
 			     }
 
-			     String szbegin = st.nextToken();
+			     String szbegin = st.nextToken();//.trim();
 
 		             if (!st.hasMoreTokens())
 		             {
 		                throw new IllegalArgumentException("Missing end coordinate in "+szmarkdir+"/"+szfile);
 			     }
-			     String szend = st.nextToken();
+			     String szend = st.nextToken();//.trim();
 			     int nbin;
 
 		             if (bpeaks)
@@ -460,11 +586,11 @@ public class Preprocessing
 				      throw new IllegalArgumentException("strand column expected, but not found in "+szmarkdir+"/"+szfile);
 				   }
 		                   //looks for strand in sixth column or last column if less than six
-	                           String szstrand = st.nextToken();
+	                           String szstrand = st.nextToken();//.trim();
 	                           if (st.hasMoreTokens())
-	                              szstrand = st.nextToken();
+				       szstrand = st.nextToken();//.trim();
 	                           if (st.hasMoreTokens())
-	    	                      szstrand = st.nextToken();
+				       szstrand = st.nextToken();//.trim();
 			     	 	      
 		                   if (szstrand.equals("+"))
 		                   {		      
@@ -548,13 +674,13 @@ public class Preprocessing
 	    {
 		throw new IllegalArgumentException("empty line found in "+szchromlengthfile);
 	    }
-	    chroms[ni] = st.nextToken();
+	    chroms[ni] = st.nextToken();//.trim();
 
 	    if (!st.hasMoreTokens())
 	    {
 		throw new IllegalArgumentException("missing chromosome length for "+allines.get(ni)+" in "+szchromlengthfile);
 	    }
-	    int nlength = Integer.parseInt(st.nextToken());
+	    int nlength = Integer.parseInt(st.nextToken());//.trim());
             int numbins = nlength/nbinsize;
             //if (nlength % nbinsize !=0)  //removed pre 1.18 to be consistent from binarizing directly
             //    numbins++;
@@ -606,7 +732,12 @@ public class Preprocessing
 		  alfiles = new ArrayList();
 	          hmfiles.put(szcell+"\t"+szmark,alfiles);
 	       }
-	       alfiles.add(szfile);
+
+	       if (!alfiles.contains(szfile))
+	       {
+		   //added in v1.23 to only count once a given file
+	          alfiles.add(szfile);
+	       }
 	    }
 	    ncellmarkentry++;
 	}
@@ -675,11 +806,11 @@ public class Preprocessing
 	  	    {
 		       StringTokenizer st = new StringTokenizer(szLine,"\t");
 		       if (st.countTokens() == 0) continue;
-		       String szchrom = st.nextToken();
+		       String szchrom = st.nextToken().trim();
 		       if (szchrom.equals(szcurrchrom))
 		       {
-			  int nbegin = (Integer.parseInt(st.nextToken())-noffsetleft)/nbinsize;
-			  int nend = (Integer.parseInt(st.nextToken())-noffsetright)/nbinsize; //updated -1 from pre-release 1.18
+			  int nbegin = (Integer.parseInt(st.nextToken().trim())-noffsetleft)/nbinsize;
+			  int nend = (Integer.parseInt(st.nextToken().trim())-noffsetright)/nbinsize; //updated -1 from pre-release 1.18
 
 			  //need to update
 			  int nactualbegin = nbegin;
@@ -854,7 +985,7 @@ public class Preprocessing
 					     double dpoissonthresh, double dfoldthresh,boolean bcontainsthresh, int npseudocountcontrol,int nbinsize,
 					     String szcolfields, boolean bpeaks, double dcountthresh, boolean bbinarizebam, boolean bpairend,
 					     boolean bgzip, boolean bsplit, int numsplitbins,
-					     boolean bsplitcols,int nnummarksplit,int nmarksplitindex, boolean bstacked
+					     boolean bsplitcols,int nnummarksplit,int nmarksplitindex, boolean bstacked, boolean bmixed
                                             ) throws IOException
     {
 
@@ -879,13 +1010,13 @@ public class Preprocessing
 	    {
 		throw new IllegalArgumentException("empty line found in "+szchromlengthfile);
 	    }
-	    chroms[ni] = st.nextToken();
+	    chroms[ni] = st.nextToken();//.trim();
 	    hmchrom.put(chroms[ni], Integer.valueOf(ni));
 	    if (!st.hasMoreTokens())
 	    {
 		throw new IllegalArgumentException("missing chromosome length for "+allines.get(ni)+" in "+szchromlengthfile);
 	    }
-	    lengths[ni] = Integer.parseInt(st.nextToken());
+	    lengths[ni] = Integer.parseInt(st.nextToken());//.trim());
 	}
 
 
@@ -967,7 +1098,12 @@ public class Preprocessing
 		  alfiles = new ArrayList();
 	          hmfiles.put(szcell+"\t"+szmark,alfiles);
 	       }
-	       alfiles.add(szfile);
+
+	       if (!alfiles.contains(szfile))
+	       {
+		   //added in v1.23 to only count once a given file
+	          alfiles.add(szfile);
+	       }
 	    }
 	    ncellmarkentry++;	    
 	}
@@ -1050,7 +1186,7 @@ public class Preprocessing
 
 	    //loading data for the cell type
 	    loadGrid(grid,bpresent,bpresentmarks,marks,nshift,nbinsize,bcenterinterval,noffsetleft,
-		     noffsetright,hmfiles,szcell,szmarkdir,hmchrom,0,szcolfields,bpeaks,false,bbinarizebam, bpairend);
+		     noffsetright,hmfiles,szcell,szmarkdir,hmchrom,0,szcolfields,bpeaks,false,bbinarizebam, bpairend,bmixed);
 	    if (bcontrolfile)
 	    {
 	       if ((gridcontrol[0] == null)||(gridcontrol[0][0].length !=numcontrolmarks))
@@ -1066,7 +1202,7 @@ public class Preprocessing
 
 	       //we have control data loading cell type data for that
 	       loadGrid(gridcontrol,bpresentcontrol,bpresentmarkscontrol,marks,nshift,nbinsize,bcenterinterval,noffsetleft,noffsetright,
-                        hmfilescontrol,szcell,szcontroldir,hmchrom,npseudocountcontrol,szcolfields,bpeaks,true,bbinarizebam,bpairend);
+                        hmfilescontrol,szcell,szcontroldir,hmchrom,npseudocountcontrol,szcolfields,bpeaks,true,bbinarizebam,bpairend,bmixed);
 	    }
 	    
 
@@ -2566,7 +2702,7 @@ public class Preprocessing
 		   throw new IllegalArgumentException(szcontrolDIR+"/"+allfilescontrol[nfile]+" header line must have two columns delimited by a tab found only one in "+
 						      szheader);
 	       }
-	       String szkey = st.nextToken()+"\t"+st.nextToken();
+	       String szkey = st.nextToken().trim()+"\t"+st.nextToken().trim();
 	       hmcontrol.put(szkey,allfilescontrol[nfile]);
 	       brcontrol.close();
 	   }
@@ -2588,7 +2724,7 @@ public class Preprocessing
 	       throw new IllegalArgumentException(signalchromfiles[nfile]+" is empty!");
 	   }
 	   StringTokenizer st = new StringTokenizer(szLine,"\t"); 
-	   szcurrcell = st.nextToken();
+	   szcurrcell = st.nextToken().trim();
 	   br.close();
 
           ArrayList al = (ArrayList) hmcellsToIndex.get(szcurrcell);
@@ -2641,7 +2777,7 @@ public class Preprocessing
 		   throw new IllegalArgumentException("Only found one entry for line "+szHeaderLine1+" in file "+szbinneddataDIR+"/"+szfilename
 						      +" expecting 2");
 	       }
-	       String szchrom = stheader.nextToken();
+	       String szchrom = stheader.nextToken().trim();
 	       chroms[nchrom] = szchrom;
 	       String szcontrolfilename = (String) hmcontrol.get(szcell+"\t"+szchrom);
 	       if (szcontrolfilename == null)
@@ -2714,7 +2850,7 @@ public class Preprocessing
 	       {
 		  int[] grid_nchrom_nbin = grid_nchrom[nbin];
 		  int[] gridcontrol_nchrom_nbin = gridcontrol_nchrom[nbin];
-	          st = new StringTokenizer(szLine,"\t");
+	          st = new StringTokenizer(szLine,"\t ");//updated in 1.23
 		  if (st.countTokens() != nummarks)
 		  {
 		     throw new IllegalArgumentException("In "+szfilename+" did not find the expected "+nummarks+" marks in this line: "+szLine);
@@ -2731,7 +2867,7 @@ public class Preprocessing
 		      throw new IllegalArgumentException("The number of lines in the control file "+
 			     szcontrolDIR+"/"+szfilename+" does not match that in the signal file "+szbinneddataDIR+"/"+szcontrolfilename); 
 		  }
-	          st = new StringTokenizer(szLineControl,"\t");
+	          st = new StringTokenizer(szLineControl,"\t ");
 	          for (int nmark = 0; nmark < nummarkscontrol; nmark++)
                   {
 		      //reading in the control data
@@ -3186,7 +3322,7 @@ public class Preprocessing
 	       throw new IllegalArgumentException(szbinneddataDIR+"/"+signalchromfiles[nfile]+" does not contain any data!");
 	   }
 	   StringTokenizer st = new StringTokenizer(szLine,"\t"); 
-	   String szcurrcell = st.nextToken();
+	   String szcurrcell = st.nextToken().trim();
 	   br.close();
 
            ArrayList al = (ArrayList) hmcellsToIndex.get(szcurrcell);
@@ -3241,7 +3377,7 @@ public class Preprocessing
  	       while ((szLine = br.readLine())!=null)
 	       {
 	          ntotallocs++;
-	          st = new StringTokenizer(szLine,"\t");
+	          st = new StringTokenizer(szLine,"\t ");
 		  if (st.countTokens() != nummarks)
 		  {
 		     throw new IllegalArgumentException("In "+szfilename+" did not find the expected "+nummarks+" marks in this line: "+szLine);
@@ -3249,7 +3385,7 @@ public class Preprocessing
 
 	          for (int nj = 0; nj < nummarks; nj++)
                   {
-		      double dval =  Double.parseDouble(st.nextToken());		      
+		      double dval =  Double.parseDouble(st.nextToken());//.trim());		      
 		      sumtags[nj] += dval;
 		  }
 	       }
@@ -3314,8 +3450,8 @@ public class Preprocessing
 	             String szLine;
 		     int nbin = 0;
 		     int nsplit = 0;
-		     String szcurrcell = st.nextToken();
-		     String szchrom = st.nextToken();
+		     String szcurrcell = st.nextToken().trim();
+		     String szchrom = st.nextToken().trim();
 		     boolean bopen = false;
 		     GZIPOutputStream pwzip = null;
 
@@ -3339,12 +3475,12 @@ public class Preprocessing
 			   bopen = true;
 			}
 	              
-                        st = new StringTokenizer(szLine,"\t");
+                        st = new StringTokenizer(szLine,"\t ");
 		        StringBuffer sbout = new StringBuffer();
 
 	                for (int ncol = 0; ncol < nummarks-1; ncol++)
 	                {
-		           double dval = Double.parseDouble(st.nextToken());
+			   double dval = Double.parseDouble(st.nextToken());//.trim());
 		           if (dval == -1)
 	                   {
 			      sbout.append("2\t");
@@ -3362,7 +3498,7 @@ public class Preprocessing
 			   }
 			}
 
-		        double dval = Double.parseDouble(st.nextToken());
+		        double dval = Double.parseDouble(st.nextToken());//.trim());
 		        if (dval == -1)
 		        {
 			   sbout.append("2\n");
@@ -3400,7 +3536,7 @@ public class Preprocessing
 		  }
 		  else
 		  {
-		     String szfile = szoutputDIR+"/"+st.nextToken()+"_"+st.nextToken()+"_binary.txt.gz";
+		     String szfile = szoutputDIR+"/"+st.nextToken().trim()+"_"+st.nextToken().trim()+"_binary.txt.gz";
 		     System.out.println("Writing to file "+szfile);
 		     GZIPOutputStream pwzip = new GZIPOutputStream(new FileOutputStream(szfile));
 		     //PrintWriter pw = new PrintWriter(new FileWriter(szfile));
@@ -3415,12 +3551,12 @@ public class Preprocessing
 		     String szLine;
 		     while ((szLine = br.readLine())!=null)
 		     {
-		        st = new StringTokenizer(szLine,"\t");
+		        st = new StringTokenizer(szLine,"\t ");
 		        StringBuffer sbout = new StringBuffer();
 
 		        for (int ncol = 0; ncol < nummarks-1; ncol++)
 			{
-		           double dval = Double.parseDouble(st.nextToken());
+			    double dval = Double.parseDouble(st.nextToken());//.trim());
 			   if (dval == -1)
 			   {
 		              sbout.append("2\t");
@@ -3438,7 +3574,7 @@ public class Preprocessing
 			   }
 			}
 
-			double dval = Double.parseDouble(st.nextToken());
+			double dval = Double.parseDouble(st.nextToken());//.trim());
 		        if (dval == -1)
 		        {
 			    sbout.append("2\n");
@@ -3470,8 +3606,8 @@ public class Preprocessing
 		  {
 		     int nbin = 0;
 		     int nsplit = 0;
-		     String szcurrcell = st.nextToken();
-		     String szchrom = st.nextToken();
+		     String szcurrcell = st.nextToken().trim();
+		     String szchrom = st.nextToken().trim();
 		     boolean bopen = false;
 		     PrintWriter pw = null;
 	             String szLine;
@@ -3489,10 +3625,10 @@ public class Preprocessing
 			    bopen = true;
 		        }
 
-	                st = new StringTokenizer(szLine,"\t");
+	                st = new StringTokenizer(szLine,"\t ");
 	                for (int ncol = 0; ncol < nummarks-1; ncol++)
 	                {
-		           double dval = Double.parseDouble(st.nextToken());
+			    double dval = Double.parseDouble(st.nextToken());//.trim());
 		           if (dval == -1)
 	                   {
 			      pw.print("2\t");
@@ -3507,7 +3643,7 @@ public class Preprocessing
 		           }
 		        }
 
-		        double dval = Double.parseDouble(st.nextToken());
+		        double dval = Double.parseDouble(st.nextToken());//.trim());
 		        if (dval == -1)
 		        {
 		           pw.println("2");
@@ -3538,7 +3674,7 @@ public class Preprocessing
 		  }
 		  else
 		  {
-	             String szfile = szoutputDIR+"/"+st.nextToken()+"_"+st.nextToken()+"_binary.txt";
+		     String szfile = szoutputDIR+"/"+st.nextToken().trim()+"_"+st.nextToken().trim()+"_binary.txt";
 	             System.out.println("Writing to file "+szfile);
 	             PrintWriter pw = new PrintWriter(new FileWriter(szfile));
 	             pw.println(szChromCellLine);
@@ -3546,10 +3682,10 @@ public class Preprocessing
 	             String szLine;
 	             while ((szLine = br.readLine())!=null)
 	             {
-	                st = new StringTokenizer(szLine,"\t");
+	                st = new StringTokenizer(szLine,"\t ");
 	                for (int ncol = 0; ncol < nummarks-1; ncol++)
 	                {
-		           double dval = Double.parseDouble(st.nextToken());
+			   double dval = Double.parseDouble(st.nextToken());//.trim());
 		           if (dval == -1)
 	                   {
 			      pw.print("2\t");
@@ -3564,7 +3700,7 @@ public class Preprocessing
 		           }
 		        }
 
-		        double dval = Double.parseDouble(st.nextToken());
+		        double dval = Double.parseDouble(st.nextToken());//.trim());
 		        if (dval == -1)
 		        {
 		           pw.println("2");
@@ -3671,7 +3807,10 @@ public class Preprocessing
 	         //added hidden check in v.1.11
 		  //read first two lines
 		  //maps the first line to the buffered reader
-                  BufferedReader brfile = Util.getBufferedReader(szcurrpath+"/"+szcurrfile);
+		  //updating in v1.23 so not so many open files at once
+		  String szpathfile = szcurrpath+"/"+szcurrfile;
+                  //BufferedReader brfile = Util.getBufferedReader(szcurrpath+"/"+szcurrfile);
+                  BufferedReader brfile = Util.getBufferedReader(szpathfile);//szcurrpath+"/"+szcurrfile);
 		  String szheader1 = brfile.readLine();
 		  if (szheader1 == null)
 		  {
@@ -3684,6 +3823,7 @@ public class Preprocessing
 		  }
 
                   String szheader2 = brfile.readLine();
+		  brfile.close();
 
 		  if (szheader2A[ndir] == null)
 		  {
@@ -3693,7 +3833,7 @@ public class Preprocessing
 		  {
 		      throw new IllegalArgumentException("Inconsistent header lines in "+szcurrpath+" found "+szheader2+" and "+szheader2A[ndir]);
 		  }
-		  hmbrA[ndir].put(szheader1, brfile);
+		  hmbrA[ndir].put(szheader1, szpathfile);//brfile);
 
 		  hsfiles.add(szheader1);
 	      }
@@ -3711,10 +3851,29 @@ public class Preprocessing
        //sbmergedheader.append(szheader2A[0]);
        String[] notpresent = new String[szheader2A.length];
 
+       //added in v1.23 to only count mark once
+       HashSet hsmarkpresent = new HashSet();
+
        for (int ndir = 0; ndir < szheader2A.length; ndir++)
        {
 	   StringTokenizer st = new StringTokenizer(szheader2A[ndir], "\t");
            int ntokens =  st.countTokens();
+
+	   //added in v1.23 to only count mark once
+	   while (st.hasMoreTokens())
+	   {
+	      String szmarktoken = st.nextToken().trim();
+	      if (hsmarkpresent.contains(szmarktoken))
+	      {
+	         throw new IllegalArgumentException("Feature "+szmarktoken+" is not unique. Found in multiple files being merged");
+       	      }
+	      else
+	      {
+		  hsmarkpresent.add(szmarktoken);
+	      }
+	   }
+
+
 	   StringBuffer sbnotpresent = new StringBuffer();
 	   sbnotpresent.append("0");
 	   for (int na = 1; na <ntokens; na++)
@@ -3756,8 +3915,10 @@ public class Preprocessing
 	   String szcurrfile = chromfilescombine[nfile];
 	   for (int ndir = 0; ndir < subdirall.length; ndir++)
 	   {
-	      BufferedReader br = (BufferedReader) hmbrA[ndir].get(szcurrfile);
-	      if (br == null)
+	       //BufferedReader br = (BufferedReader) hmbrA[ndir].get(szcurrfile);
+	      String szpathfile = (String) hmbrA[ndir].get(szcurrfile);
+	      //if (br == null)
+	      if (szpathfile == null)
 	      {
 		  System.out.println("Warning directory "+szinputdir+"/"+subdirall[ndir]+" does not contain file "+szcurrfile+" going to treat as not present");
 		  bpresent[ndir] = false;
@@ -3766,14 +3927,16 @@ public class Preprocessing
 	      else
 	      {
 		  bpresent[ndir] = true;
-		  brfiles[ndir] = br;
+		  brfiles[ndir] = Util.getBufferedReader(szpathfile);//br;
+		  brfiles[ndir].readLine();
+		  brfiles[ndir].readLine();
 	      }
 	   }
 
 
 	   StringTokenizer st = new StringTokenizer(chromfilescombine[nfile],"\t");
-           String szcell = st.nextToken();
-           String szchrom = st.nextToken();
+           String szcell = st.nextToken().trim();
+           String szchrom = st.nextToken().trim();
 
 
 	   if (bsplit)
@@ -4038,12 +4201,24 @@ public class Preprocessing
 	      }
 	   }
 
-	   for (int ndir = 0; ndir < hmbrA.length; ndir++)
+
+
+	   //  for (int ndir = 0; ndir < hmbrA.length; ndir++)
+	   //{
+	   //    BufferedReader br = (BufferedReader) hmbrA[ndir].get(szcurrfile);
+	   //    if (br != null)
+	   //    {
+	   //       br.close();
+	   //    }
+	   //}
+
+
+	   for (int ndir = 0; ndir < brfiles.length; ndir++)
 	   {
-	       BufferedReader br = (BufferedReader) hmbrA[ndir].get(szcurrfile);
-	       if (br != null)
+	       //BufferedReader br = (BufferedReader) hmbrA[ndir].get(szcurrfile);
+	       if (brfiles[ndir] != null)
 	       {
-	          br.close();
+	          brfiles[ndir].close();
 	       }
 	   }
 


=====================================
edu/mit/compbio/ChromHMM/StateAnalysis.java
=====================================
@@ -248,6 +248,11 @@ public class StateAnalysis
 
 	      int nactualindex = 0;
 	      files = new String[nnonhiddencount];// dir.list(); 
+	      if (nnonhiddencount == 0)
+	      {
+	         throw new IllegalArgumentException("No files found in "+szinputcoorddir);
+	      }
+
               for (int nfile = 0; nfile < filesWithHidden.length; nfile++)
 	      {
 	         if (!(new File(filesWithHidden[nfile])).isHidden())
@@ -307,7 +312,7 @@ public class StateAnalysis
 		if (szHeader == null)
 		    throw new IllegalArgumentException(szposteriordir+"/"+szposteriorfiles_nfile+" is empty!");
 		StringTokenizer st =new StringTokenizer(szHeader,"\t");
-		String szcurrcell = st.nextToken();
+		String szcurrcell = st.nextToken().trim();
 		if ((!szcurrcell.equals(szcell))&&(!szcell.equals("")))
 		{
 		    brposterior.close();
@@ -315,7 +320,7 @@ public class StateAnalysis
 		else
 		{
 		   bposteriorfound = true;
-		   String szchrom = st.nextToken();
+		   String szchrom = st.nextToken().trim();
 
 		   int numlines = 0;
 		   szLine = brposterior.readLine();
@@ -327,7 +332,7 @@ public class StateAnalysis
 	           int numcurrstates = st.countTokens();
 	           if (bfirst)
 	           {
-		      chorder = st.nextToken().charAt(0);
+		      chorder = st.nextToken().trim().charAt(0);
 		      bfirst = false;
 		      nfirstindex = nfile;
 		      numposteriorstates = numcurrstates;
@@ -360,7 +365,7 @@ public class StateAnalysis
 		      float[] posterior_nline = posterior[nline];
                       for (int nstate = 0; nstate < numposteriorstates; nstate++)
                       {
-		         float fval = Float.parseFloat(st.nextToken());
+		         float fval = Float.parseFloat(st.nextToken().trim());
 		         posterior_nline[nstate] = fval;
 		         tallylabel[nstate] += fval;
 		      }
@@ -381,13 +386,13 @@ public class StateAnalysis
 	              {
 		         //gets the start and end coordinates
 		         StringTokenizer stcolfields = new StringTokenizer(szcolfields,",");
-		         nchromindex = Integer.parseInt(stcolfields.nextToken());
-		         nstartindex = Integer.parseInt(stcolfields.nextToken());
-		         nendindex = Integer.parseInt(stcolfields.nextToken());
+		         nchromindex = Integer.parseInt(stcolfields.nextToken().trim());
+		         nstartindex = Integer.parseInt(stcolfields.nextToken().trim());
+		         nendindex = Integer.parseInt(stcolfields.nextToken().trim());
 
 	                 if (busesignal)
 	                 {
-		            nsignalindex = Integer.parseInt(stcolfields.nextToken());
+		            nsignalindex = Integer.parseInt(stcolfields.nextToken().trim());
 			 }
 		      }
 
@@ -695,9 +700,9 @@ public class StateAnalysis
 	       st = new StringTokenizer(szLine,"\t ");
 	    } 
 
-	    String szchrom = st.nextToken();
-	    int nbegincoord = Integer.parseInt(st.nextToken());
-	    int nendcoord = Integer.parseInt(st.nextToken());
+	    String szchrom = st.nextToken().trim();
+	    int nbegincoord = Integer.parseInt(st.nextToken().trim());
+	    int nendcoord = Integer.parseInt(st.nextToken().trim());
 	    if (nbegincoord % nbinsize != 0)
 	    {
 		throw new IllegalArgumentException("Binsize of "+nbinsize+" does not agree with coordinates in input segment "+szLine+". -b binsize should match parameter value to LearnModel or "+
@@ -705,7 +710,7 @@ public class StateAnalysis
 	    }
 	    int nbegin = nbegincoord/nbinsize;
 	    int nend = (nendcoord-1)/nbinsize;
-	    szlabel = st.nextToken();
+	    szlabel = st.nextToken().trim();
 
 	    short slabel = -1;
 	    if (bstringlabels)
@@ -783,7 +788,7 @@ public class StateAnalysis
 	          }
 	          catch (NumberFormatException ex2)
 	          {
-		     throw new IllegalArgumentException("In fourth column neither state number or ID file found in segmentation file. Use '-labels' option to run overlap enrichment treating fourth column as labels");
+		     throw new IllegalArgumentException("In fourth column neither state number or ID found in segmentation file. Use '-labels' option to run overlap enrichment treating fourth column as labels");
 	          }
 	       }
 
@@ -872,6 +877,11 @@ public class StateAnalysis
 
 	       int nactualindex = 0;
 	       files = new String[nnonhiddencount];// dir.list(); 
+	       if (nnonhiddencount == 0)
+	       {
+		   throw new IllegalArgumentException("No files found in "+szinputcoorddir);
+	       }
+
                for (int nfile = 0; nfile < filesWithHidden.length; nfile++)
 	       {
 	          if (!(new File(filesWithHidden[nfile])).isHidden())
@@ -925,13 +935,13 @@ public class StateAnalysis
 	    if (szcolfields  != null)
 	    {
 	       StringTokenizer stcolfields = new StringTokenizer(szcolfields,",");
-	       nchromindex = Integer.parseInt(stcolfields.nextToken());
-	       nstartindex = Integer.parseInt(stcolfields.nextToken());
-	       nendindex = Integer.parseInt(stcolfields.nextToken());
+	       nchromindex = Integer.parseInt(stcolfields.nextToken().trim());
+	       nstartindex = Integer.parseInt(stcolfields.nextToken().trim());
+	       nendindex = Integer.parseInt(stcolfields.nextToken().trim());
 
 	       if (busesignal)
 	       {
-		   nsignalindex = Integer.parseInt(stcolfields.nextToken());
+		   nsignalindex = Integer.parseInt(stcolfields.nextToken().trim());
 	       }
 	    }
 
@@ -1244,6 +1254,11 @@ public class StateAnalysis
 
 	     int nactualindex = 0;
 	     files = new String[nnonhiddencount];// dir.list(); 
+	     if (nnonhiddencount == 0)
+	     {
+	        throw new IllegalArgumentException("No files found in "+szinputcoorddir);
+	     }
+
              for (int nfile = 0; nfile < filesWithHidden.length; nfile++)
 	     {
 	        if (!(new File(filesWithHidden[nfile])).isHidden())
@@ -1309,9 +1324,9 @@ public class StateAnalysis
 	     st = new StringTokenizer(szLine,"\t ");
           }
 
-	  String szchrom = st.nextToken();
-	  int nbegincoord = Integer.parseInt(st.nextToken());
-	  int nendcoord = Integer.parseInt(st.nextToken());
+	  String szchrom = st.nextToken().trim();
+	  int nbegincoord = Integer.parseInt(st.nextToken().trim());
+	  int nendcoord = Integer.parseInt(st.nextToken().trim());
 	  if (nbegincoord % nbinsize != 0)
 	  {
 		throw new IllegalArgumentException("Binsize of "+nbinsize+" does not agree with coordinates in input segment "+szLine+". -b binsize should match parameter value to LearnModel or "+
@@ -1319,7 +1334,7 @@ public class StateAnalysis
 	  }
           //int nbegin = nbegincoord/nbinsize;
 	  int nend = (nendcoord-1)/nbinsize;
-          szlabel = st.nextToken();
+          szlabel = st.nextToken().trim();
 	  short slabel;
 
 
@@ -1398,7 +1413,7 @@ public class StateAnalysis
 		}
 	        catch (NumberFormatException ex2)
 	        {
-                   throw new IllegalArgumentException("In fourth column neither state number or ID file found in segmentation file. Use '-labels' option to run overlap enrichment treating fourth column as labels");
+                   throw new IllegalArgumentException("In fourth column neither state number or ID found in segmentation file. Use '-labels' option to run overlap enrichment treating fourth column as labels");
 		}
 	     }
 
@@ -1461,12 +1476,12 @@ public class StateAnalysis
 	        st = new StringTokenizer(szLine,"\t ");
 	     }
 
-	     String szchrom = st.nextToken();
+	     String szchrom = st.nextToken().trim();
 	     if (!szchrom.equals(szchromwant)) 
 	        continue;
 
-	     int nbegincoord = Integer.parseInt(st.nextToken());
-	     int nendcoord = Integer.parseInt(st.nextToken());
+	     int nbegincoord = Integer.parseInt(st.nextToken().trim());
+	     int nendcoord = Integer.parseInt(st.nextToken().trim());
 
 	     //if (nbegincoord % nbinsize != 0)
 	     // {
@@ -1474,7 +1489,7 @@ public class StateAnalysis
 	     //}
 	     int nbegin = nbegincoord/nbinsize;
 	     int nend = (nendcoord-1)/nbinsize;
-	     szlabel = st.nextToken();
+	     szlabel = st.nextToken().trim();
 	     short slabel = -1;
 
 	     if (bstringlabels)
@@ -1528,7 +1543,7 @@ public class StateAnalysis
 		   }
 		   catch (NumberFormatException ex2)
 		   {
-		      throw new IllegalArgumentException("In fourth column neither state number or ID file found in segmentation file. Use '-labels' option to run overlap enrichment treating fourth column as labels");
+		      throw new IllegalArgumentException("In fourth column neither state number or ID found in segmentation file. Use '-labels' option to run overlap enrichment treating fourth column as labels");
 	           }
 		}
 	     }
@@ -1571,13 +1586,13 @@ public class StateAnalysis
 	     if (szcolfields  != null)
 	     {
 	        StringTokenizer stcolfields = new StringTokenizer(szcolfields,",");
-		nchromindex = Integer.parseInt(stcolfields.nextToken());
-		nstartindex = Integer.parseInt(stcolfields.nextToken());
-		nendindex = Integer.parseInt(stcolfields.nextToken());
+		nchromindex = Integer.parseInt(stcolfields.nextToken().trim());
+		nstartindex = Integer.parseInt(stcolfields.nextToken().trim());
+		nendindex = Integer.parseInt(stcolfields.nextToken().trim());
 
 	        if (busesignal)
 	        {
-	           nsignalindex = Integer.parseInt(stcolfields.nextToken());
+	           nsignalindex = Integer.parseInt(stcolfields.nextToken().trim());
 	        }
 	     }
 
@@ -1854,8 +1869,8 @@ public class StateAnalysis
 	   while ((szLine = bridlabel.readLine())!=null)
            {
 	      StringTokenizer st = new StringTokenizer(szLine,"\t");
-	      String szID = st.nextToken();
-	      String szLabelExtend = st.nextToken();
+	      String szID = st.nextToken().trim();
+	      String szLabelExtend = st.nextToken().trim();
 	      hmlabelExtend.put(szID,szLabelExtend);
 	   }
 	   bridlabel.close();
@@ -1888,6 +1903,11 @@ public class StateAnalysis
 	nf5.setGroupingUsed(false);
 	nf5.setMinimumFractionDigits(5);
 
+        NumberFormat nf10 = NumberFormat.getInstance();
+        nf10.setMaximumFractionDigits(10);
+	nf10.setGroupingUsed(false);
+	nf10.setMinimumFractionDigits(10);
+
 	System.out.println("Writing to file "+szoutfile+".txt");
 	PrintWriter pw = new PrintWriter(new FileWriter(szoutfile+".txt"));
 
@@ -1963,7 +1983,7 @@ public class StateAnalysis
 	pw.print("Base\t100");
         for (int nfile = 0; nfile < tallyoverlaplabel.length; nfile++)
 	{
-	    pw.print("\t"+nf5.format(100*(dsumoverlaplabel[nfile]/dsumlabel)));
+	    pw.print("\t"+nf10.format(100*(dsumoverlaplabel[nfile]/dsumlabel)));
 	}
 	pw.println();
 	pw.close();
@@ -2134,10 +2154,10 @@ public class StateAnalysis
 	   if (szcolfields != null)
            {
 	       StringTokenizer stcolfields = new StringTokenizer(szcolfields,",");
-	       nchromindex = Integer.parseInt(stcolfields.nextToken());
-	       npositionindex = Integer.parseInt(stcolfields.nextToken());
-	       nstrandindex = Integer.parseInt(stcolfields.nextToken());
-	       nsignalindex = Integer.parseInt(stcolfields.nextToken());
+	       nchromindex = Integer.parseInt(stcolfields.nextToken().trim());
+	       npositionindex = Integer.parseInt(stcolfields.nextToken().trim());
+	       nstrandindex = Integer.parseInt(stcolfields.nextToken().trim());
+	       nsignalindex = Integer.parseInt(stcolfields.nextToken().trim());
 	   }
 	}
         else if (busestrand && !busesignal)
@@ -2145,9 +2165,9 @@ public class StateAnalysis
 	   if (szcolfields != null)
            {
 	       StringTokenizer stcolfields = new StringTokenizer(szcolfields,",");
-	       nchromindex = Integer.parseInt(stcolfields.nextToken());
-	       npositionindex = Integer.parseInt(stcolfields.nextToken());
-	       nstrandindex = Integer.parseInt(stcolfields.nextToken());
+	       nchromindex = Integer.parseInt(stcolfields.nextToken().trim());
+	       npositionindex = Integer.parseInt(stcolfields.nextToken().trim());
+	       nstrandindex = Integer.parseInt(stcolfields.nextToken().trim());
 	   }
 	}
         else if (!busestrand && busesignal)
@@ -2155,9 +2175,9 @@ public class StateAnalysis
 	   if (szcolfields != null)
 	   {
 	      StringTokenizer stcolfields = new StringTokenizer(szcolfields,",");
-	      nchromindex = Integer.parseInt(stcolfields.nextToken());
-	      npositionindex = Integer.parseInt(stcolfields.nextToken());
-	      nsignalindex = Integer.parseInt(stcolfields.nextToken());
+	      nchromindex = Integer.parseInt(stcolfields.nextToken().trim());
+	      npositionindex = Integer.parseInt(stcolfields.nextToken().trim());
+	      nsignalindex = Integer.parseInt(stcolfields.nextToken().trim());
 	   }
 	   else //if (szcolfields == null)
            {
@@ -2169,8 +2189,8 @@ public class StateAnalysis
 	   if (szcolfields != null)
 	   {
 	      StringTokenizer stcolfields = new StringTokenizer(szcolfields,",");
-	      nchromindex = Integer.parseInt(stcolfields.nextToken());
-	      npositionindex = Integer.parseInt(stcolfields.nextToken());
+	      nchromindex = Integer.parseInt(stcolfields.nextToken().trim());
+	      npositionindex = Integer.parseInt(stcolfields.nextToken().trim());
 	   }
 	}
 
@@ -2211,6 +2231,7 @@ public class StateAnalysis
     {
 
 
+	boolean bchrommatch =  false;//added in 1.23 to check for chromosome matches
 	//an array of chromosome names
 	ArrayList alchromindex = new ArrayList();
 
@@ -2247,13 +2268,13 @@ public class StateAnalysis
 	    {
 	       st = new StringTokenizer(szLine,"\t ");
 	    }
-	    String szchrom = st.nextToken();
+	    String szchrom = st.nextToken().trim();
             //assumes segments are in standard bed format which to get to 
 	    //0-based inclusive requires substract 1 from the end
-	    //int nbegin = Integer.parseInt(st.nextToken())/nbinsize; 
-	    st.nextToken();
-	    int nend = (Integer.parseInt(st.nextToken())-1)/nbinsize; 
-	    szlabel = st.nextToken();
+	    //int nbegin = Integer.parseInt(st.nextToken().trim())/nbinsize; 
+	    st.nextToken().trim();
+	    int nend = (Integer.parseInt(st.nextToken().trim())-1)/nbinsize; 
+	    szlabel = st.nextToken().trim();
 	    short slabel;
 
 	    if (bstringlabels)
@@ -2326,7 +2347,7 @@ public class StateAnalysis
 	          }
 	          catch (NumberFormatException ex2)
 	          {
-	             throw new IllegalArgumentException("In fourth column neither state number or ID file found in segmentation file. Use '-labels' option to run overlap enrichment treating fourth column as labels");
+	             throw new IllegalArgumentException("In fourth column neither state number or ID found in segmentation file. Use '-labels' option to run overlap enrichment treating fourth column as labels");
 	          }
 	       }
 	    
@@ -2403,14 +2424,16 @@ public class StateAnalysis
 	         st = new StringTokenizer(szLine,"\t ");
 	      }
 
-	      String szchrom = st.nextToken();
+	      String szchrom = st.nextToken().trim();
 	      if (!szchromwant.equals(szchrom))
 		  continue;
+
+	      bchrommatch = true;
               //assumes segments are in standard bed format which to get to 
 	      //0-based inclusive requires substract 1 from the end
-	      int nbegin = Integer.parseInt(st.nextToken())/nbinsize;
-	      int nend = (Integer.parseInt(st.nextToken())-1)/nbinsize; 
-	      szlabel = st.nextToken();
+	      int nbegin = Integer.parseInt(st.nextToken().trim())/nbinsize;
+	      int nend = (Integer.parseInt(st.nextToken().trim())-1)/nbinsize; 
+	      szlabel = st.nextToken().trim();
 	      short slabel = -1;
 
 	      if (bstringlabels)
@@ -2464,7 +2487,7 @@ public class StateAnalysis
 		    }
 		    catch (NumberFormatException ex2)
 		    {
-		       throw new IllegalArgumentException("In fourth column neither state number or ID file found in segmentation file. Use '-labels' option to run overlap enrichment treating fourth column as labels");
+		       throw new IllegalArgumentException("In fourth column neither state number or ID found in segmentation file. Use '-labels' option to run overlap enrichment treating fourth column as labels");
 		    }
 	         }
 	      }
@@ -2575,6 +2598,12 @@ public class StateAnalysis
            brcoords.close(); 	    
 	}
 
+	if (!bchrommatch)
+	{
+	    throw new IllegalArgumentException("No chromosome name matches found between "+szanchorpositions+
+                                               " and those in the segmentation file.");
+	}
+
 	outputneighborhood(tallyoverlaplabel,tallylabel,dsumoverlaplabel,szoutfile,nspacing,numright,
                            numleft,theColor,ChromHMM.convertCharOrderToStringOrder(szlabel.charAt(0)),sztitle,0,
                            szlabelmapping,szlabel.charAt(0), bprintimage, bstringlabels, hmIndexToLabel);
@@ -2610,6 +2639,7 @@ public class StateAnalysis
 					String sztitle,String szlabelmapping, boolean bprintimage,boolean  bstringlabels) throws IOException
     {
 	//highmem
+	boolean bchrommatch =  false;//added in 1.23 to check for chromosome matches
 	//stores all the segments in the data
 	ArrayList alsegments = new ArrayList();
 
@@ -2645,12 +2675,12 @@ public class StateAnalysis
 	    {
 		st = new StringTokenizer(szLine,"\t ");
 	    }
-	    String szchrom = st.nextToken();
+	    String szchrom = st.nextToken().trim();
             //assumes segments are in standard bed format which to get to 
 	    //0-based inclusive requires substract 1 from the end
-	    int nbegin = Integer.parseInt(st.nextToken())/nbinsize;
-	    int nend = (Integer.parseInt(st.nextToken())-1)/nbinsize; 
-	    szlabel = st.nextToken();
+	    int nbegin = Integer.parseInt(st.nextToken().trim())/nbinsize;
+	    int nend = (Integer.parseInt(st.nextToken().trim())-1)/nbinsize; 
+	    szlabel = st.nextToken().trim();
 	    short slabel = -1;
 
 	    if (bstringlabels)
@@ -2728,7 +2758,7 @@ public class StateAnalysis
 	          }
 	          catch (NumberFormatException ex2)
 	          {
-		     throw new IllegalArgumentException("In fourth column neither state number or ID file found in segmentation file. Use '-labels' option to run overlap enrichment treating fourth column as labels");
+		     throw new IllegalArgumentException("In fourth column neither state number or ID found in segmentation file. Use '-labels' option to run overlap enrichment treating fourth column as labels");
 		  }
 	       }
 
@@ -2854,6 +2884,7 @@ public class StateAnalysis
 	   {
 	      int nchrom = objChrom.intValue();
 	      short[] labels_nchrom = labels[nchrom];
+	      bchrommatch = true;
 
 	      if (bposstrand)
 	      {
@@ -2887,6 +2918,13 @@ public class StateAnalysis
 	}
         brcoords.close(); 	    
 
+
+	if (!bchrommatch)
+	{
+	    throw new IllegalArgumentException("No chromosome name matches found between "+szanchorpositions+
+                                               " and those in the segmentation file.");
+	}
+
 	outputneighborhood(tallyoverlaplabel,tallylabel,dsumoverlaplabel,szoutfile,nspacing,numright,
                            numleft,theColor,ChromHMM.convertCharOrderToStringOrder(szlabel.charAt(0)),sztitle,0,
                            szlabelmapping,szlabel.charAt(0),bprintimage, bstringlabels, hmIndexToLabel);
@@ -2920,6 +2958,8 @@ public class StateAnalysis
 					     boolean busestrand, boolean busesignal,String szcolfields, int noffsetanchor,
 					   String szoutfile,Color theColor,String sztitle,String szlabelmapping, boolean bprintimage) throws IOException
     {
+
+	boolean bchrommatch =  false;//added in 1.23 to check for chromosome matches
 	//posterior here is really signal just using equivalent variable names
 	//list of possible posterior files
 	File posteriordir = new File(szposteriordir);
@@ -2959,7 +2999,7 @@ public class StateAnalysis
 	           throw new IllegalArgumentException(szposteriordir+"/"+szposteriorfiles_nfile+" is empty!");
 	        }
 		StringTokenizer st =new StringTokenizer(szLine,"\t");
-		String szcurrcell = st.nextToken();
+		String szcurrcell = st.nextToken().trim();
 		if ((!szcurrcell.equals(szcell))&&(!szcell.equals("")))
 		{
 		    brposterior.close();
@@ -2970,7 +3010,7 @@ public class StateAnalysis
 
   	           //must match cell type or consistent with empty cell type
 
-		   String szchrom = st.nextToken();
+		   String szchrom = st.nextToken().trim();
 	    
 		   int numlines = 0;
 		   szLine = brposterior.readLine(); //gets state header
@@ -2982,7 +3022,7 @@ public class StateAnalysis
 		   int numcurrstates = st.countTokens();
 	           if (bfirst)
 	           {
-		       chorder = st.nextToken().charAt(0);
+		       chorder = st.nextToken().trim().charAt(0);
 		       bfirst = false;
 		       nfirstindex = nfile;
 		       numposteriorstates = numcurrstates;
@@ -3036,6 +3076,7 @@ public class StateAnalysis
 
 	              if (szreadchrom.equals(szchrom))
 	              {  
+			 bchrommatch = true;
 	                 int nanchor = (Integer.parseInt(szLineA[theAnchorIndex.npositionindex])-noffsetanchor);
 	                 boolean bposstrand = true;
 	                 if (busestrand)
@@ -3120,6 +3161,12 @@ public class StateAnalysis
 	{
 	    throw new IllegalArgumentException("No posterior file found for cell type "+szcell);
 	}   
+
+	if (!bchrommatch)
+	{
+	    throw new IllegalArgumentException("No chromosome name matches found between "+szanchorpositions+
+                                               " and those in the segmentation file.");
+	}
  
 	outputneighborhoodsignal(tallyoverlaplabel,tallylabel,dsumoverlaplabel,numlocs,szoutfile,nspacing,numright,
 				 numleft,theColor,ChromHMM.convertCharOrderToStringOrder(chorder),sztitle,szmarknames,szlabelmapping,chorder,bprintimage);
@@ -3154,6 +3201,7 @@ public class StateAnalysis
 					      String szoutfile,Color theColor,String sztitle,String szlabelmapping, boolean bprintimage) throws IOException
     {
 
+	boolean bchrommatch =  false;//added in 1.23 to check for chromosome matches
 	//list of possible posterior files
 	File posteriordir = new File(szposteriordir);
 	if (!posteriordir.exists())
@@ -3191,7 +3239,7 @@ public class StateAnalysis
 	           throw new IllegalArgumentException(szposteriordir+"/"+szposteriorfiles_nfile+" is empty!");
 	        }
 		StringTokenizer st =new StringTokenizer(szLine,"\t");
-		String szcurrcell = st.nextToken();
+		String szcurrcell = st.nextToken().trim();
 		if ((!szcurrcell.equals(szcell))&&(!szcell.equals("")))
 		{
 		    brposterior.close();
@@ -3202,7 +3250,7 @@ public class StateAnalysis
 
   	           //must match cell type or consistent with empty cell type
 
-		   String szchrom = st.nextToken();
+		   String szchrom = st.nextToken().trim();
 	    
 		   int numlines = 0;
 		   szLine = brposterior.readLine(); //gets state header
@@ -3214,7 +3262,7 @@ public class StateAnalysis
 		   int numcurrstates = st.countTokens();
 	           if (bfirst)
 	           {
-		       chorder = st.nextToken().charAt(0);
+		       chorder = st.nextToken().trim().charAt(0);
 		       bfirst = false;
 		       nfirstindex = nfile;
 		       numposteriorstates = numcurrstates;
@@ -3268,6 +3316,7 @@ public class StateAnalysis
 
 	              if (szreadchrom.equals(szchrom))
 	              {  
+			 bchrommatch = true;
 	                 int nanchor = (Integer.parseInt(szLineA[theAnchorIndex.npositionindex])-noffsetanchor);
 	                 boolean bposstrand = true;
 	                 if (busestrand)
@@ -3350,6 +3399,12 @@ public class StateAnalysis
 	{
 	    throw new IllegalArgumentException("No posterior file found for cell type "+szcell);
 	}   
+
+	if (!bchrommatch)
+	{
+	    throw new IllegalArgumentException("No chromosome name matches found between "+szanchorpositions+
+                                               " and those in the segmentation file.");
+	}
  
 	outputneighborhood(tallyoverlaplabel,tallylabel,dsumoverlaplabel,szoutfile,nspacing,numright,
                            numleft,theColor,ChromHMM.convertCharOrderToStringOrder(chorder),sztitle,1,
@@ -3442,10 +3497,13 @@ public class StateAnalysis
 		      //copying over the contents
 		      heatmapreduce[nkeepindex][ncol] = heatmapfold[nstate][ncol];
 		  }
-		  rowlabels[nkeepindex] = stheader.nextToken();//""+(nstate+1);
+		  rowlabels[nkeepindex] = stheader.nextToken().trim();//""+(nstate+1);
 		  nkeepindex++;
 	      }
-	      stheader.nextToken();
+	      else //added in 1.23
+	      {
+	         stheader.nextToken();
+	      }
 	   }
 	   heatmapfold = heatmapreduce;
 	}
@@ -3454,7 +3512,7 @@ public class StateAnalysis
 	    rowlabels = new String[tallyoverlaplabel[0].length];
 	    for (int ni = 0; ni < rowlabels.length; ni++)
 	    {
-		rowlabels[ni] = ""+stheader.nextToken();
+		rowlabels[ni] = ""+stheader.nextToken().trim();
 	    }
 	}
 
@@ -3672,11 +3730,11 @@ public class StateAnalysis
 
 	//maps each mark identifier to a consistent indext value
 	HashMap hmNameToID = new HashMap();
-	String szaxis = stheader.nextToken();
+	String szaxis = stheader.nextToken().trim();
 	int ncol = 0;
 	while (stheader.hasMoreTokens())
 	{
-	    hmNameToID.put(stheader.nextToken(), Integer.valueOf(ncol));
+	    hmNameToID.put(stheader.nextToken().trim(), Integer.valueOf(ncol));
 	    ncol++;
 	}
 
@@ -3703,10 +3761,10 @@ public class StateAnalysis
 		throw new IllegalArgumentException("Expecting "+numstatesmain+" lines in "+szmainmodelfile+" found fewer.");
 	    }
 	    StringTokenizer stLine = new StringTokenizer(szLine,"\t");
-	    rowlabels[nstate] = stLine.nextToken();
+	    rowlabels[nstate] = stLine.nextToken().trim();
 	    for (ncol = 0; ncol < numcols; ncol++)
 	    {
-	       emissionparamsmain[nstate][ncol] = Double.parseDouble(stLine.nextToken());
+	       emissionparamsmain[nstate][ncol] = Double.parseDouble(stLine.nextToken().trim());
 	    }
 	}
 	bremissions.close();
@@ -3759,7 +3817,7 @@ public class StateAnalysis
 	           ncol  = 0;
 		   while (stheader.hasMoreTokens())
 		   {
-		       mappedcol[ncol] = ((Integer) hmNameToID.get(stheader.nextToken())).intValue();
+		       mappedcol[ncol] = ((Integer) hmNameToID.get(stheader.nextToken().trim())).intValue();
 		       ncol++;
 		   }
 		}
@@ -3779,7 +3837,7 @@ public class StateAnalysis
 		    for (ncol = 0; ncol < numcols; ncol++)
 		    {
 			//storing the emission parameter for
-			theRecEmissionFile.emissionparams[nstate][mappedcol[ncol]] = Double.parseDouble(stLine.nextToken());
+			theRecEmissionFile.emissionparams[nstate][mappedcol[ncol]] = Double.parseDouble(stLine.nextToken().trim());
 		    }
 		}
 		//adds a record for this emission file containing the parameters, number of states, and number of marks



View it on GitLab: https://salsa.debian.org/med-team/chromhmm/-/commit/bd371febdf5c385e3d51c83ff7a2ec0dfcde1f12

-- 
View it on GitLab: https://salsa.debian.org/med-team/chromhmm/-/commit/bd371febdf5c385e3d51c83ff7a2ec0dfcde1f12
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20210907/f09d3a1f/attachment-0001.htm>


More information about the debian-med-commit mailing list