[med-svn] [Git][med-team/chromhmm][upstream] New upstream version 1.23+dfsg
Nilesh Patra (@nilesh)
gitlab at salsa.debian.org
Tue Sep 7 13:14:59 BST 2021
Nilesh Patra pushed to branch upstream at Debian Med / chromhmm
Commits:
bd371feb by Nilesh Patra at 2021-09-07T17:35:44+05:30
New upstream version 1.23+dfsg
- - - - -
7 changed files:
- README.md
- edu/mit/compbio/ChromHMM/BrowserOutput.java
- edu/mit/compbio/ChromHMM/ChromHMM.java
- edu/mit/compbio/ChromHMM/ConvertGeneTable.java
- edu/mit/compbio/ChromHMM/NestedEliminateInitialize.java
- edu/mit/compbio/ChromHMM/Preprocessing.java
- edu/mit/compbio/ChromHMM/StateAnalysis.java
Changes:
=====================================
README.md
=====================================
@@ -1,2 +1,2 @@
-See http://compbio.mit.edu/ChromHMM/ or https://ernstlab.biolchem.ucla.edu/ChromHMM/ for more information on ChromHMM.
+See https://ernstlab.biolchem.ucla.edu/ChromHMM/ for more information on ChromHMM.
========
=====================================
edu/mit/compbio/ChromHMM/BrowserOutput.java
=====================================
@@ -159,8 +159,8 @@ public class BrowserOutput
while ((szLine = brcolor.readLine())!=null)
{
StringTokenizer st = new StringTokenizer(szLine,"\t");
- String szID = st.nextToken();
- String szColor = st.nextToken();
+ String szID = st.nextToken().trim();
+ String szColor = st.nextToken().trim();
hmcolor.put(szID, szColor);
}
brcolor.close();
@@ -347,8 +347,8 @@ public class BrowserOutput
while ((szLine = bridlabel.readLine())!=null)
{
StringTokenizer st = new StringTokenizer(szLine,"\t");
- String szID = st.nextToken();
- String szLabelExtend = st.nextToken();
+ String szID = st.nextToken().trim();
+ String szLabelExtend = st.nextToken().trim();
hmlabelExtend.put(szID,szLabelExtend);
}
bridlabel.close();
@@ -378,10 +378,10 @@ public class BrowserOutput
while ((szLine =brsegment.readLine())!=null)
{
StringTokenizer st = new StringTokenizer(szLine,"\t");
- String szcurrchrom = st.nextToken();
- int nbegin = Integer.parseInt(st.nextToken());
- int nend = Integer.parseInt(st.nextToken());
- String szFullID = st.nextToken();
+ String szcurrchrom = st.nextToken().trim();
+ int nbegin = Integer.parseInt(st.nextToken().trim());
+ int nend = Integer.parseInt(st.nextToken().trim());
+ String szFullID = st.nextToken().trim();
String szID = szFullID.substring(1); //this removes ordering type
if (bfirst)
{
@@ -426,10 +426,10 @@ public class BrowserOutput
while ((szLine =brsegment.readLine())!=null)
{
StringTokenizer st = new StringTokenizer(szLine,"\t");
- String szcurrchrom = st.nextToken();
- int nbegin = Integer.parseInt(st.nextToken());
- int nend = Integer.parseInt(st.nextToken());
- String szFullID = st.nextToken();
+ String szcurrchrom = st.nextToken().trim();
+ int nbegin = Integer.parseInt(st.nextToken().trim());
+ int nend = Integer.parseInt(st.nextToken().trim());
+ String szFullID = st.nextToken().trim();
String szID = szFullID.substring(1); //this removes ordering type
if (bfirst)
{
@@ -496,10 +496,10 @@ public class BrowserOutput
while ((szLine = brsegment.readLine())!=null)
{
StringTokenizer st = new StringTokenizer(szLine,"\t");
- String szchrom = st.nextToken();
- int nbegin = Integer.parseInt(st.nextToken());
- int nend = Integer.parseInt(st.nextToken());
- szLabelFull = st.nextToken();
+ String szchrom = st.nextToken().trim();
+ int nbegin = Integer.parseInt(st.nextToken().trim());
+ int nend = Integer.parseInt(st.nextToken().trim());
+ szLabelFull = st.nextToken().trim();
String szLabel = szLabelFull.substring(1);
hmlabelToFull.put(szLabel, szLabelFull);
@@ -740,10 +740,10 @@ public class BrowserOutput
while ((szLine = brsegment.readLine())!=null)
{
StringTokenizer st = new StringTokenizer(szLine,"\t");
- String szchrom = st.nextToken();
- int nbegin = Integer.parseInt(st.nextToken());
- int nend = Integer.parseInt(st.nextToken());
- szLabelFull = st.nextToken();
+ String szchrom = st.nextToken().trim();
+ int nbegin = Integer.parseInt(st.nextToken().trim());
+ int nend = Integer.parseInt(st.nextToken().trim());
+ szLabelFull = st.nextToken().trim();
String szLabel = szLabelFull.substring(1);
hmlabelToFull.put(szLabel, szLabelFull);
@@ -827,12 +827,12 @@ public class BrowserOutput
while ((szLine = brsegment.readLine())!=null)
{
StringTokenizer st = new StringTokenizer(szLine,"\t");
- String szchrom = st.nextToken();
+ String szchrom = st.nextToken().trim();
if (szchrom.equals(szcurrchrom))
{
- int nbegin = Integer.parseInt(st.nextToken());
- int nend = Integer.parseInt(st.nextToken());
- szLabelFull = st.nextToken();
+ int nbegin = Integer.parseInt(st.nextToken().trim());
+ int nend = Integer.parseInt(st.nextToken().trim());
+ szLabelFull = st.nextToken().trim();
String szLabel = szLabelFull.substring(1);
ArrayList alRecs = (ArrayList) hmcoords.get(szLabel);
@@ -930,12 +930,12 @@ public class BrowserOutput
while ((szLine = brsegment.readLine())!=null)
{
StringTokenizer st = new StringTokenizer(szLine,"\t");
- String szchrom = st.nextToken();
+ String szchrom = st.nextToken().trim();
if (szchrom.equals(szcurrchrom))
{
- int nbegin = Integer.parseInt(st.nextToken());
- int nend = Integer.parseInt(st.nextToken());
- szLabelFull = st.nextToken();
+ int nbegin = Integer.parseInt(st.nextToken().trim());
+ int nend = Integer.parseInt(st.nextToken().trim());
+ szLabelFull = st.nextToken().trim();
String szLabel = szLabelFull.substring(1);
ArrayList alRecs = (ArrayList) hmcoords.get(szLabel);
=====================================
edu/mit/compbio/ChromHMM/ChromHMM.java
=====================================
@@ -814,8 +814,8 @@ public class ChromHMM
while ((szLine = bridlabel.readLine())!=null)
{
StringTokenizer st = new StringTokenizer(szLine,"\t");
- String szID = st.nextToken();
- String szLabelExtend = st.nextToken();
+ String szID = st.nextToken().trim();
+ String szLabelExtend = st.nextToken().trim();
hmlabelExtend.put(szID,szLabelExtend);
}
bridlabel.close();
@@ -1072,7 +1072,7 @@ public class ChromHMM
String szLine;
while ((szLine = brstate.readLine())!=null)
{
- StringTokenizer st = new StringTokenizer(szLine,"\t");
+ StringTokenizer st = new StringTokenizer(szLine,"\t ");
int nold = Integer.parseInt(st.nextToken())-1;
int nnew = Integer.parseInt(st.nextToken())-1;
stateordering[nnew] = nold;
@@ -1156,8 +1156,8 @@ public class ChromHMM
pwzip.write(btformat,0,btformat.length);
continue;
}
- StringBuffer sb = new StringBuffer(st.nextToken()+"\t"+st.nextToken()+"\t"+st.nextToken());
- String szinstate = st.nextToken();
+ StringBuffer sb = new StringBuffer(st.nextToken().trim()+"\t"+st.nextToken().trim()+"\t"+st.nextToken().trim());
+ String szinstate = st.nextToken().trim();
String szoutstate;
if (!Character.isDigit(szinstate.charAt(0)))
@@ -1172,7 +1172,7 @@ public class ChromHMM
while (st.hasMoreTokens())
{
- sb.append("\t"+st.nextToken());
+ sb.append("\t"+st.nextToken().trim());
}
sb.append("\n");
@@ -1199,8 +1199,8 @@ public class ChromHMM
pw.println(szLine);
continue;
}
- pw.print(st.nextToken()+"\t"+st.nextToken()+"\t"+st.nextToken());
- String szinstate = st.nextToken();
+ pw.print(st.nextToken().trim()+"\t"+st.nextToken().trim()+"\t"+st.nextToken().trim());
+ String szinstate = st.nextToken().trim();
String szoutstate;
if (!Character.isDigit(szinstate.charAt(0)))
{
@@ -1214,7 +1214,7 @@ public class ChromHMM
while (st.hasMoreTokens())
{
- pw.print("\t"+st.nextToken());
+ pw.print("\t"+st.nextToken().trim());
}
pw.println();
}
@@ -1902,7 +1902,7 @@ public class ChromHMM
ArrayList aldata = new ArrayList();
while ((szLine = br.readLine())!=null)
{
- StringTokenizer st = new StringTokenizer(szLine,"\t");
+ StringTokenizer st = new StringTokenizer(szLine,"\t ");
StringBuffer sb = new StringBuffer();
for (int ncol = 0; ncol < numdatasets; ncol++)
@@ -1912,7 +1912,7 @@ public class ChromHMM
throw new IllegalArgumentException("Found line without "+numdatasets+" values in file "+chromfiles[nseq]);
}
- String sztoken = st.nextToken();
+ String sztoken = st.nextToken();//.trim();
if (sztoken.equals("0"))
{
@@ -2903,13 +2903,13 @@ public class ChromHMM
StringTokenizer st = new StringTokenizer(szLoadHeader,"\t");
//first token of the first line of the model file is assumed to give the number of states
- numstates = Integer.parseInt(st.nextToken());
- numdatasets = Integer.parseInt(st.nextToken());
+ numstates = Integer.parseInt(st.nextToken().trim());
+ numdatasets = Integer.parseInt(st.nextToken().trim());
if (datasets ==null)
{
datasets = new String[numdatasets];
}
- chorder = st.nextToken().charAt(0);
+ chorder = st.nextToken().trim().charAt(0);
if ((nstateorder != ChromHMM.STATEORDER_TRANSITION)&&(nstateorder != ChromHMM.STATEORDER_EMISSION)&&(nstateorder != ChromHMM.STATEORDER_FIXED))
{
@@ -2964,20 +2964,20 @@ public class ChromHMM
while ((szLine = br.readLine())!=null)
{
st = new StringTokenizer(szLine,"\t");
- String szvartype = st.nextToken();
+ String szvartype = st.nextToken().trim();
if (szvartype.equalsIgnoreCase("probinit"))
{
//reading an initial probability
- int nstate = Integer.parseInt(st.nextToken())-1;
- double dprob = Double.parseDouble(st.nextToken());
+ int nstate = Integer.parseInt(st.nextToken().trim())-1;
+ double dprob = Double.parseDouble(st.nextToken().trim());
probinit[nstate] = dprob;
}
else if (szvartype.equalsIgnoreCase("transitionprobs"))
{
- int nfrom = Integer.parseInt(st.nextToken())-1;
- int nto = Integer.parseInt(st.nextToken())-1;
- double dprob = Double.parseDouble(st.nextToken());
+ int nfrom = Integer.parseInt(st.nextToken().trim())-1;
+ int nto = Integer.parseInt(st.nextToken().trim())-1;
+ double dprob = Double.parseDouble(st.nextToken().trim());
//this smooths the transition probability if dproceduresmoothtransition>0 using a weighted average with uniform
transitionprobs[nfrom][nto] = dproceduresmoothtransition/((double) transitionprobs.length)+(1-dproceduresmoothtransition)*dprob;
if (transitionprobs[nfrom][nto] == 0)
@@ -2989,20 +2989,20 @@ public class ChromHMM
}
else if (szvartype.equalsIgnoreCase("emissionprobs"))
{
- int nstate = Integer.parseInt(st.nextToken())-1;
- int nmod = Integer.parseInt(st.nextToken());
- String szdataset = st.nextToken();
+ int nstate = Integer.parseInt(st.nextToken().trim())-1;
+ int nmod = Integer.parseInt(st.nextToken().trim());
+ String szdataset = st.nextToken().trim();
if (datasets[nmod]==null)
{
- datasets[nmod] = szdataset;//st.nextToken();
+ datasets[nmod] = szdataset;//st.nextToken().trim();
}
else if (!szdataset.equals(datasets[nmod]))
{
throw new IllegalArgumentException("For mark "+nmod+" in model file found "+szdataset+", but expecting "+datasets[nmod]+"."
+" This might be because columns of binarized file are in a different order from model learning.");
}
- int nval = Integer.parseInt(st.nextToken());
- double dprob = Double.parseDouble(st.nextToken());
+ int nval = Integer.parseInt(st.nextToken().trim());
+ double dprob = Double.parseDouble(st.nextToken().trim());
//smooths the emission probability if dproceduresmoothemission>0 using a weighted average with uniform
emissionprobs[nstate][nmod][nval] = dproceduresmoothemission/((double) numbuckets)+(1-dproceduresmoothemission)*dprob;
}
@@ -3229,7 +3229,7 @@ public class ChromHMM
int nline = 0;
while ((szLinePosterior = brprobs.readLine())!=null)
{
- StringTokenizer stposterior = new StringTokenizer(szLinePosterior,"\t");
+ StringTokenizer stposterior = new StringTokenizer(szLinePosterior,"\t ");
for (int nstate = 0; nstate < numstates; nstate++)
{
fullposterior[nline][nstate] = Double.parseDouble(stposterior.nextToken());
@@ -3288,13 +3288,13 @@ public class ChromHMM
while ((szLineMax = brbed.readLine())!=null)
{
StringTokenizer stchrom = new StringTokenizer(szLineMax,"\t");
- String szchrom = stchrom.nextToken();
+ String szchrom = stchrom.nextToken().trim();
if (szchrom.equals(szcurrchrom))
{
- int nbegin = Integer.parseInt(stchrom.nextToken())/nbinsize;
- int nend = (Integer.parseInt(stchrom.nextToken())-1)/nbinsize;
- int nstate = Integer.parseInt(stchrom.nextToken().substring(1))-1;
+ int nbegin = Integer.parseInt(stchrom.nextToken().trim())/nbinsize;
+ int nend = (Integer.parseInt(stchrom.nextToken().trim())-1)/nbinsize;
+ int nstate = Integer.parseInt(stchrom.nextToken().trim().substring(1))-1;
for (int nj = nbegin; nj <= nend; nj++)
{
fullmax[nj] = nstate;
@@ -3990,7 +3990,7 @@ public class ChromHMM
ArrayList aldata = new ArrayList();
while ((szLine = br.readLine())!=null)
{
- StringTokenizer st = new StringTokenizer(szLine,"\t");
+ StringTokenizer st = new StringTokenizer(szLine,"\t ");
StringBuffer sb = new StringBuffer();
for (int ncol = 0; ncol < numdatasets; ncol++)
@@ -4000,7 +4000,7 @@ public class ChromHMM
throw new IllegalArgumentException("Found line without "+numdatasets+" values in file "+chromfiles[nordered_nseq]);
}
- String sztoken = st.nextToken();
+ String sztoken = st.nextToken();//.trim();
if (sztoken.equals("0"))
{
@@ -4138,7 +4138,7 @@ public class ChromHMM
int nline = 0;
while ((szLinePosterior = brprobs.readLine())!=null)
{
- StringTokenizer stposterior = new StringTokenizer(szLinePosterior,"\t");
+ StringTokenizer stposterior = new StringTokenizer(szLinePosterior,"\t ");
for (int nstate = 0; nstate < numstates; nstate++)
{
fullposterior[nline][nstate] = Double.parseDouble(stposterior.nextToken());
@@ -4197,13 +4197,13 @@ public class ChromHMM
while ((szLineMax = brbed.readLine())!=null)
{
StringTokenizer stchrom = new StringTokenizer(szLineMax,"\t");
- String szchrom = stchrom.nextToken();
+ String szchrom = stchrom.nextToken().trim();
if (szchrom.equals(szcurrchrom))
{
- int nbegin = Integer.parseInt(stchrom.nextToken())/nbinsize;
- int nend = (Integer.parseInt(stchrom.nextToken())-1)/nbinsize;
- int nstate = Integer.parseInt(stchrom.nextToken().substring(1))-1;
+ int nbegin = Integer.parseInt(stchrom.nextToken().trim())/nbinsize;
+ int nend = (Integer.parseInt(stchrom.nextToken().trim())-1)/nbinsize;
+ int nstate = Integer.parseInt(stchrom.nextToken().trim().substring(1))-1;
for (int nj = nbegin; nj <= nend; nj++)
{
fullmax[nj] = nstate;
@@ -4983,7 +4983,7 @@ public class ChromHMM
ArrayList aldata = new ArrayList();
while ((szLine = br.readLine())!=null)
{
- StringTokenizer st = new StringTokenizer(szLine,"\t");
+ StringTokenizer st = new StringTokenizer(szLine,"\t ");
StringBuffer sb = new StringBuffer();
for (int ncol = 0; ncol < numdatasets; ncol++)
@@ -5168,12 +5168,12 @@ public class ChromHMM
String szout = cellSeq[nordered_nseq]+"\t"+szactualchrom+"\n";
byte[] btformat = szout.getBytes();
- pwprobszip.write(btformat,0,btformat.length);
+ pwmaxzip.write(btformat,0,btformat.length); //fix 1.23
//pwmax.println(cellSeq[nordered_nseq]+"\t"+chromSeq[nordered_nseq]);
szout = "MaxState "+chorder+"\n";
btformat = szout.getBytes();
- pwprobszip.write(btformat,0,btformat.length);
+ pwmaxzip.write(btformat,0,btformat.length); //fix 1.23
//pwmax.println("MaxState "+chorder);
}
@@ -7703,7 +7703,7 @@ public class ChromHMM
ArrayList aldata = new ArrayList();
while ((szLine = br.readLine())!=null)
{
- StringTokenizer st = new StringTokenizer(szLine,"\t");
+ StringTokenizer st = new StringTokenizer(szLine,"\t ");
StringBuffer sb = new StringBuffer();
for (int ncol = 0; ncol < numdatasets; ncol++)
@@ -7713,7 +7713,7 @@ public class ChromHMM
throw new IllegalArgumentException("Found line without "+numdatasets+" values in file "+chromfiles[nseq]);
}
- String sztoken = st.nextToken();
+ String sztoken = st.nextToken();//.trim();
if (sztoken.equals("0"))
{
@@ -10265,7 +10265,7 @@ public class ChromHMM
ArrayList aldata = new ArrayList();
while ((szLine = br.readLine())!=null)
{
- StringTokenizer st = new StringTokenizer(szLine,"\t");
+ StringTokenizer st = new StringTokenizer(szLine,"\t ");
StringBuffer sb = new StringBuffer();
for (int ncol = 0; ncol < numdatasets; ncol++)
@@ -10275,7 +10275,7 @@ public class ChromHMM
throw new IllegalArgumentException("Found line without "+numdatasets+" values in file "+chromfiles_nseq);
}
- String sztoken = st.nextToken();
+ String sztoken = st.nextToken();//.trim();
if (sztoken.equals("0"))
{
@@ -12286,12 +12286,12 @@ public class ChromHMM
throw new IllegalArgumentException("First line must contain cell type and chromosome. No entries found.");
}
- cellSeq[nfile] = st.nextToken();
+ cellSeq[nfile] = st.nextToken().trim();//added trim in version 1.23
if (!st.hasMoreTokens())
{
throw new IllegalArgumentException("First line must contain cell type and chromosome. Only one entry found.");
}
- chromSeq[nfile] = st.nextToken();
+ chromSeq[nfile] = st.nextToken().trim();//added trim in version 1.23
if (st.hasMoreTokens())
{
@@ -12312,7 +12312,7 @@ public class ChromHMM
int ntoken = 0;
while (st.hasMoreTokens())
{
- datasets[ntoken] = st.nextToken();
+ datasets[ntoken] = st.nextToken().trim();
ntoken++;
}
//numdatasets is the number of marks we are integrating
@@ -12330,7 +12330,7 @@ public class ChromHMM
//Gives warning if a header column does not match
while (st.hasMoreTokens())
{
- String sztoken = st.nextToken();
+ String sztoken = st.nextToken().trim();
if (!datasets[ntoken].equals(sztoken))
{
System.out.println("WARNING headers do not match between "+chromfiles[nfile]+" and "+chromfiles[0]);
@@ -12632,12 +12632,12 @@ public class ChromHMM
throw new IllegalArgumentException("First line must contain cell type and chromosome. No entries found.");
}
- cellSeq[nfile] = st.nextToken();
+ cellSeq[nfile] = st.nextToken().trim(); //added trim in version 1.23
if (!st.hasMoreTokens())
{
throw new IllegalArgumentException("First line must contain cell type and chromosome. Only one entry found.");
}
- chromSeq[nfile] = st.nextToken();
+ chromSeq[nfile] = st.nextToken().trim(); //added trim in version 1.23
if (st.hasMoreTokens())
{
@@ -12658,7 +12658,7 @@ public class ChromHMM
int ntoken = 0;
while (st.hasMoreTokens())
{
- datasets[ntoken] = st.nextToken();
+ datasets[ntoken] = st.nextToken().trim();
ntoken++;
}
}
@@ -12674,7 +12674,7 @@ public class ChromHMM
//Gives warning if a header column does not match
while (st.hasMoreTokens())
{
- String sztoken = st.nextToken();
+ String sztoken = st.nextToken().trim();
if (!datasets[ntoken].equals(sztoken))
{
System.out.println("WARNING headers do not match between "+chromfiles[nfile]+" and "+chromfiles[0]);
@@ -12689,7 +12689,7 @@ public class ChromHMM
ArrayList aldata = new ArrayList();
while ((szLine = br.readLine())!=null)
{
- st = new StringTokenizer(szLine,"\t");
+ st = new StringTokenizer(szLine,"\t ");
StringBuffer sb = new StringBuffer();
for (int ncol = 0; ncol < numdatasets; ncol++)
@@ -12700,7 +12700,7 @@ public class ChromHMM
throw new IllegalArgumentException("Found line without "+numdatasets+" values in file "+chromfiles[nfile]);
}
- String sztoken = st.nextToken();
+ String sztoken = st.nextToken();//.trim();
if (sztoken.equals("0"))
{
@@ -12848,13 +12848,14 @@ public class ChromHMM
if (szcommand.equalsIgnoreCase("Version"))
{
- System.out.println("This is Version 1.22 of ChromHMM (c) Copyright 2008-2012 Massachusetts Institute of Technology");
+ System.out.println("This is Version 1.23 of ChromHMM (c) Copyright 2008-2012 Massachusetts Institute of Technology");
}
else if ((szcommand.equals("BinarizeBam"))||(szcommand.equalsIgnoreCase("BinarizeBed")))
{
boolean bstacked = false;
boolean bgzip = false;
boolean bpairend = false;
+ boolean bmixed = false;
boolean bsplit = false;
String szcontroldir=null;
int nflankwidthcontrol = 5;
@@ -13012,6 +13013,10 @@ public class ChromHMM
{
bpairend = true;
}
+ else if ((args[nargindex].equals("-mixed"))&&(szcommand.equalsIgnoreCase("BinarizeBam")))
+ {
+ bmixed = true;
+ }
else if (args[nargindex].equals("-w"))
{
nflankwidthcontrol = Integer.parseInt(args[++nargindex]);
@@ -13030,7 +13035,7 @@ public class ChromHMM
}
}
- if ((bpairend) && (bcenterinterval||bshift||bpeaks))
+ if ((bpairend) && (bmixed||bcenterinterval||bshift||bpeaks))
{
bok = false;
}
@@ -13214,7 +13219,7 @@ public class ChromHMM
szoutputbinarydirsub,szoutputcontroldir,
dpoissonthresh,dfoldthresh,bcontainsthresh,
npseudocountcontrol,nbinsize,szcolfields,bpeaks, dcountthresh,szcommand.equalsIgnoreCase("BinarizeBam"),
- bpairend, bgzip, bsplit, numsplitbins, bsplitcols,nnummarksplit,nmarksplitindex, bstacked);
+ bpairend, bgzip, bsplit, numsplitbins, bsplitcols,nnummarksplit,nmarksplitindex, bstacked, bmixed);
}
}
else
@@ -13267,7 +13272,7 @@ public class ChromHMM
szoutputbinarydir,szoutputcontroldir,
dpoissonthresh,dfoldthresh,bcontainsthresh,
npseudocountcontrol,nbinsize,szcolfields,bpeaks, dcountthresh,szcommand.equalsIgnoreCase("BinarizeBam"),
- bpairend, bgzip, bsplit, numsplitbins, bsplitcols,nnummarksplit,nmarksplitindex,bstacked);
+ bpairend, bgzip, bsplit, numsplitbins, bsplitcols,nnummarksplit,nmarksplitindex,bstacked,bmixed);
}
}
@@ -13291,7 +13296,7 @@ public class ChromHMM
else
{
System.out.println("usage BinarizeBam [-b binsize][-c controldir][-e offsetend][-f foldthresh]"+
- "[-g signalthresh][-gzip][[-o outputcontroldir][-p poissonthresh][-paired|[-center][-n shift][-peaks [-i splitindex]]"+
+ "[-g signalthresh][-gzip][[-o outputcontroldir][-p poissonthresh][-paired|[-mixed][-center][-n shift][-peaks [-i splitindex]]"+
"[-s offsetstart][-splitcols [-k splitcolindex][-m numsplitcols]][-splitrows [-j numsplitbins]][-stacked][-strictthresh][-t outputsignaldir]"+
"[-u pseudocountcontrol][-w flankwidthcontrol] "+
"chromosomelengthfile inputbamdir cellmarkfiletable outputbinarydir");
@@ -13541,9 +13546,9 @@ public class ChromHMM
StringTokenizer stcolor = new StringTokenizer(szcolor,",");
if (stcolor.countTokens()==3)
{
- nr = Integer.parseInt(stcolor.nextToken());
- ng = Integer.parseInt(stcolor.nextToken());
- nb = Integer.parseInt(stcolor.nextToken());
+ nr = Integer.parseInt(stcolor.nextToken().trim());
+ ng = Integer.parseInt(stcolor.nextToken().trim());
+ nb = Integer.parseInt(stcolor.nextToken().trim());
}
else
{
@@ -13659,9 +13664,9 @@ public class ChromHMM
StringTokenizer stcolor = new StringTokenizer(szcolor,",");
if (stcolor.countTokens()==3)
{
- nr = Integer.parseInt(stcolor.nextToken());
- ng = Integer.parseInt(stcolor.nextToken());
- nb = Integer.parseInt(stcolor.nextToken());
+ nr = Integer.parseInt(stcolor.nextToken().trim());
+ ng = Integer.parseInt(stcolor.nextToken().trim());
+ nb = Integer.parseInt(stcolor.nextToken().trim());
}
else
{
@@ -14039,9 +14044,9 @@ public class ChromHMM
StringTokenizer stcolor = new StringTokenizer(szcolor,",");
if (stcolor.countTokens()==3)
{
- nr = Integer.parseInt(stcolor.nextToken());
- ng = Integer.parseInt(stcolor.nextToken());
- nb = Integer.parseInt(stcolor.nextToken());
+ nr = Integer.parseInt(stcolor.nextToken().trim());
+ ng = Integer.parseInt(stcolor.nextToken().trim());
+ nb = Integer.parseInt(stcolor.nextToken().trim());
}
else
{
@@ -14211,9 +14216,9 @@ public class ChromHMM
StringTokenizer stcolor = new StringTokenizer(szcolor,",");
if (stcolor.countTokens()==3)
{
- nr = Integer.parseInt(stcolor.nextToken());
- ng = Integer.parseInt(stcolor.nextToken());
- nb = Integer.parseInt(stcolor.nextToken());
+ nr = Integer.parseInt(stcolor.nextToken().trim());
+ ng = Integer.parseInt(stcolor.nextToken().trim());
+ nb = Integer.parseInt(stcolor.nextToken().trim());
}
else
{
@@ -14404,9 +14409,9 @@ public class ChromHMM
StringTokenizer stcolor = new StringTokenizer(szcolor,",");
if (stcolor.countTokens()==3)
{
- nr = Integer.parseInt(stcolor.nextToken());
- ng = Integer.parseInt(stcolor.nextToken());
- nb = Integer.parseInt(stcolor.nextToken());
+ nr = Integer.parseInt(stcolor.nextToken().trim());
+ ng = Integer.parseInt(stcolor.nextToken().trim());
+ nb = Integer.parseInt(stcolor.nextToken().trim());
}
else
{
@@ -14996,9 +15001,9 @@ public class ChromHMM
StringTokenizer stcolor = new StringTokenizer(szcolor,",");
if (stcolor.countTokens()==3)
{
- nr = Integer.parseInt(stcolor.nextToken());
- ng = Integer.parseInt(stcolor.nextToken());
- nb = Integer.parseInt(stcolor.nextToken());
+ nr = Integer.parseInt(stcolor.nextToken().trim());
+ ng = Integer.parseInt(stcolor.nextToken().trim());
+ nb = Integer.parseInt(stcolor.nextToken().trim());
}
else
{
=====================================
edu/mit/compbio/ChromHMM/ConvertGeneTable.java
=====================================
@@ -140,37 +140,37 @@ public class ConvertGeneTable
while ((szLine = br.readLine())!=null)
{
StringTokenizer st = new StringTokenizer(szLine,"\t",true);
- String szbin = st.nextToken();
+ String szbin = st.nextToken().trim();
if (!szbin.equals("\t"))
- st.nextToken();
- String szname = st.nextToken();
+ st.nextToken().trim();
+ String szname = st.nextToken().trim();
if (!szname.equals("\t"))
- st.nextToken();
- String szchrom = st.nextToken();
+ st.nextToken().trim();
+ String szchrom = st.nextToken().trim();
if (!szchrom.equals("\t"))
- st.nextToken();
- String szstrand = st.nextToken();
+ st.nextToken().trim();
+ String szstrand = st.nextToken().trim();
if (!szstrand.equals("\t"))
- st.nextToken();
- String sztxStart = st.nextToken();
+ st.nextToken().trim();
+ String sztxStart = st.nextToken().trim();
if (!sztxStart.equals("\t"))
- st.nextToken();
- String sztxEnd = st.nextToken();
+ st.nextToken().trim();
+ String sztxEnd = st.nextToken().trim();
if (!sztxEnd.equals("\t"))
- st.nextToken();
- String szcdsStart = st.nextToken();
+ st.nextToken().trim();
+ String szcdsStart = st.nextToken().trim();
if (!szcdsStart.equals("\t"))
- st.nextToken();
- String szcdsEnd = st.nextToken();
+ st.nextToken().trim();
+ String szcdsEnd = st.nextToken().trim();
if (!szcdsEnd.equals("\t"))
- st.nextToken();
- String szexonCount = st.nextToken();
+ st.nextToken().trim();
+ String szexonCount = st.nextToken().trim();
if (!szexonCount.equals("\t"))
- st.nextToken();
- String szexonStarts = st.nextToken();
+ st.nextToken().trim();
+ String szexonStarts = st.nextToken().trim();
if (!szexonStarts.equals("\t"))
- st.nextToken();
- String szexonEnds = st.nextToken();
+ st.nextToken().trim();
+ String szexonEnds = st.nextToken().trim();
//removed 1.22
//if (!szexonEnds.equals("\t"))
@@ -288,7 +288,7 @@ public class ConvertGeneTable
while (stexonStarts.hasMoreTokens())
{
- String szexonOut = szchrom+"\t"+stexonStarts.nextToken()+"\t"+stexonEnds.nextToken()+"\n";
+ String szexonOut = szchrom+"\t"+stexonStarts.nextToken().trim()+"\t"+stexonEnds.nextToken().trim()+"\n";
if (!hsexon.contains(szexonOut))
{
byte[] btformat = szexonOut.getBytes();
@@ -340,7 +340,7 @@ public class ConvertGeneTable
while (stexonStarts.hasMoreTokens())
{
- String szexonOut = szchrom+"\t"+stexonStarts.nextToken()+"\t"+stexonEnds.nextToken();
+ String szexonOut = szchrom+"\t"+stexonStarts.nextToken().trim()+"\t"+stexonEnds.nextToken().trim();
if (!hsexon.contains(szexonOut))
{
=====================================
edu/mit/compbio/ChromHMM/NestedEliminateInitialize.java
=====================================
@@ -80,10 +80,10 @@ public class NestedEliminateInitialize
if (szLine.startsWith("emission"))
{
StringTokenizer stemiss = new StringTokenizer(szLine,"\t");
- stemiss.nextToken();
- stemiss.nextToken();
- stemiss.nextToken();
- String szmark = stemiss.nextToken();
+ stemiss.nextToken().trim();
+ stemiss.nextToken().trim();
+ stemiss.nextToken().trim();
+ String szmark = stemiss.nextToken().trim();
Object objInt = hmNameToID.get(szmark);
if (objInt ==null)
@@ -140,10 +140,10 @@ public class NestedEliminateInitialize
if (szLine.startsWith("emissionprobs"))
{
st = new StringTokenizer(szLine,"\t");
- st.nextToken();
- int nstate = Integer.parseInt(st.nextToken())-1;
- st.nextToken();
- String szmark = st.nextToken();
+ st.nextToken().trim();
+ int nstate = Integer.parseInt(st.nextToken().trim())-1;
+ st.nextToken().trim();
+ String szmark = st.nextToken().trim();
Integer intMark = (Integer) hmNameToID.get(szmark);
int nmark = -1;
if (intMark == null)
@@ -154,10 +154,10 @@ public class NestedEliminateInitialize
{
nmark = ((Integer) intMark).intValue();
}
- String szbucket = st.nextToken();
+ String szbucket = st.nextToken().trim();
if (szbucket.equals("1"))
{
- modelemissions[nmodel][nstate][nmark] = Double.parseDouble(st.nextToken());
+ modelemissions[nmodel][nstate][nmark] = Double.parseDouble(st.nextToken().trim());
ncount++;
}
}
@@ -197,9 +197,9 @@ public class NestedEliminateInitialize
throw new IllegalArgumentException(szinputdir+"/"+szmainfile+" is missing lines!");
}
StringTokenizer st = new StringTokenizer(szinitline,"\t");
- st.nextToken();
- st.nextToken();
- bestprobinit[ni] = Double.parseDouble(st.nextToken());
+ st.nextToken().trim();
+ st.nextToken().trim();
+ bestprobinit[ni] = Double.parseDouble(st.nextToken().trim());
}
for (int ni = 0; ni < nbestnumstates; ni++)
@@ -212,10 +212,10 @@ public class NestedEliminateInitialize
throw new IllegalArgumentException(szinputdir+"/"+szmainfile+" is missing lines!");
}
StringTokenizer st = new StringTokenizer(sztransitionline,"\t");
- st.nextToken();
- st.nextToken();
- st.nextToken();
- besttransitionprobs[ni][nj] = Double.parseDouble(st.nextToken());
+ st.nextToken().trim();
+ st.nextToken().trim();
+ st.nextToken().trim();
+ besttransitionprobs[ni][nj] = Double.parseDouble(st.nextToken().trim());
}
}
@@ -228,9 +228,9 @@ public class NestedEliminateInitialize
while ((szLine = brfile.readLine())!=null)
{
StringTokenizer st = new StringTokenizer(szLine,"\t");
- st.nextToken();
- int nj = Integer.parseInt(st.nextToken())-1;
- emissionsline[nj].add(st.nextToken()+"\t"+st.nextToken()+"\t"+st.nextToken()+"\t"+st.nextToken());
+ st.nextToken().trim();
+ int nj = Integer.parseInt(st.nextToken().trim())-1;
+ emissionsline[nj].add(st.nextToken().trim()+"\t"+st.nextToken().trim()+"\t"+st.nextToken().trim()+"\t"+st.nextToken().trim());
}
brfile.close();
=====================================
edu/mit/compbio/ChromHMM/Preprocessing.java
=====================================
@@ -1,4 +1,3 @@
-
/**
* ChromHMM - automating chromatin state discovery and characterization
* Copyright (C) 2008-2012 Massachusetts Institute of Technology
@@ -89,7 +88,8 @@ public class Preprocessing
private static void loadGrid(int[][][] grid,boolean[] bpresent, boolean[] bpresentmarks, String[] marks,int nshift, int nbinsize,
boolean bcenterinterval,int noffsetleft,
int noffsetright,HashMap hmfiles, String szcell, String szmarkdir,HashMap hmchrom,
- int ninitval, String szcolfields,boolean bpeaks,boolean bcontrol, boolean bbinarizebam, boolean bpairend) throws IOException
+ int ninitval, String szcolfields,boolean bpeaks,boolean bcontrol, boolean bbinarizebam,
+ boolean bpairend,boolean bmixed) throws IOException
{
int nummarks = grid[0][0].length;
//initializes all values in grid to ninitval
@@ -122,12 +122,12 @@ public class Preprocessing
{
throw new IllegalArgumentException(" invalid number of column fields in "+szcolfields+" expecting 3 or 4 integers");
}
- nchromcol = Integer.parseInt(stcolfields.nextToken());
- nbegincol = Integer.parseInt(stcolfields.nextToken());
- nendcol = Integer.parseInt(stcolfields.nextToken());
+ nchromcol = Integer.parseInt(stcolfields.nextToken().trim());
+ nbegincol = Integer.parseInt(stcolfields.nextToken().trim());
+ nendcol = Integer.parseInt(stcolfields.nextToken().trim());
if (!bcenterinterval)
{
- nstrandcol = Integer.parseInt(stcolfields.nextToken());
+ nstrandcol = Integer.parseInt(stcolfields.nextToken().trim());
}
nmaxindex = Math.max(nchromcol,Math.max(nbegincol,Math.max(nendcol, nstrandcol)));
@@ -195,7 +195,133 @@ public class Preprocessing
SamReader samReader = srf.open(new File(szmarkdir+"/"+szfile));
SAMRecordIterator iter = samReader.iterator();
- if (bpairend)
+ if (bmixed)
+ {
+ //added in v1.23 to handle input mixing paired-end and single-end reads
+ while (iter.hasNext())
+ {
+ SAMRecord rec= iter.next();
+
+ if (rec.getReadPairedFlag())
+ {
+
+ if ((rec.getProperPairFlag()) && rec.getFirstOfPairFlag()&&(!rec.getReadUnmappedFlag()))
+ {
+ int nstartorig = rec.getAlignmentStart()-1;
+ int nendorig = rec.getAlignmentEnd();
+ String szchrom = rec.getReferenceName();
+ boolean bnegstrand = rec.getReadNegativeStrandFlag();
+ int ninsertsize = rec.getInferredInsertSize();
+
+ Integer objInt = (Integer) hmchrom.get(szchrom);
+
+ //if we don't have the chromosome for the read will ignore it
+ if (objInt != null)
+ {
+ int nchrom = objInt.intValue();
+ int nbin;
+
+ //no center mode in paired end
+
+ if (bnegstrand)
+ {
+ //"-"
+ //was nshift
+ nbin = (nendorig-noffsetright+ninsertsize/2)/nbinsize;
+ //ninsertsize can be negative so adding
+ //removed one from here may need it for backwards consistency
+ }
+ else
+ {
+ //"+"
+ //was nshift
+ nbin = (nstartorig-noffsetleft+ninsertsize/2)/nbinsize;
+ }
+ //}
+
+ if ((nbin>=0)&&(nbin < grid[nchrom].length))
+ {
+ //increment bin count if falls into valid interval
+ grid[nchrom][nbin][nmark]++;
+ //we do have this chromosome
+ bpresent[nchrom] = true;
+ bdatafound = true;
+ }
+ }
+ }
+ }
+ else
+ {
+ //treat it as single end
+ int nstartorig = rec.getAlignmentStart()-1;
+ int nendorig = rec.getAlignmentEnd();
+ String szchrom = rec.getReferenceName();
+ boolean bnegstrand = rec.getReadNegativeStrandFlag();
+ boolean bunmapped = rec.getReadUnmappedFlag();
+
+ if (bunmapped)
+ {
+ continue;
+ }
+
+ Integer objInt = (Integer) hmchrom.get(szchrom);
+
+ //if we don't have the chromosome for the read will ignore it
+ if (objInt != null)
+ {
+ int nchrom = objInt.intValue();
+ int nbin;
+
+ if (bpeaks)
+ {
+ int nstart = Math.max(0,(nstartorig-noffsetleft)/nbinsize);
+ int nend = Math.min(grid[nchrom].length-1, (nendorig-noffsetright)/nbinsize);
+
+ for (nbin = nstart; nbin <= nend; nbin++)
+ {
+ //increment bin count if falls into valid interval
+ grid[nchrom][nbin][nmark]++;
+ //we do have this chromosome
+ bpresent[nchrom] = true;
+ bdatafound = true;
+ }
+ }
+ else
+ {
+ if (bcenterinterval)
+ {
+ //uses the center of the interval which is useful if read is already extended
+ nbin = (nstartorig-noffsetleft+nendorig-noffsetright)/(2*nbinsize);
+ }
+ else
+ {
+ if (bnegstrand)
+ {
+ //"-"
+ nbin = (nendorig-noffsetright-nshift)/nbinsize;
+ //removed one from here may need it for backwards consistency
+ }
+ else
+ {
+ //"+"
+ nbin = (nstartorig-noffsetleft+nshift)/nbinsize;
+ }
+ }
+
+ if ((nbin>=0)&&(nbin < grid[nchrom].length))
+ {
+ //increment bin count if falls into valid interval
+ grid[nchrom][nbin][nmark]++;
+ //we do have this chromosome
+ bpresent[nchrom] = true;
+ bdatafound = true;
+ }
+ }
+ }
+ }
+ }
+ }
+ else if (bpairend)
{
while (iter.hasNext())
{
@@ -411,7 +537,7 @@ public class Preprocessing
{
throw new IllegalArgumentException("Empty line found in "+szmarkdir+"/"+szfile);
}
- String szchrom = st.nextToken();
+ String szchrom = st.nextToken();//.trim();
Integer objInt = (Integer) hmchrom.get(szchrom);
//if we don't have the chromosome for the read will ignore it
@@ -423,13 +549,13 @@ public class Preprocessing
throw new IllegalArgumentException("Missing begin coordinate in "+szmarkdir+"/"+szfile);
}
- String szbegin = st.nextToken();
+ String szbegin = st.nextToken();//.trim();
if (!st.hasMoreTokens())
{
throw new IllegalArgumentException("Missing end coordinate in "+szmarkdir+"/"+szfile);
}
- String szend = st.nextToken();
+ String szend = st.nextToken();//.trim();
int nbin;
if (bpeaks)
@@ -460,11 +586,11 @@ public class Preprocessing
throw new IllegalArgumentException("strand column expected, but not found in "+szmarkdir+"/"+szfile);
}
//looks for strand in sixth column or last column if less than six
- String szstrand = st.nextToken();
+ String szstrand = st.nextToken();//.trim();
if (st.hasMoreTokens())
- szstrand = st.nextToken();
+ szstrand = st.nextToken();//.trim();
if (st.hasMoreTokens())
- szstrand = st.nextToken();
+ szstrand = st.nextToken();//.trim();
if (szstrand.equals("+"))
{
@@ -548,13 +674,13 @@ public class Preprocessing
{
throw new IllegalArgumentException("empty line found in "+szchromlengthfile);
}
- chroms[ni] = st.nextToken();
+ chroms[ni] = st.nextToken();//.trim();
if (!st.hasMoreTokens())
{
throw new IllegalArgumentException("missing chromosome length for "+allines.get(ni)+" in "+szchromlengthfile);
}
- int nlength = Integer.parseInt(st.nextToken());
+ int nlength = Integer.parseInt(st.nextToken());//.trim());
int numbins = nlength/nbinsize;
//if (nlength % nbinsize !=0) //removed pre 1.18 to be consistent from binarizing directly
// numbins++;
@@ -606,7 +732,12 @@ public class Preprocessing
alfiles = new ArrayList();
hmfiles.put(szcell+"\t"+szmark,alfiles);
}
- alfiles.add(szfile);
+
+ if (!alfiles.contains(szfile))
+ {
+ //added in v1.23 so that a given file is only counted once
+ alfiles.add(szfile);
+ }
}
ncellmarkentry++;
}
@@ -675,11 +806,11 @@ public class Preprocessing
{
StringTokenizer st = new StringTokenizer(szLine,"\t");
if (st.countTokens() == 0) continue;
- String szchrom = st.nextToken();
+ String szchrom = st.nextToken().trim();
if (szchrom.equals(szcurrchrom))
{
- int nbegin = (Integer.parseInt(st.nextToken())-noffsetleft)/nbinsize;
- int nend = (Integer.parseInt(st.nextToken())-noffsetright)/nbinsize; //updated -1 from pre-release 1.18
+ int nbegin = (Integer.parseInt(st.nextToken().trim())-noffsetleft)/nbinsize;
+ int nend = (Integer.parseInt(st.nextToken().trim())-noffsetright)/nbinsize; //updated -1 from pre-release 1.18
//need to update
int nactualbegin = nbegin;
@@ -854,7 +985,7 @@ public class Preprocessing
double dpoissonthresh, double dfoldthresh,boolean bcontainsthresh, int npseudocountcontrol,int nbinsize,
String szcolfields, boolean bpeaks, double dcountthresh, boolean bbinarizebam, boolean bpairend,
boolean bgzip, boolean bsplit, int numsplitbins,
- boolean bsplitcols,int nnummarksplit,int nmarksplitindex, boolean bstacked
+ boolean bsplitcols,int nnummarksplit,int nmarksplitindex, boolean bstacked, boolean bmixed
) throws IOException
{
@@ -879,13 +1010,13 @@ public class Preprocessing
{
throw new IllegalArgumentException("empty line found in "+szchromlengthfile);
}
- chroms[ni] = st.nextToken();
+ chroms[ni] = st.nextToken();//.trim();
hmchrom.put(chroms[ni], Integer.valueOf(ni));
if (!st.hasMoreTokens())
{
throw new IllegalArgumentException("missing chromosome length for "+allines.get(ni)+" in "+szchromlengthfile);
}
- lengths[ni] = Integer.parseInt(st.nextToken());
+ lengths[ni] = Integer.parseInt(st.nextToken());//.trim());
}
@@ -967,7 +1098,12 @@ public class Preprocessing
alfiles = new ArrayList();
hmfiles.put(szcell+"\t"+szmark,alfiles);
}
- alfiles.add(szfile);
+
+ if (!alfiles.contains(szfile))
+ {
+ //added in v1.23 so that a given file is only counted once
+ alfiles.add(szfile);
+ }
}
ncellmarkentry++;
}
@@ -1050,7 +1186,7 @@ public class Preprocessing
//loading data for the cell type
loadGrid(grid,bpresent,bpresentmarks,marks,nshift,nbinsize,bcenterinterval,noffsetleft,
- noffsetright,hmfiles,szcell,szmarkdir,hmchrom,0,szcolfields,bpeaks,false,bbinarizebam, bpairend);
+ noffsetright,hmfiles,szcell,szmarkdir,hmchrom,0,szcolfields,bpeaks,false,bbinarizebam, bpairend,bmixed);
if (bcontrolfile)
{
if ((gridcontrol[0] == null)||(gridcontrol[0][0].length !=numcontrolmarks))
@@ -1066,7 +1202,7 @@ public class Preprocessing
//we have control data loading cell type data for that
loadGrid(gridcontrol,bpresentcontrol,bpresentmarkscontrol,marks,nshift,nbinsize,bcenterinterval,noffsetleft,noffsetright,
- hmfilescontrol,szcell,szcontroldir,hmchrom,npseudocountcontrol,szcolfields,bpeaks,true,bbinarizebam,bpairend);
+ hmfilescontrol,szcell,szcontroldir,hmchrom,npseudocountcontrol,szcolfields,bpeaks,true,bbinarizebam,bpairend,bmixed);
}
@@ -2566,7 +2702,7 @@ public class Preprocessing
throw new IllegalArgumentException(szcontrolDIR+"/"+allfilescontrol[nfile]+" header line must have two columns delimited by a tab found only one in "+
szheader);
}
- String szkey = st.nextToken()+"\t"+st.nextToken();
+ String szkey = st.nextToken().trim()+"\t"+st.nextToken().trim();
hmcontrol.put(szkey,allfilescontrol[nfile]);
brcontrol.close();
}
@@ -2588,7 +2724,7 @@ public class Preprocessing
throw new IllegalArgumentException(signalchromfiles[nfile]+" is empty!");
}
StringTokenizer st = new StringTokenizer(szLine,"\t");
- szcurrcell = st.nextToken();
+ szcurrcell = st.nextToken().trim();
br.close();
ArrayList al = (ArrayList) hmcellsToIndex.get(szcurrcell);
@@ -2641,7 +2777,7 @@ public class Preprocessing
throw new IllegalArgumentException("Only found one entry for line "+szHeaderLine1+" in file "+szbinneddataDIR+"/"+szfilename
+" expecting 2");
}
- String szchrom = stheader.nextToken();
+ String szchrom = stheader.nextToken().trim();
chroms[nchrom] = szchrom;
String szcontrolfilename = (String) hmcontrol.get(szcell+"\t"+szchrom);
if (szcontrolfilename == null)
@@ -2714,7 +2850,7 @@ public class Preprocessing
{
int[] grid_nchrom_nbin = grid_nchrom[nbin];
int[] gridcontrol_nchrom_nbin = gridcontrol_nchrom[nbin];
- st = new StringTokenizer(szLine,"\t");
+ st = new StringTokenizer(szLine,"\t ");//updated in 1.23
if (st.countTokens() != nummarks)
{
throw new IllegalArgumentException("In "+szfilename+" did not find the expected "+nummarks+" marks in this line: "+szLine);
@@ -2731,7 +2867,7 @@ public class Preprocessing
throw new IllegalArgumentException("The number of lines in the control file "+
szcontrolDIR+"/"+szfilename+" does not match that in the signal file "+szbinneddataDIR+"/"+szcontrolfilename);
}
- st = new StringTokenizer(szLineControl,"\t");
+ st = new StringTokenizer(szLineControl,"\t ");
for (int nmark = 0; nmark < nummarkscontrol; nmark++)
{
//reading in the control data
@@ -3186,7 +3322,7 @@ public class Preprocessing
throw new IllegalArgumentException(szbinneddataDIR+"/"+signalchromfiles[nfile]+" does not contain any data!");
}
StringTokenizer st = new StringTokenizer(szLine,"\t");
- String szcurrcell = st.nextToken();
+ String szcurrcell = st.nextToken().trim();
br.close();
ArrayList al = (ArrayList) hmcellsToIndex.get(szcurrcell);
@@ -3241,7 +3377,7 @@ public class Preprocessing
while ((szLine = br.readLine())!=null)
{
ntotallocs++;
- st = new StringTokenizer(szLine,"\t");
+ st = new StringTokenizer(szLine,"\t ");
if (st.countTokens() != nummarks)
{
throw new IllegalArgumentException("In "+szfilename+" did not find the expected "+nummarks+" marks in this line: "+szLine);
@@ -3249,7 +3385,7 @@ public class Preprocessing
for (int nj = 0; nj < nummarks; nj++)
{
- double dval = Double.parseDouble(st.nextToken());
+ double dval = Double.parseDouble(st.nextToken());//.trim());
sumtags[nj] += dval;
}
}
@@ -3314,8 +3450,8 @@ public class Preprocessing
String szLine;
int nbin = 0;
int nsplit = 0;
- String szcurrcell = st.nextToken();
- String szchrom = st.nextToken();
+ String szcurrcell = st.nextToken().trim();
+ String szchrom = st.nextToken().trim();
boolean bopen = false;
GZIPOutputStream pwzip = null;
@@ -3339,12 +3475,12 @@ public class Preprocessing
bopen = true;
}
- st = new StringTokenizer(szLine,"\t");
+ st = new StringTokenizer(szLine,"\t ");
StringBuffer sbout = new StringBuffer();
for (int ncol = 0; ncol < nummarks-1; ncol++)
{
- double dval = Double.parseDouble(st.nextToken());
+ double dval = Double.parseDouble(st.nextToken());//.trim());
if (dval == -1)
{
sbout.append("2\t");
@@ -3362,7 +3498,7 @@ public class Preprocessing
}
}
- double dval = Double.parseDouble(st.nextToken());
+ double dval = Double.parseDouble(st.nextToken());//.trim());
if (dval == -1)
{
sbout.append("2\n");
@@ -3400,7 +3536,7 @@ public class Preprocessing
}
else
{
- String szfile = szoutputDIR+"/"+st.nextToken()+"_"+st.nextToken()+"_binary.txt.gz";
+ String szfile = szoutputDIR+"/"+st.nextToken().trim()+"_"+st.nextToken().trim()+"_binary.txt.gz";
System.out.println("Writing to file "+szfile);
GZIPOutputStream pwzip = new GZIPOutputStream(new FileOutputStream(szfile));
//PrintWriter pw = new PrintWriter(new FileWriter(szfile));
@@ -3415,12 +3551,12 @@ public class Preprocessing
String szLine;
while ((szLine = br.readLine())!=null)
{
- st = new StringTokenizer(szLine,"\t");
+ st = new StringTokenizer(szLine,"\t ");
StringBuffer sbout = new StringBuffer();
for (int ncol = 0; ncol < nummarks-1; ncol++)
{
- double dval = Double.parseDouble(st.nextToken());
+ double dval = Double.parseDouble(st.nextToken());//.trim());
if (dval == -1)
{
sbout.append("2\t");
@@ -3438,7 +3574,7 @@ public class Preprocessing
}
}
- double dval = Double.parseDouble(st.nextToken());
+ double dval = Double.parseDouble(st.nextToken());//.trim());
if (dval == -1)
{
sbout.append("2\n");
@@ -3470,8 +3606,8 @@ public class Preprocessing
{
int nbin = 0;
int nsplit = 0;
- String szcurrcell = st.nextToken();
- String szchrom = st.nextToken();
+ String szcurrcell = st.nextToken().trim();
+ String szchrom = st.nextToken().trim();
boolean bopen = false;
PrintWriter pw = null;
String szLine;
@@ -3489,10 +3625,10 @@ public class Preprocessing
bopen = true;
}
- st = new StringTokenizer(szLine,"\t");
+ st = new StringTokenizer(szLine,"\t ");
for (int ncol = 0; ncol < nummarks-1; ncol++)
{
- double dval = Double.parseDouble(st.nextToken());
+ double dval = Double.parseDouble(st.nextToken());//.trim());
if (dval == -1)
{
pw.print("2\t");
@@ -3507,7 +3643,7 @@ public class Preprocessing
}
}
- double dval = Double.parseDouble(st.nextToken());
+ double dval = Double.parseDouble(st.nextToken());//.trim());
if (dval == -1)
{
pw.println("2");
@@ -3538,7 +3674,7 @@ public class Preprocessing
}
else
{
- String szfile = szoutputDIR+"/"+st.nextToken()+"_"+st.nextToken()+"_binary.txt";
+ String szfile = szoutputDIR+"/"+st.nextToken().trim()+"_"+st.nextToken().trim()+"_binary.txt";
System.out.println("Writing to file "+szfile);
PrintWriter pw = new PrintWriter(new FileWriter(szfile));
pw.println(szChromCellLine);
@@ -3546,10 +3682,10 @@ public class Preprocessing
String szLine;
while ((szLine = br.readLine())!=null)
{
- st = new StringTokenizer(szLine,"\t");
+ st = new StringTokenizer(szLine,"\t ");
for (int ncol = 0; ncol < nummarks-1; ncol++)
{
- double dval = Double.parseDouble(st.nextToken());
+ double dval = Double.parseDouble(st.nextToken());//.trim());
if (dval == -1)
{
pw.print("2\t");
@@ -3564,7 +3700,7 @@ public class Preprocessing
}
}
- double dval = Double.parseDouble(st.nextToken());
+ double dval = Double.parseDouble(st.nextToken());//.trim());
if (dval == -1)
{
pw.println("2");
@@ -3671,7 +3807,10 @@ public class Preprocessing
//added hidden check in v.1.11
//read first two lines
//maps the first line to the buffered reader
- BufferedReader brfile = Util.getBufferedReader(szcurrpath+"/"+szcurrfile);
+ //updated in v1.23 so that fewer files are open at once
+ String szpathfile = szcurrpath+"/"+szcurrfile;
+ //BufferedReader brfile = Util.getBufferedReader(szcurrpath+"/"+szcurrfile);
+ BufferedReader brfile = Util.getBufferedReader(szpathfile);//szcurrpath+"/"+szcurrfile);
String szheader1 = brfile.readLine();
if (szheader1 == null)
{
@@ -3684,6 +3823,7 @@ public class Preprocessing
}
String szheader2 = brfile.readLine();
+ brfile.close();
if (szheader2A[ndir] == null)
{
@@ -3693,7 +3833,7 @@ public class Preprocessing
{
throw new IllegalArgumentException("Inconsistent header lines in "+szcurrpath+" found "+szheader2+" and "+szheader2A[ndir]);
}
- hmbrA[ndir].put(szheader1, brfile);
+ hmbrA[ndir].put(szheader1, szpathfile);//brfile);
hsfiles.add(szheader1);
}
@@ -3711,10 +3851,29 @@ public class Preprocessing
//sbmergedheader.append(szheader2A[0]);
String[] notpresent = new String[szheader2A.length];
+ //added in v1.23 to ensure each mark appears only once across the files being merged
+ HashSet hsmarkpresent = new HashSet();
+
for (int ndir = 0; ndir < szheader2A.length; ndir++)
{
StringTokenizer st = new StringTokenizer(szheader2A[ndir], "\t");
int ntokens = st.countTokens();
+
+ //added in v1.23 to ensure each mark appears only once across the files being merged
+ while (st.hasMoreTokens())
+ {
+ String szmarktoken = st.nextToken().trim();
+ if (hsmarkpresent.contains(szmarktoken))
+ {
+ throw new IllegalArgumentException("Feature "+szmarktoken+" is not unique. Found in multiple files being merged");
+ }
+ else
+ {
+ hsmarkpresent.add(szmarktoken);
+ }
+ }
+
+
StringBuffer sbnotpresent = new StringBuffer();
sbnotpresent.append("0");
for (int na = 1; na <ntokens; na++)
@@ -3756,8 +3915,10 @@ public class Preprocessing
String szcurrfile = chromfilescombine[nfile];
for (int ndir = 0; ndir < subdirall.length; ndir++)
{
- BufferedReader br = (BufferedReader) hmbrA[ndir].get(szcurrfile);
- if (br == null)
+ //BufferedReader br = (BufferedReader) hmbrA[ndir].get(szcurrfile);
+ String szpathfile = (String) hmbrA[ndir].get(szcurrfile);
+ //if (br == null)
+ if (szpathfile == null)
{
System.out.println("Warning directory "+szinputdir+"/"+subdirall[ndir]+" does not contain file "+szcurrfile+" going to treat as not present");
bpresent[ndir] = false;
@@ -3766,14 +3927,16 @@ public class Preprocessing
else
{
bpresent[ndir] = true;
- brfiles[ndir] = br;
+ brfiles[ndir] = Util.getBufferedReader(szpathfile);//br;
+ brfiles[ndir].readLine();
+ brfiles[ndir].readLine();
}
}
StringTokenizer st = new StringTokenizer(chromfilescombine[nfile],"\t");
- String szcell = st.nextToken();
- String szchrom = st.nextToken();
+ String szcell = st.nextToken().trim();
+ String szchrom = st.nextToken().trim();
if (bsplit)
@@ -4038,12 +4201,24 @@ public class Preprocessing
}
}
- for (int ndir = 0; ndir < hmbrA.length; ndir++)
+
+
+ // for (int ndir = 0; ndir < hmbrA.length; ndir++)
+ //{
+ // BufferedReader br = (BufferedReader) hmbrA[ndir].get(szcurrfile);
+ // if (br != null)
+ // {
+ // br.close();
+ // }
+ //}
+
+
+ for (int ndir = 0; ndir < brfiles.length; ndir++)
{
- BufferedReader br = (BufferedReader) hmbrA[ndir].get(szcurrfile);
- if (br != null)
+ //BufferedReader br = (BufferedReader) hmbrA[ndir].get(szcurrfile);
+ if (brfiles[ndir] != null)
{
- br.close();
+ brfiles[ndir].close();
}
}
=====================================
edu/mit/compbio/ChromHMM/StateAnalysis.java
=====================================
@@ -248,6 +248,11 @@ public class StateAnalysis
int nactualindex = 0;
files = new String[nnonhiddencount];// dir.list();
+ if (nnonhiddencount == 0)
+ {
+ throw new IllegalArgumentException("No files found in "+szinputcoorddir);
+ }
+
for (int nfile = 0; nfile < filesWithHidden.length; nfile++)
{
if (!(new File(filesWithHidden[nfile])).isHidden())
@@ -307,7 +312,7 @@ public class StateAnalysis
if (szHeader == null)
throw new IllegalArgumentException(szposteriordir+"/"+szposteriorfiles_nfile+" is empty!");
StringTokenizer st =new StringTokenizer(szHeader,"\t");
- String szcurrcell = st.nextToken();
+ String szcurrcell = st.nextToken().trim();
if ((!szcurrcell.equals(szcell))&&(!szcell.equals("")))
{
brposterior.close();
@@ -315,7 +320,7 @@ public class StateAnalysis
else
{
bposteriorfound = true;
- String szchrom = st.nextToken();
+ String szchrom = st.nextToken().trim();
int numlines = 0;
szLine = brposterior.readLine();
@@ -327,7 +332,7 @@ public class StateAnalysis
int numcurrstates = st.countTokens();
if (bfirst)
{
- chorder = st.nextToken().charAt(0);
+ chorder = st.nextToken().trim().charAt(0);
bfirst = false;
nfirstindex = nfile;
numposteriorstates = numcurrstates;
@@ -360,7 +365,7 @@ public class StateAnalysis
float[] posterior_nline = posterior[nline];
for (int nstate = 0; nstate < numposteriorstates; nstate++)
{
- float fval = Float.parseFloat(st.nextToken());
+ float fval = Float.parseFloat(st.nextToken().trim());
posterior_nline[nstate] = fval;
tallylabel[nstate] += fval;
}
@@ -381,13 +386,13 @@ public class StateAnalysis
{
//gets the start and end coordinates
StringTokenizer stcolfields = new StringTokenizer(szcolfields,",");
- nchromindex = Integer.parseInt(stcolfields.nextToken());
- nstartindex = Integer.parseInt(stcolfields.nextToken());
- nendindex = Integer.parseInt(stcolfields.nextToken());
+ nchromindex = Integer.parseInt(stcolfields.nextToken().trim());
+ nstartindex = Integer.parseInt(stcolfields.nextToken().trim());
+ nendindex = Integer.parseInt(stcolfields.nextToken().trim());
if (busesignal)
{
- nsignalindex = Integer.parseInt(stcolfields.nextToken());
+ nsignalindex = Integer.parseInt(stcolfields.nextToken().trim());
}
}
@@ -695,9 +700,9 @@ public class StateAnalysis
st = new StringTokenizer(szLine,"\t ");
}
- String szchrom = st.nextToken();
- int nbegincoord = Integer.parseInt(st.nextToken());
- int nendcoord = Integer.parseInt(st.nextToken());
+ String szchrom = st.nextToken().trim();
+ int nbegincoord = Integer.parseInt(st.nextToken().trim());
+ int nendcoord = Integer.parseInt(st.nextToken().trim());
if (nbegincoord % nbinsize != 0)
{
throw new IllegalArgumentException("Binsize of "+nbinsize+" does not agree with coordinates in input segment "+szLine+". -b binsize should match parameter value to LearnModel or "+
@@ -705,7 +710,7 @@ public class StateAnalysis
}
int nbegin = nbegincoord/nbinsize;
int nend = (nendcoord-1)/nbinsize;
- szlabel = st.nextToken();
+ szlabel = st.nextToken().trim();
short slabel = -1;
if (bstringlabels)
@@ -783,7 +788,7 @@ public class StateAnalysis
}
catch (NumberFormatException ex2)
{
- throw new IllegalArgumentException("In fourth column neither state number or ID file found in segmentation file. Use '-labels' option to run overlap enrichment treating fourth column as labels");
+ throw new IllegalArgumentException("In fourth column neither state number or ID found in segmentation file. Use '-labels' option to run overlap enrichment treating fourth column as labels");
}
}
@@ -872,6 +877,11 @@ public class StateAnalysis
int nactualindex = 0;
files = new String[nnonhiddencount];// dir.list();
+ if (nnonhiddencount == 0)
+ {
+ throw new IllegalArgumentException("No files found in "+szinputcoorddir);
+ }
+
for (int nfile = 0; nfile < filesWithHidden.length; nfile++)
{
if (!(new File(filesWithHidden[nfile])).isHidden())
@@ -925,13 +935,13 @@ public class StateAnalysis
if (szcolfields != null)
{
StringTokenizer stcolfields = new StringTokenizer(szcolfields,",");
- nchromindex = Integer.parseInt(stcolfields.nextToken());
- nstartindex = Integer.parseInt(stcolfields.nextToken());
- nendindex = Integer.parseInt(stcolfields.nextToken());
+ nchromindex = Integer.parseInt(stcolfields.nextToken().trim());
+ nstartindex = Integer.parseInt(stcolfields.nextToken().trim());
+ nendindex = Integer.parseInt(stcolfields.nextToken().trim());
if (busesignal)
{
- nsignalindex = Integer.parseInt(stcolfields.nextToken());
+ nsignalindex = Integer.parseInt(stcolfields.nextToken().trim());
}
}
@@ -1244,6 +1254,11 @@ public class StateAnalysis
int nactualindex = 0;
files = new String[nnonhiddencount];// dir.list();
+ if (nnonhiddencount == 0)
+ {
+ throw new IllegalArgumentException("No files found in "+szinputcoorddir);
+ }
+
for (int nfile = 0; nfile < filesWithHidden.length; nfile++)
{
if (!(new File(filesWithHidden[nfile])).isHidden())
@@ -1309,9 +1324,9 @@ public class StateAnalysis
st = new StringTokenizer(szLine,"\t ");
}
- String szchrom = st.nextToken();
- int nbegincoord = Integer.parseInt(st.nextToken());
- int nendcoord = Integer.parseInt(st.nextToken());
+ String szchrom = st.nextToken().trim();
+ int nbegincoord = Integer.parseInt(st.nextToken().trim());
+ int nendcoord = Integer.parseInt(st.nextToken().trim());
if (nbegincoord % nbinsize != 0)
{
throw new IllegalArgumentException("Binsize of "+nbinsize+" does not agree with coordinates in input segment "+szLine+". -b binsize should match parameter value to LearnModel or "+
@@ -1319,7 +1334,7 @@ public class StateAnalysis
}
//int nbegin = nbegincoord/nbinsize;
int nend = (nendcoord-1)/nbinsize;
- szlabel = st.nextToken();
+ szlabel = st.nextToken().trim();
short slabel;
@@ -1398,7 +1413,7 @@ public class StateAnalysis
}
catch (NumberFormatException ex2)
{
- throw new IllegalArgumentException("In fourth column neither state number or ID file found in segmentation file. Use '-labels' option to run overlap enrichment treating fourth column as labels");
+ throw new IllegalArgumentException("In fourth column neither state number or ID found in segmentation file. Use '-labels' option to run overlap enrichment treating fourth column as labels");
}
}
@@ -1461,12 +1476,12 @@ public class StateAnalysis
st = new StringTokenizer(szLine,"\t ");
}
- String szchrom = st.nextToken();
+ String szchrom = st.nextToken().trim();
if (!szchrom.equals(szchromwant))
continue;
- int nbegincoord = Integer.parseInt(st.nextToken());
- int nendcoord = Integer.parseInt(st.nextToken());
+ int nbegincoord = Integer.parseInt(st.nextToken().trim());
+ int nendcoord = Integer.parseInt(st.nextToken().trim());
//if (nbegincoord % nbinsize != 0)
// {
@@ -1474,7 +1489,7 @@ public class StateAnalysis
//}
int nbegin = nbegincoord/nbinsize;
int nend = (nendcoord-1)/nbinsize;
- szlabel = st.nextToken();
+ szlabel = st.nextToken().trim();
short slabel = -1;
if (bstringlabels)
@@ -1528,7 +1543,7 @@ public class StateAnalysis
}
catch (NumberFormatException ex2)
{
- throw new IllegalArgumentException("In fourth column neither state number or ID file found in segmentation file. Use '-labels' option to run overlap enrichment treating fourth column as labels");
+ throw new IllegalArgumentException("In fourth column neither state number or ID found in segmentation file. Use '-labels' option to run overlap enrichment treating fourth column as labels");
}
}
}
@@ -1571,13 +1586,13 @@ public class StateAnalysis
if (szcolfields != null)
{
StringTokenizer stcolfields = new StringTokenizer(szcolfields,",");
- nchromindex = Integer.parseInt(stcolfields.nextToken());
- nstartindex = Integer.parseInt(stcolfields.nextToken());
- nendindex = Integer.parseInt(stcolfields.nextToken());
+ nchromindex = Integer.parseInt(stcolfields.nextToken().trim());
+ nstartindex = Integer.parseInt(stcolfields.nextToken().trim());
+ nendindex = Integer.parseInt(stcolfields.nextToken().trim());
if (busesignal)
{
- nsignalindex = Integer.parseInt(stcolfields.nextToken());
+ nsignalindex = Integer.parseInt(stcolfields.nextToken().trim());
}
}
@@ -1854,8 +1869,8 @@ public class StateAnalysis
while ((szLine = bridlabel.readLine())!=null)
{
StringTokenizer st = new StringTokenizer(szLine,"\t");
- String szID = st.nextToken();
- String szLabelExtend = st.nextToken();
+ String szID = st.nextToken().trim();
+ String szLabelExtend = st.nextToken().trim();
hmlabelExtend.put(szID,szLabelExtend);
}
bridlabel.close();
@@ -1888,6 +1903,11 @@ public class StateAnalysis
nf5.setGroupingUsed(false);
nf5.setMinimumFractionDigits(5);
+ NumberFormat nf10 = NumberFormat.getInstance();
+ nf10.setMaximumFractionDigits(10);
+ nf10.setGroupingUsed(false);
+ nf10.setMinimumFractionDigits(10);
+
System.out.println("Writing to file "+szoutfile+".txt");
PrintWriter pw = new PrintWriter(new FileWriter(szoutfile+".txt"));
@@ -1963,7 +1983,7 @@ public class StateAnalysis
pw.print("Base\t100");
for (int nfile = 0; nfile < tallyoverlaplabel.length; nfile++)
{
- pw.print("\t"+nf5.format(100*(dsumoverlaplabel[nfile]/dsumlabel)));
+ pw.print("\t"+nf10.format(100*(dsumoverlaplabel[nfile]/dsumlabel)));
}
pw.println();
pw.close();
@@ -2134,10 +2154,10 @@ public class StateAnalysis
if (szcolfields != null)
{
StringTokenizer stcolfields = new StringTokenizer(szcolfields,",");
- nchromindex = Integer.parseInt(stcolfields.nextToken());
- npositionindex = Integer.parseInt(stcolfields.nextToken());
- nstrandindex = Integer.parseInt(stcolfields.nextToken());
- nsignalindex = Integer.parseInt(stcolfields.nextToken());
+ nchromindex = Integer.parseInt(stcolfields.nextToken().trim());
+ npositionindex = Integer.parseInt(stcolfields.nextToken().trim());
+ nstrandindex = Integer.parseInt(stcolfields.nextToken().trim());
+ nsignalindex = Integer.parseInt(stcolfields.nextToken().trim());
}
}
else if (busestrand && !busesignal)
@@ -2145,9 +2165,9 @@ public class StateAnalysis
if (szcolfields != null)
{
StringTokenizer stcolfields = new StringTokenizer(szcolfields,",");
- nchromindex = Integer.parseInt(stcolfields.nextToken());
- npositionindex = Integer.parseInt(stcolfields.nextToken());
- nstrandindex = Integer.parseInt(stcolfields.nextToken());
+ nchromindex = Integer.parseInt(stcolfields.nextToken().trim());
+ npositionindex = Integer.parseInt(stcolfields.nextToken().trim());
+ nstrandindex = Integer.parseInt(stcolfields.nextToken().trim());
}
}
else if (!busestrand && busesignal)
@@ -2155,9 +2175,9 @@ public class StateAnalysis
if (szcolfields != null)
{
StringTokenizer stcolfields = new StringTokenizer(szcolfields,",");
- nchromindex = Integer.parseInt(stcolfields.nextToken());
- npositionindex = Integer.parseInt(stcolfields.nextToken());
- nsignalindex = Integer.parseInt(stcolfields.nextToken());
+ nchromindex = Integer.parseInt(stcolfields.nextToken().trim());
+ npositionindex = Integer.parseInt(stcolfields.nextToken().trim());
+ nsignalindex = Integer.parseInt(stcolfields.nextToken().trim());
}
else //if (szcolfields == null)
{
@@ -2169,8 +2189,8 @@ public class StateAnalysis
if (szcolfields != null)
{
StringTokenizer stcolfields = new StringTokenizer(szcolfields,",");
- nchromindex = Integer.parseInt(stcolfields.nextToken());
- npositionindex = Integer.parseInt(stcolfields.nextToken());
+ nchromindex = Integer.parseInt(stcolfields.nextToken().trim());
+ npositionindex = Integer.parseInt(stcolfields.nextToken().trim());
}
}
@@ -2211,6 +2231,7 @@ public class StateAnalysis
{
+ boolean bchrommatch = false;//added in 1.23 to check for chromosome matches
//an array of chromosome names
ArrayList alchromindex = new ArrayList();
@@ -2247,13 +2268,13 @@ public class StateAnalysis
{
st = new StringTokenizer(szLine,"\t ");
}
- String szchrom = st.nextToken();
+ String szchrom = st.nextToken().trim();
//assumes segments are in standard bed format; converting to
//0-based inclusive requires subtracting 1 from the end
- //int nbegin = Integer.parseInt(st.nextToken())/nbinsize;
- st.nextToken();
- int nend = (Integer.parseInt(st.nextToken())-1)/nbinsize;
- szlabel = st.nextToken();
+ //int nbegin = Integer.parseInt(st.nextToken().trim())/nbinsize;
+ st.nextToken().trim();
+ int nend = (Integer.parseInt(st.nextToken().trim())-1)/nbinsize;
+ szlabel = st.nextToken().trim();
short slabel;
if (bstringlabels)
@@ -2326,7 +2347,7 @@ public class StateAnalysis
}
catch (NumberFormatException ex2)
{
- throw new IllegalArgumentException("In fourth column neither state number or ID file found in segmentation file. Use '-labels' option to run overlap enrichment treating fourth column as labels");
+ throw new IllegalArgumentException("In fourth column neither state number or ID found in segmentation file. Use '-labels' option to run overlap enrichment treating fourth column as labels");
}
}
@@ -2403,14 +2424,16 @@ public class StateAnalysis
st = new StringTokenizer(szLine,"\t ");
}
- String szchrom = st.nextToken();
+ String szchrom = st.nextToken().trim();
if (!szchromwant.equals(szchrom))
continue;
+
+ bchrommatch = true;
//assumes segments are in standard bed format; converting to
//0-based inclusive requires subtracting 1 from the end
- int nbegin = Integer.parseInt(st.nextToken())/nbinsize;
- int nend = (Integer.parseInt(st.nextToken())-1)/nbinsize;
- szlabel = st.nextToken();
+ int nbegin = Integer.parseInt(st.nextToken().trim())/nbinsize;
+ int nend = (Integer.parseInt(st.nextToken().trim())-1)/nbinsize;
+ szlabel = st.nextToken().trim();
short slabel = -1;
if (bstringlabels)
@@ -2464,7 +2487,7 @@ public class StateAnalysis
}
catch (NumberFormatException ex2)
{
- throw new IllegalArgumentException("In fourth column neither state number or ID file found in segmentation file. Use '-labels' option to run overlap enrichment treating fourth column as labels");
+ throw new IllegalArgumentException("In fourth column neither state number or ID found in segmentation file. Use '-labels' option to run overlap enrichment treating fourth column as labels");
}
}
}
@@ -2575,6 +2598,12 @@ public class StateAnalysis
brcoords.close();
}
+ if (!bchrommatch)
+ {
+ throw new IllegalArgumentException("No chromosome name matches found between "+szanchorpositions+
+ " and those in the segmentation file.");
+ }
+
outputneighborhood(tallyoverlaplabel,tallylabel,dsumoverlaplabel,szoutfile,nspacing,numright,
numleft,theColor,ChromHMM.convertCharOrderToStringOrder(szlabel.charAt(0)),sztitle,0,
szlabelmapping,szlabel.charAt(0), bprintimage, bstringlabels, hmIndexToLabel);
@@ -2610,6 +2639,7 @@ public class StateAnalysis
String sztitle,String szlabelmapping, boolean bprintimage,boolean bstringlabels) throws IOException
{
//highmem
+ boolean bchrommatch = false;//added in 1.23 to check for chromosome matches
//stores all the segments in the data
ArrayList alsegments = new ArrayList();
@@ -2645,12 +2675,12 @@ public class StateAnalysis
{
st = new StringTokenizer(szLine,"\t ");
}
- String szchrom = st.nextToken();
+ String szchrom = st.nextToken().trim();
//assumes segments are in standard bed format which to get to
//0-based inclusive requires substract 1 from the end
- int nbegin = Integer.parseInt(st.nextToken())/nbinsize;
- int nend = (Integer.parseInt(st.nextToken())-1)/nbinsize;
- szlabel = st.nextToken();
+ int nbegin = Integer.parseInt(st.nextToken().trim())/nbinsize;
+ int nend = (Integer.parseInt(st.nextToken().trim())-1)/nbinsize;
+ szlabel = st.nextToken().trim();
short slabel = -1;
if (bstringlabels)
@@ -2728,7 +2758,7 @@ public class StateAnalysis
}
catch (NumberFormatException ex2)
{
- throw new IllegalArgumentException("In fourth column neither state number or ID file found in segmentation file. Use '-labels' option to run overlap enrichment treating fourth column as labels");
+ throw new IllegalArgumentException("In fourth column neither state number or ID found in segmentation file. Use '-labels' option to run overlap enrichment treating fourth column as labels");
}
}
@@ -2854,6 +2884,7 @@ public class StateAnalysis
{
int nchrom = objChrom.intValue();
short[] labels_nchrom = labels[nchrom];
+ bchrommatch = true;
if (bposstrand)
{
@@ -2887,6 +2918,13 @@ public class StateAnalysis
}
brcoords.close();
+
+ if (!bchrommatch)
+ {
+ throw new IllegalArgumentException("No chromosome name matches found between "+szanchorpositions+
+ " and those in the segmentation file.");
+ }
+
outputneighborhood(tallyoverlaplabel,tallylabel,dsumoverlaplabel,szoutfile,nspacing,numright,
numleft,theColor,ChromHMM.convertCharOrderToStringOrder(szlabel.charAt(0)),sztitle,0,
szlabelmapping,szlabel.charAt(0),bprintimage, bstringlabels, hmIndexToLabel);
@@ -2920,6 +2958,8 @@ public class StateAnalysis
boolean busestrand, boolean busesignal,String szcolfields, int noffsetanchor,
String szoutfile,Color theColor,String sztitle,String szlabelmapping, boolean bprintimage) throws IOException
{
+
+ boolean bchrommatch = false;//added in 1.23 to check for chromosome matches
//posterior here is really signal just using equivalent variable names
//list of possible posterior files
File posteriordir = new File(szposteriordir);
@@ -2959,7 +2999,7 @@ public class StateAnalysis
throw new IllegalArgumentException(szposteriordir+"/"+szposteriorfiles_nfile+" is empty!");
}
StringTokenizer st =new StringTokenizer(szLine,"\t");
- String szcurrcell = st.nextToken();
+ String szcurrcell = st.nextToken().trim();
if ((!szcurrcell.equals(szcell))&&(!szcell.equals("")))
{
brposterior.close();
@@ -2970,7 +3010,7 @@ public class StateAnalysis
//must match cell type or consistent with empty cell type
- String szchrom = st.nextToken();
+ String szchrom = st.nextToken().trim();
int numlines = 0;
szLine = brposterior.readLine(); //gets state header
@@ -2982,7 +3022,7 @@ public class StateAnalysis
int numcurrstates = st.countTokens();
if (bfirst)
{
- chorder = st.nextToken().charAt(0);
+ chorder = st.nextToken().trim().charAt(0);
bfirst = false;
nfirstindex = nfile;
numposteriorstates = numcurrstates;
@@ -3036,6 +3076,7 @@ public class StateAnalysis
if (szreadchrom.equals(szchrom))
{
+ bchrommatch = true;
int nanchor = (Integer.parseInt(szLineA[theAnchorIndex.npositionindex])-noffsetanchor);
boolean bposstrand = true;
if (busestrand)
@@ -3120,6 +3161,12 @@ public class StateAnalysis
{
throw new IllegalArgumentException("No posterior file found for cell type "+szcell);
}
+
+ if (!bchrommatch)
+ {
+ throw new IllegalArgumentException("No chromosome name matches found between "+szanchorpositions+
+ " and those in the segmentation file.");
+ }
outputneighborhoodsignal(tallyoverlaplabel,tallylabel,dsumoverlaplabel,numlocs,szoutfile,nspacing,numright,
numleft,theColor,ChromHMM.convertCharOrderToStringOrder(chorder),sztitle,szmarknames,szlabelmapping,chorder,bprintimage);
@@ -3154,6 +3201,7 @@ public class StateAnalysis
String szoutfile,Color theColor,String sztitle,String szlabelmapping, boolean bprintimage) throws IOException
{
+ boolean bchrommatch = false;//added in 1.23 to check for chromosome matches
//list of possible posterior files
File posteriordir = new File(szposteriordir);
if (!posteriordir.exists())
@@ -3191,7 +3239,7 @@ public class StateAnalysis
throw new IllegalArgumentException(szposteriordir+"/"+szposteriorfiles_nfile+" is empty!");
}
StringTokenizer st =new StringTokenizer(szLine,"\t");
- String szcurrcell = st.nextToken();
+ String szcurrcell = st.nextToken().trim();
if ((!szcurrcell.equals(szcell))&&(!szcell.equals("")))
{
brposterior.close();
@@ -3202,7 +3250,7 @@ public class StateAnalysis
//must match cell type or consistent with empty cell type
- String szchrom = st.nextToken();
+ String szchrom = st.nextToken().trim();
int numlines = 0;
szLine = brposterior.readLine(); //gets state header
@@ -3214,7 +3262,7 @@ public class StateAnalysis
int numcurrstates = st.countTokens();
if (bfirst)
{
- chorder = st.nextToken().charAt(0);
+ chorder = st.nextToken().trim().charAt(0);
bfirst = false;
nfirstindex = nfile;
numposteriorstates = numcurrstates;
@@ -3268,6 +3316,7 @@ public class StateAnalysis
if (szreadchrom.equals(szchrom))
{
+ bchrommatch = true;
int nanchor = (Integer.parseInt(szLineA[theAnchorIndex.npositionindex])-noffsetanchor);
boolean bposstrand = true;
if (busestrand)
@@ -3350,6 +3399,12 @@ public class StateAnalysis
{
throw new IllegalArgumentException("No posterior file found for cell type "+szcell);
}
+
+ if (!bchrommatch)
+ {
+ throw new IllegalArgumentException("No chromosome name matches found between "+szanchorpositions+
+ " and those in the segmentation file.");
+ }
outputneighborhood(tallyoverlaplabel,tallylabel,dsumoverlaplabel,szoutfile,nspacing,numright,
numleft,theColor,ChromHMM.convertCharOrderToStringOrder(chorder),sztitle,1,
@@ -3442,10 +3497,13 @@ public class StateAnalysis
//copying over the contents
heatmapreduce[nkeepindex][ncol] = heatmapfold[nstate][ncol];
}
- rowlabels[nkeepindex] = stheader.nextToken();//""+(nstate+1);
+ rowlabels[nkeepindex] = stheader.nextToken().trim();//""+(nstate+1);
nkeepindex++;
}
- stheader.nextToken();
+ else //added in 1.23
+ {
+ stheader.nextToken();
+ }
}
heatmapfold = heatmapreduce;
}
@@ -3454,7 +3512,7 @@ public class StateAnalysis
rowlabels = new String[tallyoverlaplabel[0].length];
for (int ni = 0; ni < rowlabels.length; ni++)
{
- rowlabels[ni] = ""+stheader.nextToken();
+ rowlabels[ni] = ""+stheader.nextToken().trim();
}
}
@@ -3672,11 +3730,11 @@ public class StateAnalysis
//maps each mark identifier to a consistent indext value
HashMap hmNameToID = new HashMap();
- String szaxis = stheader.nextToken();
+ String szaxis = stheader.nextToken().trim();
int ncol = 0;
while (stheader.hasMoreTokens())
{
- hmNameToID.put(stheader.nextToken(), Integer.valueOf(ncol));
+ hmNameToID.put(stheader.nextToken().trim(), Integer.valueOf(ncol));
ncol++;
}
@@ -3703,10 +3761,10 @@ public class StateAnalysis
throw new IllegalArgumentException("Expecting "+numstatesmain+" lines in "+szmainmodelfile+" found fewer.");
}
StringTokenizer stLine = new StringTokenizer(szLine,"\t");
- rowlabels[nstate] = stLine.nextToken();
+ rowlabels[nstate] = stLine.nextToken().trim();
for (ncol = 0; ncol < numcols; ncol++)
{
- emissionparamsmain[nstate][ncol] = Double.parseDouble(stLine.nextToken());
+ emissionparamsmain[nstate][ncol] = Double.parseDouble(stLine.nextToken().trim());
}
}
bremissions.close();
@@ -3759,7 +3817,7 @@ public class StateAnalysis
ncol = 0;
while (stheader.hasMoreTokens())
{
- mappedcol[ncol] = ((Integer) hmNameToID.get(stheader.nextToken())).intValue();
+ mappedcol[ncol] = ((Integer) hmNameToID.get(stheader.nextToken().trim())).intValue();
ncol++;
}
}
@@ -3779,7 +3837,7 @@ public class StateAnalysis
for (ncol = 0; ncol < numcols; ncol++)
{
//storing the emission parameter for
- theRecEmissionFile.emissionparams[nstate][mappedcol[ncol]] = Double.parseDouble(stLine.nextToken());
+ theRecEmissionFile.emissionparams[nstate][mappedcol[ncol]] = Double.parseDouble(stLine.nextToken().trim());
}
}
//adds a record for this emission file containing the parameters, number of states, and number of marks
View it on GitLab: https://salsa.debian.org/med-team/chromhmm/-/commit/bd371febdf5c385e3d51c83ff7a2ec0dfcde1f12
--
View it on GitLab: https://salsa.debian.org/med-team/chromhmm/-/commit/bd371febdf5c385e3d51c83ff7a2ec0dfcde1f12
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20210907/f09d3a1f/attachment-0001.htm>
More information about the debian-med-commit mailing list