[med-svn] [Git][med-team/chromhmm][master] 7 commits: d/watch: Fix watch regex
Nilesh Patra (@nilesh)
gitlab at salsa.debian.org
Sun Jun 6 15:54:02 BST 2021
Nilesh Patra pushed to branch master at Debian Med / chromhmm
Commits:
c93dc38d by Nilesh Patra at 2021-06-06T20:00:11+05:30
d/watch: Fix watch regex
- - - - -
9d430d6f by Nilesh Patra at 2021-06-06T20:05:38+05:30
d/watch: Better regex (allow both v and V for version number)
- - - - -
8c0a88e0 by Nilesh Patra at 2021-06-06T20:07:06+05:30
New upstream version 1.22+dfsg
- - - - -
b4e4258d by Nilesh Patra at 2021-06-06T20:07:31+05:30
Update upstream source from tag 'upstream/1.22+dfsg'
Update to upstream version '1.22+dfsg'
with Debian dir 0c32f1bec6c7b49935408fc63c37d8174988869e
- - - - -
7909066f by Nilesh Patra at 2021-06-06T20:12:23+05:30
Declare compliance with policy 4.5.1
- - - - -
ca56d69b by Nilesh Patra at 2021-06-06T20:17:07+05:30
Update d/u/metadata
- - - - -
54d24f85 by Nilesh Patra at 2021-06-06T14:53:29+00:00
Interim changelog entry
- - - - -
7 changed files:
- debian/changelog
- debian/control
- debian/upstream/metadata
- debian/watch
- edu/mit/compbio/ChromHMM/ChromHMM.java
- edu/mit/compbio/ChromHMM/ConvertGeneTable.java
- edu/mit/compbio/ChromHMM/Preprocessing.java
Changes:
=====================================
debian/changelog
=====================================
@@ -1,3 +1,13 @@
+chromhmm (1.22+dfsg-1) UNRELEASED; urgency=medium
+
+ * Team Upload.
+ * d/watch: Fix watch regex
+ * New upstream version 1.22+dfsg
+ * Declare compliance with policy 4.5.1
+ * Update d/u/metadata
+
+ -- Nilesh Patra <nilesh at debian.org> Sun, 06 Jun 2021 20:12:58 +0530
+
chromhmm (1.21+dfsg-1) unstable; urgency=medium
* New upstream version
=====================================
debian/control
=====================================
@@ -9,7 +9,7 @@ Build-Depends: debhelper-compat (= 13),
libbatik-java,
libjheatchart-java
Build-Depends-Indep: default-jdk
-Standards-Version: 4.5.0
+Standards-Version: 4.5.1
Vcs-Browser: https://salsa.debian.org/med-team/chromhmm
Vcs-Git: https://salsa.debian.org/med-team/chromhmm.git
Homepage: http://compbio.mit.edu/ChromHMM/
=====================================
debian/upstream/metadata
=====================================
@@ -1,3 +1,10 @@
+---
+Archive: GitHub
+Bug-Database: https://github.com/jernst98/ChromHMM/issues
+Bug-Submit: https://github.com/jernst98/ChromHMM/issues/new
+Changelog: https://github.com/jernst98/ChromHMM/tags
+Repository: https://github.com/jernst98/ChromHMM.git
+Repository-Browse: https://github.com/jernst98/ChromHMM
Contact: Jason Ernst <jason.ernst at ucla.edu>
Name: ChromHMM
Reference:
=====================================
debian/watch
=====================================
@@ -1,4 +1,4 @@
version=4
opts="repacksuffix=+dfsg,dversionmangle=s/\+dfsg//g,repack,compression=xz" \
- https://github.com/jernst98/ChromHMM/releases .*/archive/v?@ANY_VERSION@@ARCHIVE_EXT@
+ https://github.com/jernst98/ChromHMM/releases .*/archive/.*/[vV]?@ANY_VERSION@@ARCHIVE_EXT@
=====================================
edu/mit/compbio/ChromHMM/ChromHMM.java
=====================================
@@ -2911,7 +2911,7 @@ public class ChromHMM
}
chorder = st.nextToken().charAt(0);
- if ((nstateorder != ChromHMM.STATEORDER_TRANSITION)&&(nstateorder != ChromHMM.STATEORDER_EMISSION))
+ if ((nstateorder != ChromHMM.STATEORDER_TRANSITION)&&(nstateorder != ChromHMM.STATEORDER_EMISSION)&&(nstateorder != ChromHMM.STATEORDER_FIXED))
{
nstateorder = -1;
for (int ni = 0; ni < ChromHMM.ORDERCHARS.length; ni++)
@@ -7029,7 +7029,7 @@ public class ChromHMM
double[][] alpha = new double[data.length][numstates];
//Temporary storage of the gamma's for each state
- double[][] gamma = new double[data.length][numstates];
+ //double[][] gamma = new double[data.length][numstates];
//Temporary storage of the beta values for each state
double[] beta_nt = new double[numstates];
@@ -12848,10 +12848,11 @@ public class ChromHMM
if (szcommand.equalsIgnoreCase("Version"))
{
- System.out.println("This is Version 1.21 of ChromHMM (c) Copyright 2008-2012 Massachusetts Institute of Technology");
+ System.out.println("This is Version 1.22 of ChromHMM (c) Copyright 2008-2012 Massachusetts Institute of Technology");
}
else if ((szcommand.equals("BinarizeBam"))||(szcommand.equalsIgnoreCase("BinarizeBed")))
{
+ boolean bstacked = false;
boolean bgzip = false;
boolean bpairend = false;
boolean bsplit = false;
@@ -12877,6 +12878,12 @@ public class ChromHMM
boolean bshift = false;
boolean bflagsplitbins = false;
+ boolean bflagsplitcols = false;
+
+ boolean bsplitcols = false;
+ int nnummarksplit = 10;
+ int nmarksplitindex = -1;
+
int nargindex = 1;
if (args.length <= 4)
{
@@ -12926,6 +12933,16 @@ public class ChromHMM
bflagsplitbins = true;
//bsplit = true;
}
+ else if (args[nargindex].equals("-k"))
+ {
+ nmarksplitindex = Integer.parseInt(args[++nargindex]);
+ bflagsplitcols = true;
+ }
+ else if (args[nargindex].equals("-m"))
+ {
+ nnummarksplit = Integer.parseInt(args[++nargindex]);
+ bflagsplitcols = true;
+ }
else if (args[nargindex].equals("-n"))
{
nshift = Integer.parseInt(args[++nargindex]);
@@ -12955,6 +12972,14 @@ public class ChromHMM
{
bsplit = true;
}
+ else if (args[nargindex].equals("-splitcols"))
+ {
+ bsplitcols = true;
+ }
+ else if (args[nargindex].equals("-stacked"))
+ {
+ bstacked = true;
+ }
else if (args[nargindex].equals("-strictthresh"))
{
bcontainsthresh = false;
@@ -13015,6 +13040,12 @@ public class ChromHMM
bok = false;
}
+ if ((!bsplitcols)&&(bflagsplitcols))
+ {
+ bok = false;
+ }
+
+
if ((bok)&&(nargindex == args.length-4))
{
@@ -13037,26 +13068,208 @@ public class ChromHMM
szcontroldir = szmarkdir;
}
+
+
+ int nmaxsplit = 0;
+ if (bsplitcols)
+ {
+ String szLineCellMark;
+ int ncellmarkline = 0;
+
+ BufferedReader brcellmark = Util.getBufferedReader(szcellmarkfiletable);
+ HashSet hscombo = new HashSet();
+
+ while ((szLineCellMark=brcellmark.readLine()) != null)
+ {
+ if (szLineCellMark.trim().equals("")) continue;
+
+ StringTokenizer stcellmark = new StringTokenizer(szLineCellMark,"\t");
+
+ if (stcellmark.countTokens() < 3)
+ {
+ throw new IllegalArgumentException("In "+szcellmarkfiletable+" "+szLineCellMark+" does not have at least three columns");
+ }
+ String szcell = stcellmark.nextToken().trim(); //added trim in v1.20 to remove leading and trailing white space
+ String szmark = stcellmark.nextToken().trim();
+
+ if (hscombo.contains(szcell+"\t"+szmark))
+ {
+ throw new IllegalArgumentException("In "+szcellmarkfiletable+" "+szcell+"\t"+szmark+" found twice, but -splitcols specified");
+ }
+ hscombo.add(szcell+"\t"+szmark);
+
+ if (ncellmarkline % nnummarksplit == 0)
+ {
+ nmaxsplit++;
+ }
+ ncellmarkline++;
+ }
+ brcellmark.close();
+ }
+
if (nsplitindex >= 0)
{
- if (bpeaks)
- {
- Preprocessing.makeBinaryDataFromPeaksSplit(szchromlengthfile, szmarkdir, szoutputbinarydir, szcellmarkfiletable,
- nbinsize, bgzip, numsplitbins, nsplitindex, noffsetleft,noffsetright);
- }
- else
- {
- bok = false;
- }
+ if (bpeaks)
+ {
+ if ((bsplitcols)&&(nmarksplitindex==-1))
+ {
+ //need to figure out maxsplit
+
+ for (nmarksplitindex = 0; nmarksplitindex < nmaxsplit; nmarksplitindex++)
+ {
+ String szoutputbinarydirsub = szoutputbinarydir+"/SET"+(nmarksplitindex+1);
+ f = new File(szoutputbinarydirsub);
+ if (!f.exists())
+ {
+ if (!f.mkdirs())
+ {
+ throw new IllegalArgumentException(szoutputbinarydirsub+" does not exist and could not be created!");
+ }
+ }
+
+ Preprocessing.makeBinaryDataFromPeaksSplit(szchromlengthfile, szmarkdir, szoutputbinarydir, szcellmarkfiletable,
+ nbinsize, bgzip, numsplitbins, nsplitindex, noffsetleft,noffsetright,
+ bsplitcols,nnummarksplit,nmarksplitindex,bstacked);
+ }
+ }
+ else
+ {
+ if (bsplitcols)
+ {
+ if (nmarksplitindex >= nmaxsplit)
+ {
+ throw new IllegalArgumentException("nmarksplitindex"+" value of "+nmarksplitindex+" is greater than the maximum of "+(nmaxsplit-1));
+ }
+ szoutputbinarydir = szoutputbinarydir+"/SET"+(nmarksplitindex+1);
+ f = new File(szoutputbinarydir);
+ if (!f.exists())
+ {
+ if (!f.mkdirs())
+ {
+ throw new IllegalArgumentException(szoutputbinarydir+" does not exist and could not be created!");
+ }
+ }
+ }
+
+ Preprocessing.makeBinaryDataFromPeaksSplit(szchromlengthfile, szmarkdir, szoutputbinarydir, szcellmarkfiletable,
+ nbinsize, bgzip, numsplitbins, nsplitindex, noffsetleft,noffsetright,
+ bsplitcols,nnummarksplit,nmarksplitindex, bstacked);
+ }
+ }
+ else
+ {
+ bok = false;
+ }
}
else
{
- Preprocessing.makeBinaryDataFromBed(szchromlengthfile,szmarkdir,szcontroldir,nflankwidthcontrol,szcellmarkfiletable,
+
+ if ((bsplitcols)&&(nmarksplitindex==-1))
+ {
+ //going through all
+ //need to figure out maxsplit
+
+ for (nmarksplitindex = 0; nmarksplitindex < nmaxsplit; nmarksplitindex++)
+ {
+ String szoutputbinarydirsub = szoutputbinarydir+"/SET"+(nmarksplitindex+1);
+ f = new File(szoutputbinarydirsub);
+ if (!f.exists())
+ {
+ if (!f.mkdirs())
+ {
+ throw new IllegalArgumentException(szoutputbinarydirsub+" does not exist and could not be created!");
+ }
+ }
+
+ String szoutputsignaldirsub = null;
+ if (szoutputsignaldir != null)
+ {
+ szoutputsignaldirsub = szoutputsignaldir+"/SET"+(nmarksplitindex+1);
+ f = new File(szoutputsignaldirsub);
+ if (!f.exists())
+ {
+ if (!f.mkdirs())
+ {
+ throw new IllegalArgumentException(szoutputsignaldirsub+" does not exist and could not be created!");
+ }
+ }
+ }
+
+ String szoutputcontroldirsub = null;
+ if (szoutputcontroldirsub != null)
+ {
+ szoutputcontroldirsub = szoutputcontroldir+"/SET"+(nmarksplitindex+1);
+ f = new File(szoutputcontroldirsub);
+ if (!f.exists())
+ {
+ if (!f.mkdirs())
+ {
+ throw new IllegalArgumentException(szoutputcontroldirsub+" does not exist and could not be created!");
+ }
+ }
+ }
+
+ Preprocessing.makeBinaryDataFromBed(szchromlengthfile,szmarkdir,szcontroldir,nflankwidthcontrol,szcellmarkfiletable,
+ nshift,bcenterinterval, noffsetleft,noffsetright,szoutputsignaldir,
+ szoutputbinarydirsub,szoutputcontroldir,
+ dpoissonthresh,dfoldthresh,bcontainsthresh,
+ npseudocountcontrol,nbinsize,szcolfields,bpeaks, dcountthresh,szcommand.equalsIgnoreCase("BinarizeBam"),
+ bpairend, bgzip, bsplit, numsplitbins, bsplitcols,nnummarksplit,nmarksplitindex, bstacked);
+ }
+ }
+ else
+ {
+ if (bsplitcols)
+ {
+ if (nmarksplitindex >= nmaxsplit)
+ {
+ throw new IllegalArgumentException("nmarksplitindex"+" value of "+nmarksplitindex+" is greater than the maximum of "+(nmaxsplit-1));
+ }
+ szoutputbinarydir = szoutputbinarydir+"/SET"+(nmarksplitindex+1);
+ f = new File(szoutputbinarydir);
+ if (!f.exists())
+ {
+ if (!f.mkdirs())
+ {
+ throw new IllegalArgumentException(szoutputbinarydir+" does not exist and could not be created!");
+ }
+ }
+
+ if (szoutputsignaldir != null)
+ {
+ szoutputsignaldir = szoutputsignaldir+"/SET"+(nmarksplitindex+1);
+ f = new File(szoutputsignaldir);
+ if (!f.exists())
+ {
+ if (!f.mkdirs())
+ {
+ throw new IllegalArgumentException(szoutputsignaldir+" does not exist and could not be created!");
+ }
+ }
+ }
+
+ if (szoutputcontroldir != null)
+ {
+ szoutputcontroldir = szoutputcontroldir+"/SET"+(nmarksplitindex+1);
+ f = new File(szoutputcontroldir);
+ if (!f.exists())
+ {
+ if (!f.mkdirs())
+ {
+ throw new IllegalArgumentException(szoutputcontroldir+" does not exist and could not be created!");
+ }
+ }
+ }
+ }
+
+ Preprocessing.makeBinaryDataFromBed(szchromlengthfile,szmarkdir,szcontroldir,nflankwidthcontrol,szcellmarkfiletable,
nshift,bcenterinterval, noffsetleft,noffsetright,szoutputsignaldir,
szoutputbinarydir,szoutputcontroldir,
dpoissonthresh,dfoldthresh,bcontainsthresh,
npseudocountcontrol,nbinsize,szcolfields,bpeaks, dcountthresh,szcommand.equalsIgnoreCase("BinarizeBam"),
- bpairend, bgzip, bsplit, numsplitbins);
+ bpairend, bgzip, bsplit, numsplitbins, bsplitcols,nnummarksplit,nmarksplitindex,bstacked);
+
+ }
}
}
else
@@ -13071,15 +13284,15 @@ public class ChromHMM
{
//v1.18 update
System.out.println("usage BinarizeBed [-b binsize][-c controldir][-center][-colfields chromosome,start,end[,strand]][-e offsetend][-f foldthresh]"+
- "[-g signalthresh][-gzip][-n shift][-o outputcontroldir][-p poissonthresh][-peaks [-i splitindex]][-s offsetstart][-splitrows [-j numsplitbins]][-strictthresh][-t outputsignaldir]"+
+ "[-g signalthresh][-gzip][-n shift][-o outputcontroldir][-p poissonthresh][-peaks [-i splitrowindex]][-s offsetstart][-splitcols [-k splitcolindex][-m numsplitcols]][-splitrows [-j numsplitbins]][-stacked][-strictthresh][-t outputsignaldir]"+
"[-u pseudocountcontrol][-w flankwidthcontrol] "+
"chromosomelengthfile inputbeddir cellmarkfiletable outputbinarydir");
}
else
{
System.out.println("usage BinarizeBam [-b binsize][-c controldir][-e offsetend][-f foldthresh]"+
- "[-g signalthresh][-gzip][-o outputcontroldir][-p poissonthresh][-paired|[-center][-n shift][-peaks [-i splitindex]]"+
- "[-s offsetstart][-splitrows [-j numsplitbins]][-strictthresh][-t outputsignaldir]"+
+ "[-g signalthresh][-gzip][[-o outputcontroldir][-p poissonthresh][-paired|[-center][-n shift][-peaks [-i splitindex]]"+
+ "[-s offsetstart][-splitcols [-k splitcolindex][-m numsplitcols]][-splitrows [-j numsplitbins]][-stacked][-strictthresh][-t outputsignaldir]"+
"[-u pseudocountcontrol][-w flankwidthcontrol] "+
"chromosomelengthfile inputbamdir cellmarkfiletable outputbinarydir");
}
@@ -13217,7 +13430,7 @@ public class ChromHMM
int numsplitbins = ChromHMM.DEFAULT_NUMSPLITBINS;
boolean bsplit = false;
String szdirlistfile = null;
-
+ String sztype = "binary";
String szinputdir = null;
String szoutputdir = null;
boolean bflagsplitbins = false;
@@ -13237,6 +13450,10 @@ public class ChromHMM
{
szdirlistfile = args[++nargindex];
}
+ else if (args[nargindex].equals("-t"))
+ {
+ sztype = args[++nargindex];
+ }
else if (args[nargindex].equals("-j"))
{
numsplitbins = Integer.parseInt(args[++nargindex]);
@@ -13282,8 +13499,7 @@ public class ChromHMM
throw new IllegalArgumentException(szoutputdir+" does not exist and could not be created!");
}
}
-
- Preprocessing.mergeBinarizedFiles(szinputdir, szoutputdir, szdirlistfile, bsplit, numsplitbins, bgzip);
+ Preprocessing.mergeBinarizedFiles(szinputdir, szoutputdir, szdirlistfile, bsplit, numsplitbins, bgzip, sztype);
}
else
{
@@ -13293,7 +13509,7 @@ public class ChromHMM
if (!bok)
{
- System.out.println("usage MergeBinary [-f dirlistfile][-gzip][-splitrows [-j numsplitbins]] inputdir outputdir");
+ System.out.println("usage MergeBinary [-f dirlistfile][-gzip][-splitrows [-j numsplitbins]][-t type] inputdir outputdir");
}
}
else if (szcommand.equalsIgnoreCase("CompareModels"))
@@ -13366,7 +13582,7 @@ public class ChromHMM
if (!bok)
{
- System.out.println("usage CompareModels [-color r,g,b][-noimage] referencemodel comparedir outputprefix");
+ System.out.println("usage CompareModels [-color r,g,b][-noimage] referencemodelemissions comparedir outputprefix");
}
}
else if (szcommand.equalsIgnoreCase("StatePruning"))
@@ -14341,6 +14557,7 @@ public class ChromHMM
else if (args[nargindex].equals("-holdroworder"))
{
bnoorderrows = true;
+ nstateorder = ChromHMM.STATEORDER_FIXED;
}
else if ((args[nargindex].equals("-printstatebyline"))||(args[nargindex].equals("-printstatesbyline")))
{
=====================================
edu/mit/compbio/ChromHMM/ConvertGeneTable.java
=====================================
@@ -53,14 +53,23 @@ public class ConvertGeneTable
//String szchromlengths = args[4];
String szLine;
HashMap hmlengths = new HashMap();
- BufferedReader brlength = new BufferedReader(new FileReader(szchromlengths));
+ //BufferedReader brlength = new BufferedReader(new FileReader(szchromlengths));
+ BufferedReader brlength = Util.getBufferedReader(szchromlengths);
while ((szLine = brlength.readLine())!=null)
{
- StringTokenizer st = new StringTokenizer(szLine,"\t");
+ StringTokenizer st = new StringTokenizer(szLine,"\t ");
+ if (st.countTokens()==0)
+ {
+ throw new IllegalArgumentException("empty line found in "+szchromlengths);
+ }
+ else if (st.countTokens()==1)
+ {
+ throw new IllegalArgumentException("Only one token found in line "+szLine+" of "+szchromlengths);
+ }
hmlengths.put(st.nextToken(),Integer.valueOf(st.nextToken()));
}
brlength.close();
- BufferedReader br = new BufferedReader(new FileReader(sztable));
+ BufferedReader br = Util.getBufferedReader(sztable);
PrintWriter pwtss = null;
@@ -162,8 +171,11 @@ public class ConvertGeneTable
if (!szexonStarts.equals("\t"))
st.nextToken();
String szexonEnds = st.nextToken();
- if (!szexonEnds.equals("\t"))
- st.nextToken();
+
+ //removed 1.22
+ //if (!szexonEnds.equals("\t"))
+ // st.nextToken();
+
//String szscore = st.nextToken();
//if (!szscore.equals("\t"))
//st.nextToken();
=====================================
edu/mit/compbio/ChromHMM/Preprocessing.java
=====================================
@@ -524,7 +524,8 @@ public class Preprocessing
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
public static void makeBinaryDataFromPeaksSplit(String szchromlengthfile, String szmarkdir, String szoutputbinarydir, String szcellmarkfiletable,
- int nbinsize, boolean bgzip, int numsplitbins, int nsplit, int noffsetleft, int noffsetright) throws IOException
+ int nbinsize, boolean bgzip, int numsplitbins, int nsplit, int noffsetleft, int noffsetright,
+ boolean bsplitcols,int nnummarksplit,int nmarksplitindex, boolean bstacked) throws IOException
{
//reads in the chromosome length information file
//the first column of this file is the chromosome and the second is the chromsome length
@@ -567,29 +568,47 @@ public class Preprocessing
HashSet hsmarks = new HashSet(); //contains the names of all marks
HashMap hmfiles = new HashMap(); //contains a mapping from (cell type, mark) to the regular data file
+
+ int ncellmarkentry = 0;
+ int ncolsplitbegin = nnummarksplit*(nmarksplitindex);
+ int ncolsplitend = nnummarksplit*(nmarksplitindex+1);
+
while ((szLine = brcellmark.readLine())!=null)
{
if (szLine.trim().equals("")) continue;
- StringTokenizer st = new StringTokenizer(szLine,"\t");
- if (st.countTokens() != 3)
- {
- throw new IllegalArgumentException("In "+szcellmarkfiletable+" "+szLine+" does not have 3 columns, "+
+
+ if ((!bsplitcols) || ((ncolsplitbegin<=ncellmarkentry)&&(ncellmarkentry<ncolsplitend)))
+ {
+ StringTokenizer st = new StringTokenizer(szLine,"\t");
+
+ if (st.countTokens() != 3)
+ {
+ throw new IllegalArgumentException("In "+szcellmarkfiletable+" "+szLine+" does not have 3 columns, "+
"expecting 3 columns since peaks was specified");
- }
- String szcell = st.nextToken().trim(); //added trim in v1.20 to remove leading and trailing white space
- String szmark = st.nextToken().trim();
- String szfile = st.nextToken().trim();
+ }
+ String szcell = st.nextToken().trim(); //added trim in v1.20 to remove leading and trailing white space
+ String szmark = st.nextToken().trim();
- hscells.add(szcell);
- hsmarks.add(szmark);
+ if (bstacked)
+ {
+ szcell = "genome";
+ szmark = szcell + "_" + szmark;
+ }
- ArrayList alfiles = (ArrayList) hmfiles.get(szcell+"\t"+szmark);
- if (alfiles == null)
- {
- alfiles = new ArrayList();
- hmfiles.put(szcell+"\t"+szmark,alfiles);
+ String szfile = st.nextToken().trim();
+
+ hscells.add(szcell);
+ hsmarks.add(szmark);
+
+ ArrayList alfiles = (ArrayList) hmfiles.get(szcell+"\t"+szmark);
+ if (alfiles == null)
+ {
+ alfiles = new ArrayList();
+ hmfiles.put(szcell+"\t"+szmark,alfiles);
+ }
+ alfiles.add(szfile);
}
- alfiles.add(szfile);
+ ncellmarkentry++;
}
brcellmark.close();
@@ -834,7 +853,8 @@ public class Preprocessing
String szoutputsignaldir,String szoutputbinarydir, String szoutputcontroldir,
double dpoissonthresh, double dfoldthresh,boolean bcontainsthresh, int npseudocountcontrol,int nbinsize,
String szcolfields, boolean bpeaks, double dcountthresh, boolean bbinarizebam, boolean bpairend,
- boolean bgzip, boolean bsplit, int numsplitbins
+ boolean bgzip, boolean bsplit, int numsplitbins,
+ boolean bsplitcols,int nnummarksplit,int nmarksplitindex, boolean bstacked
) throws IOException
{
@@ -880,60 +900,76 @@ public class Preprocessing
boolean bcontrol = false; //whether there is control data at all
String szcontrolfile;
+ int ncellmarkentry = 0;
+ int ncolsplitbegin = nnummarksplit*(nmarksplitindex);
+ int ncolsplitend = nnummarksplit*(nmarksplitindex+1);
+
while ((szLine = brcellmark.readLine())!=null)
{
if (szLine.trim().equals("")) continue;
- StringTokenizer st = new StringTokenizer(szLine,"\t");
- if (st.countTokens() < 3)
- {
- throw new IllegalArgumentException("In "+szcellmarkfiletable+" "+szLine+" has less than 3 columns, expecting at least 3!");
- }
- String szcell = st.nextToken().trim(); //added in v1.20 to remove leading and trailing white space
- String szmark = st.nextToken().trim();
- String szfile = st.nextToken().trim();
- if (st.hasMoreTokens())
- {
- //we have control data
- szcontrolfile = st.nextToken().trim();
- bcontrol = true;
-
- //was hmfiles in version 1.00
- ArrayList alfilescontrol = (ArrayList) hmfilescontrol.get(szcell+"\t"+szmark);
- if (alfilescontrol == null)
- {
- alfilescontrol = new ArrayList();
- hmfilescontrol.put(szcell+"\t"+szmark,alfilescontrol);
- }
- if (!alfilescontrol.contains(szcontrolfile))
- {
- //added in version 1.11 to only count control file once, if appearing multiple times
- alfilescontrol.add(szcontrolfile);
- }
+ if ((!bsplitcols) || ((ncolsplitbegin<=ncellmarkentry)&&(ncellmarkentry<ncolsplitend)))
+ {
+ StringTokenizer st = new StringTokenizer(szLine,"\t");
+ if (st.countTokens() < 3)
+ {
+ throw new IllegalArgumentException("In "+szcellmarkfiletable+" "+szLine+" has less than 3 columns, expecting at least 3!");
+ }
+ String szcell = st.nextToken().trim(); //added in v1.20 to remove leading and trailing white space
+ String szmark = st.nextToken().trim();
- HashSet hscellcontrol = (HashSet) hmfilescellcontrol.get(szcell);
- if (hscellcontrol == null)
- {
- hscellcontrol = new HashSet();
- hmfilescellcontrol.put(szcell, hscellcontrol);
- }
+ if (bstacked)
+ {
+ szcell = "genome";
+ szmark = szcell + "_" + szmark;
+ }
+
+ String szfile = st.nextToken().trim();
+ if (st.hasMoreTokens())
+ {
+ //we have control data
+ szcontrolfile = st.nextToken().trim();
+ bcontrol = true;
+
+ //was hmfiles in version 1.00
+ ArrayList alfilescontrol = (ArrayList) hmfilescontrol.get(szcell+"\t"+szmark);
+ if (alfilescontrol == null)
+ {
+ alfilescontrol = new ArrayList();
+ hmfilescontrol.put(szcell+"\t"+szmark,alfilescontrol);
+ }
+
+ if (!alfilescontrol.contains(szcontrolfile))
+ {
+ //added in version 1.11 to only count control file once, if appearing multiple times
+ alfilescontrol.add(szcontrolfile);
+ }
+
+ HashSet hscellcontrol = (HashSet) hmfilescellcontrol.get(szcell);
+ if (hscellcontrol == null)
+ {
+ hscellcontrol = new HashSet();
+ hmfilescellcontrol.put(szcell, hscellcontrol);
+ }
- hscellcontrol.add(szcontrolfile);
- }
- else
- {
- hscellnocontrol.add(szcell);
- }
- hscells.add(szcell);
- hsmarks.add(szmark);
+ hscellcontrol.add(szcontrolfile);
+ }
+ else
+ {
+ hscellnocontrol.add(szcell);
+ }
+ hscells.add(szcell);
+ hsmarks.add(szmark);
- ArrayList alfiles = (ArrayList) hmfiles.get(szcell+"\t"+szmark);
- if (alfiles == null)
- {
- alfiles = new ArrayList();
- hmfiles.put(szcell+"\t"+szmark,alfiles);
+ ArrayList alfiles = (ArrayList) hmfiles.get(szcell+"\t"+szmark);
+ if (alfiles == null)
+ {
+ alfiles = new ArrayList();
+ hmfiles.put(szcell+"\t"+szmark,alfiles);
+ }
+ alfiles.add(szfile);
}
- alfiles.add(szfile);
+ ncellmarkentry++;
}
brcellmark.close();
@@ -2679,6 +2715,10 @@ public class Preprocessing
int[] grid_nchrom_nbin = grid_nchrom[nbin];
int[] gridcontrol_nchrom_nbin = gridcontrol_nchrom[nbin];
st = new StringTokenizer(szLine,"\t");
+ if (st.countTokens() != nummarks)
+ {
+ throw new IllegalArgumentException("In "+szfilename+" did not find the expected "+nummarks+" marks in this line: "+szLine);
+ }
for (int nmark = 0; nmark < nummarks; nmark++)
{
//reading in the regular data
@@ -3202,6 +3242,11 @@ public class Preprocessing
{
ntotallocs++;
st = new StringTokenizer(szLine,"\t");
+ if (st.countTokens() != nummarks)
+ {
+ throw new IllegalArgumentException("In "+szfilename+" did not find the expected "+nummarks+" marks in this line: "+szLine);
+ }
+
for (int nj = 0; nj < nummarks; nj++)
{
double dval = Double.parseDouble(st.nextToken());
@@ -3548,7 +3593,7 @@ public class Preprocessing
* used to do a row split on the input files, including if only one subdirectory is provided and no merging is done.
*/
public static void mergeBinarizedFiles(String szinputdir, String szoutputdir, String szdirlistfile,
- boolean bsplit, int numsplitbins, boolean bgzip) throws IOException
+ boolean bsplit, int numsplitbins, boolean bgzip,String szfiletype) throws IOException
{
ArrayList aldirs = new ArrayList();
@@ -3620,7 +3665,8 @@ public class Preprocessing
for (int nfile = 0; nfile < chromfilesall.length; nfile++)
{
String szcurrfile = chromfilesall[nfile];
- if ((szcurrfile.contains("_binary"))&&(!(new File(szcurrpath+"/"+szcurrfile)).isHidden()))
+ //if ((szcurrfile.contains("_binary"))&&(!(new File(szcurrpath+"/"+szcurrfile)).isHidden()))
+ if ((szcurrfile.contains("_"+szfiletype))&&(!(new File(szcurrpath+"/"+szcurrfile)).isHidden()))
{
//added hidden check in v.1.11
//read first two lines
@@ -3655,7 +3701,8 @@ public class Preprocessing
if (hmbrA[ndir].size() == 0)
{
- throw new IllegalArgumentException("No files found in "+szcurrpath+" containing '_binary' that are not Hidden");
+ //throw new IllegalArgumentException("No files found in "+szcurrpath+" containing '_binary' that are not Hidden");
+ throw new IllegalArgumentException("No files found in "+szcurrpath+" containing '_"+szfiletype+"' that are not Hidden");
}
}
@@ -3706,7 +3753,6 @@ public class Preprocessing
for (int nfile = 0; nfile < chromfilescombine.length; nfile++)
{
-
String szcurrfile = chromfilescombine[nfile];
for (int ndir = 0; ndir < subdirall.length; ndir++)
{
@@ -3739,7 +3785,8 @@ public class Preprocessing
if (bgzip)
{
//System.out.println(szcell+"_"+szchrom+"."+nsplit);
- GZIPOutputStream pwzip = new GZIPOutputStream(new FileOutputStream(szoutputdir+"/"+szcell+"_"+szchrom+"."+nsplit+"_binary.txt.gz"));
+ //GZIPOutputStream pwzip = new GZIPOutputStream(new FileOutputStream(szoutputdir+"/"+szcell+"_"+szchrom+"."+nsplit+"_binary.txt.gz"));
+ GZIPOutputStream pwzip = new GZIPOutputStream(new FileOutputStream(szoutputdir+"/"+szcell+"_"+szchrom+"."+nsplit+"_"+szfiletype+".txt.gz"));
//szfile));
//PrintWriter pw = new PrintWriter(new FileWriter(szoutputdir+"/"+szcell+"_"+szmark+"_binary.txt"));
//need to read here
@@ -3808,7 +3855,8 @@ public class Preprocessing
//String szcell = st.nextToken();
//String szmark = st.nextToken();
//System.out.println(szcell+"_"+szchrom+"."+nsplit);
- PrintWriter pw = new PrintWriter(new FileWriter(szoutputdir+"/"+szcell+"_"+szchrom+"."+nsplit+"_binary.txt"));
+ //PrintWriter pw = new PrintWriter(new FileWriter(szoutputdir+"/"+szcell+"_"+szchrom+"."+nsplit+"_binary.txt"));
+ PrintWriter pw = new PrintWriter(new FileWriter(szoutputdir+"/"+szcell+"_"+szchrom+"."+nsplit+"_"+szfiletype+".txt"));
//need to read here
pw.println(szcell+"\t"+szchrom+"."+nsplit);
pw.println(szmergedheader);
@@ -3868,7 +3916,8 @@ public class Preprocessing
{
if (bgzip)
{
- GZIPOutputStream pwzip = new GZIPOutputStream(new FileOutputStream(szoutputdir+"/"+szcell+"_"+szchrom+"_binary.txt.gz"));
+ //GZIPOutputStream pwzip = new GZIPOutputStream(new FileOutputStream(szoutputdir+"/"+szcell+"_"+szchrom+"_binary.txt.gz"));
+ GZIPOutputStream pwzip = new GZIPOutputStream(new FileOutputStream(szoutputdir+"/"+szcell+"_"+szchrom+"_"+szfiletype+".txt.gz"));
//szfile));
//PrintWriter pw = new PrintWriter(new FileWriter(szoutputdir+"/"+szcell+"_"+szmark+"_binary.txt"));
//need to read here
@@ -3934,7 +3983,8 @@ public class Preprocessing
//StringTokenizer st = new StringTokenizer(chromfilescombine[nfile],"\t");
//String szcell = st.nextToken();
//String szmark = st.nextToken();
- PrintWriter pw = new PrintWriter(new FileWriter(szoutputdir+"/"+szcell+"_"+szchrom+"_binary.txt"));
+ //PrintWriter pw = new PrintWriter(new FileWriter(szoutputdir+"/"+szcell+"_"+szchrom+"_binary.txt"));
+ PrintWriter pw = new PrintWriter(new FileWriter(szoutputdir+"/"+szcell+"_"+szchrom+"_"+szfiletype+".txt"));
//need to read here
pw.println(szcell+"\t"+szchrom);
pw.println(szmergedheader);
View it on GitLab: https://salsa.debian.org/med-team/chromhmm/-/compare/31cc527348f0ba1e8212581fca8d00a869421168...54d24f85c482ab982d40338dd27de7c9b2293081
--
View it on GitLab: https://salsa.debian.org/med-team/chromhmm/-/compare/31cc527348f0ba1e8212581fca8d00a869421168...54d24f85c482ab982d40338dd27de7c9b2293081
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20210606/15c74ba8/attachment-0001.htm>
More information about the debian-med-commit mailing list