[med-svn] [Git][med-team/chromhmm][upstream] New upstream version 1.27+dfsg
Dylan Aïssi (@daissi)
gitlab at salsa.debian.org
Sat Aug 16 08:57:33 BST 2025
Dylan Aïssi pushed to branch upstream at Debian Med / chromhmm
Commits:
50ac3881 by Dylan Aïssi at 2025-08-16T09:56:21+02:00
New upstream version 1.27+dfsg
- - - - -
4 changed files:
- edu/mit/compbio/ChromHMM/ChromHMM.java
- edu/mit/compbio/ChromHMM/Preprocessing.java
- edu/mit/compbio/ChromHMM/StateAnalysis.java
- versionlog.txt
Changes:
=====================================
edu/mit/compbio/ChromHMM/ChromHMM.java
=====================================
@@ -13387,7 +13387,7 @@ public class ChromHMM
if (szcommand.equalsIgnoreCase("Version"))
{
- System.out.println("This is Version 1.26 of ChromHMM (c) Copyright 2008-2012 Massachusetts Institute of Technology");
+ System.out.println("This is Version 1.27 of ChromHMM (c) Copyright 2008-2012 Massachusetts Institute of Technology");
}
else if ((szcommand.equals("BinarizeBam"))||(szcommand.equalsIgnoreCase("BinarizeBed")))
{
=====================================
edu/mit/compbio/ChromHMM/Preprocessing.java
=====================================
@@ -717,8 +717,8 @@ public class Preprocessing
if (bstacked)
{
- szcell = "genome";
szmark = szcell + "_" + szmark;
+ szcell = "genome"; //update in 1.27
}
String szfile = st.nextToken().trim();
@@ -1051,8 +1051,8 @@ public class Preprocessing
if (bstacked)
{
- szcell = "genome";
szmark = szcell + "_" + szmark;
+ szcell = "genome";
}
String szfile = st.nextToken().trim();
=====================================
edu/mit/compbio/ChromHMM/StateAnalysis.java
=====================================
@@ -44,6 +44,11 @@ public class StateAnalysis
static double PVALLOG10CUTOFF = 300;
+ /**
+ * Flag set once a duplicate state assignment has been found, so that the warning is printed only once
+ */
+ static boolean bfounddup = false;
+
/**
* A record for storing a segmentation
*/
@@ -403,6 +408,7 @@ public class StateAnalysis
}
BufferedReader brcoords = Util.getBufferedReader(szinputcoorddir+files[ncoordfile]);
+
if (bunique)
{
ArrayList alrecs = new ArrayList();
@@ -410,10 +416,26 @@ public class StateAnalysis
{
if (szLine.trim().equals("")) continue;
String[] szLineA = szLine.split("\\s+");
+ if (nchromindex >= szLineA.length)
+ {
+ throw new IllegalArgumentException(nchromindex+" is an invalid column index for line "+szLine+" in "+szinputcoorddir+files[ncoordfile]);
+ }
String szreadchrom = szLineA[nchromindex];
+
if (szreadchrom.equals(szchrom))
{
- String szcurrchrom = szLineA[nchromindex];
+ //removed in v1.27 appears to be a dead store
+ //String szcurrchrom = szLineA[nchromindex];
+ if (nstartindex >= szLineA.length)
+ {
+ throw new IllegalArgumentException(nstartindex+" is an invalid column index for line "+szLine+" in "+szinputcoorddir+files[ncoordfile]);
+ }
+
+ if (nendindex >= szLineA.length)
+ {
+ throw new IllegalArgumentException(nendindex+" is an invalid column index for line "+szLine+" in "+szinputcoorddir+files[ncoordfile]);
+ }
+
int nbeginactual =Integer.parseInt(szLineA[nstartindex])-noffsetleft;
int nendactual =Integer.parseInt(szLineA[nendindex])-noffsetright;
if (bcenter)
@@ -545,6 +567,12 @@ public class StateAnalysis
if ((busesignal)&&(nsignalindex < szLineA.length))
{
damount = Double.parseDouble(szLineA[nsignalindex]);
+
+ if (damount < 0)
+ {
+ throw new IllegalArgumentException("A negative weight of "+damount+" was found in "+szinputcoorddir+files[nfile]+
+ " but weights are assumed to be non-negative");
+ }
}
else
{
@@ -879,6 +907,13 @@ public class StateAnalysis
//stores each label position in the genome
for (int npos = nbegin; npos <= nend; npos++)
{
+ if ((!StateAnalysis.bfounddup) &&(labels_nchrom[npos] >= 0)&&(labels_nchrom[npos]!=slabel))
+ {
+ StateAnalysis.bfounddup = true;
+ System.out.println("WARNING: found the same position assigned to multiple states. First example found: "+theSegmentRec.szchrom+
+ ":"+(npos*nbinsize+1)+". Will use the last occurrence in file.");
+
+ }
labels_nchrom[npos] = slabel;
tallylabel[slabel]++;
}
@@ -989,26 +1024,32 @@ public class StateAnalysis
//reading in the coordinates to overlap with
BufferedReader brcoords = Util.getBufferedReader(szinputcoorddir +files[nfile]);
+
ArrayList alrecs = new ArrayList();
while ((szLine = brcoords.readLine())!=null)
{
if (szLine.trim().equals("")) continue;
String[] szLineA = szLine.split("\\s+");
- if (nstartindex >= szLineA.length)
- {
- throw new IllegalArgumentException(nstartindex+" is an invalid column index for "+szLine+" in "+szinputcoorddir+files[nfile]);
- }
-
- if (nendindex >= szLineA.length)
+ if (nchromindex >= szLineA.length)
{
- throw new IllegalArgumentException(nendindex+" is an invalid column index for "+szLine+" in "+szinputcoorddir+files[nfile]);
+ throw new IllegalArgumentException(nchromindex+" is an invalid column index for line "+szLine+" in "+szinputcoorddir+files[nfile]);
}
-
String szcurrchrom = szLineA[nchromindex];
if (szchrom.equals(szcurrchrom))
{
+ if (nstartindex >= szLineA.length)
+ {
+ throw new IllegalArgumentException(nstartindex+" is an invalid column index for line "+szLine+" in "+szinputcoorddir+files[nfile]);
+ }
+
+ if (nendindex >= szLineA.length)
+ {
+ throw new IllegalArgumentException(nendindex+" is an invalid column index for line "+szLine+" in "+szinputcoorddir+files[nfile]);
+ }
+
+
int nbeginactual =Integer.parseInt(szLineA[nstartindex])-noffsetleft;
int nendactual =Integer.parseInt(szLineA[nendindex])-noffsetright;
if (bcenter)
@@ -1120,11 +1161,28 @@ public class StateAnalysis
else
{
BufferedReader brcoords = Util.getBufferedReader(szinputcoorddir +files[nfile]);
+
while ((szLine = brcoords.readLine())!=null)
{
if (szLine.trim().equals("")) continue;
String[] szLineA = szLine.split("\\s+");
+
+ if (nchromindex >= szLineA.length)
+ {
+ throw new IllegalArgumentException(nchromindex+" is an invalid column index for line "+szLine+" in "+szinputcoorddir+files[nfile]);
+ }
+
+ if (nstartindex >= szLineA.length)
+ {
+ throw new IllegalArgumentException(nstartindex+" is an invalid column index for line "+szLine+" in "+szinputcoorddir+files[nfile]);
+ }
+
+ if (nendindex >= szLineA.length)
+ {
+ throw new IllegalArgumentException(nendindex+" is an invalid column index for line "+szLine+" in "+szinputcoorddir+files[nfile]);
+ }
+
String szchrom = szLineA[nchromindex];
int nbeginactual =Integer.parseInt(szLineA[nstartindex])-noffsetleft;
int nbegin = nbeginactual/nbinsize;
@@ -1135,7 +1193,16 @@ public class StateAnalysis
double damount;
if ((busesignal)&&(nsignalindex < szLineA.length))
{
+ if (nsignalindex >= szLineA.length)
+ {
+ throw new IllegalArgumentException(nsignalindex+" is an invalid column index for line "+szLine+" in "+szinputcoorddir+files[nfile]);
+ }
damount = Double.parseDouble(szLineA[nsignalindex]);
+ if (damount < 0)
+ {
+ throw new IllegalArgumentException("A negative weight of "+damount+" was found in "+szinputcoorddir+files[nfile]+
+ " but weights are assumed to be non-negative");
+ }
}
else
{
@@ -1622,6 +1689,13 @@ public class StateAnalysis
//stores each label position in the genome
for (int npos = nbegin; npos <= nend; npos++)
{
+ if ((!StateAnalysis.bfounddup) &&(labels[npos] >= 0)&&(labels[npos]!=slabel))
+ {
+ StateAnalysis.bfounddup = true;
+ System.out.println("WARNING: found the same position assigned to multiple states. First example found: "+szchrom+
+ ":"+(npos*nbinsize+1)+". Will use the last occurrence in file.");
+
+ }
labels[npos] = slabel;
//tallylabel[slabel]++;
}
@@ -1664,24 +1738,31 @@ public class StateAnalysis
//reading in the coordinates to overlap with
BufferedReader brcoords = Util.getBufferedReader(szinputcoorddir +files[nfile]);
+
ArrayList alrecs = new ArrayList();
while ((szLine = brcoords.readLine())!=null)
{
if (szLine.trim().equals("")) continue;
String[] szLineA = szLine.split("\\s+");
- if (nstartindex >= szLineA.length)
- {
- throw new IllegalArgumentException(nstartindex+" is an invalid column index for "+szLine+" in "+szinputcoorddir+files[nfile]);
- }
- if (nendindex >= szLineA.length)
+ if (nchromindex >= szLineA.length)
{
- throw new IllegalArgumentException(nendindex+" is an invalid column index for "+szLine+" in "+szinputcoorddir+files[nfile]);
+ throw new IllegalArgumentException(nchromindex+" is an invalid column index for line "+szLine+" in "+szinputcoorddir+files[nfile]);
}
String szcurrchrom = szLineA[nchromindex];
if (szchromwant.equals(szcurrchrom))
{
+ if (nstartindex >= szLineA.length)
+ {
+ throw new IllegalArgumentException(nstartindex+" is an invalid column index for line "+szLine+" in "+szinputcoorddir+files[nfile]);
+ }
+
+ if (nendindex >= szLineA.length)
+ {
+ throw new IllegalArgumentException(nendindex+" is an invalid column index for line "+szLine+" in "+szinputcoorddir+files[nfile]);
+ }
+
int nbeginactual =Integer.parseInt(szLineA[nstartindex])-noffsetleft;
int nendactual =Integer.parseInt(szLineA[nendindex])-noffsetright;
if (bcenter)
@@ -1791,15 +1872,31 @@ public class StateAnalysis
else
{
BufferedReader brcoords = Util.getBufferedReader(szinputcoorddir +files[nfile]);
+
while ((szLine = brcoords.readLine())!=null)
{
if (szLine.trim().equals("")) continue;
String[] szLineA = szLine.split("\\s+");
+ if (nchromindex >= szLineA.length)
+ {
+ throw new IllegalArgumentException(nchromindex+" is an invalid column index for line "+szLine+" in "+szinputcoorddir+files[nfile]);
+ }
+
String szchrom = szLineA[nchromindex];
if (!szchromwant.equals(szchrom))
continue;
+ if (nstartindex >= szLineA.length)
+ {
+ throw new IllegalArgumentException(nstartindex+" is an invalid column index for line "+szLine+" in "+szinputcoorddir+files[nfile]);
+ }
+
+ if (nendindex >= szLineA.length)
+ {
+ throw new IllegalArgumentException(nendindex+" is an invalid column index for line "+szLine+" in "+szinputcoorddir+files[nfile]);
+ }
+
int nbeginactual =Integer.parseInt(szLineA[nstartindex])-noffsetleft;
int nbegin = nbeginactual/nbinsize;
@@ -1809,7 +1906,17 @@ public class StateAnalysis
double damount;
if ((busesignal)&&(nsignalindex < szLineA.length))
{
+ if (nsignalindex >= szLineA.length)
+ {
+ throw new IllegalArgumentException(nsignalindex+" is an invalid column index for line "+szLine+" in "+szinputcoorddir+files[nfile]);
+ }
damount = Double.parseDouble(szLineA[nsignalindex]);
+
+ if (damount < 0)
+ {
+ throw new IllegalArgumentException("A negative weight of "+damount+" was found in "+szinputcoorddir+files[nfile]+
+ " but weights are assumed to be non-negative");
+ }
}
else
{
@@ -2977,6 +3084,13 @@ public class StateAnalysis
//short slabel = theSegmentRec.slabel;
for (int npos = nbegin; npos <= nend; npos++)
{
+ if ((!StateAnalysis.bfounddup) &&(labels[npos] >= 0)&&(labels[npos]!=slabel))
+ {
+ StateAnalysis.bfounddup = true;
+ System.out.println("WARNING: found the same position assigned to multiple states. First example found: "+szchrom+
+ ":"+(npos*nbinsize+1)+". Will use the last occurrence in file.");
+
+ }
labels[npos] = slabel;
}
@@ -3332,6 +3446,13 @@ public class StateAnalysis
short slabel = theSegmentRec.slabel;
for (int npos = nbegin; npos <= nend; npos++)
{
+ if ((!StateAnalysis.bfounddup) &&(labels_nchrom[npos] >= 0)&&(labels_nchrom[npos]!=slabel))
+ {
+ StateAnalysis.bfounddup = true;
+ System.out.println("WARNING: found the same position assigned to multiple states. First example found: "+theSegmentRec.szchrom+
+ ":"+(npos*nbinsize+1)+". Will use the last occurrence in file.");
+
+ }
labels_nchrom[npos] = slabel;
if (slabel >= 0)
{
=====================================
versionlog.txt
=====================================
@@ -1,5 +1,12 @@
+7/6/2025 ChromHMM 1.27
+* Fixed a bug with the '-stacked' flag in BinarizeBam and BinarizeBed so that marks are now actually named cell_mark as intended
+* Added warning message if the same base is annotated to different states when conducting an OverlapEnrichment or NeighborhoodEnrichment
+* Added additional error checking on input coordinates for OverlapEnrichment and more informative error messages
+* Added the requirement that the values in the signal column be non-negative when using the '-signal' flag with OverlapEnrichment; an error is now thrown if that
+requirement is violated. Previously this was implicitly assumed but not explicitly documented and there was no error checking.
+
1/2/2025 ChromHMM 1.26
-* Added to ChromHMM's output -log10 p-values of enrichments when using OverlapEnrichment with the '-center' flag. This can be supressed with the use of the '-nopvals' flag
+* Added to ChromHMM's output -log10 p-values of enrichments when using OverlapEnrichment with the '-center' flag. This can be suppressed with the use of the '-nopvals' flag
* Fixed a bug when running OverlapEnrichment with the '-uniformscale' flag that caused ChromHMM to display an extraneous 'Genome %'
in the header row and 100% in the bottom row in the text file output. This did not affect the image files or running in a default mode without the flag.
* Fixed a bug when running OverlapEnrichment with the '-center' and '-multicount' flags that caused an incorrect base % for the annotation category
@@ -74,7 +81,7 @@ to allow binarizing different subsets of the columns in parallel, and then can b
See manual for details on each option
* Added the option '-t type' to MergeBinary. If this option is specified, then it allows merging
files other than binary files, in particular signal or controlsignal files. Files in the subdirectories
-of inputdir that include ‘_type’ in the file name will be merged. By default type is binary, but for
+of inputdir that include _type in the file name will be merged. By default type is binary, but for
regular signal files it should be signal and for control signal files it should be controlsignal.
* Tweaked the position of column labels to better align with the columns in the heatmap
* Modified ConvertGeneTable to also read gzipped file
@@ -165,7 +172,7 @@ of additional runtime
*Added the '-pseudo' flag to LearnModel. If this flag is present, pseudo counts of 1 are used in computing the model parameters to smooth away from zero values.
These pseudo counts can provide numerical stability in the situation when the -n numseq is specified in training and some feature has very few present occurrences.
*Added the '-lowmem' flag option also to EvalSubset which uses less memory by only loading one chromosome in at a time though with potentially additional runtime.
-*Added the '-paired' flag to BinarizeBam. If this option is present then reads in the BAM file are treated as pairs, and each pair is counted once with bin assignment is based on shifting half the insert size. If this option is present then the –n shift, –center, and –peaks options cannot be used.
+*Added the '-paired' flag to BinarizeBam. If this option is present then reads in the BAM file are treated as pairs, and each pair is counted once with bin assignment based on shifting half the insert size. If this option is present then the '-n shift', '-center', and '-peaks' options cannot be used.
*Added a more informative error message for the situation in which the chromosome naming in the chromosome length file is
inconsistent with the Bam/Bed files when binarizing data
*Added a more informative error message for the situation in which the chromosome names in the segmentation files are not
@@ -188,7 +195,7 @@ Such coordinate positions that are off the chromosome are ignored instead of an
11/2/2017 ChromHMM 1.13 (GitHub only release)
*Added '-lowmem' flag to LearnModel, OverlapEnrichment, NeighborhoodEnrichment, and MakeSegmentation to have ChromHMM only load one chromosome file into memory at a time, thus reducing maximum memory usage at the potential cost of additional runtime
-*Added '-n numseq' flag to LearnModel. If this flag is present and the ‘-p’ flag is present then on each iteration of training only numseq chromosome files are randomly selected to be used for training. In such cases the ‘-d’ flag should be set to a negative number so model learning does not terminate prematurely since negative changes in the log-likelihood are expected since different chromosomes are used on each iteration. Also only numseq files are considered in the initial model initialization under the default ‘information’ mode. If the ‘-n’ flag is specified without the ‘-p’ flag a subset of chromosomes will still be used for initialization, but all chromosomes will still be used on all iterations of training.
+*Added '-n numseq' flag to LearnModel. If this flag is present and the -p flag is present then on each iteration of training only numseq chromosome files are randomly selected to be used for training. In such cases the -d flag should be set to a negative number so model learning does not terminate prematurely since negative changes in the log-likelihood are expected since different chromosomes are used on each iteration. Also only numseq files are considered in the initial model initialization under the default information mode. If the -n flag is specified without the -p flag a subset of chromosomes will still be used for initialization, but all chromosomes will still be used on all iterations of training.
4/3/2016 ChromHMM 1.12 (4/15/2016 updated hg38 and rn6 CpGIsland files)
View it on GitLab: https://salsa.debian.org/med-team/chromhmm/-/commit/50ac3881572026371499e60225bde156d71e384e
--
View it on GitLab: https://salsa.debian.org/med-team/chromhmm/-/commit/50ac3881572026371499e60225bde156d71e384e
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20250816/6627148c/attachment-0001.htm>
More information about the debian-med-commit
mailing list