[med-svn] [rsem] 01/03: Imported Upstream version 1.2.23+dfsg

Michael Crusoe misterc-guest at moszumanska.debian.org
Thu Oct 22 15:25:21 UTC 2015


This is an automated email from the git hooks/post-receive script.

misterc-guest pushed a commit to branch master
in repository rsem.

commit 18fa8665d16961d56878014a74a65e2be11788d8
Author: Michael R. Crusoe <crusoe at ucdavis.edu>
Date:   Thu Oct 22 08:02:31 2015 -0700

    Imported Upstream version 1.2.23+dfsg
---
 NoiseQProfile.h           |  7 ++++++-
 README.md                 | 18 ++++++++----------
 WHAT_IS_NEW               |  9 +++++++++
 extractRef.cpp            |  6 +++---
 rsem-calculate-expression |  5 +++--
 rsem-prepare-reference    |  6 +++---
 rsem_perl_utils.pm        |  8 +++-----
 synthesisRef.cpp          |  9 ++++-----
 8 files changed, 39 insertions(+), 29 deletions(-)

diff --git a/NoiseQProfile.h b/NoiseQProfile.h
index 6de12b1..63c5897 100644
--- a/NoiseQProfile.h
+++ b/NoiseQProfile.h
@@ -153,11 +153,16 @@ void NoiseQProfile::write(FILE *fo) {
 void NoiseQProfile::startSimulation() {
 	pc = new double[SIZE][NCODES];
 
-	for (int i = 0; i < SIZE; i++)
+	for (int i = 0; i < SIZE; i++) {
 		for (int j = 0; j < NCODES; j++) {
 			pc[i][j] = p[i][j];
 			if (j > 0) pc[i][j] += pc[i][j - 1];
 		}
+		if (isZero(pc[i][NCODES - 1])) {
+		  assert(NCODES == 5);
+		  pc[i][0] = 0.25; pc[i][1] = 0.5; pc[i][2] = 0.75; pc[i][3] = 1.0; pc[i][4] = 1.0;
+		}
+	}
 }
 
 std::string NoiseQProfile::simulate(simul* sampler, int len, const std::string& qual) {
diff --git a/README.md b/README.md
index 4636079..7aa1112 100644
--- a/README.md
+++ b/README.md
@@ -114,21 +114,19 @@ consideration.
 #### Using an alternative aligner
 
 By default, RSEM automates the alignment of reads to reference
-transcripts using the Bowtie alignment program. Turn on '--bowtie2'
-for 'rsem-prepare-reference' and 'rsem-calculate-expression' will
-allow RSEM to use the Bowtie 2 alignment program instead. Please note
-that indel alignments, local alignments and discordant alignments are
+transcripts using the Bowtie aligner. Turning on '--bowtie2' for
+'rsem-prepare-reference' and 'rsem-calculate-expression' will allow
+RSEM to use the Bowtie 2 alignment program instead. Please note that
+indel alignments, local alignments and discordant alignments are
 disallowed when RSEM uses Bowtie 2 since RSEM currently cannot handle
 them. See the description of '--bowtie2' option in
-'rsem-calculate-expression' for more details. To use an alternative
-alignment program, align the input reads against the file
+'rsem-calculate-expression' for more details. Similarly, turning on
+'--star' will allow RSEM to use the STAR aligner. To use an
+alternative alignment program, align the input reads against the file
 'reference_name.idx.fa' generated by 'rsem-prepare-reference', and
 format the alignment output in SAM or BAM format.  Then, instead of
 providing reads to 'rsem-calculate-expression', specify the '--sam' or
-'--bam' option and provide the SAM or BAM file as an argument.  When
-using an alternative aligner, you may also want to provide the
-'--no-bowtie' option to 'rsem-prepare-reference' so that the Bowtie
-indices are not built.
+'--bam' option and provide the SAM or BAM file as an argument.
 
 RSEM requires the alignments of a read to be adjacent. For
 paired-end reads, RSEM also requires the two mates of any alignment be
diff --git a/WHAT_IS_NEW b/WHAT_IS_NEW
index 68fdc30..8f0ef31 100644
--- a/WHAT_IS_NEW
+++ b/WHAT_IS_NEW
@@ -1,3 +1,12 @@
+RSEM v1.2.23
+
+- Moved version information from WHAT_IS_NEW to rsem_perl_utils.pm in order to make sure the '--version' option always outputs the version information
+- Fixed a typo in 'rsem-calculate-expression' that can lead to an error when '--star' is set and '--star-path' is not set
+- Fixed a bug that can occasionally crash the RSEM simulator
+- Added user-friendly error messages that are triggered when RSEM detects invalid bases in the input FASTA file during reference building
+
+--------------------------------------------------------------------------------------------
+
 RSEM v1.2.22
 
 - Added options to run the STAR aligner
diff --git a/extractRef.cpp b/extractRef.cpp
index e1c4448..68854f1 100644
--- a/extractRef.cpp
+++ b/extractRef.cpp
@@ -179,8 +179,8 @@ void parse_gtf_file(char* gtfF) {
 	if (verbose) { printf("Parsing gtf File is done!\n"); }
 }
 
-char check(char c) {
-	general_assert(isalpha(c), "Sequence contains unknown letter '" + ctos(c) + "'!");
+inline char check(char c, string& seqname, int pos) {
+	general_assert(isalpha(c), "Sequence " + seqname + " contains an unknown letter (ASCII code " + itos(c) + ") at 0-based position " + itos(pos) + "!");
 	if (isupper(c) && c != 'A' && c != 'C' && c != 'G' && c != 'T') c = 'N';
 	if (islower(c) && c != 'a' && c != 'c' && c != 'g' && c != 't') c = 'n';
 	return c;
@@ -290,7 +290,7 @@ int main(int argc, char* argv[]) {
 
 			size_t len = gseq.length();
 			assert(len > 0);
-			for (size_t j = 0; j < len; j++) gseq[j] = check(gseq[j]);
+			for (size_t j = 0; j < len; j++) gseq[j] = check(gseq[j], seqname, j);
 			
 			iter = sn2tr.find(seqname);
 			if (iter == sn2tr.end()) continue;
diff --git a/rsem-calculate-expression b/rsem-calculate-expression
index ea6279e..c4dc32f 100755
--- a/rsem-calculate-expression
+++ b/rsem-calculate-expression
@@ -254,7 +254,7 @@ if (!$is_sam && !$is_bam) {
 	if ( $star ) {
 	  ## align reads by STAR
     my $star_genome_path = dirname($refName);
-	  $command = "$star_path/STAR " . 
+	  $command = "$star_path"."STAR" . 
 	               ## ENCODE3 pipeline parameters
 	               " --genomeDir $star_genome_path " .
 	               ' --outSAMunmapped Within ' .
@@ -1096,7 +1096,8 @@ Assume the path to the bowtie executables is in the user's PATH environment vari
 
 6) '/data/mmliver_1.fq.gz' and '/data/mmliver_2.fq.gz', paired-end reads with quality scores and read files are compressed by gzip. We want to use STAR to aligned reads and assume STAR executable is '/sw/STAR'. Suppose we want to use 8 threads and do not generate a genome BAM file:
 
- rsem-calculate-expression --star \
+ rsem-calculate-expression --paired-end \
+                           --star \
                            --star-path /sw/STAR \
                            --gzipped-read-file \
                            -p 8 \
diff --git a/rsem-prepare-reference b/rsem-prepare-reference
index 8e5a3d9..ab0ebce 100755
--- a/rsem-prepare-reference
+++ b/rsem-prepare-reference
@@ -82,7 +82,7 @@ if ($polyA) {
 
 if ($bowtie_path ne "") { $bowtie_path .= "/"; }
 if ($bowtie2_path ne "") { $bowtie2_path .= "/"; }
-if ($star_path ne '') { $star_path .= '/'; }
+if ($star_path ne "") { $star_path .= "/"; }
 
 my $command = "";
 
@@ -128,9 +128,9 @@ if ($bowtie2) {
 
 if ($star) {
     my $out_star_genome_path = dirname($ARGV[1]);
-    $command = "$star_path/STAR " .
+    $command = $star_path . "STAR " .
                         " --runThreadN $star_nthreads " .
-                        ' --runMode genomeGenerate ' .
+                        " --runMode genomeGenerate " .
                         " --genomeDir $out_star_genome_path " .
                         " --genomeFastaFiles @list " .
                         " --sjdbGTFfile $gtfF " .
diff --git a/rsem_perl_utils.pm b/rsem_perl_utils.pm
index 01a3576..eaf7ea4 100644
--- a/rsem_perl_utils.pm
+++ b/rsem_perl_utils.pm
@@ -9,6 +9,8 @@ our @ISA = qw(Exporter);
 our @EXPORT = qw(runCommand);
 our @EXPORT_OK = qw(runCommand collectResults showVersionInfo);
 
+my $version = "RSEM v1.2.23";
+
 # command, {err_msg}
 sub runCommand {
     print $_[0]."\n";
@@ -88,11 +90,7 @@ sub collectResults {
 
 # 0, dir
 sub showVersionInfo {
-    open(INPUT, "$_[0]/WHAT_IS_NEW");
-    my $line = <INPUT>;
-    chomp($line);
-    close(INPUT);
-    print "Current version is $line\n";
+    print "Current version: $version\n";
     exit(0);
 }
 
diff --git a/synthesisRef.cpp b/synthesisRef.cpp
index 02bda0b..9ae4748 100644
--- a/synthesisRef.cpp
+++ b/synthesisRef.cpp
@@ -67,9 +67,9 @@ void loadMappingInfo(int file_type, char* mappingF) {
   fin.close();
 }
 
-char check(char c) {
-	if (!isalpha(c)) { fprintf(stderr, "Sequence contains unknown letter '%c'!\n", c); exit(-1); }
-	//assert(isalpha(c));
+
+inline char check(char c, string& seqname, int pos) {
+	general_assert(isalpha(c), "Sequence " + seqname + " contains an unknown letter (ASCII code " + itos(c) + ") at 0-based position " + itos(pos) + "!");
 	if (isupper(c) && c != 'A' && c != 'C' && c != 'G' && c != 'T') c = 'N';
 	if (islower(c) && c != 'a' && c != 'c' && c != 'g' && c != 't') c = 'n';
 	return c;
@@ -173,8 +173,7 @@ int main(int argc, char* argv[]) {
 
 			int len = gseq.length();
 			assert(len > 0);
-			for (int j = 0; j < len; j++) gseq[j] = check(gseq[j]);
-
+			for (int j = 0; j < len; j++) gseq[j] = check(gseq[j], seqname, j);
 			name2seq[seqname] = gseq;
 
 			transcript_id = seqname;

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/rsem.git



More information about the debian-med-commit mailing list