[med-svn] [rsem] 01/03: Imported Upstream version 1.2.23+dfsg
Michael Crusoe
misterc-guest at moszumanska.debian.org
Thu Oct 22 15:25:21 UTC 2015
This is an automated email from the git hooks/post-receive script.
misterc-guest pushed a commit to branch master
in repository rsem.
commit 18fa8665d16961d56878014a74a65e2be11788d8
Author: Michael R. Crusoe <crusoe at ucdavis.edu>
Date: Thu Oct 22 08:02:31 2015 -0700
Imported Upstream version 1.2.23+dfsg
---
NoiseQProfile.h | 7 ++++++-
README.md | 18 ++++++++----------
WHAT_IS_NEW | 9 +++++++++
extractRef.cpp | 6 +++---
rsem-calculate-expression | 5 +++--
rsem-prepare-reference | 6 +++---
rsem_perl_utils.pm | 8 +++-----
synthesisRef.cpp | 9 ++++-----
8 files changed, 39 insertions(+), 29 deletions(-)
diff --git a/NoiseQProfile.h b/NoiseQProfile.h
index 6de12b1..63c5897 100644
--- a/NoiseQProfile.h
+++ b/NoiseQProfile.h
@@ -153,11 +153,16 @@ void NoiseQProfile::write(FILE *fo) {
void NoiseQProfile::startSimulation() {
pc = new double[SIZE][NCODES];
- for (int i = 0; i < SIZE; i++)
+ for (int i = 0; i < SIZE; i++) {
for (int j = 0; j < NCODES; j++) {
pc[i][j] = p[i][j];
if (j > 0) pc[i][j] += pc[i][j - 1];
}
+ if (isZero(pc[i][NCODES - 1])) {
+ assert(NCODES == 5);
+ pc[i][0] = 0.25; pc[i][1] = 0.5; pc[i][2] = 0.75; pc[i][3] = 1.0; pc[i][4] = 1.0;
+ }
+ }
}
std::string NoiseQProfile::simulate(simul* sampler, int len, const std::string& qual) {
diff --git a/README.md b/README.md
index 4636079..7aa1112 100644
--- a/README.md
+++ b/README.md
@@ -114,21 +114,19 @@ consideration.
#### Using an alternative aligner
By default, RSEM automates the alignment of reads to reference
-transcripts using the Bowtie alignment program. Turn on '--bowtie2'
-for 'rsem-prepare-reference' and 'rsem-calculate-expression' will
-allow RSEM to use the Bowtie 2 alignment program instead. Please note
-that indel alignments, local alignments and discordant alignments are
+transcripts using the Bowtie aligner. Turning on '--bowtie2' for
+'rsem-prepare-reference' and 'rsem-calculate-expression' will allow
+RSEM to use the Bowtie 2 alignment program instead. Please note that
+indel alignments, local alignments and discordant alignments are
disallowed when RSEM uses Bowtie 2 since RSEM currently cannot handle
them. See the description of '--bowtie2' option in
-'rsem-calculate-expression' for more details. To use an alternative
-alignment program, align the input reads against the file
+'rsem-calculate-expression' for more details. Similarly, turning on
+'--star' will allow RSEM to use the STAR aligner. To use an
+alternative alignment program, align the input reads against the file
'reference_name.idx.fa' generated by 'rsem-prepare-reference', and
format the alignment output in SAM or BAM format. Then, instead of
providing reads to 'rsem-calculate-expression', specify the '--sam' or
-'--bam' option and provide the SAM or BAM file as an argument. When
-using an alternative aligner, you may also want to provide the
-'--no-bowtie' option to 'rsem-prepare-reference' so that the Bowtie
-indices are not built.
+'--bam' option and provide the SAM or BAM file as an argument.
RSEM requires the alignments of a read to be adjacent. For
paired-end reads, RSEM also requires the two mates of any alignment be
diff --git a/WHAT_IS_NEW b/WHAT_IS_NEW
index 68fdc30..8f0ef31 100644
--- a/WHAT_IS_NEW
+++ b/WHAT_IS_NEW
@@ -1,3 +1,12 @@
+RSEM v1.2.23
+
+- Moved version information from WHAT_IS_NEW to rsem_perl_utils.pm in order to make sure the '--version' option always outputs the version information
+- Fixed a typo in 'rsem-calculate-expression' that can lead to an error when '--star' is set and '--star-path' is not set
+- Fixed a bug that can occasionally crash the RSEM simulator
+- Added user-friendly error messages that are triggered when RSEM detects invalid bases in the input FASTA file during reference building
+
+--------------------------------------------------------------------------------------------
+
RSEM v1.2.22
- Added options to run the STAR aligner
diff --git a/extractRef.cpp b/extractRef.cpp
index e1c4448..68854f1 100644
--- a/extractRef.cpp
+++ b/extractRef.cpp
@@ -179,8 +179,8 @@ void parse_gtf_file(char* gtfF) {
if (verbose) { printf("Parsing gtf File is done!\n"); }
}
-char check(char c) {
- general_assert(isalpha(c), "Sequence contains unknown letter '" + ctos(c) + "'!");
+inline char check(char c, string& seqname, int pos) {
+ general_assert(isalpha(c), "Sequence " + seqname + " contains an unknown letter (ASCII code " + itos(c) + ") at 0-based position " + itos(pos) + "!");
if (isupper(c) && c != 'A' && c != 'C' && c != 'G' && c != 'T') c = 'N';
if (islower(c) && c != 'a' && c != 'c' && c != 'g' && c != 't') c = 'n';
return c;
@@ -290,7 +290,7 @@ int main(int argc, char* argv[]) {
size_t len = gseq.length();
assert(len > 0);
- for (size_t j = 0; j < len; j++) gseq[j] = check(gseq[j]);
+ for (size_t j = 0; j < len; j++) gseq[j] = check(gseq[j], seqname, j);
iter = sn2tr.find(seqname);
if (iter == sn2tr.end()) continue;
diff --git a/rsem-calculate-expression b/rsem-calculate-expression
index ea6279e..c4dc32f 100755
--- a/rsem-calculate-expression
+++ b/rsem-calculate-expression
@@ -254,7 +254,7 @@ if (!$is_sam && !$is_bam) {
if ( $star ) {
## align reads by STAR
my $star_genome_path = dirname($refName);
- $command = "$star_path/STAR " .
+ $command = "$star_path"."STAR" .
## ENCODE3 pipeline parameters
" --genomeDir $star_genome_path " .
' --outSAMunmapped Within ' .
@@ -1096,7 +1096,8 @@ Assume the path to the bowtie executables is in the user's PATH environment vari
6) '/data/mmliver_1.fq.gz' and '/data/mmliver_2.fq.gz', paired-end reads with quality scores and read files are compressed by gzip. We want to use STAR to align reads and assume STAR executable is '/sw/STAR'. Suppose we want to use 8 threads and do not generate a genome BAM file:
- rsem-calculate-expression --star \
+ rsem-calculate-expression --paired-end \
+ --star \
--star-path /sw/STAR \
--gzipped-read-file \
-p 8 \
diff --git a/rsem-prepare-reference b/rsem-prepare-reference
index 8e5a3d9..ab0ebce 100755
--- a/rsem-prepare-reference
+++ b/rsem-prepare-reference
@@ -82,7 +82,7 @@ if ($polyA) {
if ($bowtie_path ne "") { $bowtie_path .= "/"; }
if ($bowtie2_path ne "") { $bowtie2_path .= "/"; }
-if ($star_path ne '') { $star_path .= '/'; }
+if ($star_path ne "") { $star_path .= "/"; }
my $command = "";
@@ -128,9 +128,9 @@ if ($bowtie2) {
if ($star) {
my $out_star_genome_path = dirname($ARGV[1]);
- $command = "$star_path/STAR " .
+ $command = $star_path . "STAR " .
" --runThreadN $star_nthreads " .
- ' --runMode genomeGenerate ' .
+ " --runMode genomeGenerate " .
" --genomeDir $out_star_genome_path " .
" --genomeFastaFiles @list " .
" --sjdbGTFfile $gtfF " .
diff --git a/rsem_perl_utils.pm b/rsem_perl_utils.pm
index 01a3576..eaf7ea4 100644
--- a/rsem_perl_utils.pm
+++ b/rsem_perl_utils.pm
@@ -9,6 +9,8 @@ our @ISA = qw(Exporter);
our @EXPORT = qw(runCommand);
our @EXPORT_OK = qw(runCommand collectResults showVersionInfo);
+my $version = "RSEM v1.2.23";
+
# command, {err_msg}
sub runCommand {
print $_[0]."\n";
@@ -88,11 +90,7 @@ sub collectResults {
# 0, dir
sub showVersionInfo {
- open(INPUT, "$_[0]/WHAT_IS_NEW");
- my $line = <INPUT>;
- chomp($line);
- close(INPUT);
- print "Current version is $line\n";
+ print "Current version: $version\n";
exit(0);
}
diff --git a/synthesisRef.cpp b/synthesisRef.cpp
index 02bda0b..9ae4748 100644
--- a/synthesisRef.cpp
+++ b/synthesisRef.cpp
@@ -67,9 +67,9 @@ void loadMappingInfo(int file_type, char* mappingF) {
fin.close();
}
-char check(char c) {
- if (!isalpha(c)) { fprintf(stderr, "Sequence contains unknown letter '%c'!\n", c); exit(-1); }
- //assert(isalpha(c));
+
+inline char check(char c, string& seqname, int pos) {
+ general_assert(isalpha(c), "Sequence " + seqname + " contains an unknown letter (ASCII code " + itos(c) + ") at 0-based position " + itos(pos) + "!");
if (isupper(c) && c != 'A' && c != 'C' && c != 'G' && c != 'T') c = 'N';
if (islower(c) && c != 'a' && c != 'c' && c != 'g' && c != 't') c = 'n';
return c;
@@ -173,8 +173,7 @@ int main(int argc, char* argv[]) {
int len = gseq.length();
assert(len > 0);
- for (int j = 0; j < len; j++) gseq[j] = check(gseq[j]);
-
+ for (int j = 0; j < len; j++) gseq[j] = check(gseq[j], seqname, j);
name2seq[seqname] = gseq;
transcript_id = seqname;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/rsem.git
More information about the debian-med-commit
mailing list