[med-svn] [Git][med-team/gffread][upstream] New upstream version 0.11.6
Michael R. Crusoe
gitlab at salsa.debian.org
Wed Nov 6 16:42:10 GMT 2019
Michael R. Crusoe pushed to branch upstream at Debian Med / gffread
Commits:
6d79d533 by Michael R. Crusoe at 2019-11-06T16:31:20Z
New upstream version 0.11.6
- - - - -
2 changed files:
- gff_utils.cpp
- gffread.cpp
Changes:
=====================================
gff_utils.cpp
=====================================
@@ -734,7 +734,7 @@ void GffLoader::load(GList<GenomicSeqData>& seqdata, GFValidateFunc* gf_validate
GffNameList& attrnames = GffObj::names->attrs;//gffr->names->attrs;
for (int i=0;i<attrnames.Count();i++) {
char* n=attrnames[i]->name;
- if (endsiWith(n, "_type")) {
+ if (endsiWith(n, "type")) {
pseudoTypeAttrIds.Add(attrnames[i]->idx);
}// else {
char* p=strifind(n, "pseudo");
@@ -783,7 +783,7 @@ void GffLoader::load(GList<GenomicSeqData>& seqdata, GFValidateFunc* gf_validate
if (verbose) warnPseudo(*m);
continue;
}
- // *_type=*_pseudogene
+ // *type=*_pseudogene
//find all attributes whose name ends with "type" and whose value looks like: *_pseudogene
for (int i=0;i<pseudoTypeAttrIds.Count();++i) {
char* attrv=NULL;
=====================================
gffread.cpp
=====================================
@@ -4,7 +4,7 @@
#define __STDC_FORMAT_MACROS
#include <inttypes.h>
-#define VERSION "0.11.5"
+#define VERSION "0.11.6"
#define USAGE "gffread v" VERSION ". Usage:\n\
gffread <input_gff> [-g <genomic_seqs_fasta> | <dir>][-s <seq_info.fsize>] \n\
@@ -100,7 +100,9 @@ Output options:\n\
-g full path to a multi-fasta file with the genomic sequences\n\
for all input mappings, OR a directory with single-fasta files\n\
(one per genomic sequence, with file names matching sequence names)\n\
- -w write a fasta file with spliced exons for each GFF transcript\n\
+ -w write a fasta file with spliced exons for each transcript\n\
+ --w-add <N> for the -w option, extract additional <N> bases\n\
+ both upstream and downstream of the transcript boundaries\n\
-x write a fasta file with spliced CDS for each GFF transcript\n\
-y write a protein fasta file with the translation of CDS for each record\n\
-W for -w and -x options, write in the FASTA defline the exon\n\
@@ -111,8 +113,6 @@ Output options:\n\
-m <chr_replace> is a name mapping table for converting reference \n\
sequence names, having this 2-column format:\n\
<original_ref_ID> <new_ref_ID>\n\
- WARNING: all GFF records on reference sequences whose original IDs\n\
- are not found in the 1st column of this table will be discarded!\n\
-t use <trackname> in the 2nd column of each GFF/GTF output line\n\
-o write the records into <outfile> instead of stdout\n\
-T main output will be GTF instead of GFF3\n\
@@ -184,9 +184,10 @@ class CTableField {
FILE* ffasta=NULL;
FILE* f_in=NULL;
FILE* f_out=NULL;
-FILE* f_w=NULL; //fasta with spliced exons (transcripts)
-FILE* f_x=NULL; //fasta with spliced CDS
-FILE* f_y=NULL; //fasta with translated CDS
+FILE* f_w=NULL; //writing fasta with spliced exons (transcripts)
+int wPadding = 0; //padding for -w option
+FILE* f_x=NULL; //writing fasta with spliced CDS
+FILE* f_y=NULL; //writing fasta with translated CDS
bool wCDSonly=false;
bool wNConly=false;
@@ -697,7 +698,21 @@ bool process_transcript(GFastaDb& gfasta, GffObj& gffrec) {
uint cds_start=0;
uint cds_end=0;
seglst.Clear();
+ //TODO: ? if wPadding is set, *temporarily* change first and last exon coordinates ?!?
+ // or perhaps getSpliced() should take an additional padding parameter ?!?
+ int padLeft=0;
+ int padRight=0;
+ if (wPadding>0) {
+ padLeft= (gffrec.start>(uint)wPadding) ? wPadding : gffrec.start - 1;
+ int ediff=faseq->getseqlen()-gffrec.end;
+ padRight=(wPadding>ediff) ? ediff : wPadding;
+ gffrec.addPadding(padLeft, padRight);
+ }
char* exont=gffrec.getSpliced(faseq, false, &seqlen, &cds_start, &cds_end, &seglst);
+ //restore exons to normal (remove padding)
+ if (wPadding>0)
+ gffrec.removePadding(padLeft, padRight);
+
GStr defline(gffrec.getID());
if (exont!=NULL) {
if (gffrec.CDstart>0) {
@@ -719,6 +734,13 @@ bool process_transcript(GFastaDb& gfasta, GffObj& gffrec) {
defline.append("-");
defline+=(int)gffrec.exons[i]->end;
}
+ if (wPadding>0) {
+ defline.append(" padding:");
+ defline.append(padLeft);
+ defline+=(char)'|';
+ defline.append(padRight);
+ }
+
defline.append(" segs:");
for (int i=0;i<seglst.Count();i++) {
if (i>0) defline.append(",");
@@ -727,6 +749,7 @@ bool process_transcript(GFastaDb& gfasta, GffObj& gffrec) {
defline+=(int)seglst[i].end;
}
}
+
if (gffrec.attrs!=NULL) {
//append all attributes found for each transcripts
for (int i=0;i<gffrec.attrs->Count();i++) {
@@ -821,12 +844,13 @@ bool validateGffRec(GffObj* gffrec, GList<GffObj>* gfnew) {
if (rt) {
gffrec->setRefName(rt->new_name);
}
+ /* //no, do not discard non-matching entries, let them pass through!
else {
if (verbose)
GMessage("Info: %s discarded due to reference %s not being mapped\n",
gffrec->getID(), refname.chars());
return false; //discard, ref seq not in the given translation table
- }
+ }*/
}
if (gffloader.transcriptsOnly && gffrec->isDiscarded()) {
//discard generic "locus" features with no other detailed subfeatures
@@ -1006,7 +1030,7 @@ void printAsTable(FILE* f, GffObj* gfo, int* out_counter=NULL) {
int main(int argc, char* argv[]) {
GArgs args(argc, argv,
"version;debug;merge;adj-stop;bed;in-bed;tlf;in-tlf;cluster-only;nc;cov-info;help;"
- "sort-alpha;keep-genes;keep-comments;keep-exon-attrs;force-exons;t-adopt;gene2exon;"
+ "sort-alpha;keep-genes;w-add=;keep-comments;keep-exon-attrs;force-exons;t-adopt;gene2exon;"
"ignore-locus;no-pseudo;table=sort-by=hvOUNHPWCVJMKQYTDARSZFGLEBm:g:i:r:s:l:t:o:w:x:y:d:");
args.printError(USAGE, true);
if (args.getOpt('h') || args.getOpt("help")) {
@@ -1207,13 +1231,18 @@ int main(int argc, char* argv[]) {
//if (f_out==NULL) f_out=stdout;
if (gfasta.fastaPath==NULL && (validCDSonly || spliceCheck || args.getOpt('w')!=NULL || args.getOpt('x')!=NULL || args.getOpt('y')!=NULL))
GError("Error: -g option is required for options -w, -x, -y, -V, -N, -M !\n");
-
openfw(f_w, args, 'w');
openfw(f_x, args, 'x');
openfw(f_y, args, 'y');
+ s=args.getOpt("w-add");
+ if (!s.is_empty()) {
+ if (f_w==NULL) GError("Error: --w-add option requires -w option!\n");
+ wPadding=s.asInt();
+ }
if (f_out==NULL && f_w==NULL && f_x==NULL && f_y==NULL && !covInfo)
f_out=stdout;
+
//if (f_y!=NULL || f_x!=NULL) wCDSonly=true;
//useBadCDS=useBadCDS || (fgtfok==NULL && fgtfbad==NULL && f_y==NULL && f_x==NULL);
View it on GitLab: https://salsa.debian.org/med-team/gffread/commit/6d79d5333ad03ed3bb6be9a598b5124ad365cea2
--
View it on GitLab: https://salsa.debian.org/med-team/gffread/commit/6d79d5333ad03ed3bb6be9a598b5124ad365cea2
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20191106/3c5c1847/attachment-0001.html>
More information about the debian-med-commit mailing list