[med-svn] [Git][med-team/gffread][upstream] New upstream version 0.11.6

Wed Nov 6 16:42:10 GMT 2019


Michael R. Crusoe pushed to branch upstream at Debian Med / gffread


Commits:
6d79d533 by Michael R. Crusoe at 2019-11-06T16:31:20Z
New upstream version 0.11.6
- - - - -


2 changed files:

- gff_utils.cpp
- gffread.cpp


Changes:

=====================================
gff_utils.cpp
=====================================
@@ -734,7 +734,7 @@ void GffLoader::load(GList<GenomicSeqData>& seqdata, GFValidateFunc* gf_validate
 		GffNameList& attrnames = GffObj::names->attrs;//gffr->names->attrs;
 		for (int i=0;i<attrnames.Count();i++) {
 			char* n=attrnames[i]->name;
-			if (endsiWith(n, "_type")) {
+			if (endsiWith(n, "type")) {
 				pseudoTypeAttrIds.Add(attrnames[i]->idx);
 			}// else {
 			char* p=strifind(n, "pseudo");
@@ -783,7 +783,7 @@ void GffLoader::load(GList<GenomicSeqData>& seqdata, GFValidateFunc* gf_validate
 				if (verbose) warnPseudo(*m);
 				continue;
 			}
-			//  *_type=*_pseudogene
+			//  *type=*_pseudogene
             //find all attributes ending with _type and have value like: *_pseudogene
 			for (int i=0;i<pseudoTypeAttrIds.Count();++i) {
 				char* attrv=NULL;


=====================================
gffread.cpp
=====================================
@@ -4,7 +4,7 @@
 #define __STDC_FORMAT_MACROS
 #include <inttypes.h>
 
-#define VERSION "0.11.5"
+#define VERSION "0.11.6"
 
 #define USAGE "gffread v" VERSION ". Usage:\n\
 gffread <input_gff> [-g <genomic_seqs_fasta> | <dir>][-s <seq_info.fsize>] \n\
@@ -100,7 +100,9 @@ Output options:\n\
  -g   full path to a multi-fasta file with the genomic sequences\n\
       for all input mappings, OR a directory with single-fasta files\n\
       (one per genomic sequence, with file names matching sequence names)\n\
- -w    write a fasta file with spliced exons for each GFF transcript\n\
+ -w    write a fasta file with spliced exons for each transcript\n\
+ --w-add <N> for the -w option, extract additional <N> bases\n\
+       both upstream and downstream of the transcript boundaries\n\
  -x    write a fasta file with spliced CDS for each GFF transcript\n\
  -y    write a protein fasta file with the translation of CDS for each record\n\
  -W    for -w and -x options, write in the FASTA defline the exon\n\
@@ -111,8 +113,6 @@ Output options:\n\
  -m    <chr_replace> is a name mapping table for converting reference \n\
        sequence names, having this 2-column format:\n\
        <original_ref_ID> <new_ref_ID>\n\
-       WARNING: all GFF records on reference sequences whose original IDs\n\
-       are not found in the 1st column of this table will be discarded!\n\
  -t    use <trackname> in the 2nd column of each GFF/GTF output line\n\
  -o    write the records into <outfile> instead of stdout\n\
  -T    main output will be GTF instead of GFF3\n\
@@ -184,9 +184,10 @@ class CTableField {
 FILE* ffasta=NULL;
 FILE* f_in=NULL;
 FILE* f_out=NULL;
-FILE* f_w=NULL; //fasta with spliced exons (transcripts)
-FILE* f_x=NULL; //fasta with spliced CDS
-FILE* f_y=NULL; //fasta with translated CDS
+FILE* f_w=NULL; //writing fasta with spliced exons (transcripts)
+int wPadding = 0; //padding for -w option
+FILE* f_x=NULL; //writing fasta with spliced CDS
+FILE* f_y=NULL; //writing fasta with translated CDS
 
 bool wCDSonly=false;
 bool wNConly=false;
@@ -697,7 +698,21 @@ bool process_transcript(GFastaDb& gfasta, GffObj& gffrec) {
 	  uint cds_start=0;
 	  uint cds_end=0;
 	  seglst.Clear();
+	  //TODO: ? if wPadding is set, *temporarily* change first and last exon coordinates ?!?
+	  // or perhaps getSpliced() should take an additional padding parameter ?!?
+	  int padLeft=0;
+	  int padRight=0;
+	  if (wPadding>0) {
+		padLeft= (gffrec.start>(uint)wPadding) ? wPadding : gffrec.start - 1;
+		int ediff=faseq->getseqlen()-gffrec.end;
+	    padRight=(wPadding>ediff) ?  ediff : wPadding;
+   	    gffrec.addPadding(padLeft, padRight);
+	  }
 	  char* exont=gffrec.getSpliced(faseq, false, &seqlen, &cds_start, &cds_end, &seglst);
+	  //restore exons to normal (remove padding)
+	  if (wPadding>0)
+		  gffrec.removePadding(padLeft, padRight);
+
 	  GStr defline(gffrec.getID());
 	  if (exont!=NULL) {
 		  if (gffrec.CDstart>0) {
@@ -719,6 +734,13 @@ bool process_transcript(GFastaDb& gfasta, GffObj& gffrec) {
 				  defline.append("-");
 				  defline+=(int)gffrec.exons[i]->end;
 			  }
+			if (wPadding>0) {
+				defline.append(" padding:");
+				defline.append(padLeft);
+				defline+=(char)'|';
+				defline.append(padRight);
+			}
+
 			defline.append(" segs:");
 			for (int i=0;i<seglst.Count();i++) {
 				if (i>0) defline.append(",");
@@ -727,6 +749,7 @@ bool process_transcript(GFastaDb& gfasta, GffObj& gffrec) {
 				defline+=(int)seglst[i].end;
 				}
 		  }
+
 		  if (gffrec.attrs!=NULL) {
 			  //append all attributes found for each transcripts
 			  for (int i=0;i<gffrec.attrs->Count();i++) {
@@ -821,12 +844,13 @@ bool validateGffRec(GffObj* gffrec, GList<GffObj>* gfnew) {
 		if (rt) {
 			gffrec->setRefName(rt->new_name);
 		}
+		/* //no, do not discard non-matching entries, let them pass through!
 		else {
 			if (verbose)
 				GMessage("Info: %s discarded due to reference %s not being mapped\n",
 						gffrec->getID(), refname.chars());
 			return false; //discard, ref seq not in the given translation table
-		}
+		}*/
 	}
 	if (gffloader.transcriptsOnly && gffrec->isDiscarded()) {
 		//discard generic "locus" features with no other detailed subfeatures
@@ -1006,7 +1030,7 @@ void printAsTable(FILE* f, GffObj* gfo, int* out_counter=NULL) {
 int main(int argc, char* argv[]) {
  GArgs args(argc, argv,
    "version;debug;merge;adj-stop;bed;in-bed;tlf;in-tlf;cluster-only;nc;cov-info;help;"
-    "sort-alpha;keep-genes;keep-comments;keep-exon-attrs;force-exons;t-adopt;gene2exon;"
+    "sort-alpha;keep-genes;w-add=;keep-comments;keep-exon-attrs;force-exons;t-adopt;gene2exon;"
     "ignore-locus;no-pseudo;table=sort-by=hvOUNHPWCVJMKQYTDARSZFGLEBm:g:i:r:s:l:t:o:w:x:y:d:");
  args.printError(USAGE, true);
  if (args.getOpt('h') || args.getOpt("help")) {
@@ -1207,13 +1231,18 @@ int main(int argc, char* argv[]) {
  //if (f_out==NULL) f_out=stdout;
  if (gfasta.fastaPath==NULL && (validCDSonly || spliceCheck || args.getOpt('w')!=NULL || args.getOpt('x')!=NULL || args.getOpt('y')!=NULL))
   GError("Error: -g option is required for options -w, -x, -y, -V, -N, -M !\n");
-
  openfw(f_w, args, 'w');
  openfw(f_x, args, 'x');
  openfw(f_y, args, 'y');
+ s=args.getOpt("w-add");
+ if (!s.is_empty()) {
+	 if (f_w==NULL) GError("Error: --w-add option requires -w option!\n");
+	 wPadding=s.asInt();
+ }
 
  if (f_out==NULL && f_w==NULL && f_x==NULL && f_y==NULL && !covInfo)
 	 f_out=stdout;
+
  //if (f_y!=NULL || f_x!=NULL) wCDSonly=true;
  //useBadCDS=useBadCDS || (fgtfok==NULL && fgtfbad==NULL && f_y==NULL && f_x==NULL);
 



View it on GitLab: https://salsa.debian.org/med-team/gffread/commit/6d79d5333ad03ed3bb6be9a598b5124ad365cea2

-- 
View it on GitLab: https://salsa.debian.org/med-team/gffread/commit/6d79d5333ad03ed3bb6be9a598b5124ad365cea2
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20191106/3c5c1847/attachment-0001.html>