[med-svn] [Git][med-team/gffread][master] 7 commits: Another test exemption from rtracklayer

Michael R. Crusoe gitlab at salsa.debian.org
Wed Nov 6 16:42:05 GMT 2019



Michael R. Crusoe pushed to branch master at Debian Med / gffread


Commits:
7324d7ec by Michael R. Crusoe at 2019-11-06T16:31:01Z
Another test exemption from rtracklayer

- - - - -
6d79d533 by Michael R. Crusoe at 2019-11-06T16:31:20Z
New upstream version 0.11.6
- - - - -
33bd966c by Michael R. Crusoe at 2019-11-06T16:31:20Z
New upstream version

- - - - -
947a78f9 by Michael R. Crusoe at 2019-11-06T16:31:21Z
Update upstream source from tag 'upstream/0.11.6'

Update to upstream version '0.11.6'
with Debian dir b0ab139b2a67da29f55557bfa519b90f0113b7a0
- - - - -
7c10960e by Michael R. Crusoe at 2019-11-06T16:31:24Z
Set upstream metadata fields: Repository.
- - - - -
cb5c360a by Michael R. Crusoe at 2019-11-06T16:31:25Z
Remove obsolete fields Name from debian/upstream/metadata.
- - - - -
72098d09 by Michael R. Crusoe at 2019-11-06T16:41:30Z
needs new release of libgclib-dev

See https://github.com/gpertea/gclib/issues/5

- - - - -


6 changed files:

- debian/changelog
- debian/control
- debian/tests/run-tests
- debian/upstream/metadata
- gff_utils.cpp
- gffread.cpp


Changes:

=====================================
debian/changelog
=====================================
@@ -1,3 +1,12 @@
+gffread (0.11.6-1) UNRELEASED; urgency=medium
+
+  * Another test exemption from rtracklayer
+  * New upstream version
+  * Set upstream metadata fields: Repository.
+  * Remove obsolete fields Name from debian/upstream/metadata.
+
+ -- Michael R. Crusoe <michael.crusoe at gmail.com>  Wed, 06 Nov 2019 17:31:20 +0100
+
 gffread (0.11.5-1) unstable; urgency=medium
 
   * Team upload.


=====================================
debian/control
=====================================
@@ -1,10 +1,11 @@
 Source: gffread
 Maintainer: Debian Med Packaging Team <debian-med-packaging at lists.alioth.debian.org>
-Uploaders: Andreas Tille <tille at debian.org>
+Uploaders: Andreas Tille <tille at debian.org>,
+           Michael R. Crusoe <michael.crusoe at gmail.com>
 Section: science
 Priority: optional
 Build-Depends: debhelper-compat (= 12),
-               libgclib-dev
+               libgclib-dev (>= 0.11.3)
 Standards-Version: 4.4.0
 Vcs-Browser: https://salsa.debian.org/med-team/gffread
 Vcs-Git: https://salsa.debian.org/med-team/gffread.git


=====================================
debian/tests/run-tests
=====================================
@@ -97,7 +97,7 @@ IFS=", " read -r -a pkgs <<< ${PACKAGES}
 # gffread /usr/lib/python3/dist-packages/pybedtools/test/data/c.gff
 # Error: discarding overlapping duplicate mRNA feature (496-576) with ID=AT1G01010.mRNA
 
-SKIPS="/usr/share/doc/augustus/tutorial/results/augustus.abinitio.gff|/usr/share/doc/augustus/tutorial/results/augustus.hints.gff|/usr/lib/R/site-library/Gviz/extdata/test.gff2|/usr/lib/R/site-library/rtracklayer/tests/genes.gff3|/usr/share/EMBOSS/test/data/featexample2.gff3|/usr/share/EMBOSS/test/data/gmod-quantitative.gff3|/usr/share/gbrowse/htdocs/tutorial/data_files/volvox.gff3|/usr/share/gbrowse/htdocs/tutorial/data_files/volvox_all.gff3|/usr/share/gbrowse/htdocs/tutorial/data_files/volvox_bacs.gff3|/var/lib/gbrowse/databases/ideograms/human_cytobands.gff|/var/lib/gbrowse/databases/ideograms/mouse_cytobands.gff|/var/lib/gbrowse/databases/ideograms/rat_cytobands.gff|/var/lib/gbrowse/databases/yeast_chr1\+2/yeast_chr1\+2.gff3|/usr/lib/python3/dist-packages/gffutils/test/data/F3-unique-3.v2.gff|/usr/lib/python3/dist-packages/gffutils/test/data/mouse_extra_comma.gff3|/usr/lib/R/site-library/GenomicFeatures/extdata/GFF3_files/TheCanonicalGene_v2.gff3|/usr/share/doc/seqan-apps/insegt/example/annoOutput.gff|/usr/lib/python3/dist-packages/optimir/resources/coordinates/hsa_miRCarta_v1.1.gff3|/usr/lib/python3/dist-packages/gffutils/test/data/dmel-all-no-analysis-r5.49_50k_lines.gff|/usr/lib/python3/dist-packages/gffutils/test/data/glimmer_nokeyval.gff3|/usr/lib/python3/dist-packages/gffutils/test/data/unsanitized.gff|/usr/lib/python3/dist-packages/pybedtools/test/data/c.gff"
+SKIPS="/usr/share/doc/augustus/tutorial/results/augustus.abinitio.gff|/usr/share/doc/augustus/tutorial/results/augustus.hints.gff|/usr/lib/R/site-library/Gviz/extdata/test.gff2|/usr/lib/R/site-library/rtracklayer/tests/genes.gff3|/usr/share/EMBOSS/test/data/featexample2.gff3|/usr/share/EMBOSS/test/data/gmod-quantitative.gff3|/usr/share/gbrowse/htdocs/tutorial/data_files/volvox.gff3|/usr/share/gbrowse/htdocs/tutorial/data_files/volvox_all.gff3|/usr/share/gbrowse/htdocs/tutorial/data_files/volvox_bacs.gff3|/var/lib/gbrowse/databases/ideograms/human_cytobands.gff|/var/lib/gbrowse/databases/ideograms/mouse_cytobands.gff|/var/lib/gbrowse/databases/ideograms/rat_cytobands.gff|/var/lib/gbrowse/databases/yeast_chr1\+2/yeast_chr1\+2.gff3|/usr/lib/python3/dist-packages/gffutils/test/data/F3-unique-3.v2.gff|/usr/lib/python3/dist-packages/gffutils/test/data/mouse_extra_comma.gff3|/usr/lib/R/site-library/GenomicFeatures/extdata/GFF3_files/TheCanonicalGene_v2.gff3|/usr/share/doc/seqan-apps/insegt/example/annoOutput.gff|/usr/lib/python3/dist-packages/optimir/resources/coordinates/hsa_miRCarta_v1.1.gff3|/usr/lib/python3/dist-packages/gffutils/test/data/dmel-all-no-analysis-r5.49_50k_lines.gff|/usr/lib/python3/dist-packages/gffutils/test/data/glimmer_nokeyval.gff3|/usr/lib/python3/dist-packages/gffutils/test/data/unsanitized.gff|/usr/lib/python3/dist-packages/pybedtools/test/data/c.gff|/usr/lib/R/site-library/rtracklayer/tests/v2.gff"
 
 for package in "${pkgs[@]}"
 do


=====================================
debian/upstream/metadata
=====================================
@@ -1,6 +1,6 @@
 Registry:
- - Name: OMICtools
-   Entry: OMICS_28050
- - Name: conda:bioconda
-   Entry: gffread
-Name: gffread
+- Name: OMICtools
+  Entry: OMICS_28050
+- Name: conda:bioconda
+  Entry: gffread
+Repository: https://github.com/gpertea/gffread


=====================================
gff_utils.cpp
=====================================
@@ -734,7 +734,7 @@ void GffLoader::load(GList<GenomicSeqData>& seqdata, GFValidateFunc* gf_validate
 		GffNameList& attrnames = GffObj::names->attrs;//gffr->names->attrs;
 		for (int i=0;i<attrnames.Count();i++) {
 			char* n=attrnames[i]->name;
-			if (endsiWith(n, "_type")) {
+			if (endsiWith(n, "type")) {
 				pseudoTypeAttrIds.Add(attrnames[i]->idx);
 			}// else {
 			char* p=strifind(n, "pseudo");
@@ -783,7 +783,7 @@ void GffLoader::load(GList<GenomicSeqData>& seqdata, GFValidateFunc* gf_validate
 				if (verbose) warnPseudo(*m);
 				continue;
 			}
-			//  *_type=*_pseudogene
+			//  *type=*_pseudogene
             //find all attributes ending with _type and have value like: *_pseudogene
 			for (int i=0;i<pseudoTypeAttrIds.Count();++i) {
 				char* attrv=NULL;


=====================================
gffread.cpp
=====================================
@@ -4,7 +4,7 @@
 #define __STDC_FORMAT_MACROS
 #include <inttypes.h>
 
-#define VERSION "0.11.5"
+#define VERSION "0.11.6"
 
 #define USAGE "gffread v" VERSION ". Usage:\n\
 gffread <input_gff> [-g <genomic_seqs_fasta> | <dir>][-s <seq_info.fsize>] \n\
@@ -100,7 +100,9 @@ Output options:\n\
  -g   full path to a multi-fasta file with the genomic sequences\n\
       for all input mappings, OR a directory with single-fasta files\n\
       (one per genomic sequence, with file names matching sequence names)\n\
- -w    write a fasta file with spliced exons for each GFF transcript\n\
+ -w    write a fasta file with spliced exons for each transcript\n\
+ --w-add <N> for the -w option, extract additional <N> bases\n\
+       both upstream and downstream of the transcript boundaries\n\
  -x    write a fasta file with spliced CDS for each GFF transcript\n\
  -y    write a protein fasta file with the translation of CDS for each record\n\
  -W    for -w and -x options, write in the FASTA defline the exon\n\
@@ -111,8 +113,6 @@ Output options:\n\
  -m    <chr_replace> is a name mapping table for converting reference \n\
        sequence names, having this 2-column format:\n\
        <original_ref_ID> <new_ref_ID>\n\
-       WARNING: all GFF records on reference sequences whose original IDs\n\
-       are not found in the 1st column of this table will be discarded!\n\
  -t    use <trackname> in the 2nd column of each GFF/GTF output line\n\
  -o    write the records into <outfile> instead of stdout\n\
  -T    main output will be GTF instead of GFF3\n\
@@ -184,9 +184,10 @@ class CTableField {
 FILE* ffasta=NULL;
 FILE* f_in=NULL;
 FILE* f_out=NULL;
-FILE* f_w=NULL; //fasta with spliced exons (transcripts)
-FILE* f_x=NULL; //fasta with spliced CDS
-FILE* f_y=NULL; //fasta with translated CDS
+FILE* f_w=NULL; //writing fasta with spliced exons (transcripts)
+int wPadding = 0; //padding for -w option
+FILE* f_x=NULL; //writing fasta with spliced CDS
+FILE* f_y=NULL; //writing fasta with translated CDS
 
 bool wCDSonly=false;
 bool wNConly=false;
@@ -697,7 +698,21 @@ bool process_transcript(GFastaDb& gfasta, GffObj& gffrec) {
 	  uint cds_start=0;
 	  uint cds_end=0;
 	  seglst.Clear();
+	  //TODO: ? if wPadding is set, *temporarily* change first and last exon coordinates ?!?
+	  // or perhaps getSpliced() should take an additional padding parameter ?!?
+	  int padLeft=0;
+	  int padRight=0;
+	  if (wPadding>0) {
+		padLeft= (gffrec.start>(uint)wPadding) ? wPadding : gffrec.start - 1;
+		int ediff=faseq->getseqlen()-gffrec.end;
+	    padRight=(wPadding>ediff) ?  ediff : wPadding;
+   	    gffrec.addPadding(padLeft, padRight);
+	  }
 	  char* exont=gffrec.getSpliced(faseq, false, &seqlen, &cds_start, &cds_end, &seglst);
+	  //restore exons to normal (remove padding)
+	  if (wPadding>0)
+		  gffrec.removePadding(padLeft, padRight);
+
 	  GStr defline(gffrec.getID());
 	  if (exont!=NULL) {
 		  if (gffrec.CDstart>0) {
@@ -719,6 +734,13 @@ bool process_transcript(GFastaDb& gfasta, GffObj& gffrec) {
 				  defline.append("-");
 				  defline+=(int)gffrec.exons[i]->end;
 			  }
+			if (wPadding>0) {
+				defline.append(" padding:");
+				defline.append(padLeft);
+				defline+=(char)'|';
+				defline.append(padRight);
+			}
+
 			defline.append(" segs:");
 			for (int i=0;i<seglst.Count();i++) {
 				if (i>0) defline.append(",");
@@ -727,6 +749,7 @@ bool process_transcript(GFastaDb& gfasta, GffObj& gffrec) {
 				defline+=(int)seglst[i].end;
 				}
 		  }
+
 		  if (gffrec.attrs!=NULL) {
 			  //append all attributes found for each transcripts
 			  for (int i=0;i<gffrec.attrs->Count();i++) {
@@ -821,12 +844,13 @@ bool validateGffRec(GffObj* gffrec, GList<GffObj>* gfnew) {
 		if (rt) {
 			gffrec->setRefName(rt->new_name);
 		}
+		/* //no, do not discard non-matching entries, let them pass through!
 		else {
 			if (verbose)
 				GMessage("Info: %s discarded due to reference %s not being mapped\n",
 						gffrec->getID(), refname.chars());
 			return false; //discard, ref seq not in the given translation table
-		}
+		}*/
 	}
 	if (gffloader.transcriptsOnly && gffrec->isDiscarded()) {
 		//discard generic "locus" features with no other detailed subfeatures
@@ -1006,7 +1030,7 @@ void printAsTable(FILE* f, GffObj* gfo, int* out_counter=NULL) {
 int main(int argc, char* argv[]) {
  GArgs args(argc, argv,
    "version;debug;merge;adj-stop;bed;in-bed;tlf;in-tlf;cluster-only;nc;cov-info;help;"
-    "sort-alpha;keep-genes;keep-comments;keep-exon-attrs;force-exons;t-adopt;gene2exon;"
+    "sort-alpha;keep-genes;w-add=;keep-comments;keep-exon-attrs;force-exons;t-adopt;gene2exon;"
     "ignore-locus;no-pseudo;table=sort-by=hvOUNHPWCVJMKQYTDARSZFGLEBm:g:i:r:s:l:t:o:w:x:y:d:");
  args.printError(USAGE, true);
  if (args.getOpt('h') || args.getOpt("help")) {
@@ -1207,13 +1231,18 @@ int main(int argc, char* argv[]) {
  //if (f_out==NULL) f_out=stdout;
  if (gfasta.fastaPath==NULL && (validCDSonly || spliceCheck || args.getOpt('w')!=NULL || args.getOpt('x')!=NULL || args.getOpt('y')!=NULL))
   GError("Error: -g option is required for options -w, -x, -y, -V, -N, -M !\n");
-
  openfw(f_w, args, 'w');
  openfw(f_x, args, 'x');
  openfw(f_y, args, 'y');
+ s=args.getOpt("w-add");
+ if (!s.is_empty()) {
+	 if (f_w==NULL) GError("Error: --w-add option requires -w option!\n");
+	 wPadding=s.asInt();
+ }
 
  if (f_out==NULL && f_w==NULL && f_x==NULL && f_y==NULL && !covInfo)
 	 f_out=stdout;
+
  //if (f_y!=NULL || f_x!=NULL) wCDSonly=true;
  //useBadCDS=useBadCDS || (fgtfok==NULL && fgtfbad==NULL && f_y==NULL && f_x==NULL);
 



View it on GitLab: https://salsa.debian.org/med-team/gffread/compare/810cee750b894bc2b0d57978ea1a3c64494adcb9...72098d09bba35e0e0f71b7fb1e573cb8b8a3ad2c

-- 
View it on GitLab: https://salsa.debian.org/med-team/gffread/compare/810cee750b894bc2b0d57978ea1a3c64494adcb9...72098d09bba35e0e0f71b7fb1e573cb8b8a3ad2c
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20191106/417f6f98/attachment-0001.html>


More information about the debian-med-commit mailing list