[med-svn] [Git][med-team/gffread][master] 8 commits: Fix watchfile to detect new versions on github

Andreas Tille (@tille) gitlab at salsa.debian.org
Wed Oct 13 16:16:04 BST 2021



Andreas Tille pushed to branch master at Debian Med / gffread


Commits:
5a4e4232 by Andreas Tille at 2021-10-13T15:53:48+02:00
Fix watchfile to detect new versions on github

- - - - -
a9c3e4d6 by Andreas Tille at 2021-10-13T15:53:51+02:00
routine-update: New upstream version

- - - - -
3f7ba6a2 by Andreas Tille at 2021-10-13T15:53:52+02:00
New upstream version 0.12.7
- - - - -
b86854cc by Andreas Tille at 2021-10-13T15:53:54+02:00
Update upstream source from tag 'upstream/0.12.7'

Update to upstream version '0.12.7'
with Debian dir 525098e5391d2a81a7c52c94cdc88ec640a6d7ff
- - - - -
8f12a25e by Andreas Tille at 2021-10-13T15:53:54+02:00
routine-update: Standards-Version: 4.6.0

- - - - -
eec57189 by Andreas Tille at 2021-10-13T16:48:32+02:00
Refresh patches and cleanup changelog

- - - - -
8ac3fe2b by Andreas Tille at 2021-10-13T17:11:22+02:00
Restrictions: allow-stderr

- - - - -
852c8655 by Andreas Tille at 2021-10-13T17:14:09+02:00
Upload to unstable

- - - - -


11 changed files:

- Makefile
- debian/changelog
- debian/control
- debian/patches/gclib.patch
- debian/patches/hardening
- debian/tests/control
- debian/watch
- gff_utils.cpp
- gff_utils.h
- gffread.cpp
- prep_source.sh


Changes:

=====================================
Makefile
=====================================
@@ -9,9 +9,9 @@ LINKER  := $(if $(LINKER),$(LINKER),g++)
 
 LDFLAGS := $(if $(LDFLAGS),$(LDFLAGS),-g)
 
-BASEFLAGS  := -Wall -Wextra ${SEARCHDIRS} -D_FILE_OFFSET_BITS=64 \
--D_LARGEFILE_SOURCE -D_REENTRANT -fno-strict-aliasing \
- -std=c++11 -fno-exceptions -fno-rtti
+BASEFLAGS  := -Wall -Wextra -std=c++11 ${SEARCHDIRS} -D_FILE_OFFSET_BITS=64 \
+ -D_LARGEFILE_SOURCE -D_REENTRANT -fno-strict-aliasing \
+ -fno-exceptions -fno-rtti
 
 GCCV8 := $(shell expr `${CXX} -dumpversion | cut -f1 -d.` \>= 8)
 ifeq "$(GCCV8)" "1"
@@ -22,8 +22,12 @@ CXXFLAGS := $(if $(CXXFLAGS),$(BASEFLAGS) $(CXXFLAGS),$(BASEFLAGS))
 
 ifneq (,$(filter %release %static, $(MAKECMDGOALS)))
   # -- release build
-  CXXFLAGS := -g -O3 -DNDEBUG $(CXXFLAGS)
-else
+  LIBS := 
+  ifneq (,$(findstring static,$(MAKECMDGOALS)))
+    LDFLAGS += -static-libstdc++ -static-libgcc
+  endif
+  CXXFLAGS := -O3 -DNDEBUG $(CXXFLAGS)
+else #debug builds
   ifneq (,$(filter %profile %gprof %prof, $(MAKECMDGOALS)))
     CXXFLAGS += -pg -O0 -DNDEBUG
     LDFLAGS += -pg
@@ -75,7 +79,7 @@ OBJS := ${GCLDIR}/GBase.o ${GCLDIR}/GArgs.o ${GCLDIR}/GFaSeqGet.o \
  
 .PHONY : all
 
-all release debug memcheck memdebug profile gprof prof: ../gclib gffread
+all static release debug memcheck memdebug profile gprof prof: ../gclib gffread
 
 ../gclib:
 	git clone https://github.com/gpertea/gclib.git ../gclib


=====================================
debian/changelog
=====================================
@@ -1,14 +1,16 @@
-gffread (0.12.4-1) UNRELEASED; urgency=medium
+gffread (0.12.7-1) unstable; urgency=medium
 
   [ Andreas Tille ]
   * New upstream version
-  Seems to require a new version of gclib which would mean a transition
-  which is to late now
+  * Fix watchfile to detect new versions on github
+  * New upstream version
+  * Standards-Version: 4.6.0 (routine-update)
+  * Autopkgtest: Restrictions: allow-stderr
 
   [ Michael R. Crusoe ]
   * debian/watch: rename downloaded file into something sensible
 
- -- Andreas Tille <tille at debian.org>  Sun, 17 Jan 2021 18:02:17 +0100
+ -- Andreas Tille <tille at debian.org>  Wed, 13 Oct 2021 17:11:28 +0200
 
 gffread (0.12.1-4) unstable; urgency=medium
 


=====================================
debian/control
=====================================
@@ -7,7 +7,7 @@ Section: science
 Priority: optional
 Build-Depends: debhelper-compat (= 13),
                libgclib-dev
-Standards-Version: 4.5.1
+Standards-Version: 4.6.0
 Vcs-Browser: https://salsa.debian.org/med-team/gffread
 Vcs-Git: https://salsa.debian.org/med-team/gffread.git
 Homepage: https://ccb.jhu.edu/software/stringtie/gff.shtml


=====================================
debian/patches/gclib.patch
=====================================
@@ -4,16 +4,16 @@ Description: Fix build against libgclib
 
 --- a/Makefile
 +++ b/Makefile
-@@ -75,7 +75,7 @@ OBJS := ${GCLDIR}/GBase.o ${GCLDIR}/GArg
+@@ -79,7 +79,7 @@ OBJS := ${GCLDIR}/GBase.o ${GCLDIR}/GArg
   
  .PHONY : all
  
--all release debug memcheck memdebug profile gprof prof: ../gclib gffread
-+all release debug memcheck memdebug profile gprof prof: gffread
+-all static release debug memcheck memdebug profile gprof prof: ../gclib gffread
++all static release debug memcheck memdebug profile gprof prof: gffread
  
  ../gclib:
  	git clone https://github.com/gpertea/gclib.git ../gclib
-@@ -85,8 +85,8 @@ gffread.o : gff_utils.h $(GCLDIR)/GBase.
+@@ -89,8 +89,8 @@ gffread.o : gff_utils.h $(GCLDIR)/GBase.
  gff_utils.o : gff_utils.h $(GCLDIR)/gff.h
  ${GCLDIR}/gff.o : ${GCLDIR}/gff.h ${GCLDIR}/GFaSeqGet.h ${GCLDIR}/GList.hh
  ${GCLDIR}/GFaSeqGet.o : ${GCLDIR}/GFaSeqGet.h


=====================================
debian/patches/hardening
=====================================
@@ -4,7 +4,7 @@ Subject: Use CPPFLAGS
 Allows Debian to harden the binary with CPPFLAGS=-Wdate-time -D_FORTIFY_SOURCE=2
 --- a/Makefile
 +++ b/Makefile
-@@ -65,7 +65,7 @@ endif
+@@ -69,7 +69,7 @@ endif
  #endif
  
  %.o : %.cpp
@@ -13,7 +13,7 @@ Allows Debian to harden the binary with CPPFLAGS=-Wdate-time -D_FORTIFY_SOURCE=2
  
  # C/C++ linker
  
-@@ -86,7 +86,7 @@ gff_utils.o : gff_utils.h $(GCLDIR)/gff.
+@@ -90,7 +90,7 @@ gff_utils.o : gff_utils.h $(GCLDIR)/gff.
  ${GCLDIR}/gff.o : ${GCLDIR}/gff.h ${GCLDIR}/GFaSeqGet.h ${GCLDIR}/GList.hh
  ${GCLDIR}/GFaSeqGet.o : ${GCLDIR}/GFaSeqGet.h
  gffread: gffread.o gff_utils.o


=====================================
debian/tests/control
=====================================
@@ -1,2 +1,3 @@
 Tests: run-tests
 Depends: gffread, bash, augustus-doc, bedtools-test, cnvkit [amd64 arm64 ppc64el], emboss-data, emboss-test, gbrowse, genometools-common, gff2aplot, gff2ps, htslib-test, libbio-graphics-perl, optimir [amd64 arm64 ppc64el], proteinortho [amd64 arm64 ppc64el s390x], python3-gffutils, python3-pybedtools [amd64 arm64 ppc64el], r-bioc-genomicfeatures, r-bioc-gviz, r-bioc-rhtslib, r-bioc-rtracklayer, seqan-apps, spades [amd64], trinityrnaseq-examples, dpkg-dev
+Restrictions: allow-stderr


=====================================
debian/watch
=====================================
@@ -1,4 +1,4 @@
 version=4
 
 opts="filenamemangle=s%(?:.*?)?v?(\d[\d.]*)\.tar\.gz%@PACKAGE@-$1.tar.gz%" \
-https://github.com/gpertea/gffread/releases .*/archive/v?@ANY_VERSION@@ARCHIVE_EXT@
+https://github.com/gpertea/gffread/releases .*/v?@ANY_VERSION@@ARCHIVE_EXT@


=====================================
gff_utils.cpp
=====================================
@@ -42,10 +42,15 @@ bool fullCDSonly=false; // starts with START, ends with STOP codon
 bool multiExon=false;
 bool writeExonSegs=false;
 char* tracklabel=NULL;
+/*
 char* rfltGSeq=NULL;
 char rfltStrand=0;
 uint rfltStart=0;
-uint rfltEnd=MAX_UINT;
+uint rfltEnd=MAX_UINT;*/
+GRangeParser* fltRange=NULL;
+
+GRangeParser* fltJunction=NULL;
+
 bool rfltWithin=false; //check for full containment within given range
 bool addDescr=false;
 
@@ -166,7 +171,6 @@ int cmpRedundant(GffObj& a, GffObj& b) {
    else return (a.exons.Count()>b.exons.Count())? 1: -1;
 }
 
-
 bool tMatch(GffObj& a, GffObj& b) {
   //strict intron chain match, or single-exon perfect match
   int imax=a.exons.Count()-1;
@@ -282,6 +286,8 @@ int adjust_stopcodon(GffObj& gffrec, int adj, GList<GSeg>* seglst) {
 
 void printTableData(FILE* f, GffObj& g, bool inFasta) {
  //using attribute list in tableCols
+	const int DBUF_LEN=1024; //there should not be attribute values larger than 1K!
+	char dbuf[DBUF_LEN];
 	char* av=NULL;
 	for(int i=0;i<tableCols.Count();i++) {
 		if (i>0 || inFasta) {
@@ -291,7 +297,12 @@ void printTableData(FILE* f, GffObj& g, bool inFasta) {
 		switch(tableCols[i].type) {
 		case ctfGFF_Attr:
 			av=g.getAttr(tableCols[i].name.chars());
-			fprintf(f,"%s",av!=NULL? av : ".");
+			if (av) {
+				if (decodeChars) {
+					GffObj::decodeHexChars(dbuf, av, DBUF_LEN-1);
+					fprintf(f,"%s", dbuf);
+				} else fprintf(f,"%s",av);
+			} else fprintf(f,".");
 			break;
 		case ctfGFF_chr:
 			fprintf(f,"%s",g.getGSeqName());
@@ -375,7 +386,9 @@ bool GffLoader::validateGffRec(GffObj* gffrec) {
 				   }
 			   }
 			}
+		return false;
 		}
+		if (gffrec->isGene() && keepGenes) return true;
 		return false;
 	} //transcript rejected
 	return true;
@@ -419,24 +432,24 @@ bool GffLoader::checkFilters(GffObj* gffrec) {
 					gffrec->getID(), minLen);
     	return false;
 	}
-	if (rfltGSeq!=NULL) { //filter by gseqName
-		if (strcmp(gffrec->getGSeqName(),rfltGSeq)!=0) {
+	if (fltRange!=NULL) { //filter by gseqName
+		if (fltRange->refName!=NULL && strcmp(gffrec->getGSeqName(),fltRange->refName)!=0) {
 			return false;
 		}
-	}
-	if (rfltStrand>0 && gffrec->strand !=rfltStrand) {
-		return false;
-	}
-	//check coordinates
-	if (rfltStart!=0 || rfltEnd!=MAX_UINT) {
-		if (rfltWithin) {
-			if (gffrec->start<rfltStart || gffrec->end>rfltEnd) {
-				return false; //not within query range
-			}
+		if (fltRange->strand>0 && gffrec->strand!=fltRange->strand) {
+			return false;
 		}
-		else {
-			if (gffrec->start>rfltEnd || gffrec->end<rfltStart) {
-				return false;
+		//check coordinates
+		if (fltRange->start || fltRange->end<UINT_MAX) {
+			if (rfltWithin) {
+				if (gffrec->start<fltRange->start || gffrec->end>fltRange->end) {
+					return false; //not within query range
+				}
+			}
+			else {
+				if (gffrec->start>fltRange->end || gffrec->end<fltRange->start) {
+					return false;
+				}
 			}
 		}
 	}
@@ -444,7 +457,6 @@ bool GffLoader::checkFilters(GffObj* gffrec) {
 		//remove attributes that are not in attrList
 		gffrec->removeAttrs(attrList);
 	}
-
     if (gffrec->isTranscript()) {    // && TFilters) ?
     	//these filters only apply to transcripts
 		if (multiExon && gffrec->exons.Count()<=1) {
@@ -454,12 +466,46 @@ bool GffLoader::checkFilters(GffObj* gffrec) {
 			return false;
 		}
 		if (wNConly && gffrec->hasCDS()) return false;
+		if (fltJunction!=NULL) {
+			if (gffrec->exons.Count()<=1) return false;
+			if (fltJunction->refName!=NULL && strcmp(gffrec->getGSeqName(),fltJunction->refName)!=0) {
+				return false;
+			}
+			if (fltJunction->strand && gffrec->strand!=fltJunction->strand) {
+				return false;
+			}
+			//check coordinates
+			uint jstart=fltJunction->start;
+			uint jend=fltJunction->end;
+			if (jstart==0) jstart=jend;
+			if (jend==0)  jend=jstart;
+			if (gffrec->start>=jstart || gffrec->end<=jend) {
+						return false;
+	        }
+
+			bool noJMatch=true;
+			for (int i=0;i<gffrec->exons.Count()-1;++i) {
+				if (fltJunction->start && fltJunction->end) {
+					if (gffrec->exons[i]->end+1==fltJunction->start &&
+							gffrec->exons[i+1]->start-1==fltJunction->end)
+						{ noJMatch=false; break; }
+				} else if (fltJunction->start) { //end match not required
+					if (gffrec->exons[i]->end+1==fltJunction->start)
+						{ noJMatch=false; break; }
+				} else { //only end match required:
+					if (gffrec->exons[i+1]->start-1==fltJunction->end)
+						{ noJMatch=false; break; }
+				}
+			}
+			if (noJMatch) return false;
+		}
+
 		return process_transcript(gfasta, *gffrec);
     } //transcript filters check
 	return true;
 }
 
-bool process_transcript(GFastaDb& gfasta, GffObj& gffrec) {
+bool GffLoader::process_transcript(GFastaDb& gfasta, GffObj& gffrec) {
  if (!gffrec.isTranscript()) return false; //shouldn't call this function unless it's a transcript
  //returns true if the transcript passed the filter
  char* gname=gffrec.getGeneName();
@@ -680,40 +726,48 @@ bool process_transcript(GFastaDb& gfasta, GffObj& gffrec) {
   if (adjstop!=NULL) delete adjstop;
   */
   if (cdsnt!=NULL) { // && !inframeStop) {
+	  GStr defline(gffrec.getID(), 94);
+	  if (writeExonSegs) {
+		  defline.append(" loc:");
+		  defline.append(gffrec.getGSeqName());
+		  defline.appendfmt("(%c)",gffrec.strand);
+		  //warning: not CDS coordinates are written here, but the exon ones
+		  defline+=(int)gffrec.start;
+		  defline+=(char)'-';
+		  defline+=(int)gffrec.end;
+		  // -- here these are CDS substring coordinates on the spliced sequence:
+		  defline.append(" segs:");
+		  for (int i=0;i<seglst.Count();i++) {
+			  if (i>0) defline.append(",");
+			  defline+=(int)seglst[i].start;
+			  defline.append("-");
+			  defline+=(int)seglst[i].end;
+		  }
+	  }
 	  if (f_y!=NULL) { //CDS translation fasta output requested
 			 if (cdsaa==NULL) { //translate now if not done before
 			   cdsaa=translateDNA(cdsnt, aalen, seqlen);
 			 }
 			 if (aalen>0) {
 			   if (cdsaa[aalen-1]=='.' || cdsaa[aalen-1]=='\0') --aalen; //avoid printing the stop codon
- 			   fprintf(f_y, ">%s", gffrec.getID());
+ 			   fprintf(f_y, ">%s", defline.chars());
  			   if (fmtTable) printTableData(f_y, gffrec, true);
- 			   else fprintf(f_y, "\n");
+ 			   else {
+ 				  if (gffrec.attrs!=NULL && gffrec.attrs->Count()>0) fprintf(f_y," ");
+ 				  gffrec.printAttrs(f_y, ";", false, decodeChars, false);
+ 				  fprintf(f_y, "\n");
+ 			   }
 			   printFasta(f_y, NULL, cdsaa, aalen, StarStop);
 			 }
 	  }
 	  if (f_x!=NULL) { //CDS only
-			 GStr defline(gffrec.getID(), 94);
-			 if (writeExonSegs) {
-				  defline.append(" loc:");
-				  defline.append(gffrec.getGSeqName());
-				  defline.appendfmt("(%c)",gffrec.strand);
-				  //warning: not CDS coordinates are written here, but the exon ones
-				  defline+=(int)gffrec.start;
-				  defline+=(char)'-';
-				  defline+=(int)gffrec.end;
-				  // -- here these are CDS substring coordinates on the spliced sequence:
-				  defline.append(" segs:");
-				  for (int i=0;i<seglst.Count();i++) {
-					  if (i>0) defline.append(",");
-					  defline+=(int)seglst[i].start;
-					  defline.append("-");
-					  defline+=(int)seglst[i].end;
-					  }
-			 }
 			 fprintf(f_x, ">%s", defline.chars());
 			 if (fmtTable) printTableData(f_x, gffrec, true);
-			 else fprintf(f_x, "\n");
+			 else {
+				 if (gffrec.attrs!=NULL && gffrec.attrs->Count()>0) fprintf(f_x," ");
+				 gffrec.printAttrs(f_x, ";", false, decodeChars, false);
+				 fprintf(f_x, "\n");
+			 }
 			 printFasta(f_x, NULL, cdsnt, seqlen);
 	  }
 	  GFREE(cdsnt);
@@ -775,7 +829,11 @@ bool process_transcript(GFastaDb& gfasta, GffObj& gffrec) {
 
 		  fprintf(f_w, ">%s", defline.chars());
 		  if (fmtTable) printTableData(f_w, gffrec, true);
-		    else fprintf(f_w, "\n");
+		    else {
+		    	if (gffrec.attrs!=NULL && gffrec.attrs->Count()>0) fprintf(f_w," ");
+		    	gffrec.printAttrs(f_w, ";", false, decodeChars, false);
+		    	fprintf(f_w, "\n");
+		    }
 		  printFasta(f_w, NULL, exont, seqlen);
 		  GFREE(exont);
 	  }
@@ -793,8 +851,6 @@ GTData::GTData(GffObj* t, GenomicSeqData* gd):rna(t),gdata(gd), locus(NULL), rep
  	   gdata->tdata.Add(this);
 }
 
-
-
 bool GffLoader::unsplContained(GffObj& ti, GffObj&  tj) {
  //returns true only if ti (which MUST be single-exon) is "almost" contained in any of tj's exons
  //but it does not cross any intron-exon boundary of tj


=====================================
gff_utils.h
=====================================
@@ -19,7 +19,6 @@ extern FILE* f_y; //wrting fasta with translated CDS
 
 extern FILE* f_j; //wrting junctions (introns)
 
-
 extern bool TFilters;
 
 extern bool wfaNoCDS;
@@ -56,10 +55,6 @@ extern bool fullCDSonly; // starts with START, ends with STOP codon
 extern bool multiExon;
 extern bool writeExonSegs;
 extern char* tracklabel;
-extern char* rfltGSeq;
-extern char rfltStrand;
-extern uint rfltStart;
-extern uint rfltEnd;
 extern bool rfltWithin; //check for full containment within given range
 extern bool addDescr;
 
@@ -94,6 +89,10 @@ typedef bool GFValidateFunc(GffObj* gf);
 //keep/set original/old strand
 #define T_SET_OSTRAND(d, s) d |= s
 
+extern GRangeParser* fltRange;
+
+extern GRangeParser* fltJunction;
+
 class SeqInfo { //populated from the -s option of gffread
  public:
   int len;
@@ -129,7 +128,6 @@ char* getSeqDescr(char* seqid);
 char* getSeqName(char* seqid);
 int adjust_stopcodon(GffObj& gffrec, int adj, GList<GSeg>* seglst=NULL);
 void printTableData(FILE* f, GffObj& g, bool inFasta=false);
-bool process_transcript(GFastaDb& gfasta, GffObj& gffrec);
 
 enum ETableFieldType {
   ctfGFF_Attr=0, // attribute name as is
@@ -803,6 +801,8 @@ class GffLoader {
   }
 
   bool validateGffRec(GffObj* gffrec);
+  bool process_transcript(GFastaDb& gfasta, GffObj& gffrec);
+
   bool checkFilters(GffObj* gffrec);
 
   void collectIntrons(GffObj& t); //for -j output


=====================================
gffread.cpp
=====================================
@@ -4,13 +4,14 @@
 #define __STDC_FORMAT_MACROS
 #include <inttypes.h>
 
-#define VERSION "0.12.4"
+#define VERSION "0.12.7"
 
 #define USAGE "gffread v" VERSION ". Usage:\n\
 gffread [-g <genomic_seqs_fasta> | <dir>] [-s <seq_info.fsize>] \n\
- [-o <outfile>] [-t <trackname>] [-r [[<strand>]<chr>:]<start>..<end> [-R]]\n\
+ [-o <outfile>] [-t <trackname>] [-r [<strand>]<chr>:<start>-<end> [-R]]\n\
+ [--jmatch <chr>:<start>-<end>] [--no-pseudo] \n\
  [-CTVNJMKQAFPGUBHZWTOLE] [-w <exons.fa>] [-x <cds.fa>] [-y <tr_cds.fa>]\n\
- [--ids <IDs.lst> | --nids <IDs.lst>] [--attrs <attr-list>] [-i <maxintron>]\n\
+ [-j ][--ids <IDs.lst> | --nids <IDs.lst>] [--attrs <attr-list>] [-i <maxintron>]\n\
  [--stream] [--bed | --gtf | --tlf] [--table <attrlist>] [--sort-by <ref.lst>]\n\
  [<input_gff>] \n\n\
  Filter, convert or cluster GFF/GTF/BED records, extract the sequence of\n\
@@ -20,14 +21,15 @@ gffread [-g <genomic_seqs_fasta> | <dir>] [-s <seq_info.fsize>] \n\
  the basic attributes.\n\
  \n\
 Options:\n\
- -i   discard transcripts having an intron larger than <maxintron>\n\
  --ids discard records/transcripts if their IDs are not listed in <IDs.lst>\n\
  --nids discard records/transcripts if their IDs are listed in <IDs.lst>\n\
+ -i   discard transcripts having an intron larger than <maxintron>\n\
  -l   discard transcripts shorter than <minlen> bases\n\
  -r   only show transcripts overlapping coordinate range <start>..<end>\n\
       (on chromosome/contig <chr>, strand <strand> if provided)\n\
  -R   for -r option, discard all transcripts that are not fully \n\
       contained within the given range\n\
+ --jmatch only output transcripts matching the given junction\n\
  -U   discard single-exon transcripts\n\
  -C   coding only: discard mRNAs that have no CDS features\n\
  --nc non-coding only: discard mRNAs that have CDS features\n\
@@ -105,14 +107,14 @@ Output options:\n\
  -g   full path to a multi-fasta file with the genomic sequences\n\
       for all input mappings, OR a directory with single-fasta files\n\
       (one per genomic sequence, with file names matching sequence names)\n\
- -j    write a tab delimited file with all the junctions (intron coordinates)\n\
+ -j    output the junctions and the corresponding transcripts\n\
  -w    write a fasta file with spliced exons for each transcript\n\
  --w-add <N> for the -w option, extract additional <N> bases\n\
        both upstream and downstream of the transcript boundaries\n\
  --w-nocds for -w, disable the output of CDS info in the FASTA file\n\
  -x    write a fasta file with spliced CDS for each GFF transcript\n\
  -y    write a protein fasta file with the translation of CDS for each record\n\
- -W    for -w and -x options, write in the FASTA defline all the exon\n\
+ -W    for -w, -x and -y options, write in the FASTA defline all the exon\n\
        coordinates projected onto the spliced sequence;\n\
  -S    for -y option, use '*' instead of '.' as stop codon translation\n\
  -L    Ensembl GTF to GFF3 conversion, adds version to IDs\n\
@@ -252,11 +254,6 @@ void setTableFormat(GStr& s) {
     	  tableCols.Add(tcol);
     	  continue;
       }
-      if (w=="geneID" || w=="gene_id") {
-    	  CTableField tcol(ctfGFF_geneID);
-    	  tableCols.Add(tcol);
-    	  continue;
-      }
       if (w=="Parent") {
     	  CTableField tcol(ctfGFF_Parent);
     	  tableCols.Add(tcol);
@@ -392,7 +389,8 @@ void shutDown() {
 	seqinfo.Clear();
 	//if (faseq!=NULL) delete faseq;
 	//if (gcdb!=NULL) delete gcdb;
-	GFREE(rfltGSeq);
+	delete fltRange;
+	delete fltJunction;
 	FWCLOSE(f_out);
 	FWCLOSE(f_w);
 	FWCLOSE(f_x);
@@ -403,11 +401,15 @@ void shutDown() {
 int main(int argc, char* argv[]) {
  GArgs args(argc, argv,
    "version;debug;merge;stream;adj-stop;bed;in-bed;tlf;in-tlf;cluster-only;nc;cov-info;help;"
-    "sort-alpha;keep-genes;w-nocds;attrs=;w-add=;ids=;nids=0;gtf;keep-comments;keep-exon-attrs;force-exons;t-adopt;gene2exon;"
+    "sort-alpha;keep-genes;w-nocds;attrs=;w-add=;ids=;nids=;jmatch=;gtf;keep-comments;keep-exon-attrs;force-exons;t-adopt;gene2exon;"
     "ignore-locus;no-pseudo;table=sort-by=hvOUNHPWCVJMKQYTDARSZFGLEBm:g:i:r:s:l:t:o:w:x:y:j:d:");
  args.printError(USAGE, true);
  int numfiles = args.startNonOpt();
- if (args.getOpt('h') || args.getOpt("help") || ( numfiles==0 && !hasStdInput())) {
+ if (args.getOpt("version")) {
+    printf(VERSION"\n");
+    exit(0);
+ }
+ if (args.getOpt('h') || args.getOpt("help") || ( numfiles==0 && !haveStdInput())) {
     GMessage("%s",USAGE);
     exit(1);
  }
@@ -491,10 +493,6 @@ int main(int argc, char* argv[]) {
      fprintf(stderr, "Command line was:\n");
      args.printCmdLine(stderr);
      }
- if (args.getOpt("version")) {
-  printf(VERSION"\n");
-  exit(0);
- }
  gffloader.fullAttributes=(args.getOpt('F')!=NULL);
  gffloader.keep_AllExonAttrs=(args.getOpt("keep-exon-attrs")!=NULL);
  if (gffloader.keep_AllExonAttrs && !gffloader.fullAttributes) {
@@ -558,44 +556,22 @@ int main(int argc, char* argv[]) {
 	 gffloader.fullAttributes=true;
  }
  rfltWithin=(args.getOpt('R')!=NULL);
- s=args.getOpt('r');
- if (!s.is_empty()) {
-   s.trim();
-   if (s[0]=='+' || s[0]=='-') {
-     rfltStrand=s[0];
-     s.cut(0,1);
-     }
-   int isep=s.index(':');
-   if (isep>0) { //gseq name given
-      if (rfltStrand==0 && (s[isep-1]=='+' || s[isep-1]=='-')) {
-        isep--;
-        rfltStrand=s[isep];
-        s.cut(isep,1);
-        }
-      if (isep>0)
-          rfltGSeq=Gstrdup((s.substr(0,isep)).chars());
-      s.cut(0,isep+1);
-      }
-   GStr gsend;
-   char slast=s[s.length()-1];
-   if (rfltStrand==0 && (slast=='+' || slast=='-')) {
-      s.chomp(slast);
-      rfltStrand=slast;
-      }
-   if (s.index("..")>=0) gsend=s.split("..");
-                    else gsend=s.split('-');
-   if (!s.is_empty()) rfltStart=(uint)s.asInt();
-   if (!gsend.is_empty()) {
-      rfltEnd=(uint)gsend.asInt();
-      if (rfltEnd==0) rfltEnd=MAX_UINT;
-      }
-   } //gseq/range filtering
- else {
+ char* sz=args.getOpt('r');
+ if (sz) {
+	fltRange=new GRangeParser(sz);
+ 	if (fltRange->end==0) //end coordinate not given
+ 		fltRange->end=UINT_MAX;
+ } else {
    if (rfltWithin)
      GError("Error: option -R requires -r!\n");
-   //if (rfltWholeTranscript)
-   //  GError("Error: option -P requires -r!\n");
-   }
+ }
+ sz=args.getOpt("jmatch");
+ if (sz) {
+	//TODO: check if this is a file?
+	fltJunction=new GRangeParser(sz);
+	if (fltJunction->strand=='.') fltJunction->strand=0;
+ } //gseq/range filtering
+
  s=args.getOpt('m');
  if (!s.is_empty()) {
    FILE* ft=fopen(s,"r");
@@ -645,7 +621,7 @@ int main(int argc, char* argv[]) {
  if (f_w!=NULL && args.getOpt("w-nocds"))
 	 wfaNoCDS=true;
 
- if (f_out==NULL && f_w==NULL && f_x==NULL && f_y==NULL && !covInfo)
+ if (f_out==NULL && f_w==NULL && f_x==NULL && f_y==NULL && f_j==NULL && !covInfo)
 	 f_out=stdout;
 
  //if (f_y!=NULL || f_x!=NULL) wCDSonly=true;


=====================================
prep_source.sh
=====================================
@@ -14,7 +14,7 @@ libdir=$pack/gclib/
 cp LICENSE README.md gffread.cpp gff_utils.{h,cpp} $pack/
 sed 's|\.\./gclib|./gclib|' Makefile > $pack/Makefile
 
-cp ../gclib/{GVec,GList,GHashMap,khashl}.hh ../gclib/xxhash.h $libdir
+cp ../gclib/{GVec,GList,GHashMap,khashl}.hh ../gclib/xxhash.h ../gclib/wyhash.h ../gclib/GBitVec.h $libdir
 cp ../gclib/{GArgs,GBase,gdna,GStr,gff,codons,GFaSeqGet,GFastaIndex}.{h,cpp} $libdir
 tar cvfz $pack.tar.gz $pack
 ls -l $pack.tar.gz



View it on GitLab: https://salsa.debian.org/med-team/gffread/-/compare/b60b67e61264f6b52333bdea38f528e59e0e08ce...852c8655e87bcaf4d8a1f48edb5d2f9ee6c682fb

-- 
View it on GitLab: https://salsa.debian.org/med-team/gffread/-/compare/b60b67e61264f6b52333bdea38f528e59e0e08ce...852c8655e87bcaf4d8a1f48edb5d2f9ee6c682fb
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20211013/df268337/attachment-0001.htm>


More information about the debian-med-commit mailing list