[med-svn] [Git][med-team/gffread][master] 5 commits: New upstream version 0.12.1

Steffen Möller gitlab at salsa.debian.org
Mon Aug 10 11:49:02 BST 2020



Steffen Möller pushed to branch master at Debian Med / gffread


Commits:
e5cb5692 by Steffen Moeller at 2020-08-08T22:51:40+02:00
New upstream version 0.12.1
- - - - -
1e2579c4 by Steffen Moeller at 2020-08-08T22:51:40+02:00
routine-update: New upstream version

- - - - -
9dbff771 by Steffen Moeller at 2020-08-08T22:51:43+02:00
Update upstream source from tag 'upstream/0.12.1'

Update to upstream version '0.12.1'
with Debian dir 6ced4dedde8d40954843cc472a057b848a356ed9
- - - - -
b8893762 by Steffen Moeller at 2020-08-08T22:51:43+02:00
routine-update: debhelper-compat 13

- - - - -
d719fba1 by Steffen Moeller at 2020-08-10T12:45:36+02:00
Preparing for new upstream version.

- - - - -


17 changed files:

- Makefile
- README.md
- debian/changelog
- debian/control
- debian/patches/gclib.patch
- debian/patches/hardening
- + examples/README.md
- + examples/annotation.gff
- + examples/genome.fa
- + examples/output/ann_simple.gff
- + examples/output/annotation.gtf
- + examples/output/transcripts.fa
- + examples/transcripts.gtf
- gff_utils.cpp
- gff_utils.h
- gffread.cpp
- tag_git.sh


Changes:

=====================================
Makefile
=====================================
@@ -13,7 +13,7 @@ BASEFLAGS  := -Wall -Wextra ${SEARCHDIRS} -D_FILE_OFFSET_BITS=64 \
 -D_LARGEFILE_SOURCE -D_REENTRANT -fno-strict-aliasing \
  -std=c++0x -fno-exceptions -fno-rtti
 
-GCCV8 := $(shell expr `g++ -dumpversion | cut -f1 -d.` \>= 8)
+GCCV8 := $(shell expr `${CXX} -dumpversion | cut -f1 -d.` \>= 8)
 ifeq "$(GCCV8)" "1"
  BASEFLAGS += -Wno-class-memaccess
 endif
@@ -34,12 +34,12 @@ else
   ifneq (,$(filter %memcheck %memdebug, $(MAKECMDGOALS)))
      #use sanitizer in gcc 4.9+
      MEMCHECK_BUILD := 1
-     GCCVER49 := $(shell expr `g++ -dumpversion | cut -f1,2 -d.` \>= 4.9)
+     GCCVER49 := $(shell expr `${CXX} -dumpversion | cut -f1,2 -d.` \>= 4.9)
      ifeq "$(GCCVER49)" "0"
        $(error gcc version 4.9 or greater is required for this build target)
      endif
      CXXFLAGS += -fno-omit-frame-pointer -fsanitize=undefined -fsanitize=address
-     GCCVER5 := $(shell expr `g++ -dumpversion | cut -f1 -d.` \>= 5)
+     GCCVER5 := $(shell expr `${CXX} -dumpversion | cut -f1 -d.` \>= 5)
      ifeq "$(GCCVER5)" "1"
        CXXFLAGS += -fsanitize=bounds -fsanitize=float-divide-by-zero -fsanitize=vptr
        CXXFLAGS += -fsanitize=float-cast-overflow -fsanitize=object-size
@@ -75,8 +75,10 @@ OBJS := ${GCLDIR}/GBase.o ${GCLDIR}/GArgs.o ${GCLDIR}/GFaSeqGet.o \
  
 .PHONY : all
 
-nodebug: release
-all release debug memcheck memdebug profile gprof prof: gffread
+all release debug memcheck memdebug profile gprof prof: ../gclib gffread
+
+../gclib:
+	git clone https://github.com/gpertea/gclib.git ../gclib
 
 $(OBJS) : $(GCLDIR)/GBase.h $(GCLDIR)/gff.h
 gffread.o : gff_utils.h $(GCLDIR)/GBase.h $(GCLDIR)/gff.h


=====================================
README.md
=====================================
@@ -1,18 +1,25 @@
-# gffread
-GFF/GTF parsing utility providing format conversions, region filtering, 
-FASTA sequence extraction and more.
+## GffRead 
 
-Use gffread -h to check the usage options.
+GFF/GTF utility providing format conversions, filtering, FASTA sequence 
+extraction and more.
 
-Compiling this program from source requires the [GCLib](../../../gclib) code 
-library. Building the program can be done like this:
+More details and usage examples can be found in the paper [DOI: 10.12688/f1000research.23297.1](http://dx.doi.org/10.12688/f1000research.23297.1) which can be also used to cite this software.
+
+The official webpage with download packages for this utility can be found online here: 
+ http://ccb.jhu.edu/software/stringtie/gff.shtml#gffread
+
+Use `gffread -h` to see the command line usage options.
+
+## Installation
+Building this program from source requires the [GCLib](../../../gclib) source code 
+library. The `make` command should automatically fetch the latest gclib version from the repository if no `../gclib` directory is found.
 
 ```
   cd /some/build/dir
-  git clone https://github.com/gpertea/gclib
   git clone https://github.com/gpertea/gffread
   cd gffread
   make release
 ```
+This should create the **gffread** binary in the current directory.
+
 
-This should build the **gffread** binary in the current directory.


=====================================
debian/changelog
=====================================
@@ -1,3 +1,14 @@
+gffread (0.12.1-1) unstable; urgency=medium
+
+  * Team upload.
+  * New upstream version
+  * debhelper-compat 13 (routine-update)
+  * Build-depends on newer version of libgclib.
+  * Don't depend on creating a checkout of gclib during build
+  * Added myself to uploaders.
+
+ -- Steffen Moeller <moeller at debian.org>  Sat, 08 Aug 2020 22:51:47 +0200
+
 gffread (0.11.8-1) unstable; urgency=medium
 
   * Adjust the autopkgtests to exclude the spades dependency on !amd64


=====================================
debian/control
=====================================
@@ -1,11 +1,12 @@
 Source: gffread
 Maintainer: Debian Med Packaging Team <debian-med-packaging at lists.alioth.debian.org>
 Uploaders: Andreas Tille <tille at debian.org>,
-           Michael R. Crusoe <crusoe at debian.org>
+           Michael R. Crusoe <crusoe at debian.org>,
+           Steffen Moeller <moeller at debian.org>
 Section: science
 Priority: optional
-Build-Depends: debhelper-compat (= 12),
-               libgclib-dev
+Build-Depends: debhelper-compat (= 13),
+               libgclib-dev (>= 0.11.10)
 Standards-Version: 4.5.0
 Vcs-Browser: https://salsa.debian.org/med-team/gffread
 Vcs-Git: https://salsa.debian.org/med-team/gffread.git


=====================================
debian/patches/gclib.patch
=====================================
@@ -2,9 +2,20 @@ Author: Andreas Tille <tille at debian.org>
 Last-Update: Thu, 18 Apr 2019 12:40:25 +0200
 Description: Fix build against libgclib
 
+Index: gffread/Makefile
+===================================================================
 --- gffread.orig/Makefile
 +++ gffread/Makefile
-@@ -83,8 +83,8 @@
+@@ -75,7 +75,7 @@ OBJS := ${GCLDIR}/GBase.o ${GCLDIR}/GArg
+  
+ .PHONY : all
+ 
+-all release debug memcheck memdebug profile gprof prof: ../gclib gffread
++all release debug memcheck memdebug profile gprof prof: gffread
+ 
+ ../gclib:
+ 	git clone https://github.com/gpertea/gclib.git ../gclib
+@@ -85,8 +85,8 @@ gffread.o : gff_utils.h $(GCLDIR)/GBase.
  gff_utils.o : gff_utils.h $(GCLDIR)/gff.h
  ${GCLDIR}/gff.o : ${GCLDIR}/gff.h ${GCLDIR}/GFaSeqGet.h ${GCLDIR}/GList.hh ${GCLDIR}/GHash.hh
  ${GCLDIR}/GFaSeqGet.o : ${GCLDIR}/GFaSeqGet.h


=====================================
debian/patches/hardening
=====================================
@@ -2,9 +2,11 @@ From: Michael R. Crusoe <michael.crusoe at gmail.com>
 Subject: Use CPPFLAGS
 
 Allows Debian to harden the binary with CPPFLAGS=-Wdate-time -D_FORTIFY_SOURCE=2
+Index: gffread/Makefile
+===================================================================
 --- gffread.orig/Makefile
 +++ gffread/Makefile
-@@ -65,7 +65,7 @@
+@@ -65,7 +65,7 @@ endif
  #endif
  
  %.o : %.cpp
@@ -13,7 +15,7 @@ Allows Debian to harden the binary with CPPFLAGS=-Wdate-time -D_FORTIFY_SOURCE=2
  
  # C/C++ linker
  
-@@ -84,7 +84,7 @@
+@@ -86,7 +86,7 @@ gff_utils.o : gff_utils.h $(GCLDIR)/gff.
  ${GCLDIR}/gff.o : ${GCLDIR}/gff.h ${GCLDIR}/GFaSeqGet.h ${GCLDIR}/GList.hh ${GCLDIR}/GHash.hh
  ${GCLDIR}/GFaSeqGet.o : ${GCLDIR}/GFaSeqGet.h
  gffread: gffread.o gff_utils.o


=====================================
examples/README.md
=====================================
@@ -0,0 +1,29 @@
+## GffRead usage examples
+
+GffRead can be used to simply read an annotation file in a GFF format, and print it in either GFF3 (default) or 
+GTF2 format (with the -T option), while discarding any non-trasncript features and optional attributes. 
+It can also report some potential issues found in the input GFF records. The command line for such a quick GFF/GTF 
+file cleanup would be:
+```
+gffread -E annotation.gff -o ann_simple.gff
+```
+
+This will create a minimalist GFF3 re-formatting of the transcript records found in the input file (`annotation.gff` in this example). 
+The -E option directs GffRead to "expose" (display warnings about) any potential formatting issues 
+encountered while parsing the input file.
+
+In order to obtain the GTF2 version of the same transcript records, the `-T` option should be added:
+```
+gffread annotation.gff -T -o annotation.gtf
+```
+
+GffRead can be used to generate a FASTA file with the DNA sequences for all transcripts in a GFF file. For this operation 
+a fasta file with the genomic sequences has to be provided as well. This can be accomplished with a command line like this:
+```
+gffread -w transcripts.fa -g genome.fa annotation.gff
+```
+The file `genome.fa` in this example would be a multi-fasta file with the chromosome/contig sequences of the target genome. 
+This also requires that every contig or chromosome name found in the 1st column of the input GFF file 
+(`annotation.gff` in this example) must have a corresponding sequence entry in the `genome.fa` file.
+
+The `output` directory contains all the output files that should be generated by the above examples.


=====================================
examples/annotation.gff
=====================================
@@ -0,0 +1,86 @@
+NT_187562.1	BestRefSeq	gene	411	68627	.	+	.	ID=gene55473;Dbxref=GeneID:8972,HGNC:HGNC:7043,MIM:154360;Name=MGAM;description=maltase-glucoamylase;gbkey=Gene;gene=MGAM;gene_biotype=protein_coding;gene_synonym=MG,MGA;partial=true;start_range=.,411
+NT_187562.1	BestRefSeq	mRNA	411	68627	.	+	.	ID=rna157470;Parent=gene55473;Dbxref=GeneID:8972,Genbank:NM_004668.2,HGNC:HGNC:7043,MIM:154360;Name=NM_004668.2;Note=The RefSeq transcript aligns at 64%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=MGAM;partial=true;product=maltase-glucoamylase;start_range=.,411;transcript_id=NM_004668.2
+NT_187562.1	BestRefSeq	exon	411	495	.	+	.	ID=id1808900;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NM_004668.2,HGNC:HGNC:7043,MIM:154360;Note=The RefSeq transcript aligns at 64%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=MGAM;partial=true;product=maltase-glucoamylase;start_range=.,411;transcript_id=NM_004668.2
+NT_187562.1	BestRefSeq	exon	1995	2051	.	+	.	ID=id1808901;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NM_004668.2,HGNC:HGNC:7043,MIM:154360;Note=The RefSeq transcript aligns at 64%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=MGAM;partial=true;product=maltase-glucoamylase;transcript_id=NM_004668.2
+NT_187562.1	BestRefSeq	exon	2602	2726	.	+	.	ID=id1808902;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NM_004668.2,HGNC:HGNC:7043,MIM:154360;Note=The RefSeq transcript aligns at 64%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=MGAM;partial=true;product=maltase-glucoamylase;transcript_id=NM_004668.2
+NT_187562.1	BestRefSeq	exon	9665	9753	.	+	.	ID=id1808903;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NM_004668.2,HGNC:HGNC:7043,MIM:154360;Note=The RefSeq transcript aligns at 64%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=MGAM;partial=true;product=maltase-glucoamylase;transcript_id=NM_004668.2
+NT_187562.1	BestRefSeq	exon	12115	12164	.	+	.	ID=id1808904;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NM_004668.2,HGNC:HGNC:7043,MIM:154360;Note=The RefSeq transcript aligns at 64%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=MGAM;partial=true;product=maltase-glucoamylase;transcript_id=NM_004668.2
+NT_187562.1	BestRefSeq	exon	12577	12744	.	+	.	ID=id1808905;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NM_004668.2,HGNC:HGNC:7043,MIM:154360;Note=The RefSeq transcript aligns at 64%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=MGAM;partial=true;product=maltase-glucoamylase;transcript_id=NM_004668.2
+NT_187562.1	BestRefSeq	exon	14174	14326	.	+	.	ID=id1808906;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NM_004668.2,HGNC:HGNC:7043,MIM:154360;Note=The RefSeq transcript aligns at 64%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=MGAM;partial=true;product=maltase-glucoamylase;transcript_id=NM_004668.2
+NT_187562.1	BestRefSeq	exon	14664	14864	.	+	.	ID=id1808907;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NM_004668.2,HGNC:HGNC:7043,MIM:154360;Note=The RefSeq transcript aligns at 64%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=MGAM;partial=true;product=maltase-glucoamylase;transcript_id=NM_004668.2
+NT_187562.1	BestRefSeq	exon	16634	16788	.	+	.	ID=id1808908;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NM_004668.2,HGNC:HGNC:7043,MIM:154360;Note=The RefSeq transcript aligns at 64%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=MGAM;partial=true;product=maltase-glucoamylase;transcript_id=NM_004668.2
+NT_187562.1	BestRefSeq	exon	17438	17606	.	+	.	ID=id1808909;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NM_004668.2,HGNC:HGNC:7043,MIM:154360;Note=The RefSeq transcript aligns at 64%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=MGAM;partial=true;product=maltase-glucoamylase;transcript_id=NM_004668.2
+NT_187562.1	BestRefSeq	exon	17880	17976	.	+	.	ID=id1808910;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NM_004668.2,HGNC:HGNC:7043,MIM:154360;Note=The RefSeq transcript aligns at 64%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=MGAM;partial=true;product=maltase-glucoamylase;transcript_id=NM_004668.2
+NT_187562.1	BestRefSeq	exon	18710	18822	.	+	.	ID=id1808911;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NM_004668.2,HGNC:HGNC:7043,MIM:154360;Note=The RefSeq transcript aligns at 64%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=MGAM;partial=true;product=maltase-glucoamylase;transcript_id=NM_004668.2
+NT_187562.1	BestRefSeq	exon	20083	20208	.	+	.	ID=id1808912;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NM_004668.2,HGNC:HGNC:7043,MIM:154360;Note=The RefSeq transcript aligns at 64%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=MGAM;partial=true;product=maltase-glucoamylase;transcript_id=NM_004668.2
+NT_187562.1	BestRefSeq	exon	21352	21480	.	+	.	ID=id1808913;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NM_004668.2,HGNC:HGNC:7043,MIM:154360;Note=The RefSeq transcript aligns at 64%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=MGAM;partial=true;product=maltase-glucoamylase;transcript_id=NM_004668.2
+NT_187562.1	BestRefSeq	exon	21736	21846	.	+	.	ID=id1808914;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NM_004668.2,HGNC:HGNC:7043,MIM:154360;Note=The RefSeq transcript aligns at 64%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=MGAM;partial=true;product=maltase-glucoamylase;transcript_id=NM_004668.2
+NT_187562.1	BestRefSeq	exon	22191	22253	.	+	.	ID=id1808915;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NM_004668.2,HGNC:HGNC:7043,MIM:154360;Note=The RefSeq transcript aligns at 64%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=MGAM;partial=true;product=maltase-glucoamylase;transcript_id=NM_004668.2
+NT_187562.1	BestRefSeq	exon	24448	24582	.	+	.	ID=id1808916;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NM_004668.2,HGNC:HGNC:7043,MIM:154360;Note=The RefSeq transcript aligns at 64%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=MGAM;partial=true;product=maltase-glucoamylase;transcript_id=NM_004668.2
+NT_187562.1	BestRefSeq	exon	25379	25466	.	+	.	ID=id1808917;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NM_004668.2,HGNC:HGNC:7043,MIM:154360;Note=The RefSeq transcript aligns at 64%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=MGAM;partial=true;product=maltase-glucoamylase;transcript_id=NM_004668.2
+NT_187562.1	BestRefSeq	exon	26264	26402	.	+	.	ID=id1808918;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NM_004668.2,HGNC:HGNC:7043,MIM:154360;Note=The RefSeq transcript aligns at 64%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=MGAM;partial=true;product=maltase-glucoamylase;transcript_id=NM_004668.2
+NT_187562.1	BestRefSeq	exon	27215	27348	.	+	.	ID=id1808919;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NM_004668.2,HGNC:HGNC:7043,MIM:154360;Note=The RefSeq transcript aligns at 64%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=MGAM;partial=true;product=maltase-glucoamylase;transcript_id=NM_004668.2
+NT_187562.1	BestRefSeq	exon	56500	56534	.	+	.	ID=id1808920;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NM_004668.2,HGNC:HGNC:7043,MIM:154360;Note=The RefSeq transcript aligns at 64%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=MGAM;partial=true;product=maltase-glucoamylase;transcript_id=NM_004668.2
+NT_187562.1	BestRefSeq	exon	56627	56743	.	+	.	ID=id1808921;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NM_004668.2,HGNC:HGNC:7043,MIM:154360;Note=The RefSeq transcript aligns at 64%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=MGAM;partial=true;product=maltase-glucoamylase;transcript_id=NM_004668.2
+NT_187562.1	BestRefSeq	exon	57445	57593	.	+	.	ID=id1808922;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NM_004668.2,HGNC:HGNC:7043,MIM:154360;Note=The RefSeq transcript aligns at 64%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=MGAM;partial=true;product=maltase-glucoamylase;transcript_id=NM_004668.2
+NT_187562.1	BestRefSeq	exon	58211	58295	.	+	.	ID=id1808923;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NM_004668.2,HGNC:HGNC:7043,MIM:154360;Note=The RefSeq transcript aligns at 64%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=MGAM;partial=true;product=maltase-glucoamylase;transcript_id=NM_004668.2
+NT_187562.1	BestRefSeq	exon	59473	59529	.	+	.	ID=id1808924;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NM_004668.2,HGNC:HGNC:7043,MIM:154360;Note=The RefSeq transcript aligns at 64%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=MGAM;partial=true;product=maltase-glucoamylase;transcript_id=NM_004668.2
+NT_187562.1	BestRefSeq	exon	61493	61617	.	+	.	ID=id1808925;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NM_004668.2,HGNC:HGNC:7043,MIM:154360;Note=The RefSeq transcript aligns at 64%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=MGAM;partial=true;product=maltase-glucoamylase;transcript_id=NM_004668.2
+NT_187562.1	BestRefSeq	exon	62682	62770	.	+	.	ID=id1808926;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NM_004668.2,HGNC:HGNC:7043,MIM:154360;Note=The RefSeq transcript aligns at 64%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=MGAM;partial=true;product=maltase-glucoamylase;transcript_id=NM_004668.2
+NT_187562.1	BestRefSeq	exon	64510	64559	.	+	.	ID=id1808927;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NM_004668.2,HGNC:HGNC:7043,MIM:154360;Note=The RefSeq transcript aligns at 64%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=MGAM;partial=true;product=maltase-glucoamylase;transcript_id=NM_004668.2
+NT_187562.1	BestRefSeq	exon	65149	65319	.	+	.	ID=id1808928;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NM_004668.2,HGNC:HGNC:7043,MIM:154360;Note=The RefSeq transcript aligns at 64%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=MGAM;partial=true;product=maltase-glucoamylase;transcript_id=NM_004668.2
+NT_187562.1	BestRefSeq	exon	67694	68627	.	+	.	ID=id1808929;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NM_004668.2,HGNC:HGNC:7043,MIM:154360;Note=The RefSeq transcript aligns at 64%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=MGAM;partial=true;product=maltase-glucoamylase;transcript_id=NM_004668.2
+NT_187562.1	BestRefSeq	CDS	411	495	.	+	1	ID=cds110496;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NP_004659.2,HGNC:HGNC:7043,MIM:154360;Name=NP_004659.2;Note=The RefSeq protein aligns at 59%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=CDS;gene=MGAM;partial=true;product=maltase-glucoamylase%2C intestinal;protein_id=NP_004659.2;start_range=.,411
+NT_187562.1	BestRefSeq	CDS	1995	2051	.	+	0	ID=cds110496;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NP_004659.2,HGNC:HGNC:7043,MIM:154360;Name=NP_004659.2;Note=The RefSeq protein aligns at 59%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=CDS;gene=MGAM;partial=true;product=maltase-glucoamylase%2C intestinal;protein_id=NP_004659.2
+NT_187562.1	BestRefSeq	CDS	2602	2726	.	+	0	ID=cds110496;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NP_004659.2,HGNC:HGNC:7043,MIM:154360;Name=NP_004659.2;Note=The RefSeq protein aligns at 59%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=CDS;gene=MGAM;partial=true;product=maltase-glucoamylase%2C intestinal;protein_id=NP_004659.2
+NT_187562.1	BestRefSeq	CDS	9665	9753	.	+	1	ID=cds110496;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NP_004659.2,HGNC:HGNC:7043,MIM:154360;Name=NP_004659.2;Note=The RefSeq protein aligns at 59%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=CDS;gene=MGAM;partial=true;product=maltase-glucoamylase%2C intestinal;protein_id=NP_004659.2
+NT_187562.1	BestRefSeq	CDS	12115	12164	.	+	2	ID=cds110496;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NP_004659.2,HGNC:HGNC:7043,MIM:154360;Name=NP_004659.2;Note=The RefSeq protein aligns at 59%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=CDS;gene=MGAM;partial=true;product=maltase-glucoamylase%2C intestinal;protein_id=NP_004659.2
+NT_187562.1	BestRefSeq	CDS	12577	12744	.	+	0	ID=cds110496;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NP_004659.2,HGNC:HGNC:7043,MIM:154360;Name=NP_004659.2;Note=The RefSeq protein aligns at 59%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=CDS;gene=MGAM;partial=true;product=maltase-glucoamylase%2C intestinal;protein_id=NP_004659.2
+NT_187562.1	BestRefSeq	CDS	14174	14326	.	+	0	ID=cds110496;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NP_004659.2,HGNC:HGNC:7043,MIM:154360;Name=NP_004659.2;Note=The RefSeq protein aligns at 59%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=CDS;gene=MGAM;partial=true;product=maltase-glucoamylase%2C intestinal;protein_id=NP_004659.2
+NT_187562.1	BestRefSeq	CDS	14664	14864	.	+	0	ID=cds110496;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NP_004659.2,HGNC:HGNC:7043,MIM:154360;Name=NP_004659.2;Note=The RefSeq protein aligns at 59%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=CDS;gene=MGAM;partial=true;product=maltase-glucoamylase%2C intestinal;protein_id=NP_004659.2
+NT_187562.1	BestRefSeq	CDS	16634	16788	.	+	0	ID=cds110496;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NP_004659.2,HGNC:HGNC:7043,MIM:154360;Name=NP_004659.2;Note=The RefSeq protein aligns at 59%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=CDS;gene=MGAM;partial=true;product=maltase-glucoamylase%2C intestinal;protein_id=NP_004659.2
+NT_187562.1	BestRefSeq	CDS	17438	17606	.	+	1	ID=cds110496;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NP_004659.2,HGNC:HGNC:7043,MIM:154360;Name=NP_004659.2;Note=The RefSeq protein aligns at 59%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=CDS;gene=MGAM;partial=true;product=maltase-glucoamylase%2C intestinal;protein_id=NP_004659.2
+NT_187562.1	BestRefSeq	CDS	17880	17976	.	+	0	ID=cds110496;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NP_004659.2,HGNC:HGNC:7043,MIM:154360;Name=NP_004659.2;Note=The RefSeq protein aligns at 59%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=CDS;gene=MGAM;partial=true;product=maltase-glucoamylase%2C intestinal;protein_id=NP_004659.2
+NT_187562.1	BestRefSeq	CDS	18710	18822	.	+	2	ID=cds110496;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NP_004659.2,HGNC:HGNC:7043,MIM:154360;Name=NP_004659.2;Note=The RefSeq protein aligns at 59%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=CDS;gene=MGAM;partial=true;product=maltase-glucoamylase%2C intestinal;protein_id=NP_004659.2
+NT_187562.1	BestRefSeq	CDS	20083	20208	.	+	0	ID=cds110496;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NP_004659.2,HGNC:HGNC:7043,MIM:154360;Name=NP_004659.2;Note=The RefSeq protein aligns at 59%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=CDS;gene=MGAM;partial=true;product=maltase-glucoamylase%2C intestinal;protein_id=NP_004659.2
+NT_187562.1	BestRefSeq	CDS	21352	21480	.	+	0	ID=cds110496;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NP_004659.2,HGNC:HGNC:7043,MIM:154360;Name=NP_004659.2;Note=The RefSeq protein aligns at 59%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=CDS;gene=MGAM;partial=true;product=maltase-glucoamylase%2C intestinal;protein_id=NP_004659.2
+NT_187562.1	BestRefSeq	CDS	21736	21846	.	+	0	ID=cds110496;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NP_004659.2,HGNC:HGNC:7043,MIM:154360;Name=NP_004659.2;Note=The RefSeq protein aligns at 59%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=CDS;gene=MGAM;partial=true;product=maltase-glucoamylase%2C intestinal;protein_id=NP_004659.2
+NT_187562.1	BestRefSeq	CDS	22191	22253	.	+	0	ID=cds110496;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NP_004659.2,HGNC:HGNC:7043,MIM:154360;Name=NP_004659.2;Note=The RefSeq protein aligns at 59%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=CDS;gene=MGAM;partial=true;product=maltase-glucoamylase%2C intestinal;protein_id=NP_004659.2
+NT_187562.1	BestRefSeq	CDS	24448	24582	.	+	0	ID=cds110496;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NP_004659.2,HGNC:HGNC:7043,MIM:154360;Name=NP_004659.2;Note=The RefSeq protein aligns at 59%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=CDS;gene=MGAM;partial=true;product=maltase-glucoamylase%2C intestinal;protein_id=NP_004659.2
+NT_187562.1	BestRefSeq	CDS	25379	25466	.	+	0	ID=cds110496;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NP_004659.2,HGNC:HGNC:7043,MIM:154360;Name=NP_004659.2;Note=The RefSeq protein aligns at 59%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=CDS;gene=MGAM;partial=true;product=maltase-glucoamylase%2C intestinal;protein_id=NP_004659.2
+NT_187562.1	BestRefSeq	CDS	26264	26402	.	+	2	ID=cds110496;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NP_004659.2,HGNC:HGNC:7043,MIM:154360;Name=NP_004659.2;Note=The RefSeq protein aligns at 59%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=CDS;gene=MGAM;partial=true;product=maltase-glucoamylase%2C intestinal;protein_id=NP_004659.2
+NT_187562.1	BestRefSeq	CDS	27215	27348	.	+	1	ID=cds110496;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NP_004659.2,HGNC:HGNC:7043,MIM:154360;Name=NP_004659.2;Note=The RefSeq protein aligns at 59%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=CDS;gene=MGAM;partial=true;product=maltase-glucoamylase%2C intestinal;protein_id=NP_004659.2
+NT_187562.1	BestRefSeq	CDS	56500	56534	.	+	2	ID=cds110496;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NP_004659.2,HGNC:HGNC:7043,MIM:154360;Name=NP_004659.2;Note=The RefSeq protein aligns at 59%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=CDS;gene=MGAM;partial=true;product=maltase-glucoamylase%2C intestinal;protein_id=NP_004659.2
+NT_187562.1	BestRefSeq	CDS	56627	56743	.	+	0	ID=cds110496;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NP_004659.2,HGNC:HGNC:7043,MIM:154360;Name=NP_004659.2;Note=The RefSeq protein aligns at 59%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=CDS;gene=MGAM;partial=true;product=maltase-glucoamylase%2C intestinal;protein_id=NP_004659.2
+NT_187562.1	BestRefSeq	CDS	57445	57593	.	+	0	ID=cds110496;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NP_004659.2,HGNC:HGNC:7043,MIM:154360;Name=NP_004659.2;Note=The RefSeq protein aligns at 59%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=CDS;gene=MGAM;partial=true;product=maltase-glucoamylase%2C intestinal;protein_id=NP_004659.2
+NT_187562.1	BestRefSeq	CDS	58211	58295	.	+	1	ID=cds110496;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NP_004659.2,HGNC:HGNC:7043,MIM:154360;Name=NP_004659.2;Note=The RefSeq protein aligns at 59%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=CDS;gene=MGAM;partial=true;product=maltase-glucoamylase%2C intestinal;protein_id=NP_004659.2
+NT_187562.1	BestRefSeq	CDS	59473	59529	.	+	0	ID=cds110496;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NP_004659.2,HGNC:HGNC:7043,MIM:154360;Name=NP_004659.2;Note=The RefSeq protein aligns at 59%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=CDS;gene=MGAM;partial=true;product=maltase-glucoamylase%2C intestinal;protein_id=NP_004659.2
+NT_187562.1	BestRefSeq	CDS	61493	61617	.	+	0	ID=cds110496;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NP_004659.2,HGNC:HGNC:7043,MIM:154360;Name=NP_004659.2;Note=The RefSeq protein aligns at 59%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=CDS;gene=MGAM;partial=true;product=maltase-glucoamylase%2C intestinal;protein_id=NP_004659.2
+NT_187562.1	BestRefSeq	CDS	62682	62770	.	+	1	ID=cds110496;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NP_004659.2,HGNC:HGNC:7043,MIM:154360;Name=NP_004659.2;Note=The RefSeq protein aligns at 59%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=CDS;gene=MGAM;partial=true;product=maltase-glucoamylase%2C intestinal;protein_id=NP_004659.2
+NT_187562.1	BestRefSeq	CDS	64510	64559	.	+	2	ID=cds110496;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NP_004659.2,HGNC:HGNC:7043,MIM:154360;Name=NP_004659.2;Note=The RefSeq protein aligns at 59%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=CDS;gene=MGAM;partial=true;product=maltase-glucoamylase%2C intestinal;protein_id=NP_004659.2
+NT_187562.1	BestRefSeq	CDS	65149	65319	.	+	0	ID=cds110496;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NP_004659.2,HGNC:HGNC:7043,MIM:154360;Name=NP_004659.2;Note=The RefSeq protein aligns at 59%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=CDS;gene=MGAM;partial=true;product=maltase-glucoamylase%2C intestinal;protein_id=NP_004659.2
+NT_187562.1	BestRefSeq	CDS	67694	67771	.	+	0	ID=cds110496;Parent=rna157470;Dbxref=GeneID:8972,Genbank:NP_004659.2,HGNC:HGNC:7043,MIM:154360;Name=NP_004659.2;Note=The RefSeq protein aligns at 59%25 coverage compared to this genomic sequence;exception=annotated by transcript or proteomic data;gbkey=CDS;gene=MGAM;partial=true;product=maltase-glucoamylase%2C intestinal;protein_id=NP_004659.2
+NT_187562.1	Curated Genomic	gene	214038	219958	.	-	.	ID=gene55476;Dbxref=GeneID:136541,HGNC:HGNC:39125;Name=PRSS58;description=protease%2C serine 58;gbkey=Gene;gene=PRSS58;gene_biotype=protein_coding;gene_synonym=PRSS1,TRY1,TRYX3,UNQ2540
+NT_187562.1	Curated Genomic	mRNA	214038	219958	.	-	.	ID=rna157473;Parent=gene55476;Dbxref=GeneID:136541,Genbank:NM_001001317.4,HGNC:HGNC:39125;Name=NM_001001317.4;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=PRSS58;product=protease%2C serine 58;transcript_id=NM_001001317.4
+NT_187562.1	Curated Genomic	exon	219891	219958	.	-	.	ID=id1808989;Parent=rna157473;Dbxref=GeneID:136541,Genbank:NM_001001317.4,HGNC:HGNC:39125;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=PRSS58;product=protease%2C serine 58;transcript_id=NM_001001317.4
+NT_187562.1	Curated Genomic	exon	219568	219648	.	-	.	ID=id1808990;Parent=rna157473;Dbxref=GeneID:136541,Genbank:NM_001001317.4,HGNC:HGNC:39125;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=PRSS58;product=protease%2C serine 58;transcript_id=NM_001001317.4
+NT_187562.1	Curated Genomic	exon	217435	217573	.	-	.	ID=id1808991;Parent=rna157473;Dbxref=GeneID:136541,Genbank:NM_001001317.4,HGNC:HGNC:39125;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=PRSS58;product=protease%2C serine 58;transcript_id=NM_001001317.4
+NT_187562.1	Curated Genomic	exon	216955	217211	.	-	.	ID=id1808992;Parent=rna157473;Dbxref=GeneID:136541,Genbank:NM_001001317.4,HGNC:HGNC:39125;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=PRSS58;product=protease%2C serine 58;transcript_id=NM_001001317.4
+NT_187562.1	Curated Genomic	exon	214372	214511	.	-	.	ID=id1808993;Parent=rna157473;Dbxref=GeneID:136541,Genbank:NM_001001317.4,HGNC:HGNC:39125;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=PRSS58;product=protease%2C serine 58;transcript_id=NM_001001317.4
+NT_187562.1	Curated Genomic	exon	214038	214270	.	-	.	ID=id1808994;Parent=rna157473;Dbxref=GeneID:136541,Genbank:NM_001001317.4,HGNC:HGNC:39125;exception=annotated by transcript or proteomic data;gbkey=mRNA;gene=PRSS58;product=protease%2C serine 58;transcript_id=NM_001001317.4
+NT_187562.1	Curated Genomic	CDS	219568	219607	.	-	0	ID=cds110498;Parent=rna157473;Dbxref=GeneID:136541,Genbank:NP_001001317.1,HGNC:HGNC:39125;Name=NP_001001317.1;gbkey=CDS;gene=PRSS58;product=serine protease 58 precursor;protein_id=NP_001001317.1
+NT_187562.1	Curated Genomic	CDS	217435	217573	.	-	2	ID=cds110498;Parent=rna157473;Dbxref=GeneID:136541,Genbank:NP_001001317.1,HGNC:HGNC:39125;Name=NP_001001317.1;gbkey=CDS;gene=PRSS58;product=serine protease 58 precursor;protein_id=NP_001001317.1
+NT_187562.1	Curated Genomic	CDS	216955	217211	.	-	1	ID=cds110498;Parent=rna157473;Dbxref=GeneID:136541,Genbank:NP_001001317.1,HGNC:HGNC:39125;Name=NP_001001317.1;gbkey=CDS;gene=PRSS58;product=serine protease 58 precursor;protein_id=NP_001001317.1
+NT_187562.1	Curated Genomic	CDS	214372	214511	.	-	2	ID=cds110498;Parent=rna157473;Dbxref=GeneID:136541,Genbank:NP_001001317.1,HGNC:HGNC:39125;Name=NP_001001317.1;gbkey=CDS;gene=PRSS58;product=serine protease 58 precursor;protein_id=NP_001001317.1
+NT_187562.1	Curated Genomic	CDS	214121	214270	.	-	0	ID=cds110498;Parent=rna157473;Dbxref=GeneID:136541,Genbank:NP_001001317.1,HGNC:HGNC:39125;Name=NP_001001317.1;gbkey=CDS;gene=PRSS58;product=serine protease 58 precursor;protein_id=NP_001001317.1
+NT_187562.1	BestRefSeq	gene	962573	963937	.	+	.	ID=gene55581;Dbxref=GeneID:135927,HGNC:HGNC:21750;Name=C7orf34;description=chromosome 7 open reading frame 34;gbkey=Gene;gene=C7orf34;gene_biotype=protein_coding;gene_synonym=ctm-1
+NT_187562.1	BestRefSeq	mRNA	962573	963937	.	+	.	ID=rna157497;Parent=gene55581;Dbxref=GeneID:135927,Genbank:NM_178829.4,HGNC:HGNC:21750;Name=NM_178829.4;gbkey=mRNA;gene=C7orf34;product=chromosome 7 open reading frame 34;transcript_id=NM_178829.4
+NT_187562.1	BestRefSeq	exon	962573	962821	.	+	.	ID=id1809397;Parent=rna157497;Dbxref=GeneID:135927,Genbank:NM_178829.4,HGNC:HGNC:21750;gbkey=mRNA;gene=C7orf34;product=chromosome 7 open reading frame 34;transcript_id=NM_178829.4
+NT_187562.1	BestRefSeq	exon	963419	963937	.	+	.	ID=id1809398;Parent=rna157497;Dbxref=GeneID:135927,Genbank:NM_178829.4,HGNC:HGNC:21750;gbkey=mRNA;gene=C7orf34;product=chromosome 7 open reading frame 34;transcript_id=NM_178829.4
+NT_187562.1	BestRefSeq	CDS	962614	962821	.	+	0	ID=cds110593;Parent=rna157497;Dbxref=GeneID:135927,Genbank:NP_849151.2,HGNC:HGNC:21750;Name=NP_849151.2;gbkey=CDS;gene=C7orf34;product=uncharacterized protein C7orf34;protein_id=NP_849151.2
+NT_187562.1	BestRefSeq	CDS	963419	963654	.	+	2	ID=cds110593;Parent=rna157497;Dbxref=GeneID:135927,Genbank:NP_849151.2,HGNC:HGNC:21750;Name=NP_849151.2;gbkey=CDS;gene=C7orf34;product=uncharacterized protein C7orf34;protein_id=NP_849151.2
+NT_187562.1	Curated Genomic	gene	230181	234148	.	-	.	ID=gene55477;Dbxref=GeneID:207147;Name=TRY2P;description=trypsinogen-like pseudogene;gbkey=Gene;gene=TRY2P;gene_biotype=misc_RNA;pseudo=true
+NT_187562.1	Curated Genomic	transcript	230181	234148	.	-	.	ID=rna157474;Parent=gene55477;Dbxref=GeneID:207147,Genbank:NR_036483.2;Name=NR_036483.2;exception=annotated by transcript or proteomic data;gbkey=misc_RNA;gene=TRY2P;product=trypsinogen-like pseudogene;transcript_id=NR_036483.2
+NT_187562.1	Curated Genomic	exon	233852	234148	.	-	.	ID=id1808995;Parent=rna157474;Dbxref=GeneID:207147,Genbank:NR_036483.2;exception=annotated by transcript or proteomic data;gbkey=misc_RNA;gene=TRY2P;product=trypsinogen-like pseudogene;transcript_id=NR_036483.2
+NT_187562.1	Curated Genomic	exon	233047	233200	.	-	.	ID=id1808996;Parent=rna157474;Dbxref=GeneID:207147,Genbank:NR_036483.2;exception=annotated by transcript or proteomic data;gbkey=misc_RNA;gene=TRY2P;product=trypsinogen-like pseudogene;transcript_id=NR_036483.2
+NT_187562.1	Curated Genomic	exon	230181	231784	.	-	.	ID=id1808997;Parent=rna157474;Dbxref=GeneID:207147,Genbank:NR_036483.2;exception=annotated by transcript or proteomic data;gbkey=misc_RNA;gene=TRY2P;product=trypsinogen-like pseudogene;transcript_id=NR_036483.2


=====================================
examples/genome.fa
=====================================
The diff for this file was not included because it is too large.

=====================================
examples/output/ann_simple.gff
=====================================
@@ -0,0 +1,85 @@
+# gffread -E annotation.gff -o ann_simple.gff
+# gffread v0.11.8
+##gff-version 3
+NT_187562.1	BestRefSeq	mRNA	411	68627	.	+	.	ID=rna157470;geneID=gene55473;gene_name=MGAM
+NT_187562.1	BestRefSeq	exon	411	495	.	+	.	Parent=rna157470
+NT_187562.1	BestRefSeq	exon	1995	2051	.	+	.	Parent=rna157470
+NT_187562.1	BestRefSeq	exon	2602	2726	.	+	.	Parent=rna157470
+NT_187562.1	BestRefSeq	exon	9665	9753	.	+	.	Parent=rna157470
+NT_187562.1	BestRefSeq	exon	12115	12164	.	+	.	Parent=rna157470
+NT_187562.1	BestRefSeq	exon	12577	12744	.	+	.	Parent=rna157470
+NT_187562.1	BestRefSeq	exon	14174	14326	.	+	.	Parent=rna157470
+NT_187562.1	BestRefSeq	exon	14664	14864	.	+	.	Parent=rna157470
+NT_187562.1	BestRefSeq	exon	16634	16788	.	+	.	Parent=rna157470
+NT_187562.1	BestRefSeq	exon	17438	17606	.	+	.	Parent=rna157470
+NT_187562.1	BestRefSeq	exon	17880	17976	.	+	.	Parent=rna157470
+NT_187562.1	BestRefSeq	exon	18710	18822	.	+	.	Parent=rna157470
+NT_187562.1	BestRefSeq	exon	20083	20208	.	+	.	Parent=rna157470
+NT_187562.1	BestRefSeq	exon	21352	21480	.	+	.	Parent=rna157470
+NT_187562.1	BestRefSeq	exon	21736	21846	.	+	.	Parent=rna157470
+NT_187562.1	BestRefSeq	exon	22191	22253	.	+	.	Parent=rna157470
+NT_187562.1	BestRefSeq	exon	24448	24582	.	+	.	Parent=rna157470
+NT_187562.1	BestRefSeq	exon	25379	25466	.	+	.	Parent=rna157470
+NT_187562.1	BestRefSeq	exon	26264	26402	.	+	.	Parent=rna157470
+NT_187562.1	BestRefSeq	exon	27215	27348	.	+	.	Parent=rna157470
+NT_187562.1	BestRefSeq	exon	56500	56534	.	+	.	Parent=rna157470
+NT_187562.1	BestRefSeq	exon	56627	56743	.	+	.	Parent=rna157470
+NT_187562.1	BestRefSeq	exon	57445	57593	.	+	.	Parent=rna157470
+NT_187562.1	BestRefSeq	exon	58211	58295	.	+	.	Parent=rna157470
+NT_187562.1	BestRefSeq	exon	59473	59529	.	+	.	Parent=rna157470
+NT_187562.1	BestRefSeq	exon	61493	61617	.	+	.	Parent=rna157470
+NT_187562.1	BestRefSeq	exon	62682	62770	.	+	.	Parent=rna157470
+NT_187562.1	BestRefSeq	exon	64510	64559	.	+	.	Parent=rna157470
+NT_187562.1	BestRefSeq	exon	65149	65319	.	+	.	Parent=rna157470
+NT_187562.1	BestRefSeq	exon	67694	68627	.	+	.	Parent=rna157470
+NT_187562.1	BestRefSeq	CDS	411	495	.	+	1	Parent=rna157470
+NT_187562.1	BestRefSeq	CDS	1995	2051	.	+	0	Parent=rna157470
+NT_187562.1	BestRefSeq	CDS	2602	2726	.	+	0	Parent=rna157470
+NT_187562.1	BestRefSeq	CDS	9665	9753	.	+	1	Parent=rna157470
+NT_187562.1	BestRefSeq	CDS	12115	12164	.	+	2	Parent=rna157470
+NT_187562.1	BestRefSeq	CDS	12577	12744	.	+	0	Parent=rna157470
+NT_187562.1	BestRefSeq	CDS	14174	14326	.	+	0	Parent=rna157470
+NT_187562.1	BestRefSeq	CDS	14664	14864	.	+	0	Parent=rna157470
+NT_187562.1	BestRefSeq	CDS	16634	16788	.	+	0	Parent=rna157470
+NT_187562.1	BestRefSeq	CDS	17438	17606	.	+	1	Parent=rna157470
+NT_187562.1	BestRefSeq	CDS	17880	17976	.	+	0	Parent=rna157470
+NT_187562.1	BestRefSeq	CDS	18710	18822	.	+	2	Parent=rna157470
+NT_187562.1	BestRefSeq	CDS	20083	20208	.	+	0	Parent=rna157470
+NT_187562.1	BestRefSeq	CDS	21352	21480	.	+	0	Parent=rna157470
+NT_187562.1	BestRefSeq	CDS	21736	21846	.	+	0	Parent=rna157470
+NT_187562.1	BestRefSeq	CDS	22191	22253	.	+	0	Parent=rna157470
+NT_187562.1	BestRefSeq	CDS	24448	24582	.	+	0	Parent=rna157470
+NT_187562.1	BestRefSeq	CDS	25379	25466	.	+	0	Parent=rna157470
+NT_187562.1	BestRefSeq	CDS	26264	26402	.	+	2	Parent=rna157470
+NT_187562.1	BestRefSeq	CDS	27215	27348	.	+	1	Parent=rna157470
+NT_187562.1	BestRefSeq	CDS	56500	56534	.	+	2	Parent=rna157470
+NT_187562.1	BestRefSeq	CDS	56627	56743	.	+	0	Parent=rna157470
+NT_187562.1	BestRefSeq	CDS	57445	57593	.	+	0	Parent=rna157470
+NT_187562.1	BestRefSeq	CDS	58211	58295	.	+	1	Parent=rna157470
+NT_187562.1	BestRefSeq	CDS	59473	59529	.	+	0	Parent=rna157470
+NT_187562.1	BestRefSeq	CDS	61493	61617	.	+	0	Parent=rna157470
+NT_187562.1	BestRefSeq	CDS	62682	62770	.	+	1	Parent=rna157470
+NT_187562.1	BestRefSeq	CDS	64510	64559	.	+	2	Parent=rna157470
+NT_187562.1	BestRefSeq	CDS	65149	65319	.	+	0	Parent=rna157470
+NT_187562.1	BestRefSeq	CDS	67694	67771	.	+	0	Parent=rna157470
+NT_187562.1	Curated Genomic	mRNA	214038	219958	.	-	.	ID=rna157473;geneID=gene55476;gene_name=PRSS58
+NT_187562.1	Curated Genomic	exon	214038	214270	.	-	.	Parent=rna157473
+NT_187562.1	Curated Genomic	exon	214372	214511	.	-	.	Parent=rna157473
+NT_187562.1	Curated Genomic	exon	216955	217211	.	-	.	Parent=rna157473
+NT_187562.1	Curated Genomic	exon	217435	217573	.	-	.	Parent=rna157473
+NT_187562.1	Curated Genomic	exon	219568	219648	.	-	.	Parent=rna157473
+NT_187562.1	Curated Genomic	exon	219891	219958	.	-	.	Parent=rna157473
+NT_187562.1	Curated Genomic	CDS	214121	214270	.	-	0	Parent=rna157473
+NT_187562.1	Curated Genomic	CDS	214372	214511	.	-	2	Parent=rna157473
+NT_187562.1	Curated Genomic	CDS	216955	217211	.	-	1	Parent=rna157473
+NT_187562.1	Curated Genomic	CDS	217435	217573	.	-	2	Parent=rna157473
+NT_187562.1	Curated Genomic	CDS	219568	219607	.	-	0	Parent=rna157473
+NT_187562.1	Curated Genomic	transcript	230181	234148	.	-	.	ID=rna157474;geneID=gene55477;gene_name=TRY2P
+NT_187562.1	Curated Genomic	exon	230181	231784	.	-	.	Parent=rna157474
+NT_187562.1	Curated Genomic	exon	233047	233200	.	-	.	Parent=rna157474
+NT_187562.1	Curated Genomic	exon	233852	234148	.	-	.	Parent=rna157474
+NT_187562.1	BestRefSeq	mRNA	962573	963937	.	+	.	ID=rna157497;geneID=gene55581;gene_name=C7orf34
+NT_187562.1	BestRefSeq	exon	962573	962821	.	+	.	Parent=rna157497
+NT_187562.1	BestRefSeq	exon	963419	963937	.	+	.	Parent=rna157497
+NT_187562.1	BestRefSeq	CDS	962614	962821	.	+	0	Parent=rna157497
+NT_187562.1	BestRefSeq	CDS	963419	963654	.	+	2	Parent=rna157497


=====================================
examples/output/annotation.gtf
=====================================
@@ -0,0 +1,82 @@
+NT_187562.1	BestRefSeq	transcript	411	68627	.	+	.	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	exon	411	495	.	+	.	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	exon	1995	2051	.	+	.	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	exon	2602	2726	.	+	.	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	exon	9665	9753	.	+	.	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	exon	12115	12164	.	+	.	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	exon	12577	12744	.	+	.	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	exon	14174	14326	.	+	.	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	exon	14664	14864	.	+	.	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	exon	16634	16788	.	+	.	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	exon	17438	17606	.	+	.	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	exon	17880	17976	.	+	.	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	exon	18710	18822	.	+	.	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	exon	20083	20208	.	+	.	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	exon	21352	21480	.	+	.	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	exon	21736	21846	.	+	.	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	exon	22191	22253	.	+	.	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	exon	24448	24582	.	+	.	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	exon	25379	25466	.	+	.	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	exon	26264	26402	.	+	.	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	exon	27215	27348	.	+	.	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	exon	56500	56534	.	+	.	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	exon	56627	56743	.	+	.	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	exon	57445	57593	.	+	.	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	exon	58211	58295	.	+	.	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	exon	59473	59529	.	+	.	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	exon	61493	61617	.	+	.	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	exon	62682	62770	.	+	.	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	exon	64510	64559	.	+	.	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	exon	65149	65319	.	+	.	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	exon	67694	68627	.	+	.	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	CDS	411	495	.	+	1	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	CDS	1995	2051	.	+	0	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	CDS	2602	2726	.	+	0	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	CDS	9665	9753	.	+	1	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	CDS	12115	12164	.	+	2	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	CDS	12577	12744	.	+	0	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	CDS	14174	14326	.	+	0	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	CDS	14664	14864	.	+	0	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	CDS	16634	16788	.	+	0	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	CDS	17438	17606	.	+	1	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	CDS	17880	17976	.	+	0	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	CDS	18710	18822	.	+	2	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	CDS	20083	20208	.	+	0	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	CDS	21352	21480	.	+	0	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	CDS	21736	21846	.	+	0	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	CDS	22191	22253	.	+	0	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	CDS	24448	24582	.	+	0	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	CDS	25379	25466	.	+	0	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	CDS	26264	26402	.	+	2	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	CDS	27215	27348	.	+	1	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	CDS	56500	56534	.	+	2	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	CDS	56627	56743	.	+	0	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	CDS	57445	57593	.	+	0	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	CDS	58211	58295	.	+	1	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	CDS	59473	59529	.	+	0	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	CDS	61493	61617	.	+	0	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	CDS	62682	62770	.	+	1	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	CDS	64510	64559	.	+	2	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	CDS	65149	65319	.	+	0	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	BestRefSeq	CDS	67694	67771	.	+	0	transcript_id "rna157470"; gene_id "gene55473"; gene_name "MGAM";
+NT_187562.1	Curated Genomic	transcript	214038	219958	.	-	.	transcript_id "rna157473"; gene_id "gene55476"; gene_name "PRSS58";
+NT_187562.1	Curated Genomic	exon	214038	214270	.	-	.	transcript_id "rna157473"; gene_id "gene55476"; gene_name "PRSS58";
+NT_187562.1	Curated Genomic	exon	214372	214511	.	-	.	transcript_id "rna157473"; gene_id "gene55476"; gene_name "PRSS58";
+NT_187562.1	Curated Genomic	exon	216955	217211	.	-	.	transcript_id "rna157473"; gene_id "gene55476"; gene_name "PRSS58";
+NT_187562.1	Curated Genomic	exon	217435	217573	.	-	.	transcript_id "rna157473"; gene_id "gene55476"; gene_name "PRSS58";
+NT_187562.1	Curated Genomic	exon	219568	219648	.	-	.	transcript_id "rna157473"; gene_id "gene55476"; gene_name "PRSS58";
+NT_187562.1	Curated Genomic	exon	219891	219958	.	-	.	transcript_id "rna157473"; gene_id "gene55476"; gene_name "PRSS58";
+NT_187562.1	Curated Genomic	CDS	214121	214270	.	-	0	transcript_id "rna157473"; gene_id "gene55476"; gene_name "PRSS58";
+NT_187562.1	Curated Genomic	CDS	214372	214511	.	-	2	transcript_id "rna157473"; gene_id "gene55476"; gene_name "PRSS58";
+NT_187562.1	Curated Genomic	CDS	216955	217211	.	-	1	transcript_id "rna157473"; gene_id "gene55476"; gene_name "PRSS58";
+NT_187562.1	Curated Genomic	CDS	217435	217573	.	-	2	transcript_id "rna157473"; gene_id "gene55476"; gene_name "PRSS58";
+NT_187562.1	Curated Genomic	CDS	219568	219607	.	-	0	transcript_id "rna157473"; gene_id "gene55476"; gene_name "PRSS58";
+NT_187562.1	Curated Genomic	transcript	230181	234148	.	-	.	transcript_id "rna157474"; gene_id "gene55477"; gene_name "TRY2P";
+NT_187562.1	Curated Genomic	exon	230181	231784	.	-	.	transcript_id "rna157474"; gene_id "gene55477"; gene_name "TRY2P";
+NT_187562.1	Curated Genomic	exon	233047	233200	.	-	.	transcript_id "rna157474"; gene_id "gene55477"; gene_name "TRY2P";
+NT_187562.1	Curated Genomic	exon	233852	234148	.	-	.	transcript_id "rna157474"; gene_id "gene55477"; gene_name "TRY2P";
+NT_187562.1	BestRefSeq	transcript	962573	963937	.	+	.	transcript_id "rna157497"; gene_id "gene55581"; gene_name "C7orf34";
+NT_187562.1	BestRefSeq	exon	962573	962821	.	+	.	transcript_id "rna157497"; gene_id "gene55581"; gene_name "C7orf34";
+NT_187562.1	BestRefSeq	exon	963419	963937	.	+	.	transcript_id "rna157497"; gene_id "gene55581"; gene_name "C7orf34";
+NT_187562.1	BestRefSeq	CDS	962614	962821	.	+	0	transcript_id "rna157497"; gene_id "gene55581"; gene_name "C7orf34";
+NT_187562.1	BestRefSeq	CDS	963419	963654	.	+	2	transcript_id "rna157497"; gene_id "gene55581"; gene_name "C7orf34";


=====================================
examples/output/transcripts.fa
=====================================
@@ -0,0 +1,119 @@
+>rna157470 CDS=2-3343
+GTTCTACGAGGACAACAGCACTTGGGATGTGCACCAACAGTTCTTATGGGGGCCCGGCCTCCTCATCACT
+CCAGTTCTGGATGAAGGTGCAGAGAAAGTGATGGCATATGTGCCTGATGCTGTCTGGTATGACTACGAGA
+CTGGGAGCCAAGTGAGATGGAGGAAGCAAAAAGTCGAGATGGAACTTCCTGGAGACAAAATTGGACTTCA
+CCTTCGAGGAGGCTACATCTTCCCCACACAGCAGCCAAATACAACCACTCTGGCCAGTCGAAAGAACCCT
+CTTGGTCTTATCATTGCCCTAGATGAGAACAAAGAAGCAAAAGGAGAACTTTTCTGGGATAATGGGGAAA
+CGAAGGATACTGTGGCCAATAAAGTGTATCTTTTATGTGAGTTTTCTGTCACTCAAAACCGCTTGGAGGT
+GAATATTTCACAATCAACCTACAAGGACCCCAATAATTTAGCATTTAATGAGATTAAAATTCTTGGGACG
+GAGGAACCTAGCAATGTTACAGTGAAACACAATGGTGTCCCAAGTCAGACTTCTCCTACAGTCACTTATG
+ATTCTAACCTGAAGGTTGCCATTATCACAGATATTGATCTTCTCCTGGGAGAAGCATACACAGTGGAATG
+GAGCATAAAGATAAGGGATGAAGAAAAAATAGACTGTTACCCTGATGAGAATGGTGCTTCTGCCGAAAAC
+TGCACTGCCCGTGGCTGTATCTGGGAGGCATCCAATTCTTCTGGAGTCCCTTTTTGCTATTTTGTCAACG
+ACCTATACTCTGTCAGTGATGTTCAGTATAATTCCCATGGGGCCACAGCTGACATCTCCTTAAAGTCTTC
+CGTTTATGCCAATGCCTTCCCCTCCACACCCGTGAACCCCCTTCGCCTGGATGTCACTTACCATAAGAAT
+GAAATGCTGCAGTTCAAGATTTATGATCCCAACAAGAATCGGTATGAAGTTCCAGTCCCTCTGAACATAC
+CCAGCATGCCATCCAGCACCCCTGAGGGTCAACTCTATGATGTGCTCATTAAGAAGAATCCATTTGGGAT
+TGAAATTCGCCGGAAGAGTACAGGCACTATAATTTGGGACTCTCAGCTCCTTGGCTTTACCTTCAGTGAC
+ATGTTTATCCGCATCTCCACCCGCCTTCCCTCCAAGTACCTCTATGGCTTTGGGGAAACTGAGCACAGGT
+CCTATAGGAGAGACTTGGAGTGGCACACTTGGGGGATGTTCTCCCGAGACCAGCCCCCAGGGTACAAGAA
+GAATTCCTATGGTGTCCACCCCTACTACATGGGGCTGGAGGAGGACGGCAGTGCCCATGGAGTGCTCCTG
+CTGAACAGCAATGCCATGGATGTGACGTTCCAGCCCCTGCCTGCCTTGACATACCGCACCACAGGGGGAG
+TTCTGGACTTTTATGTGTTCTTGGGGCCGACTCCAGAGCTTGTCACCCAGCAGTACACTGAGTTGATTGG
+CCGGCCTGTGATGGTACCTTACTGGTCTTTGGGGTTCCAGCTGTGTCGCTATGGCTACCAGAATGACTCT
+GAGATCGCCAGCTTGTATGATGAGATGGTGGCTGCCCAGATCCCTTATGATGTGCAGTACTCAGACATCG
+ACTACATGGAGCGGCAGCTGGACTTCACCCTCAGCCCCAAGTTTGCTGGGTTTCCAGCTCTGATCAATCG
+CATGAAGGCTGATGGGATGCGGGTCATCCTCATTCTGGATCCAGCCATTTCTGGCAATGAGACACAGCCT
+TATCCTGCCTTCACTCGGGGCGTGGAGGATGACGTCTTCATCAAATACCCAAATGATGGAGACATTGTCT
+GGGGAAAGGTCTGGCCTGATTTTCCTGATGTTGTTGTGAATGGGTCTCTAGACTGGGACAGCCAAGTGGA
+GCTATATCGAGCTTATGTGGCCTTCCCAGACTTTTTCCGTAATTCAACTGCCAAGTGGTGGAAGAGGGAA
+ATAGAAGAACTATACAACAATCCACAGAATCCAGAGAGGAGCTTGAAGTTTGATGGCATGTGGATTGATA
+TGAATGAACCATCAAGCTTCGTGAATGGGGCAGTTTCTCCAGGCTGCAGGGACGCCTCTCTGAACCACCC
+TCCCTACATGCCACATTTGGAGTCCAGGGACAGGGGCCTGAGCAGCAAGACCCTTTGTATGGAGAGTCAG
+CAGATCCTCCCAGACGGCTCCCTGGTGCAGCACTACAACGTGCACAACCTGTATGGGTGGTCCCAGACCA
+GACCCACATACGAAGCCGTGCAGGAGGTGACGGGACAGCGAGGGGTCGTCATCACCCGCTCCACATTTCC
+CTCTTCTGGCCGCTGGGCAGGACATTGGCTGGGAGACAACACGGCCGCATGGGATCAGCTGAAGAAGTCT
+ATCATTGGCATGATGGAGTTCAGCCTCTTCGGCATATCCTATACGGGAGCAGATATCTGTGGGTTCTTTC
+AAGATGCTGAATATGAGATGTGTGTTCGCTGGATGCAGCTGGGGGCCTTTTACCCCTTCTCAAGAAACCA
+CAACACCATTGGGACCAGGAGACAAGACCCTGTGTCCTGGGATGTTGCTTTTGTGAATATTTCCAGAACT
+GTCCTGCAGACCAGATACACCCTGTTGCCATATCTGTATACCTTGATGCATAAGGCCCACACGGAGGGCG
+TCACTGTTGTGCGGCCTCTGCTCCATGAGTTTGTGTCAGACCAGGTGACATGGGACATAGACAGTCAGTT
+CCTGCTGGGCCCAGCCTTCCTGGTCAGCCCTGTCCTGGAGCGTAATGCCAGAAATGTCACTGCATATTTC
+CCTAGAGCCCGCTGGTATGATTACTACACGGGTGTGGATATTAATGCAAGAGGAGAGTGGAAGACCTTGC
+CAGCCCCTCTTGACCACATTAATCTTCATGTCCGTGGGGGCTACATCCTGCCCTGGCAAGAGCCTGCACT
+GAACACCCACTTAAGCCGCCAGAAATTCATGGGCTTCAAAATTGCCTTGGATGATGAAGGAACTGCTGGG
+GGCTGGCTCTTCTGGGATGATGGGCAAAGCATTGATACCTATGGGAAAGGACTCTATTACTTGGCCAGCT
+TTTCTGCCAGCCAGAATACGATGCAAAGCCATATAATTTTCAACAATTACATCACTGGTACAAATCCTTT
+GAAACTGGGCTACATTGAAATCTGGGGAGTGGGCAGTGTCCCCGTTACCAGTGTCAGCATCTCTGTGAGT
+GGCATGGTCATAACACCCTCCTTCAACAATGACCCCACGACACAGGTATTAAGCATCGATGTGACTGACA
+GAAACATCAGCCTACATAATTTTACTTCATTGACGTGGATAAGCACTCTGTGAATTTTTACAGCAAGATT
+CTAACTAACTATGAATGACTTTGAAACTACTTATACTTCATACTCATAAAAATTATTGTGTGTTGCTAAT
+TTGTTCATACCCACTATTGGTGAAATATTTCTGTTAATTTTGTTATATGTTTTTTGTGTGAACCCTAAAG
+GTTAAACCTTAGCCCTGTGGGATAGGCAGTTAGGGAGGTGTGGAAAATCTATGCATTACCTTAATGTCTC
+TGTGTGGTTAGTATGGTAGTGACTGTTCATCATATGACATTTACTGAAGATGAACTGGGTCCATGATGAA
+GTGTGTGTATGTCCACGTTTGTAATCATAGAATGGACCCCATTCTTTTGTTAAATACACAAGAGAAAGCT
+TTCTGTGACAGTTCCAGGTCTTGAAGCTAATCAGCATCTCAAGAAAGTATCCAGAAAGAACATCTGCTAG
+TTGGTTATAGGCGGTGGGAGGAATAATATACCTAATTGGTTATAGGTGGGGGGAGCATGATAAGCAAAGA
+AAAGGCAAACACAAGGAAAGATCAGATGAAACAGAAGATGATAGTAAAAGTGATCCTAAGTAAGAACATA
+ATGTAAAATTGTCAGCAGCCTCATGGGGAGGAAAAAGGAAGAGTCAACTCACTTGAAGAAGAGGGTCTTG
+AGAAATCCTTAGCATAAAGGGCTACTGGTGAGATTGAGATCTGAGCAGGCAAAGCTCAAAAGAGAGTTTG
+GAGGTTAAAAATAATTTATTTTTGCAGTAGTGTGCTTTGAAATGTGTAAATCTTATTTCTAATGTATACA
+ACCACATTTCACATAAAAATATGCAATTTATATGCCAGATAAAAATAAAACAAGTGAATTTGCAAGTGA
+>rna157473 CDS=110-835
+AAGGCTGGCAAAAAGGAGACCAGACAGGAGGCGTCTGTAGAGATATCATGAACTTCAACTTAGCTTTGTT
+TTCCAGAGACTGGAGCTAAACTGGGCTTTCAACATCATCATGAAGTTTATCCTCCTCTGGGCTCTCTTGA
+ATCTGACTGTTGCTTTGGCCTTTAATCCAGATTACACAGTCAGCTCCACTCCCCCTTACTTGGTCTATTT
+GAAATCTGACTACTTGCCCTGCGCTGGAGTCCTGATCCACCCGCTTTGGGTGATCACAGCTGCACACTGC
+AATTTACCAAAGCTTCGGGTGATATTGGGGGTTACAATCCCAGCAGACTCTAATGAAAAGCATCTGCAAG
+TGATTGGCTATGAGAAGATGATTCATCATCCACACTTCTCAGTCACTTCTATTGATCATGACATCATGCT
+AATCAAGCTGAAAACAGAGGCTGAACTCAATGACTATGTGAAATTAGCCAACCTGCCCTACCAAACTATC
+TCTGAAAATACCATGTGCTCTGTCTCTACCTGGAGCTACAATGTGTGTGATATCTACAAAGAGCCCGATT
+CACTGCAAACTGTGAACATCTCTGTAATCTCCAAGCCTCAGTGTCGCGATGCCTATAAAACCTACAACAT
+CACGGAAAATATGCTGTGTGTGGGCATTGTGCCAGGAAGGAGGCAGCCCTGCAAGGAAGTTTCTGCTGCC
+CCGGCAATCTGCAATGGGATGCTTCAAGGAATCCTGTCTTTTGCGGATGGATGTGTTTTGAGAGCCGATG
+TTGGCATCTATGCCAAAATTTTTTACTATATACCCTGGATTGAAAATGTAATCCAAAATAACTGAGCTGT
+GGCAGTTGTGGACCATATGACACAGCTTGTCCCCATCGTTCACCTTTAGAATTAAATATAAATTAACTCC
+TCACATTG
+>rna157474
+AAGATTTGTAGGATAGGAAAGGAGGCACCAGGACCTCGGCTTGGTCTGTGCCACTCTCTTTCACCCACAT
+CAGAGACGGCTCCCAGCAATGCCGTATATATTTGGGTCTTGATAAGGACTCATGTGAAGACTGCTCTCCT
+CTCATCTGTCTTCTGCCCATAATCTCCACAGCTGTTCTTGGCAAGGACACTTGGACTAAAGATTGGGACA
+GTTTACAGCTTGATCAACTTGTCTGTTCCTGGCGAGATCTGTCTGCCACGAAGGGTTTTCTCATCGTCGC
+TCTCTTGAGTGCAGCTGGAGCTGCTCTGGCTAAAGACTCTAAAGATAATCAGGATCTGACATATAATTTT
+ACTATTCCTTACATGGTCTATCTTCAGTCCTTCCCAGAACCCTGCGTGGGGTCTCTCATTCACCTTGACT
+GGGTATTGACAGCTGCCCACTGCCCCTTACCTGTTGAAATTCGACTGGGAGTTTCTCAACCTAGCATCAC
+AAACAAGAAACAACAGATATGAAACTATTCATCGATTGTGACCTACCCTGACTTTGATGCAAAATCCTTG
+AACAATGACCTAATGATGATCAAACTATCAACACCTGCTTCACTCAACCCCCATGTGGGGACTATAGCCA
+TAGCCTTGGAACCCATTCCATTTAATGAATCCTGCTTTATTCCAACCTGGACCTGGAATGAATATAAAAA
+CCGTATGTGTTGGATGCTGTGTTCTTTATGCTGAGGAGGTTGGAGCTGGGAGAAAATGTAGAGGGATGTC
+CCTAATGGAAATCGCTGCTATGAGACAATGTTGTCCACCATAGAGGGATTCCTGCCAGGGACCCAAACCA
+CCATCACTGGAGGTCAAAGATAAAATTCAAAGAGACTGATAAAGGGCAGCCACTCTGACACCAACAAATG
+GAGCAGACAGTAATTTTCATATCTCTAGAATATTGAAGACAAGAGTCCAGAGAACTGGATTGAAATACAG
+GTGGGAGAAATAGGAGTCCTCGTGCACAAAGACAGTATGAAGACAGCCTGCGGGAGGGTGACACCCATTG
+ATAGTTGCTGAAAGCCCATCACACAATGGAAGAACAGTGGGGGAAATGAACATAAAATATATGTGATGCG
+GTGAAGCAGCAGAAGATGCTTCCTACACCTTCCGCAGAGAACCCAGTTATGTGCAGAGAACCCTGGGAGA
+GAAGAGAATGGTTGAGGGAGTACCTGTCCTGTCACCAGAGCTGGTTCCCTGCACAGAGTGCTACTCATAC
+CCACTGCTTCTTCCCAGAGTTCCCATTTAGGGGTGGAGACTTTGGTGGTTCGTAGGGAAATTGATTATTA
+TGACATAATCAGTTGTTATCTCCCTGGTCTGACATGCCTCACAAGAAAAACAGTGGCAGCCCATGTTGTA
+AGCACACACCCATCTCCCCTGTGAGCTGAAATTGGGATGAGCAGAAAGATCCGGTCTCAGCAGGAGTCTG
+TCTAATAAAGAACACCTGTGAAGAAAACTACTTTCTAATAAGTTTGTTGGTCCTTGTTGGCTGTTACTCC
+ATTATTCTCTCCTTAGGTATTGTTGAGGATGAGGATACCACCACTTGATATCTGAAATGAAATGACAAAT
+GTAAATCTATTGCTTATTACAGCAAGTCAGAGCAATGCTGACCAGGCACCATCCCACTAAGCAGAATGGA
+GTGCTGATTAGCATAAGGGGTTTGTGGGAAGCTTGGAGTTGAGGGATTGGAGGACTTTCAGAGGTATGAA
+TGGGTTCCATCATCTGTTGAGGCCTGGCTACTTCAGGGGGATTGCTGTTGATTGATTGGTGCTCTGAGGC
+GTGTTTTTCAGAGGACCTCTGATCCTGATCAGCTGGTTTTAAGAAGTGAAGGCTGACATTGATTATACTG
+CAAAAGTTATGTTCACTCAAGCAAGTTGTCCTGGATCTATTAATCAGGTTTAAATCTCTCTCTGGTGGCT
+ACAGAACAATGCACGTGCTCTGTAAAAGCAAATAAAAGTGGAATTTTAGAAGTCAGTCTTTGGTAAAAAA
+CAGAAATAAAAAGCAGAAACTATTA
+>rna157497 CDS=42-485
+GGAGAAAATGCCCCTGGAAAGGGTTAAGGGCCAGGACAGGAATGGGGCAGGAGGTGCACGGATCCTGCTG
+GGCACTGGGAGCAGGGGGCGGCCAAAGGCAGTGGGTGGGCAGGTCCATGCCTCCCCTGGCCCCCCAGCTC
+TGCAGGGCAGTGTTCCTGGTTCCTATCTTGCTGCTGCTGCAGGTGAAGCCTCTGAACGGGAGCCCAGGCC
+CCAAAGATGGGAGCCAGACAGAGAAAACGCCCTCTGCAGACCAGAATCAAGAACAGTTCGAAGAGCACTT
+TGTGGCCTCCTCAGTGGGTGAGATGTGGCAGGTGGTGGACATGGCCCAGCAGGAAGAAGACCAGTCGTCC
+AAGACGGCAGCTGTTCACAAGCACTCTTTCCACCTCAGCTTCTGCTTTAGTCTGGCCAGTGTCATGGTTT
+TCTCAGGAGGGCCATTGAGGCGGACATTCCCAAATATCCAACTCTGCTTCATGCTCACTCACTGACCCTC
+CCTCCCTCCTGGGCTCCAGGTCACAACTCCCAAAGGAGATGCAGGCATGGCTCTCTGCCTCTGATCACCA
+TCACTGTATCTCAAGGTTCAGCAGCAGAGATACCAGTTGCCATCAGTGCTAACTGACTGCCTCTCCAGGT
+TCGGAGTTTCATCTCCCAGGGCCAGAGACAGCAGACCCACATCCTTCTCTCCCACACCTCTCCTGGTTTT
+GTTCAGGACAGCAGATTAGAGGCAGGAGGCAATGACAATAAAATAACGATAAAATCCTGAGAACAATT


=====================================
examples/transcripts.gtf
=====================================
@@ -0,0 +1,17 @@
+NT_187562.1	StringTie	transcript	403	10275	.	+	.	transcript_id "STRG.1.1"; gene_id "STRG.1";
+NT_187562.1	StringTie	exon	403	495	.	+	.	transcript_id "STRG.1.1"; gene_id "STRG.1";
+NT_187562.1	StringTie	exon	1995	2726	.	+	.	transcript_id "STRG.1.1"; gene_id "STRG.1";
+NT_187562.1	StringTie	exon	9665	10275	.	+	.	transcript_id "STRG.1.1"; gene_id "STRG.1";
+NT_187562.1	StringTie	transcript	214032	219967	.	-	.	transcript_id "STRG.3.1"; gene_id "STRG.3";
+NT_187562.1	StringTie	exon	214032	214270	.	-	.	transcript_id "STRG.3.1"; gene_id "STRG.3";
+NT_187562.1	StringTie	exon	214372	214511	.	-	.	transcript_id "STRG.3.1"; gene_id "STRG.3";
+NT_187562.1	StringTie	exon	216955	217211	.	-	.	transcript_id "STRG.3.1"; gene_id "STRG.3";
+NT_187562.1	StringTie	exon	217435	217573	.	-	.	transcript_id "STRG.3.1"; gene_id "STRG.3";
+NT_187562.1	StringTie	exon	219568	219648	.	-	.	transcript_id "STRG.3.1"; gene_id "STRG.3";
+NT_187562.1	StringTie	exon	219891	219967	.	-	.	transcript_id "STRG.3.1"; gene_id "STRG.3";
+NT_187562.1	StringTie	transcript	230172	233179	.	-	.	transcript_id "STRG.4.2"; gene_id "STRG.4";
+NT_187562.1	StringTie	exon	230172	231784	.	-	.	transcript_id "STRG.4.2"; gene_id "STRG.4";
+NT_187562.1	StringTie	exon	233047	233179	.	-	.	transcript_id "STRG.4.2"; gene_id "STRG.4";
+NT_187562.1	BestRefSeq	transcript	962573	963937	.	+	.	transcript_id "STRG.6.2"; gene_id "STRG.6";
+NT_187562.1	BestRefSeq	exon	962573	962829	.	+	.	transcript_id "STRG.6.2"; gene_id "STRG.6";
+NT_187562.1	BestRefSeq	exon	963419	963937	.	+	.	transcript_id "STRG.6.2"; gene_id "STRG.6";


=====================================
gff_utils.cpp
=====================================
@@ -1,6 +1,67 @@
 #include "gff_utils.h"
 
+GHash<GeneInfo> gene_ids;
+
 bool verbose=false; //same with GffReader::showWarnings and GffLoader::beVserbose
+bool debugMode=false;
+bool ensembl_convert=false; //-L, assist in converting Ensembl GTF to GFF3
+
+FILE* ffasta=NULL;
+FILE* f_in=NULL;
+FILE* f_out=NULL;
+FILE* f_w=NULL; //writing fasta with spliced exons (transcripts)
+int wPadding = 0; //padding for -w option
+FILE* f_x=NULL; //writing fasta with spliced CDS
+FILE* f_y=NULL; //wrting fasta with translated CDS
+
+
+int maxintron=999000000;
+
+bool wCDSonly=false;
+bool wNConly=false;
+int minLen=0; //minimum transcript length
+bool validCDSonly=false; // translation with no in-frame STOP
+bool bothStrands=false; //for single-exon mRNA validation, check the other strand too
+bool altPhases=false; //if original phase fails translation validation,
+                     //try the other 2 phases until one makes it
+bool addCDSattrs=false;
+bool add_hasCDS=false;
+//bool streamIn=false; // --stream option
+bool adjustStop=false; //automatic adjust the CDS stop coordinate
+bool covInfo=false; // --cov-info : only report genome coverage
+GStr tableFormat; //list of "attributes" to print in tab delimited format
+bool spliceCheck=false; //only known splice-sites
+bool decodeChars=false; //decode url-encoded chars in attrs (-D)
+bool StarStop=false; //use * instead of . for stop codon translation
+bool fullCDSonly=false; // starts with START, ends with STOP codon
+
+bool multiExon=false;
+bool writeExonSegs=false;
+char* tracklabel=NULL;
+char* rfltGSeq=NULL;
+char rfltStrand=0;
+uint rfltStart=0;
+uint rfltEnd=MAX_UINT;
+bool rfltWithin=false; //check for full containment within given range
+bool addDescr=false;
+
+
+bool fmtGFF3=true; //default output: GFF3
+//other formats only make sense in transcriptOnly mode
+bool fmtGTF=false;
+bool fmtBED=false;
+bool fmtTLF=false;
+bool fmtTable=false;
+
+GffPrintMode exonPrinting=pgffAny;
+
+GFastaDb gfasta;
+
+GHash<SeqInfo> seqinfo;
+GVec<CTableField> tableCols;
+GHash<RefTran> reftbl;
+
+GHash<int> isoCounter; //counts the valid isoforms
 
 void printFasta(FILE* f, GStr* defline, char* seq, int seqlen, bool useStar) {
  if (seq==NULL) return;
@@ -135,6 +196,561 @@ bool tMatch(GffObj& a, GffObj& b) {
 }
 
 
+char* getSeqDescr(char* seqid) {
+ static char charbuf[128];
+ if (seqinfo.Count()==0) return NULL;
+ char* suf=rstrchr(seqid, '.');
+ if (suf!=NULL) *suf=0;
+ SeqInfo* seqd=seqinfo.Find(seqid);
+ if (suf!=NULL) *suf='.';
+ if (seqd!=NULL) {
+  GStr s(seqd->descr);
+  //cleanup some Uniref gunk
+  if (s[0]=='[') {
+    int r=s.index(']');
+    if (r>=0 && r<8 && isdigit(s[1]))
+       s.remove(0,r+1);
+    }
+  if (s.length()>80) {
+    int r=s.index(';');
+    if (r>5) s.cut(r);
+    }
+  if (s.length()>127) {
+   s.cut(127);
+   int r=s.rindex(' ');
+   if (r>0) s.cut(r);
+   }
+  strcpy(charbuf, s.chars());
+  return charbuf;
+  }
+ else return NULL;
+}
+
+char* getSeqName(char* seqid) {
+  static char charbuf[128];
+  char* suf=rstrchr(seqid, '.');
+  if (suf!=NULL) *suf=0;
+  strcpy(charbuf, seqid);
+  if (suf!=NULL) *suf='.';
+  return charbuf;
+}
+
+
+int adjust_stopcodon(GffObj& gffrec, int adj, GList<GSeg>* seglst) {
+  //adj>0, extend CDS to include a potential stop codon
+  //when CDS is expanded, the terminal exon might have to be adjusted too
+  int realadj=0;
+  if (gffrec.strand=='-') {
+       if ((int)gffrec.CDstart>adj) {
+           gffrec.CDstart-=adj;
+           realadj=adj;
+           if (gffrec.exons.First()->start>gffrec.CDstart) {
+                 gffrec.covlen+=gffrec.exons.First()->start - gffrec.CDstart;
+                 gffrec.exons.First()->start=gffrec.CDstart;
+                 gffrec.start=gffrec.CDstart;
+                 }
+             }
+          }
+        else { // forward strand
+         //expand beyond
+         realadj=adj;
+         gffrec.CDend+=adj;
+         if (adj<0) {//restore
+           if (gffrec.exons.Last()->end==gffrec.CDend-adj) {
+                        gffrec.exons.Last()->end+=adj;
+                        gffrec.end=gffrec.exons.Last()->end;
+                        gffrec.covlen+=adj;
+                        }
+         }
+         else if (gffrec.exons.Last()->end<gffrec.CDend) {
+             gffrec.covlen+=gffrec.CDend-gffrec.exons.Last()->end;
+             gffrec.exons.Last()->end=gffrec.CDend;
+             gffrec.end=gffrec.CDend;
+             }
+         }
+  if (seglst!=NULL) seglst->Last()->end+=realadj;
+  return realadj;
+ }
+
+void printTableData(FILE* f, GffObj& g, bool inFasta) {
+ //using attribute list in tableCols
+	char* av=NULL;
+	for(int i=0;i<tableCols.Count();i++) {
+		if (i>0 || inFasta) {
+     	   if (!inFasta || tableCols[i].type!=ctfGFF_ID)
+     		   fprintf(f,"\t");
+		}
+		switch(tableCols[i].type) {
+		case ctfGFF_Attr:
+			av=g.getAttr(tableCols[i].name.chars());
+			fprintf(f,"%s",av!=NULL? av : ".");
+			break;
+		case ctfGFF_chr:
+			fprintf(f,"%s",g.getGSeqName());
+			break;
+		case ctfGFF_ID:
+			if (!inFasta)
+			  fprintf(f,"%s",g.getID());
+			break;
+		case ctfGFF_geneID:
+			fprintf(f,"%s",g.getGeneID()!=NULL ? g.getGeneID() : ".");
+			break;
+		case ctfGFF_geneName:
+			fprintf(f,"%s",g.getGeneName()!=NULL ? g.getGeneName() : ".");
+			break;
+		case ctfGFF_Parent:
+			fprintf(f,"%s",g.parent!=NULL ? g.parent->getID() : ".");
+			break;
+		case ctfGFF_feature:
+			fprintf(f,"%s",g.getFeatureName());
+			break;
+		case ctfGFF_start:
+			fprintf(f,"%d",g.start);
+			break;
+		case ctfGFF_end:
+			fprintf(f,"%d",g.end);
+			break;
+		case ctfGFF_strand:
+			fprintf(f,"%c",g.strand);
+			break;
+		case ctfGFF_numexons:
+			fprintf(f,"%d",g.exons.Count());
+			break;
+		case ctfGFF_exons:
+			if (g.exons.Count()>0) {
+				for (int x=0;x<g.exons.Count();x++) {
+					if (x>0) fprintf(f,",");
+					fprintf(f,"%d-%d",g.exons[x]->start, g.exons[x]->end);
+				}
+			} else fprintf(f,".");
+			break;
+		case ctfGFF_cds:
+			if (g.hasCDS()) {
+				GVec<GffExon> cds;
+				g.getCDSegs(cds);
+				for (int x=0;x<cds.Count();x++) {
+					if (x>0) fprintf(f,",");
+				    fprintf(f,"%d-%d",cds[x].start, cds[x].end);
+				}
+			}
+			else fprintf(f,".");
+			break;
+		case ctfGFF_covlen:
+			fprintf(f, "%d", g.covlen);
+			break;
+		case ctfGFF_cdslen:
+			if (g.hasCDS()) {
+				GVec<GffExon> cds;
+				g.getCDSegs(cds);
+				int clen=0;
+				for (int x=0;x<cds.Count();x++)
+				    clen+=cds[x].end-cds[x].start+1;
+				fprintf(f, "%d", clen);
+			}
+			else fprintf(f, "0");
+			break;
+		} //switch
+	}
+	fprintf(f,"\n");
+}
+bool GffLoader::validateGffRec(GffObj* gffrec) {
+	if (reftbl.Count()>0) { //check if we need to reject by ref seq filter
+		GStr refname(gffrec->getRefName());
+		RefTran* rt=reftbl.Find(refname.chars());
+		if (rt==NULL && refname.length()>2 && refname[-2]=='.' && isdigit(refname[-1])) {
+			//try removing the version suffix
+			refname.cut(-2);
+			//GMessage("[DEBUG] Trying ref name '%s'...\n", refname.chars());
+			rt=reftbl.Find(refname.chars());
+		}
+		if (rt) {
+			gffrec->setRefName(rt->new_name);
+		}
+		/* //no, do not discard non-matching entries, let them pass through!
+		else {
+			if (verbose)
+				GMessage("Info: %s discarded due to reference %s not being mapped\n",
+						gffrec->getID(), refname.chars());
+			return false; //discard, ref seq not in the given translation table
+		}*/
+	}
+	if (transcriptsOnly && gffrec->isDiscarded()) {
+		//discard generic "locus" features with no other detailed subfeatures
+		//GMessage("Warning: discarding %s GFF generic gene/locus container %s\n",gffrec->getID());
+		return false;
+	}
+	if (minLen>0 && gffrec->covlen<minLen) {
+		if (verbose)
+			GMessage("Info: %s discarded due to minimum length threshold %d\n",
+					gffrec->getID(), minLen);
+    	return false;
+	}
+	if (rfltGSeq!=NULL) { //filter by gseqName
+		if (strcmp(gffrec->getGSeqName(),rfltGSeq)!=0) {
+			return false;
+		}
+	}
+	if (rfltStrand>0 && gffrec->strand !=rfltStrand) {
+		return false;
+	}
+	//check coordinates
+	if (rfltStart!=0 || rfltEnd!=MAX_UINT) {
+		if (rfltWithin) {
+			if (gffrec->start<rfltStart || gffrec->end>rfltEnd) {
+				return false; //not within query range
+			}
+		}
+		else {
+			if (gffrec->start>rfltEnd || gffrec->end<rfltStart) {
+				return false;
+			}
+		}
+	}
+	if (multiExon && gffrec->exons.Count()<=1) {
+		return false;
+	}
+	if (wCDSonly && gffrec->CDstart==0) {
+		return false;
+	}
+	if (wNConly && gffrec->hasCDS()) return false;
+	return true;
+}
+
+bool process_transcript(GFastaDb& gfasta, GffObj& gffrec) {
+ if (!gffrec.isTranscript()) return false; //shouldn't call this function unless it's a transcript
+ //returns true if the transcript passed the filter
+ char* gname=gffrec.getGeneName();
+ if (gname==NULL) gname=gffrec.getGeneID();
+ if (ensembl_convert && startsWith(gffrec.getID(), "ENS")) {
+      const char* biotype=gffrec.getAttr("gene_biotype");
+      if (biotype) {
+         gffrec.addAttr("type", biotype);
+         gffrec.removeAttr("gene_biotype");
+         }
+       else { //old Ensembl files lacking gene_biotype
+         gffrec.addAttr("type", gffrec.getTrackName());
+         }
+
+      //bool is_gene=false;
+      bool is_pseudo=false;
+      if (strcmp(biotype, "protein_coding")==0 || gffrec.hasCDS())
+                gffrec.setFeatureName("mRNA");
+       else {
+          if (strcmp(biotype, "processed_transcript")==0)
+              gffrec.setFeatureName("proc_RNA");
+            else {
+              //is_gene=endsWith(biotype, "gene");
+              is_pseudo=strifind(biotype, "pseudo");
+              if (is_pseudo) {
+                   gffrec.setFeatureName("pseudo_RNA");
+                   }
+                else if (endsWith(biotype, "RNA")) {
+                   gffrec.setFeatureName(biotype);
+                   } else gffrec.setFeatureName("misc_RNA");
+              }
+          }
+      }
+ if (gname && strcmp(gname, gffrec.getID())!=0) {
+   int* isonum=isoCounter.Find(gname);
+   if  (isonum==NULL) {
+       isonum=new int(1);
+       isoCounter.Add(gname,isonum);
+       }
+      else (*isonum)++;
+   //defline.appendfmt(" gene=%s", gname);
+   }
+  int seqlen=0;
+
+  const char* tlabel=tracklabel;
+  if (tlabel==NULL) tlabel=gffrec.getTrackName();
+  //defline.appendfmt(" track:%s",tlabel);
+  char* cdsnt = NULL;
+  char* cdsaa = NULL;
+  int aalen=0;
+  for (int i=1;i<gffrec.exons.Count();i++) {
+     int ilen=gffrec.exons[i]->start-gffrec.exons[i-1]->end-1;
+     if (verbose && ilen>4000000)
+            GMessage("Warning: very large intron (%d) for transcript %s\n",
+                           ilen, gffrec.getID());
+     if (ilen>maxintron) {
+         return false;
+     }
+  }
+  GMapSegments seglst(gffrec.strand);
+  GFaSeqGet* faseq=NULL;
+  if (f_x!=NULL || f_y!=NULL || f_w!=NULL || spliceCheck || validCDSonly || addCDSattrs) {
+	  faseq=fastaSeqGet(gfasta, gffrec.getGSeqName());
+      if (faseq==NULL)
+	    	GError("Error: no genomic sequence available (check -g option!).\n");
+  }
+  if (spliceCheck && gffrec.exons.Count()>1) {
+    //check introns for splice site consensi ( GT-AG, GC-AG or AT-AC )
+    int glen=gffrec.end-gffrec.start+1;
+    const char* gseq=faseq->subseq(gffrec.start, glen);
+    if (gseq==NULL) {
+    	GMessage("Error at GFF ID %s : could not retrieve subsequence %s:%d-%d !\n",
+    			  gffrec.getID(), gffrec.getRefName(), gffrec.start, gffrec.end);
+    	return false;
+    }
+    bool revcompl=(gffrec.strand=='-');
+    bool ssValid=true;
+    for (int e=1;e<gffrec.exons.Count();e++) {
+      const char* intron=gseq+gffrec.exons[e-1]->end+1-gffrec.start;
+      int intronlen=gffrec.exons[e]->start-gffrec.exons[e-1]->end-1;
+      GSpliceSite acceptorSite(intron,intronlen,true, revcompl);
+      GSpliceSite    donorSite(intron,intronlen, false, revcompl);
+      //GMessage("%c intron %d-%d : %s .. %s\n",
+      //           gffrec.strand, istart, iend, donorSite.nt, acceptorSite.nt);
+      if (acceptorSite=="AG") { // GT-AG or GC-AG
+         if (!donorSite.canonicalDonor()) {
+            ssValid=false;break;
+            }
+         }
+      else if (acceptorSite=="AC") { //AT-AC also accepted
+         if (donorSite!="AT") { ssValid=false; break; }
+         }
+      else { ssValid=false; break; }
+      }
+    if (!ssValid) {
+      if (verbose)
+         GMessage("Unrecognized splice sites found for '%s'\n",gffrec.getID());
+      return false; //don't print this one!
+    }
+  }
+  bool trprint=true;
+  bool inframeStop=false;
+  //int stopCodonAdjust=0;
+  int mCDphase=0;
+  bool fullCDS=false;
+  bool endStop=false;
+  bool stopAdjusted=false;
+  if (add_hasCDS && gffrec.hasCDS()) gffrec.addAttr("hasCDS", "true");
+  if (gffrec.CDphase=='1' || gffrec.CDphase=='2')
+      mCDphase = gffrec.CDphase-'0';
+  //CDS partialness only added when -y -x -V options are given
+  if (gffrec.hasCDS() && (f_y!=NULL || f_x!=NULL || validCDSonly || addCDSattrs)) {
+    int strandNum=0;
+    int phaseNum=0;
+  CDS_CHECK:
+    uint cds_olen=0;
+    cdsnt=gffrec.getSpliced(faseq, true, &seqlen, NULL, &cds_olen, &seglst, adjustStop);
+    //if adjustStop, seqlen has the CDS+3'UTR length, but cds_olen still has the original CDS length
+    if (cdsnt!=NULL && cdsnt[0]!='\0') { //has CDS
+         cdsaa=translateDNA(cdsnt, aalen, seqlen);
+         char* p=strchr(cdsaa,'.');
+         int cds_aalen=aalen;
+         if (adjustStop)
+        	 cds_aalen=cds_olen/3; //originally stated CDS length
+         endStop=false;
+         if (p!=NULL) { //stop codon found
+        	 if (p-cdsaa==cds_aalen-1) { //stop found as the stated last CDS codon
+                  *p='\0';//remove it
+                  endStop=true;
+                  if (adjustStop) {
+                	  seqlen=cds_aalen*3;
+                	  aalen=cds_aalen;
+                  }
+                  cds_aalen--;
+                  aalen--;
+                  //no need to adjust stop codon
+              }
+              else {//stop found in a different position than the last codon
+            	  if (p-cdsaa<cds_aalen-1 && !adjustStop) {
+            		  inframeStop=true;
+            	  }
+            	  if (adjustStop) {
+            		  *p='\0';
+            		  cds_aalen=p-cdsaa+1; //adjusted CDS length
+            		  seqlen=cds_aalen*3;
+            		  aalen=cds_aalen;
+            		  uint gc=seglst.gmap(seqlen);
+            		  if (gffrec.strand=='-') gffrec.CDstart=gc;
+            		  else gffrec.CDend=gc;
+            		  endStop=true;
+            		  stopAdjusted=true;
+            	  }
+              }
+         }//stop codon found
+         //if (trprint==false) { //failed CDS validity check
+         if (inframeStop) {
+           //in-frame stop codon found
+           if (altPhases && phaseNum<3) {
+              phaseNum++; //try a different phase
+              gffrec.CDphase = '0'+((mCDphase+phaseNum)%3);
+              GFREE(cdsaa);
+              goto CDS_CHECK;
+           }
+           if (gffrec.exons.Count()==1 && bothStrands) {
+              strandNum++;
+              phaseNum=0;
+              if (strandNum<2) {
+                 GFREE(cdsaa);
+                 gffrec.strand = (gffrec.strand=='-') ? '+':'-';
+                 goto CDS_CHECK; //repeat the CDS check for a different frame
+              }
+           }
+           if (verbose) GMessage("Warning: In-frame STOP found for '%s'\n",gffrec.getID());
+           if (addCDSattrs) gffrec.addAttr("InFrameStop", "true");
+         } //has in-frame STOP
+         if (stopAdjusted) {
+      	   if (addCDSattrs) gffrec.addAttr("CDStopAdjusted", "true");
+      	   inframeStop=false; //pretend it's OK now that we've adjusted it
+         }
+         if (!inframeStop) {
+			 bool hasStart=(cdsaa[0]=='M'); //for the regular eukaryotic translation table
+			 fullCDS=(endStop && hasStart);
+			 if (!fullCDS) {
+				 const char* partialness=NULL;
+				 if (hasStart) partialness="3";
+				 else {
+					partialness = endStop ? "5" : "5_3";
+				 }
+				 if (addCDSattrs) gffrec.addAttr("partialness", partialness);
+			 }
+         }
+         if (trprint && ((fullCDSonly && !fullCDS) || (validCDSonly && inframeStop)) )
+        	 trprint=false;
+         //} // Valid CDS only requested?
+      } //has CDS
+  } //translation or codon check was requested
+  if (!trprint) {
+    GFREE(cdsnt);
+    GFREE(cdsaa);
+    //if (adjstop!=NULL) delete adjstop;
+    return false;
+  }
+  /*
+  if (validCDSonly) {
+     int stopCodonAdjust=adjstop->restore();
+     if (stopCodonAdjust!=0 && !endStop) {
+        //restore stop codon location
+        //adjust_stopcodon(gffrec, -stopCodonAdjust, &seglst);
+	    if (seglst.Count()>0) seglst.Last()->end-=stopCodonAdjust;
+        if (cdsnt!=NULL && seqlen>0) {
+           seqlen-=stopCodonAdjust;
+           cdsnt[seqlen]=0;
+        }
+        if (cdsaa!=NULL) aalen--;
+     }
+  }
+  if (adjstop!=NULL) delete adjstop;
+  */
+  if (cdsnt!=NULL) { // && !inframeStop) {
+	  if (f_y!=NULL) { //CDS translation fasta output requested
+			 if (cdsaa==NULL) { //translate now if not done before
+			   cdsaa=translateDNA(cdsnt, aalen, seqlen);
+			 }
+			 if (aalen>0) {
+			   if (cdsaa[aalen-1]=='.' || cdsaa[aalen-1]=='\0') --aalen; //avoid printing the stop codon
+ 			   fprintf(f_y, ">%s", gffrec.getID());
+ 			   if (fmtTable) printTableData(f_y, gffrec, true);
+ 			   else fprintf(f_y, "\n");
+			   printFasta(f_y, NULL, cdsaa, aalen, StarStop);
+			 }
+	  }
+	  if (f_x!=NULL) { //CDS only
+			 GStr defline(gffrec.getID(), 94);
+			 if (writeExonSegs) {
+				  defline.append(" loc:");
+				  defline.append(gffrec.getGSeqName());
+				  defline.appendfmt("(%c)",gffrec.strand);
+				  //warning: not CDS coordinates are written here, but the exon ones
+				  defline+=(int)gffrec.start;
+				  defline+=(char)'-';
+				  defline+=(int)gffrec.end;
+				  // -- here these are CDS substring coordinates on the spliced sequence:
+				  defline.append(" segs:");
+				  for (int i=0;i<seglst.Count();i++) {
+					  if (i>0) defline.append(",");
+					  defline+=(int)seglst[i].start;
+					  defline.append("-");
+					  defline+=(int)seglst[i].end;
+					  }
+			 }
+			 fprintf(f_x, ">%s", defline.chars());
+			 if (fmtTable) printTableData(f_x, gffrec, true);
+			 else fprintf(f_x, "\n");
+			 printFasta(f_x, NULL, cdsnt, seqlen);
+	  }
+	  GFREE(cdsnt);
+	  GFREE(cdsaa);
+  } //writing CDS or its translation
+  if (f_w!=NULL) { //write spliced exons
+	  uint cds_start=0;
+	  uint cds_end=0;
+	  seglst.Clear();
+	  int padLeft=0;
+	  int padRight=0;
+	  if (wPadding>0) {
+		padLeft= (gffrec.start>(uint)wPadding) ? wPadding : gffrec.start - 1;
+		int ediff=faseq->getseqlen()-gffrec.end;
+	    padRight=(wPadding>ediff) ?  ediff : wPadding;
+   	    gffrec.addPadding(padLeft, padRight);
+	  }
+	  char* exont=gffrec.getSpliced(faseq, false, &seqlen, &cds_start, &cds_end, &seglst);
+	  //restore exons to normal (remove padding)
+	  if (wPadding>0)
+		  gffrec.removePadding(padLeft, padRight);
+
+	  GStr defline(gffrec.getID());
+	  if (exont!=NULL) {
+		  if (gffrec.CDstart>0) {
+			  defline.appendfmt(" CDS=%d-%d", cds_start, cds_end);
+		  }
+		  if (writeExonSegs) {
+			  defline.append(" loc:");
+			  defline.append(gffrec.getGSeqName());
+			  defline+=(char)'|';
+			  defline+=(int)gffrec.start;
+			  defline+=(char)'-';
+			  defline+=(int)gffrec.end;
+			  defline+=(char)'|';
+			  defline+=(char)gffrec.strand;
+			  defline.append(" exons:");
+			  for (int i=0;i<gffrec.exons.Count();i++) {
+				  if (i>0) defline.append(",");
+				  defline+=(int)gffrec.exons[i]->start;
+				  defline.append("-");
+				  defline+=(int)gffrec.exons[i]->end;
+			  }
+			if (wPadding>0) {
+				defline.append(" padding:");
+				defline.append(padLeft);
+				defline+=(char)'|';
+				defline.append(padRight);
+			}
+
+			defline.append(" segs:");
+			for (int i=0;i<seglst.Count();i++) {
+				if (i>0) defline.append(",");
+				defline+=(int)seglst[i].start;
+				defline.append("-");
+				defline+=(int)seglst[i].end;
+				}
+		  }
+
+		  fprintf(f_w, ">%s", defline.chars());
+		  if (fmtTable) printTableData(f_w, gffrec, true);
+		    else fprintf(f_w, "\n");
+		  printFasta(f_w, NULL, exont, seqlen);
+		  GFREE(exont);
+	  }
+  } //writing f_w (spliced exons)
+  return true;
+}
+
+
+GTData::GTData(GffObj* t, GenomicSeqData* gd):rna(t),gdata(gd), locus(NULL), replaced_by(NULL), geneinfo(NULL) {
+    if (rna!=NULL) {
+        //geneinfo=(GeneInfo*)rna->uptr; //take over geneinfo, if there
+        rna->uptr=this;
+    }
+    if (gdata!=NULL)
+ 	   gdata->tdata.Add(this);
+}
+
+
+
 bool GffLoader::unsplContained(GffObj& ti, GffObj&  tj) {
  //returns true only if ti (which MUST be single-exon) is "almost" contained in any of tj's exons
  //but it does not cross any intron-exon boundary of tj
@@ -375,8 +991,7 @@ bool GffLoader::placeGf(GffObj* t, GenomicSeqData* gdata) {
   				g.children.Add(t);
   			keep=true;
   			if (tdata==NULL) {
-  		       tdata=new GTData(t); //additional transcript data
-  		       gdata->tdata.Add(tdata);
+  		       tdata=new GTData(t, gdata); //additional transcript data
   			}
   			t->parent=&g;
   			//disable printing of gene if transcriptsOnly and --keep-genes wasn't given
@@ -400,8 +1015,8 @@ bool GffLoader::placeGf(GffObj* t, GenomicSeqData* gdata) {
   if (t->exons.Count()>0) { //treating this entry as a transcript
 	gdata->rnas.Add(t); //added it in sorted order
 	if (tdata==NULL) {
-	   tdata=new GTData(t); //additional transcript data
-	   gdata->tdata.Add(tdata);
+	   tdata=new GTData(t, gdata); //additional transcript data
+	   //gdata->tdata.Add(tdata);
 	}
 	keep=true;
   }
@@ -409,10 +1024,9 @@ bool GffLoader::placeGf(GffObj* t, GenomicSeqData* gdata) {
     if (t->isGene() || !this->transcriptsOnly) {
 	   gdata->gfs.Add(t);
 	   keep=true;
-	   //GTData* tdata=new GTData(t); //additional transcript data
 	   if (tdata==NULL) {
-		   tdata=new GTData(t); //additional transcript data
-		   gdata->tdata.Add(tdata);
+		   tdata=new GTData(t, gdata); //additional transcript data
+		   //gdata->tdata.Add(tdata);
 	   }
 	   noexon_gfs=true; //gene-like record, no exons defined
 	   keep=true;
@@ -420,6 +1034,27 @@ bool GffLoader::placeGf(GffObj* t, GenomicSeqData* gdata) {
        return false; //nothing to do with these non-transcript objects
     }
   }
+  //keeping track of genes in special cases
+	char* geneid=t->getGeneID();
+	bool trackGenes=!t->isGene() && ( (keepGenes && t->parent==NULL) ||
+			    (ensembl_convert && startsWith(t->getID(), "ENS") ) ) ;
+	if (trackGenes) {
+		GTData* tdata=(GTData*)(t->uptr);
+		//keep track of chr|gene_id data and coordinate range
+		if (geneid!=NULL) {
+			GeneInfo* ginfo=gene_ids.Find(geneid);
+			if (ginfo==NULL) {//first time seeing this gene ID
+				GeneInfo* geneinfo=new GeneInfo(t, tdata->gdata, ensembl_convert);
+				gene_ids.Add(geneid, geneinfo);
+				//if (gfnew!=NULL) //new gene features
+				//  gfnew->Add(geneinfo->gf);
+			}
+			else ginfo->update(t);
+		}
+	}
+
+
+
   if (!doCluster) return keep;
 
   if (!keep) return false;
@@ -677,7 +1312,7 @@ void warnPseudo(GffObj& m) {
 	GMessage("Info: pseudo gene/transcript record with ID=%s discarded.\n",m.getID());
 }
 
-void GffLoader::load(GList<GenomicSeqData>& seqdata, GFValidateFunc* gf_validate, GFFCommentParser* gf_parsecomment) {
+void GffLoader::load(GList<GenomicSeqData>& seqdata, GFFCommentParser* gf_parsecomment) {
 	if (f==NULL) GError("Error: GffLoader::load() cannot be called before ::openFile()!\n");
 	GffReader* gffr=new GffReader(f, this->transcriptsOnly, true); //not only mRNA features, sorted
 	clearHeaderLines();
@@ -692,6 +1327,30 @@ void GffLoader::load(GList<GenomicSeqData>& seqdata, GFValidateFunc* gf_validate
 	gffr->setIgnoreLocus(ignoreLocus);
 	gffr->setRefAlphaSorted(this->sortRefsAlpha);
 	if (keepGff3Comments && gf_parsecomment!=NULL) gffr->setCommentParser(gf_parsecomment);
+    int outcounter=0;
+	if (streamIn) { //this will ignore any clustering options
+		GffObj* t=NULL;
+		while ((t=gffr->readNext())!=NULL) {
+			if (!validateGffRec(t)) {
+				delete t;
+				continue;
+			}
+			if (process_transcript(gfasta, *t)) {
+				outcounter++;
+				if (f_out) {
+				  if (fmtTable)
+						printTableData(f_out, *t);
+				  else //GFF3, GTF, BED, TLF
+					t->printGxf(f_out, exonPrinting, tracklabel, NULL, decodeChars);
+				}
+			}
+			delete t;
+		}
+		delete gffr;
+		return;
+	}
+
+
 	gffr->readAll();
 	GVec<int> pseudoFeatureIds; //feature type: pseudo*
 	GVec<int> pseudoAttrIds;  // attribute: [is]pseudo*=true/yes/1
@@ -720,9 +1379,7 @@ void GffLoader::load(GList<GenomicSeqData>& seqdata, GFValidateFunc* gf_validate
 		}
 	}
 
-	//int redundant=0; //redundant annotation discarded
 	if (verbose) GMessage("   .. loaded %d genomic features from %s\n", gffr->gflst.Count(), fname.chars());
-	//int rna_deleted=0;
 	//add to GenomicSeqData, adding to existing loci and identifying intron-chain duplicates
 	for (int k=0;k<gffr->gflst.Count();k++) {
 		GffObj* m=gffr->gflst[k];
@@ -781,7 +1438,7 @@ void GffLoader::load(GList<GenomicSeqData>& seqdata, GFValidateFunc* gf_validate
 			m->subftype_id=gff_fid_exon;
 		}
 		//GList<GffObj> gfadd(false,false); -- for gf_validate()?
-		if (gf_validate!=NULL && !(*gf_validate)(m, NULL)) {
+		if (!validateGffRec(m)) {
 			continue;
 		}
 		m->isUsed(true); //so the gffreader won't destroy it


=====================================
gff_utils.h
=====================================
@@ -2,12 +2,64 @@
 #define GFF_UTILS_H
 #include "gff.h"
 #include "GStr.h"
+#include "GVec.hh"
 #include "GFaSeqGet.h"
 
 extern bool verbose;
 extern bool debugMode;
-
-typedef bool GFValidateFunc(GffObj* gf, GList<GffObj>* gfadd);
+extern bool ensembl_convert;
+
+extern FILE* ffasta;
+extern FILE* f_in;
+extern FILE* f_out;
+extern FILE* f_w; //writing fasta with spliced exons (transcripts)
+extern int wPadding; //padding for -w option
+extern FILE* f_x; //writing fasta with spliced CDS
+extern FILE* f_y; //wrting fasta with translated CDS
+
+extern int maxintron;
+
+extern bool wCDSonly;
+extern bool wNConly;
+extern int minLen; //minimum transcript length
+extern bool validCDSonly; // translation with no in-frame STOP
+extern bool bothStrands; //for single-exon mRNA validation, check the other strand too
+extern bool altPhases; //if original phase fails translation validation,
+                     //try the other 2 phases until one makes it
+extern bool addCDSattrs;
+extern bool add_hasCDS;
+
+extern bool adjustStop; //automatic adjust the CDS stop coordinate
+extern bool covInfo; // --cov-info : only report genome coverage
+extern GStr tableFormat; //list of "attributes" to print in tab delimited format
+extern bool spliceCheck; //only known splice-sites
+extern bool decodeChars; //decode url-encoded chars in attrs (-D)
+extern bool StarStop; //use * instead of . for stop codon translation
+extern bool fullCDSonly; // starts with START, ends with STOP codon
+
+extern bool multiExon;
+extern bool writeExonSegs;
+extern char* tracklabel;
+extern char* rfltGSeq;
+extern char rfltStrand;
+extern uint rfltStart;
+extern uint rfltEnd;
+extern bool rfltWithin; //check for full containment within given range
+extern bool addDescr;
+
+
+extern bool fmtGFF3; //output: GFF3
+//other formats only make sense in transcriptOnly mode
+extern bool fmtGTF;
+extern bool fmtBED;
+extern bool fmtTLF;
+extern bool fmtTable;
+
+
+extern GffPrintMode exonPrinting;
+
+//typedef bool GFValidateFunc(GffObj* gf, GList<GffObj>* gfadd);
+typedef bool GFValidateFunc(GffObj* gf);
 
 //test if a transcript should be printed (and not printed yet)
 #define T_PRINTABLE(d) (((d) & 0x100)==0)
@@ -26,7 +78,90 @@ typedef bool GFValidateFunc(GffObj* gf, GList<GffObj>* gfadd);
 //keep/set original/old strand
 #define T_SET_OSTRAND(d, s) d |= s
 
-class GeneInfo { //for Ensembl GTF conversion
+class SeqInfo { //populated from the -s option of gffread
+ public:
+  int len;
+  char* descr;
+  SeqInfo( int l, char* s): len(l), descr(NULL) {
+    if (s!=NULL)
+      descr=Gstrdup(s);
+  }
+  ~SeqInfo() {
+    GFREE(descr);
+  }
+};
+
+class RefTran {
+ public:
+   char* new_name;
+   RefTran(char *ns) {
+      new_name=NULL;
+      if (ns!=NULL)
+         new_name=Gstrdup(ns);
+      }
+   ~RefTran() {
+      GFREE(new_name);
+      }
+};
+
+extern GFastaDb gfasta;
+extern GHash<SeqInfo> seqinfo;
+extern GHash<int> isoCounter; //counts the valid isoforms
+extern GHash<RefTran> reftbl;
+
+char* getSeqDescr(char* seqid);
+char* getSeqName(char* seqid);
+int adjust_stopcodon(GffObj& gffrec, int adj, GList<GSeg>* seglst=NULL);
+void printTableData(FILE* f, GffObj& g, bool inFasta=false);
+bool validateGffRec(GffObj* gffrec);
+bool process_transcript(GFastaDb& gfasta, GffObj& gffrec);
+
+enum ETableFieldType {
+  ctfGFF_Attr=0, // attribute name as is
+  ctfGFF_ID, //ID or @id or transcript_id
+  ctfGFF_geneID, //geneID or @gene_id or @geneid
+  ctfGFF_geneName, //geneName or @gene_name or @genename
+  ctfGFF_Parent, //Parent or @parent
+  ctfGFF_chr, //@chr
+  ctfGFF_feature, //@feature
+  ctfGFF_start, //@start
+  ctfGFF_end, //@end
+  ctfGFF_strand, //@strand
+  ctfGFF_numexons, //@numexons
+  ctfGFF_exons, //@exons
+  ctfGFF_cds, //@cds
+  ctfGFF_covlen, //@covlen
+  ctfGFF_cdslen//@cdslen
+};
+
+class CTableField {
+ public:
+   ETableFieldType type;
+   GStr name; //only for type ctfGFF_Attr
+   CTableField(ETableFieldType atype=ctfGFF_Attr):type(atype) { }
+   CTableField(GStr& attrname):type(ctfGFF_Attr),name(attrname) { }
+};
+
+
+extern GVec<CTableField> tableCols; //table output format fields
+
+class GffLocus;
+class GenomicSeqData;
+class GeneInfo;
+
+class GTData { // transcript associated data
+ public:
+   GffObj* rna;
+   GenomicSeqData* gdata;
+   GffLocus* locus;
+   GffObj* replaced_by;
+   GeneInfo* geneinfo;
+   GTData(GffObj* t=NULL, GenomicSeqData* gd=NULL);
+   bool operator<(GTData& b) { return (rna < b.rna); }
+   bool operator==(GTData& b) { return (rna==b.rna); }
+};
+
+class GeneInfo {
  public:
    int flag;
    GffObj* gf;
@@ -36,17 +171,23 @@ class GeneInfo { //for Ensembl GTF conversion
      gf=NULL;
      flag=0;
    }
-   GeneInfo(GffObj* gfrec, bool ensembl_convert=false):gene_names(true, true, true),
+
+   GeneInfo(GffObj* gfrec, GenomicSeqData* gdata, bool ensembl_convert=false):flag(0), gf(NULL), gene_names(true, true, true),
                     transcripts(true,true,true) {
-     flag=0;
      if (gfrec->getGeneName())
         gene_names.Add(new GStr(gfrec->getGeneName()));
      transcripts.Add(new GStr(gfrec->getID()));
-     create_gf(gfrec, ensembl_convert);
-     }
+     create_gf(gfrec, gdata ,ensembl_convert);
+   }
+
+   ~GeneInfo() {
+      delete gf;
+   }
 
-   void create_gf(GffObj* gfrec, bool ensembl_convert) {
+   void create_gf(GffObj* gfrec, GenomicSeqData* gdata, bool ensembl_convert) {
      gf=new GffObj(gfrec->getGeneID());
+     GTData* gfdata=new GTData(gf, gdata);
+     gfdata->geneinfo=this;
      gf->gseq_id=gfrec->gseq_id;
      gf->track_id=gfrec->track_id;
      gf->start=gfrec->start;
@@ -55,28 +196,30 @@ class GeneInfo { //for Ensembl GTF conversion
      gf->setFeatureName("gene");
      gf->isGene(true);
      gf->isUsed(true);
-     gf->uptr=this;
+     //gf->uptr=gfdata; //for these new gene objects
      gfrec->incLevel();
      gfrec->parent=gf;
      gf->children.Add(gfrec);
+     const char* s=NULL;
+     if ((s=gfrec->getGeneName())) {
+    	 gf->addAttr("Name", s);
+    	 gf->copyAttrs(gfrec);
+     }
      if (ensembl_convert) {
        //gf->addAttr("type", gf->getTrackName());
        const char* biotype=gfrec->getAttr("type");
        if (biotype) gf->addAttr("type", biotype);
        }
-     //gf->children.Add(gfrec);
-     }
-   //~GeneInfo() {
-   //  }
+       // gf->children.Add(gfrec);
+   }
+
    void update(GffObj* gfrec) {
      if (transcripts.AddedIfNew(new GStr(gfrec->getID()))<0)
        return;
      gene_names.AddedIfNew(new GStr(gfrec->getGeneName()));
      if (gf==NULL) {
         GError("GeneInfo::update() called on uninitialized gf!\n");
-        //create_gf(gfrec);
-        //return;
-        }
+     }
      gfrec->parent=gf;
      gf->children.Add(gfrec);
      gfrec->incLevel();
@@ -85,53 +228,49 @@ class GeneInfo { //for Ensembl GTF conversion
      if (gf->end<gfrec->end)
            gf->end=gfrec->end;
      }
-    void finalize() {
+
+   void finalize() {
      //prepare attributes for printing
      //must be called right before printing
      if (gf==NULL || transcripts.Count()==0) return;
      if (gene_names.Count()>0) {
        gf->addAttr("Name", gene_names[0]->chars());
-       /*
-       GStr s(gene_names[0]->chars());
-       for (int i=1;i<gene_names.Count();i++) {
-          s.append(",");
-          s.append(gene_names[i]->chars());
-          }
-       gf->addAttr("genes", s.chars());
-       */
-       } //has gene names
-       GStr t(transcripts[0]->chars());
-       for (int i=1;i<transcripts.Count();i++) {
+     } //has gene names
+     GStr t(transcripts[0]->chars());
+     for (int i=1;i<transcripts.Count();i++) {
           t.append(",");
           t.append(transcripts[i]->chars());
-          }
-       gf->addAttr("transcripts", t.chars());
      }
+     gf->addAttr("transcripts", t.chars());
+   }
 };
-
-
-class GffLocus;
-
-class GTData { //transcript associated data
+class GenomicSeqData {
+  int gseq_id;
  public:
-   GffObj* rna;
-   GffLocus* locus;
-   GffObj* replaced_by;
-   GeneInfo* geneinfo;
-   //int flag;
-   GTData(GffObj* t=NULL) {
-       rna=t;
-       //flag=0;
-       locus=NULL;
-       replaced_by=NULL;
-       geneinfo=NULL;
-       if (rna!=NULL) {
-           geneinfo=(GeneInfo*)rna->uptr; //take over geneinfo, if there
-           rna->uptr=this;
-       }
-   }
-   bool operator<(GTData& b) { return (rna < b.rna); }
-   bool operator==(GTData& b) { return (rna==b.rna); }
+  const char* gseq_name;
+  int seqreg_start; //if given by ##sequence-region comment
+  int seqreg_end;
+  GList<GffObj> gfs; //all non-transcript features -> usually gene features
+  GList<GffObj> rnas; //all transcripts on this genomic sequence
+  GList<GffLocus> loci; //all loci clusters
+  GList<GTData> tdata; //transcript data (uptr holder for all rnas loaded here)
+  uint64 f_bases;//base coverage on forward strand
+  uint64 r_bases;//base coverage on reverse strand
+  uint64 u_bases;//base coverage on undetermined strand
+  //GenomicSeqData(int gid=-1):rnas(true,true,false),loci(true,true,true),
+  GenomicSeqData(int gid=-1):gseq_id(gid), gseq_name(NULL), seqreg_start(0), seqreg_end(0),
+		  gfs(true, true, false),rnas((GCompareProc*)gfo_cmpByLoc),loci(true,true,false),
+		  tdata(false,true,false),  f_bases(0), r_bases(0), u_bases(0) {
+  if (gseq_id>=0)
+    gseq_name=GffObj::names->gseqs.getName(gseq_id);
+  }
+
+  bool operator==(GenomicSeqData& d){
+    return gseq_id==d.gseq_id;
+  }
+  bool operator<(GenomicSeqData& d){
+    return (gseq_id<d.gseq_id);
+  }
 };
 
 class CGeneSym {
@@ -450,34 +589,6 @@ public:
     }
 };
 
-class GenomicSeqData {
-  int gseq_id;
- public:
-  const char* gseq_name;
-  int seqreg_start; //if given by ##sequence-region comment
-  int seqreg_end;
-  GList<GffObj> gfs; //all non-transcript features -> usually gene features
-  GList<GffObj> rnas; //all transcripts on this genomic sequence
-  GList<GffLocus> loci; //all loci clusters
-  GList<GTData> tdata; //transcript data (uptr holder for all rnas loaded here)
-  uint64 f_bases;//base coverage on forward strand
-  uint64 r_bases;//base coverage on reverse strand
-  uint64 u_bases;//base coverage on undetermined strand
-  //GenomicSeqData(int gid=-1):rnas(true,true,false),loci(true,true,true),
-  GenomicSeqData(int gid=-1):gseq_id(gid), gseq_name(NULL), seqreg_start(0), seqreg_end(0),
-		  gfs(true, true, false),rnas((GCompareProc*)gfo_cmpByLoc),loci(true,true,false),
-		  tdata(false,true,false),  f_bases(0), r_bases(0), u_bases(0) {
-  if (gseq_id>=0)
-    gseq_name=GffObj::names->gseqs.getName(gseq_id);
-  }
-
-  bool operator==(GenomicSeqData& d){
-    return gseq_id==d.gseq_id;
-  }
-  bool operator<(GenomicSeqData& d){
-    return (gseq_id<d.gseq_id);
-  }
-};
 
 int gseqCmpName(const pointer p1, const pointer p2);
 
@@ -574,6 +685,7 @@ class GffLoader {
 		bool fuzzSpan:1; //matching/contained redundancy relaxed to disregard full boundary containment
 		bool dOvlSET:1; //discard overlapping Single Exon Transcripts on any strand
 		bool forceExons:1;
+		bool streamIn:1;
 	  };
   };
 
@@ -599,7 +711,9 @@ class GffLoader {
       }
   }
 
-  void load(GList<GenomicSeqData>&seqdata, GFValidateFunc* gf_validate=NULL, GFFCommentParser* gf_parsecomment=NULL);
+  bool validateGffRec(GffObj* gffrec);
+
+  void load(GList<GenomicSeqData>&seqdata, GFFCommentParser* gf_parsecomment=NULL);
 
   bool placeGf(GffObj* t, GenomicSeqData* gdata);
 


=====================================
gffread.cpp
=====================================
@@ -4,13 +4,13 @@
 #define __STDC_FORMAT_MACROS
 #include <inttypes.h>
 
-#define VERSION "0.11.8"
+#define VERSION "0.12.1"
 
 #define USAGE "gffread v" VERSION ". Usage:\n\
 gffread <input_gff> [-g <genomic_seqs_fasta> | <dir>][-s <seq_info.fsize>] \n\
  [-o <outfile>] [-t <trackname>] [-r [[<strand>]<chr>:]<start>..<end> [-R]]\n\
  [-CTVNJMKQAFPGUBHZWTOLE] [-w <exons.fa>] [-x <cds.fa>] [-y <tr_cds.fa>]\n\
- [-i <maxintron>] [--bed] [--table <attrlist>] [--sort-by <refseq_list.txt>]\n\
+ [-i <maxintron>] [--stream] [--bed] [--table <attrlist>] [--sort-by <ref.lst>]\n\
  \n\
  Filter, convert or cluster GFF/GTF/BED records, extract the sequence of\n\
  transcripts (exon or CDS) and more.\n\
@@ -74,6 +74,8 @@ Misc options: \n\
  --in-tlf: input GFF-like one-line-per-transcript format without exon/CDS\n\
            features (see --tlf option below); automatic if the input\n\
            filename ends with .tlf)\n\
+ --stream: fast processing of input GFF/BED transcripts as they are received\n\
+           ((no sorting, exons must be grouped by transcript in the input data)\n\
 Clustering:\n\
  -M/--merge : cluster the input transcripts into loci, discarding\n\
       \"duplicated\" transcripts (those with the same exact introns\n\
@@ -134,58 +136,7 @@ Output options:\n\
        problems with the given GFF/GTF records\n\
 "
 
-class SeqInfo { //populated from the -s option of gffread
- public:
-  int len;
-  char* descr;
-  SeqInfo( int l, char* s): len(l), descr(NULL) {
-    if (s!=NULL)
-      descr=Gstrdup(s);
-  }
-  ~SeqInfo() {
-    GFREE(descr);
-  }
-};
-
-class RefTran {
- public:
-   char* new_name;
-   RefTran(char *ns) {
-      new_name=NULL;
-      if (ns!=NULL)
-         new_name=Gstrdup(ns);
-      }
-   ~RefTran() {
-      GFREE(new_name);
-      }
-};
-
-enum ETableFieldType {
-  ctfGFF_Attr=0, // attribute name as is
-  ctfGFF_ID, //ID or @id or transcript_id
-  ctfGFF_geneID, //geneID or @gene_id or @geneid
-  ctfGFF_geneName, //geneName or @gene_name or @genename
-  ctfGFF_Parent, //Parent or @parent
-  ctfGFF_chr, //@chr
-  ctfGFF_feature, //@feature
-  ctfGFF_start, //@start
-  ctfGFF_end, //@end
-  ctfGFF_strand, //@strand
-  ctfGFF_numexons, //@numexons
-  ctfGFF_exons, //@exons
-  ctfGFF_cds, //@cds
-  ctfGFF_covlen, //@covlen
-  ctfGFF_cdslen//@cdslen
-};
-
-class CTableField {
- public:
-   ETableFieldType type;
-   GStr name; //only for type ctfGFF_Attr
-   CTableField(ETableFieldType atype=ctfGFF_Attr):type(atype) { }
-   CTableField(GStr& attrname):type(ctfGFF_Attr),name(attrname) { }
-};
-
+/*
 FILE* ffasta=NULL;
 FILE* f_in=NULL;
 FILE* f_out=NULL;
@@ -203,65 +154,51 @@ bool altPhases=false; //if original phase fails translation validation,
                      //try the other 2 phases until one makes it
 bool addCDSattrs=false;
 bool add_hasCDS=false;
+//bool streamIn=false; // --stream option
 bool adjustStop=false; //automatic adjust the CDS stop coordinate
 bool covInfo=false; // --cov-info : only report genome coverage
-//bool transcriptsOnly=true;
-//bool keepGenes=false; //for transcriptsOnly
-
-//bool sortAlpha=false;
-GStr sortBy; //file name with chromosomes listed in the desired order
-//bool keepRefOrder=false; //sort within chromosomes, but follow the input chromosome order -- default!
 GStr tableFormat; //list of "attributes" to print in tab delimited format
-//bool NoPseudo=false;
 bool spliceCheck=false; //only known splice-sites
 bool decodeChars=false; //decode url-encoded chars in attrs (-D)
 bool StarStop=false; //use * instead of . for stop codon translation
-
 bool fullCDSonly=false; // starts with START, ends with STOP codon
-//bool fullattr=false; //-F
-//bool gatherExonAttrs=false; //-G
+*/
+
+GStr sortBy; //file name with chromosomes listed in the desired order
 
-//bool sortByLoc=false; // if the GFF output should be sorted by location
-bool ensembl_convert=false; //-L, assist in converting Ensembl GTF to GFF3
 bool BEDinput=false;
 bool TLFinput=false;
 
-bool fmtGFF3=true; //default output: GFF3
-//other formats only make sens in transcriptOnly mode
-bool fmtGTF=false;
-bool fmtBED=false;
-bool fmtTLF=false;
-bool fmtTable=false;
-bool addDescr=false;
-//bool protmap=false;
+//--- output options
+/*
+extern bool fmtGFF3=true; //default output: GFF3
+//other formats only make sense in transcriptOnly mode
+extern bool fmtGTF=false;
+extern bool fmtBED=false;
+extern bool fmtTLF=false;
+extern bool fmtTable=false;
+
+
 bool multiExon=false;
 bool writeExonSegs=false;
 char* tracklabel=NULL;
-int maxintron=999000000;
-//bool mergeCloseExons=false;
-//range filter:
 char* rfltGSeq=NULL;
 char rfltStrand=0;
 uint rfltStart=0;
 uint rfltEnd=MAX_UINT;
 bool rfltWithin=false; //check for full containment within given range
+bool addDescr=false;
 
-GffLoader gffloader;
-
-GList<GenomicSeqData> g_data(true,true,true); //list of GFF records by genomic seq
-
-//hash with sequence info
-GHash<SeqInfo> seqinfo;
-GHash<int> isoCounter; //counts the valid isoforms
-GHash<RefTran> reftbl;
-GHash<GeneInfo> gene_ids;
-  //min-max gene span associated to chr|gene_id (mostly for Ensembl conversion)
+*/
 
-bool debugMode=false;
-//bool verbose=false;
+//bool protmap=false;
+//int maxintron=999000000;
+//bool mergeCloseExons=false;
+//range filter:
 
+GffLoader gffloader;
 
-GVec<CTableField> tableCols; //table output format fields
+GList<GenomicSeqData> g_data(true,true,true); //list of GFF records by genomic seq
 
 void loadSeqInfo(FILE* f, GHash<SeqInfo> &si) {
   GLineReader fr(f);
@@ -365,484 +302,6 @@ void loadRefTable(FILE* f, GHash<RefTran>& rt) {
       } //while lines
 }
 
-char* getSeqDescr(char* seqid) {
- static char charbuf[128];
- if (seqinfo.Count()==0) return NULL;
- char* suf=rstrchr(seqid, '.');
- if (suf!=NULL) *suf=0;
- SeqInfo* seqd=seqinfo.Find(seqid);
- if (suf!=NULL) *suf='.';
- if (seqd!=NULL) {
-  GStr s(seqd->descr);
-  //cleanup some Uniref gunk
-  if (s[0]=='[') {
-    int r=s.index(']');
-    if (r>=0 && r<8 && isdigit(s[1]))
-       s.remove(0,r+1);
-    }
-  if (s.length()>80) {
-    int r=s.index(';');
-    if (r>5) s.cut(r);
-    }
-  if (s.length()>127) {
-   s.cut(127);
-   int r=s.rindex(' ');
-   if (r>0) s.cut(r);
-   }
-  strcpy(charbuf, s.chars());
-  return charbuf;
-  }
- else return NULL;
-}
-
-char* getSeqName(char* seqid) {
-  static char charbuf[128];
-  char* suf=rstrchr(seqid, '.');
-  if (suf!=NULL) *suf=0;
-  strcpy(charbuf, seqid);
-  if (suf!=NULL) *suf='.';
-  return charbuf;
-}
-
-
-int adjust_stopcodon(GffObj& gffrec, int adj, GList<GSeg>* seglst=NULL) {
-  //adj>0, extend CDS to include a potential stop codon
-  //when CDS is expanded, the terminal exon might have to be adjusted too
-  int realadj=0;
-  if (gffrec.strand=='-') {
-       if ((int)gffrec.CDstart>adj) {
-           gffrec.CDstart-=adj;
-           realadj=adj;
-           if (gffrec.exons.First()->start>gffrec.CDstart) {
-                 gffrec.covlen+=gffrec.exons.First()->start - gffrec.CDstart;
-                 gffrec.exons.First()->start=gffrec.CDstart;
-                 gffrec.start=gffrec.CDstart;
-                 }
-             }
-          }
-        else { // forward strand
-         //expand beyond
-         realadj=adj;
-         gffrec.CDend+=adj;
-         if (adj<0) {//restore
-           if (gffrec.exons.Last()->end==gffrec.CDend-adj) {
-                        gffrec.exons.Last()->end+=adj;
-                        gffrec.end=gffrec.exons.Last()->end;
-                        gffrec.covlen+=adj;
-                        }
-         }
-         else if (gffrec.exons.Last()->end<gffrec.CDend) {
-             gffrec.covlen+=gffrec.CDend-gffrec.exons.Last()->end;
-             gffrec.exons.Last()->end=gffrec.CDend;
-             gffrec.end=gffrec.CDend;
-             }
-         }
-  if (seglst!=NULL) seglst->Last()->end+=realadj;
-  return realadj;
- }
-
-void printTableData(FILE* f, GffObj& g, bool inFasta=false) {
- //using attribute list in tableCols
-	char* av=NULL;
-	for(int i=0;i<tableCols.Count();i++) {
-		if (i>0 || inFasta) {
-     	   if (!inFasta || tableCols[i].type!=ctfGFF_ID)
-     		   fprintf(f,"\t");
-		}
-		switch(tableCols[i].type) {
-		case ctfGFF_Attr:
-			av=g.getAttr(tableCols[i].name.chars());
-			fprintf(f,"%s",av!=NULL? av : ".");
-			break;
-		case ctfGFF_chr:
-			fprintf(f,"%s",g.getGSeqName());
-			break;
-		case ctfGFF_ID:
-			if (!inFasta)
-			  fprintf(f,"%s",g.getID());
-			break;
-		case ctfGFF_geneID:
-			fprintf(f,"%s",g.getGeneID()!=NULL ? g.getGeneID() : ".");
-			break;
-		case ctfGFF_geneName:
-			fprintf(f,"%s",g.getGeneName()!=NULL ? g.getGeneName() : ".");
-			break;
-		case ctfGFF_Parent:
-			fprintf(f,"%s",g.parent!=NULL ? g.parent->getID() : ".");
-			break;
-		case ctfGFF_feature:
-			fprintf(f,"%s",g.getFeatureName());
-			break;
-		case ctfGFF_start:
-			fprintf(f,"%d",g.start);
-			break;
-		case ctfGFF_end:
-			fprintf(f,"%d",g.end);
-			break;
-		case ctfGFF_strand:
-			fprintf(f,"%c",g.strand);
-			break;
-		case ctfGFF_numexons:
-			fprintf(f,"%d",g.exons.Count());
-			break;
-		case ctfGFF_exons:
-			if (g.exons.Count()>0) {
-				for (int x=0;x<g.exons.Count();x++) {
-					if (x>0) fprintf(f,",");
-					fprintf(f,"%d-%d",g.exons[x]->start, g.exons[x]->end);
-				}
-			} else fprintf(f,".");
-			break;
-		case ctfGFF_cds:
-			if (g.hasCDS()) {
-				GVec<GffExon> cds;
-				g.getCDSegs(cds);
-				for (int x=0;x<cds.Count();x++) {
-					if (x>0) fprintf(f,",");
-				    fprintf(f,"%d-%d",cds[x].start, cds[x].end);
-				}
-			}
-			else fprintf(f,".");
-			break;
-		case ctfGFF_covlen:
-			fprintf(f, "%d", g.covlen);
-			break;
-		case ctfGFF_cdslen:
-			if (g.hasCDS()) {
-				GVec<GffExon> cds;
-				g.getCDSegs(cds);
-				int clen=0;
-				for (int x=0;x<cds.Count();x++)
-				    clen+=cds[x].end-cds[x].start+1;
-				fprintf(f, "%d", clen);
-			}
-			else fprintf(f, "0");
-			break;
-		} //switch
-	}
-	fprintf(f,"\n");
-}
-
-bool process_transcript(GFastaDb& gfasta, GffObj& gffrec) {
- if (!gffrec.isTranscript()) return false; //shouldn't call this function unless it's a transcript
- //returns true if the transcript passed the filter
- char* gname=gffrec.getGeneName();
- if (gname==NULL) gname=gffrec.getGeneID();
- if (ensembl_convert && startsWith(gffrec.getID(), "ENS")) {
-      const char* biotype=gffrec.getAttr("gene_biotype");
-      if (biotype) {
-         gffrec.addAttr("type", biotype);
-         gffrec.removeAttr("gene_biotype");
-         }
-       else { //old Ensembl files lacking gene_biotype
-         gffrec.addAttr("type", gffrec.getTrackName());
-         }
-
-      //bool is_gene=false;
-      bool is_pseudo=false;
-      if (strcmp(biotype, "protein_coding")==0 || gffrec.hasCDS())
-                gffrec.setFeatureName("mRNA");
-       else {
-          if (strcmp(biotype, "processed_transcript")==0)
-              gffrec.setFeatureName("proc_RNA");
-            else {
-              //is_gene=endsWith(biotype, "gene");
-              is_pseudo=strifind(biotype, "pseudo");
-              if (is_pseudo) {
-                   gffrec.setFeatureName("pseudo_RNA");
-                   }
-                else if (endsWith(biotype, "RNA")) {
-                   gffrec.setFeatureName(biotype);
-                   } else gffrec.setFeatureName("misc_RNA");
-              }
-          }
-      }
- if (gname && strcmp(gname, gffrec.getID())!=0) {
-   int* isonum=isoCounter.Find(gname);
-   if  (isonum==NULL) {
-       isonum=new int(1);
-       isoCounter.Add(gname,isonum);
-       }
-      else (*isonum)++;
-   //defline.appendfmt(" gene=%s", gname);
-   }
-  int seqlen=0;
-
-  const char* tlabel=tracklabel;
-  if (tlabel==NULL) tlabel=gffrec.getTrackName();
-  //defline.appendfmt(" track:%s",tlabel);
-  char* cdsnt = NULL;
-  char* cdsaa = NULL;
-  int aalen=0;
-  for (int i=1;i<gffrec.exons.Count();i++) {
-     int ilen=gffrec.exons[i]->start-gffrec.exons[i-1]->end-1;
-     if (verbose && ilen>4000000)
-            GMessage("Warning: very large intron (%d) for transcript %s\n",
-                           ilen, gffrec.getID());
-     if (ilen>maxintron) {
-         return false;
-     }
-  }
-  GMapSegments seglst(gffrec.strand);
-  GFaSeqGet* faseq=NULL;
-  if (f_x!=NULL || f_y!=NULL || f_w!=NULL || spliceCheck || validCDSonly || addCDSattrs) {
-	  faseq=fastaSeqGet(gfasta, gffrec.getGSeqName());
-      if (faseq==NULL)
-	    	GError("Error: no genomic sequence available (check -g option!).\n");
-  }
-  if (spliceCheck && gffrec.exons.Count()>1) {
-    //check introns for splice site consensi ( GT-AG, GC-AG or AT-AC )
-    int glen=gffrec.end-gffrec.start+1;
-    const char* gseq=faseq->subseq(gffrec.start, glen);
-    bool revcompl=(gffrec.strand=='-');
-    bool ssValid=true;
-    for (int e=1;e<gffrec.exons.Count();e++) {
-      const char* intron=gseq+gffrec.exons[e-1]->end+1-gffrec.start;
-      int intronlen=gffrec.exons[e]->start-gffrec.exons[e-1]->end-1;
-      GSpliceSite acceptorSite(intron,intronlen,true, revcompl);
-      GSpliceSite    donorSite(intron,intronlen, false, revcompl);
-      //GMessage("%c intron %d-%d : %s .. %s\n",
-      //           gffrec.strand, istart, iend, donorSite.nt, acceptorSite.nt);
-      if (acceptorSite=="AG") { // GT-AG or GC-AG
-         if (!donorSite.canonicalDonor()) {
-            ssValid=false;break;
-            }
-         }
-      else if (acceptorSite=="AC") { //AT-AC also accepted
-         if (donorSite!="AT") { ssValid=false; break; }
-         }
-      else { ssValid=false; break; }
-      }
-    if (!ssValid) {
-      if (verbose)
-         GMessage("Unrecognized splice sites found for '%s'\n",gffrec.getID());
-      return false; //don't print this one!
-    }
-  }
-  bool trprint=true;
-  bool inframeStop=false;
-  //int stopCodonAdjust=0;
-  int mCDphase=0;
-  bool fullCDS=false;
-  bool endStop=false;
-  bool stopAdjusted=false;
-  if (add_hasCDS && gffrec.hasCDS()) gffrec.addAttr("hasCDS", "true");
-  if (gffrec.CDphase=='1' || gffrec.CDphase=='2')
-      mCDphase = gffrec.CDphase-'0';
-  //CDS partialness only added when -y -x -V options are given
-  if (gffrec.hasCDS() && (f_y!=NULL || f_x!=NULL || validCDSonly || addCDSattrs)) {
-    int strandNum=0;
-    int phaseNum=0;
-  CDS_CHECK:
-    uint cds_olen=0;
-    cdsnt=gffrec.getSpliced(faseq, true, &seqlen, NULL, &cds_olen, &seglst, adjustStop);
-    //if adjustStop, seqlen has the CDS+3'UTR length, but cds_olen still has the original CDS length
-    if (cdsnt!=NULL && cdsnt[0]!='\0') { //has CDS
-         cdsaa=translateDNA(cdsnt, aalen, seqlen);
-         char* p=strchr(cdsaa,'.');
-         int cds_aalen=aalen;
-         if (adjustStop)
-        	 cds_aalen=cds_olen/3; //originally stated CDS length
-         endStop=false;
-         if (p!=NULL) { //stop codon found
-        	 if (p-cdsaa==cds_aalen-1) { //stop found as the stated last CDS codon
-                  *p='\0';//remove it
-                  endStop=true;
-                  if (adjustStop) {
-                	  seqlen=cds_aalen*3;
-                	  aalen=cds_aalen;
-                  }
-                  cds_aalen--;
-                  aalen--;
-                  //no need to adjust stop codon
-              }
-              else {//stop found in a different position than the last codon
-            	  if (p-cdsaa<cds_aalen-1 && !adjustStop) {
-            		  inframeStop=true;
-            	  }
-            	  if (adjustStop) {
-            		  *p='\0';
-            		  cds_aalen=p-cdsaa+1; //adjusted CDS length
-            		  seqlen=cds_aalen*3;
-            		  aalen=cds_aalen;
-            		  uint gc=seglst.gmap(seqlen);
-            		  if (gffrec.strand=='-') gffrec.CDstart=gc;
-            		  else gffrec.CDend=gc;
-            		  endStop=true;
-            		  stopAdjusted=true;
-            	  }
-              }
-         }//stop codon found
-         //if (trprint==false) { //failed CDS validity check
-         if (inframeStop) {
-           //in-frame stop codon found
-           if (altPhases && phaseNum<3) {
-              phaseNum++; //try a different phase
-              gffrec.CDphase = '0'+((mCDphase+phaseNum)%3);
-              GFREE(cdsaa);
-              goto CDS_CHECK;
-           }
-           if (gffrec.exons.Count()==1 && bothStrands) {
-              strandNum++;
-              phaseNum=0;
-              if (strandNum<2) {
-                 GFREE(cdsaa);
-                 gffrec.strand = (gffrec.strand=='-') ? '+':'-';
-                 goto CDS_CHECK; //repeat the CDS check for a different frame
-              }
-           }
-           if (verbose) GMessage("Warning: In-frame STOP found for '%s'\n",gffrec.getID());
-           if (addCDSattrs) gffrec.addAttr("InFrameStop", "true");
-         } //has in-frame STOP
-         if (stopAdjusted) {
-      	   if (addCDSattrs) gffrec.addAttr("CDStopAdjusted", "true");
-      	   inframeStop=false; //pretend it's OK now that we've adjusted it
-         }
-         if (!inframeStop) {
-			 bool hasStart=(cdsaa[0]=='M'); //for the regular eukaryotic translation table
-			 fullCDS=(endStop && hasStart);
-			 if (!fullCDS) {
-				 const char* partialness=NULL;
-				 if (hasStart) partialness="3";
-				 else {
-					partialness = endStop ? "5" : "5_3";
-				 }
-				 if (addCDSattrs) gffrec.addAttr("partialness", partialness);
-			 }
-         }
-         if (trprint && ((fullCDSonly && !fullCDS) || (validCDSonly && inframeStop)) )
-        	 trprint=false;
-         //} // Valid CDS only requested?
-      } //has CDS
-  } //translation or codon check was requested
-  if (!trprint) {
-    GFREE(cdsnt);
-    GFREE(cdsaa);
-    //if (adjstop!=NULL) delete adjstop;
-    return false;
-  }
-  /*
-  if (validCDSonly) {
-     int stopCodonAdjust=adjstop->restore();
-     if (stopCodonAdjust!=0 && !endStop) {
-        //restore stop codon location
-        //adjust_stopcodon(gffrec, -stopCodonAdjust, &seglst);
-	    if (seglst.Count()>0) seglst.Last()->end-=stopCodonAdjust;
-        if (cdsnt!=NULL && seqlen>0) {
-           seqlen-=stopCodonAdjust;
-           cdsnt[seqlen]=0;
-        }
-        if (cdsaa!=NULL) aalen--;
-     }
-  }
-  if (adjstop!=NULL) delete adjstop;
-  */
-  if (cdsnt!=NULL) { // && !inframeStop) {
-	  if (f_y!=NULL) { //CDS translation fasta output requested
-			 if (cdsaa==NULL) { //translate now if not done before
-			   cdsaa=translateDNA(cdsnt, aalen, seqlen);
-			 }
-			 if (aalen>0) {
-			   if (cdsaa[aalen-1]=='.' || cdsaa[aalen-1]=='\0') --aalen; //avoid printing the stop codon
- 			   fprintf(f_y, ">%s", gffrec.getID());
- 			   if (fmtTable) printTableData(f_y, gffrec, true);
- 			   else fprintf(f_y, "\n");
-			   printFasta(f_y, NULL, cdsaa, aalen, StarStop);
-			 }
-	  }
-	  if (f_x!=NULL) { //CDS only
-			 GStr defline(gffrec.getID(), 94);
-			 if (writeExonSegs) {
-				  defline.append(" loc:");
-				  defline.append(gffrec.getGSeqName());
-				  defline.appendfmt("(%c)",gffrec.strand);
-				  //warning: not CDS coordinates are written here, but the exon ones
-				  defline+=(int)gffrec.start;
-				  defline+=(char)'-';
-				  defline+=(int)gffrec.end;
-				  // -- here these are CDS substring coordinates on the spliced sequence:
-				  defline.append(" segs:");
-				  for (int i=0;i<seglst.Count();i++) {
-					  if (i>0) defline.append(",");
-					  defline+=(int)seglst[i].start;
-					  defline.append("-");
-					  defline+=(int)seglst[i].end;
-					  }
-			 }
-			 fprintf(f_x, ">%s", defline.chars());
-			 if (fmtTable) printTableData(f_x, gffrec, true);
-			 else fprintf(f_x, "\n");
-			 printFasta(f_x, NULL, cdsnt, seqlen);
-	  }
-	  GFREE(cdsnt);
-	  GFREE(cdsaa);
-  } //writing CDS or its translation
-  if (f_w!=NULL) { //write spliced exons
-	  uint cds_start=0;
-	  uint cds_end=0;
-	  seglst.Clear();
-	  //TODO: ? if wPadding is set, *temporarily* change first and last exon coordinates ?!?
-	  // or perhaps getSpliced() should take an additional padding parameter ?!?
-	  int padLeft=0;
-	  int padRight=0;
-	  if (wPadding>0) {
-		padLeft= (gffrec.start>(uint)wPadding) ? wPadding : gffrec.start - 1;
-		int ediff=faseq->getseqlen()-gffrec.end;
-	    padRight=(wPadding>ediff) ?  ediff : wPadding;
-   	    gffrec.addPadding(padLeft, padRight);
-	  }
-	  char* exont=gffrec.getSpliced(faseq, false, &seqlen, &cds_start, &cds_end, &seglst);
-	  //restore exons to normal (remove padding)
-	  if (wPadding>0)
-		  gffrec.removePadding(padLeft, padRight);
-
-	  GStr defline(gffrec.getID());
-	  if (exont!=NULL) {
-		  if (gffrec.CDstart>0) {
-			  defline.appendfmt(" CDS=%d-%d", cds_start, cds_end);
-		  }
-		  if (writeExonSegs) {
-			  defline.append(" loc:");
-			  defline.append(gffrec.getGSeqName());
-			  defline+=(char)'|';
-			  defline+=(int)gffrec.start;
-			  defline+=(char)'-';
-			  defline+=(int)gffrec.end;
-			  defline+=(char)'|';
-			  defline+=(char)gffrec.strand;
-			  defline.append(" exons:");
-			  for (int i=0;i<gffrec.exons.Count();i++) {
-				  if (i>0) defline.append(",");
-				  defline+=(int)gffrec.exons[i]->start;
-				  defline.append("-");
-				  defline+=(int)gffrec.exons[i]->end;
-			  }
-			if (wPadding>0) {
-				defline.append(" padding:");
-				defline.append(padLeft);
-				defline+=(char)'|';
-				defline.append(padRight);
-			}
-
-			defline.append(" segs:");
-			for (int i=0;i<seglst.Count();i++) {
-				if (i>0) defline.append(",");
-				defline+=(int)seglst[i].start;
-				defline.append("-");
-				defline+=(int)seglst[i].end;
-				}
-		  }
-
-		  fprintf(f_w, ">%s", defline.chars());
-		  if (fmtTable) printTableData(f_w, gffrec, true);
-		    else fprintf(f_w, "\n");
-		  printFasta(f_w, NULL, exont, seqlen);
-		  GFREE(exont);
-	  }
-  } //writing f_w (spliced exons)
-  return true;
-}
-
 void openfw(FILE* &f, GArgs& args, char opt) {
 	GStr s=args.getOpt(opt);
 	if (!s.is_empty()) {
@@ -863,10 +322,9 @@ void printGff3Header(FILE* f, GArgs& args) {
 		fprintf(f, "%s\n", gffloader.headerLines[i]);
 	}
   } else {
-    fprintf(f, "# ");
-    args.printCmdLine(f);
-    fprintf(f, "# gffread v" VERSION "\n");
     fprintf(f, "##gff-version 3\n");
+    fprintf(f, "# gffread v" VERSION "\n");
+    fprintf(f, "# ");args.printCmdLine(f);
   }
 }
 
@@ -906,83 +364,6 @@ void processGffComment(const char* cmline, GfList* gflst) {
  }
 }
 
-bool validateGffRec(GffObj* gffrec, GList<GffObj>* gfnew) {
-	if (reftbl.Count()>0) { //check if we need to reject by ref seq filter
-		GStr refname(gffrec->getRefName());
-		RefTran* rt=reftbl.Find(refname.chars());
-		if (rt==NULL && refname.length()>2 && refname[-2]=='.' && isdigit(refname[-1])) {
-			//try removing the version suffix
-			refname.cut(-2);
-			//GMessage("[DEBUG] Trying ref name '%s'...\n", refname.chars());
-			rt=reftbl.Find(refname.chars());
-		}
-		if (rt) {
-			gffrec->setRefName(rt->new_name);
-		}
-		/* //no, do not discard non-matching entries, let them pass through!
-		else {
-			if (verbose)
-				GMessage("Info: %s discarded due to reference %s not being mapped\n",
-						gffrec->getID(), refname.chars());
-			return false; //discard, ref seq not in the given translation table
-		}*/
-	}
-	if (gffloader.transcriptsOnly && gffrec->isDiscarded()) {
-		//discard generic "locus" features with no other detailed subfeatures
-		//GMessage("Warning: discarding %s GFF generic gene/locus container %s\n",gffrec->getID());
-		return false;
-	}
-	if (minLen>0 && gffrec->covlen<minLen) {
-		if (verbose)
-			GMessage("Info: %s discarded due to minimum length threshold %d\n",
-					gffrec->getID(), minLen);
-    	return false;
-	}
-	if (rfltGSeq!=NULL) { //filter by gseqName
-		if (strcmp(gffrec->getGSeqName(),rfltGSeq)!=0) {
-			return false;
-		}
-	}
-	if (rfltStrand>0 && gffrec->strand !=rfltStrand) {
-		return false;
-	}
-	//check coordinates
-	if (rfltStart!=0 || rfltEnd!=MAX_UINT) {
-		if (rfltWithin) {
-			if (gffrec->start<rfltStart || gffrec->end>rfltEnd) {
-				return false; //not within query range
-			}
-		}
-		else {
-			if (gffrec->start>rfltEnd || gffrec->end<rfltStart) {
-				return false;
-			}
-		}
-	}
-	if (multiExon && gffrec->exons.Count()<=1) {
-		return false;
-	}
-	if (wCDSonly && gffrec->CDstart==0) {
-		return false;
-	}
-	if (wNConly && gffrec->hasCDS()) return false;
-	if (ensembl_convert && startsWith(gffrec->getID(), "ENS")) {
-		//keep track of chr|gene_id data -- coordinate range
-		char* geneid=gffrec->getGeneID();
-		if (geneid!=NULL) {
-			GeneInfo* ginfo=gene_ids.Find(geneid);
-			if (ginfo==NULL) {//first time seeing this gene ID
-				GeneInfo* geneinfo=new GeneInfo(gffrec, ensembl_convert);
-				gene_ids.Add(geneid, geneinfo);
-				if (gfnew!=NULL)
-					gfnew->Add(geneinfo->gf); //do we really need this?
-			}
-			else ginfo->update(gffrec);
-		}
-	}
-	return true;
-}
-
 void printGffObj(FILE* f, GffObj* gfo, GStr& locname, GffPrintMode exonPrinting, int& out_counter) {
     GffObj& t=*gfo;
     GTData* tdata=(GTData*)(t.uptr);
@@ -1027,9 +408,20 @@ void printAsTable(FILE* f, GffObj* gfo, int* out_counter=NULL) {
     printTableData(f, *gfo);
 }
 
+void shutDown() {
+	seqinfo.Clear();
+	//if (faseq!=NULL) delete faseq;
+	//if (gcdb!=NULL) delete gcdb;
+	GFREE(rfltGSeq);
+	FWCLOSE(f_out);
+	FWCLOSE(f_w);
+	FWCLOSE(f_x);
+	FWCLOSE(f_y);
+}
+
 int main(int argc, char* argv[]) {
  GArgs args(argc, argv,
-   "version;debug;merge;adj-stop;bed;in-bed;tlf;in-tlf;cluster-only;nc;cov-info;help;"
+   "version;debug;merge;stream;adj-stop;bed;in-bed;tlf;in-tlf;cluster-only;nc;cov-info;help;"
     "sort-alpha;keep-genes;w-add=;keep-comments;keep-exon-attrs;force-exons;t-adopt;gene2exon;"
     "ignore-locus;no-pseudo;table=sort-by=hvOUNHPWCVJMKQYTDARSZFGLEBm:g:i:r:s:l:t:o:w:x:y:d:");
  args.printError(USAGE, true);
@@ -1041,6 +433,7 @@ int main(int argc, char* argv[]) {
  debugMode=(args.getOpt("debug")!=NULL);
  decodeChars=(args.getOpt('D')!=NULL);
  gffloader.forceExons=(args.getOpt("force-exons")!=NULL);
+ gffloader.streamIn=(args.getOpt("stream")!=NULL);
  gffloader.noPseudo=(args.getOpt("no-pseudo")!=NULL);
  gffloader.ignoreLocus=(args.getOpt("ignore-locus")!=NULL);
  gffloader.transcriptsOnly=(args.getOpt('O')==NULL);
@@ -1152,10 +545,13 @@ int main(int argc, char* argv[]) {
  }
 
  gffloader.mergeCloseExons=(args.getOpt('Z')!=NULL);
+
  multiExon=(args.getOpt('U')!=NULL);
  writeExonSegs=(args.getOpt('W')!=NULL);
  tracklabel=args.getOpt('t');
- GFastaDb gfasta(args.getOpt('g'));
+
+ if (args.getOpt('g'))
+	   gfasta.init(args.getOpt('g'));
  //if (gfasta.fastaPath!=NULL)
  //    sortByLoc=true; //enforce sorting by chromosome/contig
  GStr s=args.getOpt('i');
@@ -1163,7 +559,7 @@ int main(int argc, char* argv[]) {
  s=args.getOpt('l');
  if (!s.is_empty()) minLen=s.asInt();
 
- FILE* f_repl=NULL;
+ FILE* f_repl=NULL; //duplicate/collapsing info output file
  s=args.getOpt('d');
  if (!s.is_empty()) {
    if (s=="-") f_repl=stdout;
@@ -1249,6 +645,17 @@ int main(int argc, char* argv[]) {
  int numfiles = args.startNonOpt();
  //GList<GffObj> gfkept(false,true); //unsorted, free items on delete
  int out_counter=0; //number of records printed
+
+ if (fmtGTF)
+	 exonPrinting = gffloader.forceExons ? pgtfBoth : pgtfAny;
+ else if (fmtBED)
+	 exonPrinting=pgffBED;
+ else if (fmtTLF)
+	exonPrinting=pgffTLF;
+ else { //printing regular GFF3
+	exonPrinting = gffloader.forceExons ? pgffBoth : pgffAny;
+ }
+
  while (true) {
    GStr infile;
    if (numfiles) {
@@ -1268,7 +675,20 @@ int main(int argc, char* argv[]) {
    if (TLFinput || (Gstricmp(fext, "tlf")==0))
 	   gffloader.TLFinput=true;
    gffloader.openFile(infile);
-   gffloader.load(g_data, &validateGffRec, &processGffComment);
+   if (gffloader.streamIn) { //streaming in - disable all bulk load features
+  	 gffloader.transcriptsOnly=true;
+  	 gffloader.doCluster=false;
+  	 covInfo=false;
+   }
+
+   gffloader.load(g_data, &processGffComment);
+   if (gffloader.streamIn) {
+	   //we're done, GffLoader::load() took care of everything
+	   shutDown();
+	   return 0;
+   }
+
+
    // will also place the transcripts in loci, if doCluster is enabled
    if (gffloader.doCluster)
      collectLocusData(g_data, covInfo);
@@ -1293,16 +713,6 @@ int main(int argc, char* argv[]) {
  if (tracklabel) loctrack=tracklabel;
  if (gffloader.sortRefsAlpha)
     g_data.setSorted(&gseqCmpName);
- GffPrintMode exonPrinting;
- if (fmtGTF)
-	 exonPrinting = pgtfAny;
- else if (fmtBED)
-	 exonPrinting=pgffBED;
- else if (fmtTLF)
-	exonPrinting=pgffTLF;
- else { //printing regular GFF3
-	exonPrinting = gffloader.forceExons ? pgffBoth : pgffAny;
- }
  bool firstGff3Print=fmtGFF3;
  if (gffloader.doCluster) {
    //grouped in loci
@@ -1433,13 +843,14 @@ int main(int argc, char* argv[]) {
 				 }
 				 //for GFF3 && table output, print the parent first, if any
 				 if ((fmtGFF3 || fmtTable) && t.parent!=NULL && T_PRINTABLE(t.parent->udata)) {
-					 GTData* pdata=(GTData*)(t.parent->uptr);
-					 if (pdata && pdata->geneinfo!=NULL)
-						  pdata->geneinfo->finalize();
+					 //GTData* pdata=(GTData*)(t.parent->uptr);
+					 //if (pdata && pdata->geneinfo!=NULL)
+					 //  pdata->geneinfo->finalize();
 					 if (fmtTable)
 						 printTableData(f_out, *(t.parent));
-					 else
+					 else { //GFF3 output
 						 t.parent->printGxf(f_out, exonPrinting, tracklabel, NULL, decodeChars);
+					 }
 					 T_NO_PRINT(t.parent->udata);
 				 }
 				 if (fmtTable)
@@ -1469,14 +880,7 @@ int main(int argc, char* argv[]) {
     } //for each genomic seq
    } //no clustering
  if (f_repl && f_repl!=stdout) fclose(f_repl);
- seqinfo.Clear();
- //if (faseq!=NULL) delete faseq;
- //if (gcdb!=NULL) delete gcdb;
- GFREE(rfltGSeq);
- FWCLOSE(f_out);
- FWCLOSE(f_w);
- FWCLOSE(f_x);
- FWCLOSE(f_y);
+ shutDown();
  }
 
 


=====================================
tag_git.sh
=====================================
@@ -3,5 +3,12 @@ git checkout master
 ver=$(fgrep '#define VERSION ' gffread.cpp)
 ver=${ver#*\"}
 ver=${ver%%\"*}
+git fetch --tags
+if [[ "$1" == "delete" || "$1" == "del" ]]; then
+  echo "Deleting tag v$ver .."
+  git tag -d v$ver
+  git push origin :refs/tags/v$ver
+  exit
+fi
 git tag -a "v$ver" -m "release $ver"
 git push --tags



View it on GitLab: https://salsa.debian.org/med-team/gffread/-/compare/0cdbae03c6fe3d778d864dbcf4c738a8c8eecdbb...d719fba162be014938c92a2cad7558260fe37731

-- 
View it on GitLab: https://salsa.debian.org/med-team/gffread/-/compare/0cdbae03c6fe3d778d864dbcf4c738a8c8eecdbb...d719fba162be014938c92a2cad7558260fe37731
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20200810/f27bb1ac/attachment-0001.html>


More information about the debian-med-commit mailing list