[med-svn] [Git][med-team/aegean][upstream] New upstream version 0.16.0+dfsg

Sascha Steinbiss gitlab at salsa.debian.org
Fri Oct 5 17:11:00 BST 2018


Sascha Steinbiss pushed to branch upstream at Debian Med / aegean


Commits:
96c39eb4 by Sascha Steinbiss at 2018-10-05T16:03:28Z
New upstream version 0.16.0+dfsg
- - - - -


10 changed files:

- CHANGELOG.md
- VERSION
- data/gff3/aech-dachsous-out.gff3
- data/gff3/mrot-cst-out-cds.gff3
- data/gff3/nvit-exospindle-out.gff3
- data/misc/amel-ogs-vs-ncbi-parseval-html/index.html
- data/scripts/miloci.py
- src/core/AgnCompareReportHTML.c
- src/core/AgnLocusRefineStream.c
- src/core/AgnUtils.c


Changes:

=====================================
CHANGELOG.md
=====================================
@@ -2,6 +2,13 @@
 All notable changes to this project will be documented in this file.
 This project adheres to [Semantic Versioning](http://semver.org/).
 
+## [0.16.0] - 2016-05-09
+
+### Fixed
+- Outer gene in intron gene cases now designated as a ciLocus.
+- ciLoci are now exempt from merging into miLoci.
+- Fixes to how polycistrons and other edge cases are handled.
+
 ## [0.15.2] - 2016-03-17
 
 ### Fixed


=====================================
VERSION
=====================================
@@ -1 +1 @@
-v0.15.2 stable
+v0.16.0 stable


=====================================
data/gff3/aech-dachsous-out.gff3
=====================================
@@ -8,7 +8,7 @@
 #!annotation-source NCBI Acromyrmex echinatior Annotation Release 100
 NW_011626563.1	AEGeAn::LocusPocus	locus	1190754	1200781	.	.	.	child_gene=1;child_mRNA=2;riil=769;effective_length=10028;iLocus_type=siLocus
 NW_011626563.1	AEGeAn::LocusPocus	locus	1200782	1201550	.	.	.	fg_orient=FR;effective_length=769;iLocus_type=iiLocus
-NW_011626563.1	AEGeAn::LocusPocus	locus	1201551	1566216	.	.	.	effective_length=364666;iiLocus_exception=complex-overlap-3;liil=769;riil=8884;iLocus_type=siLocus;child_gene=1;child_mRNA=1
+NW_011626563.1	AEGeAn::LocusPocus	locus	1201551	1566216	.	.	.	iLocus_type=ciLocus;effective_length=364666;iiLocus_exception=complex-overlap-3;liil=769;riil=8884;child_gene=1;child_mRNA=1
 NW_011626563.1	AEGeAn::LocusPocus	locus	1216473	1219475	.	.	.	iiLocus_exception=intron-gene;liil=0;riil=0;iLocus_type=siLocus;child_gene=1;child_mRNA=2
 NW_011626563.1	AEGeAn::LocusPocus	locus	1537022	1540189	.	.	.	liil=0;riil=0;iLocus_type=niLocus;child_gene=1;child_ncRNA=1
 NW_011626563.1	AEGeAn::LocusPocus	locus	1566217	1575100	.	.	.	fg_orient=RR;effective_length=8884;iLocus_type=iiLocus


=====================================
data/gff3/mrot-cst-out-cds.gff3
=====================================
@@ -6,5 +6,5 @@
 #!genome-build-accession NCBI_Assembly:GCF_000220905.1
 #!annotation-date 7 April 2015
 #!annotation-source NCBI Megachile rotundata Annotation Release 101
-NW_003797177.1	AEGeAn::LocusPocus	locus	9652	19311	.	.	.	effective_length=9660;iLocus_type=siLocus;child_gene=1;child_mRNA=1
+NW_003797177.1	AEGeAn::LocusPocus	locus	9652	19311	.	.	.	iLocus_type=ciLocus;effective_length=9660;child_gene=1;child_mRNA=1
 NW_003797177.1	AEGeAn::LocusPocus	locus	11405	18146	.	.	.	iiLocus_exception=intron-gene;iLocus_type=siLocus;child_gene=1;child_mRNA=4


=====================================
data/gff3/nvit-exospindle-out.gff3
=====================================
@@ -7,6 +7,6 @@
 #!annotation-date 3 June 2014
 #!annotation-source NCBI Nasonia vitripennis Annotation Release 101
 NC_015867.2	AEGeAn::LocusPocus	locus	370926	373152	.	.	.	child_gene=1;child_mRNA=1;right_overlap=262;iiLocus_exception=delta-overlap-delta;riil=0;effective_length=1965;iLocus_type=siLocus
-NC_015867.2	AEGeAn::LocusPocus	locus	372891	380536	.	.	.	effective_length=6758;iLocus_type=siLocus;child_gene=1;child_mRNA=1
+NC_015867.2	AEGeAn::LocusPocus	locus	372891	380536	.	.	.	iLocus_type=ciLocus;effective_length=6758;child_gene=1;child_mRNA=1
 NC_015867.2	AEGeAn::LocusPocus	locus	374998	378903	.	.	.	iiLocus_exception=intron-gene;iLocus_type=siLocus;child_gene=1;child_mRNA=1
 NC_015867.2	AEGeAn::LocusPocus	locus	379649	381835	.	.	.	left_overlap=888;liil=0;child_gene=1;child_mRNA=1;effective_length=2187;iLocus_type=siLocus


=====================================
data/misc/amel-ogs-vs-ncbi-parseval-html/index.html
=====================================
@@ -43,7 +43,7 @@ Executing command:      bin/parseval --datashare=data/share/ --outformat=html --
         </tbody>
       </table>
 
-      <h2>Gene loci <span class="tooltip">[?]<span class="tooltip_text">If a gene annotation overlaps with another gene annotation, those annotations are associated with the same gene locus. See <a target="_blank" href="http://aegean.readthedocs.org/en/refactor/loci.html">this page</a> for a formal definition of a locus annotation.</span></span></h2>
+      <h2>Gene loci <span class="tooltip">[?]<span class="tooltip_text">If a gene annotation overlaps with another gene annotation, those annotations are associated with the same gene locus. See <a target="_blank" href="http://aegean.readthedocs.io/en/latest/loci.html">this page</a> for a formal definition of a locus annotation.</span></span></h2>
       <table class="table_normal">
         <tr><td>shared</td><td>7</td></tr>
         <tr><td>unique to reference</td><td>1</td></tr>


=====================================
data/scripts/miloci.py
=====================================
@@ -11,29 +11,67 @@ import re
 import sys
 
 
+class Locus(object):
+    def __init__(self, line):
+        self._rawdata = line
+        self.fields = line.strip().split('\t')
+        assert len(self.fields) == 9
+
+    @property
+    def seqid(self):
+        return self.fields[0]
+
+    @property
+    def start(self):
+        return int(self.fields[3])
+
+    @property
+    def end(self):
+        return int(self.fields[4])
+
+    @property
+    def ilocus_class(self):
+        typematch = re.search('iLocus_type=([^;\n]+)', self.fields[8])
+        assert typematch, 'could not determine iLocus type: ' + self._rawdata
+        return typematch.group(1)
+
+    @property
+    def mergeable(self):
+        if self.ilocus_class not in ['siLocus', 'niLocus']:
+            return False
+        if 'iiLocus_exception=intron-gene' in self.fields[8]:
+            return False
+        return True
+
+    def __len__(self):
+        return self.end - self.start + 1
+
+    def __str__(self):
+        return '\t'.join(self.fields)
+
+    def strip(self):
+        self.fields[8] = re.sub('ID=[^;\n]+;*', '', self.fields[8])
+        self.fields[8] = re.sub('Name=[^;\n]+;*', '', self.fields[8])
+
+
 def merge_iloci(loci):
     """Merge ajacent or overlapping gene-containing iLoci."""
     assert len(loci) > 0
     if len(loci) == 1:
-        line = re.sub('ID=[^;\n]+;*', '', loci[0])
-        line = re.sub('Name=[^;\n]+;*', '', line)
-        return line
+        loci[0].strip()
+        return loci[0]
 
     seqid = None
     start, end = -1, -1
     attrs = {}
     for locus in loci:
-        fields = locus.split('\t')
-        assert len(fields) == 9
         if seqid:
-            assert fields[0] == seqid
-        seqid = fields[0]
-        lstart = int(fields[3])
-        lend = int(fields[4])
-        if start == -1 or lstart < start:
-            start = lstart
-        end = max(end, lend)
-        numeric_attrs = re.findall('([^;=]+=\d+)', fields[8])
+            assert locus.seqid == seqid
+        seqid = locus.seqid
+        if start == -1 or locus.start < start:
+            start = locus.start
+        end = max(end, locus.end)
+        numeric_attrs = re.findall('([^;=]+=\d+)', locus.fields[8])
         for key_value_pair in numeric_attrs:
             assert '=' in key_value_pair, \
                 'malformed key/value pair %s' % key_value_pair
@@ -49,8 +87,9 @@ def merge_iloci(loci):
     for key in sorted(attrs):
         attrstring += ';%s=%d' % (key, attrs[key])
     gff3 = [seqid, 'AEGeAn::miloci.py', 'locus', str(start), str(end),
-            '%d' % len(loci), '.', '.',    attrstring]
-    return '\t'.join(gff3)
+            str(len(loci)), '.', '.', attrstring]
+    line = '\t'.join(gff3)
+    return Locus(line)
 
 
 def parse_iloci(fp):
@@ -59,34 +98,28 @@ def parse_iloci(fp):
     Output: merged iLoci; gene-containing iLoci that are adjacent or
             overlapping are combined
     """
-    seqid = None
-    prev_loci = []
+    locus_buffer = []
     for line in fp:
-        line = line.rstrip()
         if '\tlocus\t' not in line:
             continue
+        locus = Locus(line)
+
+        if len(locus_buffer) > 0 and locus.seqid != locus_buffer[0].seqid:
+            yield merge_iloci(locus_buffer)
+            locus_buffer = []
 
-        locusseqid = re.match('([^\t]+)', line).group(1)
-        if seqid is None:
-            seqid = locusseqid
-        elif locusseqid != seqid:
-            if len(prev_loci) > 0:
-                yield merge_iloci(prev_loci)
-                prev_loci = []
-            seqid = locusseqid
-
-        if ';child_gene=' in line:
-            prev_loci.append(line)
+        if locus.mergeable:
+            locus_buffer.append(locus)
             continue
         else:
-            if len(prev_loci) > 0:
-                yield merge_iloci(prev_loci)
-                prev_loci = []
-            line = re.sub('ID=[^;\n]+;*', '', line)
-            line = re.sub('Name=[^;\n]+;*', '', line)
-            yield line
-    if len(prev_loci) > 0:
-        yield merge_iloci(prev_loci)
+            if len(locus_buffer) > 0:
+                yield merge_iloci(locus_buffer)
+                locus_buffer = []
+            locus.strip()
+            yield locus
+
+    if len(locus_buffer) > 0:
+        yield merge_iloci(locus_buffer)
 
 
 if __name__ == '__main__':


=====================================
src/core/AgnCompareReportHTML.c
=====================================
@@ -403,7 +403,7 @@ static void compare_report_html_footer(FILE *outstream)
   fprintf(outstream,
           "      <p class=\"footer\">\n"
           "        Generated by <a href=\"%s\">AEGeAn %s (%s %s)</a>.<br />\n"
-          "        Copyright © %s <a href=\"http://aegean.readthedocs.org/en/"
+          "        Copyright © %s <a href=\"http://aegean.readthedocs.io/en/"
           "latest/contrib.html\">AEGeAn authors</a>.<br />\n"
           "        See <a href=\"LICENSE\">LICENSE</a> for details."
           "      </p>\n", AGN_VERSION_LINK, AGN_SEMANTIC_VERSION,
@@ -712,7 +712,7 @@ static void compare_report_html_print_compclassfiles(AgnCompareReportHTML *rpt)
 {
   GtArray *compclassdata;
   GtUword i;
-  
+
   compclassdata = gt_hashmap_get(rpt->compclassdata, "perfect");
   if(gt_array_size(compclassdata) > 0)
   {
@@ -734,7 +734,7 @@ static void compare_report_html_print_compclassfiles(AgnCompareReportHTML *rpt)
     compare_report_html_seqfile_footer(outstream);
     fclose(outstream);
   }
-  
+
   compclassdata = gt_hashmap_get(rpt->compclassdata, "mislabeled");
   if(gt_array_size(compclassdata) > 0)
   {
@@ -757,7 +757,7 @@ static void compare_report_html_print_compclassfiles(AgnCompareReportHTML *rpt)
     compare_report_html_seqfile_footer(outstream);
     fclose(outstream);
   }
-  
+
   compclassdata = gt_hashmap_get(rpt->compclassdata, "cds");
   if(gt_array_size(compclassdata) > 0)
   {
@@ -779,7 +779,7 @@ static void compare_report_html_print_compclassfiles(AgnCompareReportHTML *rpt)
     compare_report_html_seqfile_footer(outstream);
     fclose(outstream);
   }
-  
+
   compclassdata = gt_hashmap_get(rpt->compclassdata, "exon");
   if(gt_array_size(compclassdata) > 0)
   {
@@ -801,7 +801,7 @@ static void compare_report_html_print_compclassfiles(AgnCompareReportHTML *rpt)
     compare_report_html_seqfile_footer(outstream);
     fclose(outstream);
   }
-  
+
   compclassdata = gt_hashmap_get(rpt->compclassdata, "utr");
   if(gt_array_size(compclassdata) > 0)
   {
@@ -823,7 +823,7 @@ static void compare_report_html_print_compclassfiles(AgnCompareReportHTML *rpt)
     compare_report_html_seqfile_footer(outstream);
     fclose(outstream);
   }
-  
+
   compclassdata = gt_hashmap_get(rpt->compclassdata, "nonmatch");
   if(gt_array_size(compclassdata) > 0)
   {
@@ -1127,7 +1127,7 @@ static void compare_report_html_summary_annot(AgnCompInfo *info,
           "      <h2>Gene loci <span class=\"tooltip\">[?]<span class=\"tooltip_text\">If a gene "
           "annotation overlaps with another gene annotation, those annotations are associated "
           "with the same gene locus. See <a target=\"_blank\" "
-          "href=\"http://aegean.readthedocs.org/en/refactor/loci.html\">"
+          "href=\"http://aegean.readthedocs.io/en/latest/loci.html\">"
           "this page</a> for a formal definition of a locus annotation.</span></span></h2>\n"
           "      <table class=\"table_normal\">\n"
           "        <tr><td>shared</td><td>%lu</td></tr>\n"


=====================================
src/core/AgnLocusRefineStream.c
=====================================
@@ -311,6 +311,8 @@ static bool refine_locus_check_intron_genes(AgnLocusRefineStream *stream,
   GtStr *seqid = gt_genome_node_get_seqid(*gn1);
   AgnLocus *locus = agn_locus_new(seqid);
   agn_locus_add_feature(locus, fn1);
+  gt_feature_node_add_attribute((GtFeatureNode *)locus, "iLocus_type",
+                                "ciLocus");
   gt_genome_node_ref(*gn1);
   gt_array_add(iloci, locus);
 
@@ -408,11 +410,14 @@ static void locus_refine_stream_extend(AgnLocusRefineStream *stream,
   for(i = 0; i < numloci; i++)
   {
     GtGenomeNode **gn = gt_array_get(iloci, i);
+    GtFeatureNode *fn = gt_feature_node_cast(*gn);
     if(i == 0)
-      coding_status = agn_locus_num_mrnas(*gn) > 0;
+    {
+      coding_status = agn_typecheck_count(fn, agn_typecheck_cds) > 0;
+    }
     else
     {
-      bool test_status = agn_locus_num_mrnas(*gn) > 0;
+      bool test_status = agn_typecheck_count(fn, agn_typecheck_cds) > 0;
       same_coding_status = coding_status == test_status;
       if(!same_coding_status)
         break;
@@ -467,7 +472,10 @@ static void locus_refine_stream_extend(AgnLocusRefineStream *stream,
             fprintf(stream->ilenfile, "%s\t0\n", gt_str_get(seqid));
         }
       }
-      gt_feature_node_add_attribute(fn, "iLocus_type", typestr);
+      if(gt_feature_node_get_attribute(fn, "iLocus_type") == NULL)
+      {
+        gt_feature_node_add_attribute(fn, "iLocus_type", typestr);
+      }
     }
     if(numloci == 1)
     {
@@ -489,7 +497,7 @@ static void locus_refine_stream_extend(AgnLocusRefineStream *stream,
     GtFeatureNode *fn1 = gt_feature_node_cast(*gn1);
     GtFeatureNode *fn2 = gt_feature_node_cast(*gn2);
 
-    bool cds1 = agn_locus_num_mrnas(*gn1) > 0;
+    bool cds1 = agn_typecheck_count(fn1, agn_typecheck_cds) > 0;;
     if(cds1 == true)
     {
       gt_feature_node_add_attribute(fn1, "iLocus_type", "siLocus");
@@ -624,7 +632,10 @@ static void locus_refine_stream_extend(AgnLocusRefineStream *stream,
       {
         type = "ciLocus";
       }
-      gt_feature_node_add_attribute(fn, "iLocus_type", type);
+      if(gt_feature_node_get_attribute(fn, "iLocus_type") == NULL)
+      {
+        gt_feature_node_add_attribute(fn, "iLocus_type", type);
+      }
     }
   }
 


=====================================
src/core/AgnUtils.c
=====================================
@@ -269,6 +269,9 @@ bool agn_overlap_ilocus(GtGenomeNode *f1, GtGenomeNode *f2,
   if(gt_str_cmp(seqid1, seqid2) != 0)
     return false;
 
+  GtRange r1 = gt_genome_node_get_range(f1);
+  GtRange r2 = gt_genome_node_get_range(f2);
+
   if(by_cds)
   {
     GtRange c1 = agn_feature_node_get_cds_range((GtFeatureNode *)f1);
@@ -286,13 +289,18 @@ bool agn_overlap_ilocus(GtGenomeNode *f1, GtGenomeNode *f2,
     {
       // Both have coding sequences, use those instead of the complete feature
       // coordinates.
+
+      if(gt_range_compare(&r1, &r2) == 0)
+      {
+        // Polycistrons belong together
+        return true;
+      }
+
       return gt_range_overlap_delta(&c1, &c2, minoverlap);
     }
   }
 
   // Either we are not in CDS mode, or the features don't have a CDS.
-  GtRange r1 = gt_genome_node_get_range(f1);
-  GtRange r2 = gt_genome_node_get_range(f2);
   return gt_range_overlap_delta(&r1, &r2, minoverlap);
 }
 



View it on GitLab: https://salsa.debian.org/med-team/aegean/commit/96c39eb486a856f52e9770b7a0fde07bef032724

-- 
View it on GitLab: https://salsa.debian.org/med-team/aegean/commit/96c39eb486a856f52e9770b7a0fde07bef032724
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20181005/629b6076/attachment-0001.html>


More information about the debian-med-commit mailing list