[med-svn] [Git][med-team/aegean][upstream] New upstream version 0.16.0+dfsg
Sascha Steinbiss
gitlab at salsa.debian.org
Fri Oct 5 17:11:00 BST 2018
Sascha Steinbiss pushed to branch upstream at Debian Med / aegean
Commits:
96c39eb4 by Sascha Steinbiss at 2018-10-05T16:03:28Z
New upstream version 0.16.0+dfsg
- - - - -
10 changed files:
- CHANGELOG.md
- VERSION
- data/gff3/aech-dachsous-out.gff3
- data/gff3/mrot-cst-out-cds.gff3
- data/gff3/nvit-exospindle-out.gff3
- data/misc/amel-ogs-vs-ncbi-parseval-html/index.html
- data/scripts/miloci.py
- src/core/AgnCompareReportHTML.c
- src/core/AgnLocusRefineStream.c
- src/core/AgnUtils.c
Changes:
=====================================
CHANGELOG.md
=====================================
@@ -2,6 +2,13 @@
All notable changes to this project will be documented in this file.
This project adheres to [Semantic Versioning](http://semver.org/).
+## [0.16.0] - 2016-05-09
+
+### Fixed
+- Outer gene in intron gene cases now designated as a ciLocus.
+- ciLoci are now exempt from merging into miLoci.
+- Fixes to how polycistrons and other edge cases are handled.
+
## [0.15.2] - 2016-03-17
### Fixed
=====================================
VERSION
=====================================
@@ -1 +1 @@
-v0.15.2 stable
+v0.16.0 stable
=====================================
data/gff3/aech-dachsous-out.gff3
=====================================
@@ -8,7 +8,7 @@
#!annotation-source NCBI Acromyrmex echinatior Annotation Release 100
NW_011626563.1 AEGeAn::LocusPocus locus 1190754 1200781 . . . child_gene=1;child_mRNA=2;riil=769;effective_length=10028;iLocus_type=siLocus
NW_011626563.1 AEGeAn::LocusPocus locus 1200782 1201550 . . . fg_orient=FR;effective_length=769;iLocus_type=iiLocus
-NW_011626563.1 AEGeAn::LocusPocus locus 1201551 1566216 . . . effective_length=364666;iiLocus_exception=complex-overlap-3;liil=769;riil=8884;iLocus_type=siLocus;child_gene=1;child_mRNA=1
+NW_011626563.1 AEGeAn::LocusPocus locus 1201551 1566216 . . . iLocus_type=ciLocus;effective_length=364666;iiLocus_exception=complex-overlap-3;liil=769;riil=8884;child_gene=1;child_mRNA=1
NW_011626563.1 AEGeAn::LocusPocus locus 1216473 1219475 . . . iiLocus_exception=intron-gene;liil=0;riil=0;iLocus_type=siLocus;child_gene=1;child_mRNA=2
NW_011626563.1 AEGeAn::LocusPocus locus 1537022 1540189 . . . liil=0;riil=0;iLocus_type=niLocus;child_gene=1;child_ncRNA=1
NW_011626563.1 AEGeAn::LocusPocus locus 1566217 1575100 . . . fg_orient=RR;effective_length=8884;iLocus_type=iiLocus
=====================================
data/gff3/mrot-cst-out-cds.gff3
=====================================
@@ -6,5 +6,5 @@
#!genome-build-accession NCBI_Assembly:GCF_000220905.1
#!annotation-date 7 April 2015
#!annotation-source NCBI Megachile rotundata Annotation Release 101
-NW_003797177.1 AEGeAn::LocusPocus locus 9652 19311 . . . effective_length=9660;iLocus_type=siLocus;child_gene=1;child_mRNA=1
+NW_003797177.1 AEGeAn::LocusPocus locus 9652 19311 . . . iLocus_type=ciLocus;effective_length=9660;child_gene=1;child_mRNA=1
NW_003797177.1 AEGeAn::LocusPocus locus 11405 18146 . . . iiLocus_exception=intron-gene;iLocus_type=siLocus;child_gene=1;child_mRNA=4
=====================================
data/gff3/nvit-exospindle-out.gff3
=====================================
@@ -7,6 +7,6 @@
#!annotation-date 3 June 2014
#!annotation-source NCBI Nasonia vitripennis Annotation Release 101
NC_015867.2 AEGeAn::LocusPocus locus 370926 373152 . . . child_gene=1;child_mRNA=1;right_overlap=262;iiLocus_exception=delta-overlap-delta;riil=0;effective_length=1965;iLocus_type=siLocus
-NC_015867.2 AEGeAn::LocusPocus locus 372891 380536 . . . effective_length=6758;iLocus_type=siLocus;child_gene=1;child_mRNA=1
+NC_015867.2 AEGeAn::LocusPocus locus 372891 380536 . . . iLocus_type=ciLocus;effective_length=6758;child_gene=1;child_mRNA=1
NC_015867.2 AEGeAn::LocusPocus locus 374998 378903 . . . iiLocus_exception=intron-gene;iLocus_type=siLocus;child_gene=1;child_mRNA=1
NC_015867.2 AEGeAn::LocusPocus locus 379649 381835 . . . left_overlap=888;liil=0;child_gene=1;child_mRNA=1;effective_length=2187;iLocus_type=siLocus
=====================================
data/misc/amel-ogs-vs-ncbi-parseval-html/index.html
=====================================
@@ -43,7 +43,7 @@ Executing command: bin/parseval --datashare=data/share/ --outformat=html --
</tbody>
</table>
- <h2>Gene loci <span class="tooltip">[?]<span class="tooltip_text">If a gene annotation overlaps with another gene annotation, those annotations are associated with the same gene locus. See <a target="_blank" href="http://aegean.readthedocs.org/en/refactor/loci.html">this page</a> for a formal definition of a locus annotation.</span></span></h2>
+ <h2>Gene loci <span class="tooltip">[?]<span class="tooltip_text">If a gene annotation overlaps with another gene annotation, those annotations are associated with the same gene locus. See <a target="_blank" href="http://aegean.readthedocs.io/en/latest/loci.html">this page</a> for a formal definition of a locus annotation.</span></span></h2>
<table class="table_normal">
<tr><td>shared</td><td>7</td></tr>
<tr><td>unique to reference</td><td>1</td></tr>
=====================================
data/scripts/miloci.py
=====================================
@@ -11,29 +11,67 @@ import re
import sys
+class Locus(object):
+ def __init__(self, line):
+ self._rawdata = line
+ self.fields = line.strip().split('\t')
+ assert len(self.fields) == 9
+
+ @property
+ def seqid(self):
+ return self.fields[0]
+
+ @property
+ def start(self):
+ return int(self.fields[3])
+
+ @property
+ def end(self):
+ return int(self.fields[4])
+
+ @property
+ def ilocus_class(self):
+ typematch = re.search('iLocus_type=([^;\n]+)', self.fields[8])
+ assert typematch, 'could not determine iLocus type: ' + self._rawdata
+ return typematch.group(1)
+
+ @property
+ def mergeable(self):
+ if self.ilocus_class not in ['siLocus', 'niLocus']:
+ return False
+ if 'iiLocus_exception=intron-gene' in self.fields[8]:
+ return False
+ return True
+
+ def __len__(self):
+ return self.end - self.start + 1
+
+ def __str__(self):
+ return '\t'.join(self.fields)
+
+ def strip(self):
+ self.fields[8] = re.sub('ID=[^;\n]+;*', '', self.fields[8])
+ self.fields[8] = re.sub('Name=[^;\n]+;*', '', self.fields[8])
+
+
def merge_iloci(loci):
"""Merge ajacent or overlapping gene-containing iLoci."""
assert len(loci) > 0
if len(loci) == 1:
- line = re.sub('ID=[^;\n]+;*', '', loci[0])
- line = re.sub('Name=[^;\n]+;*', '', line)
- return line
+ loci[0].strip()
+ return loci[0]
seqid = None
start, end = -1, -1
attrs = {}
for locus in loci:
- fields = locus.split('\t')
- assert len(fields) == 9
if seqid:
- assert fields[0] == seqid
- seqid = fields[0]
- lstart = int(fields[3])
- lend = int(fields[4])
- if start == -1 or lstart < start:
- start = lstart
- end = max(end, lend)
- numeric_attrs = re.findall('([^;=]+=\d+)', fields[8])
+ assert locus.seqid == seqid
+ seqid = locus.seqid
+ if start == -1 or locus.start < start:
+ start = locus.start
+ end = max(end, locus.end)
+ numeric_attrs = re.findall('([^;=]+=\d+)', locus.fields[8])
for key_value_pair in numeric_attrs:
assert '=' in key_value_pair, \
'malformed key/value pair %s' % key_value_pair
@@ -49,8 +87,9 @@ def merge_iloci(loci):
for key in sorted(attrs):
attrstring += ';%s=%d' % (key, attrs[key])
gff3 = [seqid, 'AEGeAn::miloci.py', 'locus', str(start), str(end),
- '%d' % len(loci), '.', '.', attrstring]
- return '\t'.join(gff3)
+ str(len(loci)), '.', '.', attrstring]
+ line = '\t'.join(gff3)
+ return Locus(line)
def parse_iloci(fp):
@@ -59,34 +98,28 @@ def parse_iloci(fp):
Output: merged iLoci; gene-containing iLoci that are adjacent or
overlapping are combined
"""
- seqid = None
- prev_loci = []
+ locus_buffer = []
for line in fp:
- line = line.rstrip()
if '\tlocus\t' not in line:
continue
+ locus = Locus(line)
+
+ if len(locus_buffer) > 0 and locus.seqid != locus_buffer[0].seqid:
+ yield merge_iloci(locus_buffer)
+ locus_buffer = []
- locusseqid = re.match('([^\t]+)', line).group(1)
- if seqid is None:
- seqid = locusseqid
- elif locusseqid != seqid:
- if len(prev_loci) > 0:
- yield merge_iloci(prev_loci)
- prev_loci = []
- seqid = locusseqid
-
- if ';child_gene=' in line:
- prev_loci.append(line)
+ if locus.mergeable:
+ locus_buffer.append(locus)
continue
else:
- if len(prev_loci) > 0:
- yield merge_iloci(prev_loci)
- prev_loci = []
- line = re.sub('ID=[^;\n]+;*', '', line)
- line = re.sub('Name=[^;\n]+;*', '', line)
- yield line
- if len(prev_loci) > 0:
- yield merge_iloci(prev_loci)
+ if len(locus_buffer) > 0:
+ yield merge_iloci(locus_buffer)
+ locus_buffer = []
+ locus.strip()
+ yield locus
+
+ if len(locus_buffer) > 0:
+ yield merge_iloci(locus_buffer)
if __name__ == '__main__':
=====================================
src/core/AgnCompareReportHTML.c
=====================================
@@ -403,7 +403,7 @@ static void compare_report_html_footer(FILE *outstream)
fprintf(outstream,
" <p class=\"footer\">\n"
" Generated by <a href=\"%s\">AEGeAn %s (%s %s)</a>.<br />\n"
- " Copyright © %s <a href=\"http://aegean.readthedocs.org/en/"
+ " Copyright © %s <a href=\"http://aegean.readthedocs.io/en/"
"latest/contrib.html\">AEGeAn authors</a>.<br />\n"
" See <a href=\"LICENSE\">LICENSE</a> for details."
" </p>\n", AGN_VERSION_LINK, AGN_SEMANTIC_VERSION,
@@ -712,7 +712,7 @@ static void compare_report_html_print_compclassfiles(AgnCompareReportHTML *rpt)
{
GtArray *compclassdata;
GtUword i;
-
+
compclassdata = gt_hashmap_get(rpt->compclassdata, "perfect");
if(gt_array_size(compclassdata) > 0)
{
@@ -734,7 +734,7 @@ static void compare_report_html_print_compclassfiles(AgnCompareReportHTML *rpt)
compare_report_html_seqfile_footer(outstream);
fclose(outstream);
}
-
+
compclassdata = gt_hashmap_get(rpt->compclassdata, "mislabeled");
if(gt_array_size(compclassdata) > 0)
{
@@ -757,7 +757,7 @@ static void compare_report_html_print_compclassfiles(AgnCompareReportHTML *rpt)
compare_report_html_seqfile_footer(outstream);
fclose(outstream);
}
-
+
compclassdata = gt_hashmap_get(rpt->compclassdata, "cds");
if(gt_array_size(compclassdata) > 0)
{
@@ -779,7 +779,7 @@ static void compare_report_html_print_compclassfiles(AgnCompareReportHTML *rpt)
compare_report_html_seqfile_footer(outstream);
fclose(outstream);
}
-
+
compclassdata = gt_hashmap_get(rpt->compclassdata, "exon");
if(gt_array_size(compclassdata) > 0)
{
@@ -801,7 +801,7 @@ static void compare_report_html_print_compclassfiles(AgnCompareReportHTML *rpt)
compare_report_html_seqfile_footer(outstream);
fclose(outstream);
}
-
+
compclassdata = gt_hashmap_get(rpt->compclassdata, "utr");
if(gt_array_size(compclassdata) > 0)
{
@@ -823,7 +823,7 @@ static void compare_report_html_print_compclassfiles(AgnCompareReportHTML *rpt)
compare_report_html_seqfile_footer(outstream);
fclose(outstream);
}
-
+
compclassdata = gt_hashmap_get(rpt->compclassdata, "nonmatch");
if(gt_array_size(compclassdata) > 0)
{
@@ -1127,7 +1127,7 @@ static void compare_report_html_summary_annot(AgnCompInfo *info,
" <h2>Gene loci <span class=\"tooltip\">[?]<span class=\"tooltip_text\">If a gene "
"annotation overlaps with another gene annotation, those annotations are associated "
"with the same gene locus. See <a target=\"_blank\" "
- "href=\"http://aegean.readthedocs.org/en/refactor/loci.html\">"
+ "href=\"http://aegean.readthedocs.io/en/latest/loci.html\">"
"this page</a> for a formal definition of a locus annotation.</span></span></h2>\n"
" <table class=\"table_normal\">\n"
" <tr><td>shared</td><td>%lu</td></tr>\n"
=====================================
src/core/AgnLocusRefineStream.c
=====================================
@@ -311,6 +311,8 @@ static bool refine_locus_check_intron_genes(AgnLocusRefineStream *stream,
GtStr *seqid = gt_genome_node_get_seqid(*gn1);
AgnLocus *locus = agn_locus_new(seqid);
agn_locus_add_feature(locus, fn1);
+ gt_feature_node_add_attribute((GtFeatureNode *)locus, "iLocus_type",
+ "ciLocus");
gt_genome_node_ref(*gn1);
gt_array_add(iloci, locus);
@@ -408,11 +410,14 @@ static void locus_refine_stream_extend(AgnLocusRefineStream *stream,
for(i = 0; i < numloci; i++)
{
GtGenomeNode **gn = gt_array_get(iloci, i);
+ GtFeatureNode *fn = gt_feature_node_cast(*gn);
if(i == 0)
- coding_status = agn_locus_num_mrnas(*gn) > 0;
+ {
+ coding_status = agn_typecheck_count(fn, agn_typecheck_cds) > 0;
+ }
else
{
- bool test_status = agn_locus_num_mrnas(*gn) > 0;
+ bool test_status = agn_typecheck_count(fn, agn_typecheck_cds) > 0;
same_coding_status = coding_status == test_status;
if(!same_coding_status)
break;
@@ -467,7 +472,10 @@ static void locus_refine_stream_extend(AgnLocusRefineStream *stream,
fprintf(stream->ilenfile, "%s\t0\n", gt_str_get(seqid));
}
}
- gt_feature_node_add_attribute(fn, "iLocus_type", typestr);
+ if(gt_feature_node_get_attribute(fn, "iLocus_type") == NULL)
+ {
+ gt_feature_node_add_attribute(fn, "iLocus_type", typestr);
+ }
}
if(numloci == 1)
{
@@ -489,7 +497,7 @@ static void locus_refine_stream_extend(AgnLocusRefineStream *stream,
GtFeatureNode *fn1 = gt_feature_node_cast(*gn1);
GtFeatureNode *fn2 = gt_feature_node_cast(*gn2);
- bool cds1 = agn_locus_num_mrnas(*gn1) > 0;
+ bool cds1 = agn_typecheck_count(fn1, agn_typecheck_cds) > 0;;
if(cds1 == true)
{
gt_feature_node_add_attribute(fn1, "iLocus_type", "siLocus");
@@ -624,7 +632,10 @@ static void locus_refine_stream_extend(AgnLocusRefineStream *stream,
{
type = "ciLocus";
}
- gt_feature_node_add_attribute(fn, "iLocus_type", type);
+ if(gt_feature_node_get_attribute(fn, "iLocus_type") == NULL)
+ {
+ gt_feature_node_add_attribute(fn, "iLocus_type", type);
+ }
}
}
=====================================
src/core/AgnUtils.c
=====================================
@@ -269,6 +269,9 @@ bool agn_overlap_ilocus(GtGenomeNode *f1, GtGenomeNode *f2,
if(gt_str_cmp(seqid1, seqid2) != 0)
return false;
+ GtRange r1 = gt_genome_node_get_range(f1);
+ GtRange r2 = gt_genome_node_get_range(f2);
+
if(by_cds)
{
GtRange c1 = agn_feature_node_get_cds_range((GtFeatureNode *)f1);
@@ -286,13 +289,18 @@ bool agn_overlap_ilocus(GtGenomeNode *f1, GtGenomeNode *f2,
{
// Both have coding sequences, use those instead of the complete feature
// coordinates.
+
+ if(gt_range_compare(&r1, &r2) == 0)
+ {
+ // Polycistrons belong together
+ return true;
+ }
+
return gt_range_overlap_delta(&c1, &c2, minoverlap);
}
}
// Either we are not in CDS mode, or the features don't have a CDS.
- GtRange r1 = gt_genome_node_get_range(f1);
- GtRange r2 = gt_genome_node_get_range(f2);
return gt_range_overlap_delta(&r1, &r2, minoverlap);
}
View it on GitLab: https://salsa.debian.org/med-team/aegean/commit/96c39eb486a856f52e9770b7a0fde07bef032724
--
View it on GitLab: https://salsa.debian.org/med-team/aegean/commit/96c39eb486a856f52e9770b7a0fde07bef032724
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20181005/629b6076/attachment-0001.html>
More information about the debian-med-commit mailing list