[med-svn] [gfapy] 01/02: New upstream version 1.0.0+dfsg
Sascha Steinbiss
satta at debian.org
Wed Jul 19 11:49:10 UTC 2017
This is an automated email from the git hooks/post-receive script.
satta pushed a commit to branch master
in repository gfapy.
commit fd9c8aa725c91f3b1fbb3a78cf513bdc36497332
Author: Sascha Steinbiss <satta at debian.org>
Date: Wed Jul 19 13:38:59 2017 +0200
New upstream version 1.0.0+dfsg
---
.gitignore | 13 +
.travis.yml | 10 +
CHANGES.txt | 3 +
CONTRIBUTORS | 6 +
LICENSE.txt | 19 +
MANIFEST.in | 3 +
Makefile | 49 +
README.rst | 72 +
benchmarks/.gitignore | 3 +
benchmarks/gfapy-benchmark-collectdata | 65 +
benchmarks/gfapy-plot-benchmarkdata.R | 120 ++
benchmarks/gfapy-plot-preparedata.py | 62 +
benchmarks/gfapy-profiler.sh | 61 +
benchmarks/gfapy-randomgraph | 87 ++
benchmarks/gfapy-reproduce-manuscript-figure.py | 76 +
benchmarks/gfapy-run-benchmarks.sh | 67 +
bin/gfapy-convert | 28 +
bin/gfapy-diff | 47 +
bin/gfapy-mergelinear | 37 +
bin/gfapy-validate | 21 +
doc/.gitignore | 2 +
doc/Makefile | 23 +
doc/changelog.rst | 4 +
doc/conf.py | 173 +++
doc/index.rst | 35 +
doc/readme.rst | 3 +
doc/run_apidoc.sh | 1 +
doc/tutorial/alignments.rst | 238 +++
doc/tutorial/comments.rst | 71 +
doc/tutorial/custom_records.rst | 296 ++++
doc/tutorial/errors.rst | 43 +
doc/tutorial/gfa.rst | 409 +++++
doc/tutorial/graph_operations.rst | 14 +
doc/tutorial/header.rst | 169 ++
doc/tutorial/placeholders.rst | 69 +
doc/tutorial/positional_fields.rst | 448 ++++++
doc/tutorial/positions.rst | 75 +
doc/tutorial/references.rst | 443 ++++++
doc/tutorial/tags.rst | 420 +++++
doc/tutorial/validation.rst | 78 +
gfapy/__init__.py | 25 +
gfapy/alignment/__init__.py | 2 +
gfapy/alignment/alignment.py | 159 ++
gfapy/alignment/cigar.py | 197 +++
gfapy/alignment/placeholder.py | 20 +
gfapy/alignment/trace.py | 80 +
gfapy/byte_array.py | 63 +
gfapy/error.py | 67 +
gfapy/field/__init__.py | 4 +
gfapy/field/alignment_gfa1.py | 43 +
gfapy/field/alignment_gfa2.py | 32 +
gfapy/field/alignment_list_gfa1.py | 54 +
gfapy/field/byte_array.py | 44 +
gfapy/field/char.py | 28 +
gfapy/field/comment.py | 28 +
gfapy/field/custom_record_type.py | 34 +
gfapy/field/field.py | 203 +++
gfapy/field/float.py | 36 +
gfapy/field/generic.py | 29 +
gfapy/field/identifier_gfa2.py | 42 +
gfapy/field/identifier_list_gfa2.py | 61 +
gfapy/field/integer.py | 40 +
gfapy/field/json.py | 58 +
gfapy/field/numeric_array.py | 45 +
gfapy/field/optional_identifier_gfa2.py | 62 +
gfapy/field/optional_integer.py | 44 +
gfapy/field/orientation.py | 36 +
gfapy/field/oriented_identifier_gfa2.py | 46 +
gfapy/field/oriented_identifier_list_gfa1.py | 53 +
gfapy/field/oriented_identifier_list_gfa2.py | 53 +
gfapy/field/parser.py | 100 ++
gfapy/field/path_name_gfa1.py | 38 +
gfapy/field/position_gfa1.py | 40 +
gfapy/field/position_gfa2.py | 42 +
gfapy/field/segment_name_gfa1.py | 48 +
gfapy/field/sequence_gfa1.py | 45 +
gfapy/field/sequence_gfa2.py | 45 +
gfapy/field/string.py | 29 +
gfapy/field/validator.py | 81 +
gfapy/field/writer.py | 94 ++
gfapy/field_array.py | 134 ++
gfapy/gfa.py | 315 ++++
gfapy/graph_operations/__init__.py | 1 +
gfapy/graph_operations/artifacts.py | 39 +
gfapy/graph_operations/copy_number.py | 79 +
gfapy/graph_operations/graph_operations.py | 13 +
gfapy/graph_operations/invertible_segments.py | 89 ++
gfapy/graph_operations/linear_paths.py | 364 +++++
gfapy/graph_operations/multiplication.py | 197 +++
gfapy/graph_operations/p_bubbles.py | 48 +
gfapy/graph_operations/redundant_linear_paths.py | 116 ++
gfapy/graph_operations/superfluous_links.py | 76 +
gfapy/graph_operations/topology.py | 189 +++
gfapy/lastpos.py | 161 ++
gfapy/line/__init__.py | 10 +
gfapy/line/comment/__init__.py | 1 +
gfapy/line/comment/comment.py | 30 +
gfapy/line/comment/construction.py | 11 +
gfapy/line/comment/tags.py | 14 +
gfapy/line/comment/version_conversion.py | 5 +
gfapy/line/comment/writer.py | 13 +
gfapy/line/common/__init__.py | 13 +
gfapy/line/common/cloning.py | 35 +
gfapy/line/common/connection.py | 110 ++
gfapy/line/common/construction.py | 335 ++++
gfapy/line/common/default_record_definition.py | 19 +
gfapy/line/common/disconnection.py | 116 ++
gfapy/line/common/dynamic_fields.py | 84 +
gfapy/line/common/equivalence.py | 171 +++
gfapy/line/common/field_data.py | 198 +++
gfapy/line/common/field_datatype.py | 66 +
gfapy/line/common/update_references.py | 119 ++
gfapy/line/common/validate.py | 70 +
gfapy/line/common/version_conversion.py | 86 ++
gfapy/line/common/virtual_to_real.py | 77 +
gfapy/line/common/writer.py | 145 ++
gfapy/line/custom_record/__init__.py | 1 +
gfapy/line/custom_record/construction.py | 59 +
gfapy/line/custom_record/custom_record.py | 25 +
gfapy/line/edge/__init__.py | 4 +
gfapy/line/edge/common/__init__.py | 0
gfapy/line/edge/common/alignment_type.py | 39 +
gfapy/line/edge/common/from_to.py | 138 ++
gfapy/line/edge/containment/__init__.py | 1 +
gfapy/line/edge/containment/canonical.py | 31 +
gfapy/line/edge/containment/containment.py | 53 +
gfapy/line/edge/containment/pos.py | 19 +
gfapy/line/edge/containment/to_gfa2.py | 31 +
gfapy/line/edge/edge.py | 8 +
gfapy/line/edge/gfa1/__init__.py | 0
gfapy/line/edge/gfa1/alignment_type.py | 4 +
gfapy/line/edge/gfa1/oriented_segments.py | 23 +
gfapy/line/edge/gfa1/other.py | 62 +
gfapy/line/edge/gfa1/references.py | 35 +
gfapy/line/edge/gfa1/to_gfa2.py | 93 ++
gfapy/line/edge/gfa2/__init__.py | 1 +
gfapy/line/edge/gfa2/alignment_type.py | 62 +
gfapy/line/edge/gfa2/gfa2.py | 36 +
gfapy/line/edge/gfa2/other.py | 47 +
gfapy/line/edge/gfa2/references.py | 66 +
gfapy/line/edge/gfa2/to_gfa1.py | 219 +++
gfapy/line/edge/gfa2/validation.py | 22 +
gfapy/line/edge/link/__init__.py | 1 +
gfapy/line/edge/link/canonical.py | 50 +
gfapy/line/edge/link/complement.py | 54 +
gfapy/line/edge/link/equivalence.py | 229 +++
gfapy/line/edge/link/link.py | 50 +
gfapy/line/edge/link/references.py | 7 +
gfapy/line/edge/link/to_gfa2.py | 36 +
gfapy/line/fragment/__init__.py | 1 +
gfapy/line/fragment/fragment.py | 26 +
gfapy/line/fragment/references.py | 17 +
gfapy/line/fragment/validation.py | 21 +
gfapy/line/gap/__init__.py | 1 +
gfapy/line/gap/gap.py | 23 +
gfapy/line/gap/references.py | 43 +
gfapy/line/group/__init__.py | 4 +
gfapy/line/group/gfa2/__init__.py | 0
gfapy/line/group/gfa2/references.py | 51 +
gfapy/line/group/gfa2/same_id.py | 26 +
gfapy/line/group/group.py | 6 +
gfapy/line/group/ordered/__init__.py | 1 +
gfapy/line/group/ordered/captured_path.py | 221 +++
gfapy/line/group/ordered/ordered.py | 25 +
gfapy/line/group/ordered/references.py | 79 +
gfapy/line/group/ordered/to_gfa1.py | 23 +
gfapy/line/group/path/__init__.py | 1 +
gfapy/line/group/path/captured_path.py | 38 +
gfapy/line/group/path/path.py | 22 +
gfapy/line/group/path/references.py | 83 +
gfapy/line/group/path/to_gfa2.py | 15 +
gfapy/line/group/path/topology.py | 20 +
gfapy/line/group/path/validation.py | 22 +
gfapy/line/group/unordered/__init__.py | 1 +
gfapy/line/group/unordered/induced_set.py | 89 ++
gfapy/line/group/unordered/references.py | 69 +
gfapy/line/group/unordered/unordered.py | 20 +
gfapy/line/header/__init__.py | 2 +
gfapy/line/header/connection.py | 12 +
gfapy/line/header/field_data.py | 15 +
gfapy/line/header/header.py | 26 +
gfapy/line/header/multiline.py | 147 ++
gfapy/line/header/version_conversion.py | 37 +
gfapy/line/line.py | 59 +
gfapy/line/segment/__init__.py | 3 +
gfapy/line/segment/coverage.py | 37 +
gfapy/line/segment/gfa1.py | 35 +
gfapy/line/segment/gfa1_to_gfa2.py | 25 +
gfapy/line/segment/gfa2.py | 30 +
gfapy/line/segment/gfa2_to_gfa1.py | 23 +
gfapy/line/segment/length_gfa1.py | 60 +
gfapy/line/segment/references.py | 205 +++
gfapy/line/segment/segment.py | 23 +
gfapy/line/segment/writer_wo_sequence.py | 22 +
gfapy/line/unknown/__init__.py | 1 +
gfapy/line/unknown/unknown.py | 22 +
gfapy/lines/__init__.py | 2 +
gfapy/lines/collections.py | 321 ++++
gfapy/lines/creators.py | 184 +++
gfapy/lines/destructors.py | 50 +
gfapy/lines/finders.py | 131 ++
gfapy/lines/headers.py | 21 +
gfapy/lines/lines.py | 39 +
gfapy/logger.py | 185 +++
gfapy/numeric_array.py | 215 +++
gfapy/oriented_line.py | 176 +++
gfapy/placeholder.py | 103 ++
gfapy/segment_end.py | 147 ++
gfapy/segment_end_path.py | 19 +
gfapy/sequence.py | 62 +
gfapy/symbol_invert.py | 25 +
setup.cfg | 2 +
setup.py | 41 +
tests/__init__.py | 0
tests/extension.py | 50 +
tests/test_api_alignment.py | 188 +++
tests/test_api_comments.py | 119 ++
tests/test_api_custom_records.py | 53 +
tests/test_api_extensions.py | 36 +
tests/test_api_gfa1_lines.py | 179 +++
tests/test_api_gfa2_lines.py | 49 +
tests/test_api_gfa_basics.py | 77 +
tests/test_api_header.py | 69 +
tests/test_api_linear_paths.py | 49 +
tests/test_api_linear_paths_extended.py | 30 +
tests/test_api_lines_collections.py | 95 ++
tests/test_api_lines_creators.py | 132 ++
tests/test_api_lines_destructors.py | 76 +
tests/test_api_lines_finders.py | 170 ++
tests/test_api_multiplication.py | 198 +++
tests/test_api_placeholders.py | 26 +
tests/test_api_positionals.py | 249 +++
tests/test_api_positions.py | 60 +
tests/test_api_references_edge_gfa1.py | 125 ++
tests/test_api_references_edge_gfa2.py | 181 +++
tests/test_api_references_f_g_lines.py | 96 ++
tests/test_api_references_groups.py | 259 ++++
tests/test_api_references_virtual.py | 131 ++
tests/test_api_rename_lines.py | 23 +
tests/test_api_tags.py | 376 +++++
tests/test_api_version.py | 241 +++
tests/test_api_version_conversion.py | 217 +++
tests/test_gfapy_alignment.py | 40 +
tests/test_gfapy_byte_array.py | 28 +
tests/test_gfapy_cigar.py | 23 +
tests/test_gfapy_line_containment.py | 51 +
tests/test_gfapy_line_edge.py | 18 +
tests/test_gfapy_line_header.py | 12 +
tests/test_gfapy_line_link.py | 84 +
tests/test_gfapy_line_path.py | 57 +
tests/test_gfapy_line_segment.py | 63 +
tests/test_gfapy_line_version.py | 50 +
tests/test_gfapy_numeric_array.py | 49 +
tests/test_gfapy_segment_references.py | 16 +
tests/test_gfapy_sequence.py | 18 +
tests/test_gfapy_trace.py | 17 +
tests/test_graphop_artifacts.py | 32 +
tests/test_graphop_copy_number.py | 35 +
tests/test_internals_field_parser.py | 48 +
tests/test_internals_field_validator.py | 42 +
tests/test_internals_field_writer.py | 33 +
tests/test_internals_tag_datatype.py | 22 +
tests/test_unit_alignment.py | 55 +
tests/test_unit_field_array.py | 50 +
tests/test_unit_gfa_lines.py | 61 +
tests/test_unit_header.py | 110 ++
tests/test_unit_line.py | 89 ++
tests/test_unit_line_cloning.py | 56 +
tests/test_unit_line_connection.py | 154 ++
tests/test_unit_line_dynamic_fields.py | 96 ++
tests/test_unit_line_equivalence.py | 150 ++
tests/test_unit_lines_finders.py | 74 +
tests/test_unit_multiplication.py | 46 +
tests/test_unit_numeric_array.py | 21 +
tests/test_unit_oriented_line.py | 100 ++
tests/test_unit_segment_end.py | 96 ++
tests/test_unit_symbol_invert.py | 16 +
tests/test_unit_unknown.py | 25 +
tests/testdata/all_line_types.gfa1.gfa | 22 +
tests/testdata/all_line_types.gfa2.gfa | 33 +
tests/testdata/copynum.1.gfa | 3 +
tests/testdata/copynum.1.gfa2 | 3 +
tests/testdata/copynum.2.gfa | 4 +
tests/testdata/copynum.2.gfa2 | 4 +
tests/testdata/dead_ends.gfa | 12 +
tests/testdata/dead_ends.gfa2 | 12 +
tests/testdata/example1.gfa | 45 +
tests/testdata/example1.gfa2 | 45 +
tests/testdata/example_from_spec.gfa | 9 +
tests/testdata/example_from_spec.gfa2 | 9 +
tests/testdata/example_from_spec.path14.seq | 1 +
tests/testdata/example_from_spec2.gfa | 13 +
tests/testdata/example_from_spec2.gfa2 | 13 +
tests/testdata/gfa2_edges_classification.gfa | 1619 ++++++++++++++++++++
tests/testdata/invalid/edge_missing.gfa2 | 32 +
tests/testdata/invalid/edge_wrong_lastpos.gfa2 | 12 +
tests/testdata/invalid/fragment_wrong_lastpos.gfa2 | 33 +
tests/testdata/invalid/inconsistent_length.gfa1 | 12 +
tests/testdata/invalid/link_missing.gfa1 | 21 +
tests/testdata/invalid/segment_missing.gfa1 | 21 +
tests/testdata/invalid/segment_missing.gfa2 | 32 +
tests/testdata/linear_merging.1.gfa | 8 +
tests/testdata/linear_merging.1.gfa2 | 8 +
tests/testdata/linear_merging.2.gfa | 8 +
tests/testdata/linear_merging.2.gfa2 | 8 +
tests/testdata/linear_merging.3.gfa | 8 +
tests/testdata/linear_merging.3.gfa2 | 8 +
tests/testdata/linear_merging.4.gfa | 9 +
tests/testdata/linear_merging.4.gfa2 | 9 +
tests/testdata/linear_merging.5.gfa | 9 +
tests/testdata/linear_merging.5.gfa2 | 9 +
tests/testdata/links_distri.l1.gfa | 4 +
tests/testdata/links_distri.l1.gfa2 | 4 +
tests/testdata/links_distri.l1.m2.gfa | 6 +
tests/testdata/links_distri.l1.m2.gfa2 | 6 +
tests/testdata/links_distri.l2.gfa | 6 +
tests/testdata/links_distri.l2.gfa2 | 6 +
tests/testdata/links_distri.l2.m2.gfa | 7 +
tests/testdata/links_distri.l2.m2.gfa2 | 7 +
tests/testdata/links_distri.l2.m2.no_ld.gfa | 9 +
tests/testdata/links_distri.l2.m2.no_ld.gfa2 | 9 +
tests/testdata/links_distri.l2.m3.gfa | 8 +
tests/testdata/links_distri.l2.m3.gfa2 | 8 +
tests/testdata/links_distri.l2.m3.no_ld.gfa | 12 +
tests/testdata/links_distri.l2.m3.no_ld.gfa2 | 12 +
tests/testdata/links_distri.l3.gfa | 8 +
tests/testdata/links_distri.l3.gfa2 | 8 +
tests/testdata/links_distri.l3.m2.gfa | 10 +
tests/testdata/links_distri.l3.m2.gfa2 | 10 +
tests/testdata/links_distri.l3.m2.no_ld.gfa | 12 +
tests/testdata/links_distri.l3.m2.no_ld.gfa2 | 12 +
tests/testdata/loop.gfa | 10 +
tests/testdata/loop.gfa2 | 10 +
tests/testdata/sample.gfa | 12 +
tests/testdata/sample.gfa2 | 12 +
tests/testdata/spec_q1.gfa | 8 +
tests/testdata/spec_q1.gfa2 | 8 +
tests/testdata/spec_q2.gfa | 9 +
tests/testdata/spec_q2.gfa2 | 9 +
tests/testdata/spec_q2.path_circular.seq | 1 +
tests/testdata/spec_q2.path_linear.seq | 1 +
tests/testdata/spec_q3.gfa | 13 +
tests/testdata/spec_q3.gfa2 | 13 +
tests/testdata/spec_q4.gfa | 14 +
tests/testdata/spec_q4.gfa2 | 14 +
tests/testdata/spec_q4.path_more_than_circular.seq | 1 +
tests/testdata/spec_q5.gfa | 11 +
tests/testdata/spec_q5.gfa2 | 8 +
tests/testdata/spec_q6.gfa | 9 +
tests/testdata/spec_q6.gfa2 | 9 +
tests/testdata/spec_q7.gfa | 9 +
tests/testdata/spec_q7.gfa2 | 9 +
tests/testdata/two_components.gfa | 11 +
tests/testdata/two_components.gfa2 | 11 +
tests/testdata/unnamed_and_named_links.gfa | 8 +
tests/testdata/unnamed_link.gfa | 4 +
356 files changed, 22596 insertions(+)
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..d37fce1
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,13 @@
+# Compiled python modules
+*.pyc
+
+# Setuptools distribution folder
+/dist/
+
+# Python egg metadata, regenerated from source files by setuptools
+/*.egg-info
+/*.egg
+
+# Wheel data
+build
+conda
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..d376c4c
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,10 @@
+language: python
+python:
+ - "3.4"
+env:
+ - PYTHONHASHSEED=0
+install:
+ - pip install .
+ - pip install nose
+ - pip install Sphinx
+script: "make tests"
diff --git a/CHANGES.txt b/CHANGES.txt
new file mode 100644
index 0000000..758951e
--- /dev/null
+++ b/CHANGES.txt
@@ -0,0 +1,3 @@
+== 1.0.0 ==
+
+- initial release
diff --git a/CONTRIBUTORS b/CONTRIBUTORS
new file mode 100644
index 0000000..fbf3cf9
--- /dev/null
+++ b/CONTRIBUTORS
@@ -0,0 +1,6 @@
+The following contributors helped to develop gfapy. Please drop a note to
+gonnella at zbh.uni-hamburg.de if I left someone out or missed something.
+
+- Tim Weber (translation of parts of the code from Ruby to Python)
+- Stefan Kurtz (advice)
+
diff --git a/LICENSE.txt b/LICENSE.txt
new file mode 100644
index 0000000..230ef64
--- /dev/null
+++ b/LICENSE.txt
@@ -0,0 +1,19 @@
+All code of gfapy is released under the following ISC license.
+It is functionally equivalent to a two-term BSD copyright with
+language removed that is made unnecessary by the Berne convention.
+See http://openbsd.org/policy.html for more information on copyrights.
+
+Copyright (c) 2017 Giorgio Gonnella and CONTRIBUTORS
+Copyright (c) 2017 Center for Bioinformatics, University of Hamburg
+
+Permission to use, copy, modify, and distribute this software for any
+purpose with or without fee is hereby granted, provided that the above
+copyright notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..8b66251
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,3 @@
+include README.rst
+include tests/testdata/*
+include manual/*.pdf
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..e510fe5
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,49 @@
+default: tests
+
+.PHONY: manual tests cleanup upload conda sdist wheel install
+
+PYTHON=python3
+PIP=pip3
+
+# Install using pip
+install:
+ ${PIP} install --upgrade --user --editable .
+
+# Source distribution
+sdist:
+ ${PYTHON} setup.py sdist
+
+# Pure Python Wheel
+wheel:
+ ${PYTHON} setup.py bdist_wheel
+
+# Create the manual
+manual:
+ cd doc && make latexpdf
+ mkdir -p manual
+ cp doc/_build/latex/Gfapy.pdf manual/gfapy-manual.pdf
+
+
+# Run unit tests
+tests:
+ cd doc && make doctest
+ @echo
+ @echo "Running unit test suite..."
+ @PYTHONHASHSEED=0 ${PYTHON} -m unittest discover
+
+# Remove distribution files
+cleanup:
+ rm -rf dist/ build/ gfapy.egg-info/
+
+upload: tests cleanup sdist wheel
+ cd dist; \
+ for file in *; do \
+ twine register $$file; \
+ twine upload $$file; \
+ done
+
+conda:
+ mkdir -p conda
+ cd conda; \
+ conda skeleton pypi gfapy; \
+ conda build gfapy
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..51c5c66
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,72 @@
+The Graphical Fragment Assembly (GFA) formats are used for the representation
+of sequence graphs, including assembly, variation and splicing graphs.
+Two versions of GFA have been defined (GFA1 and GFA2) and several sequence
+analysis programs have adopted them as an interchange format,
+which allows different sequence analysis tools to be easily combined.
+
+This library implements the GFA1 and GFA2 specifications
+described at https://github.com/GFA-spec/GFA-spec/blob/master/GFA-spec.md.
+It allows the user to create a Gfa object from a file in GFA format
+or from scratch, to enumerate the graph elements (segments, links,
+containments, paths and header lines), to traverse the graph (by
+following all links outgoing from or incoming to a segment), to search for
+elements (e.g. which links connect two segments) and to manipulate the
+graph (e.g. to remove a link or a segment, or to duplicate a segment,
+distributing the read counts evenly among the copies).
+
+The GFA format can be easily extended by users by defining their own custom
+tags and record types. In Gfapy, it is easy to write extension modules,
+which define custom record types and datatypes for the parsing
+and validation of custom fields. The custom lines can be connected, using
+references, to each other and to lines of the standard record types.
+
+Requirements
+~~~~~~~~~~~~
+
+Gfapy has been written for Python 3 and tested using Python version 3.3.
+It does not require any additional Python packages or other software.
+
+Installation
+~~~~~~~~~~~~
+
+Gfapy is distributed as a Python package and can be installed using
+the Python package manager pip, as well as conda (in the Bioconda channel).
+
+The following command installs the current stable version from the Python
+Package Index::
+
+ pip install gfapy
+
+If you would like to install the current development version from GitHub,
+use the following command::
+
+ pip install -e git+https://github.com/ggonnella/gfapy.git#egg=gfapy
+
+Alternatively, it is possible to install gfapy using conda. Gfapy is
+included in the Bioconda channel (https://bioconda.github.io/)::
+
+ conda install -c bioconda gfapy
+
+Usage
+~~~~~
+
+If you installed gfapy as described above, you can import it in your script
+using the conventional Python syntax::
+
+ >>> import gfapy
+
+Documentation
+~~~~~~~~~~~~~
+
+The documentation, including this introduction to Gfapy, a user manual
+and the API documentation, is hosted on the ReadTheDocs server
+at http://gfapy.readthedocs.io/en/latest/ and can be
+downloaded as a PDF from
+https://github.com/ggonnella/gfapy/blob/master/manual/gfapy-manual.pdf.
+
+References
+~~~~~~~~~~
+
+Giorgio Gonnella and Stefan Kurtz "GfaPy: a flexible and extensible software
+library for handling sequence graphs in Python", Bioinformatics (2017) btx398
+https://doi.org/10.1093/bioinformatics/btx398
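To make the README's description concrete, here is a minimal usage sketch (not
part of this commit): it uses only calls that also appear in the bundled
scripts and tutorials below, and the input filename "graph.gfa" is
hypothetical.

    import gfapy

    # load a graph from a GFA1 or GFA2 file (hypothetical filename)
    gfa = gfapy.Gfa.from_file("graph.gfa")
    print(gfa.version)          # "gfa1" or "gfa2"

    # enumerate the segments and print their names
    for s in gfa.segments:
        print(s.name)

    # add a line from its string representation and validate the whole graph
    gfa.add_line("# processed with gfapy")
    gfa.validate()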
diff --git a/benchmarks/.gitignore b/benchmarks/.gitignore
new file mode 100644
index 0000000..ba48196
--- /dev/null
+++ b/benchmarks/.gitignore
@@ -0,0 +1,3 @@
+benchmark_results*
+jobs_out
+figure*
diff --git a/benchmarks/gfapy-benchmark-collectdata b/benchmarks/gfapy-benchmark-collectdata
new file mode 100755
index 0000000..a894334
--- /dev/null
+++ b/benchmarks/gfapy-benchmark-collectdata
@@ -0,0 +1,65 @@
+#!/bin/bash
+
+#
+# This script is derived from rdj-spacepeak.sh in
+# the GenomeTools repository (www.genometools.org).
+#
+# (c) 2010-2017 Giorgio Gonnella, ZBH, University of Hamburg
+#
+
+sleeptime=0.1
+
+if [ $# -eq 0 ]; then
+ echo "Usage: $0 <command> [args]"
+ echo
+ echo "The following information is polled each $sleeptime seconds"
+ echo "from /proc/[pid]/status:"
+ echo
+ echo " VmPeak: Peak virtual memory size."
+ echo " VmSize: Virtual memory size."
+ echo " VmLck: Locked memory size."
+ echo " VmHWM: Peak resident set size (\"high water mark\")."
+ echo " VmRSS: Resident set size."
+ echo " VmData, VmStk, VmExe: Size of data, stack, and text segments."
+ echo " VmLib: Shared library code size."
+ echo " VmPTE: Page table entries size (since Linux 2.6.10)."
+ echo
+ echo "The command is run under /usr/bin/time."
+ exit
+fi
+
+# code inspired by:
+# http://stackoverflow.com/questions/1080461/
+# /peak-memory-measurement-of-long-running-process-in-linux
+function __measure_space_peak {
+ types="Peak Size Lck HWM RSS Data Stk Exe Lib PTE"
+ declare -A maxVm
+ for vm in $types; do maxVm[$vm]=0; done
+ ppid=$$
+ /usr/bin/time $@ &
+ tpid=`pgrep -P ${ppid} -n -f time`
+ if [[ ${tpid} -ne "" ]]; then
+ pid=`pgrep -P ${tpid} -n -f $1` # $! may work here but not later
+ fi
+ declare -A Vm
+ while [[ ${tpid} -ne "" ]]; do
+ for vm in $types; do
+ if [[ ${pid} -ne "" ]]; then
+ Vm[$vm]=`cat /proc/${pid}/status 2> /dev/null \
+ | grep Vm${vm} | awk '{print $2}'`
+ if [[ ${Vm[$vm]} -gt ${maxVm[$vm]} ]]; then
+ maxVm[$vm]=${Vm[$vm]}
+ fi
+ fi
+ done
+ sleep $sleeptime
+ savedtpid=${tpid}
+ tpid=`pgrep -P ${ppid} -n -f time`
+ done
+ wait ${savedtpid} # job has already finished; wait just retrieves its exit status
+ exitstatus=$? # catch the exit status of wait, the same of $@
+ echo "Memory usage for $@:" >> /dev/stderr
+ for vm in $types; do echo " Vm$vm: ${maxVm[$vm]} kB" >> /dev/stderr; done
+ echo "Exit status: ${exitstatus}" >> /dev/stderr
+}
+__measure_space_peak $*
diff --git a/benchmarks/gfapy-plot-benchmarkdata.R b/benchmarks/gfapy-plot-benchmarkdata.R
new file mode 100755
index 0000000..2ad8793
--- /dev/null
+++ b/benchmarks/gfapy-plot-benchmarkdata.R
@@ -0,0 +1,120 @@
+#!/usr/bin/env Rscript
+# (c) Giorgio Gonnella, ZBH, Uni Hamburg, 2017
+
+script.name = "./gfapy-plot-benchmarkdata.R"
+args <- commandArgs(trailingOnly=TRUE)
+if (is.na(args[3])) {
+ cat("Usage: ",script.name, " <inputfile> <outpfx> <variable>", "\n")
+ cat("variable: either 'segments' or 'connectivity'\n")
+ stop("Too few command-line parameters")
+}
+infname <- args[1]
+cat("input data: ",infname,"\n")
+outpfx <- args[2]
+cat("output prefix:", outpfx, "\n")
+xvar <- args[3]
+if (xvar != 'segments' && xvar != 'connectivity') {
+ stop("variable must be one of: segments, connectivity")
+}
+
+library("ggplot2")
+
+#
+# The following function is described here:
+# http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/#Helper%20functions
+# Licence: CC0 (https://creativecommons.org/publicdomain/zero/1.0/)
+#
+## Gives count, mean, standard deviation, standard error of the mean, and
+## confidence interval (default 95%).
+## data: a data frame.
+## measurevar: the name of a column that contains the variable to be summarized
+## groupvars: a vector containing names of columns that contain grouping vars
+## na.rm: a boolean that indicates whether to ignore NA's
+## conf.interval: the percent range of the confidence interval (default 95%)
+summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE,
+ conf.interval=.95, .drop=TRUE) {
+ library(plyr)
+
+ # New version of length which can handle NA's: if na.rm==T, don't count them
+ length2 <- function (x, na.rm=FALSE) {
+ if (na.rm) sum(!is.na(x))
+ else length(x)
+ }
+
+ # This does the summary. For each group's data frame, return a vector with
+ # N, mean, and sd
+ datac <- ddply(data, groupvars, .drop=.drop,
+ .fun = function(xx, col) {
+ c(N = length2(xx[[col]], na.rm=na.rm),
+ mean = mean (xx[[col]], na.rm=na.rm),
+ sd = sd (xx[[col]], na.rm=na.rm)
+ )
+ },
+ measurevar
+ )
+
+ # Rename the "mean" column
+ datac <- rename(datac, c("mean" = measurevar))
+
+ datac$se <- datac$sd / sqrt(datac$N) # Calculate standard error of the mean
+
+ # Confidence interval multiplier for standard error
+ # Calculate t-statistic for confidence interval:
+ # e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1
+ ciMult <- qt(conf.interval/2 + .5, datac$N-1)
+ datac$ci <- datac$se * ciMult
+
+ return(datac)
+}
+
+data <- read.table(infname, header=T, sep="\t")
+
+if (xvar == "segments") {
+ xvarname = "lines"
+ xlab="Lines (segments 1/3; dovetails 2/3)"
+} else {
+ xvarname = "mult"
+ xlab="Dovetails/segment (segments=4000)"
+ data[c("lines")] = (data[c("mult")]+1)*4000
+}
+
+time.data <- summarySE(data, measurevar="time", groupvars=c(xvarname))
+outfname = paste0(outpfx,"_time.log")
+sink(outfname)
+print(time.data)
+time.lm <- lm(time ~ lines, data=data)
+summary(time.lm)
+time.nls <- nls(time ~ b + a * lines,
+ data=data, start=list(a=0,b=0),
+ algorithm="port", lower=c(0,0))
+print(time.nls)
+sink()
+
+outfname = paste0(outpfx,"_space.log")
+sink(outfname)
+space.data <- summarySE(data, measurevar="space", groupvars=c(xvarname))
+print(space.data)
+space.lm <- lm(space ~ lines, data=data)
+summary(space.lm)
+space.nls <- nls(space ~ b + a * lines,
+ data=data, start=list(a=0,b=0),
+ algorithm="port", lower=c(0,0))
+print(space.nls)
+sink()
+
+outfname = paste0(outpfx,"_time.pdf")
+pdf(outfname)
+print(ggplot(time.data, aes_string(x=xvarname, y="time")) +
+ geom_errorbar(aes(ymin=time-se, ymax=time+se), width=2) +
+ geom_line(size=0.2) + geom_point(size=3) +
+ ylab("Total elapsed time (s)") +
+ xlab(xlab))
+outfname = paste0(outpfx,"_space.pdf")
+pdf(outfname)
+print(ggplot(space.data, aes_string(x=xvarname, y="space")) +
+ geom_errorbar(aes(ymin=space-se, ymax=space+se), width=2) +
+ geom_line(size=0.2) + geom_point(size=3) +
+ ylab("Memory peak (MB)") +
+ xlab(xlab))
+dev.off()
+
diff --git a/benchmarks/gfapy-plot-preparedata.py b/benchmarks/gfapy-plot-preparedata.py
new file mode 100755
index 0000000..114c9ef
--- /dev/null
+++ b/benchmarks/gfapy-plot-preparedata.py
@@ -0,0 +1,62 @@
+#!/usr/bin/env python3
+"""
+Prepare the output of the convert benchmark script for the R plotting script.
+"""
+
+import argparse
+import os
+import sys
+import re
+
+op = argparse.ArgumentParser(description=__doc__)
+op.add_argument('--version', action='version', version='%(prog)s 1.0')
+op.add_argument("--mult", "-m", action="store_true",
+ help="set if variable n of edges/segment")
+op.add_argument("inputfile")
+opts = op.parse_args()
+
+if not os.path.exists(opts.inputfile):
+ sys.stderr.write("Input file not found: {}\n".format(opts.inputfile))
+ exit(1)
+
+with open(opts.inputfile) as inputfile:
+ header = True
+ if opts.mult:
+ outdata = ["mult", "time", "space", "time_per_line", "space_per_line"]
+ else:
+ outdata = ["lines", "time", "space", "time_per_line", "space_per_line"]
+ print("\t".join(outdata))
+ for line in inputfile:
+ if line[:3] == "###":
+ header = False
+ elif not header:
+ data = line.rstrip("\n\r").split("\t")
+ n_segments = data[2]
+ multiplier = data[3]
+ n_lines = int(int(n_segments) * (1+float(multiplier)))
+ elapsed = data[5]
+ elapsed_match = re.compile(r'\s+(\d+):(\d+\.\d+)').match(elapsed)
+ if elapsed_match:
+ minutes = int(elapsed_match.groups()[0])
+ seconds = float(elapsed_match.groups()[1])
+ seconds += minutes * 60
+ else:
+ elapsed_match = re.compile(r'\s+(\d+):(\d+):(\d+)').match(elapsed)
+ if elapsed_match:
+ hours = int(elapsed_match.groups()[0])
+ minutes = int(elapsed_match.groups()[1])
+ seconds = int(elapsed_match.groups()[2])
+ minutes += hours * 60
+ seconds += minutes * 60
+ else:
+ continue
+ memory = data[6]
+ memory = int(re.compile(r'(\d+) kB').match(memory).groups()[0])
+ megabytes = memory / 1024
+ if opts.mult:
+ outdata = [str(multiplier)]
+ else:
+ outdata = [str(n_lines)]
+ outdata += [str(seconds),str(megabytes),
+ str(seconds/n_lines), str(megabytes/n_lines)]
+ print("\t".join(outdata))
diff --git a/benchmarks/gfapy-profiler.sh b/benchmarks/gfapy-profiler.sh
new file mode 100755
index 0000000..c7068f7
--- /dev/null
+++ b/benchmarks/gfapy-profiler.sh
@@ -0,0 +1,61 @@
+#!/bin/bash
+#$ -clear
+#$ -q 16c.q
+#$ -cwd
+#$ -V
+#$ -S /bin/bash
+#$ -o jobs_out
+#$ -j y
+
+if [ $# -ne 4 ]; then
+ echo "Usage: $0 <operation> <version> <variable> <range>" > /dev/stderr
+ echo " operation: (mergelinear/convert) ../bin/gfapy-<operation> <gfafile> will be called" > /dev/stderr
+ echo " version: (gfa1/gfa2) gfa version" > /dev/stderr
+ echo " variable: (segments/connectivity)" > /dev/stderr
+ echo " range: (all/fast/slow)" > /dev/stderr
+ exit 1
+fi
+
+operation=$1
+version=$2
+variable=$3
+range=$4
+
+if [ $variable == "segments" ]; then
+ if [ $range == "fast" ]; then
+ nsegments="1000 2000 4000 8000 16000 32000 64000 128000"
+ elif [ $range == "slow" ]; then
+ nsegments="256000 512000 1024000 2048000 4096000"
+ elif [ $range == "all" ]; then
+ nsegments="1000 2000 4000 8000 16000 32000 64000 128000 256000 512000 1024000 2048000 4096000"
+ fi
+else
+ nsegments=4000
+fi
+
+if [ $variable == "connectivity" ]; then
+ if [ $range == "fast" ]; then
+ multipliers="2 4 8 16 32 64"
+ elif [ $range == "slow" ]; then
+ multipliers="128 256"
+ elif [ $range == "all" ]; then
+ multipliers="2 4 8 16 32 64 128 256"
+ fi
+else
+ multipliers=2
+fi
+
+replicate=1
+for i in $nsegments; do
+ for m in $multipliers; do
+ fname="${i}_e${m}x.$replicate.${version}"
+ if [ ! -e $fname ]; then
+ ./gfapy-randomgraph --segments $i -g $version \
+ --dovetails-per-segment $m --with-sequence > $fname
+ fi
+ echo "Profiling $operation $fname ..."
+ rm -f $fname.$operation.prof
+ python3 -m cProfile -o $fname.$operation.prof \
+ ../bin/gfapy-$operation $fname 1> /dev/null
+ done
+done
diff --git a/benchmarks/gfapy-randomgraph b/benchmarks/gfapy-randomgraph
new file mode 100755
index 0000000..8c14b19
--- /dev/null
+++ b/benchmarks/gfapy-randomgraph
@@ -0,0 +1,87 @@
+#!/usr/bin/env python3
+"""
+Creates a random graph for testing
+"""
+
+import argparse
+import sys
+import random
+
+op = argparse.ArgumentParser(description=__doc__)
+op.add_argument("--segments", "-s", type=int,
+ help="number of segments", required=True)
+op.add_argument("--slen", "-l", type=int, default=100,
+ help="lenght of segments sequence")
+op.add_argument("--with-sequence", "-w", action="store_true")
+op.add_argument("--dovetails-per-segment", "-d",
+ help="average number of dovetail edges per segment",
+ default=2.0, type=float)
+op.add_argument('--gfa-version', "-g", default="gfa1",
+ help="gfa version", choices=("gfa1", "gfa2"))
+op.add_argument('--version', action='version', version='%(prog)s 1.0')
+opts = op.parse_args()
+
+if opts.segments < 0:
+ sys.stderr.write("Error: the number of segments must be "+
+ ">= 0 ({})\n".format(opts.segments))
+ exit(1)
+if opts.dovetails_per_segment < 0:
+ sys.stderr.write("Error: the average number of dovetails per segment must "+
+ "be >= 0 ({})\n".format(opts.dovetails_per_segment))
+ exit(1)
+if opts.slen <= 0:
+ sys.stderr.write("Error: the length of segments sequence must be > 0"+
+ " ({})\n".format(opts.slen))
+ exit(1)
+
+if opts.gfa_version == "gfa1":
+ print("H\tVN:Z:1.0")
+else:
+ print("H\tVN:Z:2.0")
+
+def random_sequence(slen):
+ sequence = []
+ for i in range(slen):
+ sequence.append(random.choice('ACGT'))
+ return "".join(sequence)
+
+for i in range(opts.segments):
+ if opts.with_sequence:
+ sequence = random_sequence(opts.slen)
+ else:
+ sequence = "*"
+ if opts.gfa_version == "gfa1":
+ print("S\ts{}\t{}\tLN:i:{}".format(i, sequence, opts.slen))
+ else:
+ print("S\ts{}\t{}\t{}".format(i, opts.slen, sequence))
+
+n_dovetails = int(opts.segments * opts.dovetails_per_segment)
+edges = {}
+for i in range(n_dovetails):
+ edge = False
+ while not edge:
+ s_from = random.randint(0, opts.segments-1)
+ s_from_or = random.choice('+-')
+ s_to = random.randint(0, opts.segments-1)
+ s_to_or = random.choice('+-')
+ if s_from not in edges:
+ edges[s_from] = {'+': {}, '-': {}}
+ if s_to not in edges[s_from][s_from_or]:
+ edges[s_from][s_from_or][s_to] = {'+': False, '-': False}
+ if not edges[s_from][s_from_or][s_to][s_to_or]:
+ edges[s_from][s_from_or][s_to][s_to_or] = True
+ edge = True
+ ovlen = opts.slen//10
+ if ovlen == 0: ovlen = 1
+ cigar = "{}M".format(ovlen)
+ if opts.gfa_version == "gfa1":
+ print("L\ts{}\t{}\ts{}\t{}\t{}\tID:Z:e{}".format(s_from, s_from_or, s_to,
+ s_to_or, cigar, i))
+ else:
+ s_from_begin = opts.slen - ovlen if s_from_or == "+" else 0
+ s_from_end = "{}$".format(opts.slen) if s_from_or == "+" else ovlen
+ s_to_begin = opts.slen - ovlen if s_to_or == "-" else 0
+ s_to_end = "{}$".format(opts.slen) if s_to_or == "-" else ovlen
+ print("E\te{}\ts{}{}\ts{}{}\t{}\t{}\t{}\t{}\t{}".format(
+ i, s_from, s_from_or, s_to, s_to_or, s_from_begin, s_from_end,
+ s_to_begin, s_to_end, cigar))
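For reference, a worked example of the two edge encodings emitted by the
script above, assuming the default --slen 100 (so ovlen = 10) and an edge e0
from s0+ to s1+ (fields are tab-separated in the real output; this is
illustrative, not part of the commit):

    GFA1:  L  s0  +  s1  +  10M  ID:Z:e0
    GFA2:  E  e0  s0+  s1+  90  100$  0  10  10M

In the GFA2 form the coordinates state the dovetail explicitly: the last 10
bases of s0 (90..100$, where $ marks the sequence end) overlap the first 10
bases of s1 (0..10).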
diff --git a/benchmarks/gfapy-reproduce-manuscript-figure.py b/benchmarks/gfapy-reproduce-manuscript-figure.py
new file mode 100755
index 0000000..89919ba
--- /dev/null
+++ b/benchmarks/gfapy-reproduce-manuscript-figure.py
@@ -0,0 +1,76 @@
+#!/usr/bin/env python3
+"""
+Run the benchmarks necessary to reproduce the figures of Section 3
+of the Supplementary Information of the manuscript \"Gfapy: a flexible
+and extensible software library for handling sequence graphs in Python\"
+and plot the figures using R.
+"""
+
+import argparse
+import os
+
+op = argparse.ArgumentParser(description=__doc__)
+op.add_argument("fignum", help="Figure number", type=int,
+ choices=range(5,9))
+op.add_argument("--queue", default=None,
+ help="Use the specified queue of a Grid Engine cluster system "+
+ "(e.g. 16c.q). If not provided, the benchmarks are run on the "+
+ "local computer.")
+op.add_argument("--nrepl",type=int, default=3,
+ help="Number of replicates (default: 3)")
+op.add_argument("--fast",action="store_true",
+ help="Run only the three fastest datapoints of the benchmark")
+opts = op.parse_args()
+
+if opts.fignum == 5:
+ testvar="segments"
+ operation="convert"
+elif opts.fignum == 6:
+ testvar="connectivity"
+ operation="convert"
+elif opts.fignum == 7:
+ testvar="segments"
+ operation="mergelinear"
+else: # 8
+ testvar="connectivity"
+ operation="mergelinear"
+
+if opts.fast:
+ subset="fast"
+else:
+ subset="all"
+
+run_benchmarks_args="figure{}.out {} gfa2 {} {} {}".format(
+ opts.fignum, operation, testvar, subset, opts.nrepl)
+
+if not opts.queue:
+ os.system("./gfapy-run-benchmarks.sh {}".format(run_benchmarks_args))
+else:
+ qsub_script_pfx=\
+"""#!/bin/bash
+#$ -clear
+#$ -q {}
+#$ -cwd
+#$ -V
+#$ -S /bin/bash
+#$ -o jobs_out
+#$ -j y
+#$ -sync y
+
+""".format(opts.queue)
+ with open("gfapy-run-benchmarks.sh", "r") as input_file:
+ content = input_file.read()
+ with open("gfapy-run-benchmarks.qsub", "w") as output_file:
+ output_file.write(qsub_script_pfx)
+ output_file.write(content)
+ os.system("mkdir -p jobs_out")
+ os.system("qsub gfapy-run-benchmarks.qsub {}".format(run_benchmarks_args))
+
+if testvar == "segments":
+ prepareflag=""
+else:
+ prepareflag="--mult"
+os.system("./gfapy-plot-preparedata.py {} figure{}.out > figure{}.dat".format(
+ prepareflag, opts.fignum, opts.fignum))
+os.system("./gfapy-plot-benchmarkdata.R figure{}.dat figure{} {}".format(
+ opts.fignum, opts.fignum, testvar))
diff --git a/benchmarks/gfapy-run-benchmarks.sh b/benchmarks/gfapy-run-benchmarks.sh
new file mode 100755
index 0000000..de023c8
--- /dev/null
+++ b/benchmarks/gfapy-run-benchmarks.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+if [ $# -ne 6 ]; then
+ echo "Usage: $0 <outfile> <operation> <version> <variable> <range> <nrepl>" > /dev/stderr
+ echo " outfile: will be overwritten if exists" > /dev/stderr
+ echo " operation: (mergelinear/convert) ../bin/gfapy-<operation> <gfafile> will be called" > /dev/stderr
+ echo " version: (gfa1/gfa2) gfa version" > /dev/stderr
+ echo " variable: (segments/connectivity)" > /dev/stderr
+ echo " range: (all/fast/slow)" > /dev/stderr
+ echo " nrepl: (e.g. 3) number of replicates" > /dev/stderr
+ exit 1
+fi
+
+outfile=$1
+operation=$2
+version=$3
+variable=$4
+range=$5
+nrepl=$6
+
+if [ $variable == "segments" ]; then
+ if [ $range == "fast" ]; then
+ nsegments="1000 2000 4000"
+ elif [ $range == "slow" ]; then
+ nsegments="8000 16000 32000 64000 128000 256000 512000 1024000 2048000"
+ elif [ $range == "all" ]; then
+ nsegments="1000 2000 4000 8000 16000 32000 64000 128000 256000 512000 1024000 2048000"
+ fi
+else
+ nsegments=4000
+fi
+
+if [ $variable == "connectivity" ]; then
+ if [ $range == "fast" ]; then
+ multipliers="2 4 8"
+ elif [ $range == "slow" ]; then
+ multipliers="16 32 64 128 256"
+ elif [ $range == "all" ]; then
+ multipliers="2 4 8 16 32 64 128 256"
+ fi
+else
+ multipliers=2
+fi
+
+mkdir -p benchmark_results
+rm -f $outfile
+echo "# hostname: $HOSTNAME" > $outfile
+echo "### benchmark data:" >> $outfile
+for ((replicate=1;replicate<=nrepl;++replicate)); do
+ for i in $nsegments; do
+ for m in $multipliers; do
+ fname="benchmark_results/${i}_e${m}x.$replicate.${version}"
+ bmout="$fname.$operation.benchmark"
+ rm -f $bmout
+ if [ ! -e $fname ]; then
+ ./gfapy-randomgraph --segments $i -g $version \
+ --dovetails-per-segment $m --with-sequence > $fname
+ fi
+ ./gfapy-benchmark-collectdata ../bin/gfapy-$operation $fname \
+ 1> /dev/null 2> $bmout
+ elapsed=$(grep -P -o "(?<=) [^ ]*(?=elapsed)" $bmout)
+ memory=$(grep -P -o "(?<=VmHWM: ).*" $bmout)
+ filesize=( $(ls -ln $fname) );filesize=${filesize[4]}
+ echo -e "gfapy-$operation\t$version\t$i\t$m\t$replicate\t$elapsed\t$memory\t$filesize" >> $outfile
+ done
+ done
+done
diff --git a/bin/gfapy-convert b/bin/gfapy-convert
new file mode 100755
index 0000000..f054bed
--- /dev/null
+++ b/bin/gfapy-convert
@@ -0,0 +1,28 @@
+#!/usr/bin/env python3
+"""
+Convert a GFA file to the other specification version
+"""
+
+import sys
+import os
+import gfapy
+import argparse
+
+op = argparse.ArgumentParser(description=__doc__)
+op.add_argument("filename")
+op.add_argument('--version', action='version', version='%(prog)s 1.0')
+opts = op.parse_args()
+
+gfa = gfapy.Gfa.from_file(opts.filename)
+try:
+ for line in gfa.lines:
+ if gfa.version == "gfa1":
+ print(line.to_gfa2_s())
+ else:
+ converted_line = line.to_gfa1_s()
+ if (converted_line):
+ print(converted_line)
+except gfapy.Error as err:
+ sys.stderr.write(str(err))
+ sys.exit(1)
+
diff --git a/bin/gfapy-diff b/bin/gfapy-diff
new file mode 100755
index 0000000..8223235
--- /dev/null
+++ b/bin/gfapy-diff
@@ -0,0 +1,47 @@
+#!/usr/bin/env python3
+"""
+Compare two GFA files
+
+Note: the current version is not yet fully functional and only checks segments.
+Work in progress.
+"""
+
+import sys
+import os
+import gfapy
+import argparse
+
+op = argparse.ArgumentParser(description=__doc__)
+op.add_argument('--version', action='version', version='%(prog)s 0.1')
+op.add_argument("filename1")
+op.add_argument("filename2")
+opts = op.parse_args()
+
+gfa1 = gfapy.Gfa.from_file(opts.filename1)
+gfa2 = gfapy.Gfa.from_file(opts.filename2)
+
+different = False
+
+if gfa1.version != gfa2.version:
+ print("# different version")
+ exit(1)
+else:
+ for s in gfa1.segments:
+ s2 = gfa2.segment(s)
+ if s2 is None:
+ different = True
+ print("# segment {} in {} but not in {}".format(s.name, opts.filename1, opts.filename2))
+ if s.diff(s2):
+ different = True
+ for diff in s.diff(s2):
+ print(diff)
+ for s in gfa2.segments:
+ s1 = gfa1.segment(s)
+ if s1 is None:
+ different = True
+ print("# segment {} in {} but not in {}".format(s.name, opts.filename2, opts.filename1))
+
+if different:
+ exit(1)
+else:
+ exit(0)
diff --git a/bin/gfapy-mergelinear b/bin/gfapy-mergelinear
new file mode 100755
index 0000000..76d25f7
--- /dev/null
+++ b/bin/gfapy-mergelinear
@@ -0,0 +1,37 @@
+#!/usr/bin/env python3
+"""
+Merge linear paths in a GFA graph
+"""
+
+import sys
+import os
+import gfapy
+import argparse
+
+op = argparse.ArgumentParser(description=__doc__)
+op.add_argument("filename")
+op.add_argument("--redundant", '-r', help="create redundant paths, "+
+ "similar to the contigs constructed by Readjoiner", action="store_true")
+op.add_argument("--no-progress", '-p', help="do not show progress log",
+ action="store_false", dest="progress")
+op.add_argument("--quiet", '-q', help="suppress output", action="store_false",
+ dest="output")
+op.add_argument("--vlevel", help="validation level", default=0, type=int)
+op.add_argument('--version', action='version', version='%(prog)s 1.0')
+opts = op.parse_args()
+
+gfa = gfapy.Gfa(vlevel=opts.vlevel)
+if opts.progress:
+ gfa.enable_progress_logging(part=0.01)
+gfa.read_file(opts.filename)
+if opts.redundant:
+ # remove isolated segments, as this mode is for comparison
+ # with readjoiner contigs, and isolated vertices are not output by readjoiner
+ for cc in gfa.connected_components():
+ if len(cc) == 1:
+ gfa.segment(cc[0]).disconnect()
+gfa.merge_linear_paths(redundant_junctions=opts.redundant,
+ enable_tracking=False,
+ merged_name="short")
+if opts.output:
+ print(gfa)
diff --git a/bin/gfapy-validate b/bin/gfapy-validate
new file mode 100755
index 0000000..c29e1de
--- /dev/null
+++ b/bin/gfapy-validate
@@ -0,0 +1,21 @@
+#!/usr/bin/env python3
+"""
+Validate a GFA file
+"""
+
+import sys
+import os
+import gfapy
+import argparse
+
+op = argparse.ArgumentParser(description=__doc__)
+op.add_argument("filename")
+op.add_argument('--version', action='version', version='%(prog)s 1.0')
+opts = op.parse_args()
+
+try:
+ gfa = gfapy.Gfa.from_file(opts.filename)
+ gfa.validate()
+except gfapy.Error as err:
+ sys.stderr.write(str(err)+"\n")
+ sys.exit(1)
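Hypothetical command-line usage of the four scripts above (the file names are
placeholders; gfapy-validate and gfapy-diff signal problems via their exit
status, the other two write the resulting GFA lines to standard output):

    gfapy-validate graph.gfa
    gfapy-convert graph.gfa > graph_converted.gfa
    gfapy-mergelinear graph.gfa > merged.gfa
    gfapy-diff graph.gfa other.gfa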
diff --git a/doc/.gitignore b/doc/.gitignore
new file mode 100644
index 0000000..a123a00
--- /dev/null
+++ b/doc/.gitignore
@@ -0,0 +1,2 @@
+source
+_build
diff --git a/doc/Makefile b/doc/Makefile
new file mode 100644
index 0000000..18ee039
--- /dev/null
+++ b/doc/Makefile
@@ -0,0 +1,23 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line.
+SPHINXOPTS =
+SPHINXBUILD = sphinx-build
+SPHINXPROJ = Gfapy
+SOURCEDIR = .
+BUILDDIR = _build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+ @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+cleanup:
+ rm source _build -rf
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+ @PYTHONHASHSEED=0 $(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/doc/changelog.rst b/doc/changelog.rst
new file mode 100644
index 0000000..a591e56
--- /dev/null
+++ b/doc/changelog.rst
@@ -0,0 +1,4 @@
+Changelog
+---------
+.. include:: ../CHANGES.txt
+ :literal:
diff --git a/doc/conf.py b/doc/conf.py
new file mode 100644
index 0000000..c7759f6
--- /dev/null
+++ b/doc/conf.py
@@ -0,0 +1,173 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# Gfapy documentation build configuration file, created by
+# sphinx-quickstart on Thu Mar 16 10:13:57 2017.
+#
+# This file is execfile()d with the current directory set to its
+# containing dir.
+#
+# Note that not all possible configuration values are present in this
+# autogenerated file.
+#
+# All configuration values have a default; values that are commented out
+# serve to show the default.
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+import os
+import sys
+sys.path.insert(0, os.path.abspath('.'))
+sys.path.insert(0, os.path.abspath('../'))
+
+# -- General configuration ------------------------------------------------
+
+# If your documentation needs a minimal Sphinx version, state it here.
+#
+# needs_sphinx = '1.0'
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+ 'sphinx.ext.autodoc',
+ 'sphinx.ext.doctest',
+ 'sphinx.ext.todo',
+ 'sphinx.ext.coverage',
+ 'sphinx.ext.imgmath',
+ 'sphinx.ext.ifconfig',
+ 'sphinx.ext.viewcode',
+ 'sphinx.ext.githubpages',
+ 'sphinx.ext.napoleon'
+]
+
+# Napoleon
+napoleon_numpy_docstring = True
+napoleon_google_docstring = True
+napoleon_use_param = False
+napoleon_use_ivar = True
+
+# Default role:
+default_role = 'any'
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The suffix(es) of source filenames.
+# You can specify multiple suffix as a list of string:
+#
+# source_suffix = ['.rst', '.md']
+source_suffix = '.rst'
+
+# The master toctree document.
+master_doc = 'index'
+
+# General information about the project.
+project = 'Gfapy'
+copyright = '2017, Giorgio Gonnella and others (see CONTRIBUTORS)'
+author = 'Giorgio Gonnella and others (see CONTRIBUTORS)'
+
+# The version info for the project you're documenting, acts as replacement for
+# |version| and |release|, also used in various other places throughout the
+# built documents.
+#
+# The short X.Y version.
+version = '1.0.0'
+# The full version, including alpha/beta/rc tags.
+release = '1.0.0'
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#
+# This is also used if you do content translation via gettext catalogs.
+# Usually you set "language" from the command line for these cases.
+language = None
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This patterns also effect to html_static_path and html_extra_path
+exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = 'sphinx'
+
+# If true, `todo` and `todoList` produce output, else they produce nothing.
+todo_include_todos = False
+
+# -- Options for HTML output ----------------------------------------------
+
+# The theme to use for HTML and HTML Help pages. See the documentation for
+# a list of builtin themes.
+#
+html_theme = 'sphinx_rtd_theme'
+
+# Theme options are theme-specific and customize the look and feel of a theme
+# further. For a list of options available for each theme, see the
+# documentation.
+#
+# html_theme_options = {}
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+
+
+# -- Options for HTMLHelp output ------------------------------------------
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = 'Gfapydoc'
+
+
+# -- Options for LaTeX output ---------------------------------------------
+
+latex_elements = {
+ # The paper size ('letterpaper' or 'a4paper').
+ #
+ 'papersize': 'a4paper',
+
+ # The font size ('10pt', '11pt' or '12pt').
+ #
+ # 'pointsize': '10pt',
+
+ # Additional stuff for the LaTeX preamble.
+ #
+ # 'preamble': '',
+
+ # Latex figure (float) alignment
+ #
+ # 'figure_align': 'htbp',
+}
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title,
+# author, documentclass [howto, manual, or own class]).
+latex_documents = [
+ (master_doc, 'Gfapy.tex', 'Gfapy Documentation',
+ 'Giorgio Gonnella', 'manual'),
+]
+
+# -- Options for manual page output ---------------------------------------
+
+# One entry per manual page. List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [
+ (master_doc, 'gfapy', 'Gfapy Documentation',
+ [author], 1)
+]
+
+
+# -- Options for Texinfo output -------------------------------------------
+
+# Grouping the document tree into Texinfo files. List of tuples
+# (source start file, target name, title, author,
+# dir menu entry, description, category)
+texinfo_documents = [
+ (master_doc, 'Gfapy', 'Gfapy Documentation',
+ author, 'Gfapy',
+ 'Python library for the Graphic Fragment Assembly (GFA) format.',
+ 'Miscellaneous'),
+]
+
diff --git a/doc/index.rst b/doc/index.rst
new file mode 100644
index 0000000..defe453
--- /dev/null
+++ b/doc/index.rst
@@ -0,0 +1,35 @@
+.. Gfapy documentation master file, created by
+ sphinx-quickstart on Thu Mar 16 10:13:57 2017.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+Gfapy documentation
+===================
+
+.. toctree::
+ :maxdepth: 2
+ :caption: Contents:
+
+ readme
+ changelog
+
+ tutorial/gfa
+ tutorial/validation
+ tutorial/positional_fields
+ tutorial/placeholders
+ tutorial/positions
+ tutorial/alignments
+ tutorial/tags
+ tutorial/references
+ tutorial/header
+ tutorial/custom_records
+ tutorial/comments
+ tutorial/errors
+ tutorial/graph_operations
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`
diff --git a/doc/readme.rst b/doc/readme.rst
new file mode 100644
index 0000000..d8ad18b
--- /dev/null
+++ b/doc/readme.rst
@@ -0,0 +1,3 @@
+Introduction
+============
+.. include:: ../README.rst
diff --git a/doc/run_apidoc.sh b/doc/run_apidoc.sh
new file mode 100755
index 0000000..a13f4b1
--- /dev/null
+++ b/doc/run_apidoc.sh
@@ -0,0 +1 @@
+sphinx-apidoc -o source/ ../gfapy
diff --git a/doc/tutorial/alignments.rst b/doc/tutorial/alignments.rst
new file mode 100644
index 0000000..de4c28e
--- /dev/null
+++ b/doc/tutorial/alignments.rst
@@ -0,0 +1,238 @@
+.. testsetup:: *
+
+ import gfapy
+ from gfapy import is_placeholder, Alignment
+ h = "H\tVN:Z:2.0\tTS:i:100"
+ sA = "S\tA\t100\t*"
+ sB = "S\tB\t100\t*"
+ x = "E\tx\tA+\tB-\t0\t100$\t0\t100$\t4,2\tTS:i:50"
+ gfa = gfapy.Gfa([h, sA, sB, x])
+
+.. _alignments:
+
+Alignments
+~~~~~~~~~~
+
+Some GFA1 (L/C overlap, P overlaps) and GFA2 (E/F alignment) fields contain
+alignments or lists of alignments. The alignment can be left unspecified,
+using the placeholder symbol ``*`` instead. In GFA1 alignments can be given
+as CIGAR strings; in GFA2 they can additionally be given as Dazzler traces.
+
+Gfapy uses three different classes for representing the content of alignment fields:
+:class:`~gfapy.alignment.cigar.CIGAR`, :class:`~gfapy.alignment.trace.Trace`
+and :class:`~gfapy.alignment.placeholder.AlignmentPlaceholder`.
+
+Creating an alignment
+^^^^^^^^^^^^^^^^^^^^^
+
+An alignment instance is usually created from its GFA string
+representation or from a list by using the
+:class:`gfapy.Alignment() <gfapy.alignment.alignment.Alignment>`
+constructor.
+
+.. doctest::
+
+ >>> from gfapy import Alignment
+ >>> Alignment("*")
+ gfapy.AlignmentPlaceholder()
+ >>> Alignment("10,10,10")
+ gfapy.Trace([10,10,10])
+ >>> Alignment([10,10,10])
+ gfapy.Trace([10,10,10])
+ >>> Alignment("30M2I")
+ gfapy.CIGAR([gfapy.CIGAR.Operation(30,'M'), gfapy.CIGAR.Operation(2,'I')])
+
+If the argument is already an alignment object, it is returned unchanged,
+so it is always safe to call the constructor on a variable which may
+contain either a string or an alignment instance:
+
+.. doctest::
+
+ >>> Alignment(Alignment("*"))
+ gfapy.AlignmentPlaceholder()
+ >>> Alignment(Alignment("10,10"))
+ gfapy.Trace([10,10])
+
+Recognizing undefined alignments
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The :func:`gfapy.is_placeholder() <gfapy.placeholder.is_placeholder>` method
+tests whether an alignment field contains an undefined value (placeholder)
+instead of a defined value (CIGAR string, trace). The method accepts as
+argument either an alignment object or its string or list representation.
+
+.. doctest::
+
+ >>> from gfapy import is_placeholder, Alignment
+ >>> is_placeholder(Alignment("30M"))
+ False
+ >>> is_placeholder(Alignment("10,10"))
+ False
+ >>> is_placeholder(Alignment("*"))
+ True
+ >>> is_placeholder("*")
+ True
+ >>> is_placeholder("30M")
+ False
+ >>> is_placeholder("10,10")
+ False
+ >>> is_placeholder([])
+ True
+ >>> is_placeholder([10,10])
+ False
+
+Note that, as a placeholder is ``False`` in a boolean context, a simple
+``if not alignment`` check also works, provided ``alignment`` is an alignment
+object. This does not work, however, if it is a string representation.
+It is therefore better to use the
+:func:`gfapy.is_placeholder() <gfapy.placeholder.is_placeholder>` method,
+which works in both cases.
+
+.. doctest::
+
+ >>> if not Alignment("*"): print('no alignment')
+ no alignment
+ >>> if is_placeholder(Alignment("*")): print('no alignment')
+ no alignment
+ >>> if "*": print('not a placeholder...?')
+ not a placeholder...?
+ >>> if is_placeholder("*"): print('really? it is a placeholder!')
+ really? it is a placeholder!
+
+Reading and editing CIGARs
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+CIGARs are represented by specialized lists, instances of the class
+:class:`~gfapy.alignment.cigar.CIGAR`, whose elements are CIGAR operations.
+CIGAR operations are represented by instances of the class
+:class:`~gfapy.alignment.cigar.CIGAR.Operation`,
+and provide the properties ``length`` (length of the operation, an integer)
+and ``code`` (a one-letter string which specifies the type of operation).
+Note that not all operations allowed in SAM files (for which CIGAR strings
+were first defined) are also meaningful in GFA; thus GFA2 only allows
+the operations ``M``, ``I``, ``D`` and ``P``.
+
+.. doctest::
+
+ >>> cigar = gfapy.Alignment("30M")
+ >>> isinstance(cigar, list)
+ True
+ >>> operation = cigar[0]
+ >>> type(operation)
+ <class 'gfapy.alignment.cigar.CIGAR.Operation'>
+ >>> operation.code
+ 'M'
+ >>> operation.code = 'D'
+ >>> operation.length
+ 30
+ >>> len(operation)
+ 30
+ >>> str(operation)
+ '30D'
+
+As a CIGAR instance is a list, the usual list methods apply to it. If the list
+is emptied, its string representation will be the placeholder symbol ``*``.
+
+.. doctest::
+
+ >>> cigar = gfapy.Alignment("1I20M2D")
+ >>> cigar[0].code = "M"
+ >>> cigar.pop(1)
+ gfapy.CIGAR.Operation(20,'M')
+ >>> str(cigar)
+ '1M2D'
+ >>> cigar[:] = []
+ >>> str(cigar)
+ '*'
+
+The :func:`CIGAR.validate() <gfapy.alignment.cigar.CIGAR.validate>`
+method checks whether a CIGAR instance is valid. A version can be provided, as
+CIGAR validation is version-specific (GFA2 forbids some CIGAR operations).
+
+.. doctest::
+
+ >>> cigar = gfapy.Alignment("30M10D20M5I10M")
+ >>> cigar.validate()
+ >>> cigar[1].code = "L"
+ >>> cigar.validate()
+ Traceback (most recent call last):
+ ...
+ gfapy.error.ValueError:
+ >>> cigar = gfapy.Alignment("30M10D20M5I10M")
+ >>> cigar[1].code = "X"
+ >>> cigar.validate(version="gfa1")
+ >>> cigar.validate(version="gfa2")
+ Traceback (most recent call last):
+ ...
+ gfapy.error.ValueError:
+
+Reading and editing traces
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Traces are lists of non-negative integers. The values are interpreted
+using a trace spacing value. If traces are used, a trace spacing value
+must be defined in a TS integer tag, either in the header or in the
+individual lines which contain traces (the line-level value takes
+precedence over the global value in the header).
+
+.. doctest::
+
+ >>> print(gfa) #doctest: +SKIP
+ H TS:i:100
+ E x A+ B- 0 100$ 0 100$ 4,2 TS:i:50
+ ...
+ >>> gfa.header.TS
+ 100
+ >>> gfa.line("x").TS
+ 50
+
+Query, reference and complement
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+CIGARs are asymmetric, i.e.\ they consider one sequence as reference and
+another sequence as query.
+
+The :func:`~gfapy.alignment.cigar.CIGAR.length_on_reference` and
+:func:`~gfapy.alignment.cigar.CIGAR.length_on_query` methods compute the length
+of the alignment on the two sequences. These methods are used by the library
+e.g. to convert GFA1 L lines to GFA2 E lines (which is only possible if CIGARs
+are provided).
+
+.. doctest::
+
+ >>> cigar = gfapy.Alignment("30M10D20M5I10M")
+ >>> cigar.length_on_reference()
+ 70
+ >>> cigar.length_on_query()
+ 65
+
+CIGARs are dependent on which sequence is taken as reference and which
+is taken as query. For each alignment, a complement CIGAR can be
+computed using the method
+:func:`~gfapy.alignment.cigar.CIGAR.complement`; it is the CIGAR obtained
+when the two sequences are switched.
+
+.. doctest::
+
+ >>> cigar = gfapy.Alignment("2M1D3M")
+ >>> str(cigar.complement())
+ '3M1I2M'
+
+The current version of Gfapy does not provide a way to compute the
+alignment; thus the trace information can be accessed and edited, but
+not used for this purpose. For the same reason there is currently no way
+in Gfapy to compute a complement trace (the trace obtained when the
+sequences are switched).
+
+.. doctest::
+
+ >>> trace = gfapy.Alignment("1,2,3")
+ >>> str(trace.complement())
+ '*'
+
+The complement of a placeholder is a placeholder:
+
+.. doctest::
+
+ >>> str(gfapy.Alignment("*").complement())
+ '*'
diff --git a/doc/tutorial/comments.rst b/doc/tutorial/comments.rst
new file mode 100644
index 0000000..94ca5e2
--- /dev/null
+++ b/doc/tutorial/comments.rst
@@ -0,0 +1,71 @@
+.. testsetup:: *
+
+ import gfapy
+ g = gfapy.Gfa()
+
+.. _comments:
+
+Comments
+--------
+
+GFA lines starting with a ``#`` symbol are considered comments. In Gfapy
+comments are represented by instances of the class :class:`gfapy.line.Comment
+<gfapy.line.comment.comment.Comment>`. They have a similar interface to other
+line instances, with some differences, e.g. they do not support tags.
+
+The comments collection
+~~~~~~~~~~~~~~~~~~~~~~~
+
+The comments of a Gfa object are accessed using the :func:`Gfa.comments
+<gfapy.lines.collections.Collections.comments>` property. This is a list of
+comment line instances. The single elements can be modified, but the list
+itself is read-only. To remove a comment from the Gfa, you need to find the
+instance in the list, and call
+:func:`~gfapy.line.common.disconnection.Disconnection.disconnect` on it.
+Adding a comment to a :class:`~gfapy.gfa.Gfa` instance is done similarly to
+other lines, by using the :func:`Gfa.add_line(line)
+<gfapy.lines.creators.Creators.add_line>` method.
+
+.. doctest::
+
+ >>> g.add_line("# this is a comment") #doctest: +ELLIPSIS
+ >>> [str(c) for c in g.comments]
+ ['# this is a comment']
+ >>> g.comments[0].disconnect()
+ >>> g.comments
+ []
+
+Accessing the comment content
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The content of the comment line, excluding the initial ``#`` and any
+initial spacing characters, is included in the ``content`` field. The initial
+spacing characters can be read/changed using the ``spacer`` field. The default
+value is a single space.
+
+.. doctest::
+
+ >>> g.add_line("# this is a comment") #doctest: +ELLIPSIS
+ >>> c = g.comments[-1]
+ >>> c.content
+ 'this is a comment'
+ >>> c.spacer
+ ' '
+ >>> c.spacer = '___'
+ >>> str(c)
+ '#___this is a comment'
+
+Tags are not supported by comment lines. If the line contains tags,
+these are not parsed, but included in the ``content`` field. Trying to set
+tags raises an exception.
+
+.. doctest::
+
+ >>> c = gfapy.Line("# this is not a tag\txx:i:1")
+ >>> c.content
+ 'this is not a tag\txx:i:1'
+ >>> c.xx
+ >>> c.xx = 1
+ Traceback (most recent call last):
+ ...
+ gfapy.error.RuntimeError: Tags of comment lines cannot be set
diff --git a/doc/tutorial/custom_records.rst b/doc/tutorial/custom_records.rst
new file mode 100644
index 0000000..c0887c7
--- /dev/null
+++ b/doc/tutorial/custom_records.rst
@@ -0,0 +1,296 @@
+.. testsetup:: *
+
+ import gfapy
+ g = gfapy.Gfa(version = 'gfa2')
+
+.. _custom_records:
+
+Custom records
+--------------
+
+The GFA2 specification considers each line which starts with a non-standard
+record type a custom (i.e. user- or program-specific) record.
+Gfapy allows retrieving these records and accessing their data using an
+interface similar to that for the predefined record types.
+
+Retrieving, adding and deleting custom records
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Gfa instances have the property
+:func:`~gfapy.lines.collections.Collections.custom_records`,
+a list of all line instances with a non-standard record type. Among these,
+records of a specific record type are retrieved using the method
+:func:`Gfa.custom_records_of_type(record_type)
+<gfapy.lines.collections.Collections.custom_records_of_type>`.
+Lines are added and deleted using the same methods
+(:func:`~gfapy.lines.creators.Creators.add_line` and
+:func:`~gfapy.line.common.disconnection.Disconnection.disconnect`) as for
+other line types.
+
+.. doctest::
+
+ >>> g.add_line("X\tcustom line") #doctest: +ELLIPSIS
+ >>> g.add_line("Y\tcustom line") #doctest: +ELLIPSIS
+ >>> [str(line) for line in g.custom_records] #doctest: +SKIP
+ ['X\tcustom line', 'Y\tcustom line']
+ >>> g.custom_record_keys #doctest: +SKIP
+ ['X', 'Y']
+ >>> [str(line) for line in g.custom_records_of_type('X')]
+ ['X\tcustom line']
+ >>> g.custom_records_of_type("X")[-1].disconnect()
+ >>> g.custom_records_of_type('X')
+ []
+
+Interface without extensions
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+If no extension (see :ref:`extensions` section) has been defined to handle a
+custom record type, the interface has some limitations: the field content is
+not validated, and the field names are unknown. The generic custom record
+class is employed
+(:class:`~gfapy.line.custom_record.custom_record.CustomRecord`).
+
+As the names of the positional fields in a custom record are not known, generic
+names ``field1``, ``field2``, ... are used. The number of positional fields is
+found by getting the length of the
+:attr:`~gfapy.line.custom_record.init.Init.positional_fieldnames` list.
+
+.. doctest::
+
+ >>> g.add_line("X\ta\tb\tcc:i:10\tdd:i:100") #doctest: +ELLIPSIS
+ >>> x = g.custom_records_of_type('X')[-1]
+ >>> len(x.positional_fieldnames)
+ 2
+ >>> x.field1
+ 'a'
+ >>> x.field2
+ 'b'
+
+Positional fields are allowed to contain any character (including non-printable
+characters and spacing characters), except tabs and newlines (as they are
+structural elements of the line). No further validation is performed.
+
+As Gfapy cannot know how many positional fields are present when parsing custom
+records, a heuristic approach is followed to identify tags. A field resembles
+a tag if it starts with ``tn:d:``, where ``tn`` is a valid tag name and ``d`` a
+valid tag datatype (see the :ref:`tags` chapter). The fields are parsed from
+the last to the first.
+
+As soon as a field is found which does not resemble a tag, all remaining fields
+are considered positionals (even if another field parsed later resembles a
+tag). Due to this, invalid tags are sometimes wrongly taken as positional
+fields (this can be avoided by writing an extension).
+
+.. doctest::
+
+ >>> g.add_line("X\ta\tb\tcc:i:10\tdd:i:100") #doctest: +ELLIPSIS
+ >>> x1 = g.custom_records_of_type("X")[-1]
+ >>> x1.cc
+ 10
+ >>> x1.dd
+ 100
+ >>> g.add_line("X\ta\tb\tcc:i:10\tdd:i:100\te") #doctest: +ELLIPSIS
+ >>> x2 = g.custom_records_of_type("X")[-1]
+ >>> x2.cc
+ >>> x2.field3
+ 'cc:i:10'
+ >>> g.add_line("Z\ta\tb\tcc:i:10\tddd:i:100") #doctest: +ELLIPSIS
+ >>> x3 = g.custom_records_of_type("Z")[-1]
+ >>> x3.cc
+ >>> x3.field3
+ 'cc:i:10'
+ >>> x3.field4
+ 'ddd:i:100'
+
+.. _extensions:
+
+Extensions
+~~~~~~~~~~
+
+The support for custom fields is limited, as Gfapy does not know which and how
+many fields are present and how they shall be validated. It is possible to
+create an extension of Gfapy which defines new record types: this allows
+using these record types in a similar way to the built-in types.
+
+As an example, an extension will be described, which defines two record types:
+T for taxa and M for assignments of segments to taxa. For further information
+about a possible use case for this extension, see the Supplemental
+Information to the manuscript describing Gfapy.
+
+The T records will contain a single positional field, ``tid``, a GFA2
+identifier, and an optional UL string tag. The M records will contain three
+positional fields (all three GFA2 identifiers): a name field ``mid`` (optional),
+and two references, ``tid`` to a T line and ``sid`` to an S line. The SC
+integer tag will also be defined. Here is an example of a GFA containing M and
+T lines:
+
+.. code::
+
+ S sA 1000 *
+ S sB 1000 *
+ M assignment1 t123 sA SC:i:40
+ M assignment2 t123 sB
+ M * B12c sB SC:i:20
+ T B12c
+ T t123 UL:Z:http://www.taxon123.com
+
+By writing subclasses of the :class:`~gfapy.line.line.Line` class, it is
+possible to tell Gfapy how records of the M and T types shall be handled. This
+only requires defining some constants and calling the class method
+:func:`~gfapy.line.line.Line.register_extension`.
+
+The constants to define are ``RECORD_TYPE``, which shall be the content
+of the record type field (e.g. ``M``); ``POSFIELDS``, which shall contain an
+ordered dict, specifying the datatype for each positional field, in the order
+these fields are found in the line; ``TAGS_DATATYPE``, a dict, specifying the
+datatype of the predefined optional tags; and ``NAME_FIELD``, a field name,
+which specifies which field contains the identifier of the line.
+For details on predefined and custom datatypes, see the next sections
+(:ref:`predefined_datatypes` and :ref:`custom_datatypes`).
+
+To handle references, :func:`~gfapy.line.line.Line.register_extension`
+can be supplied with a ``references`` parameter, a list of triples
+``(fieldname, classname, backreferences)``. Thereby ``fieldname`` is the name
+of the field in the corresponding record containing the reference (e.g.
+``sid``), ``classname`` is the class to which the reference goes
+(e.g. ``gfapy.line.segment.GFA2``), and ``backreferences`` is how the
+collection of backreferences shall be called in the records to which the
+reference points (e.g. ``metagenomic_assignments``).
+
+.. code:: python
+
+ from collections import OrderedDict
+
+ class Taxon(gfapy.Line):
+ RECORD_TYPE = "T"
+ POSFIELDS = OrderedDict([("tid","identifier_gfa2")])
+ TAGS_DATATYPE = {"UL":"Z"}
+ NAME_FIELD = "tid"
+
+ Taxon.register_extension()
+
+ class MetagenomicAssignment(gfapy.Line):
+ RECORD_TYPE = "M"
+ POSFIELDS = OrderedDict([("mid","optional_identifier_gfa2"),
+ ("tid","identifier_gfa2"),
+ ("sid","identifier_gfa2")])
+ TAGS_DATATYPE = {"SC":"i"}
+ NAME_FIELD = "mid"
+
+ MetagenomicAssignment.register_extension(references=
+ [("sid", gfapy.line.segment.GFA2, "metagenomic_assignments"),
+ ("tid", Taxon, "metagenomic_assignments")])
+
+.. _predefined_datatypes:
+
+Predefined datatypes for extensions
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The datatype of fields is specified in Gfapy using classes, which provide
+functions for decoding, encoding and validating the corresponding data.
+Gfapy contains a number of datatypes which correspond to the description
+of the field content in the GFA1 and GFA2 specification.
+
+When writing extensions only the GFA2 field datatypes are generally used
+(as GFA1 does not contain custom fields). They are summarized in
+the following table:
+
++-------------------------------------+---------------+--------------------------------------------------------+
+| Name | Example | Description |
++=====================================+===============+========================================================+
+| ``alignment_gfa2`` | ``12M1I3M`` | CIGAR string, Trace alignment or Placeholder (``*``) |
++-------------------------------------+---------------+--------------------------------------------------------+
+| ``identifier_gfa2`` | ``S1`` | ID of a line |
++-------------------------------------+---------------+--------------------------------------------------------+
+| ``oriented_identifier_gfa2`` | ``S1+`` | ID of a line followed by ``+`` or ``-`` |
++-------------------------------------+---------------+--------------------------------------------------------+
+| ``optional_identifier_gfa2`` | ``*`` | ID of a line or Placeholder (``*``) |
++-------------------------------------+---------------+--------------------------------------------------------+
+| ``identifier_list_gfa2`` | ``S1 S2`` | space separated list of line IDs |
++-------------------------------------+---------------+--------------------------------------------------------+
+| ``oriented_identifier_list_gfa2`` | ``S1+ S2-`` | space separated list of line IDs plus orientations |
++-------------------------------------+---------------+--------------------------------------------------------+
+| ``position_gfa2`` | ``120$`` | non-negative integer, optionally followed by ``$`` |
++-------------------------------------+---------------+--------------------------------------------------------+
+| ``sequence_gfa2`` | ``ACGNNYR`` | sequence of printable chars., no whitespace |
++-------------------------------------+---------------+--------------------------------------------------------+
+| ``string`` | ``a b_c;d`` | string, no tabs and newlines (Z tags) |
++-------------------------------------+---------------+--------------------------------------------------------+
+| ``char`` | ``A`` | single character (A tags) |
++-------------------------------------+---------------+--------------------------------------------------------+
+| ``float`` | ``1.12`` | float (f tags) |
++-------------------------------------+---------------+--------------------------------------------------------+
+| ``integer`` | ``-12`` | integer (i tags) |
++-------------------------------------+---------------+--------------------------------------------------------+
+| ``optional_integer`` | ``*`` | integer or placeholder |
++-------------------------------------+---------------+--------------------------------------------------------+
+| ``numeric_array`` | ``c,10,3`` | array of integers or floats (B tags) |
++-------------------------------------+---------------+--------------------------------------------------------+
+| ``byte_array`` | ``12F1FF`` | hexadecimal byte string (H tags) |
++-------------------------------------+---------------+--------------------------------------------------------+
+| ``json`` | ``{'b':2}`` | JSON string, no tabs and newlines (J tags) |
++-------------------------------------+---------------+--------------------------------------------------------+
+
+.. _custom_datatypes:
+
+Custom datatypes for extensions
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+For custom records, one sometimes needs datatypes not yet available in the GFA
+specification. For example, a custom datatype can be defined for
+the taxon identifier used in the ``tid`` field of the T and M records:
+accordingly, the taxon identifier shall either be
+in the form ``taxon:<n>``, where ``<n>`` is a positive integer,
+or consist of letters, numbers and underscores only
+(without ``:``).
+
+To define the datatype, a class is written, which contains the following
+functions:
+
+* ``validate_encoded(string)``: validates the content of the field,
+ if this is a string (e.g., the name of the T line)
+* ``validate_decoded(object)``: validates the content of the field,
+ if this is not a string (e.g., a reference to a T line)
+* ``decode(string)``: validates the content of the field (a string)
+ and returns the decoded content; note that references must not be resolved
+ (there is no access to the Gfa instance here), thus the name of the
+ T line will be returned unchanged
+* ``encode(obj)``: validates the content of the field (given in non-string
+ form) and returns the string which encodes it in the GFA file (also here
+ references are validated but not converted into strings)
+
+Finally the datatype is registered calling
+:func:`~gfapy.field.field.Field.register_datatype`. The code for
+the taxon ID extension is the following:
+
+.. code:: python
+
+ import re
+
+ class TaxonID:
+
+ def validate_encoded(string):
+ if not re.match(r"^taxon:(\d+)$",string) and \
+ not re.match(r"^[a-zA-Z0-9_]+$", string):
+ raise gfapy.ValueError("Invalid taxon ID: {}".format(string))
+
+ def decode(string):
+ TaxonID.validate_encoded(string)
+ return string
+
+ def validate_decoded(obj):
+ if isinstance(obj,Taxon):
+ TaxonID.validate_encoded(obj.name)
+ else:
+ raise gfapy.TypeError(
+ "Invalid type for taxon ID: "+"{}".format(repr(obj)))
+
+ def encode(obj):
+ TaxonID.validate_decoded(obj)
+ return obj
+
+ gfapy.Field.register_datatype("taxon_id", TaxonID)
+
+To use the new datatype in the T and M lines defined above (:ref:`extensions`),
+the definition of the two subclasses can be changed:
+in ``POSFIELDS`` the value ``taxon_id`` shall be assigned to the key ``tid``.
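+
+For example, the two subclasses could be redefined as follows (a sketch which
+assumes that the imports, the ``TaxonID`` class and its registration from the
+previous sections have already been executed; only the datatype assigned to
+``tid`` changes with respect to the previous definitions):
+
+.. code:: python
+
+ class Taxon(gfapy.Line):
+     RECORD_TYPE = "T"
+     POSFIELDS = OrderedDict([("tid", "taxon_id")])  # custom datatype
+     TAGS_DATATYPE = {"UL": "Z"}
+     NAME_FIELD = "tid"
+
+ Taxon.register_extension()
+
+ class MetagenomicAssignment(gfapy.Line):
+     RECORD_TYPE = "M"
+     POSFIELDS = OrderedDict([("mid", "optional_identifier_gfa2"),
+                              ("tid", "taxon_id"),  # custom datatype
+                              ("sid", "identifier_gfa2")])
+     TAGS_DATATYPE = {"SC": "i"}
+     NAME_FIELD = "mid"
+
+ MetagenomicAssignment.register_extension(references=
+     [("sid", gfapy.line.segment.GFA2, "metagenomic_assignments"),
+      ("tid", Taxon, "metagenomic_assignments")])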
diff --git a/doc/tutorial/errors.rst b/doc/tutorial/errors.rst
new file mode 100644
index 0000000..0d1800e
--- /dev/null
+++ b/doc/tutorial/errors.rst
@@ -0,0 +1,43 @@
+.. _errors:
+
+Errors
+------
+
+The different types of errors defined in Gfapy are summarized in the
+following table. All exceptions raised in the library are subclasses of
+`Error`. Thus, ``except gfapy.Error`` can be used to catch
+all library errors (see the example after the table).
+
++-----------------------+-------------------------------+---------------------------------+
+| Error | Description | Examples |
++=======================+===============================+=================================+
+| `VersionError` | An unknown or wrong version | "GFA0"; or GFA1 in GFA2 context |
+| | is specified or implied | |
++-----------------------+-------------------------------+---------------------------------+
+| `ValueError` | The value of an object is | a negative position is used |
+| | invalid | |
++-----------------------+-------------------------------+---------------------------------+
+| `TypeError` | The wrong type has been used | Z instead of i used for VN tag; |
+| | or specified | Hash for an i tag |
++-----------------------+-------------------------------+---------------------------------+
+| `FormatError` | The format of an object is | a line does not contain the |
+| | wrong | expected number of fields |
++-----------------------+-------------------------------+---------------------------------+
+| `NotUniqueError` | Something should be unique | duplicated tag name or line |
+| | but is not | identifier |
++-----------------------+-------------------------------+---------------------------------+
+| `InconsistencyError` | Pieces of information collide | length of sequence and LN tag |
+| | with each other | do not match |
++-----------------------+-------------------------------+---------------------------------+
+| `RuntimeError` | The user tried to do | editing from/to field in |
+| | something which is not | connected links |
+| | allowed | |
++-----------------------+-------------------------------+---------------------------------+
+| `ArgumentError` | Problem with the arguments of | wrong number of arguments in |
+| | a method | dynamically created method |
++-----------------------+-------------------------------+---------------------------------+
+| `AssertionError` | Something unexpected happened | there is a bug in the library or|
+| | | the library has been used in |
+| | | an unintended way |
++-----------------------+-------------------------------+---------------------------------+
+
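+Below is a small sketch of how library errors can be caught; it reuses the
+version conflict shown in the :ref:`gfa` chapter and assumes that the error
+classes are accessible directly from the ``gfapy`` package:
+
+.. code:: python
+
+ import gfapy
+
+ g = gfapy.Gfa(version="gfa2")
+ try:
+     g.add_line("S\t1\t*")  # GFA1-style segment in a GFA2 graph
+ except gfapy.VersionError as err:
+     print("version problem: {}".format(err))
+ except gfapy.Error as err:
+     print("other Gfapy error: {}".format(err))
+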
diff --git a/doc/tutorial/gfa.rst b/doc/tutorial/gfa.rst
new file mode 100644
index 0000000..aa514e4
--- /dev/null
+++ b/doc/tutorial/gfa.rst
@@ -0,0 +1,409 @@
+.. testsetup:: *
+
+ import gfapy
+ gfa = gfapy.Gfa()
+ gfa1 = gfapy.Gfa()
+ gfa1.add_line("H\tVN:Z:1.0")
+ gfa1.add_line("# this is a comment")
+ gfa1.add_line("S\t1\t*")
+ gfa1.add_line("S\t2\t*")
+ gfa1.add_line("S\t3\t*")
+ gfa2 = gfapy.Gfa()
+ gfa2.add_line("H\tVN:Z:2.0\tTS:i:100")
+ gfa2.add_line("X\tcustom line")
+ gfa2.add_line("Y\tcustom line")
+
+.. _gfa:
+
+The Gfa class
+-------------
+
+The content of a GFA file is represented in Gfapy by an instance of the class
+:class:`~gfapy.gfa.Gfa`. In most cases, the Gfa instance will be constructed
+from the data contained in a GFA file, using the method
+:func:`Gfa.from_file() <gfapy.gfa.Gfa.from_file>`.
+
+Alternatively, it is possible to use the constructor of the class; it takes an
+optional positional parameter, the content of a GFA file (as a string, or as a
+list of strings, one per line of the GFA file). If no GFA content is provided,
+the Gfa instance will be empty.
+
+.. doctest::
+
+ >>> gfa = gfapy.Gfa("H\tVN:Z:1.0\nS\tA\t*")
+ >>> print(len(gfa.lines))
+ 2
+ >>> gfa = gfapy.Gfa(["H\tVN:Z:1.0", "S\tA\t*", "S\tB\t*"])
+ >>> print(len(gfa.lines))
+ 3
+ >>> gfa = gfapy.Gfa()
+ >>> print(len(gfa.lines))
+ 0
+
+The string representation of the Gfa object (which can be obtained using
+``str()``) is the textual representation in GFA format.
+Using :func:`Gfa.to_file(filename) <gfapy.gfa.Gfa.to_file>` allows
+writing this representation to a GFA file (the content of the file is
+overwritten).
+
+.. doctest::
+
+ >>> g1 = gfapy.Gfa()
+ >>> g1.append("H\tVN:Z:1.0")
+ >>> g1.append("S\ta\t*")
+ >>> g1.to_file("my.gfa") #doctest: +SKIP
+ >>> g2 = gfapy.Gfa.from_file("my.gfa") #doctest: +SKIP
+ >>> str(g1)
+ 'H\tVN:Z:1.0\nS\ta\t*'
+
+
+All methods for creating a Gfa (constructor and from_file) accept
+a ``vlevel`` parameter, the validation level,
+which can assume the values 0, 1, 2 and 3. A higher value means
+more validations are performed. The :ref:`validation` chapter explains
+the meaning of the different validation levels in detail.
+The default value is 1.
+
+.. doctest::
+
+ >>> gfapy.Gfa().vlevel
+ 1
+ >>> gfapy.Gfa(vlevel = 0).vlevel
+ 0
+
+A further parameter is ``version``. It can be set to ``'gfa1'``,
+``'gfa2'`` or left to the default value (``None``). The default
+is to auto-detect the version of the GFA from the line content.
+If the version is set manually, any content not compatible with the
+specified version will trigger an exception. If the version is
+set automatically, an exception will be raised if two lines
+are found with content incompatible with each other (e.g. a GFA1
+segment followed by a GFA2 segment).
+
+.. doctest::
+
+ >>> g = gfapy.Gfa(version='gfa2')
+ >>> g.version
+ 'gfa2'
+ >>> g.add_line("S\t1\t*")
+ Traceback (most recent call last):
+ ...
+ gfapy.error.VersionError: Version: 1.0 (None)
+ ...
+ >>> g = gfapy.Gfa()
+ >>> g.version
+ >>> g.add_line("S\t1\t*")
+ >>> g.version
+ 'gfa1'
+ >>> g.add_line("S\t1\t100\t*")
+ Traceback (most recent call last):
+ ...
+ gfapy.error.VersionError: Version: 1.0 (None)
+ ...
+
+Collections of lines
+~~~~~~~~~~~~~~~~~~~~
+
+The property :attr:`~gfapy.lines.collections.Collections.lines`
+of the Gfa object is a list of all the lines
+in the GFA file (including the header, which is split into single-tag
+lines). The list itself shall not be modified by the user directly (i.e.
+adding and removing lines is done using a different interface, see
+below). However the single elements of the list can be edited.
+
+.. doctest::
+
+ >>> for line in gfa.lines: print(line)
+
+For most record types, a list of the lines of the record type is available
+as a read-only property, which is named after the record type, in plural.
+
+.. doctest::
+
+ >>> [str(line) for line in gfa1.segments]
+ ['S\t1\t*', 'S\t3\t*', 'S\t2\t*']
+ >>> [str(line) for line in gfa2.fragments]
+ []
+
+A particular case are edges: in GFA1 these are links and containments, while in
+GFA2 there is a unified edge record type, which also allows representing
+internal alignments. In Gfapy, the
+:attr:`~gfapy.lines.collections.Collections.edges` property retrieves all edges
+(i.e. all E lines in GFA2, and all L and C lines in GFA1). The
+:attr:`~gfapy.lines.collections.Collections.dovetails` property is a list of
+all edges which represent dovetail overlaps (i.e. all L lines in GFA1 and a
+subset of the E lines in GFA2). The
+:attr:`~gfapy.lines.collections.Collections.containments` property is a list of
+all edges which represent containments (i.e. all C lines in GFA1 and a subset
+of the E lines in GFA2).
+
+.. doctest::
+
+ >>> gfa2.edges
+ []
+ >>> gfa2.dovetails
+ []
+ >>> gfa2.containments
+ []
+
+Paths are retrieved using the
+:attr:`~gfapy.lines.collections.Collections.paths` property. This list
+contains all P lines in GFA1 and all O lines in GFA2. The ``sets`` property
+returns the list of all U lines in GFA2 (an empty list in GFA1).
+
+.. doctest::
+
+ >>> gfa2.paths
+ []
+ >>> gfa2.sets
+ []
+
+The header contains metadata in a single or multiple lines. For ease of
+access to the header information, all its tags are summarized in a
+single line instance, which is retrieved using the
+:attr:`~gfapy.lines.headers.Headers.header` property.
+The :ref:`header` chapter of this manual explains in more
+detail how to work with the header object.
+
+.. doctest::
+
+ >>> gfa2.header.TS
+ 100
+
+All lines which start by the string ``#`` are comments; they are handled in
+the :ref:`comments` chapter and are retrieved using the
+:attr:`~gfapy.lines.collections.Collections.comments` property.
+
+.. doctest::
+
+ >>> [str(line) for line in gfa1.comments]
+ ['# this is a comment']
+
+Custom lines are lines of GFA2 files which start
+with a non-standard record type. Gfapy provides basic built-in support
+for accessing the information in custom lines, and allows defining
+extensions for own record types, providing more advanced
+functionality (see the :ref:`custom_records` chapter).
+
+.. doctest::
+
+ >>> [str(line) for line in gfa2.custom_records]
+ ['Y\tcustom line', 'X\tcustom line']
+ >>> gfa2.custom_record_keys
+ ['Y', 'X']
+ >>> [str(line) for line in gfa2.custom_records_of_type('X')]
+ ['X\tcustom line']
+
+Line identifiers
+~~~~~~~~~~~~~~~~
+
+Some GFA lines have a mandatory or optional identifier field: segments and
+paths in GFA1, segments, gaps, edges, paths and sets in GFA2. A line of this
+type can be retrieved by identifier, using the method
+:func:`Gfa.line(ID) <gfapy.gfa.Gfa.line>` with the identifier as argument.
+
+.. doctest::
+
+ >>> str(gfa1.line('1'))
+ 'S\t1\t*'
+
+The GFA2 specification prescribes the exact namespace for identifiers
+(segment, path, set, edge and gap identifiers share the same namespace).
+The content of this namespace can be retrieved using the
+:attr:`~gfapy.lines.collections.Collections.names` property.
+The identifiers of single line types
+can be retrieved using the properties
+:attr:`~gfapy.lines.collections.Collections.segment_names`,
+:attr:`~gfapy.lines.collections.Collections.edge_names`,
+:attr:`~gfapy.lines.collections.Collections.gap_names`,
+:attr:`~gfapy.lines.collections.Collections.path_names` and
+:attr:`~gfapy.lines.collections.Collections.set_names`.
+
+.. doctest::
+
+ >>> g = gfapy.Gfa()
+ >>> g.add_line("S\tA\t100\t*")
+ >>> g.add_line("S\tB\t100\t*")
+ >>> g.add_line("S\tC\t100\t*")
+ >>> g.add_line("E\tb_c\tB+\tC+\t0\t10\t90\t100$\t*")
+ >>> g.add_line("O\tp1\tB+ C+")
+ >>> g.add_line("U\ts1\tA b_c g")
+ >>> g.add_line("G\tg\tA+\tB-\t1000\t*")
+ >>> g.names
+ ['B', 'C', 'A', 'b_c', 'g', 'p1', 's1']
+ >>> g.segment_names
+ ['B', 'C', 'A']
+ >>> g.path_names
+ ['p1']
+ >>> g.edge_names
+ ['b_c']
+ >>> g.gap_names
+ ['g']
+ >>> g.set_names
+ ['s1']
+
+The GFA1 specification does not handle the question of the namespace of
+identifiers explicitly. However, Gfapy assumes and enforces
+a single namespace for segment names, path names and the values of the ID tags
+of L and C lines. The content of this namespace can be found using the
+:attr:`~gfapy.lines.collections.Collections.names` property.
+The identifiers of single line types
+can be retrieved using the properties
+:attr:`~gfapy.lines.collections.Collections.segment_names`,
+:attr:`~gfapy.lines.collections.Collections.edge_names`
+(ID tags of links and containments) and
+:attr:`~gfapy.lines.collections.Collections.path_names`.
+For GFA1, the properties
+:attr:`~gfapy.lines.collections.Collections.gap_names` and
+:attr:`~gfapy.lines.collections.Collections.set_names`
+always contain empty lists.
+
+.. doctest::
+
+ >>> g = gfapy.Gfa()
+ >>> g.add_line("S\tA\t*")
+ >>> g.add_line("S\tB\t*")
+ >>> g.add_line("S\tC\t*")
+ >>> g.add_line("L\tB\t+\tC\t+\t*\tID:Z:b_c")
+ >>> g.add_line("P\tp1\tB+,C+\t*")
+ >>> g.names
+ ['B', 'C', 'A', 'b_c', 'p1']
+ >>> g.segment_names
+ ['B', 'C', 'A']
+ >>> g.path_names
+ ['p1']
+ >>> g.edge_names
+ ['b_c']
+ >>> g.gap_names
+ []
+ >>> g.set_names
+ []
+
+Identifiers of external sequences
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Fragments contain identifiers which refer to external sequences
+(not contained in the GFA file). According to the specification,
+these identifiers are not part of the same namespace as the identifiers
+of the GFA lines. They can be retrieved using the
+:attr:`~gfapy.lines.collections.Collections.external_names`
+property.
+
+.. doctest::
+
+ >>> g = gfapy.Gfa()
+ >>> g.add_line("S\tA\t100\t*")
+ >>> g.add_line("F\tA\tread1+\t10\t30\t0\t20$\t20M")
+ >>> g.external_names
+ ['read1']
+
+The method
+:func:`Gfa.fragments_for_external(external_ID) <gfapy.lines.finders.Finders.fragments_for_external>`
+retrieves all F lines with a specified external sequence identifier.
+
+.. doctest::
+
+ >>> f = g.fragments_for_external('read1')
+ >>> len(f)
+ 1
+ >>> str(f[0])
+ 'F\tA\tread1+\t10\t30\t0\t20$\t20M'
+
+Adding new lines
+~~~~~~~~~~~~~~~~
+
+New lines can be added to a Gfa instance using the
+:func:`Gfa.add_line(line) <gfapy.lines.creators.Creators.add_line>`
+method or its alias
+:func:`Gfa.append(line) <gfapy.lines.creators.Creators.append>`.
+The argument can be either a string
+describing a line with valid GFA syntax, or a :class:`~gfapy.line.line.Line`
+instance. If a string is added, a line instance is created and
+then added.
+
+.. doctest::
+
+ >>> g = gfapy.Gfa()
+ >>> g.add_line("S\tA\t*") #doctest: +ELLIPSIS
+ >>> g.segment_names
+ ['A']
+ >>> g.append("S\tB\t*") #doctest: +ELLIPSIS
+ >>> g.segment_names
+ ['B', 'A']
+
+Editing the lines
+~~~~~~~~~~~~~~~~~
+
+Accessing the information stored in the fields of a line instance is
+described in the :ref:`positional_fields` and :ref:`tags` chapters.
+
+In Gfapy, a line instance belonging to a Gfa instance is said
+to be *connected* to the Gfa instance. Directly editing the content of a
+connected line is only possible for those fields which do not contain
+references to other lines. For more information on how to modify the content of
+the fields of connected lines, see the :ref:`references` chapter.
+
+.. doctest::
+
+ >>> g = gfapy.Gfa()
+ >>> e = gfapy.Line("E\t*\tA+\tB-\t0\t10\t90\t100$\t*")
+ >>> e.sid1 = "C+"
+ >>> g.add_line(e) #doctest: +ELLIPSIS
+ >>> e.sid1 = "A+"
+ Traceback (most recent call last):
+ gfapy.error.RuntimeError: ...
+
+Removing lines
+~~~~~~~~~~~~~~
+
+Disconnecting a line from the Gfa instance is done using the
+:func:`Gfa.rm(line) <gfapy.lines.destructors.Destructors.rm>` method. The
+argument can be a line instance or the name of a line.
+
+Alternatively, a line instance can also be disconnected by calling the
+`disconnect` method on it. Disconnecting a line
+may trigger other operations, such as the disconnection of other lines (see the
+:ref:`references` chapter).
+
+.. doctest::
+
+ >>> g = gfapy.Gfa()
+ >>> g.add_line("S\tA\t*") #doctest: +ELLIPSIS
+ >>> g.segment_names
+ ['A']
+ >>> g.rm('A') #doctest: +ELLIPSIS
+ >>> g.segment_names
+ []
+ >>> g.append("S\tB\t*") #doctest: +ELLIPSIS
+ >>> g.segment_names
+ ['B']
+ >>> b = g.line('B')
+ >>> b.disconnect()
+ >>> g.segment_names
+ []
+
+Renaming lines
+~~~~~~~~~~~~~~
+
+Lines with an identifier can be renamed. This is done simply by editing
+the corresponding field (such as ``name`` or ``sid`` for a segment).
+This field is not a reference to another line and can be freely edited
+also in line instances connected to a Gfa. All references to the line
+from other lines will still be up to date, as they will refer to the
+same instance (whose name has been changed) and their string
+representation will use the new name.
+
+.. doctest::
+
+ >>> g = gfapy.Gfa()
+ >>> g.add_line("S\tA\t*") #doctest: +ELLIPSIS
+ >>> g.add_line("L\tA\t+\tB\t-\t*") #doctest: +ELLIPSIS
+ >>> g.segment_names
+ ['B', 'A']
+ >>> g.dovetails[0].from_name
+ 'A'
+ >>> g.segment('A').name = 'C'
+ >>> g.segment_names
+ ['B', 'C']
+ >>> g.dovetails[0].from_name
+ 'C'
diff --git a/doc/tutorial/graph_operations.rst b/doc/tutorial/graph_operations.rst
new file mode 100644
index 0000000..6cc2a8d
--- /dev/null
+++ b/doc/tutorial/graph_operations.rst
@@ -0,0 +1,14 @@
+.. _graph_operations:
+
+Graph operations
+----------------
+
+Graph operations such as merging of linear paths, multiplication of
+segments and others are provided. These operations are implemented
+in analogy to those provided by the Ruby library RGFA. As RGFA only
+handles GFA1 graphs, only dovetail overlaps are considered as
+connections. A detailed description of the operations can be
+found in Gonnella and Kurtz (2016). More information about the
+single operations is found in the method documentation of the
+submodules of `GraphOperations`.
+
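+As a rough sketch, such operations can be invoked directly on a Gfa instance;
+the method names ``multiply`` and ``merge_linear_paths`` used here are
+assumptions and should be checked against the method documentation of the
+`GraphOperations` submodules:
+
+.. code:: python
+
+ import gfapy
+
+ gfa = gfapy.Gfa.from_file("assembly.gfa")  # hypothetical input file
+
+ # create two copies of segment "sA"
+ gfa.multiply("sA", 2)
+
+ # merge linear stretches of segments into single segments
+ gfa.merge_linear_paths()
+
+ gfa.to_file("assembly_simplified.gfa")
+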
diff --git a/doc/tutorial/header.rst b/doc/tutorial/header.rst
new file mode 100644
index 0000000..b49f49e
--- /dev/null
+++ b/doc/tutorial/header.rst
@@ -0,0 +1,169 @@
+.. testsetup:: *
+
+ import gfapy
+ gfa = gfapy.Gfa()
+
+.. _header:
+
+The Header
+----------
+
+GFA files may contain one or multiple header lines (record type: "H"). These
+lines may be present in any part of the file, not necessarily at the beginning.
+
+Although the header may consist of multiple lines, its content refers to the
+whole file. Therefore in Gfapy the header is accessed using a single line
+instance (accessible by the :attr:`~gfapy.lines.headers.Headers.header`
+property). Header lines contain only tags. If no header line is present in the
+Gfa, then the header line object will be empty (i.e. contain no tags).
+
+Note that header lines cannot be connected to the Gfa like other lines (i.e.
+calling :meth:`~gfapy.line.common.connection.Connection.connect` on them raises
+an exception). Instead they must be merged into the existing Gfa header, using
+`add_line` on the Gfa instance.
+
+.. doctest::
+
+ >>> gfa.add_line("H\tnn:f:1.0") #doctest: +ELLIPSIS
+ >>> gfa.header.nn
+ 1.0
+ >>> gfapy.Line("H\tnn:f:1.0").connect(gfa)
+ Traceback (most recent call last):
+ ...
+ gfapy.error.RuntimeError: ...
+
+Multiple definitions of the predefined header tags
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+For the predefined tags (``VN`` and ``TS``), the presence of multiple
+values in different lines is an error, unless the value is the same in
+each instance (in which case the repeated definitions are ignored).
+
+.. doctest::
+
+ >>> gfa.add_line("H\tVN:Z:1.0") #doctest: +ELLIPSIS
+ >>> gfa.add_line("H\tVN:Z:1.0") # ignored #doctest: +ELLIPSIS
+ >>> gfa.add_line("H\tVN:Z:2.0")
+ Traceback (most recent call last):
+ ...
+ gfapy.error.VersionError: ...
+
+Multiple definitions of custom header tags
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+If each tag is present only once in the header in its entirety, the access to
+the tags is the same as for any other line (see the :ref:`tags` chapter).
+
+However, the specification does not forbid custom tags to be defined with
+different values in different header lines (which we name "multi-definition
+tags"). This particular case is handled in the next sections.
+
+Reading multi-definition tags
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Reading, validating and setting the datatype of multi-definition tags is done
+using the same methods as for all other lines (see the :ref:`tags` chapter).
+However, if a tag is defined multiple times on multiple H lines, reading the
+tag will return a list of the values on the lines. This list is an instance of
+``gfapy.FieldArray``, a subclass of ``list``.
+
+.. doctest::
+
+ >>> gfa.add_line("H\txx:i:1") #doctest: +ELLIPSIS
+ >>> gfa.add_line("H\txx:i:2") #doctest: +ELLIPSIS
+ >>> gfa.add_line("H\txx:i:3") #doctest: +ELLIPSIS
+ >>> gfa.header.xx
+ gfapy.FieldArray('i',[1, 2, 3])
+
+Setting tags
+~~~~~~~~~~~~
+
+There are two possibilities to set a tag for the header. The first is
+the normal tag interface (using ``set`` or the tag name property). The
+second is to use ``add``. The latter supports multi-definition tags,
+i.e. it adds the value to the previous ones (if any), instead of
+overwriting them.
+
+.. doctest::
+
+ >>> gfa = gfapy.Gfa()
+ >>> gfa.header.xx
+ >>> gfa.header.add("xx", 1)
+ >>> gfa.header.xx
+ 1
+ >>> gfa.header.add("xx", 2)
+ >>> gfa.header.xx
+ gfapy.FieldArray('i',[1, 2])
+ >>> gfa.header.set("xx", 3)
+ >>> gfa.header.xx
+ 3
+
+Modifying field array values
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Field arrays can be modified directly (e.g. adding new values or
+removing some values). After modification, the user may check if the
+array values remain compatible with the datatype of the tag using the
+:meth:`~gfapy.line.common.validate.Validate.validate_field` method.
+
+.. doctest::
+
+ >>> gfa = gfapy.Gfa()
+ >>> gfa.header.xx = gfapy.FieldArray('i',[1,2,3])
+ >>> gfa.header.xx
+ gfapy.FieldArray('i',[1, 2, 3])
+ >>> gfa.header.validate_field("xx")
+ >>> gfa.header.xx.append("X")
+ >>> gfa.header.validate_field("xx")
+ Traceback (most recent call last):
+ ...
+ gfapy.error.FormatError: ...
+
+If the field array is modified using array methods which return a list
+or data of any other type, a field array must be constructed, setting
+its datatype to the value returned by calling
+:meth:`~gfapy.line.common.field_datatype.FieldDatatype.get_datatype`
+on the header.
+
+.. doctest::
+
+ >>> gfa = gfapy.Gfa()
+ >>> gfa.header.xx = gfapy.FieldArray('i',[1,2,3])
+ >>> gfa.header.xx
+ gfapy.FieldArray('i',[1, 2, 3])
+ >>> gfa.header.xx = gfapy.FieldArray(gfa.header.get_datatype("xx"),
+ ... list(map(lambda x: x+1, gfa.header.xx)))
+ >>> gfa.header.xx
+ gfapy.FieldArray('i',[2, 3, 4])
+
+String representation of the header
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+For consistency with other line types, the string representation of the header
+is a single-line string, possibly non standard-compliant, if it contains
+multiple instances of a tag (and when calling
+:meth:`~gfapy.line.common.writer.Writer.field_to_s` for a tag present multiple
+times, the output string will contain the instances of the tag, separated by
+tabs).
+
+However, when the Gfa is output to a file or string, the header is split into
+multiple H lines with single tags, so that standard-compliant GFA is output.
+The split header can be retrieved using the
+:attr:`~gfapy.lines.headers.Headers.headers` property of the Gfa instance.
+
+.. doctest::
+
+ >>> gfa = gfapy.Gfa()
+ >>> gfa.header.VN = "1.0"
+ >>> gfa.header.xx = gfapy.FieldArray('i',[1,2])
+ >>> gfa.header.field_to_s("xx")
+ '1\t2'
+ >>> gfa.header.field_to_s("xx", tag=True)
+ 'xx:i:1\txx:i:2'
+ >>> str(gfa.header)
+ 'H\tVN:Z:1.0\txx:i:1\txx:i:2'
+ >>> [str(h) for h in gfa.headers]
+ ['H\tVN:Z:1.0', 'H\txx:i:1', 'H\txx:i:2']
+ >>> str(gfa)
+ 'H\tVN:Z:1.0\nH\txx:i:1\nH\txx:i:2'
+
diff --git a/doc/tutorial/placeholders.rst b/doc/tutorial/placeholders.rst
new file mode 100644
index 0000000..fd379c7
--- /dev/null
+++ b/doc/tutorial/placeholders.rst
@@ -0,0 +1,69 @@
+.. testsetup:: *
+
+ import gfapy
+
+.. _placeholders:
+
+Placeholders
+------------
+
+Some positional fields may contain an undefined value (S: ``sequence``;
+L/C: ``overlap``; P: ``overlaps``; E: ``eid``, ``alignment``; F:
+``alignment``; G: ``gid``, ``var``; U/O: ``pid``). In GFA this value is
+represented by a ``*``.
+
+In Gfapy the class `Placeholder` represents the undefined value.
+
+Distinguishing placeholders
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The :func:`gfapy.is_placeholder() <gfapy.placeholder.is_placeholder>` method
+allows checking if a value is a placeholder; a value is a placeholder if
+it is a `Placeholder` instance, or would represent
+a placeholder in GFA (the string ``*``), or would be represented
+by a placeholder in GFA (e.g. an empty list).
+
+.. doctest::
+
+ >>> gfapy.is_placeholder("*")
+ True
+ >>> gfapy.is_placeholder("**")
+ False
+ >>> gfapy.is_placeholder([])
+ True
+ >>> gfapy.is_placeholder(gfapy.Placeholder())
+ True
+
+Note that, as a placeholder is ``False`` in boolean context, a simple
+``if not placeholder`` also works if the value is an instance
+of `Placeholder`, but not always in the other cases (in particular not
+for the string representation ``*``).
+Therefore using
+:func:`gfapy.is_placeholder() <gfapy.placeholder.is_placeholder>`
+is better.
+
+.. doctest::
+
+ >>> if "*": print('* is not a placeholder')
+ * is not a placeholder
+ >>> if gfapy.is_placeholder("*"): print('but it represents a placeholder')
+ but it represents a placeholder
+
+Compatibility methods
+~~~~~~~~~~~~~~~~~~~~~
+
+Some methods are defined for placeholders, which allow them to respond
+to the same methods as defined values. This allows writing generic
+code.
+
+.. doctest::
+
+ >>> placeholder = gfapy.Placeholder()
+ >>> placeholder.validate() # does nothing
+ >>> len(placeholder)
+ 0
+ >>> placeholder[1]
+ gfapy.Placeholder()
+ >>> placeholder + 1
+ gfapy.Placeholder()
+
diff --git a/doc/tutorial/positional_fields.rst b/doc/tutorial/positional_fields.rst
new file mode 100644
index 0000000..329f2a8
--- /dev/null
+++ b/doc/tutorial/positional_fields.rst
@@ -0,0 +1,448 @@
+.. testsetup:: *
+
+ import gfapy
+ gfa = gfapy.Gfa()
+
+.. _positional_fields:
+
+Positional fields
+-----------------
+
+Most lines in GFA have positional fields (headers are an exception).
+During parsing, if a line is encountered which has too few or too many
+positional fields, an exception will be thrown. The correct number of
+positional fields is record type-specific.
+
+Positional fields are recognized by their position in the line. Each
+positional field has an implicit field name and datatype associated with
+it.
+
+Field names
+~~~~~~~~~~~
+
+The field names are derived from the specification. Lower case versions
+of the field names are used and spaces are substituted with underscores.
+In some cases, the field names were changed, as they represent keywords
+in common programming languages (``from``, ``send``).
+
+The following tables show the field names used in Gfapy, for each kind
+of line. Headers have no positional fields. Comments and custom records
+follow particular rules, see the respective chapters (:ref:`comments` and
+:ref:`custom_records`).
+
+GFA1 field names
+^^^^^^^^^^^^^^^^
+
++---------------+--------------------+---------------------+------------------+-----------------+---------------+---------------+
+| Record Type | Field 1 | Field 2 | Field 3 | Field 4 | Field 5 | Field 6 |
++===============+====================+=====================+==================+=================+===============+===============+
+| Segment | ``name`` | ``sequence`` | | | | |
++---------------+--------------------+---------------------+------------------+-----------------+---------------+---------------+
+| Link | ``from_segment`` | ``from_orient`` | ``to_segment`` | ``to_orient`` | ``overlap`` | |
++---------------+--------------------+---------------------+------------------+-----------------+---------------+---------------+
+| Containment | ``from_segment`` | ``from_orient`` | ``to_segment`` | ``to_orient`` | ``pos`` | ``overlap`` |
++---------------+--------------------+---------------------+------------------+-----------------+---------------+---------------+
+| Path | ``path_name`` | ``segment_names`` | ``overlaps`` | | | |
++---------------+--------------------+---------------------+------------------+-----------------+---------------+---------------+
+
+GFA2 field names
+^^^^^^^^^^^^^^^^
+
++---------------+-----------+----------------+----------------+-------------+-------------+-------------+-----------------+-----------------+
+| Record Type | Field 1 | Field 2 | Field 3 | Field 4 | Field 5 | Field 6 | Field 7 | Field 8 |
++===============+===========+================+================+=============+=============+=============+=================+=================+
+| Segment | ``sid`` | ``slen`` | ``sequence`` | | | | | |
++---------------+-----------+----------------+----------------+-------------+-------------+-------------+-----------------+-----------------+
+| Edge | ``eid`` | ``sid1`` | ``sid2`` | ``beg1`` | ``end1`` | ``beg2`` | ``end2`` | ``alignment`` |
++---------------+-----------+----------------+----------------+-------------+-------------+-------------+-----------------+-----------------+
+| Fragment | ``sid`` | ``external`` | ``s_beg`` | ``s_end`` | ``f_beg`` | ``f_end`` | ``alignment`` | |
++---------------+-----------+----------------+----------------+-------------+-------------+-------------+-----------------+-----------------+
+| Gap | ``gid`` | ``sid1`` | ``d1`` | ``d2`` | ``sid2`` | ``disp`` | ``var`` | |
++---------------+-----------+----------------+----------------+-------------+-------------+-------------+-----------------+-----------------+
+| Set | ``pid`` | ``items`` | | | | | | |
++---------------+-----------+----------------+----------------+-------------+-------------+-------------+-----------------+-----------------+
+| Path | ``pid`` | ``items`` | | | | | | |
++---------------+-----------+----------------+----------------+-------------+-------------+-------------+-----------------+-----------------+
+
+Datatypes
+~~~~~~~~~
+
+The datatype of each positional field is described in the specification
+and cannot be changed (differently from tags). Here is a short
+description of the Python classes used to represent data for different
+datatypes.
+
+Placeholders
+^^^^^^^^^^^^
+
+The positional fields in GFA can never be empty. However, there are some
+fields with optional values. If a value is not specified, a placeholder
+character is used instead (``*``). Such undefined values are represented
+in Gfapy by the `Placeholder` class, which is described more in
+detail in the :ref:`placeholders` chapter.
+
+Arrays
+^^^^^^
+
+The ``items`` field in unordered and ordered groups and the
+``segment_names`` and ``overlaps`` fields in paths are lists of objects
+and are represented by list instances.
+
+.. doctest::
+
+ >>> set = gfapy.Line("U\t*\t1 A 2")
+ >>> type(set.items)
+ <class 'list'>
+ >>> gfa2_path = gfapy.Line("O\t*\tA+ B-")
+ >>> type(gfa2_path.items)
+ <class 'list'>
+ >>> gfa1_path = gfapy.Line("P\tp1\tA+,B-\t10M,9M1D1M")
+ >>> type(gfa1_path.segment_names)
+ <class 'list'>
+ >>> type(gfa1_path.overlaps)
+ <class 'list'>
+
+Orientations
+^^^^^^^^^^^^
+
+Orientations are represented by strings. The ``gfapy.invert()`` method
+applied to an orientation string returns the other orientation.
+
+.. doctest::
+
+ >>> gfapy.invert("+")
+ '-'
+ >>> gfapy.invert("-")
+ '+'
+
+Identifiers
+^^^^^^^^^^^
+
+The identifier of the line itself (available for S, P, E, G, U, O lines)
+can always be accessed in Gfapy using the ``name`` alias and is
+represented in Gfapy by a string. If it is optional (E, G, U, O lines)
+and not specified, it is represented by a Placeholder instance. The
+fragment identifier is also a string.
+
+Identifiers which refer to other lines are also present in some line
+types (L, C, E, G, U, O, F). These are never placeholders and in
+stand-alone lines are represented by strings. In connected lines they
+are references to the Line instances to which they refer (see the
+:ref:`references` chapter).
+
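+For example, using stand-alone lines (constructed as in the previous
+chapters):
+
+.. doctest::
+
+ >>> s = gfapy.Line("S\tsA\t100\t*")
+ >>> s.name
+ 'sA'
+ >>> l = gfapy.Line("L\tsA\t+\tsB\t-\t*")
+ >>> l.from_segment
+ 'sA'
+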
+Oriented identifiers
+^^^^^^^^^^^^^^^^^^^^
+
+Oriented identifiers (e.g. ``segment_names`` in GFA1 paths) are
+represented by elements of the class ``gfapy.OrientedLine``. The
+``line`` property of an oriented line returns the segment
+identifier (or segment reference in connected path lines) and the
+``orient`` property returns the orientation string. The ``name`` property
+returns the name of the segment as a string, even if the line is a
+reference to a segment. A new oriented line can be created using the
+constructor ``gfapy.OrientedLine(line, orientation)``.
+
+Calling ``invert`` inverts the orientation in place (see the example
+below). The two attributes can be set by assigning to the ``line`` and
+``orient`` properties.
+
+Examples:
+
+.. doctest::
+
+ >>> p = gfapy.Line("P\tP1\ta+,b-\t*")
+ >>> p.segment_names
+ [gfapy.OrientedLine('a','+'), gfapy.OrientedLine('b','-')]
+ >>> sn0 = p.segment_names[0]
+ >>> sn0.line
+ 'a'
+ >>> sn0.name
+ 'a'
+ >>> sn0.orient
+ '+'
+ >>> sn0.invert()
+ >>> sn0
+ gfapy.OrientedLine('a','-')
+ >>> sn0.orient
+ '-'
+ >>> sn0.line = gfapy.Line('S\tX\t*')
+ >>> str(sn0)
+ 'X-'
+ >>> sn0.name
+ 'X'
+ >>> sn0 = gfapy.OrientedLine(gfapy.Line('S\tY\t*'), '+')
+
+Sequences
+^^^^^^^^^
+
+Sequences (S field sequence) are represented by strings in Gfapy.
+Depending on the GFA version, the alphabet definition is more or less
+restrictive. The definitions are correctly applied by the validation
+methods.
+
+The method ``rc()`` is provided to compute the reverse complement of a
+nucleotide sequence. The extended IUPAC alphabet is understood by the
+method. Applied to non-nucleotide sequences, the results will be
+meaningless:
+
+.. doctest::
+
+ >>> from gfapy.sequence import rc
+ >>> rc("gcat")
+ 'atgc'
+ >>> rc("*")
+ '*'
+ >>> rc("yatc")
+ 'gatr'
+ >>> rc("gCat")
+ 'atGc'
+ >>> rc("cag", rna=True)
+ 'cug'
+
+Integers and positions
+^^^^^^^^^^^^^^^^^^^^^^
+
+The ``pos`` field of C lines and the ``disp`` and ``var`` fields of G lines
+are represented by integers. The ``var`` field is optional, and thus can
+also be a placeholder. Positions are 0-based coordinates.
+
+The position fields of GFA2 E lines (``beg1, beg2, end1, end2``) and F
+lines (``s_beg, s_end, f_beg, f_end``) contain a dollar sign (``$``) as suffix
+if the position is equal to the segment length. For more information,
+see the :ref:`positions` chapter.
+
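+For example, for a stand-alone containment line:
+
+.. doctest::
+
+ >>> c = gfapy.Line("C\tA\t+\tB\t-\t10\t*")
+ >>> c.pos
+ 10
+ >>> type(c.pos)
+ <class 'int'>
+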
+Alignments
+^^^^^^^^^^
+
+Alignments are always optional, i.e. they can be placeholders. If they are
+specified, they are CIGAR alignments or, only in GFA2, trace alignments.
+For more details, see the :ref:`alignments` chapter.
+
+GFA1 datatypes
+^^^^^^^^^^^^^^
+
++------------------------+---------------+--------------------------------+
+| Datatype | Record Type | Fields |
++========================+===============+================================+
+| Identifier | Segment | ``name`` |
++------------------------+---------------+--------------------------------+
+| | Path | ``path_name`` |
++------------------------+---------------+--------------------------------+
+| | Link | ``from_segment, to_segment`` |
++------------------------+---------------+--------------------------------+
+| | Containment | ``from_segment, to_segment`` |
++------------------------+---------------+--------------------------------+
+| [OrientedIdentifier] | Path | ``segment_names`` |
++------------------------+---------------+--------------------------------+
+| Orientation | Link | ``from_orient, to_orient`` |
++------------------------+---------------+--------------------------------+
+| | Containment | ``from_orient, to_orient`` |
++------------------------+---------------+--------------------------------+
+| Sequence | Segment | ``sequence`` |
++------------------------+---------------+--------------------------------+
+| Alignment | Link | ``overlap`` |
++------------------------+---------------+--------------------------------+
+| | Containment | ``overlap`` |
++------------------------+---------------+--------------------------------+
+| [Alignment] | Path | ``overlaps`` |
++------------------------+---------------+--------------------------------+
+| Position | Containment | ``pos`` |
++------------------------+---------------+--------------------------------+
+
+GFA2 datatypes
+^^^^^^^^^^^^^^
+
++------------------------+---------------+----------------------------------+
+| Datatype | Record Type | Fields |
++========================+===============+==================================+
+| Identifier | Segment | ``sid`` |
++------------------------+---------------+----------------------------------+
+| | Fragment | ``sid`` |
++------------------------+---------------+----------------------------------+
+| OrientedIdentifier | Edge | ``sid1, sid2`` |
++------------------------+---------------+----------------------------------+
+| | Gap | ``sid1, sid2`` |
++------------------------+---------------+----------------------------------+
+| | Fragment | ``external`` |
++------------------------+---------------+----------------------------------+
+| OptionalIdentifier | Edge | ``eid`` |
++------------------------+---------------+----------------------------------+
+| | Gap | ``gid`` |
++------------------------+---------------+----------------------------------+
+| | U Group | ``pid`` |
++------------------------+---------------+----------------------------------+
+| | O Group | ``pid`` |
++------------------------+---------------+----------------------------------+
+| [Identifier] | U Group | ``items`` |
++------------------------+---------------+----------------------------------+
+| [OrientedIdentifier] | O Group | ``items`` |
++------------------------+---------------+----------------------------------+
+| Sequence | Segment | ``sequence`` |
++------------------------+---------------+----------------------------------+
+| Alignment | Edge | ``alignment`` |
++------------------------+---------------+----------------------------------+
+| | Fragment | ``alignment`` |
++------------------------+---------------+----------------------------------+
+| Position | Edge | ``beg1, end1, beg2, end2`` |
++------------------------+---------------+----------------------------------+
+| | Fragment | ``s_beg, s_end, f_beg, f_end`` |
++------------------------+---------------+----------------------------------+
+| Integer | Gap | ``disp, var`` |
++------------------------+---------------+----------------------------------+
+
+Reading and writing positional fields
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The ``positional_fieldnames`` property returns the list of the names (as
+strings) of the positional fields of a line. The positional fields can
+be read using a property of the Gfapy line object, which is named after the
+field. Setting the value is done by assigning to the same property
+(e.g. ``segment.slen = 120``). Alternatively, the
+``set(fieldname, value)`` and ``get(fieldname)`` methods can also be
+used.
+
+.. doctest::
+
+ >>> s_gfa1 = gfapy.Line("S\t1\t*")
+ >>> s_gfa1.positional_fieldnames
+ ['name', 'sequence']
+ >>> s_gfa1.name
+ '1'
+ >>> s_gfa1.get("name")
+ '1'
+ >>> s_gfa1.name = "segment2"
+ >>> s_gfa1.name
+ 'segment2'
+ >>> s_gfa1.set('name',"3")
+ >>> s_gfa1.name
+ '3'
+
+When a field is read, the value is converted into an appropriate object.
+The string representation of a field can be read using the
+``field_to_s(fieldname)`` method.
+
+.. doctest::
+
+ >>> gfa = gfapy.Gfa()
+ >>> gfa.add_line("S\ts1\t*")
+ >>> gfa.add_line("L\ts1\t+\ts2\t-\t*")
+ >>> link = gfa.dovetails[0]
+ >>> str(link.from_segment)
+ 'S\ts1\t*'
+ >>> link.field_to_s('from_segment')
+ 's1'
+
+When setting a non-string field, the user can specify the value of a tag
+either as a Python non-string object, or as the string representation of
+the value.
+
+.. doctest::
+
+ >>> gfa = gfapy.Gfa(version='gfa1')
+ >>> gfa.add_line("C\ta\t+\tb\t-\t10\t*")
+ >>> c = gfa.containments[0]
+ >>> c.pos
+ 10
+ >>> c.pos = 1
+ >>> c.pos
+ 1
+ >>> c.pos = "2"
+ >>> c.pos
+ 2
+ >>> c.field_to_s("pos")
+ '2'
+
+Note that setting the value of reference- and backreference-related
+fields is generally not allowed when a line instance is connected to a
+Gfa object (see the :ref:`references` chapter).
+
+.. doctest::
+
+ >>> gfa = gfapy.Gfa(version='gfa1')
+ >>> l = gfapy.Line("L\ts1\t+\ts2\t-\t*")
+ >>> l.from_name
+ 's1'
+ >>> l.from_segment = "s3"
+ >>> l.from_name
+ 's3'
+ >>> gfa.add_line(l)
+ >>> l.from_segment = "s4"
+ Traceback (most recent call last):
+ ...
+ gfapy.error.RuntimeError: ...
+
+Validation
+~~~~~~~~~~
+
+The content of all positional fields must be a correctly formatted
+string according to the rules given in the GFA specifications (or a
+Python object whose string representation is a correctly formatted
+string).
+
+Depending on the validation level, more or fewer checks are done
+automatically (see the :ref:`validation` chapter). Regardless of which
+validation level is selected, the user can trigger a manual validation
+using the ``validate_field(fieldname)`` method for a single field, or
+using ``validate``, which does a full validation of the whole line,
+including all positional fields.
+
+.. doctest::
+
+ >>> line = gfapy.Line("H\txx:i:1")
+ >>> line.validate_field("xx")
+ >>> line.validate()
+
+Aliases
+~~~~~~~
+
+For some fields, aliases are defined, which can be used in all contexts
+where the original field name is used: as parameter of a method, and as
+setter and getter properties, which are defined for each alias in the
+same way as for the original field name (see below).
+
+.. doctest::
+
+ >>> gfa1_path = gfapy.Line("P\tX\t1-,2+,3+\t*")
+ >>> gfa1_path.name == gfa1_path.path_name
+ True
+ >>> edge = gfapy.Line("E\t*\tA+\tB-\t0\t10\t90\t100$\t*")
+ >>> edge.eid == edge.name
+ True
+ >>> containment = gfapy.Line("C\tA\t+\tB\t-\t10\t*")
+ >>> containment.from_segment == containment.container
+ True
+ >>> segment = gfapy.Line("S\t1\t*")
+ >>> segment.sid == segment.name
+ True
+ >>> segment.sid
+ '1'
+ >>> segment.name = '2'
+ >>> segment.sid
+ '2'
+
+Name
+^^^^
+
+Different record types have an identifier field: segments (``name`` in
+GFA1, ``sid`` in GFA2), paths (``path_name``), edges (``eid``),
+fragments (``sid``), gaps (``gid``) and groups (``pid``).
+
+All these fields are aliased to ``name``. This allows the user, for
+example, to set the identifier of a line by assigning to the ``name``
+property, using the same syntax for different record types (segments,
+edges, paths, fragments, gaps and groups).
+
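+For example, the following sketch (the segment and edge contents are
+arbitrary example values) renames lines of two different record types
+using the same ``name`` property:
+
+.. code:: python
+
+    segment = gfapy.Line("S\tA\t*")  # GFA1 segment: name field
+    edge = gfapy.Line("E\t*\tA+\tB-\t0\t10\t90\t100$\t*")  # GFA2 edge: eid field
+    segment.name = "A2"  # sets the segment name
+    edge.name = "e1"     # sets the edge eid
+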
+Version-specific field names
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+For segments, the GFA1 ``name`` and the GFA2 ``sid`` are equivalent
+fields. For this reason, an alias ``sid`` is defined for GFA1 segments
+and ``name`` for GFA2 segments.
+
+Cryptic field names
+^^^^^^^^^^^^^^^^^^^
+
+The meaning of from and to for containments is somewhat cryptic.
+Therefore, the following aliases have been defined for containments:
+``container`` and ``container_orient`` for ``from_segment`` and
+``from_orient``; ``contained`` and ``contained_orient`` for
+``to_segment`` and ``to_orient``.
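+
+For example (a minimal sketch, using placeholder sequence and overlap):
+
+.. code:: python
+
+    c = gfapy.Line("C\tA\t+\tB\t-\t10\t*")
+    c.container         # same as c.from_segment, here 'A'
+    c.contained_orient  # same as c.to_orient, here '-'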
diff --git a/doc/tutorial/positions.rst b/doc/tutorial/positions.rst
new file mode 100644
index 0000000..8fb0f3b
--- /dev/null
+++ b/doc/tutorial/positions.rst
@@ -0,0 +1,75 @@
+.. testsetup:: *
+
+ import gfapy
+
+.. _positions:
+
+Positions
+---------
+
+The only position field in GFA1 is the ``pos`` field in the C lines.
+This represents the starting position of the contained segment in the
+container segment and is 0-based.
+
+Some fields in GFA2 E lines (``beg1, beg2, end1, end2``) and F lines
+(``s_beg, s_end, f_beg, f_end``) are positions. According to the
+specification, they are 0-based and represent virtual ticks before and
+after each character of the sequence. Ranges are thus represented
+similarly to the Python range conventions: e.g. a 1-character prefix of
+a sequence will have begin 0 and end 1.
+
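+As an illustration (a sketch, not part of the library API), the GFA2
+coordinates of a prefix correspond directly to Python slice indices:
+
+.. code:: python
+
+    sequence = "ACGT"
+    beg, end = 0, 1      # GFA2 positions of the 1-character prefix
+    sequence[beg:end]    # => "A"
+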
+Last positions in GFA2
+~~~~~~~~~~~~~~~~~~~~~~
+
+GFA2 positions must have an additional character (``$``) appended to
+the integer if (and only if) they are the last position in the segment
+sequence. These particular positions are represented in Gfapy as
+instances of the class :class:`~gfapy.lastpos.LastPos`.
+
+To create a lastpos instance, the constructor can be used with an
+integer, or the string representation (which must end with the dollar
+sign, otherwise an integer is returned):
+
+.. doctest::
+
+ >>> str(gfapy.LastPos(12))
+ '12$'
+ >>> gfapy.LastPos("12")
+ 12
+ >>> str(gfapy.LastPos("12"))
+ '12'
+ >>> gfapy.LastPos("12$")
+ gfapy.LastPos(12)
+ >>> str(gfapy.LastPos("12$"))
+ '12$'
+
+Subtracting an integer from a lastpos returns a lastpos if 0 is
+subtracted, and an integer otherwise. This allows some arithmetic on
+positions to be performed without making them invalid.
+
+.. doctest::
+
+ >>> gfapy.LastPos(12) - 0
+ gfapy.LastPos(12)
+ >>> gfapy.LastPos(12) - 1
+ 11
+
+The functions :func:`~gfapy.lastpos.islastpos` and
+:func:`~gfapy.lastpos.isfirstpos` can be used to
+determine whether a position value is 0 (first) or the last position,
+using the same syntax for lastpos and integer instances.
+
+.. doctest::
+
+ >>> gfapy.isfirstpos(0)
+ True
+ >>> gfapy.islastpos(0)
+ False
+ >>> gfapy.isfirstpos(12)
+ False
+ >>> gfapy.islastpos(12)
+ False
+ >>> gfapy.islastpos(gfapy.LastPos("12"))
+ False
+ >>> gfapy.islastpos(gfapy.LastPos("12$"))
+ True
diff --git a/doc/tutorial/references.rst b/doc/tutorial/references.rst
new file mode 100644
index 0000000..f1beab7
--- /dev/null
+++ b/doc/tutorial/references.rst
@@ -0,0 +1,443 @@
+.. testsetup:: *
+
+ import gfapy
+ gfa = gfapy.Gfa()
+
+.. _references:
+
+References
+----------
+
+Some fields in GFA lines contain identifiers or lists of identifiers
+(sometimes followed by orientation strings), which reference other lines
+of the GFA file. In Gfapy it is possible to follow these references and
+traverse the graph.
+
+Connecting a line to a Gfa object
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+In stand-alone line instances, the identifiers which reference other
+lines are either strings containing the line name, pairs of strings
+(name and orientation) in a ``gfapy.OrientedLine`` object, or lists of
+line names or ``gfapy.OrientedLine`` objects.
+
+Using the ``add_line(line)`` (alias: ``append(line)``) method of the
+``gfapy.Gfa`` object, or the equivalent ``connect(gfa)`` method of the
+``gfapy.Line`` instance, a line is added to a Gfa instance (this is done
+automatically when a GFA file is parsed). All strings expressing
+references are then changed into references to the corresponding line
+objects. The method ``is_connected()`` can be used to determine whether
+a line is connected to a Gfa instance. The read-only property ``gfa``
+contains the ``gfapy.Gfa`` instance to which the line is connected.
+
+.. doctest::
+
+ >>> gfa = gfapy.Gfa(version='gfa1')
+ >>> link = gfapy.Line("L\tA\t-\tB\t+\t20M")
+ >>> link.is_connected()
+ False
+ >>> link.gfa is None
+ True
+ >>> type(link.from_segment)
+ <class 'str'>
+ >>> gfa.append(link)
+ >>> link.is_connected()
+ True
+ >>> link.gfa #doctest: +ELLIPSIS
+ <gfapy.gfa.Gfa object at ...>
+ >>> type(link.from_segment)
+ <class 'gfapy.line.segment.gfa1.GFA1'>
+
+References for each record type
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The following tables describe the references contained in each record
+type. The notation ``[]`` represents a list.
+
+GFA1
+^^^^
+
++---------------+-------------------+---------------------------+
+| Record type | Fields | Type of reference |
++===============+===================+===========================+
+| Link | from, to | Segment |
++---------------+-------------------+---------------------------+
+| Containment | from, to | Segment |
++---------------+-------------------+---------------------------+
+| Path | segment\_names, | [OrientedLine(Segment)] |
++---------------+-------------------+---------------------------+
+| | links (1) | [OrientedLine(Link)] |
++---------------+-------------------+---------------------------+
+
+(1): paths contain information in the fields ``segment_names`` and
+``overlaps`` which allows one to identify the links on which they
+depend; these links can be retrieved using ``links`` (which is not a
+field).
+
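+For example, assuming the ``paths`` collection and the ``links``
+property behave as described above, the link implied by a path can be
+retrieved as follows (a sketch with example line contents):
+
+.. code:: python
+
+    gfa = gfapy.Gfa()
+    gfa.add_line("S\tA\t*")
+    gfa.add_line("S\tB\t*")
+    gfa.add_line("L\tA\t+\tB\t+\t*")
+    gfa.add_line("P\tp1\tA+,B+\t*")
+    gfa.paths[0].links  # => references to the link connecting A+ and B+
+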
+GFA2
+^^^^
+
++---------------+--------------+------------------------------------+
+| Record type | Fields | Type of reference |
++===============+==============+====================================+
+| Edge | sid1, sid2 | Segment |
++---------------+--------------+------------------------------------+
+| Gap | sid1, sid2 | Segment |
++---------------+--------------+------------------------------------+
+| Fragment | sid | Segment |
++---------------+--------------+------------------------------------+
+| Set | items | [Edge/Set/Path/Segment] |
++---------------+--------------+------------------------------------+
+| Path | items | [OrientedLine(Edge/Set/Segment)] |
++---------------+--------------+------------------------------------+
+
+Backreferences for each record type
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When a line containing a reference to another line is connected to a Gfa
+object, backreferences to it are created in the targeted line.
+
+For each backreference collection, a read-only property exists, named
+after the collection (e.g. ``dovetails_L`` for segments). Note that
+the reference lists returned by these properties are read-only; editing
+the references is done using other methods (see the section "Editing
+reference fields" below).
+
+.. code:: python
+
+ segment.dovetails_L # => [gfapy.line.edge.Link(...), ...]
+
+The following tables describe the backreferences collections for each
+record type.
+
+GFA1
+^^^^
+
++---------------+-------------------------+
+| Record type | Backreferences |
++===============+=========================+
+| Segment | dovetails\_L |
++---------------+-------------------------+
+| | dovetails\_R |
++---------------+-------------------------+
+| | edges\_to\_contained |
++---------------+-------------------------+
+| | edges\_to\_containers |
++---------------+-------------------------+
+| | paths |
++---------------+-------------------------+
+| Link | paths |
++---------------+-------------------------+
+
+GFA2
+^^^^
+
++---------------+-------------------------+--------+
+| Record type | Backreferences | Type |
++===============+=========================+========+
+| Segment | dovetails\_L | E |
++---------------+-------------------------+--------+
+| | dovetails\_R | E |
++---------------+-------------------------+--------+
+| | edges\_to\_contained | E |
++---------------+-------------------------+--------+
+| | edges\_to\_containers | E |
++---------------+-------------------------+--------+
+| | internals | E |
++---------------+-------------------------+--------+
+| | gaps\_L | G |
++---------------+-------------------------+--------+
+| | gaps\_R | G |
++---------------+-------------------------+--------+
+| | fragments | F |
++---------------+-------------------------+--------+
+| | paths | O |
++---------------+-------------------------+--------+
+| | sets | U |
++---------------+-------------------------+--------+
+| Edge | paths | O |
++---------------+-------------------------+--------+
+| | sets | U |
++---------------+-------------------------+--------+
+| O Group | paths | O |
++---------------+-------------------------+--------+
+| | sets | U |
++---------------+-------------------------+--------+
+| U Group | sets | U |
++---------------+-------------------------+--------+
+
+Segment backreference convenience methods
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+For segments, additional methods are available which combine the
+backreference information in different ways. The
+`dovetails_of_end` and `gaps_of_end` methods take an
+argument ``L`` or ``R`` and return the dovetail overlaps (or gaps) of
+the left or right end of the segment sequence, respectively
+(equivalent to the segment properties ``dovetails_L``/``dovetails_R``
+and ``gaps_L``/``gaps_R``).
+
+The segment ``containments`` property is a list of all containments in
+which the segment is either the container or the contained segment. The
+segment ``edges`` property is a list of all edges (dovetails,
+containments and internals) with a reference to the segment.
+
+Other methods directly compute lists of segments from the edge lists
+mentioned above. The ``neighbours_L`` and ``neighbours_R`` properties
+and the `neighbours` method compute the set of segment instances which
+are connected to the segment by dovetails.
+The ``containers`` and ``contained``
+properties similarly compute the set of segment instances which,
+respectively, contain the segment or are contained in it.
+
+.. doctest::
+
+ >>> gfa = gfapy.Gfa()
+ >>> gfa.append('S\tA\t*')
+ >>> s = gfa.segment('A')
+ >>> gfa.append('S\tB\t*')
+ >>> gfa.append('S\tC\t*')
+ >>> gfa.append('L\tA\t-\tB\t+\t*')
+ >>> gfa.append('C\tA\t+\tC\t+\t10\t*')
+ >>> [str(l) for l in s.dovetails_of_end("L")]
+ ['L\tA\t-\tB\t+\t*']
+ >>> s.dovetails_L == s.dovetails_of_end("L")
+ True
+ >>> s.gaps_of_end("R")
+ []
+ >>> [str(e) for e in s.edges]
+ ['L\tA\t-\tB\t+\t*', 'C\tA\t+\tC\t+\t10\t*']
+ >>> [str(n) for n in s.neighbours_L]
+ ['S\tB\t*']
+ >>> s.containers
+ []
+ >>> [str(c) for c in s.contained]
+ ['S\tC\t*']
+
+Multiline group definitions
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The GFA2 specification opens the (experimental) possibility of defining
+groups on multiple lines, by using the same ID for each line defining
+the group. This is supported by Gfapy.
+
+This means that if multiple `Ordered` or
+`Unordered` instances connected to a Gfa object have
+the same ``gid``, they are merged into a single instance (technically
+the last one added to the graph object) and their item lists are merged.
+
+The tags of multiple lines defining a group must not contradict each
+other: either the tag names on the different lines defining the group
+are all distinct, or, if the same tag is present on multiple lines, its
+value and datatype must be the same, in which case the duplicated
+definition is ignored.
+
+.. doctest::
+
+ >>> gfa = gfapy.Gfa()
+ >>> gfa.add_line("U\tu1\ts1 s2 s3")
+ >>> [s.name for s in gfa.sets[-1].items]
+ ['s1', 's2', 's3']
+ >>> gfa.add_line('U\tu1\t4 5')
+ >>> [s.name for s in gfa.sets[-1].items]
+ ['s1', 's2', 's3', '4', '5']
+
+Induced set and captured path
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The item list in GFA2 sets and paths does not necessarily contain all
+elements which are implicitly involved. For example, a path may contain
+segments without specifying the edges connecting them, if there is only
+one such edge. Alternatively, a path may contain edges without
+explicitly indicating the segments. Similarly, a set may contain edges,
+but not the segments referred to in them, or contain segments which are
+connected by edges, without the edges themselves. Furthermore, groups
+may refer to other groups (sets to sets or paths, paths to paths only),
+which then indirectly contain references to segments and edges.
+
+Gfapy provides methods for the computation of the sets of segments and
+edges which are implied by an ordered or unordered group. Thereby all
+references to subgroups are resolved and implicit elements are added, as
+described in the specification. The computation can, therefore, only be
+applied to connected lines. For unordered groups, this computation is
+provided by the method ``induced_set()``, which returns a list of
+segment and edge instances. For ordered groups, the computation is
+provided by the method ``captured_path()``, which returns a list of
+``gfapy.OrientedLine`` instances, alternating segment and edge instances
+(and starting and ending with segments).
+
+The methods ``induced_segments_set()``, ``induced_edges_set()``,
+``captured_segments()`` and ``captured_edges()`` return, respectively,
+the list of only segments or edges, in ordered or unordered groups.
+
+.. doctest::
+
+ >>> gfa = gfapy.Gfa()
+ >>> gfa.add_line("S\ts1\t100\t*")
+ >>> gfa.add_line("S\ts2\t100\t*")
+ >>> gfa.add_line("S\ts3\t100\t*")
+ >>> gfa.add_line("E\te1\ts1+\ts2-\t0\t10\t90\t100$\t*")
+ >>> gfa.add_line("U\tu1\ts1 s2 s3")
+ >>> u = gfa.sets[-1]
+ >>> [l.name for l in u.induced_edges_set]
+ ['e1']
+ >>> [l.name for l in u.induced_segments_set ]
+ ['s1', 's2', 's3']
+ >>> [l.name for l in u.induced_set ]
+ ['s1', 's2', 's3', 'e1']
+
+Disconnecting a line from a Gfa object
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Lines can be disconnected using the ``rm(line)`` method of the
+``gfapy.Gfa`` object or the ``disconnect()`` method of the line
+instance.
+
+.. doctest::
+
+ >>> gfa = gfapy.Gfa()
+ >>> gfa.append('S\tsA\t*')
+ >>> gfa.append('S\tsB\t*')
+ >>> line = gfa.segment("sA")
+ >>> gfa.segment_names
+ ['sB', 'sA']
+ >>> gfa.rm(line)
+ >>> gfa.segment_names
+ ['sB']
+ >>> line = gfa.segment('sB')
+ >>> line.disconnect()
+ >>> gfa.segment_names
+ []
+
+Disconnecting a line affects other lines as well: lines which are
+dependent on the disconnected line are also disconnected, and any other
+references to the disconnected line are removed. In the disconnected
+line itself, references to other lines are transformed back into strings
+and backreferences are deleted.
+
+The following tables show which dependent lines are disconnected if they
+refer to a line which is being disconnected.
+
+GFA1
+^^^^
+
++---------------+---------------------------------+
+| Record type | Dependent lines |
++===============+=================================+
+| Segment | links (+ paths), containments |
++---------------+---------------------------------+
+| Link | paths |
++---------------+---------------------------------+
+
+GFA2
+^^^^
+
++---------------+---------------------------------------+
+| Record type | Dependent lines |
++===============+=======================================+
+| Segment | edges, gaps, fragments, sets, paths |
++---------------+---------------------------------------+
+| Edge | sets, paths |
++---------------+---------------------------------------+
+| Sets | sets, paths |
++---------------+---------------------------------------+
+
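+For example (a sketch with GFA1 example lines), disconnecting a segment
+also disconnects the links which refer to it:
+
+.. code:: python
+
+    gfa = gfapy.Gfa()
+    gfa.add_line("S\tA\t*")
+    gfa.add_line("S\tB\t*")
+    gfa.add_line("L\tA\t+\tB\t+\t*")
+    link = gfa.dovetails[0]
+    gfa.segment("A").disconnect()
+    link.is_connected()  # => False, as the link depends on segment A
+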
+Editing reference fields
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+In connected line instances, it is not allowed to directly change the
+content of fields containing references to other lines, as this would
+make the state of the Gfa object invalid.
+
+Besides the fields containing references, some other fields are
+read-only in connected lines. Changing some of these fields would require
+moving the backreferences to other collections (position fields of edges
+and gaps, ``from_orient`` and ``to_orient`` of links). The overlaps
+field of connected links is read-only, as it may be needed to identify
+the link in paths.
+
+Renaming an element
+^^^^^^^^^^^^^^^^^^^
+
+The name field of a line (e.g. segment ``name``/``sid``) is not a
+reference and thus can be edited also in connected lines. When the name
+of the line is changed, no manual editing of references (e.g. from/to
+fields in links) is necessary, as all lines which refer to the line will
+still refer to the same instance. The references to the instance in the
+Gfa lines collections will be automatically updated. Also, the new name
+will be correctly used when converting to string, such as when the Gfa
+instance is written to a GFA file.
+
+Renaming a line to a name which already exists has the same effect as
+adding a line with that name. That is, in most cases,
+``gfapy.NotUniqueError`` is raised. The exceptions are GFA2 sets and
+paths: in this case the line will be appended to the existing line with
+the same name (as described in "Multiline group definitions").
+
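+The following sketch (example segment and link contents) illustrates
+that references remain consistent after renaming:
+
+.. code:: python
+
+    gfa = gfapy.Gfa()
+    gfa.add_line("S\tA\t*")
+    gfa.add_line("S\tB\t*")
+    gfa.add_line("L\tA\t+\tB\t+\t*")
+    gfa.segment("A").name = "A1"
+    str(gfa.dovetails[0])  # => 'L\tA1\t+\tB\t+\t*'
+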
+Adding and removing group elements
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Elements of GFA2 groups can be added and removed from both connected and
+non-connected lines, using the following methods.
+
+To add an item to or remove an item from an unordered group, use the
+methods ``add_item(item)`` and ``rm_item(item)``, which take as argument
+either a string (identifier) or a line instance.
+
+To append or prepend an item to an ordered group, use the methods
+``append_item(item)`` and ``prepend_item(item)``. To remove the first or
+the last item of an ordered group use the methods ``rm_first_item()``
+and ``rm_last_item()``.
+
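+A minimal sketch (with example GFA2 set contents):
+
+.. code:: python
+
+    gfa = gfapy.Gfa()
+    gfa.add_line("S\ts1\t100\t*")
+    gfa.add_line("S\ts2\t100\t*")
+    gfa.add_line("U\tu1\ts1")
+    u = gfa.sets[-1]
+    u.add_item("s2")  # items are now s1, s2
+    u.rm_item("s1")   # items are now s2
+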
+Editing read-only fields of connected lines
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Editing the read-only information of edges, gaps, links, containments,
+fragments and paths is more complicated. These lines must be
+disconnected before the edit and connected again to the Gfa object
+afterwards. Before disconnecting a line, you should check if there are
+other lines dependent on it (see the tables above). If so, you will have
+to disconnect these lines first, update their fields if necessary, and
+reconnect them at the end of the operation.
+
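+The following sketch (GFA1 example lines; the edited field is the link
+orientation, which is read-only while the line is connected) shows the
+general pattern:
+
+.. code:: python
+
+    gfa = gfapy.Gfa()
+    gfa.add_line("S\tA\t*")
+    gfa.add_line("S\tB\t*")
+    gfa.add_line("L\tA\t+\tB\t+\t*")
+    link = gfa.dovetails[0]
+    link.disconnect()
+    link.from_orient = "-"  # editable now that the line is disconnected
+    gfa.add_line(link)      # reconnect the edited line
+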
+Virtual lines
+~~~~~~~~~~~~~
+
+The order of the lines in GFA is not prescribed. Therefore, during
+parsing, or while constructing a Gfa in memory, it is possible that a
+line is referenced before it has been added to the Gfa instance.
+Whenever this happens, Gfapy creates a "virtual" line instance.
+
+Users do not have to deal with virtual lines if they work with complete
+and valid GFA files.
+
+Virtual lines are similar to normal line instances, with some
+limitations (they contain only limited information and it is not allowed
+to add tags to them). To check if a line is a virtual line, one can use
+the ``virtual`` property of the line.
+
+As soon as the parser finds the real line corresponding to a previously
+introduced virtual line, the virtual line is exchanged with the real
+line and all references are corrected to point to the real line.
+
+.. doctest::
+
+ >>> g = gfapy.Gfa()
+ >>> g.add_line("S\t1\t*")
+ >>> g.add_line("L\t1\t+\t2\t+\t*")
+ >>> l = g.dovetails[0]
+ >>> g.segment("1").virtual
+ False
+ >>> g.segment("2").virtual
+ True
+ >>> l.to_segment == g.segment("2")
+ True
+ >>> g.segment("2").dovetails == [l]
+ True
+ >>> g.add_line("S\t2\t*")
+ >>> g.segment("2").virtual
+ False
+ >>> l.to_segment == g.segment("2")
+ True
+ >>> g.segment("2").dovetails == [l]
+ True
diff --git a/doc/tutorial/tags.rst b/doc/tutorial/tags.rst
new file mode 100644
index 0000000..bfbabfb
--- /dev/null
+++ b/doc/tutorial/tags.rst
@@ -0,0 +1,420 @@
+.. testsetup:: *
+
+ import gfapy
+ gfa = gfapy.Gfa()
+
+.. _tags:
+
+Tags
+----
+
+Each record in GFA can contain tags. Tags are fields which consist of a
+tag name, a datatype and data. The format is ``NN:T:DATA``, where ``NN``
+is a two-letter tag name, ``T`` is a one-letter datatype string and
+``DATA`` is a string representing the data according to the specified
+datatype. Tag names must be unique for each line, i.e. each line may
+contain each tag only once.
+
+::
+
+ # Examples of GFA tags of different datatypes:
+ "aa:i:-12"
+ "bb:f:1.23"
+ "cc:Z:this is a string"
+ "dd:A:X"
+ "ee:B:c,12,3,2"
+ "ff:H:122FA0"
+ 'gg:J:["A","B"]'
+
+Custom tags
+~~~~~~~~~~~
+
+Some tags are explicitly defined in the specification (these are named
+*predefined tags* in Gfapy); in addition, the user or an application can
+define its own custom tags.
+
+Custom tags are user or program specific and may of course collide with
+the tags used by other users or programs. For this reason, if you write
+scripts which employ custom tags, you should always check that the
+values are of the correct datatype and are plausible.
+
+.. doctest::
+
+ >>> line = gfapy.Line("H\txx:i:2")
+ >>> if line.get_datatype("xx") != "i":
+ ... raise Exception("I expected the tag xx to contain an integer!")
+ >>> myvalue = line.xx
+ >>> if (myvalue > 120) or (myvalue % 2 == 1):
+ ... raise Exception("The value in the xx tag is not an even value <= 120")
+ >>> # ... do something with myvalue
+
+It is also good practice to allow the user of the script to change the
+name of the custom tags. For example, Gfapy employs the ``or`` custom
+tag to track the original segment from which a segment in the final
+graph is derived. All methods which read or write the ``or`` tag allow
+an alternative tag name to be specified instead of ``or``, in case this
+name collides with the custom tag of another program.
+
+.. code:: python
+
+ # E.g. a method which does something with myvalue, usually stored in tag xx
+ # allows the user to specify an alternative name for the tag
+ def mymethod(line, mytag="xx"):
+ myvalue = line.get(mytag)
+ # ...
+
+Tag names in GFA1
+~~~~~~~~~~~~~~~~~
+
+According to the GFA1 specification, custom tags are lower case, while
+predefined tags are upper case (in both cases, the second character in
+the name can be a number). A number of predefined tags are listed in the
+specification, different for each kind of line.
+
+::
+
+    "VN:Z:1.0" # VN is upper case => predefined tag
+    "z5:Z:1.0" # z5 starts with a lower case letter => custom tag
+
+    # not forbidden, but not recommended:
+    "zZ:Z:1.0" # => mixed case, first char lower case => custom tag
+    "Zz:Z:1.0" # => mixed case, first char upper case => custom tag
+    "vn:Z:1.0" # => same name as a predefined tag, but lower case => custom tag
+
+Besides the tags described in the specification, the TS tag is allowed
+in GFA1 headers, in order to simplify the translation of GFA2 files.
+
+Tag names in GFA2
+~~~~~~~~~~~~~~~~~
+
+The GFA2 specification is currently less strict regarding tags: both
+upper and lower case tag names may be used, and no tags are predefined
+except for VN and TS.
+
+However, Gfapy follows the same conventions as for GFA1: it allows the
+tags specified as predefined tags in GFA1 to be used also in GFA2. No
+other upper case tags are allowed in GFA2.
+
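+For example (illustrative tag strings, following the conventions just
+described):
+
+::
+
+    "VN:Z:2.0" # => predefined tag
+    "TS:i:100" # => predefined tag
+    "KC:i:10"  # => GFA1 predefined tag, accepted by Gfapy also in GFA2
+    "xx:i:1"   # => custom tag
+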
+Datatypes
+~~~~~~~~~
+
+The following table summarizes the datatypes available for tags:
+
++----------+-----------------+---------------------------+----------------------+
+| Symbol | Datatype | Example | Python class |
++==========+=================+===========================+======================+
+| Z | string | This is a string | str |
++----------+-----------------+---------------------------+----------------------+
+| i | integer | -12 | int |
++----------+-----------------+---------------------------+----------------------+
+| f | float | 1.2E-5 | float |
++----------+-----------------+---------------------------+----------------------+
+| A | char | X | str |
++----------+-----------------+---------------------------+----------------------+
+| J | JSON | [1,{"k1":1,"k2":2},"a"] | list/dict |
++----------+-----------------+---------------------------+----------------------+
+| B | numeric array | f,1.2,13E-2,0 | gfapy.NumericArray |
++----------+-----------------+---------------------------+----------------------+
+| H | byte array | FFAA01 | gfapy.ByteArray |
++----------+-----------------+---------------------------+----------------------+
+
+Validation
+~~~~~~~~~~
+
+The tag name is validated according to the rules described above:
+except for the upper case tags indicated in the GFA1 specification, and
+the TS header tag, all other tags must contain at least one lower case
+letter.
+
+::
+
+ "VN:i:1" # => in header: allowed, elsewhere: error
+ "TS:i:1" # => allowed in headers and GFA2 Edges
+ "KC:i:1" # => allowed in links, containments, GFA1/GFA2 segments
+ "xx:i:1" # => custom tag, always allowed
+ "xxx:i:1" # => error: name is too long
+ "x:i:1" # => error: name is too short
+ "11:i:1" # => error: at least one letter must be present
+
+The datatype must be one of the datatypes specified above. For
+predefined tags, Gfapy also checks that the datatype given in the
+specification is used.
+
+::
+
+ "xx:X:1" # => error: datatype X is unknown
+ "VN:i:1" # => error: VN must be of type Z
+
+The data must be a correctly formatted string for the specified datatype
+or a Python object whose string representation is a correctly formatted
+string.
+
+.. doctest::
+
+ # current value: xx:i:2
+ >>> line = gfapy.Line("S\tA\t*\txx:i:2")
+ >>> line.xx = 1
+ >>> line.xx
+ 1
+ >>> line.xx = "3"
+ >>> line.xx
+ 3
+ >>> line.xx = "A"
+ >>> line.xx
+ Traceback (most recent call last):
+ ...
+ gfapy.error.FormatError: ...
+
+Depending on the validation level, more or fewer checks are done
+automatically (see the :ref:`validation` chapter). By default (validation
+level 1), validation is performed only during parsing or when a value is
+accessed for the first time; therefore, the user must perform a manual
+validation after changing a value to something which is not guaranteed
+to be correct. To trigger a manual validation, the user can call the
+method ``validate_field(fieldname)`` to validate a single tag, or
+``validate()`` to validate the whole line, including all tags.
+
+.. doctest::
+
+ >>> line = gfapy.Line("S\tA\t*\txx:i:2", vlevel = 0)
+ >>> line.validate_field("xx")
+ >>> line.validate()
+ >>> line.xx = "A"
+ >>> line.validate_field("xx")
+ Traceback (most recent call last):
+ ...
+ gfapy.error.FormatError: ...
+ >>> line.validate()
+ Traceback (most recent call last):
+ ...
+ gfapy.error.FormatError: ...
+ >>> line.xx = "3"
+ >>> line.validate_field("xx")
+ >>> line.validate()
+
+Reading and writing tags
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+Tags can be read using a property of the Gfapy line object which is
+named after the tag (e.g. ``line.xx``). A special version of the
+property, prefixed by ``try_get_``, raises an error if the tag is not
+available (e.g. ``line.try_get_LN()``), while the plain tag property
+(e.g. ``line.LN``) returns ``None`` in this case. The value is set by
+assigning to the tag name property (e.g. ``line.TS = 120``).
+Alternatively, the ``set(fieldname, value)``, ``get(fieldname)`` and
+``try_get(fieldname)`` methods can also be used. To remove a tag from a
+line, use the ``delete(fieldname)`` method, or set its value to
+``None``. The ``tagnames`` property of Line instances is a list of the
+names (as strings) of all tags defined for a line.
+
+
+.. doctest::
+
+ >>> line = gfapy.Line("S\tA\t*\txx:i:1", vlevel = 0)
+ >>> line.xx
+ 1
+ >>> line.xy is None
+ True
+ >>> line.try_get_xx()
+ 1
+ >>> line.try_get_xy()
+ Traceback (most recent call last):
+ ...
+ gfapy.error.NotFoundError: ...
+ >>> line.get("xx")
+ 1
+ >>> line.try_get("xy")
+ Traceback (most recent call last):
+ ...
+ gfapy.error.NotFoundError: ...
+ >>> line.xx = 2
+ >>> line.xx
+ 2
+ >>> line.xx = "a"
+ >>> line.tagnames
+ ['xx']
+ >>> line.xy = 2
+ >>> line.xy
+ 2
+ >>> line.set("xy", 3)
+ >>> line.get("xy")
+ 3
+ >>> line.tagnames
+ ['xx', 'xy']
+ >>> line.delete("xy")
+ 3
+ >>> line.xy is None
+ True
+ >>> line.xx = None
+ >>> line.xx is None
+ True
+ >>> line.try_get("xx")
+ Traceback (most recent call last):
+ ...
+ gfapy.error.NotFoundError: ...
+ >>> line.tagnames
+ []
+
+When a tag is read, the value is converted into an appropriate object
+(see Python classes in the datatype table above). When setting a value,
+the user can specify the value of a tag either as a Python object, or as
+the string representation of the value.
+
+.. doctest::
+
+ >>> line = gfapy.Line('H\txx:i:1\txy:Z:TEXT\txz:J:["a","b"]')
+ >>> line.xx
+ 1
+ >>> isinstance(line.xx, int)
+ True
+ >>> line.xy
+ 'TEXT'
+ >>> isinstance(line.xy, str)
+ True
+ >>> line.xz
+ ['a', 'b']
+ >>> isinstance(line.xz, list)
+ True
+
+The string representation of a tag can be read using the
+``field_to_s(fieldname)`` method. The default is to only output the
+content of the field. By setting ``tag=True``, the entire tag is
+output (name, datatype and content, separated by colons). An exception
+is raised if the field does not exist.
+
+.. doctest::
+
+ >>> line = gfapy.Line("H\txx:i:1")
+ >>> line.xx
+ 1
+ >>> line.field_to_s("xx")
+ '1'
+ >>> line.field_to_s("xx", tag=True)
+ 'xx:i:1'
+
+Datatype of custom tags
+~~~~~~~~~~~~~~~~~~~~~~~
+
+The datatype of an existing custom field (but not of predefined fields)
+can be changed using the ``set_datatype(fieldname, datatype)`` method.
+The current datatype specification can be read using
+``get_datatype(fieldname)``.
+
+.. doctest::
+
+ >>> line = gfapy.Line("H\txx:i:1")
+ >>> line.get_datatype("xx")
+ 'i'
+ >>> line.set_datatype("xx", "Z")
+ >>> line.get_datatype("xx")
+ 'Z'
+
+If a new custom tag is specified, Gfapy selects the correct datatype for
+it: i/f for numeric values, J/B for lists, J for dicts and Z for
+strings. If a different datatype is desired, it can be set with
+``set_datatype()`` (this can also be done before assigning a value,
+which is necessary if full validation is active).
+
+.. doctest::
+
+ >>> line = gfapy.Line("H")
+ >>> line.xx = "1"
+ >>> line.xx
+ '1'
+ >>> line.set_datatype("xy", "i")
+ >>> line.xy = "1"
+ >>> line.xy
+ 1
+
+Arrays of numerical values
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+``B`` and ``H`` tags represent arrays with particular constraints (e.g.
+they can only contain numeric values, and in some cases the values must
+be in predefined ranges). In order to represent them correctly and allow
+for validation, Python classes have been defined for both kinds of tags:
+``gfapy.ByteArray`` for ``H`` and ``gfapy.NumericArray`` for ``B``
+fields.
+
+``NumericArray`` is a subclass of ``list``, while ``ByteArray`` is a
+subclass of ``bytes``. Objects of the two classes can be created by
+passing an existing list or the string representation to the class
+constructor.
+
+.. doctest::
+
+ >>> # create a byte array instance
+ >>> gfapy.ByteArray([12,3,14])
+ b'\x0c\x03\x0e'
+ >>> gfapy.ByteArray("A012FF")
+ b'\xa0\x12\xff'
+ >>> # create a numeric array instance
+ >>> gfapy.NumericArray.from_string("c,12,3,14")
+ [12, 3, 14]
+ >>> gfapy.NumericArray([12,3,14])
+ [12, 3, 14]
+
+Instances of the two classes behave like their parent classes, except
+that they provide a ``validate()`` method, which checks the constraints,
+and that their string representation is the GFA string representation of
+the field value.
+
+.. doctest::
+
+ >>> gfapy.NumericArray([12,1,"1x"]).validate()
+ Traceback (most recent call last):
+ ...
+ gfapy.error.ValueError
+ >>> str(gfapy.NumericArray([12,3,14]))
+ 'C,12,3,14'
+ >>> gfapy.ByteArray([12,1,"1x"]).validate()
+ Traceback (most recent call last):
+ ...
+ gfapy.error.ValueError
+ >>> str(gfapy.ByteArray([12,3,14]))
+ '0C030E'
+
+For numeric arrays, the `compute_subtype` method computes the subtype
+which will be used for the string representation. Unsigned subtypes are
+used if all values are positive. The smallest possible subtype range is
+selected. The subtype may change when the range of the elements changes.
+
+.. doctest::
+
+ >>> gfapy.NumericArray([12,13,14]).compute_subtype()
+ 'C'
+
+Special cases: custom records, headers, comments and virtual lines.
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+GFA2 allows custom records, introduced by record type strings other than
+the predefined ones. Gfapy uses a pragmatic approach for identifying
+tags in custom records: it tries to interpret the rightmost fields as
+tags, moving from right to left, until a field raises an error; all
+remaining fields are treated as positional fields.
+
+::
+
+ "X a b c xx:i:12" # => xx is tag, a, b, c are positional fields
+ "Y a b xx:i:12 c" # => all positional fields, as c is not a valid tag
+
+For easier access, the entire header of the GFA is summarized in a
+single line instance. A class (`FieldArray`) has been defined to
+handle the special case when multiple H lines define the same tag (see
+:ref:`header` chapter for details).
+
+Comment lines are represented by a subclass of the same class
+(`Line`) as the records. However, they cannot contain tags: the
+entire line is taken as content of the comment. See the :ref:`comments`
+chapter for more information about comments.
+
+::
+
+ "# this is not a tag: xx:i:1" # => xx is not a tag, xx:i:1 is part of the comment
+
+Virtual instances of the `Line` class (e.g. segment instances automatically
+created because of not yet resolved references found in edges) cannot be
+modified by the user, and tags cannot be specified for them. This
+includes all instances of the `Unknown` class. See the
+:ref:`references` chapter for more information about virtual lines.
diff --git a/doc/tutorial/validation.rst b/doc/tutorial/validation.rst
new file mode 100644
index 0000000..e796021
--- /dev/null
+++ b/doc/tutorial/validation.rst
@@ -0,0 +1,78 @@
+.. _validation:
+
+Validation
+----------
+
+Different validation levels are available. They represent different
+compromises between speed and warrant of validity. The validation level
+can be specified when the :class:`~gfapy.gfa.Gfa` object is created, using the
+``vlevel`` parameter of the constructor and of the
+`from_file` method. Four levels of validation are defined
+(0 = no validation, 1 = validation by reading, 2 = validation by reading
+and writing, 3 = continuous validation). The default validation level
+value is 1.
+
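+For example (a sketch; the file name is a placeholder):
+
+.. code:: python
+
+    import gfapy
+    # no validation at all:
+    gfa = gfapy.Gfa(vlevel=0)
+    # validation by reading and writing, parsing an existing file:
+    gfa = gfapy.Gfa.from_file("example.gfa", vlevel=2)
+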
+Manual validation
+~~~~~~~~~~~~~~~~~
+
+Independently of the validation level chosen, the user can always check
+the value of a field by calling
+:meth:`~gfapy.line.common.validate.Validate.validate_field` on the line
+instance. If no exception is raised, the field content is valid.
+
+To check if the entire content of the line is valid, the user can call
+:meth:`~gfapy.line.common.validate.Validate.validate` on the line instance.
+This will check all fields and perform cross-field validations, such as
+comparing the length of the sequence of a GFA1 segment to the value of
+the LN tag (if present).
+
+It is also possible to validate the structure of the GFA, for example to
+check if there are unresolved references to lines. To do this, use the
+:meth:`~gfapy.gfa.Gfa.validate` method of the :class:`~gfapy.gfa.Gfa` instance.
+
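+A minimal sketch of the different manual validations (example line
+contents):
+
+.. code:: python
+
+    import gfapy
+    gfa = gfapy.Gfa()
+    gfa.add_line("S\tA\t*\tLN:i:10")
+    line = gfa.segment("A")
+    line.validate_field("LN")  # validate a single field
+    line.validate()            # validate the whole line
+    gfa.validate()             # validate the structure of the GFA
+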
+No validations
+~~~~~~~~~~~~~~
+
+If the validation level is set to 0, Gfapy will try to accept any input
+and never raise an exception. This is not always possible, however, and
+in some cases an exception will still be raised if the data is invalid.
+
+Validation when reading
+~~~~~~~~~~~~~~~~~~~~~~~
+
+If the validation level is set to 1 or higher, basic validations will be
+performed, such as checking the number of positional fields, the
+presence of duplicated tags, the tag datatype of predefined tags.
+Additionally, all tags will be validated, either during parsing or on
+first access. Record-type cross-field validations will also be
+performed.
+
+In other words, validation level 1 means that Gfapy guarantees (as well
+as it can) that the GFA content read from a file is valid, and will
+raise an exception on accessing the data if not.
+
+The user is expected to call `validate_field` after changing a field
+content to something which is potentially invalid, or
+:meth:`~gfapy.line.common.validate.Validate.validate` if cross-field
+validations could potentially fail.
+
+Validation when writing
+~~~~~~~~~~~~~~~~~~~~~~~
+
+Setting the level to 2 will perform all validations described above,
+plus validate the fields content when their value is written to string.
+
+In other words, validation level 2 means that Gfapy guarantees (as well
+as it can) that the GFA content read from a file and written to a file
+is valid, and will raise an exception on accessing the data or writing
+to file if not.
+
+Continuous validation
+~~~~~~~~~~~~~~~~~~~~~
+
+If the validation level is set to 3, all validations of the lower levels
+described above are run, plus a validation of the field contents each
+time a setter method is used.
+
+Validation level 3 means that Gfapy guarantees (as well as it can) that
+the GFA content is always valid.
diff --git a/gfapy/__init__.py b/gfapy/__init__.py
new file mode 100644
index 0000000..0c6a53f
--- /dev/null
+++ b/gfapy/__init__.py
@@ -0,0 +1,25 @@
+VERSIONS = ["gfa1", "gfa2"]
+from gfapy.error import *
+from gfapy.placeholder import Placeholder
+from gfapy.placeholder import is_placeholder
+from gfapy.byte_array import ByteArray
+from gfapy.field_array import FieldArray
+from gfapy.alignment import Alignment
+from gfapy.alignment.cigar import CIGAR
+from gfapy.alignment.placeholder import AlignmentPlaceholder
+from gfapy.alignment.trace import Trace
+from gfapy.numeric_array import NumericArray
+from gfapy.lastpos import LastPos
+from gfapy.lastpos import isfirstpos, islastpos, posvalue
+from gfapy.symbol_invert import invert
+from gfapy.field import Field
+from gfapy.line import Line
+from gfapy.logger import Logger
+from gfapy.segment_end_path import SegmentEndsPath
+from gfapy.segment_end import *
+from gfapy.oriented_line import OrientedLine
+from gfapy.lines import Lines
+from gfapy.graph_operations import GraphOperations
+from gfapy.gfa import Gfa
+import gfapy.sequence
+import gfapy.field
diff --git a/gfapy/alignment/__init__.py b/gfapy/alignment/__init__.py
new file mode 100644
index 0000000..4025f4d
--- /dev/null
+++ b/gfapy/alignment/__init__.py
@@ -0,0 +1,2 @@
+from .alignment import *
+from .cigar import CIGAR
diff --git a/gfapy/alignment/alignment.py b/gfapy/alignment/alignment.py
new file mode 100644
index 0000000..6df1994
--- /dev/null
+++ b/gfapy/alignment/alignment.py
@@ -0,0 +1,159 @@
+import gfapy
+
+class Alignment:
+ """Factory for instances of classes which represent alignments in GFA fields.
+
+ Args:
+ initializer (string, list): the alignment field content
+ version (str): GFA version, either ``'gfa1'`` or ``'gfa2'`` (default: ``'gfa2'``)
+ valid (bool): if ``True``, validation is skipped, when possible (default: ``False``)
+
+ Returns:
+ :class:`~gfapy.alignment.cigar.CIGAR`,
+ :class:`~gfapy.alignment.trace.Trace`,
+ :class:`~gfapy.alignment.placeholder.AlignmentPlaceholder`
+
+ Raises:
+    ~gfapy.error.ArgumentError: If more than one positional parameter is used.
+ ~gfapy.error.FormatError: If the ``initializer`` string/list is invalid.
+ ~gfapy.error.VersionError: If ``version`` is invalid, or ``initializer`` is
+ incompatible with the ``version``.
+
+ Examples:
+ >>> import gfapy
+ >>> from gfapy.alignment import Alignment
+ >>> Alignment("*")
+ gfapy.AlignmentPlaceholder()
+ >>> Alignment("12M2I2D")
+ gfapy.CIGAR([gfapy.CIGAR.Operation(12,'M'), gfapy.CIGAR.Operation(2,'I'), gfapy.CIGAR.Operation(2,'D')])
+ >>> Alignment("12,23,1")
+ gfapy.Trace([12,23,1])
+ >>> Alignment([])
+ gfapy.AlignmentPlaceholder()
+ >>> Alignment([gfapy.CIGAR.Operation(12,'M'), gfapy.CIGAR.Operation(2,'I'), gfapy.CIGAR.Operation(2,'D')])
+ gfapy.CIGAR([gfapy.CIGAR.Operation(12,'M'), gfapy.CIGAR.Operation(2,'I'), gfapy.CIGAR.Operation(2,'D')])
+ >>> Alignment([12,23,1])
+ gfapy.Trace([12,23,1])
+ """
+
+ def __new__(cls, *args, **kargs):
+ """Create an instance of an alignment field class."""
+ if args[0] is None or \
+ gfapy.is_placeholder(args[0]):
+ return gfapy.AlignmentPlaceholder()
+ if len(args) > 1:
+ raise gfapy.ArgumentError("The Alignment() constructor requires "+
+ "a single positional argument, {} found".format(len(args)))
+ if isinstance(args[0], gfapy.CIGAR) or \
+ isinstance(args[0], gfapy.Trace):
+ return args[0]
+ if isinstance(args[0], str):
+ return Alignment._from_string(*args, **kargs)
+ elif isinstance(args[0], list):
+ return Alignment._from_list(*args, **kargs)
+ else:
+ raise gfapy.ArgumentError("Cannot create an alignment "+
+ "from an instance of the class {}".format(type(args[0])))
+
+ @classmethod
+ def _from_string(cls, string, version = "gfa2", valid = False):
+ """
+ Parses an alignment field
+
+ Parameters
+ ----------
+ string : str
+ The string to parse.
+ version : str
+ GFA version (gfa1 or gfa2)
+ If *gfa1*, then CIGARs and Placeholders are supported.
+ If *gfa2*, also Traces are supported.
+ Defaults to *gfa2*.
+ valid : bool
+ If *True*, the string is guaranteed to be valid, and
+ further checks are skipped.
+ Defaults to *False*.
+
+ Returns
+ -------
+    gfapy.CIGAR or gfapy.Trace or gfapy.AlignmentPlaceholder
+
+ Raises
+ ------
+ gfapy.FormatError
+ If the content of the field cannot be parsed.
+ gfapy.VersionError
+ If a wrong value is provided for the version parameter.
+ """
+ if version != "gfa1" and version != "gfa2":
+ raise gfapy.VersionError(
+ "Version error: {}".format(repr(version)))
+ first = True
+ for char in string:
+ if first:
+ if char.isdigit():
+ first = False
+ continue
+ elif char == "*" and len(string) == 1:
+ return gfapy.AlignmentPlaceholder()
+ else:
+ if char.isdigit():
+ continue
+ elif char == ",":
+ if version == "gfa2":
+ t = gfapy.Trace._from_string(string)
+ if not valid:
+ t.validate()
+ return t
+ else:
+ raise gfapy.FormatError(
+ "Trace alignments are not allowed in GFA1: {}"
+ .format(repr(string)))
+ elif char in ["M","I","D","P"] or (char in ["=","X","S","H","N"]
+ and version == "gfa1"):
+ return gfapy.CIGAR._from_string(string, valid=valid, version=version)
+ break
+ raise gfapy.FormatError("Alignment field contains invalid data {}"
+ .format(repr(string)))
+
+
+ @classmethod
+ def _from_list(cls, array, version = "gfa2", valid = True):
+ """
+ Converts an alignment array into a specific list type
+
+ Parameters
+ ----------
+ array : list
+ The alignment array.
+ version : str
+ GFA version (gfa1 or gfa2)
+ If *gfa1*, then CIGARs and Placeholders are supported.
+ If *gfa2*, also Traces are supported.
+ Defaults to *gfa2*.
+ valid : bool
+ If *True*, the list is guaranteed to be valid, and
+ further checks are skipped.
+      Defaults to *True*.
+
+ Returns
+ -------
+ gfapy.CIGAR or gfapy.Trace
+ """
+ if version != "gfa1" and version != "gfa2":
+ raise gfapy.VersionError(
+ "Version error: {}".format(repr(version)))
+ if not array:
+ return gfapy.AlignmentPlaceholder()
+ elif isinstance(array[0], int):
+ if version == "gfa2":
+ return gfapy.Trace(array)
+ else:
+ raise gfapy.VersionError(
+ "Trace alignments are not allowed in GFA1: {}".format(repr(array)))
+ elif isinstance(array[0], gfapy.CIGAR.Operation):
+ return gfapy.CIGAR(array)
+ else:
+ raise gfapy.FormatError(
+ "Array does not represent a valid alignment field: {}"
+ .format(repr(array)))
diff --git a/gfapy/alignment/cigar.py b/gfapy/alignment/cigar.py
new file mode 100644
index 0000000..e561bce
--- /dev/null
+++ b/gfapy/alignment/cigar.py
@@ -0,0 +1,197 @@
+import re
+import gfapy
+from .alignment import Alignment
+
+class CIGAR(list):
+ """
+ Representation of the contents of a CIGAR string.
+
+ Each operation is represented by a
+ :class:`CIGAR.Operation <gfapy.alignment.cigar.CIGAR.Operation>`,
+ which specifies an operation length and operation symbol.
+
+ Instances are usually created from their string representations, using the
+ :class:`~gfapy.alignment.alignment.Alignment` factory class constructor.
+
+ Warning:
+ Although the GFA1 specification does not forbid the
+ operation symbols NSHX=, these are not allowed in GFA2
+ and thus their use in GFA1 is discouraged.
+ """
+
+ def complement(self):
+ """The CIGAR when switching the role of the two aligned segments.
+
+ Example:
+ >>> import gfapy
+ >>> str(gfapy.Alignment("2M1D3M").complement())
+ '3M1I2M'
+
+ Returns:
+ CIGAR: the complement CIGAR
+ """
+ comp = list(reversed(self))
+ for op in comp:
+ if op.code == "I": op.code = "D"
+ elif op.code == "S": op.code = "D"
+ elif op.code == "D": op.code = "I"
+ elif op.code == "N": op.code = "I"
+ return CIGAR(comp)
+
+ def validate(self, version = "gfa1"):
+ """Validates the instance.
+
+ Parameters:
+ version (str): 'gfa1' or 'gfa2'
+
+ Raises:
+ ~gfapy.error.VersionError: If a wrong **version** is specified.
+ ~gfapy.error.TypeError: If a component of the list is not a
+ CIGAR Operation; If the CIGAR operation length is not an integer or
+ a string representing an integer.
+ ~gfapy.error.ValueError: If the length of an operation is < 0; If an
+ operation code is invalid in general or for the specified GFA version.
+ """
+ if version != "gfa1" and version != "gfa2":
+ raise gfapy.VersionError(
+ "Version error: {}".format(repr(version)))
+ for op in self:
+ if not isinstance(op, gfapy.CIGAR.Operation):
+ raise gfapy.TypeError(
+ "Element is not a CIGAR operation: {}\n".format(op)+
+ "CIGAR instance is invalid: {}".format(self))
+ op.validate(version = version)
+
+ def length_on_reference(self):
+ """Length of the aligned substring on the reference sequence
+ (**from** sequence for GFA1 links/containments;
+ **sid1** sequence for GFA2 edges)
+
+ Returns:
+ int
+ """
+ l = 0
+ for op in self:
+ if op.code in ["M", "=", "X", "D" , "N"]:
+ l += op.length
+ return l
+
+ def length_on_query(self):
+ """
+    Length of the aligned substring on the query sequence
+ (**to** sequence for GFA1 links/containments;
+ **sid2** sequence for GFA2 edges)
+
+ Returns:
+ int
+ """
+ l = 0
+ for op in self:
+ if op.code in ["M", "=", "X", "I", "S"]:
+ l += op.length
+ return l
+
+ @classmethod
+ def _from_string(cls, string, valid = False, version = "gfa1"):
+ """Create a CIGAR instance from its string representation.
+
+ Parameters:
+ string (str)
+ valid (bool): If **True** the string is guaranteed to be valid.
+ (Defaults to **False**)
+ version (str): 'gfa1' or 'gfa2'
+
+ Returns:
+ ~gfapy.alignment.cigar.CIGAR or
+ ~gfapy.alignment.placeholder.AlignmentPlaceholder
+
+ Raises:
+ ~gfapy.error.FormatError: If the string is not a valid CIGAR string.
+ """
+ if string == "*":
+ return gfapy.AlignmentPlaceholder()
+ cigar = CIGAR()
+ if not valid:
+ if version == "gfa1":
+ if not re.match(r"^([0-9]+[MIDNSHPX=])+$", string):
+ raise gfapy.FormatError()
+ elif version == "gfa2":
+ if not re.match(r"^([0-9]+[MIDP])+$", string):
+ raise gfapy.FormatError()
+ for m in re.finditer("([0-9]+)([MIDNSHPX=])", string):
+ cigar.append(CIGAR.Operation(int(m.group(1)), m.group(2)))
+ return cigar
+
+ def __str__(self):
+ if not self:
+ return "*"
+ else:
+ return "".join([str(op) for op in self])
+
+ def __repr__(self):
+ return "gfapy.CIGAR([{}])".format(", ".join([repr(op) for op in self]))
+
+ class Operation:
+ """An operation in a CIGAR string.
+
+ Attributes:
+ ~Operation.length (int): Operation length.
+ code (str): Operation code, one of
+ :attr:`~Operation.CODE`.
+ """
+
+ CODE_GFA1_ONLY = ["S", "H", "N", "X", "="]
+ """Operations only valid in GFA1"""
+
+ CODE_GFA1_GFA2 = ["M", "I", "D", "P"]
+ """Operations valid in GFA1 and GFA2"""
+
+ CODE = CODE_GFA1_ONLY + CODE_GFA1_GFA2
+ """CIGAR operation codes"""
+
+ def validate(self, version = "gfa1"):
+ """Validates the CIGAR operation.
+
+ Parameters:
+ version (str): 'gfa1' or 'gfa2'
+
+ Raises:
+ ~gfapy.error.VersionError: If a wrong **version** is specified.
+ ~gfapy.error.TypeError: If the CIGAR operation length is not an integer
+ or a string representing an integer.
+ ~gfapy.error.ValueError: If the length of an operation is < 0; If an
+ operation code is invalid in general or for the specified GFA
+ version.
+ """
+ if version != "gfa1" and version != "gfa2":
+ raise gfapy.VersionError(
+ "Version error: {}".format(repr(version)))
+ if not isinstance(self.length, int) and not isinstance(self.length, str):
+ raise gfapy.TypeError(
+ "Type error: length of CIGAR is {}".format(self.length))
+ if(int(self.length) < 0):
+ raise gfapy.ValueError("Length of CIGAR is {}".format(self.length))
+ if version == "gfa2":
+ if not self.code in Operation.CODE_GFA1_GFA2:
+ raise gfapy.ValueError()
+ else:
+ if not self.code in Operation.CODE:
+ raise gfapy.ValueError()
+
+ def __init__(self, length, code):
+ self.length = length
+ self.code = code
+
+ def __len__(self):
+ return self.length
+
+ def __str__(self):
+ return "{}{}".format(self.length, self.code)
+
+ def __repr__(self):
+ return "gfapy.CIGAR.Operation({},{})".format(self.length, repr(self.code))
+
+ def __eq__(self, other):
+ return self.length == other.length and self.code == other.code
+
+Operation = CIGAR.Operation
diff --git a/gfapy/alignment/placeholder.py b/gfapy/alignment/placeholder.py
new file mode 100644
index 0000000..e4db4fe
--- /dev/null
+++ b/gfapy/alignment/placeholder.py
@@ -0,0 +1,20 @@
+import gfapy
+from .alignment import Alignment
+
+class AlignmentPlaceholder(gfapy.Placeholder):
+ """
+ A placeholder subclass for alignment fields.
+
+ Instances are usually created from their string representations, using the
+ :class:`~gfapy.alignment.alignment.Alignment` factory class constructor.
+ """
+
+ def complement(self):
+ """For compatibility with CIGAR alignments
+ Returns:
+ AlignmentPlaceholder : self
+ """
+ return self
+
+ def __repr__(self):
+ return "gfapy.AlignmentPlaceholder()"
diff --git a/gfapy/alignment/trace.py b/gfapy/alignment/trace.py
new file mode 100644
index 0000000..1508e3a
--- /dev/null
+++ b/gfapy/alignment/trace.py
@@ -0,0 +1,80 @@
+import gfapy
+from .alignment import Alignment
+
+class Trace(list):
+ """Trace alignment.
+
+ A trace is a list of integers, each giving the number of characters
+ in the second segment to align to the next ``TS`` characters in the first
+ segment (where ``TS``, the trace spacing, is either the default spacing
+  given in the header line ``TS`` tag, or the spacing given in the ``TS``
+ tag on the line itself, where the trace alignment is used).
+
+ Instances are usually created from their string representations, using the
+ :class:`~gfapy.alignment.alignment.Alignment` factory class constructor.
+ """
+
+ def complement(self):
+ """Computes the complement of the trace alignment.
+
+ A complement operation (such as for CIGARs) cannot be defined
+ for a trace, without computing the alignment. This is currently not
+ available in gfapy.
+
+ Returns:
+ gfapy.AlignmentPlaceholder
+ """
+ return gfapy.AlignmentPlaceholder()
+
+ def validate(self, ts = None, version = "gfa2"):
+ """Validates the trace alignment
+
+ Parameters:
+ ts (int): Trace Spacing. If specified, it will be checked that all values
+ are < **ts** (default: **None**, no check).
+ version (str) : GFA version (must be 'gfa1' or 'gfa2')
+
+ Raises:
+ ~gfapy.error.TypeError: If the list contains non-integer values
+ ~gfapy.error.ValueError: If the list contains values < 0 or > **ts**
+ ~gfapy.error.VersionError: If the version is 'gfa1' or an invalid version
+ string is provided
+ """
+ if version != "gfa2":
+ if version == "gfa1":
+ raise gfapy.VersionError("Traces are not compatible with GFA1")
+ else:
+ raise gfapy.VersionError("Version unknown: {}".format(repr(version)))
+ for e in self:
+ if not isinstance(e, int):
+ raise gfapy.TypeError(
+ ("Trace contains non-integer values ({0} found)\n" + "Content: {1}")
+ .format(e, repr(self)))
+ if e < 0:
+ raise gfapy.ValueError(
+ ("Trace contains value < 0 ({0} found)\n" + "Content: {1}")
+ .format(e, repr(self)))
+ if ts and e > ts:
+ raise gfapy.ValueError(
+ ("Trace contains value > TS ({0} found, TS = {2})\n" + "Content: {1}")
+ .format(e, repr(self), ts))
+
+ def __str__(self):
+ if not self:
+ return "*"
+ else:
+ return ",".join([str(v) for v in self])
+
+ def __repr__(self):
+ if not self:
+ return 'gfapy.Trace([])'
+ else:
+ return "gfapy.Trace([{}])".format(str(self))
+
+ @classmethod
+ def _from_string(cls,string):
+ try:
+ return Trace([int(v) for v in string.split(",")])
+ except:
+ raise gfapy.FormatError("string does not encode"+
+ " a valid trace alignment: {}".format(string))
diff --git a/gfapy/byte_array.py b/gfapy/byte_array.py
new file mode 100644
index 0000000..0bf50ec
--- /dev/null
+++ b/gfapy/byte_array.py
@@ -0,0 +1,63 @@
+import gfapy
+import binascii
+
+class ByteArray(bytes):
+ """Array of unsigned byte values.
+
+ The class is used for the representation of the data contained in H tags.
+ The content of instances of the class is read-only. To edit the array,
+  the instance must be cast to a list, edited, and cast back to ByteArray,
+ as in the example below.
+
+ Example:
+ >>> import gfapy
+ >>> a = gfapy.ByteArray([1,2,3])
+ >>> a[0] = 0
+ Traceback (most recent call last):
+ ...
+ TypeError: 'ByteArray' object does not support item assignment
+ >>> a_lst = list(a)
+ >>> a_lst[0] = 0
+ >>> a = gfapy.ByteArray(a_lst)
+ >>> str(a)
+ '000203'
+
+ Parameters:
+ arg (string or bytes): If the argument is of type string,
+ it has to be a valid hex string.
+
+ Raises:
+ gfapy.FormatError: If the argument is a string and has an invalid format.
+    gfapy.ValueError: If the argument is not a string or a byte array.
+ """
+
+ def __new__(cls, arg):
+ try:
+ if isinstance(arg, str):
+ if len(arg) == 0:
+ raise gfapy.FormatError
+ return bytes.__new__(cls, binascii.unhexlify(arg))
+ else:
+ return bytes.__new__(cls, arg)
+ except binascii.Error:
+ raise gfapy.FormatError
+ except ValueError:
+ raise gfapy.ValueError
+ except TypeError:
+ raise gfapy.ValueError
+
+ def __str__(self):
+ return str(binascii.hexlify(self), "utf8").upper()
+
+ def validate(self):
+ """Validates the content of the instance.
+
+ The content is always valid, as values cannot be modified directly (see
+ below) and trying to create from invalid data will raise an exception. So
+ the validation method is only a placeholder which always does nothing.
+ """
+ pass
+
+ def _default_gfa_tag_datatype(self):
+ """GFA tag datatype to use by default"""
+ return 'H'
diff --git a/gfapy/error.py b/gfapy/error.py
new file mode 100644
index 0000000..409d4c1
--- /dev/null
+++ b/gfapy/error.py
@@ -0,0 +1,67 @@
+class Error(Exception):
+ """Parent class for library-specific errors"""
+ pass
+
+class VersionError(Error):
+ """Unknown/wrong version of the specification"""
+ pass
+
+class RuntimeError(Error):
+ """The user tried to do something not allowed"""
+ pass
+
+class ValueError(Error):
+ """An object has the right type/form, but an invalid content
+
+ e.g. number out-of-range; string/array too big/small
+ """
+ pass
+
+class FormatError(Error):
+ """The format of an object is invalid
+
+ e.g. a line contains too many/few fields;
+ a tagname has the wrong format
+ """
+ pass
+
+class TypeError(Error):
+ """A wrong type has been used or specified
+
+ e.g. a field contains an array instead of an integer;
+ an invalid record type or datatype is found by parsing
+ """
+ pass
+
+class ArgumentError(Error):
+ """The argument of a method has the wrong type"""
+ pass
+
+class NotUniqueError(Error):
+ """An element which should have been unique is not unique
+
+ e.g. a tag name is duplicated in a line; a duplicated record ID is found
+ """
+ pass
+
+class InconsistencyError(Error):
+ """Contradictory information has been provided
+
+ e.g. GFA1 segment LN and sequence length differ;
+ a GFA2-only record is added to a GFA1 file
+ """
+ pass
+
+class NotFoundError(Error):
+ """An element which has been required is not found
+
+ e.g. a tag or record which is required is not found
+ """
+ pass
+
+class AssertionError(Error):
+ """An assertion has failed
+
+ An error of this kind indicates a probable bug.
+ """
+ pass
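+
+# Usage sketch (illustrative only; "decode_value" is a hypothetical helper,
+# not part of the library). Since all classes above derive from Error,
+# library errors can be caught individually or collectively:
+#
+#   try:
+#     value = decode_value(raw_field)
+#   except FormatError:
+#     ...   # the string representation was malformed
+#   except Error:
+#     ...   # any other gfapy-specific error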
diff --git a/gfapy/field/__init__.py b/gfapy/field/__init__.py
new file mode 100644
index 0000000..42b5ab7
--- /dev/null
+++ b/gfapy/field/__init__.py
@@ -0,0 +1,4 @@
+import gfapy
+import re
+import builtins
+from .field import Field
diff --git a/gfapy/field/alignment_gfa1.py b/gfapy/field/alignment_gfa1.py
new file mode 100644
index 0000000..881ee51
--- /dev/null
+++ b/gfapy/field/alignment_gfa1.py
@@ -0,0 +1,43 @@
+import gfapy
+import re
+
+def decode(string):
+ return gfapy.Alignment(string, valid = False, version = "gfa1")
+
+def unsafe_decode(string):
+ return gfapy.Alignment(string, valid = True, version = "gfa1")
+
+def validate_encoded(string):
+ if not re.match(r"^(\*|([0-9]+[MIDNSHPX=])+)$", string):
+ raise gfapy.FormatError(
+ "{} is not a valid GFA1 alignment\n".format(repr(string)) +
+ "(it is not * and is not a CIGAR string (([0-9]+[MIDNSHPX=])+)")
+
+def validate_decoded(obj):
+ if isinstance(obj, gfapy.CIGAR):
+ obj.validate()
+ elif isinstance(obj, gfapy.Placeholder):
+ pass
+ else:
+ raise gfapy.TypeError(
+ "the class {} is incompatible with the datatype\n"
+      .format(obj.__class__.__name__) +
+ "(accepted classes: gfapy.CIGAR, gfapy.Placeholder)")
+
+def unsafe_encode(obj):
+ return str(obj)
+
+def encode(obj):
+ if isinstance(obj, str):
+ validate_encoded(obj)
+ return obj
+ elif isinstance(obj, gfapy.CIGAR):
+ obj.validate()
+ return str(obj)
+ elif isinstance(obj, gfapy.Placeholder):
+ return str(obj)
+ else:
+ raise gfapy.TypeError(
+ "the class {} is incompatible with the datatype\n"
+ .format(obj.__class__.__name__) +
+ "(accepted classes: str, gfapy.CIGAR, gfapy.Placeholder)")
diff --git a/gfapy/field/alignment_gfa2.py b/gfapy/field/alignment_gfa2.py
new file mode 100644
index 0000000..092e589
--- /dev/null
+++ b/gfapy/field/alignment_gfa2.py
@@ -0,0 +1,32 @@
+import gfapy
+
+def unsafe_decode(string):
+ return gfapy.Alignment(string, valid = True, version = "gfa2")
+
+def decode(string):
+ return gfapy.Alignment(string, valid = False, version = "gfa2")
+
+validate_encoded = decode
+
+def validate_decoded(alignment):
+ alignment.validate()
+
+def unsafe_encode(obj):
+ return str(obj)
+
+def encode(obj):
+ if isinstance(obj, str):
+ validate_encoded(obj)
+ return obj
+ elif isinstance(obj, gfapy.CIGAR) or isinstance(obj, gfapy.Trace):
+ obj.validate()
+ return str(obj)
+ elif isinstance(obj, gfapy.Placeholder):
+ return "*"
+ else:
+ raise gfapy.TypeError(
+ "the class {} is incompatible with the datatype\n"
+ .format(obj.__class__.__name__)+
+ "(accepted classes: "+
+ "str, CIGAR, Trace, AlignmentPlaceholder)")
+
diff --git a/gfapy/field/alignment_list_gfa1.py b/gfapy/field/alignment_list_gfa1.py
new file mode 100644
index 0000000..972197a
--- /dev/null
+++ b/gfapy/field/alignment_list_gfa1.py
@@ -0,0 +1,54 @@
+import gfapy
+import re
+
+def unsafe_decode(string):
+ return [ gfapy.Alignment(s, version = "gfa1", valid = True) \
+ for s in string.split(",") ]
+
+def decode(string):
+ validate_encoded(string)
+ return unsafe_decode(string)
+
+def validate_encoded(string):
+ if not re.match(r"^(\*|(([0-9]+[MIDNSHPX=])+))(,(\*|(([0-9]+[MIDNSHPX=])+)))*$", string):
+ raise gfapy.FormatError(
+ "{} is not a comma separated list of * or CIGARs\n".format(repr(string))+
+ "(CIGAR strings must match ([0-9]+[MIDNSHPX=])+)")
+
+def validate_decoded(obj):
+ if isinstance(obj, gfapy.Placeholder):
+ pass
+ elif isinstance(obj, list):
+ for e in obj:
+ e = gfapy.Alignment(e, version = "gfa1")
+ e.validate()
+ else:
+ raise gfapy.TypeError(
+ "the class {} is incompatible with the datatype\n".format(obj.__class__.__name__)+
+ "(accepted classes: list, AlignmentPlaceholder)")
+
+def unsafe_encode(obj):
+ if isinstance(obj, gfapy.Placeholder):
+ return str(obj)
+ elif isinstance(obj, list):
+ return ",".join([str(gfapy.Alignment(cig)) for cig in obj])
+ else:
+ raise gfapy.TypeError(
+ "the class {} is incompatible with the datatype\n"
+ .format(obj.__class__.__name__)+
+ "(accepted classes: list, AlignmentPlaceholder)")
+
+def encode(obj):
+ if isinstance(obj, gfapy.Placeholder):
+ return str(obj)
+ if isinstance(obj, list):
+ def f(cig):
+ cig = gfapy.Alignment(cig)
+ cig.validate()
+ return str(cig)
+ return ",".join(map(f, obj))
+ else:
+ raise gfapy.TypeError(
+ "the class {} is incompatible with the datatype\n"
+ .format(obj.__class__.__name__)+
+ "(accepted classes: list, AlignmentPlaceholder)")
diff --git a/gfapy/field/byte_array.py b/gfapy/field/byte_array.py
new file mode 100644
index 0000000..5e508e3
--- /dev/null
+++ b/gfapy/field/byte_array.py
@@ -0,0 +1,44 @@
+import gfapy
+import re
+
+def unsafe_decode(string):
+ return gfapy.ByteArray(string)
+
+def decode(string):
+ return gfapy.ByteArray(string)
+
+def validate_encoded(string):
+ if not re.match(r"^[0-9A-F]+$", string):
+ raise gfapy.FormatError(
+ "{} is not a valid hex string\n".format(repr(string))+
+ "(it does not match the regular expression [0-9A-F]+)")
+
+def validate_decoded(byte_array):
+ return byte_array.validate()
+
+def unsafe_encode(obj):
+ if isinstance(obj, gfapy.ByteArray):
+ return str(obj)
+ if isinstance(obj, list):
+    return str(gfapy.ByteArray(obj))
+ elif isinstance(obj, str):
+ return obj
+ else:
+ raise gfapy.TypeError(
+ "the class {} is incompatible with the datatype\n"
+ .format(obj.__class__.__name__)+
+ "(accepted classes: str, list, gfapy.ByteArray)")
+
+def encode(obj):
+ if isinstance(obj, gfapy.ByteArray):
+ return str(obj)
+ elif isinstance(obj, list):
+    return str(gfapy.ByteArray(obj))
+ elif isinstance(obj, str):
+ validate_encoded(obj)
+ return obj
+ else:
+ raise gfapy.TypeError(
+ "the class {} is incompatible with the datatype\n"
+ .format(obj.__class__.__name__)+
+ "(accepted classes: str, list, gfapy.ByteArray)")
diff --git a/gfapy/field/char.py b/gfapy/field/char.py
new file mode 100644
index 0000000..4bf6050
--- /dev/null
+++ b/gfapy/field/char.py
@@ -0,0 +1,28 @@
+import gfapy
+import re
+
+def decode(string):
+ validate_encoded(string)
+ return string
+
+unsafe_decode = decode
+
+def validate_encoded(string):
+ if not re.match(r"^[!-~]$", string):
+ raise gfapy.FormatError(
+ "{} is not a single printable character string".format(repr(string)))
+
+def validate_decoded(string):
+ return validate_encoded(string)
+
+def unsafe_encode(obj):
+ return str(obj)
+
+def encode(obj):
+ if not isinstance(obj, str):
+ raise gfapy.TypeError(
+ "the class {} is incompatible with the datatype\n"
+ .format(obj.__class__.__name__)+
+ "(accepted classes: str)")
+ validate_encoded(obj)
+ return obj
diff --git a/gfapy/field/comment.py b/gfapy/field/comment.py
new file mode 100644
index 0000000..fec235c
--- /dev/null
+++ b/gfapy/field/comment.py
@@ -0,0 +1,28 @@
+import gfapy
+
+def unsafe_decode(string):
+ return string
+
+def decode(string):
+ validate_encoded(string)
+ return string
+
+def validate_encoded(string):
+ if string.find("\n") != -1:
+ raise gfapy.FormatError("{} is not a single-line string"
+ .format(repr(string)))
+
+validate_decoded = validate_encoded
+
+def unsafe_encode(obj):
+ return str(obj)
+
+def encode(obj):
+ if isinstance(obj, str):
+ validate_encoded(obj)
+ return obj
+ else:
+ raise gfapy.TypeError(
+ "the class {} is incompatible with the datatype\n"
+      .format(obj.__class__.__name__)+
+ "(accepted classes: str)")
diff --git a/gfapy/field/custom_record_type.py b/gfapy/field/custom_record_type.py
new file mode 100644
index 0000000..40f7976
--- /dev/null
+++ b/gfapy/field/custom_record_type.py
@@ -0,0 +1,34 @@
+import gfapy
+import re
+
+def unsafe_decode(string):
+ return string
+
+def decode(string):
+ validate_encoded(string)
+ return string
+
+def validate_encoded(string):
+ if not re.match(r"^[!-~]+$", string):
+ raise gfapy.FormatError(
+ "{} is not a valid custom record type\n".format(repr(string)) +
+ "(it contains spaces and/or non-printable characters)")
+ elif string in ["E", "G", "F", "O", "U", "H", "#", "S"]:
+ raise gfapy.FormatError(
+ "{} is not a valid custom record type\n".format(repr(string)) +
+ "(it is a predefined GFA2 record type)")
+
+validate_decoded = validate_encoded
+
+def unsafe_encode(obj):
+ return str(obj)
+
+def encode(obj):
+ if isinstance(obj, str):
+ validate_encoded(obj)
+ else:
+ raise gfapy.TypeError(
+ "the class {} is incompatible with the datatype\n"
+ .format(obj.__class__.__name__) +
+ "(accepted classes: str)")
+ return obj
diff --git a/gfapy/field/field.py b/gfapy/field/field.py
new file mode 100644
index 0000000..04f3270
--- /dev/null
+++ b/gfapy/field/field.py
@@ -0,0 +1,203 @@
+import gfapy
+import builtins
+import re
+from .validator import Validator
+from .parser import Parser
+from .writer import Writer
+from . import alignment_gfa1 as field_alignment_gfa1
+from . import alignment_gfa2 as field_alignment_gfa2
+from . import alignment_list_gfa1 as field_alignment_list_gfa1
+from . import byte_array as field_byte_array
+from . import char as field_char
+from . import comment as field_comment
+from . import custom_record_type as field_custom_record_type
+from . import float as field_float
+from . import generic as field_generic
+from . import identifier_gfa2 as field_identifier_gfa2
+from . import oriented_identifier_gfa2 as field_oriented_identifier_gfa2
+from . import identifier_list_gfa2 as field_identifier_list_gfa2
+from . import integer as field_integer
+from . import json as field_json
+from . import numeric_array as field_numeric_array
+from . import optional_identifier_gfa2 as field_optional_identifier_gfa2
+from . import optional_integer as field_optional_integer
+from . import orientation as field_orientation
+from . import oriented_identifier_list_gfa1 as field_oriented_identifier_list_gfa1
+from . import oriented_identifier_list_gfa2 as field_oriented_identifier_list_gfa2
+from . import path_name_gfa1 as field_path_name_gfa1
+from . import position_gfa1 as field_position_gfa1
+from . import position_gfa2 as field_position_gfa2
+from . import segment_name_gfa1 as field_segment_name_gfa1
+from . import sequence_gfa1 as field_sequence_gfa1
+from . import sequence_gfa2 as field_sequence_gfa2
+from . import string as field_string
+
+class Field(Validator, Parser, Writer):
+ """
+ Support for the decoding, validation and encoding of data in GFA fields.
+
+  Modules are defined (and imported here) for each type of field (positional
+  fields and tags) defined in the GFA specifications. The field definition
+  modules implement the following functions:
+
+ * ``decode(str)``: decodes the content of a GFA field into a Python object
+ which represents the value; the content of the field is validated and
+ the returned object is guaranteed to be valid
+ * ``unsafe_decode(str)``: an optional method, which decodes the content
+ of the string faster than decode(), but does not perform validations
+ * ``validate_encoded(str)``: validates the content of a GFA field, when this
+ is in its string form; it can be called by the decode() method
+ * ``validate_decoded(obj)``: validates the content of a GFA field, when this
+ is in a non-string form; it can be called by the encode() method
+ * ``encode(obj)``: takes a non-string content of a GFA field and converts it
+    into its string representation according to the GFA specification; the
+ returned string is guaranteed to be valid
+ * ``unsafe_encode(obj)``: an optional method, which encodes the content
+ of a non-string field faster than encode(), but does not perform
+ validations
+
+ Notes:
+ The library user does not call these methods directly, as the interaction
+ is done using the interface of the `~gfapy.line.line.Line` class.
+    However, a user may define classes for custom datatypes, to be used with
+ custom record types.
+ """
+
+ _default_tag_datatypes = [
+ (builtins.int , "i"),
+ (builtins.float , "f"),
+ (builtins.dict , "J"),
+ (builtins.list , "J"),
+ (builtins.object , "Z")
+ ]
+ """Default tag datatype to be used if the value is of a built-in class.
+
+  For classes which are not built-in, the _default_gfa_tag_datatype() method
+  of the class is called instead.
+ """
+
+ GFA1_POSFIELD_DATATYPE = [
+ "alignment_gfa1",
+ "alignment_list_gfa1",
+ "oriented_identifier_list_gfa1",
+ "position_gfa1",
+ "segment_name_gfa1",
+ "sequence_gfa1",
+ "path_name_gfa1",
+ ]
+ """The names of the GFA1-specific datatypes for positional fields."""
+
+ GFA2_POSFIELD_DATATYPE = [
+ "alignment_gfa2",
+ "generic",
+ "identifier_gfa2",
+ "oriented_identifier_gfa2",
+ "identifier_list_gfa2",
+ "oriented_identifier_list_gfa2",
+ "optional_identifier_gfa2",
+ "position_gfa2",
+ "custom_record_type",
+ "sequence_gfa2",
+ "optional_integer",
+ ]
+ """The names of the GFA2-specific datatypes for positional fields."""
+
+ GFAX_POSFIELD_DATATYPE = [ "comment", "orientation" ]
+ """The names of the non version-specific datatypes for positional fields."""
+
+ POSFIELD_DATATYPE = GFA1_POSFIELD_DATATYPE + \
+ GFA2_POSFIELD_DATATYPE + \
+ GFAX_POSFIELD_DATATYPE
+ """The names of all datatypes for positional fields."""
+
+ TAG_DATATYPE = ["A", "i", "f", "Z", "J", "H", "B"]
+ """The names of all datatypes for tags."""
+
+ FIELD_DATATYPE = TAG_DATATYPE + POSFIELD_DATATYPE
+ """The names of all datatypes for positional fields and tags."""
+
+ FIELD_MODULE = {
+ "alignment_gfa1" : field_alignment_gfa1,
+ "alignment_gfa2" : field_alignment_gfa2,
+ "alignment_list_gfa1" : field_alignment_list_gfa1,
+ "byte_array" : field_byte_array,
+ "char" : field_char,
+ "comment" : field_comment,
+ "custom_record_type" : field_custom_record_type,
+ "float" : field_float,
+ "generic" : field_generic,
+ "identifier_gfa2" : field_identifier_gfa2,
+ "oriented_identifier_gfa2" : field_oriented_identifier_gfa2,
+ "identifier_list_gfa2" : field_identifier_list_gfa2,
+ "integer" : field_integer,
+ "json" : field_json,
+ "numeric_array" : field_numeric_array,
+ "optional_identifier_gfa2" : field_optional_identifier_gfa2,
+ "optional_integer" : field_optional_integer,
+ "orientation" : field_orientation,
+ "oriented_identifier_list_gfa1" : field_oriented_identifier_list_gfa1,
+ "oriented_identifier_list_gfa2" : field_oriented_identifier_list_gfa2,
+ "path_name_gfa1" : field_path_name_gfa1,
+ "position_gfa1" : field_position_gfa1,
+ "position_gfa2" : field_position_gfa2,
+ "segment_name_gfa1" : field_segment_name_gfa1,
+ "sequence_gfa1" : field_sequence_gfa1,
+ "sequence_gfa2" : field_sequence_gfa2,
+ "string" : field_string,
+ "H" : field_byte_array,
+ "A" : field_char,
+ "f" : field_float,
+ "i" : field_integer,
+ "J" : field_json,
+ "B" : field_numeric_array,
+ "Z" : field_string,
+ }
+ """Assignment of a class for the parsing, validation and encoding of data.
+
+ The dictionary contains keys for each GFA datatype; the value is a class name,
+ which provides the encoding, decoding and validation methods.
+
+ For simplicity of use, tag datatypes are present twice, once with a
+ one-letter symbol (such as i) and once with a longer labe; (such as integer).
+ """
+
+  # Returns the default GFA tag datatype for the given object.
+ @staticmethod
+ def _get_default_gfa_tag_datatype(obj):
+ """Default GFA tag datatype for a given object
+
+ Parameters:
+ obj : an object of any Python class
+
+ Returns:
+ str : the identifier of a datatype (one of the keys of FIELD_MODULE)
+ to be used for a tag with obj as value, if a datatype has not
+ been specified by the user
+ """
+ if getattr(obj, "_default_gfa_tag_datatype",None):
+ return obj._default_gfa_tag_datatype()
+ else:
+ if isinstance(obj, list) and\
+ (all([isinstance(v, builtins.int) for v in obj]) or
+ all([isinstance(v, builtins.float) for v in obj])):
+ return "B"
+ for k,v in gfapy.Field._default_tag_datatypes:
+ if isinstance(obj, k):
+ return v
+ return "J"
+
+ @classmethod
+ def register_datatype(cls, name, klass):
+ """Register a custom-defined datatype class
+
+ Parameters:
+ name (str) : the identifier to be used for the datatype. This is
+ to be used in the field datatype declaration of extensions
+        defining custom records, which use this custom datatype
+      klass (class) : the class which provides the decode, encode,
+ validate_encoded and validate_decoded methods for handling
+ data of the custom datatype
+ """
+ cls.GFA2_POSFIELD_DATATYPE.append(name)
+ cls.FIELD_MODULE[name] = klass
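+
+  # A minimal sketch of a custom datatype class (illustrative assumption:
+  # the name "lowercase_str" and the class LowercaseString are not part of
+  # the library). Any class providing the functions listed in the class
+  # docstring can be registered:
+  #
+  #   class LowercaseString:
+  #     @staticmethod
+  #     def validate_encoded(string):
+  #       if not string.islower():
+  #         raise gfapy.FormatError("{} is not lowercase".format(repr(string)))
+  #     validate_decoded = validate_encoded
+  #     @staticmethod
+  #     def decode(string):
+  #       LowercaseString.validate_encoded(string)
+  #       return string
+  #     @staticmethod
+  #     def encode(obj):
+  #       LowercaseString.validate_decoded(obj)
+  #       return obj
+  #
+  #   Field.register_datatype("lowercase_str", LowercaseString)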
+
diff --git a/gfapy/field/float.py b/gfapy/field/float.py
new file mode 100644
index 0000000..595f04c
--- /dev/null
+++ b/gfapy/field/float.py
@@ -0,0 +1,36 @@
+import gfapy
+import re
+
+def decode(string):
+ try:
+ return float(string)
+ except:
+ raise gfapy.FormatError
+
+unsafe_decode = decode
+
+def validate_decoded(integer):
+ pass
+ # always valid
+
+def validate_encoded(string):
+ if not re.match(r"^[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?$", string):
+ raise gfapy.FormatError(
+ "{} does not represent a valid float\n".format(repr(string)) +
+ "(it does not match [-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?)")
+
+def unsafe_encode(obj):
+ return str(obj)
+
+
+def encode(obj):
+ if isinstance(obj, str):
+ validate_encoded(obj)
+ return obj
+ elif isinstance(obj, int) or isinstance(obj, float):
+ return str(obj)
+ else:
+ raise gfapy.TypeError(
+ "the class {} is incompatible with the datatype\n"
+ .format(obj.__class__.__name__)+
+ "(accepted classes: str, int, float)")
diff --git a/gfapy/field/generic.py b/gfapy/field/generic.py
new file mode 100644
index 0000000..c005b85
--- /dev/null
+++ b/gfapy/field/generic.py
@@ -0,0 +1,29 @@
+import gfapy
+
+def unsafe_decode(string):
+ return string
+
+def decode(string):
+ validate_encoded(string)
+ return string
+
+def validate_encoded(string):
+ if string.find("\n") != -1 or string.find("\t") != -1:
+ raise gfapy.FormatError(
+ "{} is not a valid field content\n".format(repr(string)) +
+ "(it contains newlines and/or tabs)")
+
+validate_decoded = validate_encoded
+
+def unsafe_encode(obj):
+ return str(obj)
+
+def encode(obj):
+ if isinstance(obj, str):
+ validate_encoded(obj)
+ else:
+ raise gfapy.TypeError(
+ "the class {} is incompatible with the datatype\n"
+ .format(obj.__class__.__name__)+
+ "(accepted classes: str)")
+ return obj
diff --git a/gfapy/field/identifier_gfa2.py b/gfapy/field/identifier_gfa2.py
new file mode 100644
index 0000000..c3df91e
--- /dev/null
+++ b/gfapy/field/identifier_gfa2.py
@@ -0,0 +1,42 @@
+import gfapy
+import re
+
+def unsafe_decode(string):
+ return string
+
+def decode(string):
+ validate_encoded(string)
+ return string
+
+def validate_encoded(string):
+ if not re.match("^[!-~]+$", string):
+ raise gfapy.FormatError(
+ "{} is not a valid GFA2 identifier\n".format(repr(string))+
+ "(it contains spaces or non-printable characters)")
+
+def validate_decoded(obj):
+ if isinstance(obj, gfapy.Line):
+ validate_encoded(obj.name)
+ elif isinstance(obj, str):
+ validate_encoded(obj)
+ else:
+ raise gfapy.TypeError(
+ "the class {} is incompatible with the datatype\n"
+ .format(obj.__class__.__name__)+
+ "(accepted classes: str, gfapy.Line)")
+
+def unsafe_encode(obj):
+ if isinstance(obj, str):
+ return obj
+ if isinstance(obj, gfapy.Line):
+ return str(obj.name)
+ else:
+ raise gfapy.TypeError(
+ "the class {} is incompatible with the datatype\n"
+ .format(obj.__class__.__name__)+
+ "(accepted classes: str, gfapy.Line)")
+
+def encode(obj):
+ string = unsafe_encode(obj)
+ validate_encoded(string)
+ return string
diff --git a/gfapy/field/identifier_list_gfa2.py b/gfapy/field/identifier_list_gfa2.py
new file mode 100644
index 0000000..08b41d8
--- /dev/null
+++ b/gfapy/field/identifier_list_gfa2.py
@@ -0,0 +1,61 @@
+import gfapy
+import re
+
+def unsafe_decode(string):
+ return string.split(" ")
+
+def decode(string):
+ validate_encoded(string)
+ return unsafe_decode(string)
+
+def validate_encoded(string):
+ if not re.match("^[ !-~]+$", string):
+ raise gfapy.FormatError(
+ "{} is not a valid list of GFA2 identifier\n".format(repr(string))+
+ "(it contains non-printable characters)")
+
+def validate_decoded(obj):
+ if isinstance(obj, list):
+ for elem in obj:
+ if isinstance(elem, gfapy.Line):
+ elem = str(elem.name)
+ elif not isinstance(elem, str):
+ raise gfapy.TypeError(
+ "the list contains an obj of class {}\n"
+ .format(elem.__class__.__name__)+
+ "(accepted classes: str, gfapy.Line)")
+ if not re.match("^[!-~]+$", elem):
+ raise gfapy.FormatError(
+ "the list contains an invalid GFA2 identifier ({})\n"
+          .format(repr(elem))+
+ "(it contains spaces and/or non-printable characters)")
+ else:
+ raise gfapy.TypeError(
+ "the class {} is incompatible with the datatype\n"
+ .format(obj.__class__.__name__) +
+ "(accepted classes: list)")
+
+def unsafe_encode(obj):
+ if isinstance(obj, list):
+ def func(elem):
+ if isinstance(elem, str):
+ return elem
+ elif isinstance(elem, gfapy.Line):
+ return str(elem.name)
+ else:
+ raise gfapy.TypeError(
+ "the list contains an obj of class {}\n"
+ .format(elem.__class__.__name__)+
+ "(accepted classes: str, gfapy.Line)")
+ return " ".join(map(func, obj))
+ elif isinstance(obj, str):
+ return obj
+ else:
+ raise gfapy.TypeError(
+ "the class {} is incompatible with the datatype\n"
+ .format(obj.__class__.__name__)+
+ "(accepted classes: list, str)")
+
+def encode(obj):
+ validate_decoded(obj)
+ return unsafe_encode(obj)
diff --git a/gfapy/field/integer.py b/gfapy/field/integer.py
new file mode 100644
index 0000000..adf65e0
--- /dev/null
+++ b/gfapy/field/integer.py
@@ -0,0 +1,40 @@
+import gfapy
+import re
+
+def decode(string):
+ try:
+ return int(string)
+ except:
+ raise gfapy.FormatError("the string does not represent a valid integer")
+
+unsafe_decode = decode
+
+def validate_decoded(obj):
+ if isinstance(obj, int):
+ pass
+ else:
+ raise gfapy.TypeError(
+ "the class {} is incompatible with the datatype\n"
+ .format(obj.__class__.__name__)+
+ "(accepted classes: str, int)")
+
+def validate_encoded(string):
+ if not re.match("^[-+]?[0-9]+$", string):
+ raise gfapy.FormatError(
+ "{} does not represent a valid integer\n".format(repr(string)) +
+ "(it does not match the regular expression [-+]?[0-9]+)")
+
+def unsafe_encode(obj):
+ return str(obj)
+
+def encode(obj):
+ if isinstance(obj, str):
+ validate_encoded(obj)
+ return obj
+ elif isinstance(obj, int):
+ return str(obj)
+ else:
+ raise gfapy.TypeError(
+ "the class {} is incompatible with the datatype\n"
+ .format(obj.__class__.__name__)+
+ "(accepted classes: str, int)")
diff --git a/gfapy/field/json.py b/gfapy/field/json.py
new file mode 100644
index 0000000..422d3e4
--- /dev/null
+++ b/gfapy/field/json.py
@@ -0,0 +1,58 @@
+import gfapy
+import json
+import re
+
+def unsafe_decode(string):
+ return json.loads(string)
+
+def decode(string):
+ validate_all_printable(string)
+ return unsafe_decode(string)
+
+def validate_encoded(string):
+ # both regex and JSON parse are necessary,
+ # because string can be invalid JSON and
+ # JSON can contain forbidden chars (non-printable)
+ validate_all_printable(string)
+ try:
+ json.loads(string)
+ except Exception as err:
+    raise gfapy.FormatError(
+      "{} is not a valid JSON string\n".format(repr(string))+
+      "json.loads raised a {} exception\n".format(err.__class__.__name__)+
+      "error message: {}".format(str(err))) from err
+
+def validate_decoded(obj):
+ if isinstance(obj, gfapy.FieldArray):
+ obj.validate()
+ elif isinstance(obj, list) or isinstance(obj, dict):
+ string = encode(obj)
+ validate_all_printable(string)
+ else:
+ raise gfapy.TypeError(
+ "the class {} is incompatible with the datatype\n"
+ .format(obj.__class__.__name__) +
+ "(accepted classes: list, dict, gfapy.FieldArray)")
+
+def unsafe_encode(obj):
+ return json.dumps(obj)
+
+def encode(obj):
+ if isinstance(obj, str):
+ validate_encoded(obj)
+ return obj
+ elif isinstance(obj, list) or isinstance(obj, dict):
+ string = json.dumps(obj)
+ validate_all_printable(string)
+ return string
+ else:
+ raise gfapy.TypeError(
+ "the class {} is incompatible with the datatype\n"
+ .format(obj.__class__.__name__) +
+ "(accepted classes: list, dict, gfapy.FieldArray)")
+
+def validate_all_printable(string):
+ if not re.match("^[ !-~]+$", string):
+ raise gfapy.FormatError(
+ "{} is not a valid JSON field\n".format(repr(string))+
+ "(it contains newlines, tabs and/or non-printable characters)")
diff --git a/gfapy/field/numeric_array.py b/gfapy/field/numeric_array.py
new file mode 100644
index 0000000..28e18ae
--- /dev/null
+++ b/gfapy/field/numeric_array.py
@@ -0,0 +1,45 @@
+import gfapy
+import re
+
+def unsafe_decode(string):
+ return gfapy.NumericArray.from_string(string, valid = True)
+
+def decode(string):
+ return gfapy.NumericArray.from_string(string)
+
+def validate_encoded(string):
+ if not re.match(r"^(f(,[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?)+|[CSI](,\+?[0-9]+)+|[csi](,[-+]?[0-9]+)+)$", string):
+ raise gfapy.FormatError(
+ "{} is not a valid numeric array string\n".format(repr(string))+
+ "(it must be one of [fcsiCSI] followed by a comma-separated list of:"+
+ " for f: floats; for csi: signed integers; for CSI: unsigned integers)")
+
+def validate_decoded(numeric_array):
+ numeric_array.validate()
+
+def unsafe_encode(obj):
+ if isinstance(obj, gfapy.NumericArray):
+ return str(obj)
+ elif isinstance(obj, list):
+ return str(gfapy.NumericArray(obj))
+ elif isinstance(obj, str):
+ return obj
+ else:
+ raise gfapy.TypeError(
+ "the class {} is incompatible with the datatype\n"
+ .format(obj.__class__.__name__)+
+ "(accepted classes: str, line, gfapy.NumericArray)")
+
+def encode(obj):
+ if isinstance(obj, gfapy.NumericArray):
+ return str(obj)
+ elif isinstance(obj, list):
+ return str(gfapy.NumericArray(obj))
+ elif isinstance(obj, str):
+ validate_encoded(obj)
+ return obj
+ else:
+ raise gfapy.TypeError(
+ "the class {} is incompatible with the datatype\n"
+ .format(obj.__class__.__name__)+
+ "(accepted classes: str, line, gfapy.NumericArray)")
diff --git a/gfapy/field/optional_identifier_gfa2.py b/gfapy/field/optional_identifier_gfa2.py
new file mode 100644
index 0000000..8c9075b
--- /dev/null
+++ b/gfapy/field/optional_identifier_gfa2.py
@@ -0,0 +1,62 @@
+import gfapy
+import re
+
+def unsafe_decode(string):
+ if string == "*":
+ return gfapy.Placeholder()
+ else:
+ return string
+
+def decode(string):
+ if string == "*":
+ return gfapy.Placeholder()
+ else:
+ validate_encoded(string)
+ return string
+
+def validate_encoded(string):
+ if not re.match("^[!-~]+$", string):
+ raise gfapy.FormatError(
+ "{} is not a valid GFA2 optional identifier\n".format(repr(string))+
+ "(it contains spaces or non-printable characters)")
+
+def validate_decoded(obj):
+ if isinstance(obj, gfapy.Placeholder):
+ pass
+ elif isinstance(obj, gfapy.Line):
+ validate_encoded(obj.name)
+  elif isinstance(obj, str):
+ validate_encoded(obj)
+ else:
+ raise gfapy.TypeError(
+ "the class {} is incompatible with the datatype\n"
+ .format(obj.__class__.__name__)+
+ "(accepted classes: str, gfapy.Line, gfapy.Placeholder)")
+
+def unsafe_encode(obj):
+ if isinstance(obj, str):
+ return obj
+  elif isinstance(obj, gfapy.Placeholder):
+ return str(obj)
+ elif isinstance(obj, gfapy.Line):
+ return str(obj.name)
+ else:
+ raise gfapy.TypeError(
+ "the class {} is incompatible with the datatype\n"
+ .format(obj.__class__.__name__)+
+ "(accepted classes: str, gfapy.Line, gfapy.Placeholder)")
+
+def encode(obj):
+ if isinstance(obj, gfapy.Placeholder):
+ return str(obj)
+  elif isinstance(obj, str):
+ obj = str(obj)
+ elif isinstance(obj, gfapy.Line):
+ obj = str(obj.name)
+ else:
+ raise gfapy.TypeError(
+ "the class {} is incompatible with the datatype\n"
+ .format(obj.__class__.__name__)+
+ "(accepted classes: str, gfapy.Line, gfapy.Placeholder)")
+ validate_encoded(obj)
+ return obj
diff --git a/gfapy/field/optional_integer.py b/gfapy/field/optional_integer.py
new file mode 100644
index 0000000..18f9918
--- /dev/null
+++ b/gfapy/field/optional_integer.py
@@ -0,0 +1,44 @@
+import gfapy
+import re
+
+def decode(string):
+ if string == "*":
+ return gfapy.Placeholder()
+ else:
+ try:
+ return int(string)
+ except:
+ raise gfapy.FormatError("the string does not represent a valid integer")
+
+unsafe_decode = decode
+
+def validate_decoded(obj):
+  if isinstance(obj, int) or isinstance(obj, gfapy.Placeholder):
+ pass
+ else:
+ raise gfapy.TypeError(
+ "the class {} is incompatible with the datatype\n"
+ .format(obj.__class__.__name__)+
+ "(accepted classes: int, gfapy.Placeholder)")
+
+def validate_encoded(string):
+ if not re.match(r"^(\*|[-+]?[0-9]+)$", string):
+ raise gfapy.FormatError(
+ "{} does not represent a valid optional integer value\n"
+ .format(repr(string))+
+ "(it is not * and does not match the regular expression [-+]?[0-9]+)")
+
+def unsafe_encode(obj):
+ return str(obj)
+
+def encode(obj):
+ if isinstance(obj, str):
+ validate_encoded(obj)
+ return obj
+ elif isinstance(obj, int) or isinstance(obj, gfapy.Placeholder):
+ return str(obj)
+ else:
+ raise gfapy.TypeError(
+ "the class {} is incompatible with the datatype\n"
+ .format(obj.__class__.__name__)+
+ "(accepted classes: int, str, gfapy.Placeholder)")
diff --git a/gfapy/field/orientation.py b/gfapy/field/orientation.py
new file mode 100644
index 0000000..0d3b982
--- /dev/null
+++ b/gfapy/field/orientation.py
@@ -0,0 +1,36 @@
+import gfapy
+
+def unsafe_decode(string):
+ return string
+
+def decode(string):
+ validate_decoded(string)
+ return string
+
+def validate_decoded(string):
+ if string != "+" and string != "-":
+ raise gfapy.FormatError(
+ "{} is not a valid orientation\n".format(repr(string))+
+ "(it must be + or -)")
+ return string
+
+#identical to validate_decoded, because python version uses strings for symbols
+def validate_encoded(string):
+ if string != "+" and string != "-":
+ raise gfapy.FormatError(
+ "{} is not a valid orientation\n".format(repr(string))+
+ "(it must be + or -)")
+ return string
+
+def unsafe_encode(obj):
+ return str(obj)
+
+def encode(obj):
+ if isinstance(obj, str):
+ validate_encoded(obj)
+ return obj
+ else:
+ raise gfapy.TypeError(
+ "the class {} is incompatible with the datatype\n"
+ .format(obj.__class__.__name__)+
+ "(accepted classes: str)")
diff --git a/gfapy/field/oriented_identifier_gfa2.py b/gfapy/field/oriented_identifier_gfa2.py
new file mode 100644
index 0000000..574b54e
--- /dev/null
+++ b/gfapy/field/oriented_identifier_gfa2.py
@@ -0,0 +1,46 @@
+import gfapy
+import re
+
+def unsafe_decode(string):
+ return gfapy.OrientedLine(string[:-1], string[-1])
+
+def decode(string):
+ obj = unsafe_decode(string)
+ validate_decoded(obj)
+ return obj
+
+def validate_encoded(string):
+ if not re.match("^[!-~]+[+-]$", string):
+ raise gfapy.FormatError(
+ "{} is not a valid oriented GFA2 identifier\n".format(repr(string))+
+ "(it contains spaces or non-printable characters, or a wrong orientation)")
+
+def validate_decoded(obj):
+ if isinstance(obj, gfapy.OrientedLine):
+ if not re.match("^[!-~]+$", obj.name):
+ raise gfapy.FormatError(
+ "{} is not a valid oriented GFA2 identifier\n".format(repr(obj.name)))
+ if obj.orient != "+" and obj.orient != "-":
+ raise gfapy.FormatError(
+ "{} is not a valid orientation\n".format(repr(obj.orient)))
+ else:
+ raise gfapy.TypeError(
+ "the class {} is incompatible with the datatype\n"
+ .format(obj.__class__.__name__)+
+ "(accepted classes: gfapy.OrientedLine)")
+
+def unsafe_encode(obj):
+ if isinstance(obj, str):
+ return obj
+ if isinstance(obj, gfapy.OrientedLine):
+ return str(obj)
+ else:
+ raise gfapy.TypeError(
+ "the class {} is incompatible with the datatype\n"
+ .format(obj.__class__.__name__)+
+ "(accepted classes: str, gfapy.OrientedLine)")
+
+def encode(obj):
+ string = unsafe_encode(obj)
+ validate_encoded(string)
+ return string
diff --git a/gfapy/field/oriented_identifier_list_gfa1.py b/gfapy/field/oriented_identifier_list_gfa1.py
new file mode 100644
index 0000000..5d5e0d4
--- /dev/null
+++ b/gfapy/field/oriented_identifier_list_gfa1.py
@@ -0,0 +1,53 @@
+import gfapy
+import re
+
+def unsafe_decode(string):
+ return [ gfapy.OrientedLine(str(l[0:-1]), str(l[-1]))
+ for l in string.split(",")]
+
+def decode(string):
+ validate_encoded(string)
+ return unsafe_decode(string)
+
+def validate_encoded(string):
+ if not re.match(r"^[!-)+-<>-~][!-~]*[+-](,[!-)+-<>-~][!-~]*[+-])+$", string):
+ raise gfapy.FormatError(
+ "{} is not a valid list of GFA1 segment names ".format(repr(string))+
+ "and orientations\n"+
+ "(the segment names must match [!-)+-<>-~][!-~]*;\n"+
+ " the orientations must be + or -;\n"+
+ " the list must be comma-separated "+
+ "NameOrient,NameOrient[,NameOrient...])")
+
+def validate_decoded(iterable):
+ for elem in iterable:
+ elem = gfapy.OrientedLine(elem)
+ elem.validate()
+ if not re.match(r"^[!-)+-<>-~][!-~]*$", elem.name):
+ raise gfapy.FormatError(
+ "#{elem.name} is not a valid GFA1 segment name\n".format(elem.name)+
+ "(it does not match [!-)+-<>-~][!-~]*)")
+
+def unsafe_encode(obj):
+ if isinstance(obj, str):
+ return obj
+ elif isinstance(obj, list):
+ return ",".join([str(gfapy.OrientedLine(os)) for os in obj])
+ else:
+ raise gfapy.TypeError(
+ "the class {} is incompatible with the datatype\n"
+ .format(obj.__class__.__name__)+
+ "(accepted classes: str, list)")
+
+def encode(obj):
+ if isinstance(obj, str):
+ validate_encoded(obj)
+ return obj
+ elif isinstance(obj, list):
+ validate_decoded(obj)
+ return ",".join([str(gfapy.OrientedLine(os)) for os in obj])
+ else:
+ raise gfapy.TypeError(
+ "the class {} is incompatible with the datatype\n"
+ .format(obj.__class__.__name__)+
+ "(accepted classes: str, list)")
diff --git a/gfapy/field/oriented_identifier_list_gfa2.py b/gfapy/field/oriented_identifier_list_gfa2.py
new file mode 100644
index 0000000..1d7730a
--- /dev/null
+++ b/gfapy/field/oriented_identifier_list_gfa2.py
@@ -0,0 +1,53 @@
+import gfapy
+import re
+
+def unsafe_decode(string):
+ return [ gfapy.OrientedLine(str(l[0:-1]), str(l[-1]))
+ for l in string.split(" ")]
+
+def decode(string):
+ validate_encoded(string)
+ return unsafe_decode(string)
+
+def validate_encoded(string):
+ if not re.match(r"^[!-~]+[+-]( [!-~]+[+-])*$", string):
+ raise gfapy.FormatError(
+ "{} is not a valid list of GFA2 segment names ".format(repr(string))+
+ "and orientations")
+
+def validate_decoded(iterable):
+ for elem in iterable:
+ if not isinstance(elem, gfapy.OrientedLine):
+ raise gfapy.TypeError(
+ "the list contains an object of class {}\n".format(type(elem))+
+ "(accepted classes: gfapy.OrientedLine)")
+ elem.validate()
+ if not re.match(r"^[!-~]+$", elem.name):
+ raise gfapy.FormatError(
+ "the list contains an invalid GFA2 identifier\n".format(elem.name)+
+ "(it contains spaces and/or non-printable characters)")
+ if not elem.orient in ["+", "-"]:
+ raise gfapy.FormatError(
+ "{} is not a valid orientation".format(elem.orient))
+
+def unsafe_encode(obj):
+ if isinstance(obj, str):
+ return obj
+ elif isinstance(obj, list):
+ retval = []
+ for elem in obj:
+ if not isinstance(elem, gfapy.OrientedLine):
+ raise gfapy.TypeError(
+ "the list contains an object of class {}\n".format(type(elem))+
+ "(accepted classes: gfapy.OrientedLine)")
+ retval.append(str(elem))
+ return " ".join(retval)
+ else:
+ raise gfapy.TypeError(
+ "the class {} is incompatible with the datatype\n"
+ .format(obj.__class__.__name__)+
+ "(accepted classes: str, list)")
+
+def encode(obj):
+ validate_decoded(obj)
+ return unsafe_encode(obj)
diff --git a/gfapy/field/parser.py b/gfapy/field/parser.py
new file mode 100644
index 0000000..75719d6
--- /dev/null
+++ b/gfapy/field/parser.py
@@ -0,0 +1,100 @@
+"""
+Decoding of the GFA string representations into python objects
+"""
+import gfapy
+import re
+
+class Parser:
+
+ @staticmethod
+ def _parse_gfa_field(string, datatype, safe = True, fieldname = None,
+ line = None):
+ """
+ Parse a GFA string representation and decodes it into a python object
+
+ Parameters
+ ----------
+ string : str
+ the GFA string to parse
+ datatype : one of gfapy.Field.FIELD_DATATYPE
+ the datatype to use
+ safe : bool, optional
+ *(defaults to: ***True***)* if **True** the safe
+ version of the decode function for the datatype is used, which
+ validates the content of the string; if **False**, the string is
+ assumed to be valid and decoded into a value accordingly, which may
+ result in invalid values (but may be faster than the safe decoding)
+ fieldname : str, optional
+ fieldname for error messages
+ line : gfapy.Line, optional
+ line content for error messages
+
+ Raises
+ ------
+ gfapy.TypeError
+ if the specified datatype is unknown
+ gfapy.FormatError
+ if the string syntax is not valid
+ gfapy.ValueError
+ if the decoded value is not valid
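+
+    Examples
+    --------
+    Illustrative calls (a sketch; this is an internal helper):
+
+    >>> Parser._parse_gfa_field("12", "i")
+    12
+    >>> Parser._parse_gfa_field("1.5", "f")
+    1.5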
+ """
+ mod = gfapy.Field.FIELD_MODULE.get(datatype)
+ if mod is None:
+ try:
+ linemsg = ("Line content: " + str(line) + "\n") if line is not None else ""
+ except:
+ linemsg = ""
+ fieldnamemsg = "Field: {}\n".format(fieldname) if fieldname else ""
+ contentmsg = "Content: {}\n".format(string)
+ raise gfapy.TypeError(
+ linemsg +
+ fieldnamemsg +
+ contentmsg +
+ "Datatype unknown: {}".format(repr(datatype)))
+ try:
+ if safe or not getattr(mod, "unsafe_decode"):
+ return mod.decode(string)
+ else:
+ return mod.unsafe_decode(string)
+ except Exception as err:
+ try:
+ linemsg = ("Line content: " + str(line) + "\n") if line is not None else ""
+ except:
+ linemsg = ""
+ fieldnamemsg = "Field: {}\n".format(fieldname) if fieldname else ""
+ contentmsg = "Content: {}\n".format(string)
+ datatypemsg = "Datatype: {}\n".format(datatype)
+ raise err.__class__(
+ linemsg +
+ fieldnamemsg +
+ datatypemsg +
+ contentmsg +
+ (err.message if hasattr(err, "message") else str(err))) from err
+
+ @staticmethod
+ def _parse_gfa_tag(tag):
+ """
+ Parses a GFA tag in the form **xx:d:content** into its components.
+ The **content** is not decoded (see :func:`_parse_gfa_field`).
+
+ Parameters
+ ----------
+ tag : str
+ the GFA tag to parse
+
+ Raises
+ ------
+ gfapy.FormatError
+ if the string does not represent a valid GFA tag
+
+ Returns
+ -------
+ list of (str, gfapy.Field.FIELD_DATATYPE)
+ the parsed content of the field
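+
+    Examples
+    --------
+    Illustrative call (a sketch; this is an internal helper):
+
+    >>> Parser._parse_gfa_tag("xx:i:12")
+    ['xx', 'i', '12']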
+ """
+ match = re.match(r"^([A-Za-z][A-Za-z0-9]):([AifZJHB]):(.+)$", tag)
+ if match:
+ return [match.group(1), match.group(2), match.group(3)]
+ else:
+ raise gfapy.FormatError(
+ "Expected GFA tag, found: {}".format(repr(tag)))
diff --git a/gfapy/field/path_name_gfa1.py b/gfapy/field/path_name_gfa1.py
new file mode 100644
index 0000000..5b344a0
--- /dev/null
+++ b/gfapy/field/path_name_gfa1.py
@@ -0,0 +1,38 @@
+import gfapy
+import re
+
+def unsafe_decode(string):
+ return string
+
+def decode(string):
+ validate_encoded(string)
+ return string
+
+def validate_encoded(string):
+ if not re.match("^[!-)+-<>-~][!-~]*$", string):
+ raise gfapy.FormatError(
+ "{} is not a valid GFA1 path name\n".format(repr(string)) +
+ "(it does not match the regular expression [!-)+-<>-~][!-~]*")
+
+def validate_decoded(obj):
+ if isinstance(obj, str):
+    validate_encoded(obj)
+ else:
+ raise gfapy.TypeError(
+ "the class {} is incompatible with the datatype\n"
+ .format(obj.__class__.__name__)+
+ "(accepted classes: str)")
+
+def unsafe_encode(obj):
+ if isinstance(obj, str):
+ return obj
+ else:
+ raise gfapy.TypeError(
+ "the class {} is incompatible with the datatype\n"
+ .format(obj.__class__.__name__)+
+ "(accepted classes: str)")
+
+def encode(obj):
+ string = unsafe_encode(obj)
+ validate_encoded(string)
+ return string
diff --git a/gfapy/field/position_gfa1.py b/gfapy/field/position_gfa1.py
new file mode 100644
index 0000000..4982383
--- /dev/null
+++ b/gfapy/field/position_gfa1.py
@@ -0,0 +1,40 @@
+import gfapy
+import re
+
+def unsafe_decode(string):
+ try:
+ return int(string)
+ except:
+ raise gfapy.FormatError(
+ "{} does not represent a valid integer".format(repr(string)))
+
+def decode(string):
+ value = unsafe_decode(string)
+ validate_decoded(value)
+ return value
+
+def validate_decoded(integer):
+ if integer < 0:
+ raise gfapy.ValueError(
+ "{} is not a positive integer".format(integer))
+
+def validate_encoded(string):
+ if not re.match(r"^[0-9]+$", string):
+ raise gfapy.FormatError(
+ "{} does not represent a valid unsigned integer".format(repr(string)))
+
+def unsafe_encode(obj):
+ return str(obj)
+
+def encode(obj):
+ if isinstance(obj, str):
+ validate_encoded(obj)
+ return obj
+ elif isinstance(obj, int):
+ validate_decoded(obj)
+ return str(obj)
+ else:
+ raise gfapy.TypeError(
+ "the class {} is incompatible with the datatype\n"
+ .format(obj.__class__.__name__)+
+ "(accepted classes: str, int)")
diff --git a/gfapy/field/position_gfa2.py b/gfapy/field/position_gfa2.py
new file mode 100644
index 0000000..1907c6f
--- /dev/null
+++ b/gfapy/field/position_gfa2.py
@@ -0,0 +1,42 @@
+import gfapy
+import re
+
+def unsafe_decode(string):
+ return gfapy.LastPos(string)
+
+def decode(string):
+ position = unsafe_decode(string)
+ value = gfapy.posvalue(position)
+ if value < 0:
+ raise gfapy.ValueError(
+ "{} is not a positive integer".format(value))
+ return position
+
+def validate_decoded(obj):
+ if isinstance(obj, int):
+ if obj < 0:
+ raise gfapy.ValueError(
+ "{} is not a positive integer".format(obj))
+ elif isinstance(obj, gfapy.LastPos):
+ obj.validate()
+ else:
+ raise gfapy.TypeError(
+ "the class {} is incompatible with the datatype\n"
+ .format(obj.__class__.__name__)+
+ "(accepted classes: int, gfapy.LastPos)")
+
+def validate_encoded(string):
+ if not re.match(r"^[0-9]+\$?$", string):
+ raise gfapy.FormatError(
+ "{} is not a valid GFA2 position\n".format(repr(string))+
+ "(it must be an unsigned integer eventually followed by a $)")
+
+def unsafe_encode(obj):
+ return str(obj)
+
+def encode(obj):
+ if isinstance(obj, str):
+ validate_encoded(obj)
+ else:
+ validate_decoded(obj)
+ return str(obj)
diff --git a/gfapy/field/segment_name_gfa1.py b/gfapy/field/segment_name_gfa1.py
new file mode 100644
index 0000000..a2b18b4
--- /dev/null
+++ b/gfapy/field/segment_name_gfa1.py
@@ -0,0 +1,48 @@
+import gfapy
+import re
+
+def unsafe_decode(string):
+ return string
+
+
+def decode(string):
+ validate_encoded(string)
+ return string
+
+
+def validate_encoded(string):
+ if not re.match(r"^[!-)+-<>-~][!-~]*$", string):
+ raise gfapy.FormatError(
+ "{} is not a valid GFA1 segment name\n".format(repr(string))+
+ "(it does not match the regular expression [!-)+-<>-~][!-~]*")
+ elif re.search(r"[+-],", string):
+ raise gfapy.FormatError(
+ "{} is not a valid GFA1 segment name\n".format(repr(string))+
+ "(it contains + or - followed by ,)")
+
+def validate_decoded(obj):
+ if isinstance(obj, gfapy.line.segment.GFA1):
+ validate_encoded(obj.name)
+ elif isinstance(obj, str):
+ validate_encoded(obj)
+ else:
+ raise gfapy.TypeError(
+ "the class {} is incompatible with the datatype\n"
+ .format(obj.__class__.__name__)+
+ "(accepted classes: str, gfapy.line.segment.GFA1)")
+
+def unsafe_encode(obj):
+ if isinstance(obj, str):
+ return obj
+ elif isinstance(obj, gfapy.line.segment.GFA1):
+ return obj.name
+ else:
+ raise gfapy.TypeError(
+ "the class {} is incompatible with the datatype\n"
+ .format(obj.__class__.__name__)+
+ "(accepted classes: str, gfapy.line.segment.GFA1)")
+
+def encode(obj):
+ string = unsafe_encode(obj)
+ validate_encoded(string)
+ return string
diff --git a/gfapy/field/sequence_gfa1.py b/gfapy/field/sequence_gfa1.py
new file mode 100644
index 0000000..949431b
--- /dev/null
+++ b/gfapy/field/sequence_gfa1.py
@@ -0,0 +1,45 @@
+import gfapy
+import re
+
+def unsafe_decode(string):
+ if string == "*":
+ return gfapy.Placeholder()
+ else:
+ return string
+
+def decode(string):
+ obj = unsafe_decode(string)
+ validate_decoded(obj)
+ return obj
+
+def validate_encoded(string):
+ if not re.match(r"^\*$|^[A-Za-z=.]+$", string):
+ raise gfapy.FormatError(
+ "the string {} is not a valid GFA1 sequence\n".format(repr(string))+
+ "(it is not * and does not match the regular expression [A-Za-z=.]+")
+
+def validate_decoded(obj):
+ if isinstance(obj, gfapy.Placeholder):
+ pass
+ elif isinstance(obj, str):
+ validate_encoded(obj)
+ else:
+ raise gfapy.TypeError(
+ "the class {} is incompatible with the datatype\n"
+ .format(obj.__class__.__name__)+
+ "(accepted classes: str, gfapy.Placeholder)")
+
+def unsafe_encode(obj):
+ return str(obj)
+
+def encode(obj):
+ if isinstance(obj, gfapy.Placeholder):
+ return str(obj)
+ elif isinstance(obj, str):
+ validate_encoded(obj)
+ return obj
+ else:
+ raise gfapy.TypeError(
+ "the class {} is incompatible with the datatype\n"
+ .format(obj.__class__.__name__)+
+ "(accepted classes: str, gfapy.Placeholder)")
diff --git a/gfapy/field/sequence_gfa2.py b/gfapy/field/sequence_gfa2.py
new file mode 100644
index 0000000..42499cb
--- /dev/null
+++ b/gfapy/field/sequence_gfa2.py
@@ -0,0 +1,45 @@
+import gfapy
+import re
+
+def unsafe_decode(string):
+ if string == "*":
+ return gfapy.Placeholder()
+ else:
+ return string
+
+def decode(string):
+ obj = unsafe_decode(string)
+ validate_decoded(obj)
+ return obj
+
+def validate_encoded(string):
+ if not re.match(r"^[!-~]+$", string):
+ raise gfapy.FormatError(
+ "the string {} is not a valid GFA2 sequence\n".format(repr(string))+
+ "(it contains spaces and/or non-printable characters)")
+
+def validate_decoded(obj):
+ if isinstance(obj, gfapy.Placeholder):
+ pass
+ elif isinstance(obj, str):
+ validate_encoded(obj)
+ else:
+ raise gfapy.TypeError(
+ "the class {} is incompatible with the datatype\n"
+ .format(obj.__class__.__name__)+
+ "(accepted classes: str, gfapy.Placeholder)")
+
+def unsafe_encode(obj):
+ return str(obj)
+
+def encode(obj):
+ if isinstance(obj, gfapy.Placeholder):
+ return str(obj)
+ elif isinstance(obj, str):
+ validate_encoded(obj)
+ return obj
+ else:
+ raise gfapy.TypeError(
+ "the class {} is incompatible with the datatype\n"
+ .format(obj.__class__.__name__)+
+ "(accepted classes: str, gfapy.Placeholder)")
diff --git a/gfapy/field/string.py b/gfapy/field/string.py
new file mode 100644
index 0000000..152f22c
--- /dev/null
+++ b/gfapy/field/string.py
@@ -0,0 +1,29 @@
+import gfapy
+import re
+
+def decode(string):
+ validate_encoded(string)
+ return string
+
+def unsafe_decode(string):
+ return string
+
+def validate_encoded(string):
+ if not re.match("^[ !-~]+$", string):
+ raise gfapy.FormatError(
+ "{} is not a valid string field\n".format(repr(string))+
+ "(it contains newlines/tabs and/or non-printable characters)")
+
+validate_decoded = validate_encoded
+
+def unsafe_encode(obj):
+ return str(obj)
+
+def encode(obj):
+ if not isinstance(obj, str):
+ raise gfapy.TypeError(
+ "the class {} is incompatible with the datatype\n"
+ .format(obj.__class__.__name__)+
+ "(accepted classes: str)")
+ validate_encoded(obj)
+ return obj
diff --git a/gfapy/field/validator.py b/gfapy/field/validator.py
new file mode 100644
index 0000000..4a8204f
--- /dev/null
+++ b/gfapy/field/validator.py
@@ -0,0 +1,81 @@
+import gfapy
+
+class Validator:
+
+ @staticmethod
+ def _validate_gfa_field(obj, datatype, fieldname = None):
+ """Validate the content of a field of a Line instance.
+
+ Parameters:
+ obj: the value to be validated. It can be either a string (in which case
+ the encoded validation method is used) or any other kind of Python
+ object (in which case the decoded validation method is used).
+ datatype (str) : the name of the datatype to be used for the validation.
+        The datatype name is used for the lookup in the FIELD_MODULE dictionary
+        and the validation method of the corresponding module is used.
+ fieldname (str) : optional, for error messages
+
+ Raises:
+ gfapy.error.FormatError : if the format of the string representation is
+ invalid; or the object contains strings with an invalid format
+ gfapy.error.ValueError : if the value of the decoded field is invalid
+ gfapy.error.TypeError : if the specified datatype is not defined or
+ if the type of the decoded field is invalid
+ gfapy.error.VersionError : if the value is invalid for the GFA version
+ for which the datatype is specified
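+
+    Example (illustrative; the method returns None on success):
+      Validator._validate_gfa_field("12", "i")    # passes
+      Validator._validate_gfa_field("1.5", "i")   # raises gfapy.FormatError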
+ """
+ if isinstance(obj, str):
+ Validator.__validate_encoded_gfa_field( obj, datatype, fieldname)
+ else:
+ Validator.__validate_decoded_gfa_field( obj, datatype, fieldname)
+
+ @staticmethod
+ def __validate_decoded_gfa_field(obj, datatype, fieldname = None):
+ """Validate a non-string field content.
+
+ Parameters:
+ obj : the field content to validate
+ datatype (str) : the datatype identifier
+ fieldname (str) : for error messages
+
+ Raises:
+ gfapy.error.TypeError: if the specified datatype is invalid or the
+ object is of a class which is not compatible with the datatype
+ gfapy.error.FormatError: if the format of a string in the object
+ is not compatible with the datatype; or if the object encoded into
+ a GFA string is incompatible with the specification
+ gfapy.error.VersionError: if the object value is invalid
+ for the specific GFA version for which this datatype is used
+ gfapy.error.ValueError: if the value of the object is invalid
+ """
+ if isinstance(obj, gfapy.FieldArray):
+ return obj._validate_gfa_field(datatype, fieldname=fieldname)
+ mod = gfapy.Field.FIELD_MODULE.get(datatype)
+ if not mod:
+ raise gfapy.TypeError(
+ "Datatype unknown: {}".format(repr(datatype)))
+ return mod.validate_decoded(obj)
+
+ @staticmethod
+ def __validate_encoded_gfa_field(obj, datatype, fieldname = None):
+ """Validate a string field content.
+
+ Parameters:
+ obj (str): the field content to validate
+ datatype (str) : the datatype identifier
+ fieldname (str) : for error messages
+
+ Raises:
+ gfapy.error.TypeError: if the specified datatype is invalid
+ gfapy.error.FormatError: if the format of the string is invalid
+ for the specified datatype
+ gfapy.error.VersionError: if the format of the string is invalid
+ for the specific GFA version for which this datatype is used
+ gfapy.error.ValueError: if the format of the string is valid,
+ but the value encoded by the string is invalid
+ """
+ mod = gfapy.Field.FIELD_MODULE.get(datatype)
+ if not mod:
+ raise gfapy.TypeError(
+ "Datatype unknown: {}".format(repr(datatype)))
+ return mod.validate_encoded(obj)
diff --git a/gfapy/field/writer.py b/gfapy/field/writer.py
new file mode 100644
index 0000000..bf2c047
--- /dev/null
+++ b/gfapy/field/writer.py
@@ -0,0 +1,94 @@
+"""
+Encoding of python objects to GFA string representation
+"""
+import gfapy
+
+class Writer:
+
+ @staticmethod
+ def _to_gfa_field(obj, datatype = None, safe = True, fieldname = None,
+ line = None):
+ """Encode an object into its GFA string representation.
+
+    The python object can be either an encoded GFA field (i.e. already a
+    string, in which case it is at most validated, depending on the other
+    parameters), or an object of a class compatible with the specified
+    datatype, if a datatype is specified (see **datatype**), e.g. int for
+    i fields.
+
+ Parameters
+ ----------
+ obj : object
+ the python object to encode
+ datatype : str
+ datatype to use (one of `~gfapy.field.field.Field.FIELD_DATATYPE`);
+      If none is specified, the datatype returned by the
+      `gfapy.field.Field._get_default_gfa_tag_datatype` method is used.
+ fieldname : str, optional
+ fieldname, for error messages
+ line : gfapy.Line, optional
+ line, for error messages
+ safe : bool, optional
+ *(defaults to: ***True***)* if **True**, the safe
+ version of the encode function is used, which guarantees that the
+ resulting data is valid; if **False**, the unsafe version is used,
+ which, for some datatypes, skips validations in order to be faster
+ than the safe version
+
+ Raises
+ ------
+ gfapy.TypeError
+ if an unknown datatype is specified, or the object type is not
+ compatible with the datatype
+ gfapy.ValueError
+ if the object value is invalid for the datatype
+ gfapy.FormatError
+ if the object syntax is invalid for the
+      datatype (e.g. for invalid encoded strings, if **safe** is set)
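+
+    Examples
+    --------
+    Illustrative calls (a sketch; this is an internal helper):
+
+    >>> Writer._to_gfa_field(12)
+    '12'
+    >>> Writer._to_gfa_field("1.5", datatype="f")
+    '1.5'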
+ """
+ if not datatype:
+ datatype = gfapy.Field._get_default_gfa_tag_datatype(obj)
+ mod = gfapy.Field.FIELD_MODULE.get(datatype)
+ if not mod:
+ fieldnamemsg = "Field: {}\n".format(fieldname) if fieldname else ""
+ contentmsg = "Content: {}\n".format(repr(obj))
+ raise gfapy.TypeError(
+ fieldnamemsg +
+ contentmsg +
+ "Datatype unknown: {}".format(repr(datatype)))
+ try:
+ if safe or not getattr(mod, "unsafe_encode"):
+ return mod.encode(obj)
+ else:
+ return mod.unsafe_encode(obj)
+ except Exception as err:
+ fieldnamemsg = "Field: {}\n".format(fieldname) if fieldname else ""
+ contentmsg = "Content: {}\n".format(repr(obj))
+ datatypemsg = "Datatype: {}\n".format(datatype)
+ raise err.__class__(
+ fieldnamemsg +
+ datatypemsg +
+ contentmsg +
+ str(err)) from err
+
+ @staticmethod
+ def _to_gfa_tag(obj, fieldname, datatype = None, line = None):
+ """Representation of the data as a GFA tag.
+
+ The representation is ``xx:d:content``, where ``xx`` is
+ the tag name and ``d`` is the datatype.
+
+ Parameters:
+ obj (object): the python object to encode
+ fieldname (string): the tag name
+ datatype (string): (one of gfapy.Field.TAG_DATATYPE)
+ the datatype; if not specified, the value returned by
+ :func:``~gfapy.field.Field._get_default_gfa_tag_datatype``
+ is used.
+ line (string): the line content, for error messages
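+
+    Example (illustrative):
+      Writer._to_gfa_tag(12, "xx")          # returns "xx:i:12"
+      Writer._to_gfa_tag(1.0, "yy", "f")    # returns "yy:f:1.0"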
+ """
+ if not datatype:
+ datatype = gfapy.Field._get_default_gfa_tag_datatype(obj)
+ return "{}:{}:{}".format(fieldname, datatype,
+ Writer._to_gfa_field(obj, datatype = datatype,
+ fieldname = fieldname, line = line))
diff --git a/gfapy/field_array.py b/gfapy/field_array.py
new file mode 100644
index 0000000..c8a1adb
--- /dev/null
+++ b/gfapy/field_array.py
@@ -0,0 +1,134 @@
+import gfapy
+
+class FieldArray:
+ """Multiple values of the same tag in different header lines.
+
+ Parameters:
+ datatype (str): (one of `gfapy.field.Field.TAG_DATATYPE`) the datatype of
+ the tags represented by the array.
+ data (list, None): a list of values. The values must be compatible with the
+ specified datatype. If no list is provided, the instance is initialized
+ with an empty list.
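+
+  Example (a sketch; the values must be valid for the given datatype):
+    fa = gfapy.FieldArray("i", [1, 2, 3])
+    str(fa)                # "1", "2", "3" joined by tabs
+    fa._to_gfa_tag("xx")   # "xx:i:1", "xx:i:2", "xx:i:3" joined by tabs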
+ """
+
+ def __init__(self, datatype, data = None):
+ if data is None:
+ self._data = []
+ else:
+ self._data = data
+ self._datatype = datatype
+
+ @property
+ def datatype(self):
+ """Datatype of the tags represented by the list.
+
+ Returns:
+ str : one of `gfapy.field.Field.TAG_DATATYPE`.
+ """
+ return self._datatype
+
+ def validate(self, fieldname : str = None) -> None:
+ """Datatype-specific validation on each element of the list.
+
+ Parameters:
+ fieldname (str) : optional, for error messages.
+ """
+ self._validate_gfa_field(None, fieldname)
+
+  def __str__(self):
+    return self._to_gfa_field()
+
+ def __repr__(self):
+ return "gfapy.FieldArray({},{})".format(
+ repr(self._datatype),repr(self._data))
+
+ def __eq__(self, other):
+ if isinstance(other, list):
+ return other == self._data
+ elif isinstance(other, gfapy.FieldArray):
+      return other.datatype == self._datatype and \
+             other._data == self._data
+ else:
+ return False
+
+ def __add__(self, other):
+ if isinstance(other, list):
+ self._data += other
+ elif isinstance(other, gfapy.FieldArray):
+ self._data += other._data
+
+ def __iter__(self):
+ return self._data.__iter__()
+
+ def __getattr__(self, name):
+ return getattr(self._data, name)
+
+ def _validate_gfa_field(self, datatype : str, fieldname = None):
+ """Datatype-specific validation.
+
+ If no datatype is provided as parameter, the datatype of the
+ array is used.
+ """
+ if not datatype:
+ datatype = self._datatype
+ for elem in self._data:
+ gfapy.Field._validate_gfa_field(elem, datatype, fieldname)
+
+ def _default_gfa_tag_datatype(self):
+ """
+ Default GFA tag datatype.
+
+ Returns
+ -------
+    gfapy.Field.TAG_DATATYPE
+ """
+ return self.datatype
+
+ def _to_gfa_field(self, datatype = None, fieldname = None):
+ """Representation as tab-separated values (w/o XX:Y: prefixes)."""
+ if datatype is None:
+ datatype = self._datatype
+ return "\t".join(
+ [ gfapy.Field._to_gfa_field(x, datatype = self._datatype, \
+ fieldname = fieldname) for x in self._data ])
+
+ def _to_gfa_tag(self, fieldname, datatype = None):
+ """Representation as tab-separated tags (XX:Y:VALUE)."""
+ if datatype is None:
+ datatype = self.datatype
+ return "\t".join(
+ [ gfapy.Field._to_gfa_tag(x, fieldname, datatype) \
+ for x in self._data ])
+
+ def _vpush(self, value, datatype=None, fieldname=None):
+ """Add a value to the array and validate.
+
+ Raises
+ ------
+ gfapy.InconsistencyError
+ If the type of the new value does not correspond to the type of
+ existing values.
+
+ Parameters
+ ----------
+ value : Object
+ The value to add.
+ datatype : gfapy.Field.TAG_DATATYPE or None
+ The datatype to use.
+ If not **None**, it will be checked that the specified datatype is the
+ same as for previous elements of the field array.
+ If **None**, the value will be validated, according to the datatype
+ specified on field array creation.
+ fieldname : str
+ The field name to use for error messages.
+ """
+ if datatype is None:
+ gfapy.Field._validate_gfa_field(value, self.datatype, fieldname)
+ elif datatype != self.datatype:
+ raise gfapy.InconsistencyError(
+        "Datatype mismatch error for field {}:\n".format(fieldname)+
+ "value: {}\n".format(value)+
+ "existing datatype: {};\n".format(self.datatype)+
+ "new datatype: {}".format(datatype))
+ self._data.append(value)
+
diff --git a/gfapy/gfa.py b/gfapy/gfa.py
new file mode 100644
index 0000000..cb38868
--- /dev/null
+++ b/gfapy/gfa.py
@@ -0,0 +1,315 @@
+import gfapy
+from .lines import Lines
+from .graph_operations import GraphOperations
+from collections import defaultdict
+import sys
+
+class Gfa(Lines,GraphOperations):
+ """Representation of the data in a GFA file.
+
+ Parameters:
+    data (str or list): optional; the string content of a GFA file, or the
+      same content as a list of lines (split on newlines); default: create
+      an empty Gfa instance
+ vlevel (int): validation level (default: 1)
+ version (str): GFA version ('gfa1' or 'gfa2';
+ default: automatic recognition)
+
+ Raises:
+    ~gfapy.error.ArgumentError: if the vlevel or version are invalid
+    ~gfapy.error.FormatError: if invalid data is provided
+    ~gfapy.error.VersionError: if an unknown version is specified, or the
+      provided data is not compatible with the specified version
+ """
+
+ def __init__(self, *args, vlevel = 1, version = None):
+ if not isinstance(vlevel, int):
+ raise gfapy.ArgumentError("vlevel is not an integer ({})".format(vlevel))
+ if vlevel < 0:
+      raise gfapy.ArgumentError(
+          "vlevel is not a non-negative integer ({})".format(vlevel))
+ if not version in ['gfa1', 'gfa2', None]:
+ raise gfapy.VersionError("GFA version unknown ({})".format(version))
+ self._vlevel = vlevel
+ self._max_int_name = 0
+ self._records = defaultdict(dict)
+ self._records["H"] = gfapy.line.Header(["H"], vlevel = vlevel)
+ self._records["H"].connect(self)
+ self._records["S"] = {}
+ self._records["P"] = {}
+ self._records["F"] = {}
+ self._records["\n"] = {}
+ self._records["E"] = {}
+ self._records["U"] = {}
+ self._records["G"] = {}
+ self._records["O"] = {}
+ self._records["C"] = {}
+ self._records["L"] = {}
+ self._records["#"] = {}
+ self._segments_first_order = False
+ self._progress = None
+ self._default = {"count_tag": "RC", "unit_length": 1}
+ self._line_queue = []
+ if version is None:
+ self._version = None
+ self._version_explanation = None
+ self._version_guess = "gfa2"
+ else:
+ self._version = version
+ self._version_explanation = "set during initialization"
+ self._version_guess = version
+ self._validate_version()
+ if len(args) == 1:
+ lst = None
+ if isinstance(args[0], str):
+ lst = args[0].split("\n")
+ elif isinstance(args[0], list):
+ lst = args[0]
+ else:
+ raise gfapy.ArgumentError("Cannot create a Gfa"+
+ " instance from an object of type {}".format(type(args[0])))
+ for line in lst:
+ self.add_line(line)
+ self.process_line_queue()
+ if vlevel >= 1:
+ self.validate()
+ elif len(args) > 1:
+      raise gfapy.ArgumentError("Wrong number of arguments for Gfa()"+
+          " ({})".format(len(args)))
+
+ @property
+ def version(self):
+ """GFA version ('gfa1' or 'gfa2')"""
+ return self._version
+
+  @version.setter
+  def version(self,value):
+    self._version=value
+
+ @property
+ def vlevel(self):
+ """Level of validation"""
+ return self._vlevel
+
+ @vlevel.setter
+ def vlevel(self,value):
+ self._vlevel=value
+
+ def validate(self):
+ """Validate the GFA instance
+
+ Checks if all references are solved correctly.
+ """
+ self.__validate_segment_references()
+ self.__validate_path_links()
+ self.__validate_group_items()
+ self.__validate_gfa2_positions()
+
+ def __str__(self):
+ return "\n".join([str(line) for line in self.lines])
+
+ def to_gfa1_s(self):
+ """Create a GFA1 string representation for the GFA data
+
+ If the Gfa has version 'gfa1', its string representation is
+ returned. Otherwise a conversion from GFA2 is performed.
+ """
+ if self.version == "gfa1":
+ return str(self)
+ else:
+ lines = []
+ for line in self.lines:
+ converted = line.to_gfa1_s()
+ if converted:
+ lines.append(converted)
+ return "\n".join(lines)
+
+ def to_gfa1(self):
+ """Create a GFA1 Gfa instance for the GFA data
+
+ If the Gfa has version 'gfa1', it is
+ returned. Otherwise a conversion from GFA2 is performed.
+ """
+ if self.version == "gfa1":
+ return self
+ else:
+ gfa1 = gfapy.Gfa(version="gfa1", vlevel=self.vlevel)
+ for line in self.lines:
+ gfa1.add_line(line.to_gfa1(raise_on_failure=False))
+ return gfa1
+
+ def to_gfa2_s(self):
+ """Create a GFA2 string representation for the GFA data
+
+ If the Gfa has version 'gfa2', its string representation is
+ returned. Otherwise a conversion from GFA1 is performed.
+ """
+ if self.version == "gfa2":
+ return str(self)
+ else:
+ lines = []
+ for line in self.lines:
+ converted = line.to_gfa2_s()
+ if converted:
+ lines.append(converted)
+ return "\n".join(lines)
+
+ def to_gfa2(self):
+ """Create a GFA2 Gfa instance for the GFA data.
+
+ If the Gfa has version 'gfa2', it is
+ returned. Otherwise a conversion from GFA1 is performed.
+ """
+ if self.version == "gfa2":
+ return self
+ else:
+ gfa2 = gfapy.Gfa(version="gfa2", vlevel=self.vlevel)
+ for line in self.lines:
+ gfa2.add_line(line.to_gfa2(raise_on_failure=False))
+ return gfa2
+
+ # TODO: implement clone (see how clone for lines was implemented)
+
+ def read_file(self, filename):
+ """Read GFA data from a file and load it into the Gfa instance.
+
+ Parameters:
+ filename (str)
+ """
+ if self._progress:
+ linecount = 0
+ with open(filename) as f:
+ for line in f:
+ linecount += 1
+ # TODO: better implementation of linecount
+ self._progress_log_init("read_file", "lines", linecount,
+ "Parsing file {}".format(filename)+
+ " containing {} lines".format(linecount))
+ with open(filename) as f:
+ for line in f:
+ self.add_line(line.rstrip('\r\n'))
+ if self._progress:
+ self._progress_log("read_file")
+ if self._line_queue:
+ self._version = self._version_guess
+ self.process_line_queue()
+ if self._progress:
+ self._progress_log_end("read_file")
+ if self._vlevel >= 1:
+ self.validate()
+ return self
+
+ @classmethod
+ def from_file(cls, filename, vlevel = 1, version = None):
+ """Create a Gfa instance from the contents of a GFA file.
+
+ Parameters:
+ filename (str)
+ vlevel (int) : the validation level
+ version (str) : the GFA version ('gfa1' or 'gfa2'; default:
+ determine version automatically)
+
+ Returns:
+ gfapy.Gfa
+ """
+ gfa = cls(vlevel = vlevel, version = version)
+ gfa.read_file(filename)
+ return gfa
+
+ def to_file(self, filename):
+ """Write the content of the instance to a GFA file
+
+ Parameters:
+ filename (str)
+ """
+ with open(filename, "w") as f:
+ for line in self.lines:
+ f.write(str(line)+"\n")
+
+  def __eq__(self, other):
+    return self.lines == other.lines
+
+ def __lenstats(self):
+ sln = [ s.try_length for s in self.segments ]
+ sln = sorted(sln)
+ n = len(sln)
+ tlen = 0
+ for l in sln:
+ tlen += l
+ n50_target = tlen//2
+ n50 = None
+ curr_sum = 0
+ for l in reversed(sln):
+ curr_sum += l
+ if curr_sum >= n50_target:
+ n50 = l
+ break
+    q = (sln[0], sln[(n//4)-1], sln[(n//2)-1], sln[((n*3)//4)-1], sln[-1])
+ return (q, n50, tlen)
+
+ def __validate_segment_references(self):
+ for s in self.segments:
+ if s.virtual:
+ raise gfapy.NotFoundError("Segment {} ".format(s.name)+
+ "does not exist\nReferences to {} ".format(s.name)+
+ "were found in the following lines:\n"+s.refstr())
+
+ def __validate_path_links(self):
+ for pt in self._gfa1_paths:
+ for ol in pt.links:
+ l = ol.line
+ if l.virtual:
+ raise gfapy.NotFoundError("A link equivalent to:\n{}\n".format(\
+ l.to_str(add_virtual_commentary=False))+
+ "does not exist, but is required by the following paths:\n"+
+ l.refstr())
+
+ def __validate_group_items(self):
+ if self.version == "gfa1":
+ return
+ for group in self.sets + self.paths:
+ for item in group.items:
+ if isinstance(item, gfapy.OrientedLine):
+ item = item.line
+ if item.virtual:
+ raise gfapy.NotFoundError("A line with identifier {}\n".format(\
+ item.name)+
+ "does not exist, but is required by the following groups:\n"+
+ item.refstr())
+
+ def __validate_gfa2_positions(self):
+ if self.version == "gfa1":
+ return
+ for line in self.edges + self.fragments:
+ line.validate_positions()
+
+ def _validate_version(self):
+ if (self._version != None) and (self._version not in gfapy.VERSIONS):
+ raise gfapy.VersionError("GFA specification version {} not supported".
+ format(self._version))
+
+ # Progress logging related-methods:
+
+ def enable_progress_logging(self, part=0.1, channel=sys.stderr):
+ '''Activate logging of progress for some graph operations.
+
+ Parameters:
+ part (float) : report when every specified portion of the computation
+ is completed (default: 0.1)
+ channel : output channel (default: standard error)
+ '''
+ self._progress = gfapy.Logger(channel=channel)
+ self._progress.enable_progress(part=part)
+
+ def _progress_log_init(self, symbol, units, total, initmsg = None):
+ if self._progress is not None:
+ self._progress.progress_init(symbol, units, total, initmsg)
+
+ def _progress_log(self, symbol, progress=1, **keyargs):
+ if self._progress is not None:
+ self._progress.progress_log(symbol, progress)
+
+ def _progress_log_end(self, symbol, **keyargs):
+ if self._progress is not None:
+ self._progress.progress_end(symbol)
+
diff --git a/gfapy/graph_operations/__init__.py b/gfapy/graph_operations/__init__.py
new file mode 100644
index 0000000..299eb73
--- /dev/null
+++ b/gfapy/graph_operations/__init__.py
@@ -0,0 +1 @@
+from .graph_operations import GraphOperations
diff --git a/gfapy/graph_operations/artifacts.py b/gfapy/graph_operations/artifacts.py
new file mode 100644
index 0000000..eef2d8d
--- /dev/null
+++ b/gfapy/graph_operations/artifacts.py
@@ -0,0 +1,39 @@
+import gfapy
+
+class Artifacts:
+
+ def remove_small_components(self, minlen):
+ """Remove connected components with combined segment length < minlen.
+
+ Note:
+ Connected components of the graph are computed, considering only dovetail
+ overlaps as connection of segments.
+
+ Parameters:
+ minlen (int) : the minimal length of the components to keep.
+ """
+ for cc in filter(lambda c: sum([self.segment(sn).length for sn in c]) \
+ < minlen, self.connected_components()):
+ for s in cc:
+ self.rm(s)
+
+ def remove_dead_ends(self, minlen):
+ """Remove dead end segments from the graph.
+
+    Dead end segments are defined as segments whose sequence is shorter than
+    a given minlen parameter, whose removal does not split a connected
+    component of the graph, and which have no connections (dovetail
+    overlaps) at one or both ends of the sequence.
+
+ Note:
+ Only dovetail overlaps are considered as connections.
+
+ Parameters:
+      minlen (int) : the minimal sequence length; shorter dead-end segments
+        are removed.
+ """
+ for s in self.segments:
+ c = s._connectivity()
+ if s.length < minlen and \
+ (c[0]==0 or c[1]==0) and \
+ not self.is_cut_segment(s):
+ self.rm(s)
diff --git a/gfapy/graph_operations/copy_number.py b/gfapy/graph_operations/copy_number.py
new file mode 100644
index 0000000..93bc5da
--- /dev/null
+++ b/gfapy/graph_operations/copy_number.py
@@ -0,0 +1,79 @@
+import gfapy
+
+class CopyNumber:
+
+ def set_default_count_tag(self, tag):
+ """Set the count tag to be used by default for the coverage computation"""
+ self._default["count_tag"] = tag
+
+ def set_count_unit_length(self, unit_length):
+ """Set the unit length to be used by default for the coverage computation"""
+ self._default["unit_length"] = unit_length
+
+ def delete_low_coverage_segments(self, mincov, count_tag=None,
+ unit_length=None):
+ """Remove the segments whose coverage is smaller than a specified value.
+
+ Parameters:
+ mincov (int) : the minimal coverage to keep a segment
+ count_tag (str) : the name of the tag to use for coverage computation
+ unit_length (int) : the unit length to use for coverage computation
+ """
+ if unit_length is None:
+ unit_length = self._default["unit_length"]
+ if count_tag is None:
+ count_tag = self._default["count_tag"]
+ for s in self.segments:
+ cov = s.coverage(count_tag=count_tag, unit_length=unit_length)
+ if cov < mincov:
+ s.disconnect()
+
+ def compute_copy_numbers(self, single_copy_coverage, mincov=None,
+ count_tag=None, cn_tag="cn", unit_length=None):
+ """Compute the estimated copy numbers of all segments, from their coverage.
+
+ Parameters:
+ mincov (int) : the minimal coverage to assign copy number 1; if not
+ specified, 1/4 of the single_copy_coverage is used
+ single_copy_coverage : the coverage corresponding to a copy number of 1
+ cn_tag (str) : the tag where to store the computed values (default: cn)
+ count_tag (str) : the name of the tag to use for coverage computation
+ unit_length (int) : the unit length to use for coverage computation
+ """
+ if mincov is None:
+ mincov = single_copy_coverage * 0.25
+ if count_tag is None:
+ count_tag = self._default["count_tag"]
+ if unit_length is None:
+ unit_length = self._default["unit_length"]
+ for s in self.segments:
+ cov = s.try_get_coverage(count_tag=count_tag, unit_length=unit_length)
+ if cov < mincov:
+ cn = 0
+ elif cov < single_copy_coverage:
+ cn = 1
+ else:
+ cn = round(cov / single_copy_coverage)
+ s.set(cn_tag, cn)
+
+ def apply_copy_numbers(self, count_tag="cn", distribute="auto",
+ origin_tag="or", conserve_components=True):
+    """Multiply each segment by its copy number.
+
+ The copy number must be stored in a tag (default: cn). It can be computed
+ e.g. using the compute_copy_numbers() method.
+
+ Parameters:
+ origin_tag (str) : the tag where to store the origin tracking
+ (default: or); see multiply()
+ distribute (str) : the value of the distribute parameter of multiply();
+ see multiply()
+ count_tag (str) : the name of the tag to use for coverage computation
+      conserve_components (bool) : If True, segments with copy number 0 are
+        not deleted, if their removal would split a connected component in two;
+        thereby only dovetail overlaps are considered (default: True)
+ """
+ for s in sorted(self.segments, key=lambda s:s.try_get(count_tag)):
+ self.multiply(s.name, s.get(count_tag), distribute=distribute,
+ copy_names=None, conserve_components=conserve_components,
+ origin_tag=origin_tag, track_origin=True)
diff --git a/gfapy/graph_operations/graph_operations.py b/gfapy/graph_operations/graph_operations.py
new file mode 100644
index 0000000..d03a63b
--- /dev/null
+++ b/gfapy/graph_operations/graph_operations.py
@@ -0,0 +1,13 @@
+import gfapy
+from .artifacts import Artifacts
+from .copy_number import CopyNumber
+from .invertible_segments import InvertibleSegments
+from .p_bubbles import PBubbles
+from .linear_paths import LinearPaths
+from .multiplication import Multiplication
+from .redundant_linear_paths import RedundantLinearPaths
+from .superfluous_links import SuperfluousLinks
+from .topology import Topology
+class GraphOperations(LinearPaths,Multiplication,RedundantLinearPaths,
+ Topology,Artifacts,CopyNumber,InvertibleSegments,PBubbles,SuperfluousLinks):
+ pass
diff --git a/gfapy/graph_operations/invertible_segments.py b/gfapy/graph_operations/invertible_segments.py
new file mode 100644
index 0000000..40dbe0f
--- /dev/null
+++ b/gfapy/graph_operations/invertible_segments.py
@@ -0,0 +1,89 @@
+import gfapy
+from itertools import groupby
+
+class InvertibleSegments:
+
+ def randomly_orient_invertibles(self):
+ ''' Selects a random orientation for all invertible segments.
+
+ For the definition of invertible segment, see Gonnella and Kurtz (2016).'''
+    for sn in self.segment_names:
+      if self._segment_same_links_both_ends(sn):
+        self._randomly_orient_proven_invertible_segment(sn)
+
+ def randomly_orient_invertible(self, segment):
+ '''Selects a random orientation for an invertible segment.
+
+ For the definition of invertible segment, see Gonnella and Kurtz (2016).'''
+ if isinstance(segment, gfapy.Line):
+ segment_name = segment.name
+ else:
+ segment_name = segment
+ if not self._segment_same_links_both_ends(segment_name):
+ raise gfapy.RuntimeError("Only segments with links to the same or "+
+ "equivalent segments at both ends can be randomly oriented")
+ self._randomly_orient_proven_invertible_segment(segment_name)
+
+ def _randomly_orient_proven_invertible_segment(self, segment_name):
+ se = gfapy.SegmentEnd([segment_name, "R"])
+ parts = self._partitioned_links_of(se)
+ if len(parts) == 2:
+ tokeep1_other_end = parts[0][0].other_end(se)
+ tokeep2_other_end = parts[1][0].other_end(se)
+ elif len(parts) == 1 and len(parts[0]) == 2:
+ tokeep1_other_end = parts[0][0].other_end(se)
+ tokeep2_other_end = parts[0][1].other_end(se)
+ else:
+ return
+ if len(tokeep1_other_end.segment.dovetails(
+ tokeep1_other_end.end_type)) < 2:
+ return
+ if len(tokeep2_other_end.segment.dovetails(
+ tokeep2_other_end.end_type)) < 2:
+ return
+ self._delete_other_links(se, tokeep1_other_end)
+ self._delete_other_links(se.inverted(), tokeep2_other_end)
+ self._annotate_random_orientation(segment_name)
+
+  def _link_targets_for_cmp(self, segment_end):
+ return ["".join(str(l.other_end(segment_end))) \
+ for l in segment_end.segment.dovetails(segment_end.end_type)]
+
+ def _segment_same_links_both_ends(self, segment_name):
+ e_links = self._link_targets_for_cmp(gfapy.SegmentEnd(segment_name, "R"))
+ b_links = self._link_targets_for_cmp(gfapy.SegmentEnd(segment_name, "L"))
+ return e_links == b_links
+
+ def _segment_signature(self, segment_end):
+ s = self.try_get_segment(segment_end.segment)
+ return ",".join(self._link_targets_for_cmp(segment_end))+"\t"+\
+ ",".join(self._link_targets_for_cmp(segment_end.inverted()))+"\t"+\
+ s.field_to_s("or")
+
+ def _partitioned_links_of(self, segment_end):
+ links = segment_end.segment.dovetails(segment_end.end_type)
+ sigs = {}
+ for l in links:
+ sigs[id(l)] = self._segment_signature(l.other_end(segment_end))
+ sig = lambda l: sigs[id(l)]
+ return [list(v) for k,v in groupby(sorted(links,key=sig),key=sig)]
+
+ def _annotate_random_orientation(self, segment_name):
+    segment = self.try_get_segment(segment_name)
+    n = segment.name.split("_")
+ pairs = 0
+ pos = [1, segment.LN]
+ if segment.get("or"):
+ o = segment.field_to_s("or").split(",")
+ if len(o) > 2:
+ while o[-1]=="{}^".format(o[0]) or o[0]=="{}^".format(o[-1]):
+ pairs += 1
+ o.pop()
+ o.pop(0)
+ if segment.mp:
+ pos = [segment.mp[pairs*2], segment.mp[-1-pairs*2]]
+ rn = segment.rn
+ if rn is None:
+ rn = []
+ rn += pos
+ segment.rn = rn
+ n[pairs] = "({}".format(n[pairs])
+ n[-1-pairs] = "{})".format(n[-1-pairs])
+ self.rename(segment.name, "_".join(n))
diff --git a/gfapy/graph_operations/linear_paths.py b/gfapy/graph_operations/linear_paths.py
new file mode 100644
index 0000000..efcc440
--- /dev/null
+++ b/gfapy/graph_operations/linear_paths.py
@@ -0,0 +1,364 @@
+import gfapy
+
+class LinearPaths:
+
+ def linear_path(self, segment, exclude = None):
+    """Find a linear path which contains the specified segment.
+
+ Parameters:
+ segment (str, Line): the segment to analyse
+ exclude : (API private)
+ """
+ if isinstance(segment, gfapy.Line):
+ segment_name = segment.name
+ else:
+ segment_name = segment
+ segment = self.segment(segment_name)
+ cs = segment._connectivity()
+ if exclude is None:
+ exclude = set()
+ segpath = gfapy.SegmentEndsPath()
+ for i, et in enumerate(["L", "R"]):
+ if cs[i] == 1:
+ exclude.add(segment_name)
+ if len(segpath) > 0:
+ segpath.pop()
+ segpath += self.__traverse_linear_path(
+ gfapy.SegmentEnd(segment, et), exclude)
+ return segpath
+
+ def linear_paths(self, redundant_junctions=False):
+ """Find linear paths of dovetail overlaps connecting segments.
+
+ Parameters:
+ redundant_junctions (bool): output the junction segments at the
+ end of each path which involves them; this mimics the construction
+ of contigs in string graph assemblers Readjoiner and SGA; default: False
+ """
+ exclude = set()
+ if redundant_junctions:
+ junction_exclude = set()
+ retval = []
+ segnames = self.segment_names
+ if self._progress:
+ self._progress_log_init("linear_paths", "segments", len(segnames),
+ "Detect linear paths ({})".format(len(segnames)))
+ for sn in segnames:
+ if self._progress:
+ self._progress_log("linear_paths")
+ if sn in exclude:
+ continue
+ lp = self.linear_path(sn, exclude)
+ if not redundant_junctions:
+ if len(lp) > 1:
+ retval.append(lp)
+ else:
+ if lp:
+ self._extend_linear_path_to_junctions(lp)
+ retval.append(lp)
+ else:
+ retval += self._junction_junction_paths(sn, junction_exclude)
+ if self._progress:
+ self._progress_log_end("linear_paths")
+ return retval
+
+ def merge_linear_path(self, segpath, redundant_junctions=False, jntag="jn",
+ enable_tracking=False, merged_name=None,
+ cut_counts=False):
+ """Merge a specified linear path of dovetail overlaps connecting segments.
+
+ Note:
+ for the parameter usage, see merge_linear_paths();
+ the only difference is that merged_name can be set to a string (different
+ from 'short'), which will be used as a name for the merged segment.
+ """
+ if len(segpath) < 2:
+ return self
+ if segpath[0] in [True, False]:
+ first_redundant = segpath.pop(0)
+ last_redundant = segpath.pop()
+ else:
+ first_redundant = False
+ last_redundant = False
+ segpath = [gfapy.SegmentEnd(s) for s in segpath]
+ merged, first_reversed, last_reversed = \
+ self.__create_merged_segment(segpath,
+ redundant_junctions=redundant_junctions, jntag=jntag,
+ merged_name=merged_name,cut_counts=cut_counts,
+ enable_tracking=enable_tracking)
+ self.append(merged)
+ if first_redundant:
+ self._link_duplicated_first(merged, self.segment(segpath[0].segment),
+ first_reversed, jntag)
+ else:
+ self.__link_merged(merged.name, segpath[0].inverted(), first_reversed)
+ if last_redundant:
+ self._link_duplicated_last(merged, self.segment(segpath[-1].segment),
+ last_reversed, jntag)
+ else:
+ self.__link_merged(merged.name, segpath[-1], last_reversed)
+ idx1 = 1 if first_redundant else 0
+ idx2 = -1 if last_redundant else None
+ for sn_et in segpath[idx1:idx2]:
+ self.segment(sn_et.segment).disconnect()
+ if self._progress:
+ self._progress_log("merge_linear_paths", 0.05)
+ return self
+
+ def merge_linear_paths(self, redundant_junctions=False, jntag="jn",
+ merged_name=None, enable_tracking=False,
+ cut_counts=False):
+ """Find and merge linear paths of dovetail overlaps connecting segments.
+
+ Note:
+      Besides the dovetail overlaps themselves, all lines referring to the
+ merged segments (containments, internal edges, paths, sets, fragments,
+ gaps) are removed from the Gfa instance.
+
+ Parameters:
+ merged_name (str): if 'short', then a name is computed using an unused
+ integer; otherwise the name is computed using a combination of the
+ names of the merged segments, separated by an underscore
+ cut_counts (bool): if True, the total count in merged segment m,
+ composed of segments s of set S is multiplied by the factor
+ ``Sum(|s in S|)/|m|``
+ enable_tracking: if True, tracking information is added as follows;
+ the name of the component segments is stored in the ``or`` tag (or the
+ content of their ``or`` tag, instead of the name, if any) and their
+        starting positions are stored in the ``mp`` tag; the ``rn`` tag, used
+        for storing possible inversion positions by the random orientation
+ methods of this library, is inherited and the positions updated;
+ unless merged_name is set to 'short', the computation of the merged
+ name is enhanced, in that reverse complement components are suffixed
+ with ``^`` and parenthesis added by the random orientation methods of
+ this library are inherited
+ redundant_junctions (bool): output the junction segments at the
+ end of each path which involves them; this mimics the construction
+ of contigs in string graph assemblers Readjoiner and SGA; default: False
+ jntag (str) : the tag to use for the temporary storage of junction
+ information, if the redundant_junctions flag is set (default: jn)
+ """
+ paths = self.linear_paths(redundant_junctions)
+ if self._progress:
+ psize = sum([len(path) for path in paths])
+ self._progress_log_init("merge_linear_paths", "segments", psize,
+ "Merge {} linear paths ".format(len(paths))+
+ "({} segments)".format(psize))
+ for path in paths:
+ self.merge_linear_path(path, redundant_junctions=redundant_junctions,
+ jntag=jntag, merged_name=merged_name,
+ cut_counts=cut_counts,
+ enable_tracking=enable_tracking)
+ if self._progress:
+ self._progress_log_end("merge_linear_paths")
+ if redundant_junctions:
+ self._remove_junctions(jntag)
+ return self
+
+ def __traverse_linear_path(self, segment_end, exclude):
+ lst = gfapy.SegmentEndsPath()
+ current = gfapy.SegmentEnd(segment_end)
+ current.segment = self.segment(current.segment)
+ while True:
+ after = current.segment.dovetails_of_end(current.end_type)
+ before = current.segment.dovetails_of_end(gfapy.invert(current.end_type))
+ if (len(before) == 1 and len(after) == 1) or not lst:
+ lst.append(gfapy.SegmentEnd(current.name, current.end_type))
+ exclude.add(current.name)
+ current = after[0].other_end(current).inverted()
+ if current.name in exclude:
+ break
+ elif len(before) == 1:
+ lst.append(gfapy.SegmentEnd(current.name, current.end_type))
+ exclude.add(current.name)
+ break
+ else:
+ break
+ if segment_end.end_type == "L":
+ return list(reversed(lst))
+ else:
+ return lst
+
+ def __sum_of_counts(self, segpath, multfactor = 1):
+ retval = {}
+ segs = [self.try_get_segment(sn_et.segment) for sn_et in segpath]
+ for count_tag in ["KC","RC","FC"]:
+ for s in segs:
+ if count_tag in s.tagnames:
+ if count_tag not in retval:
+ retval[count_tag] = 0
+          retval[count_tag] += int(s.get(count_tag)*multfactor)
+ return retval
+
+ def _add_segment_to_merged(self, merged, segment, is_reversed, cut, init,
+ enable_tracking=False, merged_name=None):
+ n = segment.name
+ if is_reversed:
+ s = gfapy.sequence.rc(segment.sequence)[cut:]
+ if enable_tracking:
+ n = self._reverse_segment_name(segment.name, "_")
+ rn = self._reverse_pos_array(segment.rn, segment.LN)
+ mp = self._reverse_pos_array(segment.mp, segment.LN)
+ else:
+ s = segment.sequence[cut:]
+ if enable_tracking:
+ rn = segment.rn
+ mp = segment.mp
+ if enable_tracking:
+ if not mp and segment.LN:
+ mp = [1, segment.LN]
+ if segment.get("or") is None:
+ o = n
+ elif is_reversed:
+ o = self._reverse_segment_name(segment.get("or"), ",")
+ else:
+ o = segment.get("or")
+ if init:
+ merged.sequence = [s]
+ if merged_name:
+ merged.name = [merged_name]
+ else:
+ merged.name = [n]
+ merged.LN = segment.LN
+ if enable_tracking:
+ merged.rn = rn
+ merged.set("or",[o])
+ merged.mp = mp
+ else:
+ if gfapy.is_placeholder(segment.sequence):
+ merged.sequence = gfapy.Placeholder()
+ else:
+ merged.sequence.append(s)
+ if not merged_name:
+ merged.name.append(n)
+ if merged.LN:
+ if enable_tracking:
+ if rn:
+ rn = [pos - cut + merged.LN for pos in rn]
+ if not merged.rn:
+ merged.rn = rn
+ else:
+ merged.rn += rn
+ if mp and merged.mp:
+ merged.mp += [pos - cut + merged.LN for pos in mp]
+ if segment.LN:
+ merged.LN += (segment.LN - cut)
+ else:
+ merged.LN = None
+ elif enable_tracking:
+ merged.mp = None
+ if enable_tracking:
+ if not merged.get("or"):
+ merged.set("or", [o])
+ else:
+ merged.get("or").append(o)
+
+ @staticmethod
+ def _reverse_segment_name(name, separator):
+ retval = []
+ for part in name.split(separator):
+ has_openp = part[0] == "("
+ has_closep = part[-1] == ")"
+      if has_openp:
+        part = part[1:]
+      if has_closep:
+        part = part[:-1]
+      if part[-1] == "^":
+        part = part[:-1]
+      else:
+        part += "^"
+      if has_openp:
+        part += ")"
+      if has_closep:
+        part = "(" + part
+ retval.append(part)
+ return separator.join(reversed(retval))
+
+ @staticmethod
+ def _reverse_pos_array(pos_array, lastpos):
+ if pos_array is None or lastpos is None:
+ return None
+ else:
+      return list(reversed([lastpos-pos+1 for pos in pos_array]))
+
+ def __create_merged_segment(self, segpath, redundant_junctions=False,
+ jntag="jn", merged_name=None, enable_tracking=False, cut_counts=False):
+ merged = self.try_get_segment(segpath[0].segment).clone()
+ merged.set(jntag, None)
+ merged_vlevel = merged.vlevel
+ merged.vlevel = 0
+ total_cut = 0
+ a = segpath[0]
+ first_reversed = (a.end_type == "L")
+ last_reversed = None
+ if merged_name == "short":
+ merged_name = self.unused_name()
+ self._add_segment_to_merged(merged, self.segment(a.segment),
+ first_reversed, 0, True, enable_tracking=enable_tracking,
+ merged_name=merged_name)
+ if self._progress:
+ self._progress_log("merge_linear_paths", 0.95)
+ for i in range(len(segpath)-1):
+ b = gfapy.SegmentEnd(segpath[i+1]).inverted()
+ ls = self.segment(a.segment).end_relations(a.end_type, b, "dovetails")
+ if len(ls) != 1:
+        msg = "A single link was expected between {} ".format(a) + \
+              "and {}, but {} were found".format(b, len(ls))
+ raise gfapy.ValueError(msg)
+ l = ls[0]
+ if not l.overlap:
+ cut = 0
+ elif all(op.code in ["M","="] for op in l.overlap):
+ cut = sum([len(op) for op in l.overlap])
+ else:
+ raise gfapy.ValueError(
+ "Merging is only allowed if all operations are M/=")
+ total_cut += cut
+ last_reversed = (b.end_type == "R")
+ self._add_segment_to_merged(merged, self.segment(b.segment),
+ last_reversed, cut, False, enable_tracking=enable_tracking,
+ merged_name=merged_name)
+ a = gfapy.SegmentEnd(b).inverted()
+ if self._progress:
+ self._progress_log("merge_linear_paths", 0.95)
+ merged.vlevel = merged_vlevel
+ if isinstance(merged.name, list):
+ merged.name = "_".join(merged.name)
+ ortag = merged.get("or")
+ if isinstance(ortag, list):
+ merged.set("or", ",".join(ortag))
+ if not gfapy.is_placeholder(merged.sequence):
+ merged.sequence = "".join(merged.sequence)
+ if self._version == "gfa1":
+ if not merged.LN:
+ merged.LN = len(merged.sequence)
+ elif self._vlevel > 0 and merged.LN != len(merged.sequence):
+ raise gfapy.InconsistencyError(
+        "Computed sequence length {} ".format(len(merged.sequence))+
+ "and computed LN {} differ".format(merged.LN))
+ if merged.length is not None:
+ for count_tag in ["KC", "RC", "FC"]:
+ merged.set(count_tag, None)
+ else:
+ factor = 1
+ if cut_counts:
+ factor = merged.length / (total_cut+merged.length)
+ for count_tag,count in self.__sum_of_counts(segpath,factor).items():
+ merged.set(count_tag, count)
+ return merged, first_reversed, last_reversed
+
+ def __link_merged(self, merged_name, segment_end, is_reversed):
+ for l in self.segment(segment_end.segment).dovetails_of_end(
+ segment_end.end_type):
+ l2 = l.clone()
+ if l2.to_segment == segment_end.segment:
+ l2.to_segment = merged_name
+ if is_reversed:
+ l2.to_orient = gfapy.invert(l2.to_orient)
+ else:
+ l2.from_segment = merged_name
+ if is_reversed:
+ l2.from_orient = gfapy.invert(l2.from_orient)
+ l.disconnect()
+ self.add_line(l2)
+
diff --git a/gfapy/graph_operations/multiplication.py b/gfapy/graph_operations/multiplication.py
new file mode 100644
index 0000000..713e76b
--- /dev/null
+++ b/gfapy/graph_operations/multiplication.py
@@ -0,0 +1,197 @@
+import gfapy
+import re
+
+class Multiplication:
+
+ def multiply(self, segment, factor, copy_names = None,
+ conserve_components = True, distribute = None,
+ track_origin = False, origin_tag="or", extended = False):
+ """Multiply a segment by a given factor.
+
+ The multiplication operation is implemented as described in
+ Gonnella and Kurtz (2016).
+
+ Parameters:
+ segment (Line, str): the segment to multiply
+ factor (int): the multiplication factor; if 0, the segment is
+ deleted; if 1, nothing is done; if > 1, the multiplication
+ is performed
+ copy_names (list, None): an optional list of strings, the names
+ of the copies which will result from the multiplication;
+ the length of this list must be equal to factor - 1; if no
+ list is specified, the names are computed automatically, adding
+ (or incrementing) an integer as suffix to the segment name,
+ until enough non-previously used names are found
+ conserve_components (bool): if True, the removal of segments
+ in the case where factor == 0 is only done if it does not
+ split an existing connected component (thereby only dovetail
+ overlaps are considered)
+ extended : if True, then dovetail distribution and track origin
+ are turned on by default
+ distribute (str, None) : select an end for which the dovetail
+ overlaps are distributed (see Gonnella and Kurtz, 2016); if ``auto``
+ (the default if extended is set), an end is selected automatically,
+ trying to maximize the number of links which can be deleted; if ``off``
+ (the default if extended is not set), no distribution is performed; if
+ ``L`` or ``R``, links of the specified end are distributed; if
+ ``equal``, an end is selected (if any), for which the number of links
+ is equal to the factor (if none, links are not distributed; if both,
+ then ``R`` is used)
+ track_origin (bool): if True, the name of the original segment (or
+ the content of its own origin tag, if any) is stored
+ in a tag in the copies (default: False)
+ origin_tag (str): the tag where to store the origin information,
+ if track_origin is set (default: ``or``)
+ """
+ if extended:
+ if distribute == None:
+ distribute = "auto"
+ track_origin = True
+ if factor < 0:
+      raise gfapy.ArgumentError("Multiplication factor must be >= 0"+
+ " ({} found)".format(factor))
+ elif factor == 0:
+      if conserve_components and self.is_cut_segment(segment):
+ return self
+ else:
+ self.rm(segment)
+ return self
+ elif factor == 1:
+ return self
+ else:
+ s, sn = self._segment_and_segment_name(segment)
+ if track_origin and not s.get(origin_tag):
+ s.set(origin_tag, sn)
+ self.__divide_segment_and_connection_counts(s, factor)
+ if copy_names is None:
+ copy_names = self._compute_copy_names(sn, factor)
+ for cn in copy_names:
+ self.__clone_segment_and_connections(s, cn)
+ if distribute:
+ self._distribute_links(distribute, sn, copy_names, factor)
+ return self
+
+ def _compute_copy_names(self, segment_name, factor):
+ assert factor >= 2
+ retval = []
+ first = 2
+ m = re.search(r'(.*)\*(\d+)',segment_name)
+ if m:
+ segment_name = m.groups()[0]
+ i = int(m.groups()[1])
+ offset = 0
+ for i in range(first,factor+first-1):
+ name = "{}*{}".format(segment_name, i+offset)
+ while name in self.names:
+ offset+=1
+ name = "{}*{}".format(segment_name, i+offset)
+ retval.append(name)
+ return retval
+
+ def __divide_counts(self, gfa_line, factor):
+ for count_tag in ["KC", "RC", "FC"]:
+ if count_tag in gfa_line.tagnames:
+ gfa_line.set(count_tag, gfa_line.get(count_tag) // factor)
+
+ def __divide_segment_and_connection_counts(self, segment, factor):
+ self.__divide_counts(segment, factor)
+ processed_circulars = set()
+ for l in segment.dovetails + segment.containments:
+ if l.is_circular():
+        if l not in processed_circulars:
+          self.__divide_counts(l, factor)
+          processed_circulars.add(l)
+ else:
+ self.__divide_counts(l, factor)
+
+ def __clone_segment_and_connections(self, segment, clone_name):
+ cpy = segment.clone()
+ cpy.name = clone_name
+ cpy.connect(self)
+ for l in segment.dovetails + segment.containments:
+ lc = l.clone()
+ if lc.from_segment == segment.name:
+ lc.from_segment = clone_name
+ if lc.to_segment == segment.name:
+ lc.to_segment = clone_name
+ lc.connect(self)
+
+ LINKS_DISTRIBUTION_POLICY = ["off", "auto", "equal", "L", "R"]
+ '''Allowed values for the links_distribution_policy option'''
+
+ def _select_distribute_end(self, links_distribution_policy,
+ segment_name, factor):
+ if links_distribution_policy not in self.LINKS_DISTRIBUTION_POLICY:
+      raise gfapy.ArgumentError("Unknown links distribution policy {}\n".format(\
+ links_distribution_policy)+"accepted values are: {}".format(\
+ ", ".join(self.LINKS_DISTRIBUTION_POLICY)))
+ if links_distribution_policy == "off":
+ return None
+ if links_distribution_policy in ["L", "R"]:
+ return links_distribution_policy
+ else:
+ s = self.segment(segment_name)
+ esize = len(s.dovetails_of_end("R"))
+ bsize = len(s.dovetails_of_end("L"))
+ return self._auto_select_distribute_end(factor, bsize, esize,
+ links_distribution_policy == "equal")
+
+ # (keep separate for testing)
+ # @tested_in unit_multiplication
+ @staticmethod
+ def _auto_select_distribute_end(factor, bsize, esize, equal_only):
+ if esize == factor:
+ return "R"
+ elif bsize == factor:
+ return "L"
+ elif equal_only:
+ return None
+ elif esize < 2:
+ if bsize < 2:
+ return None
+ else:
+ return "L"
+ elif bsize < 2:
+ return "R"
+ elif esize < factor:
+ if bsize <= esize:
+ return "R"
+ elif bsize < factor:
+ return "L"
+ else:
+ return "R"
+ elif bsize < factor:
+ return "L"
+ elif bsize <= esize:
+ return "L"
+ else:
+ return "R"
+
+ def _distribute_links(self, links_distribution_policy, segment_name,
+ copy_names, factor):
+ if factor < 2:
+ return
+ end_type = self._select_distribute_end(links_distribution_policy,
+ segment_name, factor)
+ if end_type is None:
+ return
+ et_links = self.segment(segment_name).dovetails_of_end(end_type)
+ diff = max([len(et_links)-factor, 0])
+ links_signatures = list([repr(l.other_end(gfapy.SegmentEnd(segment_name, \
+ end_type))) for l in et_links])
+ for i, sn in enumerate([segment_name]+copy_names):
+ to_keep = links_signatures[i:i+diff+1]
+ links = self.segment(sn).dovetails_of_end(end_type).copy()
+ for l in links:
+ l_sig = repr(l.other_end(gfapy.SegmentEnd(sn, end_type)))
+ if l_sig not in to_keep:
+ l.disconnect()
+
+ def _segment_and_segment_name(self, segment_or_segment_name):
+ if isinstance(segment_or_segment_name, gfapy.Line):
+ s = segment_or_segment_name
+ sn = segment_or_segment_name.name
+ else:
+ s = self.segment(segment_or_segment_name)
+ sn = segment_or_segment_name
+ return s, sn
diff --git a/gfapy/graph_operations/p_bubbles.py b/gfapy/graph_operations/p_bubbles.py
new file mode 100644
index 0000000..e5595d3
--- /dev/null
+++ b/gfapy/graph_operations/p_bubbles.py
@@ -0,0 +1,48 @@
+class PBubbles:
+
+ def remove_p_bubbles(self):
+ '''Removes all p-bubbles in the graph'''
+ visited = set()
+ for s in self.segments:
+ sn = s.name
+ if sn in visited:
+ continue
+ if s.connectivity == (1,1):
+ s1 = s.neighbours_of_end("L")[0]
+ s2 = s.neighbours_of_end("R")[0]
+ n1 = sorted(s1.neighbours, key=lambda s:s.name)
+ n2 = sorted(s2.neighbours, key=lambda s:s.name)
+ for se in n1:
+ visited.add(se[0].name)
+ if list(n1) == [os.inverted() for os in n2]:
+ self._remove_proven_p_bubble(s1, s2, n1)
+
+ def remove_p_bubble(self, segment_end1, segment_end2,
+ count_tag=None, unit_length=None):
+ '''Removes a p-bubble between segment_end1 and segment_end2'''
+    if count_tag is None:
+      count_tag=self._default["count_tag"]
+    if unit_length is None:
+      unit_length=self._default["unit_length"]
+    s1 = self.segment(segment_end1.segment)
+    s2 = self.segment(segment_end2.segment)
+    et1 = segment_end1.end_type
+    et2 = segment_end2.end_type
+ n1 = sorted(s1.neighbours(et1), key=lambda s:s.name)
+ n2 = sorted(s2.neighbours(et2), key=lambda s:s.name)
+ assert list(n1) == [os.inverted() for os in n2]
+ assert all(se[0].connectivity == (1,1) for se in n1)
+ self._remove_proven_p_bubble(segment_end1, segment_end2, n1,
+ count_tag=count_tag, unit_length=unit_length)
+
+ def _remove_proven_p_bubble(self, segment_end1, segment_end2, alternatives,
+ count_tag=None, unit_length=None):
+    if count_tag is None:
+      count_tag=self._default["count_tag"]
+    if unit_length is None:
+      unit_length=self._default["unit_length"]
+ coverages = [self.try_get_segment(s[0]).coverage(count_tag=count_tag, \
+ unit_length=unit_length) for s in alternatives]
+ alternatives.pop(coverages.index(max(coverages)))
+ for s in alternatives:
+      self.segment(s[0]).disconnect()
diff --git a/gfapy/graph_operations/redundant_linear_paths.py b/gfapy/graph_operations/redundant_linear_paths.py
new file mode 100644
index 0000000..8aa7c0c
--- /dev/null
+++ b/gfapy/graph_operations/redundant_linear_paths.py
@@ -0,0 +1,116 @@
+import gfapy
+
+class RedundantLinearPaths:
+
+ def _junction_junction_paths(self, sn, exclude):
+ retval = []
+    exclude.add(sn)
+ s = self.segment(sn)
+ for dL in s.dovetails_L:
+ eL = dL.other_end(gfapy.SegmentEnd(s, "L"))
+ if (eL.name in exclude) or (len(eL.segment.dovetails_of_end(eL.end_type)) == 1):
+ retval.append([True, eL, gfapy.SegmentEnd(s, "R"), True])
+ for dR in s.dovetails_R:
+ eR = dR.other_end(gfapy.SegmentEnd(s, "R"))
+ if (eR.name in exclude) or (len(eR.segment.dovetails_of_end(eR.end_type)) == 1):
+ retval.append([True, gfapy.SegmentEnd(s, "R"), eR.inverted(), True])
+ return retval
+
+ def _extend_linear_path_to_junctions(self, segpath):
+ segfirst = self.segment(segpath[0].segment)
+ segfirst_d = segfirst.dovetails_of_end(gfapy.invert(segpath[0].end_type))
+ redundant_first = (len(segfirst_d) > 0)
+ if len(segfirst_d) == 1:
+ segpath.insert(0, segfirst_d[0].other_end(segpath[0].inverted()))
+ segpath.insert(0, redundant_first)
+ seglast = self.segment(segpath[-1].segment)
+ seglast_d = seglast.dovetails_of_end(segpath[-1].end_type)
+ redundant_last = (len(seglast_d) > 0)
+ if len(seglast_d) == 1:
+ segpath.append(seglast_d[0].other_end(segpath[-1].inverted()))
+ segpath.append(redundant_last)
+
+ def _link_duplicated_first(self, merged, first, is_reversed, jntag):
+ # annotate junction
+ if jntag is None:
+ jntag = "jn"
+ if not first.get(jntag):
+ first.set(jntag, {"L":[],"R":[]})
+ if is_reversed:
+ first.get(jntag)["L"].append([merged.name, "-"])
+ else:
+ first.get(jntag)["R"].append([merged.name, "+"])
+ # create temporary link
+ ln = len(first.sequence)
+ if self._version == "gfa1":
+ tmp_link = gfapy.line.edge.Link([first.name, \
+ "-" if is_reversed else "+", merged.name, "+", \
+ "{}M".format(ln), "co:Z:temporary"])
+ self.add_line(tmp_link)
+ elif self._version == "gfa2":
+ tmp_link = gfapy.line.edge.GFA2(["*",first.name + \
+ ("-" if is_reversed else "+"), merged.name+"+",
+ "0" if is_reversed else str(ln-1), # on purpose fake
+ "1" if is_reversed else "{}$".format(ln), # on purpose fake
+ 0, str(ln), "{}M".format(ln), "co:Z:temporary"])
+ self.add_line(tmp_link)
+ else:
+ raise gfapy.AssertionError()
+
+ def _link_duplicated_last(self, merged, last, is_reversed, jntag):
+ # annotate junction
+ if jntag is None:
+ jntag = "jn"
+ if not last.get(jntag):
+ last.set(jntag, {"L":[],"R":[]})
+ if is_reversed:
+ last.get(jntag)["R"].append([merged.name, "-"])
+ else:
+ last.get(jntag)["L"].append([merged.name, "+"])
+ # create temporary link
+ ln = len(last.sequence)
+ if self._version == "gfa1":
+ tmp_link = gfapy.line.edge.Link([merged.name, "+",
+ last.name, "-" if is_reversed else "+",
+ "{}M".format(ln), "co:Z:temporary"])
+ self.add_line(tmp_link)
+ elif self._version == "gfa2":
+ mln = len(merged.sequence)
+ tmp_link = gfapy.line.edge.GFA2(["*",merged.name+"+", \
+        last.name+("-" if is_reversed else "+"),
+ str(mln - ln), "{}$".format(mln),
+ str(ln-1) if is_reversed else "0", # on purpose fake
+ "{}$".format(ln) if is_reversed else "1", # on purpose fake
+ "{}M".format(ln), "co:Z:temporary"])
+ self.add_line(tmp_link)
+ else:
+ raise gfapy.AssertionError()
+
+ def _remove_junctions(self, jntag):
+ if jntag is None:
+ jntag = "jn"
+ for s in self.segments:
+ jndata = s.get(jntag)
+ if jndata:
+ ln = len(s.sequence)
+        for m1, dir1 in jndata["L"]:
+          for m2, dir2 in jndata["R"]:
+ if self._version == "gfa1":
+ l = gfapy.line.edge.Link([m1,dir1,m2,dir2,"{}M".format(ln)])
+ self.add_line(l)
+ elif self._version == "gfa2":
+ m1ln = len(self.segment(m1).sequence)
+ m2ln = len(self.segment(m2).sequence)
+ r1 = (dir1 == "-")
+ r2 = (dir2 == "-")
+ l = gfapy.line.edge.GFA2(["*", m1+dir1, m2+dir2,
+ "0" if r1 else str(m1ln-ln),
+ str(ln) if r1 else str(m1ln)+"$",
+ "0" if r2 else str(m2ln-ln),
+                str(ln) if r2 else str(m2ln)+"$",
+ str(ln)+"M"])
+ self.add_line(l)
+ else:
+ raise gfapy.AssertionError()
+ s.disconnect()
+
diff --git a/gfapy/graph_operations/superfluous_links.py b/gfapy/graph_operations/superfluous_links.py
new file mode 100644
index 0000000..b70ac05
--- /dev/null
+++ b/gfapy/graph_operations/superfluous_links.py
@@ -0,0 +1,76 @@
+import gfapy
+
+class SuperfluousLinks:
+
+ def enforce_segment_mandatory_links(self, segment, conserve_components=True):
+    """Enforce the mandatory dovetail overlaps of a given segment to other
+ segments, by removing all other dovetail overlaps between those segments.
+
+ The definition of mandatory links follows the one
+ given in Gonnella and Kurtz, 2016.
+
+ Parameters:
+ segment (Line, str) : the segment
+ conserve_components (bool): if True, then dovetail overlaps are only
+ removed, if their removal does not split connected components
+ of the graph (considering as connections only dovetail overlaps)
+ """
+ s, sn = self._segment_and_segment_name(segment)
+ se = {}; l = {}
+ for et in ["L", "R"]:
+ se[et] = [sn, et]
+      l[et] = s.dovetails_of_end(et)
+ if len(l["L"])==1 and len(l["R"])==1:
+ oe = {}
+ for et in ["L", "R"]:
+ oe[et] = l[et][0].other_end(se[et])
+      if oe["L"] == oe["R"]:
+ return
+ for et in ["L", "R"]:
+ self._delete_other_links(oe[et], se[et],
+ conserve_components=conserve_components)
+ else:
+      if len(l["L"]) == 1:
+        et = "L"
+      elif len(l["R"]) == 1:
+        et = "R"
+ else:
+ return
+ oe = l[et][0].other_end(se[et])
+ self._delete_other_links(oe, se[et],
+ conserve_components=conserve_components)
+
+ def enforce_all_mandatory_links(self, conserve_components=True):
+    """Enforce all mandatory dovetail overlaps between pairs of segments,
+    by removing all other dovetail overlaps between those segments.
+
+ The definition of mandatory links follows the one
+ given in Gonnella and Kurtz, 2016.
+
+ Parameters:
+ conserve_components (bool): if True, then dovetail overlaps are only
+ removed, if their removal does not split connected components
+ of the graph (considering as connections only dovetail overlaps)
+ """
+    for sn in self.segment_names:
+ self.enforce_segment_mandatory_links(sn, conserve_components=
+ conserve_components)
+
+ def remove_self_link(self, segment):
+ """Remove self links of a segment, if any.
+
+ Remove any dovetail overlap of a segment to itself.
+
+ Parameters:
+ segment (str, Line): the segment
+ """
+    if not isinstance(segment, gfapy.Line):
+ segment = self.try_get_segment(segment)
+ for e in segment.dovetails:
+ if e.from_segment == e.to_segment:
+ e.disconnect()
+
+ def remove_self_links(self):
+ """Remove all dovetail overlap of segments to themselves, if any."""
+    for sn in self.segment_names:
+ self.remove_self_link(sn)
diff --git a/gfapy/graph_operations/topology.py b/gfapy/graph_operations/topology.py
new file mode 100644
index 0000000..0901f90
--- /dev/null
+++ b/gfapy/graph_operations/topology.py
@@ -0,0 +1,189 @@
+import gfapy
+
+class Topology:
+
+ def is_cut_link(self, link):
+ """Does the removal of a dovetail overlap split a connected component?
+
+ Note:
+ only dovetail overlaps are considered as connections
+
+ Parameters:
+ link (Line) : an edge instance, which represents a dovetail overlap
+
+ Returns:
+ bool
+ """
+ if link.is_circular():
+ return False
+ if not link.get("from").dovetails_of_end(\
+ gfapy.invert(link.from_end.end_type)):
+ return True
+ if not link.to.dovetails_of_end(gfapy.invert(link.to_end.end_type)):
+ return True
+    c = {}
+    for et in ["from", "to"]:
+      c[et] = []
+      visited = set()
+      segend = link.from_end if et == "from" else link.to_end
+      visited.add(segend.name)
+      visited.add(link.other_end(segend).name)
+      self.__traverse_component(segend, c[et], visited)
+    return set(s.name for s in c["from"]) != set(s.name for s in c["to"])
+
+ def is_cut_segment(self, segment):
+ """Does the removal of a segment split a connected component?
+
+ Note:
+ only dovetail overlaps are considered as connections
+
+ Parameters:
+ segment (str, Line) : a segment name or instance
+
+ Returns:
+ bool
+ """
+ if isinstance(segment, str):
+ segment = self.try_get_segment(segment)
+ if segment._connectivity() in [(0,0),(0,1),(1,0)]:
+ return False
+    start_points = []
+    for et in ["L", "R"]:
+      for l in segment.dovetails_of_end(et):
+        start_points.append(l.other_end(\
+            gfapy.SegmentEnd(segment.name, et)).inverted())
+    cc = []
+    for start_point in start_points:
+      cc.append([])
+      visited = set()
+      visited.add(segment.name)
+      self.__traverse_component(start_point, cc[-1], visited)
+    return any(set(s.name for s in c) != set(s.name for s in cc[0]) for c in cc)
+
+ def segment_connected_component(self, segment, visited = None):
+    """Compute the connected component to which a segment belongs.
+
+ Note:
+ only dovetail overlaps are considered as connections
+
+ Parameters:
+ segment (str, Line) : a segment name or instance
+
+ Returns:
+ list : a list of segment instances
+ """
+ if visited is None:
+ visited = set()
+ if isinstance(segment, gfapy.Line):
+ segment_name = segment.name
+ else:
+ segment_name = segment
+ segment = self.segment(segment)
+ visited.add(segment_name)
+ c = [segment]
+ for e in ["L", "R"]:
+ self.__traverse_component(gfapy.SegmentEnd(segment, e), c, visited)
+ return c
+
+ def connected_components(self):
+ """Compute the connected components of the graph.
+
+ Note:
+ only dovetail overlaps are considered as connections
+
+ Returns:
+ list : a list of lists of segment instances; each sublist is
+ a connected component
+ """
+ components = []
+ visited = set()
+ for sn in self.segment_names:
+ if sn not in visited:
+ components.append(self.segment_connected_component(sn, visited))
+ return components
+
+ def split_connected_components(self):
+ """Split the connected components of the graph.
+
+ Note:
+ only dovetail overlaps are considered as connections
+
+ Returns:
+ list of Gfa
+ """
+ retval = []
+ for cc in self.connected_components():
+ gfa2 = self.clone
+ gfa2.rm(gfa2.segment_names - cc)
+ retval.append(gfa2)
+ return retval
+
+ @property
+ def n_dead_ends(self):
+ """Number of dead ends in the graph.
+
+ A dead end is a segment end which has no dovetail overlaps.
+
+ Returns:
+ int
+ """
+ n = 0
+ for s in self.segments:
+ if not s.dovetails_L: n+=1
+ if not s.dovetails_R: n+=1
+ return n
+
+ @property
+ def n_dovetails(self):
+ """Number of dovetail overlaps in the graph.
+
+ Returns:
+ int
+ """
+ n = 0
+ for s in self.segments:
+ n += len(s.dovetails_L)
+ n += len(s.dovetails_R)
+ return n // 2
+
+ @property
+ def n_internals(self):
+ """Number of non-dovetail non-containment overlaps in the graph.
+
+ Returns:
+ int
+ """
+ n = 0
+ for s in self.segments:
+ n += len(s.internals)
+ return n // 2
+
+ @property
+ def n_containments(self):
+ """Number of containment overlaps in the graph.
+
+ Returns:
+ int
+ """
+ n = 0
+ for s in self.segments:
+ n += len(s.edges_to_contained)
+ n += len(s.edges_to_containers)
+ return n // 2
+
+ def info(self, short):
+ pass
+
+ def __traverse_component(self, segment_end, c, visited):
+ s = segment_end.segment
+ assert(isinstance(s, gfapy.Line))
+ for l in s.dovetails_of_end(segment_end.end_type):
+ oe = l.other_end(segment_end)
+ sn = oe.name
+ s = oe.segment
+ if sn in visited:
+ continue
+ visited.add(sn)
+ c.append(s)
+ for e in ["L","R"]:
+ self.__traverse_component(gfapy.SegmentEnd(s, e), c, visited)
diff --git a/gfapy/lastpos.py b/gfapy/lastpos.py
new file mode 100644
index 0000000..6fa512e
--- /dev/null
+++ b/gfapy/lastpos.py
@@ -0,0 +1,161 @@
+import gfapy
+import copy
+from functools import total_ordering
+
+@total_ordering
+class LastPos:
+ """The last position of a sequence.
+
+ In GFA2 lines (e.g. edges and fragments), the last position of a sequence is
+ suffixed by a ``$`` sign.
+
+ This class is provided to represent this particular case of a position value.
+ New instances are created passing an integer or the string representation
+ to the constructor. If a string without ``$`` is passed to the constructor,
+ an integer is returned instead (thus the constructor can be used to parse
+ any GFA position field string).
+
+ >>> from gfapy import LastPos
+ >>> LastPos("2$")
+ gfapy.LastPos(2)
+ >>> LastPos(2)
+ gfapy.LastPos(2)
+ >>> LastPos("2")
+ 2
+
+ Parameters:
+ value (str, int) : a string representation of a position, or an integer
+ representing the last position of a sequence
+ valid (bool) : if True, the value is always considered valid, and no
+ validation is performed (default: False)
+
+ Returns:
+ gfapy.LastPos, int : an int if the value is a string and has no dollar sign
+ suffix; otherwise a LastPos instance
+
+ Raises:
+ see validate method (no exceptions raised if valid is set to True)
+
+ """
+
+ def __new__(cls, value, valid=False):
+ if isinstance(value, str):
+ return cls._from_string(value, valid=valid)
+ else:
+ new_instance = object.__new__(cls)
+ new_instance.value = value
+ if not valid:
+ new_instance.validate()
+ return new_instance
+
+ def validate(self):
+ """Checks that the value is a positive integer.
+
+ Validation is performed by default on construction, unless the valid
+ parameter of the constructor is set to True.
+
+ Raises:
+ gfapy.error.TypeError : if the value is not an integer
+ gfapy.error.ValueError : if the value is not >= 0
+ """
+
+ if not isinstance(self.value, int):
+ raise gfapy.TypeError("LastPos value shall be an integer,"+
+ " {} found".format(type(self.value)))
+ elif self.value < 0:
+ raise gfapy.ValueError("LastPos value shall be >= 0,"+
+ " {} found".format(self.value))
+
+ def __str__(self):
+ return "{}$".format(self.value)
+
+ def __repr__(self):
+ return "gfapy.LastPos({})".format(self.value)
+
+ def __int__(self):
+ return self.value
+
+ def __eq__(self, other):
+ if isinstance(other, int):
+ return self.value == other
+ elif not isinstance(other, LastPos):
+ return NotImplemented
+ else:
+ return self.value == other.value
+
+ def __lt__(self, other):
+ if isinstance(other, int):
+ return self.value.__lt__(other)
+ elif not isinstance(other, LastPos):
+ return NotImplemented
+ else:
+ return self.value.__lt__(other.value)
+
+ def __sub__(self,other):
+ o = int(other)
+ if o == 0:
+ return gfapy.LastPos(self.value)
+ else:
+ return self.value - o
+
+ @classmethod
+ def _from_string(cls, string, valid=False):
+ if string[-1] == "$":
+ return cls(int(string[:-1]), valid=valid)
+ else:
+ try:
+ v = int(string)
+      except ValueError:
+ raise gfapy.FormatError(
+ "LastPos value has a wrong format: {}".format(string))
+ if not valid:
+ if v < 0:
+ raise gfapy.ValueError("LastPos value shall be >= 0,"+
+ " {} found".format(v))
+ return v
+
+def posvalue(obj):
+ """The integer representing a position.
+
+ Parameters:
+ obj (int, LastPos) : the position
+
+ Returns:
+ int : If obj is a LastPos, then its value.
+ If it is an integer, then the integer itself.
+ """
+ if isinstance(obj, LastPos):
+ return obj.value
+ else:
+ return obj
+
+def islastpos(obj):
+ """Checks if a position value is a last position.
+
+ Parameters:
+ obj (int, LastPos) : the position
+
+ Returns:
+ bool : If obj is a LastPos, then True.
+ If it is an integer, then False.
+ """
+ return isinstance(obj, LastPos)
+
+def isfirstpos(obj):
+ """Checks if a position value is the first position (0).
+
+ Note that the last position of an empty sequence
+ is also its first position, therefore:
+
+ >>> from gfapy.lastpos import isfirstpos
+ >>> isfirstpos(gfapy.LastPos("0$"))
+ True
+
+ Parameters:
+ obj (int, LastPos) : the position
+
+ Returns:
+ bool : If the value of the position is 0, then True.
+ """
+ return posvalue(obj) == 0
+
diff --git a/gfapy/line/__init__.py b/gfapy/line/__init__.py
new file mode 100644
index 0000000..1078131
--- /dev/null
+++ b/gfapy/line/__init__.py
@@ -0,0 +1,10 @@
+from .comment import Comment
+from .line import Line
+from .unknown import Unknown
+from .edge import Edge
+from .gap import Gap
+from .custom_record import CustomRecord
+from .fragment import Fragment
+from .header import Header
+from .segment import Segment
+from . import group
diff --git a/gfapy/line/comment/__init__.py b/gfapy/line/comment/__init__.py
new file mode 100644
index 0000000..4ea66d3
--- /dev/null
+++ b/gfapy/line/comment/__init__.py
@@ -0,0 +1 @@
+from .comment import Comment
diff --git a/gfapy/line/comment/comment.py b/gfapy/line/comment/comment.py
new file mode 100644
index 0000000..0d01c9f
--- /dev/null
+++ b/gfapy/line/comment/comment.py
@@ -0,0 +1,30 @@
+import gfapy
+from ..line import Line
+from .construction import Construction
+from .tags import Tags
+from .writer import Writer
+from .version_conversion import VersionConversion
+
+class Comment(Writer, Tags, Construction, VersionConversion, Line):
+ """
+ A comment line of a GFA file
+
+  The content of the comment line, excluding the initial **#** and any
+  initial spacing characters, is included in the field **content**.
+
+ The initial spacing characters can be read/changed using the **spacer**
+ field. The default value is a single space.
+
+ Tags are not supported by comment lines. If the line contains tags,
+  these are not parsed, but are included in the **content** field.
+ Trying to set or get tag values raises exceptions.
+ """
+
+ RECORD_TYPE = "#"
+ POSFIELDS = ["content", "spacer"]
+ DATATYPE = {
+ "content" : "comment",
+ "spacer" : "comment",
+ }
+
+Comment._apply_definitions()
diff --git a/gfapy/line/comment/construction.py b/gfapy/line/comment/construction.py
new file mode 100644
index 0000000..e4a39bc
--- /dev/null
+++ b/gfapy/line/comment/construction.py
@@ -0,0 +1,11 @@
+import gfapy
+
+class Construction:
+ def _initialize_positional_fields(self, strings):
+ self._init_field_value("content", "comment", strings[1], errmsginfo = strings)
+ sp = strings[2] if len(strings) > 2 else " "
+ self._init_field_value("spacer", "comment", sp, errmsginfo = strings)
+
+ def _initialize_tags(self, strings):
+ if len(strings) > 3:
+ raise gfapy.ValueError("Comment lines do not support tags")
diff --git a/gfapy/line/comment/tags.py b/gfapy/line/comment/tags.py
new file mode 100644
index 0000000..f90d56b
--- /dev/null
+++ b/gfapy/line/comment/tags.py
@@ -0,0 +1,14 @@
+import gfapy
+
+class Tags:
+
+ def set(self, fieldname, value):
+ """Set the value of a field.
+
+ The generic Line.set() method is overwritten for comments,
+ in order to disallow tags.
+ """
+ if fieldname in ["content", "spacer"]:
+ return super().set(fieldname, value)
+ else:
+ raise gfapy.RuntimeError("Tags of comment lines cannot be set")
diff --git a/gfapy/line/comment/version_conversion.py b/gfapy/line/comment/version_conversion.py
new file mode 100644
index 0000000..25f4b8b
--- /dev/null
+++ b/gfapy/line/comment/version_conversion.py
@@ -0,0 +1,5 @@
+class VersionConversion:
+
+ def _to_gfa1_a(self): return self.to_list()
+ def _to_gfa2_a(self): return self.to_list()
+
diff --git a/gfapy/line/comment/writer.py b/gfapy/line/comment/writer.py
new file mode 100644
index 0000000..17cde39
--- /dev/null
+++ b/gfapy/line/comment/writer.py
@@ -0,0 +1,13 @@
+import gfapy
+
+class Writer:
+ def __str__(self):
+ return "#" + str(self.spacer) + str(self.content)
+
+ def to_list(self):
+ """Convert the content of the comment line to a list.
+
+ The generic to_list() method of Line is overwritten,
+ in order to support an optional spacer specification.
+ """
+ return ["#", self.content, self.spacer]
diff --git a/gfapy/line/common/__init__.py b/gfapy/line/common/__init__.py
new file mode 100644
index 0000000..353cc51
--- /dev/null
+++ b/gfapy/line/common/__init__.py
@@ -0,0 +1,13 @@
+from .cloning import Cloning
+from .connection import Connection
+from .disconnection import Disconnection
+from .dynamic_fields import DynamicFields
+from .equivalence import Equivalence
+from .field_data import FieldData
+from .field_datatype import FieldDatatype
+from .construction import Construction
+from .update_references import UpdateReferences
+from .validate import Validate
+from .version_conversion import VersionConversion
+from .virtual_to_real import VirtualToReal
+from .writer import Writer
diff --git a/gfapy/line/common/cloning.py b/gfapy/line/common/cloning.py
new file mode 100644
index 0000000..7880889
--- /dev/null
+++ b/gfapy/line/common/cloning.py
@@ -0,0 +1,35 @@
+import gfapy
+import json
+from copy import deepcopy
+
+class Cloning:
+
+ def clone(self):
+ """Copy of a gfapy.Line instance.
+ The copy will be disconnected, i.e. it does not belong to the GFA and does
+ not contain cross-references to other lines. This allows the line to be
+ edited (e.g. changing the unique ID) before adding it.
+ To achieve this, all reference fields are copied in their string
+ representation.
+ All other fields are copied as they are, and a deep copy is done for
+ lists, strings and JSON fields.
+
+ Returns
+ -------
+ gfapy.Line
+ """
+ data_cpy = {}
+ for k,v in self._data.items():
+ if k in self.__class__.REFERENCE_FIELDS:
+ data_cpy[k] = self.field_to_s(k)
+ elif self._field_datatype(k) == "J":
+ data_cpy[k] = json.loads(json.dumps(v))
+ elif isinstance(v, list) or isinstance(v, str):
+ data_cpy[k] = deepcopy(v)
+ else:
+ data_cpy[k] = v
+ cpy = self.__class__(data_cpy, vlevel = self.vlevel,
+ virtual = self.virtual, version = self.version)
+ cpy._datatype = self._datatype.copy()
+ # cpy._refs and cpy._gfa are not set, so that the cpy is disconnected
+ return cpy
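+
+# Illustrative sketch of clone() (not from the upstream sources; the segment
+# name is made up):
+#
+#   >>> import gfapy
+#   >>> s = gfapy.Line("S\tsA\tACCGT", version="gfa1")
+#   >>> c = s.clone()
+#   >>> c.name = "sA_copy"       # editing the copy does not affect s
+#   >>> s.name, c.name
+#   ('sA', 'sA_copy')
+#   >>> c.is_connected()
+#   False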
diff --git a/gfapy/line/common/connection.py b/gfapy/line/common/connection.py
new file mode 100644
index 0000000..f14a644
--- /dev/null
+++ b/gfapy/line/common/connection.py
@@ -0,0 +1,110 @@
+import gfapy
+
+class Connection:
+
+ def is_connected(self):
+ """
+ In a connected line, some of the fields are converted
+ into references or lists of references to other lines.
+ Furthermore, instance variables are populated with back
+ references to the line (e.g. the connections of a segment
+ are stored as references in lists belonging to the segment),
+ to allow graph traversal.
+
+ Returns
+ -------
+ bool
+ Is the line connected to other lines of a GFA instance?
+ """
+ return (self._gfa is not None)
+
+ @property
+ def gfa(self):
+ return self._gfa
+
+ def connect(self, gfa):
+ """
+ Connect the line to a GFA instance
+
+ Parameters
+ ----------
+ gfa : GFA
+ the GFA instance
+
+ Returns
+ -------
+ None
+ """
+ if self.is_connected():
+ raise gfapy.RuntimeError(
+ "Line {} is already connected to a GFA instance".format(self))
+ previous = gfa._search_duplicate(self)
+ if previous:
+ if previous.virtual:
+ return self._substitute_virtual_line(previous)
+ else:
+ return self._process_not_unique(previous)
+ else:
+ self._gfa = gfa
+ self._initialize_references()
+ self._gfa._register_line(self)
+ return None
+
+ @property
+ def all_references(self):
+ """List of lines which contain references to the line instance
+
+ Returns
+ -------
+ list
+ """
+ if not self._refs:
+ self._refs = {}
+ return [x for y in self._refs.values() for x in y]
+
+ def _add_reference(self, line, key, append = True):
+ if not self._refs:
+ self._refs = {}
+ if key not in self._refs:
+ self._refs[key] = []
+ if append:
+ self._refs[key].append(line)
+ else:
+ self._refs[key].insert(0, line)
+
+ def _initialize_references(self):
+ """
+ .. note::
+ SUBCLASSES with reference fields shall
+ overwrite this method to connect their reference
+ fields
+ """
+ if self.REFERENCE_INITIALIZERS:
+ for field, klass, refkey in self.REFERENCE_INITIALIZERS:
+ self._initialize_reference(field, klass, refkey)
+
+ def _initialize_reference(self, field, klass, refkey):
+ name = self.get(field)
+ line = self.gfa.line(name)
+ if line is None:
+ data = [klass.RECORD_TYPE]
+ for i in range(len(klass.POSFIELDS)):
+ data.append("1")
+ line = klass(data, virtual=True, version="gfa2")
+ line.name = name
+ line.connect(self.gfa)
+ self._set_existing_field(field, line, set_reference=True)
+ line._add_reference(self, refkey)
+
+ def _process_not_unique(self, previous):
+ """
+ .. note::
+ SUBCLASSES may overwrite this method
+ if some kind of non unique lines shall be
+ tolerated or handled differently (eg complement links)
+ """
+ raise gfapy.NotUniqueError(
+ "Line: {}\n".format(str(self))+
+ "Line or ID not unique\n"+
+ "Matching previous line: {}".format(str(previous))
+ )
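+
+# Sketch of connecting a line to a GFA object (not from the upstream sources;
+# it assumes that gfapy.Gfa accepts a version keyword argument):
+#
+#   >>> import gfapy
+#   >>> gfa = gfapy.Gfa(version="gfa1")
+#   >>> line = gfapy.Line("S\tsA\tACCGT", version="gfa1")
+#   >>> line.is_connected()
+#   False
+#   >>> line.connect(gfa)
+#   >>> line.is_connected()
+#   True
+#   >>> line.all_references
+#   []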
diff --git a/gfapy/line/common/construction.py b/gfapy/line/common/construction.py
new file mode 100644
index 0000000..e94dfc8
--- /dev/null
+++ b/gfapy/line/common/construction.py
@@ -0,0 +1,335 @@
+from collections import OrderedDict
+from functools import partial
+import re
+import gfapy
+from .dynamic_fields import DynamicField
+
+try:
+ from functools import partialmethod
+except ImportError:
+ #for compatibility with old python versions
+ def partialmethod(method, **kwargs):
+ return lambda self: method(self, **kwargs)
+
+class Construction:
+
+ DELAYED_PARSING_DATATYPES = [
+ "alignment_gfa1",
+ "alignment_gfa2",
+ "alignment_list_gfa1",
+ "oriented_segments",
+ "H",
+ "J",
+ "B",
+ ]
+ """
+ List of datatypes which are parsed only on access.
+
+ All others are parsed when the line is read.
+ """
+
+ RECORD_TYPE_VERSIONS = {
+ "specific" :
+ {"gfa1" : ["C", "L", "P"],
+ "gfa2" : ["E", "G", "F", "O", "U", "\n"]},
+ "generic" : ["H", "#"],
+ "different" : ["S"]
+ }
+ """
+ Dependency of the record type on the GFA version.
+
+ * specific: only for a specific version
+ * generic: same syntax for all versions
+ * different: different syntax in different versions
+ """
+
+ def __new__(cls, data, vlevel = 1, virtual = False, version = None):
+ if isinstance(data, str):
+ data = data.split("\t")
+ if isinstance(data, list) and cls.RECORD_TYPE is None:
+ cls = gfapy.Line._subclass(data, version = version)
+ return object.__new__(cls)
+
+ def __init__(self, data, vlevel = 1, virtual = False, version = None):
+ self.vlevel = vlevel
+ self._virtual = virtual
+ self._datatype = {}
+ self._data = {}
+ self._gfa = None
+ self._version = version
+ self._refs = {}
+ if self.__class__ == gfapy.Line:
+ raise gfapy.AssertionError("Line subclass unknown")
+ if isinstance(data, dict):
+ # API private initialization using dict
+ self._data.update(data)
+ else:
+ # public initialization using list (or tab-separated string)
+ if self.__class__ == gfapy.line.Comment:
+ data = gfapy.Line._init_comment_data(data)
+ elif isinstance(data, str):
+ data = data.split(gfapy.Line.SEPARATOR)
+ if self.version is None:
+ self._compute_version(data[0])
+ else:
+ self._validate_version()
+ self._initialize_positional_fields(data)
+ self._initialize_tags(data)
+ if self.vlevel >= 1:
+ self._validate_record_type_specific_info()
+ if self.version is None:
+ raise gfapy.RuntimeError("version could not be determined, "+
+ "record_type={}".format(self.record_type))
+
+ @staticmethod
+ def _init_comment_data(data):
+ if isinstance(data, list) and (data[0] != "#"):
+ # improperly split, rejoin
+ data = "\t".join(data)
+ if isinstance(data, str):
+ match = re.match(r"^#(\s*)(.*)$", data)
+ if match is None:
+ raise gfapy.FormatError("Comment lines must begin with #\n"+
+ "Line: {}".format(data))
+ data = ["#", match.group(2), match.group(1)]
+ return data
+
+ def _compute_version(self, rt):
+ if rt in Construction.RECORD_TYPE_VERSIONS["generic"]:
+ self._version = "generic"
+ elif rt in Construction.RECORD_TYPE_VERSIONS["different"]:
+ if hasattr(self.__class__, "VERSION"):
+ self._version = self.__class__.VERSION
+ else:
+ raise gfapy.RuntimeError(
+ "GFA version not specified\n"+
+ "Records of type {} ".format(rt)+
+ "have different syntax according to the version")
+ else:
+ for k, v in Construction.RECORD_TYPE_VERSIONS["specific"].items():
+ if rt in v:
+ self._version = k
+ break
+ if not self._version:
+ self._version = "gfa2"
+
+ def _validate_version(self):
+ rt = self.__class__.RECORD_TYPE
+ if self._version not in gfapy.VERSIONS:
+ raise gfapy.VersionError(
+ "GFA specification version unknown ({})".format(self._version))
+ else:
+ for k, v in Construction.RECORD_TYPE_VERSIONS["specific"].items():
+ if rt in v:
+ if self._version != k:
+ raise gfapy.VersionError(
+ "Records of type {} are incompatible ".format(self.record_type)+
+ "with version {}".format(self._version))
+ return
+
+ @property
+ def _n_positional_fields(self):
+ return len(self.POSFIELDS)
+
+ def _init_field_value(self, n, t, s, errmsginfo = None):
+ if self.vlevel >= 1:
+ s = gfapy.Field._parse_gfa_field(s, t, safe = True, fieldname = n,
+ line = errmsginfo)
+ elif t not in self.DELAYED_PARSING_DATATYPES:
+ s = gfapy.Field._parse_gfa_field(s, t, safe = (self.vlevel >= 1),
+ fieldname = n, line = errmsginfo)
+ self._data[n] = s
+
+ def _initialize_positional_fields(self, strings):
+ if strings[0] != self.RECORD_TYPE and self.RECORD_TYPE != "\n":
+ raise gfapy.FormatError("Record type of records of "+
+ "class {} must be {} ({} found)".format(self.__class__,
+ self.RECORD_TYPE, strings[0]))
+ if self.version is None:
+ raise gfapy.AssertionError(
+ "Bug found, please report\n"+
+ "strings: {}".format(repr(strings)))
+ if (self.vlevel >= 1) and (len(strings)-1 < self._n_positional_fields):
+ raise gfapy.FormatError(
+ "{} positional fields expected, ".format(self._n_positional_fields) +
+ "{} found\n{}".format(len(strings)-1, repr(strings)))
+ for i, n in enumerate(self.POSFIELDS):
+ self._init_field_value(n, self.__class__.DATATYPE[n], strings[i+1],
+ errmsginfo = strings)
+
+ def _initialize_tags(self, strings):
+ for i in range(len(self.POSFIELDS)+1, len(strings)):
+ self._initialize_tag(*(gfapy.Field._parse_gfa_tag(strings[i])),
+ errmsginfo = strings)
+
+ def _initialize_tag(self, n, t, s, errmsginfo = None):
+ if (self.vlevel > 0):
+ if n in self._data:
+ raise gfapy.NotUniqueError(
+ "Tag {} found multiple times".format(n))
+ elif self._is_predefined_tag(n):
+ self._validate_predefined_tag_type(n, t)
+ else:
+ self._validate_custom_tagname(n)
+ self._datatype[n] = t
+ else:
+ if not self._field_datatype(n):
+ self._datatype[n] = t
+ self._init_field_value(n, t, s, errmsginfo = errmsginfo)
+
+ @staticmethod
+ def _subclass(data, version = None):
+ record_type = data[0]
+ if record_type and record_type[0] == "#":
+ return gfapy.line.Comment
+ elif version == "gfa1":
+ return gfapy.Line._subclass_GFA1(record_type)
+ elif version == "gfa2":
+ return gfapy.Line._subclass_GFA2(record_type)
+ elif version is None:
+ return gfapy.Line._subclass_unknown_version(data)
+ else:
+ raise gfapy.VersionError(
+ "GFA specification version unknown ({})".format(version))
+
+ @staticmethod
+ def _subclass_GFA1(record_type):
+ if record_type is None:
+ raise gfapy.VersionError(
+ "gfapy uses virtual records of unknown type for GFA2 only")
+ if record_type == "H": return gfapy.line.Header
+ elif record_type == "S": return gfapy.line.segment.GFA1
+ elif record_type == "#": return gfapy.line.Comment
+ elif record_type == "L": return gfapy.line.edge.Link
+ elif record_type == "C": return gfapy.line.edge.Containment
+ elif record_type == "P": return gfapy.line.group.Path
+ else:
+ raise gfapy.VersionError(
+ "Custom record types are not supported in GFA1: '{}'".format(
+ record_type))
+
+ EXTENSIONS = {}
+ """Extensions (definition of custom record types) registered by the user."""
+
+ @staticmethod
+ def _subclass_GFA2(record_type):
+ if record_type == "H": return gfapy.line.Header
+ elif record_type == "S": return gfapy.line.segment.GFA2
+ elif record_type == "#": return gfapy.line.Comment
+ elif record_type == "E": return gfapy.line.edge.GFA2
+ elif record_type == "F": return gfapy.line.Fragment
+ elif record_type == "G": return gfapy.line.Gap
+ elif record_type == "O": return gfapy.line.group.Ordered
+ elif record_type == "U": return gfapy.line.group.Unordered
+ elif record_type in gfapy.Line.EXTENSIONS:
+ return gfapy.Line.EXTENSIONS[record_type]
+ else: return gfapy.line.CustomRecord
+
+ @staticmethod
+ def _subclass_unknown_version(data):
+ record_type = data[0]
+ if record_type == "H": return gfapy.line.Header
+ elif record_type == "S": return gfapy.line.Segment._subclass(data)
+ elif record_type == "#": return gfapy.line.Comment
+ elif record_type == "L": return gfapy.line.edge.Link
+ elif record_type == "C": return gfapy.line.edge.Containment
+ elif record_type == "P": return gfapy.line.group.Path
+ elif record_type == "E": return gfapy.line.edge.GFA2
+ elif record_type == "F": return gfapy.line.Fragment
+ elif record_type == "G": return gfapy.line.Gap
+ elif record_type == "O": return gfapy.line.group.Ordered
+ elif record_type == "U": return gfapy.line.group.Unordered
+ elif record_type in gfapy.Line.EXTENSIONS:
+ return gfapy.Line.EXTENSIONS[record_type]
+ else: return gfapy.line.CustomRecord
+
+ @classmethod
+ def _apply_definitions(cls):
+ """
+ Define accessors, aliases and reference getters for the predefined
+ fields of the subclass, so that they do not need to be created
+ dynamically on access.
+ """
+ cls._define_field_accessors()
+ cls._define_field_aliases()
+ cls._define_reference_getters()
+
+ @classmethod
+ def _define_field_accessors(cls):
+ if not cls.PREDEFINED_TAGS:
+ cls.PREDEFINED_TAGS = list(set(cls.DATATYPE.keys()) - set(cls.POSFIELDS))
+ fieldnames = cls.POSFIELDS + cls.PREDEFINED_TAGS
+ if cls.NAME_FIELD and cls.NAME_FIELD not in fieldnames:
+ fieldnames.append(cls.NAME_FIELD)
+ for fieldname in fieldnames:
+ def get_method(self, fieldname):
+ return self.get(fieldname)
+ def set_method(self, value, fieldname):
+ return self._set_existing_field(fieldname, value)
+ setattr(cls, fieldname,
+ DynamicField(partial(get_method, fieldname = fieldname),
+ partial(set_method, fieldname = fieldname)))
+ def try_get_method(self, fieldname):
+ return self.try_get(fieldname)
+ setattr(cls, "try_get_" + fieldname,
+ partialmethod(try_get_method, fieldname = fieldname))
+
+ @classmethod
+ def _define_field_aliases(cls):
+ if cls.STORAGE_KEY is None and cls.NAME_FIELD is not None:
+ cls.STORAGE_KEY = "name"
+ if cls.FIELD_ALIAS is None:
+ cls.FIELD_ALIAS = {}
+ if cls.NAME_FIELD is not None and "name" not in cls.POSFIELDS:
+ cls.FIELD_ALIAS["name"] = cls.NAME_FIELD
+ for k,v in cls.FIELD_ALIAS.items():
+ setattr(cls, k, getattr(cls, v))
+ setattr(cls, "try_get_" + k, getattr(cls, "try_get_" + v))
+
+ @classmethod
+ def _define_reference_getters(cls):
+ for k in cls.DEPENDENT_LINES + cls.OTHER_REFERENCES:
+ def get_method(self, k):
+ return self._refs.get(k , [])
+ def set_method(self, value, k):
+ raise gfapy.AttributeError(
+ "References collections cannot be set directly")
+ setattr(cls, k,
+ DynamicField(partial(get_method, k = k),
+ partial(set_method, k = k)))
+ def all_references(self):
+ return [item for values in self._refs.values() for item in values]
+
+ @classmethod
+ def register_extension(cls, references=[]):
+ # check the definitions
+ if isinstance(cls.POSFIELDS,OrderedDict):
+ for fieldname, datatype in cls.POSFIELDS.items():
+ cls.DATATYPE[fieldname] = datatype
+ cls.POSFIELDS = list(cls.POSFIELDS.keys())
+ else:
+ for posfield in cls.POSFIELDS:
+ if posfield not in cls.DATATYPE:
+ raise gfapy.RuntimeError("Extension {} ".format(str(cls))+
+ "defines no datatype for the positional field {}".format(posfield))
+ if hasattr(cls, "TAGS_DATATYPE"):
+ for fieldname, datatype in cls.TAGS_DATATYPE.items():
+ cls.DATATYPE[fieldname] = datatype
+ if not cls.RECORD_TYPE:
+ raise gfapy.RuntimeError("Extension {} ".format(str(cls))+
+ "does not define the RECORD_TYPE constant")
+ if cls.NAME_FIELD is not None:
+ gfapy.lines.finders.Finders.RECORDS_WITH_NAME.append(cls.RECORD_TYPE)
+ for field, klass, refkey in references:
+ if field not in cls.REFERENCE_FIELDS:
+ if not cls.REFERENCE_FIELDS:
+ cls.REFERENCE_FIELDS = []
+ cls.REFERENCE_FIELDS.append(field)
+ if refkey not in klass.DEPENDENT_LINES:
+ klass.DEPENDENT_LINES.append(refkey)
+ klass._define_reference_getters()
+ if cls.REFERENCE_INITIALIZERS is None:
+ cls.REFERENCE_INITIALIZERS = []
+ cls.REFERENCE_INITIALIZERS.append((field, klass, refkey))
+ cls._apply_definitions()
+ gfapy.Line.EXTENSIONS[cls.RECORD_TYPE] = cls
+ gfapy.Line.RECORD_TYPE_VERSIONS["specific"]["gfa2"].append(cls.RECORD_TYPE)
+
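+# Sketch of how register_extension() can be used to define a custom GFA2
+# record type (not from the upstream sources; the "T"/taxon record and its
+# fields are hypothetical):
+#
+#   from collections import OrderedDict
+#   import gfapy
+#
+#   class Taxon(gfapy.Line):
+#       RECORD_TYPE = "T"
+#       POSFIELDS = OrderedDict([("tid", "identifier_gfa2"),
+#                                ("desc", "Z")])
+#       NAME_FIELD = "tid"
+#
+#   Taxon.register_extension()
+#   # after registration, "T" lines are parsed as Taxon instances, e.g.:
+#   # gfapy.Line("T\ttaxon1\tBacteria", version="gfa2")
+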
diff --git a/gfapy/line/common/default_record_definition.py b/gfapy/line/common/default_record_definition.py
new file mode 100644
index 0000000..901dae7
--- /dev/null
+++ b/gfapy/line/common/default_record_definition.py
@@ -0,0 +1,19 @@
+import gfapy
+
+class DefaultRecordDefinition:
+ """
+ Provides default values for the constants for the definition of record types.
+ """
+ RECORD_TYPE = None
+ POSFIELDS = []
+ PREDEFINED_TAGS = []
+ DATATYPE = {}
+ NAME_FIELD = None
+ STORAGE_KEY = None
+ FIELD_ALIAS = None
+ REFERENCE_FIELDS = []
+ BACKREFERENCE_RELATED_FIELDS = []
+ DEPENDENT_LINES = []
+ OTHER_REFERENCES = []
+ REFERENCE_INITIALIZERS = None
+
diff --git a/gfapy/line/common/disconnection.py b/gfapy/line/common/disconnection.py
new file mode 100644
index 0000000..cc1759a
--- /dev/null
+++ b/gfapy/line/common/disconnection.py
@@ -0,0 +1,116 @@
+import gfapy
+
+class Disconnection:
+
+ def disconnect(self):
+ """
+ Remove the line from the GFA instance it belongs to, if any.
+
+ The Line instance itself will still exist, but all references from it to
+ other lines are deleted, as well as references to it from other lines.
+ Mandatory references are turned into their non-reference representations
+ (e.g. segments references in the sid fields of E lines
+ or in the from/to lines of L/C lines are changed into symbols).
+ """
+ if not self.is_connected():
+ raise gfapy.RuntimeError(
+ "Line {} is not connected to a GFA instance".format(self))
+ self._remove_field_backreferences()
+ self._remove_field_references()
+ self._disconnect_dependent_lines()
+ self._remove_nonfield_backreferences()
+ self._remove_nonfield_references()
+ self._gfa._unregister_line(self)
+ self._gfa = None
+
+ def _delete_reference(self, line, key):
+ if key not in self._refs: return
+ idx = None
+ for i, x in enumerate(self._refs[key]):
+ if x is line:
+ idx = i
+ if idx is None:
+ return
+ elif idx == 0:
+ self._refs[key].pop(0)
+ elif idx == len(self._refs[key])-1:
+ self._refs[key].pop()
+ else:
+ self._refs[key] = (self._refs[key][0:idx] + self._refs[key][idx+1:])
+
+ def _delete_first_reference(self, key):
+ if not self._refs or not self._refs[key]:
+ return
+ self._refs[key].pop(0)
+
+ def _delete_last_reference(self, key):
+ if not self._refs or not self._refs[key]:
+ return
+ self._refs[key].pop()
+
+ def _remove_field_references(self):
+ """
+ .. note::
+ currently this method supports fields which are: references,
+ oriented lines and lists of references of oriented lines;
+ if SUBCLASSES have reference fields which contain references
+ in a different fashion, the method must be updated or overwritten
+ in the subclass
+ """
+ for k in self.__class__.REFERENCE_FIELDS:
+ ref = self.get(k)
+ if isinstance(ref, gfapy.Line):
+ self._set_existing_field(k, ref.name, set_reference = True)
+ elif isinstance(ref, gfapy.OrientedLine):
+ ref.line = ref.name
+ elif isinstance(ref, list):
+ for i, elem in enumerate(ref):
+ if isinstance(elem, gfapy.Line):
+ ref[i] = elem.name
+ elif isinstance(elem, gfapy.OrientedLine):
+ ref[i].line = elem.name
+
+ def _remove_backreference(self, ref, k):
+ if isinstance(ref, gfapy.Line):
+ ref._update_references(self, None, k)
+ elif isinstance(ref, gfapy.OrientedLine):
+ if isinstance(ref.line, gfapy.Line):
+ ref.line._update_references(self, None, k)
+ elif isinstance(ref, list):
+ for i in range(len(ref)):
+ self._remove_backreference(ref[i], k)
+
+ def _disconnect_dependent_line(self, ref):
+ if isinstance(ref, gfapy.Line):
+ ref.disconnect()
+ elif isinstance(ref, gfapy.OrientedLine):
+ if isinstance(ref.line, gfapy.Line):
+ ref.line.disconnect()
+ elif isinstance(ref, list):
+ for i in range(len(ref)):
+ self._disconnect_dependent_line(ref[i])
+
+ def _remove_field_backreferences(self):
+ """
+ .. note::
+ currently this method supports fields which are: references,
+ oriented lines and lists of references of oriented lines;
+ if SUBCLASSES have reference fields which contain references
+ in a different fashion, the method must be updated or overwritten
+ in the subclass
+ """
+ for k in self.__class__.REFERENCE_FIELDS:
+ self._remove_backreference(self.get(k), k)
+
+ def _disconnect_dependent_lines(self):
+ for k in self.__class__.DEPENDENT_LINES:
+ for ref in self._refs.get(k, []):
+ self._disconnect_dependent_line(ref)
+
+ def _remove_nonfield_backreferences(self):
+ for k in self.__class__.OTHER_REFERENCES:
+ for ref in self._refs.get(k, []):
+ self._remove_backreference(ref, k)
+
+ def _remove_nonfield_references(self):
+ self._refs = {}
diff --git a/gfapy/line/common/dynamic_fields.py b/gfapy/line/common/dynamic_fields.py
new file mode 100644
index 0000000..7a725d9
--- /dev/null
+++ b/gfapy/line/common/dynamic_fields.py
@@ -0,0 +1,84 @@
+import gfapy
+from types import MethodType
+
+class DynamicField:
+ def __init__(self, get, set):
+ self.get = get
+ self.set = set
+
+class DynamicFields:
+ """
+ Methods are dynamically created for non-existing but valid tag names.
+ Methods for predefined tags and positional fields
+ are created dynamically for each subclass; methods for existing tags
+ are created on instance initialization.
+ """
+
+ def __getattribute__(self, name):
+ try:
+ attr = super().__getattribute__(name)
+ if not isinstance(attr, DynamicField):
+ return attr
+ else:
+ return attr.get(self)
+ except AttributeError as err:
+ return self._get_dynamic_field(name, err)
+
+ def __setattr__(self, name, value):
+ try:
+ attr = super().__getattribute__(name)
+ if not isinstance(attr, DynamicField):
+ return super().__setattr__(name, value)
+ else:
+ attr.set(self, value)
+ except AttributeError as err:
+ return self._set_dynamic_field(name, value)
+
+ def _get_dynamic_field(self, name, err):
+ if self.virtual:
+ raise err
+ if name.startswith("try_get_"):
+ name = name[8:]
+ try_get = True
+ else:
+ try_get = False
+ if name in self._data:
+ return (lambda : self.try_get(name)) if try_get else self.get(name)
+ if (name in self.__class__.PREDEFINED_TAGS or
+ self._is_valid_custom_tagname(name)):
+ if not try_get:
+ return None
+ else:
+ raise gfapy.NotFoundError(
+ "No value defined for tag {}".format(name))
+ else:
+ raise err
+
+ def _set_dynamic_field(self, name, value):
+ try:
+ virtual = super().__getattribute__("_virtual")
+ data = super().__getattribute__("_data")
+ if virtual:
+ super().__setattr__(name, value)
+ if name in data:
+ self._set_existing_field(name, value)
+ if (name in self.__class__.PREDEFINED_TAGS or
+ self._is_valid_custom_tagname(name)):
+ self.set(name, value)
+ else:
+ super().__setattr__(name, value)
+ except AttributeError:
+ super().__setattr__(name, value)
+
+ def _define_field_methods(self, fieldname):
+ """Define field methods for a single field"""
+ def getter(self):
+ return self.get(fieldname)
+ def try_get(self):
+ return self.try_get(fieldname)
+ def setter(self, value):
+ self._set_existing_field(fieldname, value)
+ super().__setattr__(fieldname, DynamicField(getter, setter))
+ super().__setattr__("try_get_" + fieldname, MethodType(try_get, self))
+
+# XXX: class methods are missing
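+
+# Sketch of the resulting behaviour (not from the upstream sources; tag names
+# are made up):
+#
+#   >>> import gfapy
+#   >>> line = gfapy.Line("S\tsA\tACCGT\txx:i:3", version="gfa1")
+#   >>> line.xx                  # existing tag: value is returned
+#   3
+#   >>> line.try_get_xx()
+#   3
+#   >>> print(line.yy)           # valid but unset custom tag: None
+#   None
+#   >>> line.try_get_yy          # unset tag: raises gfapy.NotFoundError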
diff --git a/gfapy/line/common/equivalence.py b/gfapy/line/common/equivalence.py
new file mode 100644
index 0000000..a4ba562
--- /dev/null
+++ b/gfapy/line/common/equivalence.py
@@ -0,0 +1,171 @@
+import gfapy
+
+class Equivalence:
+
+ def __hash__(self):
+ name = self.get("name")
+ if name:
+ return name.__hash__()
+ else:
+ return NotImplemented
+
+ def __eq__(self, o):
+ """
+ Equivalence check
+
+ Returns
+ -------
+ bool
+ does the line have the same record type, does it contain the
+ same tags, and do all positional fields and tags contain the
+ same field values?
+
+ See Also
+ --------
+ gfapy.line.edge.Link.__eq__
+ """
+ if o is self:
+ return True
+ if isinstance(o, str):
+ name = self.get("name")
+ if name:
+ return name == str(o)
+ if not isinstance(o, gfapy.Line):
+ return False
+ if (o.record_type != self.record_type):
+ return False
+ if sorted(o._data.keys()) != sorted(self._data.keys()):
+ return False
+ for k,v in o._data.items():
+ if self._data[k] != v:
+ if self.field_to_s(k) != o.field_to_s(k):
+ return False
+ return True
+
+ def diff(self, other):
+ if self.record_type != other.record_type:
+ return ("incompatible", "record_type", \
+ self.record_type, other.record_type)
+ if self.__class__ != other.__class__:
+ if self.version == other.version:
+ raise gfapy.AssertionError
+ return ("incompatible", "version", self.version, other.version)
+ differences = []
+ for fieldname in self.positional_fieldnames:
+ value1 = self.field_to_s(fieldname)
+ value2 = other.field_to_s(fieldname)
+ if value1 != value2:
+ differences.append(("different", "positional_field",
+ fieldname, value1, value2))
+ for tagname in other.tagnames:
+ if tagname not in self.tagnames:
+ differences.append(("exclusive", ">", "tag",
+ tagname, other.get_datatype(tagname),
+ other.get(tagname)))
+ for tagname in self.tagnames:
+ if tagname not in other.tagnames:
+ differences.append(("exclusive", "<", "tag",
+ tagname, self.get_datatype(tagname), self.get(tagname)))
+ else:
+ tag1 = self.field_to_s(tagname, tag=True)
+ tag2 = other.field_to_s(tagname, tag=True)
+ if tag1 != tag2:
+ differences.append(("different", "tag", tagname,
+ self.get_datatype(tagname),
+ self.field_to_s(tagname),
+ other.get_datatype(tagname),
+ other.field_to_s(tagname)))
+ return differences
+
+ def diffscript(self, other, selfvar):
+ outscript = []
+ for diffitem in self.diff(other):
+ if diffitem[0] == "incompatible":
+ if diffitem[1] == "record_type":
+ raise gfapy.RuntimeError(
+ "Cannot compute conversion script: different record type\n"+
+ "Line: {}\n".format(self)+
+ "Other: {}\n".format(other)+
+ "{0} != {1}",format(diffitem[2], diffitem[3]))
+ elif diffitem[1] == "version":
+ raise gfapy.RuntimeError(
+ "Cannot compute conversion script: different GFA version\n"+
+ "Line: {}\n".format(self)+
+ "Other: {}\n".format(other)+
+ "{0} != {1}",format(diffitem[2], diffitem[3]))
+ elif diffitem[0] == "different":
+ if diffitem[1] == "positional_field":
+ outscript.append("{0}.set('{1}', '{2}')".format(selfvar,
+ diffitem[2].replace("'","\\'"),
+ diffitem[4].replace("'","\\'")))
+ elif diffitem[1] == "tag":
+ if diffitem[3] != diffitem[5]:
+ outscript.append("{0}.set_datatype('{1}', '{2}')".format(selfvar,
+ diffitem[2].replace("'","\\'"),
+ diffitem[5].replace("'","\\'")))
+ if diffitem[4] != diffitem[6]:
+ outscript.append("{0}.set('{1}', '{2}')".format(selfvar,
+ diffitem[2].replace("'","\\'"),
+ diffitem[6].replace("'","\\'")))
+ elif diffitem[0] == "exclusive":
+ if diffitem[1] == ">":
+ if diffitem[2] == "tag":
+ outscript.append("{0}.set_datatype('{1}', '{2}')".format(selfvar,
+ diffitem[3].replace("'","\\'"),
+ diffitem[4].replace("'","\\'")))
+ outscript.append("{0}.set('{1}', '{2}')".format(selfvar,
+ diffitem[3].replace("'","\\'"),
+ diffitem[5].replace("'","\\'")))
+ elif diffitem[1] == "<":
+ if diffitem[2] == "tag":
+ outscript.append("{0}.delete('{1}')".format(selfvar,
+ diffitem[3].replace("'","\\'")))
+ return "\n".join(outscript)
+
+
+ def _has_field_values(self, hsh, ignore_fields = None):
+ assert(isinstance(hsh, dict))
+ if ignore_fields is None:
+ ignore_fields = []
+ if ("record_type" in hsh) and ("record_type" not in ignore_fields) \
+ and (self.record_type != hsh["record_type"]):
+ return False
+ ignore_fields.append("record_type")
+ fieldnames = [i for i in hsh.keys() if i not in ignore_fields]
+ for fieldname in fieldnames:
+ value = self.get(fieldname)
+ if value is None:
+ return False
+ if gfapy.is_placeholder(value):
+ continue
+ if value != hsh[fieldname] and \
+ (self.field_to_s(fieldname) != hsh[fieldname]):
+ return False
+ return True
+
+ def _has_eql_fields(self, refline, ignore_fields = None):
+ assert(isinstance(refline, gfapy.Line))
+ if ignore_fields is None:
+ ignore_fields = []
+ self._dealias_fieldnames(ignore_fields)
+ if ("record_type" not in ignore_fields) \
+ and (self.record_type != refline.record_type):
+ return False
+ fieldnames = refline.positional_fieldnames + refline.tagnames
+ fieldnames = [i for i in fieldnames if i not in ignore_fields]
+ if "name" in ignore_fields:
+ name_field = refline.__class__.NAME_FIELD
+ if name_field in fieldnames:
+ fieldnames.remove(name_field)
+ for fieldname in fieldnames:
+ refvalue = refline.get(fieldname)
+ if gfapy.is_placeholder(refvalue):
+ continue
+ value = self.get(fieldname)
+ if value is None:
+ return False
+ if gfapy.is_placeholder(value):
+ continue
+ if value != refvalue:
+ return False
+ return True
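+
+# Sketch of diff() and diffscript() on two slightly different lines (not from
+# the upstream sources; names and tag values are made up):
+#
+#   >>> import gfapy
+#   >>> a = gfapy.Line("S\tsA\tACCGT\txx:i:1", version="gfa1")
+#   >>> b = gfapy.Line("S\tsA\tACCGT\txx:i:2", version="gfa1")
+#   >>> a == b
+#   False
+#   >>> a.diff(b)
+#   [('different', 'tag', 'xx', 'i', '1', 'i', '2')]
+#   >>> print(a.diffscript(b, "a"))
+#   a.set('xx', '2')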
diff --git a/gfapy/line/common/field_data.py b/gfapy/line/common/field_data.py
new file mode 100644
index 0000000..520e7e4
--- /dev/null
+++ b/gfapy/line/common/field_data.py
@@ -0,0 +1,198 @@
+import gfapy
+
+class FieldData:
+
+ @property
+ def positional_fieldnames(self):
+ """Name of the positional fields.
+ Returns:
+ str list
+
+ Note:
+ The field names may differ from the names used in the
+ specification (e.g. from_segment instead of from, to avoid
+ clashes with Python keywords).
+ """
+ return self.__class__.POSFIELDS
+
+ @property
+ def tagnames(self):
+ """Names of the tags defined in the line.
+ Returns:
+ str list
+ """
+ return [ x for x in self._data.keys() \
+ if x not in self.positional_fieldnames ]
+
+ @property
+ def record_type(self):
+ """The record type.
+
+ The content of the first field of the line. In all predefined record types
+ this is a single-letter upper case string.
+
+ Returns:
+ str
+ """
+ return self.__class__.RECORD_TYPE
+
+ def set(self, fieldname, value):
+ """Set the value of a field.
+
+ If a datatype for a new custom tag is not set,
+ the default for the value assigned to the field will be used
+ (e.g. J for dicts, i for integers, etc.).
+
+ Parameters
+ ----------
+ fieldname : str
+ The name of the field to set.
+ (positional field, predefined tag (uppercase) or custom tag (lowercase))
+
+ Raises
+ ------
+ gfapy.FormatError
+ If **fieldname** is not a valid predefined or
+ custom tag name (and **validate["tags"]**).
+
+ Returns
+ -------
+ object
+ **value**
+ """
+ if fieldname in self._data or self._is_predefined_tag(fieldname):
+ return self._set_existing_field(fieldname, value)
+ elif fieldname in self.__class__.FIELD_ALIAS:
+ return self.set(self.__class__.FIELD_ALIAS[fieldname], value)
+ elif self.virtual:
+ raise gfapy.RuntimeError("Virtual lines do not have tags")
+ elif (self.vlevel == 0) or self._is_valid_custom_tagname(fieldname):
+ self._define_field_methods(fieldname)
+ if self._datatype.get(fieldname, None) is not None:
+ return self._set_existing_field(fieldname, value)
+ elif value is not None:
+ self._datatype[fieldname] = gfapy.Field._get_default_gfa_tag_datatype(value)
+ self._data[fieldname] = value
+ return self._data[fieldname]
+ else:
+ raise gfapy.FormatError(
+ "{} is not a positional field,".format(fieldname)+
+ "an existing tag, an alias, a predefined tag or a valid custom tag\n"+
+ "positional fields: {}\n".format(", ".join(self.positional_fieldnames))+
+ "existing tags: {}\n".format(", ".join(self.tagnames))+
+ "aliases: {}\n".format(", ".join(self.__class__.FIELD_ALIAS.keys()))+
+ "predefined tags: {}\n".format(", ".join(self.__class__.PREDEFINED_TAGS)))
+
+ def get(self, fieldname):
+ """
+ Get the value of a field.
+
+ Parameters
+ ----------
+ fieldname : str
+ Name of the field.
+
+ Returns
+ -------
+ object or None
+ Value of the field or **None** if field is not defined.
+ """
+ v = self._data.get(fieldname, None)
+ if isinstance(v, str):
+ t = self._field_datatype(fieldname)
+ if t != "Z" and t != "seq":
+ # value was not parsed or was set to a string by the user
+ self._data[fieldname] = gfapy.Field._parse_gfa_field(v, t,
+ safe = (self.vlevel >= 1),
+ fieldname = fieldname,
+ line = self)
+ return self._data[fieldname]
+ else:
+ if (self.vlevel >= 3):
+ gfapy.Field._validate_gfa_field(v, t, fieldname)
+ elif v is not None:
+ if (self.vlevel >= 3):
+ t = self._field_datatype(fieldname)
+ gfapy.Field._validate_gfa_field(v, t, fieldname)
+ else:
+ dealiased_fieldname = self.__class__.FIELD_ALIAS.get(fieldname, None)
+ if dealiased_fieldname is not None:
+ return self.get(dealiased_fieldname)
+ return v
+
+ def try_get(self, fieldname):
+ """
+ Value of a field, raising an exception if it is not defined.
+
+ Parameters
+ ----------
+ fieldname : str
+ Name of the field.
+
+ Raises
+ ------
+ gfapy.NotFoundError
+ If field is not defined.
+
+ Returns
+ -------
+ object or None
+ Value of the field.
+ """
+ v = self.get(fieldname)
+ if v is None:
+ raise gfapy.NotFoundError(
+ "No value defined for tag {}".format(fieldname))
+ return v
+
+ def delete(self, tagname):
+ """
+ Remove a tag from the line, if it exists; do nothing if it does not.
+
+ Parameters
+ ----------
+ tagname : str
+ The tag name of the tag to remove.
+
+ Returns
+ -------
+ object or None
+ The deleted value or None, if the field was not defined.
+ """
+ if tagname in self.tagnames:
+ if tagname in self._datatype:
+ self._datatype.pop(tagname)
+ return self._data.pop(tagname)
+ else:
+ return None
+
+ def _set_existing_field(self, fieldname, value, set_reference = False):
+ renaming_connected = False
+ if self._gfa:
+ if not set_reference and \
+ (fieldname in self.__class__.REFERENCE_FIELDS or \
+ fieldname in self.__class__.BACKREFERENCE_RELATED_FIELDS):
+ raise gfapy.RuntimeError(
+ "The value of field '{}' cannot be changed, ".format(fieldname)+
+ "as the line belongs to a GFA instance")
+ if (fieldname == self.__class__.STORAGE_KEY) or \
+ (self.__class__.STORAGE_KEY == "name" and \
+ fieldname == self.__class__.NAME_FIELD):
+ renaming_connected = True
+ self._gfa._unregister_line(self)
+ if value is None:
+ if fieldname in self._data:
+ self._data.pop(fieldname)
+ else:
+ if self.vlevel >= 3:
+ self._field_or_default_datatype(fieldname, value)
+ gfapy.Field._validate_gfa_field(value, self._field_datatype(fieldname),
+ fieldname)
+ self._data[fieldname] = value
+ if renaming_connected:
+ self._gfa._register_line(self)
+
+ def _dealias_fieldname(self, fieldname):
+ return self.__class__.FIELD_ALIAS.get(fieldname, fieldname)
+
+ def _dealias_fieldnames(self, fieldnames):
+ fieldnames[:] = map(self._dealias_fieldname, fieldnames)
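+
+# Sketch of the field access API defined above (not from the upstream sources;
+# the xx tag is made up):
+#
+#   >>> import gfapy
+#   >>> line = gfapy.Line("H\tVN:Z:1.0")
+#   >>> line.get("VN")
+#   '1.0'
+#   >>> line.set("xx", 12)       # datatype "i" is inferred from the value
+#   12
+#   >>> line.field_to_s("xx", tag=True)
+#   'xx:i:12'
+#   >>> line.delete("xx")
+#   12
+#   >>> print(line.get("xx"))
+#   None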
diff --git a/gfapy/line/common/field_datatype.py b/gfapy/line/common/field_datatype.py
new file mode 100644
index 0000000..bc3f96d
--- /dev/null
+++ b/gfapy/line/common/field_datatype.py
@@ -0,0 +1,66 @@
+import gfapy
+
+class FieldDatatype:
+
+ def get_datatype(self, fieldname):
+ """
+ Returns a string, which specifies the datatype of a field.
+
+ Parameters
+ ----------
+ fieldname : str
+ The tag name of the field.
+
+ Returns
+ -------
+ str
+ The datatype symbol.
+ (One of gfapy.Field.FIELD_DATATYPE)
+ """
+ fieldname = self.__class__.FIELD_ALIAS.get(fieldname, fieldname)
+ return self._field_or_default_datatype(fieldname,
+ self._data.get(fieldname,None))
+
+ def set_datatype(self, fieldname, datatype):
+ """
+ Set the datatype of a tag.
+
+ If an existing tag datatype is changed, its content may become
+ invalid (call **validate_field** if necessary).
+
+ Parameters
+ ----------
+ fieldname : str
+ The field name (it is not required that the field exists already)
+ datatype : gfapy.Field.FIELD_DATATYPE
+ The datatype.
+
+ Raises
+ ------
+ gfapy.ArgumentError
+ If **datatype** is not a valid datatype for tags.
+ """
+ if self._is_predefined_tag(fieldname):
+ if self.get_datatype(fieldname) != datatype:
+ raise gfapy.RuntimeError(
+ "Cannot set the datatype of {} to {}\n".format(fieldname, datatype)+
+ "The datatype of a predefined tag cannot be changed")
+ elif not self._is_valid_custom_tagname(fieldname) and self.vlevel > 0:
+ raise gfapy.FormatError(
+ "{} is not a valid custom tag name".format(fieldname))
+ if datatype not in gfapy.Field.TAG_DATATYPE:
+ raise gfapy.ArgumentError("Unknown datatype: {}".format(datatype))
+ self._datatype[fieldname] = datatype
+
+ def _field_datatype(self, fieldname):
+ return self._datatype.get(fieldname,
+ self.__class__.DATATYPE.get(fieldname, None))
+
+ def _field_or_default_datatype(self, fieldname, value):
+ t = self._field_datatype(fieldname)
+ if t is None:
+ if value is None:
+ return None
+ t = gfapy.Field._get_default_gfa_tag_datatype(value)
+ self._datatype[fieldname] = t
+ return t
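+
+# Sketch of declaring a tag datatype before assigning a value (not from the
+# upstream sources; the xx tag is made up):
+#
+#   >>> import gfapy
+#   >>> line = gfapy.Line("H")
+#   >>> line.set_datatype("xx", "Z")
+#   >>> line.set("xx", "1.0")    # stored as a string, not parsed as a float
+#   >>> line.get_datatype("xx")
+#   'Z'
+#   >>> line.field_to_s("xx", tag=True)
+#   'xx:Z:1.0'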
diff --git a/gfapy/line/common/update_references.py b/gfapy/line/common/update_references.py
new file mode 100644
index 0000000..821693e
--- /dev/null
+++ b/gfapy/line/common/update_references.py
@@ -0,0 +1,119 @@
+"""
+Update of references caused by a virtual line becoming real.
+"""
+import gfapy
+
+class UpdateReferences:
+
+ def _update_references(self, oldref, newref, key_in_ref):
+ """
+ This is called on lines which were referenced by virtual lines,
+ when a real line is found which substitutes the virtual line.
+
+ .. note::
+ SUBCLASSES which can be referenced by virtual lines
+ may implement a specialized *_backreference_keys* method to
+ support this mechanism (the default will work in all cases
+ of the current specification, but is not optimized for record type)
+
+ Parameters
+ ----------
+ oldref : gfapy.Line
+ newref : gfapy.Line
+ key_in_ref : str list
+ """
+ keys = self._backreference_keys(oldref, key_in_ref)
+ assert(keys is not None)
+ self.__update_field_references(oldref, newref,
+ list(set(self.__class__.REFERENCE_FIELDS)
+ .intersection(keys)))
+ if hasattr(self, "_refs"):
+ # note: keeping the two types of nonfield references separate helps
+ # in subclasses where only one must be redefined
+ self.__update_dependent_line_references(oldref, newref,
+ set(self.__class__.DEPENDENT_LINES)
+ .intersection(self._refs.keys())
+ .intersection(keys))
+ self.__update_other_references(oldref, newref,
+ list(set(self.__class__.OTHER_REFERENCES)
+ .intersection(self._refs.keys())
+ .intersection(keys)))
+
+ def _backreference_keys(self, ref, key_in_ref):
+ """
+ Return a list of fields and/or @ref keys, which indicates
+ where a reference "ref" _may_ be stored (in order to be able
+ to locate it and update it).
+
+ The default is: all reference fields, dependent line references
+ and other references.
+
+ .. note::
+ SUBCLASSES may overwrite this method if they
+ can be referenced by virtual lines, by providing more
+ specific results, depending on the ref and key_in_ref;
+ this can make the update faster.
+
+ Returns
+ -------
+ str list
+ Fieldnames and/or _refs keys.
+ """
+ return (self.__class__.REFERENCE_FIELDS +
+ self.__class__.DEPENDENT_LINES +
+ self.__class__.OTHER_REFERENCES)
+
+ def __update_reference_in_field(self, field, oldref, newref):
+ """
+ .. note::
+ This methods supports fields which contain references,
+ oriented lines or lists of references or oriented lines;
+ if SUBCLASSES contain fields which reference to line in a
+ different fashion, the method must be updated or overwritten
+ by the subclass
+ """
+ value = self.get(field)
+ if isinstance(value, gfapy.Line):
+ if value is oldref:
+ self._set_existing_field(field, newref, set_reference = True)
+ elif isinstance(value, gfapy.OrientedLine):
+ if value.line is oldref:
+ value.line = newref
+ elif isinstance(value, list):
+ self.__update_reference_in_list(value, oldref, newref)
+
+ def __update_reference_in_list(self, lst, oldref, newref):
+ found = False
+ for idx, elem in enumerate(lst):
+ if isinstance(elem, gfapy.Line):
+ if elem is oldref:
+ lst[idx] = newref
+ found = True
+ elif isinstance(elem, gfapy.OrientedLine):
+ if elem.line is oldref:
+ if hasattr(oldref, "is_complement") and \
+ oldref.is_complement(newref):
+ elem.orient = gfapy.invert(elem.orient)
+ elem.line = newref
+ found = True
+ if newref is None and found:
+ lst[:] = [e for e in lst if e is not None]
+
+ def __update_field_references(self, oldref, newref, possible_fieldnames):
+ for fn in possible_fieldnames:
+ self.__update_reference_in_field(fn, oldref,
+ newref if newref else str(oldref))
+
+ def __update_nonfield_references(self, oldref, newref, possible_keys):
+ for key in possible_keys:
+ if key in self._refs:
+ self.__update_reference_in_list(self._refs[key], oldref, newref)
+
+ def __update_dependent_line_references(self, oldref, newref, possible_keys):
+ self.__update_nonfield_references(oldref, newref, possible_keys)
+
+ def __update_other_references(self, oldref, newref, possible_keys):
+ """
+ .. note:: SUBCLASSES may redefine this method
+ """
+ self.__update_nonfield_references(oldref, newref, possible_keys)
diff --git a/gfapy/line/common/validate.py b/gfapy/line/common/validate.py
new file mode 100644
index 0000000..ef04fa8
--- /dev/null
+++ b/gfapy/line/common/validate.py
@@ -0,0 +1,70 @@
+import re
+import gfapy
+
+class Validate:
+
+ def validate_field(self, fieldname):
+ """
+ Raises an error if the content of the field does not correspond to
+ the field type.
+
+ Parameters
+ ----------
+ fieldname : str
+ The tag name of the field to validate.
+
+ Raises
+ ------
+ gfapy.FormatError
+ If the content of the field is not valid, according to its required type.
+ """
+ fieldname = self.__class__.FIELD_ALIAS.get(fieldname, fieldname)
+ v = self._data[fieldname]
+ t = self._field_or_default_datatype(fieldname, v)
+ gfapy.Field._validate_gfa_field(v, t, fieldname)
+
+ def validate(self):
+ """
+ Validate the gfapy.Line instance.
+
+ Raises
+ ------
+ gfapy.FormatError
+ If any field content is not valid.
+ """
+ fieldnames = self.positional_fieldnames + self.tagnames
+ if self.vlevel == 0:
+ self._validate_tagnames_and_types()
+ for fieldname in fieldnames:
+ self.validate_field(fieldname)
+ self._validate_record_type_specific_info()
+
+ def _validate_tagnames_and_types(self):
+ for n in self.tagnames:
+ if self._is_predefined_tag(n):
+ self._validate_predefined_tag_type(n, self._field_datatype(n))
+ elif not self._is_valid_custom_tagname(n):
+ raise gfapy.FormatError("Custom tags must be lower case\n"+
+ "Found: {}".format(n))
+
+ def _validate_predefined_tag_type(self, tagname, datatype):
+ if datatype != self.__class__.DATATYPE[tagname]:
+ raise gfapy.TypeError(
+ "Tag {} must be of type ".format(tagname) +
+ "{}, {} found".format(self.__class__.DATATYPE[tagname], datatype))
+
+ def _validate_custom_tagname(self, tagname):
+ if not self._is_valid_custom_tagname(tagname):
+ raise gfapy.FormatError("Custom tags must be lower case\n"+
+ "Found: {}".format(tagname))
+
+ @staticmethod
+ def _is_valid_custom_tagname(tagname):
+ return (re.match(r"^[a-z][a-z0-9]$", tagname))
+
+ def _validate_record_type_specific_info(self):
+ pass
+
+ def _is_predefined_tag(self, fieldname):
+ return fieldname in self.__class__.PREDEFINED_TAGS
+
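+# Sketch of explicit validation (not from the upstream sources; it assumes
+# that a space is not a valid GFA1 sequence character):
+#
+#   >>> import gfapy
+#   >>> line = gfapy.Line("S\tsA\tACCGT", version="gfa1", vlevel=0)
+#   >>> line.validate()          # no exception: the line is valid
+#   >>> line.sequence = "AC GT"  # assignment alone does not validate
+#   >>> line.validate()          # now raises gfapy.FormatError
+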
diff --git a/gfapy/line/common/version_conversion.py b/gfapy/line/common/version_conversion.py
new file mode 100644
index 0000000..8c9d35a
--- /dev/null
+++ b/gfapy/line/common/version_conversion.py
@@ -0,0 +1,86 @@
+import gfapy
+try:
+ from functools import partialmethod
+except ImportError:
+ #for compatibility with old python versions
+ def partialmethod(method, **kwargs):
+ return lambda self: method(self, **kwargs)
+
+class VersionConversion:
+
+ @property
+ def version(self):
+ """
+ Returns
+ -------
+ gfapy.VERSIONS, None
+ GFA specification version
+ """
+ return self._version
+
+ def to_version_s(self, version):
+ """
+ Returns
+ -------
+ str
+ A string representation of self, converted to the specified GFA version.
+ """
+ return gfapy.Line.SEPARATOR.join(getattr(self, "_to_"+version+"_a")())
+
+ def _to_version_a(self, version):
+ """
+ .. note::
+ The default is an alias of to_list() if version is equal to the
+ version of the line, and an empty list otherwise.
+ gfapy.Line subclasses can redefine this method to convert
+ between versions.
+
+ Returns
+ -------
+ str list
+ A list of string representations of the fields.
+ """
+ if version == self._version:
+ return self.to_list()
+ else:
+ return []
+
+ def to_version(self, version, raise_on_failure=True):
+ """
+ Returns
+ -------
+ gfapy.Line
+ Conversion to the selected version.
+ """
+ if version == self._version:
+ return self
+ elif version not in gfapy.VERSIONS:
+ raise gfapy.VersionError("Version unknown ({})".format(version))
+ else:
+ l = getattr(self, "_to_"+version+"_a")()
+ if l:
+ try:
+ converted = gfapy.Line(l, version=version, vlevel=self.vlevel)
+ except:
+ raise gfapy.RuntimeError("Conversion to {} failed\n".format(version)+
+ "Line: {}".format(str(self)))
+ return converted
+ elif raise_on_failure:
+ raise gfapy.VersionError("Records of type {} ".format(self.record_type)+
+ "cannot be converted from version {} ".format(self._version)+
+ "to version {}".format(version))
+ else:
+ return None
+
+for shall_version in ["gfa1", "gfa2"]:
+ setattr(VersionConversion, "to_"+shall_version+"_s",
+ partialmethod(VersionConversion.to_version_s,
+ version = shall_version))
+
+ setattr(VersionConversion, "_to_"+shall_version+"_a",
+ partialmethod(VersionConversion._to_version_a,
+ version = shall_version))
+
+ setattr(VersionConversion, "to_"+shall_version,
+ partialmethod(VersionConversion.to_version,
+ version = shall_version))
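+
+# Sketch of the generated conversion methods (not from the upstream sources):
+#
+#   >>> import gfapy
+#   >>> line = gfapy.Line("S\tsA\tACCGT", version="gfa1")
+#   >>> line.version
+#   'gfa1'
+#   >>> line.to_gfa1() is line   # same version: the line itself is returned
+#   True
+#   >>> line.to_version("gfa1") is line
+#   True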
diff --git a/gfapy/line/common/virtual_to_real.py b/gfapy/line/common/virtual_to_real.py
new file mode 100644
index 0000000..0d5df5f
--- /dev/null
+++ b/gfapy/line/common/virtual_to_real.py
@@ -0,0 +1,77 @@
+"""
+Methods in this module are important for lines
+which can be virtual
+"""
+import gfapy
+
+class VirtualToReal:
+
+ @property
+ def virtual(self):
+ """
+ Is the line virtual?
+
+ Is this gfapy.Line a virtual line representation
+ (i.e. a placeholder for an expected but not yet encountered line)?
+
+ Returns
+ -------
+ bool
+ """
+ return self._virtual
+
+ def _substitute_virtual_line(self, previous):
+ self._gfa = previous.gfa
+ self._import_references(previous)
+ self._gfa._unregister_line(previous)
+ self._gfa._register_line(self)
+ return None
+
+ def _import_references(self, previous):
+ """
+ This is called when a virtual line (previous) is
+ substituted by a real line
+ """
+ if not isinstance(previous, gfapy.line.Unknown):
+ self._import_field_references(previous)
+ self._update_field_backreferences(previous)
+ else:
+ self._initialize_references()
+ self._import_nonfield_references(previous)
+ self._update_nonfield_backreferences(previous)
+
+ def _import_field_references(self, previous):
+ for k in (self.__class__.REFERENCE_FIELDS +
+ self.__class__.BACKREFERENCE_RELATED_FIELDS):
+ ref = previous.get(k)
+ self._set_existing_field(k, ref, set_reference = True)
+
+ def _update_backreference_in(self, ref, previous, k):
+ if isinstance(ref, gfapy.Line):
+ ref._update_references(previous, self, k)
+ elif isinstance(ref, gfapy.OrientedLine):
+ ref.line._update_references(previous, self, k)
+ elif isinstance(ref, list):
+ for item in ref:
+ self._update_backreference_in(item, previous, k)
+
+ def _update_field_backreferences(self, previous):
+ """
+ .. note::
+ Currently this method supports fields which are: references,
+ oriented lines and lists of references of oriented lines.
+ If SUBCLASSES have reference fields which contain references
+ in a different fashion, the method must be updated or overwritten
+ in the subclass.
+ """
+ for k in self.__class__.REFERENCE_FIELDS:
+ ref = self.get(k)
+ self._update_backreference_in(ref, previous, k)
+
+ def _import_nonfield_references(self, previous):
+ self._refs = previous._refs
+
+ def _update_nonfield_backreferences(self, previous):
+ for k, v in self._refs.items():
+ for ref in v:
+ self._update_backreference_in(ref, previous, k)
diff --git a/gfapy/line/common/writer.py b/gfapy/line/common/writer.py
new file mode 100644
index 0000000..673a9f9
--- /dev/null
+++ b/gfapy/line/common/writer.py
@@ -0,0 +1,145 @@
+import gfapy
+
+class Writer:
+
+ def __str__(self):
+ """
+ Returns
+ -------
+ str
+ A string representation of self.
+ """
+ return gfapy.Line.SEPARATOR.join(self.to_list())
+
+ def to_str(self, add_virtual_commentary=True):
+ """
+ Parameters
+ ----------
+ add_virtual_commentary : bool
+ add a 'co' tag to virtual lines (default: True)
+
+ Returns
+ -------
+ str
+ A string representation of self.
+ """
+ return gfapy.Line.SEPARATOR.join(self.to_list(
+ add_virtual_commentary=add_virtual_commentary))
+
+ def to_list(self, add_virtual_commentary=True):
+ """
+ Parameters
+ ----------
+ add_virtual_commentary : bool
+ add a 'co' tag to virtual lines (default: True)
+
+ Returns
+ -------
+ str list
+ A list of string representations of the fields.
+ """
+ a = [self.record_type]
+ errors = []
+ for fn in self.positional_fieldnames:
+ try:
+ fstr = self.field_to_s(fn, tag = False)
+ except:
+ fstr = str(self.get(fn))
+ errors.append(fn)
+ a.append(fstr)
+ for fn in self.tagnames:
+ try:
+ fstr = self.field_to_s(fn, tag = True)
+ except:
+ fstr = str(self.get(fn))
+ errors.append(fn)
+ a.append(fstr)
+ if self.virtual and add_virtual_commentary:
+ a.append("co:Z:GFAPY_virtual_line")
+ if errors:
+ a.append("# INVALID; errors found in fields: "+
+ ",".join(errors))
+ return a
+
+ def field_to_s(self, fieldname, tag = False):
+ """
+ Compute the string representation of a field.
+
+ Parameters
+ ----------
+ fieldname : str
+ The tag name of the field.
+ tag : bool
+ *(defaults to: ***False***)*
+ Return the tagname:datatype:value representation.
+
+ Raises
+ ------
+ gfapy.NotFoundError
+ If field is not defined.
+
+ Returns
+ -------
+ str
+ The string representation
+ """
+ fieldname = self.__class__.FIELD_ALIAS.get(fieldname, fieldname)
+ v = self._data.get(fieldname, None)
+ if v is None:
+ raise gfapy.NotFoundError("Field {} not found".format(fieldname))
+ t = self._field_or_default_datatype(fieldname, v)
+ if not isinstance(v, str):
+ v = gfapy.Field._to_gfa_field(v, datatype = t, fieldname = fieldname,
+ line = self)
+ if self.vlevel >= 2:
+ gfapy.Field._validate_gfa_field(v, t, fieldname)
+ if tag:
+ return gfapy.Field._to_gfa_tag(v, fieldname, datatype = t, line = self)
+ else:
+ return v
+
+ def __repr__(self):
+ try:
+ s = str(self)
+ except:
+ s = "\t".join([ self.record_type + "(error!)" ] + \
+ [ repr(self.get(fn)) for fn in self.positional_fieldnames ] + \
+ [ (fn + ":" + self.get_datatype(fn) + ":" + repr(self.get(fn))) for fn in self.tagnames ])
+ return "gfapy.Line('{0}',version='{1}',vlevel={2})".format(s,self.version,self.vlevel)
+
+ def refstr(self, maxlen=10):
+ """String containing a list of lines referencing to this line.
+
+ Parameters
+ ----------
+ maxlen : int
+ Shorten lists longer than the specified value (default: 10)
+
+ Returns
+ -------
+ str
+ """
+ andmore = 0
+ references = self.all_references
+ if len(references) > maxlen:
+ andmore = len(references) - maxlen
+ references = references[:maxlen]
+ lines_list = "\n".join([str(l) for l in references])
+ if andmore > 0:
+ lines_list += "\n... ({} more)".format(andmore)
+ return lines_list
+
+ @property
+ def _tags(self):
+ """
+ Returns the tags as an array of [fieldname, datatype, value]
+ triples.
+
+ Returns
+ -------
+ (str, str, object) list
+ """
+ retval = []
+ for of in self.tagnames:
+ retval.append([of, self.get_datatype(of), self.get(of)])
+ return retval
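+
+# Sketch of the string conversion methods (not from the upstream sources;
+# names and tags are made up):
+#
+#   >>> import gfapy
+#   >>> line = gfapy.Line("S\tsA\tACCGT\txx:i:3", version="gfa1")
+#   >>> str(line)
+#   'S\tsA\tACCGT\txx:i:3'
+#   >>> line.to_list()
+#   ['S', 'sA', 'ACCGT', 'xx:i:3']
+#   >>> line.field_to_s("xx")
+#   '3'
+#   >>> line.field_to_s("xx", tag=True)
+#   'xx:i:3'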
diff --git a/gfapy/line/custom_record/__init__.py b/gfapy/line/custom_record/__init__.py
new file mode 100644
index 0000000..5d3aeab
--- /dev/null
+++ b/gfapy/line/custom_record/__init__.py
@@ -0,0 +1 @@
+from .custom_record import CustomRecord
diff --git a/gfapy/line/custom_record/construction.py b/gfapy/line/custom_record/construction.py
new file mode 100644
index 0000000..46cb919
--- /dev/null
+++ b/gfapy/line/custom_record/construction.py
@@ -0,0 +1,59 @@
+import gfapy
+
+class Construction:
+
+ @property
+ def positional_fieldnames(self):
+ """The names of the positional fields.
+
+ The property is implemented differently for CustomRecord
+ instances, as the positional fieldnames are unknown.
+
+ Returns:
+ list of str
+ """
+ return self._positional_fieldnames
+
+ @property
+ def tagnames(self):
+ """The names of the tags defined in the line.
+
+ The property is implemented differently for CustomRecord
+ instances, as tags are identified heuristically (as the number
+ of positional fields is unknown).
+
+ Returns:
+ list of str
+ """
+ return [x for x in self._data.keys()
+ if (x not in self.positional_fieldnames)
+ and (x != "record_type")]
+
+ def _initialize_positional_fields(self, strings):
+ """delayed, see #delayed_inizialize_positional_fields"""
+ pass
+
+ def _initialize_tags(self, strings):
+ first_tag = len(strings)
+ for i in range(len(strings)-1, 0, -1):
+ try:
+ self._initialize_tag(*(gfapy.Field._parse_gfa_tag(strings[i])))
+ except:
+ break
+ first_tag = i
+ self._delayed_initialize_positional_fields(strings, first_tag)
+
+ def _delayed_initialize_positional_fields(self, strings, n_positional_fields):
+ self._positional_fieldnames = []
+ if strings[0] in ["P", "C", "L"]:
+ raise gfapy.VersionError(
+ "GFA-like line (P,C,L) found in GFA2\n"+
+ "Line: {}\n".format(" ".join(strings))+
+ "Custom lines with record_type P, C and L are not supported by gfapy.")
+ self._init_field_value("record_type", "custom_record_type", strings[0],
+ errmsginfo = strings)
+ for i in range(1, n_positional_fields):
+ n = "field{}".format(i)
+ self._init_field_value(n, "generic", strings[i], errmsginfo = strings)
+ self.positional_fieldnames.append(n)
+ self._datatype[n] = "generic"
diff --git a/gfapy/line/custom_record/custom_record.py b/gfapy/line/custom_record/custom_record.py
new file mode 100644
index 0000000..65c0a34
--- /dev/null
+++ b/gfapy/line/custom_record/custom_record.py
@@ -0,0 +1,25 @@
+from .construction import Construction
+from ..line import Line
+
+class CustomRecord(Construction, Line):
+ """Custom record of a GFA2 file.
+
+ According to the specification, any line that does not begin with a
+ recognized code can be ignored. This allows users to have additional
+ descriptor lines specific to their special processes.
+
+ Parsing of custom lines is handled as follows:
+ - divide content by tabs
+ - from the back, fields are parsed as GFA tags (XX:Y:...); until an exception
+ is thrown, they are all considered tags
+ - from the first exception back to the first field, they are all considered
+ positional fields with name field1, field2, etc
+ """
+
+ RECORD_TYPE = None
+ POSFIELDS = ["record_type"]
+ DATATYPE = {
+ "record_type" : "custom_record_type"
+ }
+
+CustomRecord._apply_definitions()
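+
+# Sketch of how an unknown record type is parsed (not from the upstream
+# sources; the "X" record is made up):
+#
+#   >>> import gfapy
+#   >>> line = gfapy.Line("X\ta\tb\tcc:i:10", version="gfa2")
+#   >>> line.record_type
+#   'X'
+#   >>> line.positional_fieldnames
+#   ['field1', 'field2']
+#   >>> line.field1, line.field2
+#   ('a', 'b')
+#   >>> line.cc
+#   10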
diff --git a/gfapy/line/edge/__init__.py b/gfapy/line/edge/__init__.py
new file mode 100644
index 0000000..777818f
--- /dev/null
+++ b/gfapy/line/edge/__init__.py
@@ -0,0 +1,4 @@
+from .edge import Edge
+from .link import Link
+from .containment import Containment
+from .gfa2 import GFA2
diff --git a/gfapy/line/edge/common/__init__.py b/gfapy/line/edge/common/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/gfapy/line/edge/common/alignment_type.py b/gfapy/line/edge/common/alignment_type.py
new file mode 100644
index 0000000..fed6c0a
--- /dev/null
+++ b/gfapy/line/edge/common/alignment_type.py
@@ -0,0 +1,39 @@
+class AlignmentType:
+
+ def is_internal(self):
+ """Does the edge represent an internal alignment?
+
+ Note:
+ only GFA2 E lines may represent internal alignments
+
+ Returns:
+ bool
+ """
+ return self._alignment_type == "I"
+
+ def is_containment(self):
+ """Does the edge represent a containment?
+
+ Note:
+ A containment is either a C line (GFA1) or an E line, for which the
+ coordinates of at least one of the two sequences go from 0 to the end of
+ the sequence (GFA2).
+
+ Returns:
+ bool
+ """
+ return self._alignment_type == "C"
+
+ def is_dovetail(self):
+ """Does the edge represent a dovetail overlap?
+
+ Note:
+ A dovetail is either a L line (GFA1) or an E line (GFA2), for which the
+ coordinates of both sequences go from the beginning of the sequence
+ to some internal position, or from some internal position to the end of
+ the sequence.
+
+ Returns:
+ bool
+ """
+ return self._alignment_type == "L"
diff --git a/gfapy/line/edge/common/from_to.py b/gfapy/line/edge/common/from_to.py
new file mode 100644
index 0000000..9257338
--- /dev/null
+++ b/gfapy/line/edge/common/from_to.py
@@ -0,0 +1,138 @@
+import gfapy
+
+class FromTo:
+
+ def is_circular(self):
+ """Does the edge represent an alignment of a segment with itself?
+
+ Returns:
+ bool : if the sid1/sid2 (E) or from_segment/to_segment (L/C)
+ segments are equal to each other; thereby, the orientations
+ are not considered
+ """
+ return str(self.from_segment) == str(self.to_segment)
+
+ def is_circular_same_end(self):
+ """Does the edge represent an alignment of a segment end with itself?
+
+    Returns:
+ bool : if the edge is a dovetail overlap, for which sid1/sid2 (E) or
+ from_segment/to_segment (L) are equal to each other, and the same
+ end of the sequence overlaps itself; (note that this kind of edge
+ is actually quite meaningless, but can be defined)
+ """
+ return self.from_end == self.to_end
+
+ @property
+ def from_end(self):
+ """The segment end corresponding to the from_segment field of L lines.
+
+    Note:
+      The result is meaningful only for dovetail overlaps (GFA1 L lines
+      or GFA2 E lines representing dovetail overlaps).
+
+      For an L line, the from_orient field is used to compute whether the
+      overlap involves the left (5') or right (3') end of the from_segment,
+      and the end_type of the SegmentEnd is set accordingly to 'L' or 'R'.
+      For an E line, it is first computed which of sid1/sid2 corresponds to
+      the from_segment field of an L line; then the same computation is
+      done as for L lines.
+
+ Returns:
+ gfapy.segment_end.SegmentEnd
+ """
+ return gfapy.SegmentEnd(self.from_segment,
+ "R" if self.from_orient == "+" else "L")
+
+ @property
+ def to_end(self):
+ """The segment end corresponding to the to_segment field of L lines.
+
+    Note:
+      The result is meaningful only for dovetail overlaps (GFA1 L lines
+      or GFA2 E lines representing dovetail overlaps).
+
+      For an L line, the to_orient field is used to compute whether the
+      overlap involves the left (5') or right (3') end of the to_segment,
+      and the end_type of the SegmentEnd is set accordingly to 'L' or 'R'.
+      For an E line, it is first computed which of sid1/sid2 corresponds to
+      the to_segment field of an L line; then the same computation is
+      done as for L lines.
+
+ Returns:
+ gfapy.segment_end.SegmentEnd
+ """
+ return gfapy.SegmentEnd(self.to_segment,
+ "L" if self.to_orient == "+" else "R")
+
+ def other_end(self, segment_end, tolerant=False):
+ """The other segment end involved in the alignment represented by the edge.
+
+ Note:
+ The result is meaningful only for dovetails overlaps (GFA1 L lines
+ or GFA2 E lines representing dovetail overlaps).
+
+ Parameters:
+ segment_end (`gfapy.segment_end.SegmentEnd`) : one of the two segment
+ ends involved in the alignment represented by the edge
+
+ Returns:
+ gfapy.segment_end.SegmentEnd
+
+    Raises:
+      gfapy.error.ArgumentError: If segment_end is not one of the two
+        segment ends involved in the alignment represented by the line
+        (and tolerant is not set).
+ """
+ if (self.from_end == segment_end):
+ return self.to_end
+ elif (self.to_end == segment_end):
+ return self.from_end
+ elif tolerant:
+ return None
+ else:
+ raise gfapy.ArgumentError(
+ "Segment end '{}' not found\n".format(repr(segment_end))+
+ "(from={};to={})".format(repr(self.from_end), repr(self.to_end)))
+
+ @property
+ def from_name(self):
+ """Segment name of the segment with the role of a L/C from_segment.
+
+    The segment name is computed both when the field contains a string
+    (i.e. the segment name itself) and when it contains a reference to
+    a segment line.
+
+ Returns:
+ str : the name of the segment which is specified in the field which
+ corresponds to the from_segment field in a GFA1 line (from_segment
+ if GFA1, sid1 or sid2 if GFA2)
+ """
+ if isinstance(self.from_segment, str):
+ return self.from_segment
+ else:
+ return self.from_segment.name
+
+ @property
+ def to_name(self):
+ """Segment name of the segment with the role of a L/C to_segment.
+
+    The segment name is computed both when the field contains a string
+    (i.e. the segment name itself) and when it contains a reference to
+    a segment line.
+
+ Returns:
+ str : the name of the segment which is specified in the field which
+ corresponds to the to_segment field in a GFA1 line (to_segment
+ if GFA1, sid1 or sid2 if GFA2)
+ """
+ if isinstance(self.to_segment, str):
+ return self.to_segment
+ else:
+ return self.to_segment.name
+
+ @property
+ def _segment_ends_s(self):
+ """Signature of the segment ends, for debugging."""
+ return "---".join([str(self.from_end), str(self.to_end)])
diff --git a/gfapy/line/edge/containment/__init__.py b/gfapy/line/edge/containment/__init__.py
new file mode 100644
index 0000000..6de555b
--- /dev/null
+++ b/gfapy/line/edge/containment/__init__.py
@@ -0,0 +1 @@
+from .containment import Containment
diff --git a/gfapy/line/edge/containment/canonical.py b/gfapy/line/edge/containment/canonical.py
new file mode 100644
index 0000000..9a1a483
--- /dev/null
+++ b/gfapy/line/edge/containment/canonical.py
@@ -0,0 +1,31 @@
+class Canonical:
+
+ def is_canonical(self):
+ """Checks if a containment line is in the canonical form.
+
+    As each containment (GFA1 C line) can be specified using either of the
+    two sequences in positive orientation, only one of the possible
+    variants is considered canonical by Gfapy; this allows checking
+    whether two C lines are equivalent to each other. In particular, a
+    C line is considered canonical if its from_orient is +.
+
+ Note:
+ An example: a containment of B (length:8) in A (length:100) at
+ position 9 of A, with a cigar 1M1I2M3D4M (i.e. rpos = 19).
+
+ ::
+ A+ B+ 1M1I2M3D4M 9 == A- B- 4M3D2M1I1M 80
+ A+ B- 1M1I2M3D4M 9 == A- B+ 4M3D2M1I1M 80
+ A- B+ 1M1I2M3D4M 9 == A+ B- 4M3D2M1I1M 80
+ A- B- 1M1I2M3D4M 9 == A+ B+ 4M3D2M1I1M 80
+
+ Pos in the complement is equal to the length of A minus the right pos
+ of B before reversing.
+ We require here that A != B as A == B makes no sense for containments.
+ Thus it is always possible to express the containment using a positive
+ from orientation.
+
+ Returns:
+ bool
+ """
+    return self.from_orient == "+"
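+
+  # Example (illustration only): a line "C A + B - 9 10M" would be canonical
+  # (from_orient is "+"), while the equivalent line with from_orient "-"
+  # would not.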
diff --git a/gfapy/line/edge/containment/containment.py b/gfapy/line/edge/containment/containment.py
new file mode 100644
index 0000000..de8f4e8
--- /dev/null
+++ b/gfapy/line/edge/containment/containment.py
@@ -0,0 +1,53 @@
+from ..common.from_to import FromTo
+from ..common.alignment_type import AlignmentType
+from ..gfa1.to_gfa2 import ToGFA2 as GFA1_ToGFA2
+from ..gfa1.alignment_type import AlignmentType as GFA1_AlignmentType
+from ..gfa1.oriented_segments import OrientedSegments
+from ..gfa1.references import References
+from ..gfa1.other import Other
+from ..containment.canonical import Canonical
+from ..containment.pos import Pos
+from ..containment.to_gfa2 import ToGFA2 as Containment_ToGFA2
+from ..edge import Edge
+
+class Containment(Containment_ToGFA2, Pos, Canonical, Other,
+ GFA1_AlignmentType, OrientedSegments, References,
+ GFA1_ToGFA2, AlignmentType, FromTo, Edge):
+ """A containment line (C) of a GFA1 file
+
+ Note:
+ from_segment and to_segment are used instead of from/to
+ as from is not a valid method name in Python. However, when not
+ used as method name (e.g. as argument of get()), from and to can
+ be used, as an alias has been defined.
+
+ Note:
+    The from segment is considered the container, the to segment the
+    contained sequence. This is not stated in the specification, but
+    examples in the GFA forum were done under this assumption.
+ """
+ RECORD_TYPE = "C"
+ POSFIELDS = ["from_segment", "from_orient", "to_segment",
+ "to_orient", "pos", "overlap"]
+ FIELD_ALIAS = {"container" : "from_segment",
+ "contained" : "to_segment",
+ "from" : "from_segment",
+ "to" : "to_segment",
+ "container_orient" : "from_orient",
+ "contained_orient" : "to_orient"}
+ PREDEFINED_TAGS = ["MQ", "NM", "ID"]
+ NAME_FIELD = "ID"
+ DATATYPE = {
+ "from_segment" : "segment_name_gfa1",
+ "from_orient" : "orientation",
+ "to_segment" : "segment_name_gfa1",
+ "to_orient" : "orientation",
+ "pos" : "position_gfa1",
+ "overlap" : "alignment_gfa1",
+ "MQ" : "i",
+ "NM" : "i",
+ "ID" : "Z",
+ }
+ REFERENCE_FIELDS = ["from_segment", "to_segment"]
+
+Containment._apply_definitions()
diff --git a/gfapy/line/edge/containment/pos.py b/gfapy/line/edge/containment/pos.py
new file mode 100644
index 0000000..b91389b
--- /dev/null
+++ b/gfapy/line/edge/containment/pos.py
@@ -0,0 +1,19 @@
+import gfapy
+
+class Pos:
+
+ @property
+ def rpos(self):
+ """The rightmost coordinate of the contained sequence in the container.
+
+ Returns:
+ int : 0-based coordinate
+
+ Raises:
+ gfapy.ValueError : If the overlap is a placeholder, thus the computation
+ cannot be performed.
+ """
+ if isinstance(self.overlap, gfapy.Placeholder):
+ raise gfapy.ValueError("The overlap is a placeholder, therefore"+
+ "rpos cannot be computed")
+ return self.pos + self.overlap.length_on_reference()
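+
+  # Example (illustration only): for a C line with pos 9 and overlap
+  # 1M1I2M3D4M, length_on_reference() is 10 (M and D operations), so rpos
+  # would be 19.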
diff --git a/gfapy/line/edge/containment/to_gfa2.py b/gfapy/line/edge/containment/to_gfa2.py
new file mode 100644
index 0000000..2d95e6a
--- /dev/null
+++ b/gfapy/line/edge/containment/to_gfa2.py
@@ -0,0 +1,31 @@
+import gfapy
+
+class ToGFA2:
+
+ @property
+ def from_coords(self):
+ """
+ GFA2 positions of the alignment on the **from** segment.
+
+ Returns
+ -------
+ (Integer|Lastpos, Integer|Lastpos)
+ begin and end
+
+ Raises
+ ------
+ gfapy.RuntimeError
+ If the segment length cannot be determined, because the segment line is unknown.
+ gfapy.ValueError
+ If the segment length is not specified in the segment line.
+ """
+ self._check_overlap()
+ rpos = self.pos + self.overlap.length_on_reference()
+ if rpos == self._lastpos_of("from"):
+ rpos = gfapy.LastPos(rpos)
+ return [self.pos, rpos]
+
+ @property
+ def to_coords(self):
+ """
+ GFA2 positions of the alignment on the **to** segment
+ """
+ return [0, self._lastpos_of("to")]
diff --git a/gfapy/line/edge/edge.py b/gfapy/line/edge/edge.py
new file mode 100644
index 0000000..daf615a
--- /dev/null
+++ b/gfapy/line/edge/edge.py
@@ -0,0 +1,8 @@
+from ...line import Line
+
+class Edge(Line):
+ """
+ Superclass for edge lines, i.e. E lines of GFA2 files
+ and L and C lines of GFA1 files.
+ """
+ pass
diff --git a/gfapy/line/edge/gfa1/__init__.py b/gfapy/line/edge/gfa1/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/gfapy/line/edge/gfa1/alignment_type.py b/gfapy/line/edge/gfa1/alignment_type.py
new file mode 100644
index 0000000..dd02225
--- /dev/null
+++ b/gfapy/line/edge/gfa1/alignment_type.py
@@ -0,0 +1,4 @@
+class AlignmentType:
+ @property
+ def _alignment_type(self):
+ return self.record_type
diff --git a/gfapy/line/edge/gfa1/oriented_segments.py b/gfapy/line/edge/gfa1/oriented_segments.py
new file mode 100644
index 0000000..6b0b5f4
--- /dev/null
+++ b/gfapy/line/edge/gfa1/oriented_segments.py
@@ -0,0 +1,23 @@
+import gfapy
+
+class OrientedSegments:
+
+ @property
+ def oriented_from(self):
+ """
+ Returns
+ -------
+ gfapy.OrientedLine
+ The oriented segment represented by the from_segment/from_orient fields.
+ """
+ return gfapy.OrientedLine(self.from_segment, self.from_orient)
+
+ @property
+ def oriented_to(self):
+ """
+ Returns
+ -------
+ gfapy.OrientedLine
+ The oriented segment represented by the to_segment/to_orient fields.
+ """
+ return gfapy.OrientedLine(self.to_segment, self.to_orient)
diff --git a/gfapy/line/edge/gfa1/other.py b/gfapy/line/edge/gfa1/other.py
new file mode 100644
index 0000000..0463933
--- /dev/null
+++ b/gfapy/line/edge/gfa1/other.py
@@ -0,0 +1,62 @@
+import gfapy
+
+class Other:
+
+ def other_oriented_segment(self, oriented_segment, tolerant = False):
+ """
+ Parameters
+ ----------
+ oriented_segment : gfapy.OrientedLine
+ One of the two oriented segments of the line.
+
+ Returns
+ -------
+ gfapy.OrientedLine
+ The other oriented segment.
+
+ Raises
+ ------
+ gfapy.NotFoundError
+ If segment_end is not a segment end of the line.
+ """
+ if (self.oriented_from == oriented_segment):
+ return self.oriented_to
+ elif (self.oriented_to == oriented_segment):
+ return self.oriented_from
+ elif tolerant:
+ return None
+ else:
+ raise gfapy.NotFoundError(
+ "Oriented segment '{}' not found\n".format(repr(oriented_segment))+
+ "Line: {}".format(self))
+
+ def other(self, segment, tolerant = False):
+ """
+ The other segment of a connection line.
+
+ Parameters
+ ----------
+ segment : gfapy.line.segment.GFA1 or str
+ Segment name or instance.
+
+ Raises
+ ------
+ gfapy.NotFoundError
+ If segment is not involved in the connection.
+
+ Returns
+    -------
+    str or gfapy.line.segment.GFA1
+        The name or instance of the other segment of the connection.
+        If the connection is circular, **segment** itself.
+ """
+ segment_name = str(segment)
+ if segment_name == str(self.from_segment):
+      return self.to_segment
+ elif segment_name == str(self.to_segment):
+ return self.from_segment
+ elif tolerant:
+ return None
+ else:
+ raise gfapy.NotFoundError(
+ "Line {} does not involve segment {}".format(self, segment_name))
diff --git a/gfapy/line/edge/gfa1/references.py b/gfapy/line/edge/gfa1/references.py
new file mode 100644
index 0000000..542366a
--- /dev/null
+++ b/gfapy/line/edge/gfa1/references.py
@@ -0,0 +1,35 @@
+import gfapy
+
+class References:
+ def _initialize_references(self):
+ for d in ["from", "to"]:
+ s = self._gfa.segment(self.get(d))
+ if s is None:
+ if self._gfa._segments_first_order:
+ raise gfapy.NotFoundError()
+ s = gfapy.line.segment.GFA1({"name" : self.get(d),
+ "sequence" : "*"},
+ version = "gfa1",
+ virtual = True)
+ s.connect(self._gfa)
+ self._set_existing_field(d+"_segment", s, set_reference = True)
+ if self.record_type == "L":
+ et = self.from_end.end_type if d == "from" else self.to_end.end_type
+ key = "dovetails_{}".format(et)
+ else:
+ key = \
+ "edges_to_contained" if (d == "from") else "edges_to_containers"
+ s._add_reference(self, key)
+
+ def _import_field_references(self, previous):
+ for d in ["from_segment", "to_segment"]:
+ self._set_existing_field(d, self._gfa.segment(self.get(d)),
+ set_reference = True)
+
+ def _backreference_keys(self, ref, key_in_ref):
+ if ref.record_type == "P":
+ return ["paths"]
+ elif ref.record_type == "S":
+ return ["from_segment", "to_segment"]
+ else:
+ return []
diff --git a/gfapy/line/edge/gfa1/to_gfa2.py b/gfapy/line/edge/gfa1/to_gfa2.py
new file mode 100644
index 0000000..f0cb057
--- /dev/null
+++ b/gfapy/line/edge/gfa1/to_gfa2.py
@@ -0,0 +1,93 @@
+import gfapy
+
+class ToGFA2:
+ """
+ Access of / conversion from a GFA1 link/containment as / to a GFA2 edge.
+ """
+
+ @property
+ def eid(self):
+ """The content of the ID tag"""
+ i = self.get("ID")
+ if i is None:
+ return gfapy.Placeholder()
+ return i
+
+ name = eid
+
+ @property
+ def sid1(self):
+ """The combination of the from_segment and from_orientation fields"""
+ return self.oriented_from
+
+ @property
+ def sid2(self):
+ """The combination of the to_segment and to_orientation fields"""
+ return self.oriented_to
+
+ @property
+ def beg1(self):
+ """The start coordinate of the alignment on the from segment"""
+ return self.from_coords[0]
+
+ @property
+ def end1(self):
+ """The end coordinate of the alignment on the from segment"""
+ return self.from_coords[1]
+
+ @property
+ def beg2(self):
+ """The start coordinate of the alignment on the to segment"""
+    return self.to_coords[0]
+
+ @property
+ def end2(self):
+ """The end coordinate of the alignment on the to segment"""
+ return self.to_coords[1]
+
+ @property
+ def alignment(self):
+ """The content of the overlap field (CIGAR or Placeholder)"""
+ return self.overlap
+
+ def _to_gfa2_a(self):
+ a = ["E"]
+ if not self.get("ID") and self.is_connected():
+ self.set("ID", self._gfa.unused_name())
+ if self.get("ID"):
+ a.append(str(self.get("ID")))
+ else:
+ a.append("*")
+ a.append(str(self.sid1))
+ a.append(str(self.sid2))
+ a += [ str(x) for x in self.from_coords ]
+ a += [ str(x) for x in self.to_coords ]
+ try:
+ self.overlap.validate(version = "gfa2")
+    except Exception:
+ raise gfapy.RuntimeError(
+ "Conversion of edge line from GFA1 to GFA2 failed\n"+
+ "Overlap is invalid or not compatible with GFA2\n"+
+ "Edge line: {}\n".format(str(self)))
+ a.append(self.field_to_s("overlap"))
+ for fn in self.tagnames:
+ if fn != "ID":
+ a.append(self.field_to_s(fn, tag = True))
+ return a
+
+ def _lastpos_of(self, field):
+ line = getattr(self,field)
+ if not isinstance(line, gfapy.Line):
+ raise gfapy.RuntimeError(
+ "Line {} is not embedded in a GFA object".format(self))
+ length = line.length
+ if length is None:
+ raise gfapy.ValueError(
+ "Length of segment {} unknown".format(self.to.name))
+ return gfapy.LastPos(length)
+
+ def _check_overlap(self):
+ if isinstance(self.overlap, gfapy.Placeholder):
+ raise gfapy.ValueError(
+ "Link: {}\n".format(self)+
+ "Missing overlap, cannot compute overlap coordinates")
diff --git a/gfapy/line/edge/gfa2/__init__.py b/gfapy/line/edge/gfa2/__init__.py
new file mode 100644
index 0000000..2ded87e
--- /dev/null
+++ b/gfapy/line/edge/gfa2/__init__.py
@@ -0,0 +1 @@
+from .gfa2 import GFA2
diff --git a/gfapy/line/edge/gfa2/alignment_type.py b/gfapy/line/edge/gfa2/alignment_type.py
new file mode 100644
index 0000000..d718cce
--- /dev/null
+++ b/gfapy/line/edge/gfa2/alignment_type.py
@@ -0,0 +1,62 @@
+import gfapy
+
+class AlignmentType:
+
+ @property
+ def _alignment_type(self):
+ """The alignment type (C, L or I).
+
+ Returns:
+ C, L or I: C if containment, L if link, I (internal) otherwise.
+ """
+ st1 = self._substring_type(self.beg1, self.end1)[0]
+ st2 = self._substring_type(self.beg2, self.end2)[0]
+ return self._alignment_type_for_substring_types(st1, st2)
+
+ def _alignment_type_for_substring_types(self, st1, st2):
+ if st1 == "whole" or st2 == "whole":
+ return "C"
+ elif self.sid1.orient == self.sid2.orient:
+ if (st1 == "pfx" and st2 == "sfx") or (st1 == "sfx" and st2 == "pfx"):
+ return "L"
+ else:
+ return "I"
+ else:
+ if (st1 == "pfx" and st2 == "pfx") or (st1 == "sfx" and st2 == "sfx"):
+ return "L"
+ else:
+ return "I"
+
+ def _substring_type(self, begpos, endpos):
+ """Type of substring (pfx, sfx, whole, internal) given start and end pos.
+
+    Analyzes the begin and end positions and determines whether the
+    substring is the whole string or a (possibly empty) proper substring,
+    i.e. a prefix, a suffix, or an internal substring.
+    """
+ if gfapy.posvalue(begpos) > gfapy.posvalue(endpos):
+ raise gfapy.ValueError(
+ "Line: {}\n".format(str(self))+
+ "begin > end: {}$ > {}".format(gfapy.posvalue(begpos),
+ gfapy.posvalue(endpos)))
+ if gfapy.isfirstpos(begpos):
+ if gfapy.isfirstpos(endpos):
+ return ("pfx", True)
+ elif gfapy.islastpos(endpos):
+ return ("whole", False)
+ else:
+ return ("pfx", False)
+ elif gfapy.islastpos(begpos):
+ if not gfapy.islastpos(endpos):
+ raise gfapy.FormatError(
+ "Line: {}\n".format(str(self))+
+ "Wrong use of $ marker\n"+
+ "{} >= {}$".format(gfapy.posvalue(endpos),
+ gfapy.posvalue(begpos)))
+ return ("sfx", True)
+ else:
+ if gfapy.islastpos(endpos):
+ return ("sfx", False)
+ else:
+ return ("internal",
+ gfapy.posvalue(begpos) == gfapy.posvalue(endpos))
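+
+  # Example (illustration only): for a segment of length 100, coordinates
+  # (0, 50) would be classified as "pfx", (50, 100$) as "sfx", (0, 100$)
+  # as "whole", and (10, 90) as "internal".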
diff --git a/gfapy/line/edge/gfa2/gfa2.py b/gfapy/line/edge/gfa2/gfa2.py
new file mode 100644
index 0000000..2702ae2
--- /dev/null
+++ b/gfapy/line/edge/gfa2/gfa2.py
@@ -0,0 +1,36 @@
+from ..common.from_to import FromTo
+from ..common.alignment_type import AlignmentType
+from ..gfa2.to_gfa1 import ToGFA1
+from ..gfa2.alignment_type import AlignmentType as GFA2_AlignmentType
+from ..gfa2.references import References
+from ..gfa2.other import Other
+from ..gfa2.validation import Validation
+from ..edge import Edge
+
+class GFA2(Other, References, GFA2_AlignmentType, AlignmentType, FromTo,
+ ToGFA1, Validation, Edge):
+ """An edge line of a GFA2 file."""
+
+ RECORD_TYPE = "E"
+ POSFIELDS = ["eid", "sid1", "sid2", "beg1", "end1", "beg2", "end2",
+ "alignment"]
+ PREDEFINED_TAGS = ["TS"]
+ DATATYPE = {
+ "eid" : "optional_identifier_gfa2",
+ "sid1" : "oriented_identifier_gfa2",
+ "sid2" : "oriented_identifier_gfa2",
+ "beg1" : "position_gfa2",
+ "end1" : "position_gfa2",
+ "beg2" : "position_gfa2",
+ "end2" : "position_gfa2",
+ "alignment" : "alignment_gfa2",
+ "TS" : "i",
+ }
+ NAME_FIELD = "eid"
+ FIELD_ALIAS = { "name" : "eid" }
+ REFERENCE_FIELDS = ["sid1", "sid2"]
+ BACKREFERENCE_RELATED_FIELDS = ["beg1", "end1", "beg2", "end2"]
+ DEPENDENT_LINES = ["paths", "sets"]
+
+GFA2._apply_definitions()
+
diff --git a/gfapy/line/edge/gfa2/other.py b/gfapy/line/edge/gfa2/other.py
new file mode 100644
index 0000000..59fb1f9
--- /dev/null
+++ b/gfapy/line/edge/gfa2/other.py
@@ -0,0 +1,47 @@
+import gfapy
+
+class Other:
+
+ def other_oriented_segment(self, oriented_segment):
+ """The other oriented segment.
+
+ Parameters:
+ oriented_segment (gfapy.OrientedLine) : One of the two oriented segments of the line.
+
+ Returns:
+ gfapy.OrientedLine
+
+ Raises:
+ gfapy.error.NotFoundError: If segment_end is not a segment end of the line.
+ """
+ if (self.sid1 == oriented_segment):
+ return self.sid2
+ elif (self.sid2 == oriented_segment):
+ return self.sid1
+ else:
+ raise gfapy.NotFoundError(
+ "Oriented segment '{}' not found\n".format(oriented_segment) +
+ "Line: {}".format(self))
+
+ def other(self, segment):
+ """The other segment of an edge line.
+
+ Parameters:
+ segment (gfapy.line.segment.GFA2) : Segment name or instance.
+
+ Raises:
+ gfapy.error.NotFoundError: If segment is not a segment of the line.
+
+ Returns:
+ gfapy.line.segment.GFA2 or str : instance or name of the other segment
+ of the connection (the segment itself, if the connection is circular)
+ """
+ if isinstance(segment, gfapy.Line):
+ segment = segment.name
+ if segment == self.sid1.name:
+ return self.sid2.line
+ elif segment == self.sid2.name:
+ return self.sid1.line
+ else:
+ raise gfapy.NotFoundError(
+ "Line {} does not involve segment {}".format(self, segment))
diff --git a/gfapy/line/edge/gfa2/references.py b/gfapy/line/edge/gfa2/references.py
new file mode 100644
index 0000000..4ac9b8c
--- /dev/null
+++ b/gfapy/line/edge/gfa2/references.py
@@ -0,0 +1,66 @@
+import gfapy
+
+class References:
+
+ def _initialize_references(self):
+ st1 = self._substring_type(self.beg1, self.end1)[0]
+ st2 = self._substring_type(self.beg2, self.end2)[0]
+ for snum in [1, 2]:
+ sid = "sid{}".format(snum)
+ orient = self.get(sid).orient
+ s = self._gfa.segment(self.get(sid).line)
+ if s is None:
+ if self._gfa._segments_first_order:
+ raise gfapy.NotFoundError()
+ s = gfapy.line.segment.GFA2({"sid" : self.get(sid).line,
+ "slen" : 1,
+ "sequence" : "*"},
+ version = "gfa2",
+ virtual = True)
+ s.connect(self._gfa)
+ self._set_existing_field(sid, gfapy.OrientedLine(s, orient),
+ set_reference = True)
+ s._add_reference(self, self._refkey_for_s(snum, st1, st2))
+
+ def _refkey_for_s(self, snum, st1, st2):
+ if st1 == "whole":
+ if st2 == "whole":
+ return "edges_to_contained" if snum == 1 else "edges_to_containers"
+ else:
+ return "edges_to_containers" if snum == 1 else "edges_to_contained"
+ elif st2 == "whole":
+ return "edges_to_containers" if snum == 2 else "edges_to_contained"
+ elif self.sid1.orient == self.sid2.orient:
+ if (st1 == "pfx" and st2 == "sfx"):
+ return "dovetails_L" if snum == 1 else "dovetails_R"
+ elif (st1 == "sfx" and st2 == "pfx"):
+ return "dovetails_R" if snum == 1 else "dovetails_L"
+ else:
+ return "internals"
+ else:
+ if (st1 == "pfx" and st2 == "pfx"):
+ return "dovetails_L"
+ elif (st1 == "sfx" and st2 == "sfx"):
+ return "dovetails_R"
+ else:
+ return "internals"
+
+ def _import_field_references(self, previous):
+ for sid in ["sid1", "sid2"]:
+ self._set_existing_field(sid,
+ gfapy.OrientedLine(self._gfa.segment(self.get(sid).line),
+ self.get(sid).orient),
+ set_reference = True)
+
+ def _backreference_keys(self, ref, key_in_ref):
+ if ref.record_type == "U":
+ return ["sets"]
+ elif ref.record_type == "O":
+ return ["paths"]
+ elif ref.record_type == "S":
+ return ["sid1", "sid2"]
+ else:
+ raise gfapy.AssertionError(
+ "Bug found, please report\n"+
+ "ref: {}\n".format(ref)+
+ "key_in_ref: {}".format(key_in_ref))
diff --git a/gfapy/line/edge/gfa2/to_gfa1.py b/gfapy/line/edge/gfa2/to_gfa1.py
new file mode 100644
index 0000000..44ba41d
--- /dev/null
+++ b/gfapy/line/edge/gfa2/to_gfa1.py
@@ -0,0 +1,219 @@
+import gfapy
+
+class ToGFA1:
+
+ def _to_gfa1_a(self):
+ """List of the field content of the line in GFA1.
+ """
+ at = self._alignment_type
+ if at == "I":
+ raise gfapy.RuntimeError(
+ "Conversion of edge line from GFA2 to GFA1 failed\n"+
+ "Edge represents an internal overlap:\n"+
+ "Edge line: {}\n".format(str(self)))
+ a = [ at ]
+ if self._is_sid1_from():
+ ol1 = self.get("sid1")
+ ol2 = self.get("sid2")
+ else:
+ ol1 = self.get("sid2")
+ ol2 = self.get("sid1")
+ a.append(ol1.name)
+ a.append(ol1.orient)
+ a.append(ol2.name)
+ a.append(ol2.orient)
+ if self._alignment_type == "C":
+ a.append(str(self.pos))
+ try:
+ self.overlap.validate(version = "gfa1")
+    except Exception:
+ raise gfapy.RuntimeError(
+ "Conversion of edge line from GFA2 to GFA1 failed\n"+
+ "Overlap is invalid or not compatible with GFA1\n"+
+ "Edge line: {}\n".format(str(self)))
+ a.append(str(self.overlap))
+ if not gfapy.is_placeholder(self.eid):
+ a.append(gfapy.Field._to_gfa_tag(self.eid, "ID", datatype = "Z"))
+ for fn in self.tagnames:
+ a.append(self.field_to_s(fn, tag = True))
+ return a
+
+ @property
+ def overlap(self):
+ """Value of the GFA1 **overlap** field, if the edge is a link or containment.
+
+ Returns:
+ gfapy.Alignment.Placeholder or gfapy.Alignment.CIGAR
+
+ Raises:
+ gfapy.error.ValueError: If the edge is internal
+ """
+ self._check_not_internal("overlap")
+ return self.alignment.complement() if self._is_sid1_from() else self.alignment
+
+ @property
+ def oriented_from(self):
+ return self.sid1 if self._is_sid1_from() else self.sid2
+
+ @property
+ def oriented_to(self):
+ return self.sid2 if self._is_sid1_from() else self.sid1
+
+ @property
+ def from_segment(self):
+ """Value of the GFA1 **from_segment** field, if the edge is a link or containment.
+
+ Returns:
+ str or gfapy.line.segment.GFA2
+
+ Raises:
+ gfapy.error.ValueError: If the edge is internal.
+ """
+ return self.oriented_from.line
+
+ @from_segment.setter
+ def from_segment(self, value):
+ """Set the field which will be returned by calling from_segment
+
+ Parameters:
+ value (str, gfapy.line.segment.GFA2)
+ """
+ self.oriented_from.line = value
+
+ @property
+ def from_orient(self):
+ """Value of the GFA1 **from_orient** field.
+
+ This method can only be applied if the edge is a link or containment.
+
+ Returns:
+ str: one of ["+", "-"]
+
+ Raises:
+ gfapy.error.ValueError: If the edge is internal.
+ """
+ return self.oriented_from.orient
+
+ @from_orient.setter
+ def from_orient(self, value):
+ """Set the orientation of the field which will be returned by calling from
+
+ Parameters:
+ value (str): one of ["+", "-"]
+ """
+ self.oriented_from.orient = value
+
+ @property
+ def to_segment(self):
+ """Value of the GFA1 ``to_segment`` field.
+
+ This method can only be applied if the edge is a link or containment.
+
+ Returns:
+ str or gfapy.line.segment.GFA2
+
+ Raises:
+ gfapy.error.ValueError: If the edge is internal.
+ """
+ return self.oriented_to.line
+
+ @to_segment.setter
+ def to_segment(self, value):
+ """Set the field which will be returned by calling ``to``.
+
+ Parameters:
+ value (str or gfapy.line.segment.GFA2)
+ """
+ self.oriented_to.line = value
+
+ @property
+ def to_orient(self):
+ """Value of the GFA1 **to_orient** field.
+
+ This method can only be applied if the edge is a link or containment.
+
+ Returns:
+ str : one of ["+", "-"]
+
+ Raises:
+ gfapy.error.ValueError: If the edge is internal.
+ """
+ return self.oriented_to.orient
+
+ @to_orient.setter
+ def to_orient(self, value):
+ """Set the orientation of the field which will be returned by calling ``to``.
+
+ Parameters:
+ value (str): one of ["+", "-"]
+ """
+ self.oriented_to.orient = value
+
+ @property
+ def pos(self):
+ """Value of the GFA1 **pos** field, if the edge is a containment.
+
+ Returns:
+ int or gfapy.Lastpos
+
+ Raises:
+ gfapy.error.ValueError: If the edge is not a containment.
+ """
+ if self._alignment_type == "I":
+ raise gfapy.ValueError("Line: {}\n".format(str(self)) +
+ "Internal alignment, pos is not defined")
+ elif self._alignment_type == "L":
+ raise gfapy.ValueError("Line: {}\n".format(str(self)) +
+ "Dovetail alignment, pos is not defined")
+ elif self._alignment_type == "C":
+ if gfapy.isfirstpos(self.beg1):
+ return self.beg1 if (gfapy.isfirstpos(self.beg2) and
+ gfapy.islastpos(self.end2)) else self.beg2
+ else:
+ return self.beg1
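+
+  # Example (illustration only): for an E line "E * A+ B+ 9 19 0 10$ 10M"
+  # (segment B fully contained in A), pos would be 9, i.e. the start of
+  # the alignment on the containing segment.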
+
+ def _check_not_internal(self, fn):
+ if self.is_internal():
+ raise gfapy.ValueError(
+ "Line: {}\n".format(str(self))+
+ "Internal alignment, {} is not defined".format(fn))
+
+ @staticmethod
+ def _segment_role(begpos, endpos, orient):
+ if gfapy.isfirstpos(begpos):
+ if gfapy.islastpos(endpos):
+ return "contained"
+ elif orient == "+":
+ return "pfx"
+ else:
+ return "sfx"
+ else:
+ if gfapy.islastpos(endpos):
+ if orient == "+":
+ return "sfx"
+ else:
+ return "pfx"
+ else:
+ return "other"
+
+ def _check_GFA1_overlap_compatibility(self):
+ pass
+
+ def _is_sid1_from(self):
+ sr1 = self._segment_role(self.beg1, self.end1, self.sid1.orient)
+ sr2 = self._segment_role(self.beg2, self.end2, self.sid2.orient)
+ if sr2 == "contained":
+ return True
+ elif sr1 == "contained":
+ return False
+ elif sr1 == "sfx" and sr2 == "pfx":
+ return True
+ elif sr2 == "sfx" and sr1 == "pfx":
+ return False
+ else:
+ raise gfapy.ValueError(
+ "Line: {}\n".format(str(self))+
+ "Internal overlap, 'from' is undefined\n"+
+ "Roles: segment1 is {} ({},{}), segment2 is {} ({},{})".format(sr1,
+ self.beg1, self.end1, sr2, self.beg2, self.end2))
+
diff --git a/gfapy/line/edge/gfa2/validation.py b/gfapy/line/edge/gfa2/validation.py
new file mode 100644
index 0000000..2f8e20e
--- /dev/null
+++ b/gfapy/line/edge/gfa2/validation.py
@@ -0,0 +1,22 @@
+import gfapy
+
+class Validation:
+
+ def validate_positions(self):
+ "Checks that positions suffixed by $ are the last position of segments"
+ if self.is_connected():
+ for n in ["1","2"]:
+ seg = self.get("sid"+n).line
+ seq = seg.sequence
+ if not gfapy.is_placeholder(seq):
+ seqlen = len(seq)
+ for pfx in ["beg", "end"]:
+ fn = pfx+n
+ pos = self.get(fn)
+ if gfapy.islastpos(pos):
+ if pos != seqlen:
+ raise gfapy.InconsistencyError(
+ "Edge: {}\n".format(str(self))+
+ "Field {}: $ after ".format(fn)+
+ "non-last position\n".format(str(pos))+
+ "Segment: {}".format(str(seg)))
diff --git a/gfapy/line/edge/link/__init__.py b/gfapy/line/edge/link/__init__.py
new file mode 100644
index 0000000..d9f5a5c
--- /dev/null
+++ b/gfapy/line/edge/link/__init__.py
@@ -0,0 +1 @@
+from .link import Link
diff --git a/gfapy/line/edge/link/canonical.py b/gfapy/line/edge/link/canonical.py
new file mode 100644
index 0000000..8d01a1f
--- /dev/null
+++ b/gfapy/line/edge/link/canonical.py
@@ -0,0 +1,50 @@
+class Canonical:
+
+ def is_canonical(self):
+ """Checks if a link is expressed in the canonical form.
+
+ Returns:
+ bool
+
+ Links can be expressed in two different forms. If a link is
+ expressed in the other form, it is converted before storing.
+
+ Note:
+ A link is considered canonical in Gfapy if either the from segment name
+ is lexicographically smaller than the to segment name, or the two segment
+ names are equal, and at least one orientation is positive.
+
+ Note:
+ In the special case in which from == to (== s) we have the
+ following equivalences:
+
+ ::
+ s + s + == s - s -
+ s - s - == s + s + (same as previous case)
+ s + s - == s + s - (equivalent to itself)
+ s - s + == s - s + (equivalent to itself)
+
+      Considering the values on the left, the first one can be taken as
+      canonical and the second not, because it can be transformed into the
+      first one; the other two values are canonical, as they are only
+      equivalent to themselves.
+ """
+ if self.from_name < self.to_name:
+ return True
+ elif self.from_name > self.to_name:
+ return False
+ else:
+ return "+" in [self.from_orient, self.to_orient]
+
+ def canonicize(self):
+ """The link itself if canonical, the complement link otherwise.
+
+ .. note::
+ The method shall be only used before the link is connected to
+ a Gfa instance.
+
+ Returns:
+ gfapy.line.edge.Link
+ """
+    if self.is_canonical():
+      return self
+    return self.complement()
diff --git a/gfapy/line/edge/link/complement.py b/gfapy/line/edge/link/complement.py
new file mode 100644
index 0000000..8fe4f46
--- /dev/null
+++ b/gfapy/line/edge/link/complement.py
@@ -0,0 +1,54 @@
+import gfapy
+
+class Complement:
+
+ def complement(self):
+ """Creates the equivalent link with from and to inverted.
+
+ The CIGAR operations (order and type) are inverted as well.
+ Tags are left unchanged.
+
+ Note:
+ The path references are not copied to the complement link.
+
+ Note:
+ This method shall be overridden if custom tags are defined, which have a
+ complementation operation which determines their value in the
+ equivalent complement link.
+
+ Returns:
+ gfapy.line.edge.Link: The inverted link.
+ """
+ l = self.clone()
+    l.from_segment = self.to_segment
+ l.from_orient = gfapy.invert(self.to_orient)
+ l.to_segment = self.from_segment
+ l.to_orient = gfapy.invert(self.from_orient)
+ l.overlap = self.overlap.complement()
+ return l
+
+ def make_complement(self):
+ """Complements the link inplace.
+
+ The tags are left unchanged.
+
+ Note:
+ The path references are not complemented by this method; therefore
+ the method shall be used before the link is embedded in a graph.
+
+ Note:
+ This method shall be overridden if custom tags are defined, which have a
+ complementation operation which determines their value in the
+ complement link.
+
+ Returns:
+ gfapy.line.edge.Link: self
+ """
+ tmp = self.from_segment
+ self.from_segment = self.to_segment
+ self.to_segment = tmp
+ tmp = self.from_orient
+ self.from_orient = gfapy.invert(self.to_orient)
+ self.to_orient = gfapy.invert(tmp)
+ self.overlap = self.overlap.complement()
+ return self
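+
+  # Example (illustration only): the complement of "L A + B - 4M" would be
+  # "L B + A - 4M": the segments are swapped, the orientations inverted,
+  # and the CIGAR complemented (unchanged here, as 4M is a pure match).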
diff --git a/gfapy/line/edge/link/equivalence.py b/gfapy/line/edge/link/equivalence.py
new file mode 100644
index 0000000..3003804
--- /dev/null
+++ b/gfapy/line/edge/link/equivalence.py
@@ -0,0 +1,229 @@
+import gfapy
+
+class Equivalence:
+
+  def __hash__(self):
+    """
+    Computes a hash for including the link in a dict,
+    so that the hash of a link and of its complement are the same.
+    Tags are not considered.
+    """
+    return hash(str(self.from_end)) + \
+           hash(str(self.to_end)) + \
+           hash(str(self.overlap)) + \
+           hash(str(self.overlap.complement()))
+
+ def is_eql(self, other):
+ """
+    Compares two links and determines their equivalence.
+    Tags are not considered.
+
+ .. note::
+ Inverting the strand of both links and reversing
+ the CIGAR operations (order/type), one obtains an
+ equivalent complement link.
+
+ Parameters
+ ----------
+ other : gfapy.line.edge.Link
+ A link.
+
+ Returns
+ -------
+ bool
+ Are self and other equivalent?
+
+ See Also
+ --------
+    __eq__
+ is_same
+ is_complement
+ """
+ return (self.is_same(other) or self.is_complement(other))
+
+ def are_tags_eql(self, other):
+ """
+ Compares the tags of two links.
+
+ .. note::
+ This method shall be overridden if custom tags
+ are defined, which have a complementation operation which determines
+ their value in the equivalent but complement link.
+
+ Parameters
+ ----------
+ other : gfapy.line.edge.Link
+ A link.
+
+ Returns
+ -------
+ bool
+        Are the tags of self and other equivalent?
+
+ See Also
+ --------
+ __eq__
+ """
+ return (sorted(self.tagnames) == sorted(other.tagnames)) and \
+ all(self.get(fn) == other.get(fn) for fn in self.tagnames)
+
+# def __eq__(self, other):
+# """
+# Compares two links and determine their equivalence.
+# Tags must have the same content.
+#
+# .. note:: Inverting the strand of both links and reversing
+# the CIGAR operations (order/type), one obtains an equivalent
+# link.
+#
+# Parameters
+# ----------
+# other : gfapy.line.edge.Link
+# A link.
+#
+# Returns
+# -------
+# bool
+# Are self and other equivalent?
+#
+# See Also
+# --------
+# is_eql
+# are_tags_eql
+# """
+# return self.is_eql(other) and self.are_tags_eql(other)
+
+ def is_same(self, other):
+ """
+    Compares two links and determines their equivalence.
+    Tags are not considered.
+
+ Parameters
+ ----------
+ other : gfapy.line.edge.Link
+ A link.
+
+ Returns
+ -------
+ bool
+ Are self and other equivalent?
+
+ See Also
+ --------
+ is_eql
+ is_complement
+ __eq__
+ """
+ return (self.from_end == other.from_end and
+ self.to_end == other.to_end and
+ self.overlap == other.overlap)
+
+ def is_complement(self, other):
+ """
+ Compares the link to the complement of another link
+    and determines their equivalence.
+    Tags are not considered.
+
+ Parameters
+ ----------
+ other : gfapy.line.edge.Link
+ The other link.
+
+ Returns
+ -------
+ bool
+ Are self and the complement of other equivalent?
+
+ See Also
+ --------
+ is_eql
+ is_same
+ __eq__
+ """
+ return (self.from_end == other.to_end and
+ self.to_end == other.from_end and
+ self.overlap == other.overlap.complement())
+
+ def is_compatible(self, other_oriented_from, other_oriented_to,
+ other_overlap = None, allow_complement = True):
+ """
+    Compares a link, and optionally its complement,
+    with two oriented segments and optionally an overlap.
+
+ Parameters
+ ----------
+ other_oriented_from : gfapy.OrientedLine
+ other_oriented_to : gfapy.OrientedLine
+ allow_complement : bool
+ Shall the complement link also be considered?
+ other_overlap : gfapy.Alignment.CIGAR
+ Compared only if not empty.
+
+ Returns
+ -------
+ bool
+ Does the link or, if **allow_complement**, the complement link go from
+ the first, oriented segment to the second with an overlap equal to the
+ provided one (if not empty)?
+ """
+ other_overlap = gfapy.Alignment(other_overlap, version = "gfa1",
+ valid = True)
+ if self.is_compatible_direct(other_oriented_from, other_oriented_to,
+ other_overlap):
+ return True
+ elif allow_complement:
+ return self.is_compatible_complement(other_oriented_from,
+ other_oriented_to,
+ other_overlap)
+ else:
+ return False
+
+ def is_compatible_direct(self, other_oriented_from, other_oriented_to,
+ other_overlap = None):
+ """
+ Compares a link with two oriented segments and optionally an overlap.
+
+ Parameters
+ ----------
+ other_oriented_from : gfapy.OrientedLine
+ other_oriented_to : gfapy.OrientedLine
+ other_overlap : gfapy.Alignment.CIGAR
+ Compared only if not empty.
+
+ Returns
+ -------
+ bool
+ Does the link go from the first oriented segment to the second
+ with an overlap equal to the provided one (if not empty)?
+ """
+ return ((self.oriented_from == other_oriented_from and
+ self.oriented_to == other_oriented_to) and
+ (not self.overlap or not other_overlap or
+ (self.overlap == other_overlap)))
+
+ def is_compatible_complement(self, other_oriented_from, other_oriented_to,
+ other_overlap = None):
+ """
+ Compares the complement link with two oriented segments and optionally an
+ overlap.
+
+ Parameters
+ ----------
+ other_oriented_from : gfapy.OrientedLine
+ other_oriented_to : gfapy.OrientedLine
+ other_overlap : gfapy.Alignment.CIGAR
+ Compared only if not empty.
+
+ Returns
+ -------
+ bool
+ Does the complement link go from the first oriented segment
+ to the second with an overlap equal to the provided one (if not empty)?
+ """
+ return ((self.oriented_to == other_oriented_from.inverted() and
+ (self.oriented_from == other_oriented_to.inverted()) and
+ (not self.overlap or not other_overlap or
+ (self.overlap == other_overlap.complement()))))
+
+ def _complement_ends(self, other):
+ return (self.from_end == other.to_end and self.to_end == other.from_end)
diff --git a/gfapy/line/edge/link/link.py b/gfapy/line/edge/link/link.py
new file mode 100644
index 0000000..fb64b21
--- /dev/null
+++ b/gfapy/line/edge/link/link.py
@@ -0,0 +1,50 @@
+from ..common.alignment_type import AlignmentType
+from ..common.from_to import FromTo
+from ..gfa1.to_gfa2 import ToGFA2 as GFA1_ToGFA2
+from ..gfa1.references import References as GFA1_References
+from ..gfa1.oriented_segments import OrientedSegments
+from ..gfa1.alignment_type import AlignmentType as GFA1_AlignmentType
+from ..gfa1.other import Other
+from .canonical import Canonical
+from .complement import Complement
+from .equivalence import Equivalence
+from .references import References as Link_References
+from .to_gfa2 import ToGFA2 as Link_ToGFA2
+from ..edge import Edge
+
+class Link(Link_ToGFA2, GFA1_ToGFA2, Link_References, Equivalence, Complement, \
+ Canonical, Other, GFA1_AlignmentType, OrientedSegments, GFA1_References, \
+ AlignmentType, FromTo, Edge):
+ """A link line (L) of a GFA1 file.
+
+ Note:
+ from_segment and to_segment are used instead of from/to
+ as from is not a valid method name in Python. However, when not
+ used as method name (e.g. as argument of get()), from and to can
+ be used, as an alias has been defined.
+ """
+
+ RECORD_TYPE = "L"
+ POSFIELDS = ["from_segment", "from_orient", "to_segment", "to_orient",
+ "overlap"]
+ PREDEFINED_TAGS = ["MQ", "NM", "RC", "FC", "KC", "ID"]
+ FIELD_ALIAS = {"from": "from_segment", "to": "to_segment"}
+ DATATYPE = {
+ "from_segment" : "segment_name_gfa1",
+ "from_orient" : "orientation",
+ "to_segment" : "segment_name_gfa1",
+ "to_orient" : "orientation",
+ "overlap" : "alignment_gfa1",
+ "MQ" : "i",
+ "NM" : "i",
+ "RC" : "i",
+ "FC" : "i",
+ "KC" : "i",
+ "ID" : "Z",
+ }
+ NAME_FIELD = "ID"
+ REFERENCE_FIELDS = ["from_segment", "to_segment"]
+ BACKREFERENCE_RELATED_FIELDS = ["to_orient", "from_orient", "overlap"]
+ DEPENDENT_LINES = ["paths"]
+
+Link._apply_definitions()
diff --git a/gfapy/line/edge/link/references.py b/gfapy/line/edge/link/references.py
new file mode 100644
index 0000000..91cb938
--- /dev/null
+++ b/gfapy/line/edge/link/references.py
@@ -0,0 +1,7 @@
+class References:
+
+ def _process_not_unique(self, previous):
+ if self.is_complement(previous):
+ pass
+ else:
+ super()._process_not_unique(previous)
diff --git a/gfapy/line/edge/link/to_gfa2.py b/gfapy/line/edge/link/to_gfa2.py
new file mode 100644
index 0000000..75abd08
--- /dev/null
+++ b/gfapy/line/edge/link/to_gfa2.py
@@ -0,0 +1,36 @@
+class ToGFA2:
+
+ @property
+ def from_coords(self):
+ """GFA2 positions of the alignment on the from segment.
+
+ Returns
+ -------
+ (Integer|Lastpos,Integer|Lastpos)
+ begin and end
+
+ Raises
+ ------
+ gfapy.ValueError
+ If the overlap is not specified.
+ gfapy.RuntimeError
+ If the segment length cannot be determined, because the segment line is unknown.
+ gfapy.ValueError
+ If the segment length is not specified in the segment line.
+ """
+ self._check_overlap()
+ if self.from_orient == "+":
+ from_l = self._lastpos_of("from")
+ return [from_l - self.overlap.length_on_reference(), from_l]
+ else:
+ return [0, self.overlap.length_on_reference()]
+
+ @property
+ def to_coords(self):
+ """GFA2 positions of the alignment on the **to** segment."""
+ self._check_overlap()
+ if self.to_orient == "+":
+ return [0, self.overlap.length_on_query()]
+ else:
+ to_l = self._lastpos_of("to")
+ return [to_l - self.overlap.length_on_query(), to_l]
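+
+  # Example (illustration only): for "L A + B + 4M", with segment A of
+  # length 10, from_coords would be [6, 10] (10 being a last position,
+  # i.e. 10$) and to_coords would be [0, 4].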
diff --git a/gfapy/line/fragment/__init__.py b/gfapy/line/fragment/__init__.py
new file mode 100644
index 0000000..4ffffff
--- /dev/null
+++ b/gfapy/line/fragment/__init__.py
@@ -0,0 +1 @@
+from .fragment import Fragment
diff --git a/gfapy/line/fragment/fragment.py b/gfapy/line/fragment/fragment.py
new file mode 100644
index 0000000..c8a9cb6
--- /dev/null
+++ b/gfapy/line/fragment/fragment.py
@@ -0,0 +1,26 @@
+from .references import References
+from .validation import Validation
+from ..line import Line
+
+class Fragment(References, Validation, Line):
+ """
+ A fragment line of a GFA2 file
+ """
+ RECORD_TYPE = "F"
+ POSFIELDS = ["sid", "external", "s_beg", "s_end", "f_beg", "f_end",
+ "alignment"]
+ PREDEFINED_TAGS = ["VN", "TS"]
+ STORAGE_KEY = "external"
+ DATATYPE = {
+ "sid" : "identifier_gfa2",
+ "external" : "oriented_identifier_gfa2",
+ "s_beg" : "position_gfa2",
+ "s_end" : "position_gfa2",
+ "f_beg" : "position_gfa2",
+ "f_end" : "position_gfa2",
+ "alignment" : "alignment_gfa2",
+ "TS" : "i",
+ }
+ REFERENCE_FIELDS = ["sid"]
+
+Fragment._apply_definitions()
diff --git a/gfapy/line/fragment/references.py b/gfapy/line/fragment/references.py
new file mode 100644
index 0000000..0fa68c8
--- /dev/null
+++ b/gfapy/line/fragment/references.py
@@ -0,0 +1,17 @@
+import gfapy
+
+class References():
+
+ def _initialize_references(self):
+ s = self._gfa.segment(self.get("sid"))
+ if s is None:
+ if self._gfa._segments_first_order:
+ raise gfapy.NotFoundError()
+ s = gfapy.line.segment.GFA2({"sid": self.get("sid"),
+ "slen": 1,
+ "sequence": "*"},
+ version = "gfa2",
+ virtual = True)
+ s.connect(self._gfa)
+ self._set_existing_field("sid", s, set_reference = True)
+ s._add_reference(self, "fragments")
diff --git a/gfapy/line/fragment/validation.py b/gfapy/line/fragment/validation.py
new file mode 100644
index 0000000..d5aa17c
--- /dev/null
+++ b/gfapy/line/fragment/validation.py
@@ -0,0 +1,21 @@
+import gfapy
+
+class Validation:
+
+ def validate_positions(self):
+ "Checks that positions suffixed by $ are the last position of segments"
+ if self.is_connected():
+ seg = self.get("sid")
+ seq = seg.sequence
+ if not gfapy.is_placeholder(seq):
+ seqlen = len(seq)
+ for sfx in ["beg", "end"]:
+ fn = "s_"+sfx
+ pos = self.get(fn)
+ if gfapy.islastpos(pos):
+ if pos != seqlen:
+ raise gfapy.InconsistencyError(
+ "Fragment: {}\n".format(str(self))+
+ "Field {}: $ after ".format(str(fn))+
+ "non-last position ({})\n".format(str(pos))+
+ "Segment: {}".format(str(seg)))
diff --git a/gfapy/line/gap/__init__.py b/gfapy/line/gap/__init__.py
new file mode 100644
index 0000000..ec13a42
--- /dev/null
+++ b/gfapy/line/gap/__init__.py
@@ -0,0 +1 @@
+from .gap import Gap
diff --git a/gfapy/line/gap/gap.py b/gfapy/line/gap/gap.py
new file mode 100644
index 0000000..42bb779
--- /dev/null
+++ b/gfapy/line/gap/gap.py
@@ -0,0 +1,23 @@
+import gfapy
+from .references import References
+from ..line import Line
+
+class Gap(References, Line):
+ """
+ A gap line of a GFA2 file
+ """
+ RECORD_TYPE = "G"
+ POSFIELDS = ["gid", "sid1", "sid2", "disp", "var"]
+ FIELD_ALIAS = { "name" : "gid" }
+ NAME_FIELD = "gid"
+ STORAGE_KEY = "name"
+ DATATYPE = {
+ "gid" : "optional_identifier_gfa2",
+ "sid1" : "oriented_identifier_gfa2",
+ "sid2" : "oriented_identifier_gfa2",
+ "disp" : "i",
+ "var" : "optional_integer"
+ }
+ REFERENCE_FIELDS = ["sid1", "sid2"]
+
+Gap._apply_definitions()
diff --git a/gfapy/line/gap/references.py b/gfapy/line/gap/references.py
new file mode 100644
index 0000000..d94f889
--- /dev/null
+++ b/gfapy/line/gap/references.py
@@ -0,0 +1,43 @@
+import gfapy
+
+class References:
+ def _initialize_references(self):
+ for snum in [1,2]:
+ sid = "sid{}".format(snum)
+ orient = self.get(sid).orient
+ linesymbol = self.get(sid).line
+ s = self._gfa.segment(linesymbol)
+ if s is None:
+ if self._gfa._segments_first_order:
+ raise gfapy.NotFoundError()
+ s = gfapy.line.segment.GFA2({"sid" : linesymbol,
+ "slen" : 1,
+ "sequence" : "*"},
+ version = "gfa2",
+ virtual = True)
+ s.connect(self._gfa)
+ self._set_existing_field(sid, gfapy.OrientedLine(s,orient),
+ set_reference = True)
+ s._add_reference(self, self._refkey_for_s(snum))
+
+ def _refkey_for_s(self, snum):
+ a = [self.sid1.orient, self.sid2.orient]
+ if a == ["+", "+"]:
+ return "gaps_R" if (snum == 1) else "gaps_L"
+ elif a == ["+", "-"]:
+ return "gaps_R"
+ elif a == ["-", "+"]:
+ return "gaps_L"
+ elif a == ["-", "-"]:
+ return "gaps_L" if (snum == 1) else "gaps_R"
+ else:
+ raise gfapy.AssertionError("Bug found, please report\n"+
+ "snum: {}".format(snum))
+
+ def _import_field_references(self, previous):
+ for sid in ["sid1", "sid2"]:
+ orient = self.get(sid).orient
+ linesymbol = self.get(sid).line
+ self._set_existing_field(sid,
+ gfapy.OrientedLine(self._gfa.segment(linesymbol),orient),
+ set_reference = True)
diff --git a/gfapy/line/group/__init__.py b/gfapy/line/group/__init__.py
new file mode 100644
index 0000000..1c1d7bb
--- /dev/null
+++ b/gfapy/line/group/__init__.py
@@ -0,0 +1,4 @@
+from .group import Group
+from .path import Path
+from .ordered import Ordered
+from .unordered import Unordered
diff --git a/gfapy/line/group/gfa2/__init__.py b/gfapy/line/group/gfa2/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/gfapy/line/group/gfa2/references.py b/gfapy/line/group/gfa2/references.py
new file mode 100644
index 0000000..5219c84
--- /dev/null
+++ b/gfapy/line/group/gfa2/references.py
@@ -0,0 +1,51 @@
+import gfapy
+
+class References:
+
+ def _prepare_and_check_ref(self, ref):
+ if isinstance(ref, str):
+ ref = self._line_for_ref_symbol(ref)
+ self._check_ref_class(ref)
+ self._check_ref_connection(ref)
+ self._check_ref_not_self(ref)
+ return ref
+
+ def _check_ref_class(self, item):
+ if item.__class__ not in [
+ gfapy.line.edge.GFA2,
+ gfapy.line.segment.GFA2,
+ gfapy.line.gap.Gap,
+ gfapy.line.group.Ordered,
+ self.__class__]:
+ raise gfapy.ArgumentError(
+ "Line: {}\n".format(self)+
+ "Cannot add items of class {}\n".format(item.__class__.__name__)+
+ "Only GFA2 edges, segments, gaps, groups[*] "+
+ "can be added\n(* = unordered groups to unordered groups only).")
+
+ def _check_ref_connection(self, item):
+ if item.line.gfa != self._gfa:
+ raise gfapy.ArgumentError(
+ "Line: {}\n".format(self)+
+ "Item: {}".format(repr(item))+
+ "The item added to the group must be connected\n"+
+ "to the same GFA object as the group")
+
+ def _check_ref_not_self(self, item):
+ if (item.line == self):
+ raise gfapy.RuntimeError(
+ "Line: {}\n".format(self)+
+ "Item is the line itself\n"+
+ "A group is not allowed to refer to itself")
+
+ def _line_for_ref_symbol(self, ref):
+ line = self._gfa.line(ref)
+ if line is None:
+ if self._gfa._segments_first_order:
+ raise gfapy.NotFoundError("Group: {}\n".format(self)+
+ "requires a non-existing ref with ID {}".format(ref))
+ line = gfapy.line.unknown.Unknown({"name" : ref}, virtual = True,
+ version = "gfa2")
+ self._gfa.add_line(line)
+ line._add_reference(self, "paths" if (self.record_type == "O") else "sets")
+ return line
diff --git a/gfapy/line/group/gfa2/same_id.py b/gfapy/line/group/gfa2/same_id.py
new file mode 100644
index 0000000..419fbfc
--- /dev/null
+++ b/gfapy/line/group/gfa2/same_id.py
@@ -0,0 +1,26 @@
+import gfapy
+
+class SameID:
+
+ def _process_not_unique(self, previous):
+ self._gfa = previous.gfa
+ self._initialize_references()
+ cur_items = self.get("items")
+ self._substitute_virtual_line(previous)
+ self._set_existing_field("items", self.get("items") + cur_items,
+ set_reference = True)
+ self._import_tags_of_previous_group_definition(previous)
+ return None
+
+ def _import_tags_of_previous_group_definition(self, previous):
+ for tag in previous.tagnames:
+ prv = previous.get(tag)
+ cur = self.get(tag)
+ if cur:
+ if cur != prv:
+ raise gfapy.NotUniqueError(
+ "Same tag defined differently in "+
+ "multiple group lines with same ID\n"+
+ "Previous tag definition: {}\n".format(prv)+
+ "New tag definition: {}\n".format(cur)+
+ "Group ID: {}".format(self.name))
+ else:
+ self.set(tag, prv)
diff --git a/gfapy/line/group/group.py b/gfapy/line/group/group.py
new file mode 100644
index 0000000..4e52177
--- /dev/null
+++ b/gfapy/line/group/group.py
@@ -0,0 +1,6 @@
+from ..line import Line
+class Group(Line):
+ """
+  A group is a U, O or P line.
+ """
+ pass
diff --git a/gfapy/line/group/ordered/__init__.py b/gfapy/line/group/ordered/__init__.py
new file mode 100644
index 0000000..a1e0137
--- /dev/null
+++ b/gfapy/line/group/ordered/__init__.py
@@ -0,0 +1 @@
+from .ordered import Ordered
diff --git a/gfapy/line/group/ordered/captured_path.py b/gfapy/line/group/ordered/captured_path.py
new file mode 100644
index 0000000..7d39d11
--- /dev/null
+++ b/gfapy/line/group/ordered/captured_path.py
@@ -0,0 +1,221 @@
+import gfapy
+
+class CapturedPath:
+
+ @property
+ def captured_segments(self):
+ return [ x for x in self.captured_path if isinstance(x.line, gfapy.line.segment.GFA2) ]
+
+ @property
+ def captured_edges(self):
+ return [ x for x in self.captured_path if isinstance(x.line, gfapy.line.edge.GFA2) ]
+
+ @property
+ def captured_path(self):
+ if not self.is_connected():
+ raise gfapy.RuntimeError(
+ "Captured path cannot be computed\n"+
+ "Line is not connected to a GFA instance\n"+
+ "Line: {}".format(self))
+ return self._compute_captured_path()[0]
+
+ def _compute_captured_path(self):
+ path = []
+ prev_edge = False
+ for item in self.items:
+ path, prev_edge = self._push_item_on_se_path(path, prev_edge, item)
+ return path, prev_edge
+
+ def _push_item_on_se_path(self, path, prev_edge, item):
+ if isinstance(item.line, str):
+ raise gfapy.RuntimeError(
+ "Captured path cannot be computed; a reference has not been resolved\n"+
+ "Line: {}\n".format(self)+
+ "Unresolved reference: {} (String found)".format(item.line))
+ elif isinstance(item.line, gfapy.line.segment.GFA2):
+ if not item.line.is_connected():
+ raise gfapy.RuntimeError(
+ "Captured path cannot be computed; item is not connected\n"+
+ "Line: {}\n".format(self)+
+ "Item: {}".format(item.line))
+ self._push_segment_on_se_path(path, prev_edge, item)
+ prev_edge = False
+ elif isinstance(item.line, gfapy.line.edge.GFA2):
+ if not item.line.is_connected():
+ raise gfapy.RuntimeError(
+ "Captured path cannot be computed; item is not connected\n"+
+ "Line: {}\n".format(self)+
+ "Item: {}".format(item.line))
+ if not path:
+ self._push_first_edge_on_se_path(path, self.items)
+ else:
+ self._push_nonfirst_edge_on_se_path(path, item)
+ prev_edge = True
+ elif isinstance(item.line, gfapy.line.group.Ordered):
+ if not item.line.is_connected():
+ raise gfapy.RuntimeError(
+ "Captured path cannot be computed; item is not connected\n"+
+ "Line: {}\n".format(self)+
+ "Item: {}".format(item.line))
+ subpath, prev_edge_subpath = item.line._compute_captured_path()
+ if not subpath:
+ raise gfapy.AssertionError()
+ if item.orient == "+":
+ for subpath_item in subpath:
+ path, prev_edge = self._push_item_on_se_path(path, prev_edge,
+ subpath_item)
+ else:
+ for subpath_item in reversed(subpath):
+ path, prev_edge = self._push_item_on_se_path(path, prev_edge,
+ subpath_item.inverted())
+ prev_edge = prev_edge_subpath
+ elif isinstance(item.line, gfapy.line.unknown.Unknown):
+ raise gfapy.RuntimeError(
+ "Captured path cannot be computed; a reference has not been resolved\n"+
+ "Line: {}\n".format(self)+
+ "Unresolved reference: {} (Virtual unknown line)".format(item.name))
+ else:
+ raise gfapy.TypeError(
+ "Line: {}\t".format(self)+
+ "Cannot compute captured path:\t"+
+ "Error: items of type {} are not supported\t".format(item.line.__class__.__name__)+
+ "Unsupported item: {}".format(item))
+ return path, prev_edge
+
+ def _push_first_edge_on_se_path(self, path, items):
+ oriented_edge = items[0]
+ oss = [oriented_edge.line.sid1, oriented_edge.line.sid2]
+ if oriented_edge.orient == "-":
+ for i in range(len(oss)):
+ oss[i].invert()
+ if len(items) > 1:
+ nextitem = items[1]
+ if isinstance(nextitem.line, gfapy.line.segment.GFA2):
+ if nextitem == oss[0]:
+ oss.reverse()
+ # if oss does not include nextitem an error will be raised
+ # in the next iteration, so does not need to be handled here
+ elif isinstance(nextitem.line, gfapy.line.edge.GFA2):
+ oss_of_next = [nextitem.line.sid1, nextitem.line.sid2]
+ if oriented_edge.orient == "-":
+ for i in range(len(oss_of_next)):
+ oss_of_next[i].invert()
+ if oss[0] in oss_of_next:
+ oss.reverse()
+ # if oss_of_next has no element in common with oss, an error will be
+ # raised in the next iteration, so it does not need to be handled here
+ elif isinstance(nextitem.line, gfapy.line.group.Ordered):
+ subpath = nextitem.line.captured_path
+ if not subpath: return # does not need to be further handled here
+ if nextitem.orient == "+":
+ firstsubpathsegment = subpath[0]
+ else:
+ firstsubpathsegment = subpath[-1].inverted()
+ if firstsubpathsegment == oss[0]:
+ oss.reverse()
+ # if oss does not include firstsubpathsegment, an error will be
+ # raised in the next iteration, so it does not need to be handled here
+ else:
+ pass
+ # other cases do not need to be handled here, as they will be handled
+ # in the next iteration of _push_item_on_se_path
+ path.append(oss[0])
+ path.append(oriented_edge)
+ path.append(oss[1])
+
+ def _push_nonfirst_edge_on_se_path(self, path, oriented_edge):
+ prev_os = path[-1]
+ path.append(oriented_edge)
+ possible_prev = [oriented_edge.line.sid1, oriented_edge.line.sid2]
+ if oriented_edge.orient == "-":
+ for i, v in enumerate(possible_prev):
+ possible_prev[i].invert()
+ if prev_os == possible_prev[0]:
+ path.append(possible_prev[1])
+ elif prev_os == possible_prev[1]:
+ path.append(possible_prev[0])
+ else:
+ raise gfapy.NotFoundError(
+ "Path is not valid, elements are not contiguous\n"+
+ "Line: {}\n".format(self)+
+ "Previous elements:\n"+
+ "".join([" {} ({})\n".format(e, e.line) for e in path])+
+ "Current element:\n"+
+ " {} ({})".format(oriented_edge, oriented_edge.line))
+
+ def _push_segment_on_se_path(self, path, prev_edge, oriented_segment):
+ if path:
+ if isinstance(path[-1].line, gfapy.line.segment.GFA2):
+ if prev_edge:
+ self._check_s_is_as_expected(path, oriented_segment)
+ return # do not add segment, as it is already there
+ else:
+ path.append(self._find_edge_from_path_to_segment(path, oriented_segment))
+ elif isinstance(path[-1].line, gfapy.line.edge.GFA2):
+ self._check_s_to_e_contiguity(path, oriented_segment)
+ else:
+ raise gfapy.AssertionError()
+ path.append(oriented_segment)
+
+ def _check_s_is_as_expected(self, path, oriented_segment):
+ if path[-1] != oriented_segment:
+ raise gfapy.InconsistencyError(
+ "Path is not valid\n"+
+ "Line: {}\n".format(self)+
+ "Previous elements:\n"+
+ "".join([" {} ({})\n".format(e, e.line) for e in path[0:-2]])+
+ "Expected element:\n"+
+ " {} ({})\n".format(path[-1], path[-1].line)+
+ "Current element:\n"+
+ " {} ({})\n".format(segment, segment.line))
+
+ def _check_s_to_e_contiguity(self, path, oriented_segment):
+ # check that segment is an extremity of path[-1]
+ # and that the other extremity is path[-2]
+ if not (path[-1].sid1 == oriented_segment and path[-1].sid2 == path[-2]) and \
+ not (path[-1].sid1 == path[-2] and path[-1].sid2 == oriented_segment):
+ raise gfapy.InconsistencyError(
+ "Path is not valid\n"+
+ "Line: {}\n".format(self)+
+ "Previous elements:\n"+
+ "".join([" {} ({})\n".format(e, e.line) for e in path])+
+ "Current element:\n"+
+ " {} ({})\n".format(oriented_segment, oriented_segment.line))
+
+ def _find_edge_from_path_to_segment(self, path, oriented_segment):
+ edges = []
+ for edge in oriented_segment.line.edges:
+ if (edge.sid1 == oriented_segment and edge.sid2 == path[-1]) or \
+ (edge.sid1 == path[-1] and edge.sid2 == oriented_segment):
+ edges.append(gfapy.OrientedLine(edge, "+"))
+ elif (edge.sid1 == oriented_segment.inverted() and
+ edge.sid2 == path[-1].inverted()) or\
+ (edge.sid1 == path[-1].inverted() and
+ edge.sid2 == oriented_segment.inverted()):
+ edges.append(gfapy.OrientedLine(edge, "-"))
+ if len(edges) == 0:
+ raise gfapy.NotFoundError(
+ "Path is not valid, segments are not contiguous\n"+
+ "Line: {}\n".format(self)+
+ "Previous elements:\n"+
+ "".join([" {} ({})\n".format(e, e.line) for e in path])+
+ "Current element:\n"+
+ " {} ({})\n".format(oriented_segment, oriented_segment.line))
+ elif len(edges) > 1:
+ raise gfapy.NotUniqueError(
+ "Path is not unique\n"+
+ "Line: {}\n".format(self)+
+ "Previous elements:\n"+
+ "".join([" {} ({})\n".format(e, e.line) for e in path])+
+ "Current element:\n"+
+ " {} ({})\n".format(oriented_segment, oriented_segment.line)+
+ "Possible edges\n"+
+ "".join([" {} ({})\n".format(e, e.line) for e in edges]))
+ return edges[0]
+
+ def _check_captured_path_elem_connected(self, item):
+ if not item.is_connected():
+ raise gfapy.RuntimeError(
+ "Cannot compute induced set\n"+
+ "Non-connected element found\n"+
+ "Item: {}\nLine: {}".format(item, self))
diff --git a/gfapy/line/group/ordered/ordered.py b/gfapy/line/group/ordered/ordered.py
new file mode 100644
index 0000000..a24a631
--- /dev/null
+++ b/gfapy/line/group/ordered/ordered.py
@@ -0,0 +1,25 @@
+from ..gfa2.references import References as GFA2_References
+from ..gfa2.same_id import SameID
+from .references import References as Ordered_References
+from .captured_path import CapturedPath
+from .to_gfa1 import ToGFA1
+from .. import Group
+
+class Ordered(Ordered_References, CapturedPath, GFA2_References, SameID,
+ ToGFA1, Group):
+ """
+ An ordered group line of a GFA2 file
+ """
+
+ RECORD_TYPE = "O"
+ POSFIELDS = ["pid", "items"]
+ FIELD_ALIAS = { "name" : "pid" }
+ DATATYPE = {
+ "pid" : "optional_identifier_gfa2",
+ "items" : "oriented_identifier_list_gfa2",
+ }
+ NAME_FIELD = "pid"
+ REFERENCE_FIELDS = ["items"]
+ DEPENDENT_LINES = ["paths", "sets"]
+
+Ordered._apply_definitions()
diff --git a/gfapy/line/group/ordered/references.py b/gfapy/line/group/ordered/references.py
new file mode 100644
index 0000000..a2617ac
--- /dev/null
+++ b/gfapy/line/group/ordered/references.py
@@ -0,0 +1,79 @@
+import gfapy
+
+class References:
+
+ def append_item(self, item):
+ """
+ Add an item to the group as last item.
+
+ Parameters
+ ----------
+ item : gfapy.Line or str
+ GFA2 edge, segment, gap or group line to add.
+ """
+ if not self.is_connected():
+ self._add_item_to_unconnected_group(item, True)
+ else:
+ self._add_item_to_connected_group(item, True)
+ self.compute_induced_set() # check contiguity
+
+ def prepend_item(self, item):
+ """
+ Add an item to the group as first item.
+
+ Parameters
+ ----------
+ item : gfapy.Line or str
+ GFA2 edge, segment, gap or group line to add.
+ """
+ if not self.is_connected():
+ self._add_item_to_unconnected_group(item, False)
+ else:
+ self._add_item_to_connected_group(item, False)
+ self.compute_induced_set() # check contiguity
+
+ def rm_first_item(self):
+ """
+ Remove the first item from the group.
+ """
+ if not self.is_connected():
+ self.items = self.items[1:]
+ else:
+ self.items[0].update_reference(self, "paths")
+ self._delete_reference(self.items[0], "items")
+ self.compute_induced_set() # check contiguity
+
+ def rm_last_item(self):
+ """
+ Remove the last item from the group.
+ """
+ if not self.is_connected():
+ self.items = self.items[0:-1]
+ else:
+ self.items[-1].update_reference(self, "paths")
+ self._delete_reference(self.items[-1], "items")
+ self.compute_induced_set() # check contiguity
+
+ def _add_item_to_unconnected_group(self, item, append = True):
+ if isinstance(item.line, gfapy.Line):
+ item.line = item.name
+ if append:
+ self.items.append(item)
+ else:
+ self.items.insert(0, item)
+
+ def _add_item_to_connected_group(self, item, append = True):
+ item.line = self.prepare_and_check_ref(item.line)
+ self._add_reference(item, "items", append = append)
+
+ def _initialize_references(self):
+ for i in range(len(self.items)):
+ self.items[i].line = self._line_for_ref_symbol(self.items[i].line)
diff --git a/gfapy/line/group/ordered/to_gfa1.py b/gfapy/line/group/ordered/to_gfa1.py
new file mode 100644
index 0000000..e6aeb41
--- /dev/null
+++ b/gfapy/line/group/ordered/to_gfa1.py
@@ -0,0 +1,23 @@
+import gfapy
+
+class ToGFA1:
+
+ def _to_gfa1_a(self):
+ a = ["P"]
+ if gfapy.is_placeholder(self.name):
+ raise gfapy.ValueError(
+ "Conversion to GFA1 failed\n"+
+ "The path name is a placeholder\t"+
+ "Line: {}".format(self))
+ a.append(self.name)
+ segment_names = []
+ for oline in self.captured_segments:
+ gfapy.Field._validate_gfa_field(oline.name, "segment_name_gfa1")
+ segment_names.append(str(oline))
+ a.append(",".join(segment_names))
+ overlaps = []
+ for oline in self.captured_edges:
+ gfapy.Field._validate_gfa_field(oline.line.overlap, "alignment_gfa1")
+ overlaps.append(str(oline.line.overlap))
+ a.append(",".join(overlaps))
+ return a
diff --git a/gfapy/line/group/path/__init__.py b/gfapy/line/group/path/__init__.py
new file mode 100644
index 0000000..2afc89e
--- /dev/null
+++ b/gfapy/line/group/path/__init__.py
@@ -0,0 +1 @@
+from .path import Path
diff --git a/gfapy/line/group/path/captured_path.py b/gfapy/line/group/path/captured_path.py
new file mode 100644
index 0000000..47abf3b
--- /dev/null
+++ b/gfapy/line/group/path/captured_path.py
@@ -0,0 +1,38 @@
+import gfapy
+
+class CapturedPath:
+
+ @property
+ def captured_edges(self):
+ if not self.is_connected():
+ raise gfapy.RuntimeError(
+ "Captured path cannot be computed\n"+
+ "Line is not connected to a GFA instance\n"+
+ "Line: {}".format(self))
+ return self.links
+
+ @property
+ def captured_segments(self):
+ if not self.is_connected():
+ raise gfapy.RuntimeError(
+ "Captured path cannot be computed\n"+
+ "Line is not connected to a GFA instance\n"+
+ "Line: {}".format(self))
+ return self.segment_names
+
+ @property
+ def captured_path(self):
+ if not self.is_connected():
+ raise gfapy.RuntimeError(
+ "Captured path cannot be computed\n"+
+ "Line is not connected to a GFA instance\n"+
+ "Line: {}".format(self))
+ retval = []
+ for i in range(len(self.segment_names) - 1):
+ retval.append(self.segment_names[i])
+ retval.append(self.links[i])
+ retval.append(self.segment_names[-1])
+ if len(self.segment_names) == len(self.links):
+ retval.append(self.links[-1])
+ retval.append(self.segment_names[0])
+ return retval
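+
+# A minimal sketch (hypothetical names, assuming a connected GFA1 path such as
+# "P p1 A+,B+ 10M"): the captured path alternates the oriented segments with
+# the links connecting them; for a circular path (as many overlaps as
+# segments) the closing link and the first segment are appended again.
+#
+#   p = g.line("p1")        # g is a connected gfapy.Gfa instance
+#   p.captured_segments     # [A+, B+]
+#   p.captured_edges        # the link(s) of the path
+#   p.captured_path         # [A+, <link A+ B+ 10M>, B+]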
diff --git a/gfapy/line/group/path/path.py b/gfapy/line/group/path/path.py
new file mode 100644
index 0000000..f8c11f4
--- /dev/null
+++ b/gfapy/line/group/path/path.py
@@ -0,0 +1,22 @@
+from .topology import Topology
+from .references import References
+from .validation import Validation
+from .captured_path import CapturedPath
+from .to_gfa2 import ToGFA2
+from ..group import Group
+
+class Path(Topology, References, Validation, CapturedPath, ToGFA2, Group):
+ """A path line of a GFA1 file"""
+ RECORD_TYPE = "P"
+ POSFIELDS = ["path_name", "segment_names", "overlaps"]
+ FIELD_ALIAS = { "name" : "path_name" }
+ DATATYPE = {
+ "path_name" : "path_name_gfa1",
+ "segment_names" : "oriented_identifier_list_gfa1",
+ "overlaps" : "alignment_list_gfa1",
+ }
+ NAME_FIELD = "path_name"
+ REFERENCE_FIELDS = ["segment_names", "overlaps"]
+ OTHER_REFERENCES = ["links"]
+
+Path._apply_definitions()
diff --git a/gfapy/line/group/path/references.py b/gfapy/line/group/path/references.py
new file mode 100644
index 0000000..c669e28
--- /dev/null
+++ b/gfapy/line/group/path/references.py
@@ -0,0 +1,83 @@
+import gfapy
+
+class References:
+
+ def _compute_required_links(self):
+ """
+ Computes the list of links which are required to support
+ the path.
+
+ Returns
+ -------
+ list of (gfapy.OrientedLine, gfapy.OrientedLine, gfapy.CIGAR)
+ A list whose elements are 3-tuples
+ (from oriented segment, to oriented segment, cigar)
+ """
+ has_undef_overlaps = self._undef_overlaps()
+ retval = []
+ is_circular = self.is_circular()
+ for i in range(len(self.segment_names)):
+ j = i+1
+ if j == len(self.segment_names):
+ if is_circular:
+ j = 0
+ else:
+ break
+ if has_undef_overlaps:
+ cigar = gfapy.AlignmentPlaceholder()
+ else:
+ cigar = self.overlaps[i]
+ retval.append([self.segment_names[i], self.segment_names[j], cigar])
+ return retval
+
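+ # Worked example (hypothetical path, for illustration only): for a linear
+ # path with segment_names = [A+, B+, C+] and overlaps = [10M, 8M], the
+ # required links are (A+, B+, 10M) and (B+, C+, 8M); for a circular path
+ # (three overlaps) a third link (C+, A+, overlaps[2]) closing the cycle
+ # is also required.
+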
+ def _undef_overlaps(self):
+ """
+ Are the overlaps a single "*"? This is a compact representation of
+ a linear path where all CIGARs are "*".
+
+ Returns
+ -------
+ bool
+ """
+ return len(self.overlaps) == 1 and gfapy.is_placeholder(self.overlaps[0])
+
+ def _initialize_references(self):
+ self._initialize_links()
+ self._initialize_segments()
+
+ def _initialize_links(self):
+ self._refs["links"] = []
+ for from_segment, to_segment, cigar in self._compute_required_links():
+ l = None
+ orient = "+"
+ if self._gfa.segment(from_segment.line) and self._gfa.segment(to_segment.line):
+ l = self._gfa._search_link(from_segment, to_segment, cigar)
+ if l is not None and l.is_compatible_complement(from_segment, to_segment, cigar):
+ orient = "-"
+ if l is None:
+ if self._gfa._segments_first_order:
+ raise gfapy.NotFoundError("Path: {}\n".format(self)+
+ "requires a non-existing link:\n"+
+ "from={} to={} cigar={}".format(from_segment, to_segment, cigar))
+ l = gfapy.line.edge.Link({"from_segment" : from_segment.line,
+ "from_orient" : from_segment.orient,
+ "to_segment" : to_segment.line,
+ "to_orient" : to_segment.orient,
+ "overlap" : cigar},
+ virtual = True,
+ version = "gfa1")
+ l.connect(self._gfa)
+ self._refs["links"].append(gfapy.OrientedLine(l,orient))
+ l._add_reference(self, "paths")
+
+ def _initialize_segments(self):
+ for sn_with_o in self.segment_names:
+ s = self._gfa.segment(sn_with_o.line)
+ sn_with_o.line = s
+ s._add_reference(self, "paths")
+
+ def _backreference_keys(self, ref, key_in_ref):
+ if ref.record_type == "L":
+ return ["links"]
+ elif ref.record_type == "S":
+ return ["segment_names"]
diff --git a/gfapy/line/group/path/to_gfa2.py b/gfapy/line/group/path/to_gfa2.py
new file mode 100644
index 0000000..de1f188
--- /dev/null
+++ b/gfapy/line/group/path/to_gfa2.py
@@ -0,0 +1,15 @@
+import gfapy
+
+class ToGFA2:
+
+ def _to_gfa2_a(self):
+ items = []
+ for oline in self.captured_path:
+ if isinstance(oline.line, gfapy.line.segment.GFA1):
+ items.append(str(oline))
+ elif isinstance(oline.line, gfapy.line.edge.Link):
+ items.append(oline.line.eid + str(oline.orient))
+ a = ["O"]
+ a.append(self.field_to_s("path_name"))
+ a.append(" ".join(items))
+ return a
diff --git a/gfapy/line/group/path/topology.py b/gfapy/line/group/path/topology.py
new file mode 100644
index 0000000..5bd02ae
--- /dev/null
+++ b/gfapy/line/group/path/topology.py
@@ -0,0 +1,20 @@
+class Topology:
+
+ def is_circular(self):
+ """
+ Is the path circular? In this case the number of CIGARs must be
+ equal to the number of segments.
+
+ Returns
+ -------
+ bool
+ """
+ return len(self.overlaps) == len(self.segment_names)
+
+ def is_linear(self):
+ """
+ Is the path linear? This is the case when the number of CIGARs
+ is equal to the number of segments minus 1, or the CIGARs are
+ represented by a single "*".
+ """
+ return not self.is_circular()
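+
+# Illustration (hypothetical path line): "P p A+,B+,C+ 10M,10M" has three
+# segments and two overlaps, so is_linear() is True; with a third overlap
+# ("10M,10M,10M") the counts match and is_circular() is True.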
diff --git a/gfapy/line/group/path/validation.py b/gfapy/line/group/path/validation.py
new file mode 100644
index 0000000..1e254ea
--- /dev/null
+++ b/gfapy/line/group/path/validation.py
@@ -0,0 +1,22 @@
+import gfapy
+
+class Validation:
+ def _validate_lists_size(self):
+ n_overlaps = len(self.overlaps)
+ n_segments = len(self.segment_names)
+ if n_overlaps == n_segments - 1:
+ # case 1: linear path
+ return True
+ elif n_overlaps == 1 and not self.overlaps[0]:
+ # case 2: linear path, single "*" to represent overlaps which are all "*"
+ return True
+ elif n_overlaps == n_segments:
+ # case 3: circular path
+ pass
+ else:
+ raise gfapy.InconsistencyError(
+ "Path has {} oriented segments, ".format(n_segments)+
+ "but {} overlaps".format(n_overlaps))
+
+ def _validate_record_type_specific_info(self):
+ self._validate_lists_size()
diff --git a/gfapy/line/group/unordered/__init__.py b/gfapy/line/group/unordered/__init__.py
new file mode 100644
index 0000000..1e28af4
--- /dev/null
+++ b/gfapy/line/group/unordered/__init__.py
@@ -0,0 +1 @@
+from .unordered import Unordered
diff --git a/gfapy/line/group/unordered/induced_set.py b/gfapy/line/group/unordered/induced_set.py
new file mode 100644
index 0000000..eddf148
--- /dev/null
+++ b/gfapy/line/group/unordered/induced_set.py
@@ -0,0 +1,89 @@
+import gfapy
+
+class InducedSet:
+
+ @property
+ def induced_set(self):
+ if not self.is_connected():
+ raise gfapy.RuntimeError(
+ "Induced set cannot be computed\n"+
+ "Line is not connected to a GFA instance\n"+
+ "Line: {}".format(self))
+ iss = self.induced_segments_set
+ ise = self._compute_induced_edges_set(iss)
+ return iss + ise
+
+ @property
+ def induced_edges_set(self):
+ if not self.is_connected():
+ raise gfapy.RuntimeError(
+ "Induced set cannot be computed\n"+
+ "Line is not connected to a GFA instance\n"+
+ "Line: {}".format(self))
+ return self._compute_induced_edges_set(self.induced_segments_set)
+
+ @property
+ def induced_segments_set(self):
+ if not self.is_connected():
+ raise gfapy.RuntimeError(
+ "Induced set cannot be computed\n"+
+ "Line is not connected to a GFA instance\n"+
+ "Line: {}".format(self))
+ segments_set = list()
+ for item in self.items:
+ if isinstance(item, str):
+ raise gfapy.RuntimeError(
+ "Induced set cannot be computed; a reference has not been resolved\n"+
+ "Line: {}\n".format(self)+
+ "Unresolved reference: {} (String found)".format(item.line))
+ elif isinstance(item, gfapy.line.segment.GFA2):
+ self._check_induced_set_elem_connected(item)
+ segments_set.append(item)
+ elif isinstance(item, gfapy.line.edge.GFA2):
+ self._check_induced_set_elem_connected(item)
+ for sl in [item.sid1.line, item.sid2.line]:
+ self._check_induced_set_elem_connected(sl)
+ segments_set.append(sl)
+ elif isinstance(item, gfapy.line.group.Ordered):
+ self._check_induced_set_elem_connected(item)
+ subset = item.captured_segments
+ assert(subset)
+ for elem in subset:
+ segments_set.append(elem.line)
+ elif isinstance(item, gfapy.line.group.Unordered):
+ self._check_induced_set_elem_connected(item)
+ subset = item.induced_segments_set
+ assert(subset)
+ for elem in subset:
+ segments_set.append(elem)
+ elif isinstance(item, gfapy.line.Unknown):
+ raise gfapy.RuntimeError(
+ "Induced set cannot be computed; a reference has not been resolved\n"+
+ "Line: {}\n".format(self)+
+ "Unresolved reference: {} (Virtual unknown line)".format(item.name))
+ else:
+ raise gfapy.TypeError(
+ "Line: {}\t".format(self)+
+ "Cannot compute induced set:\t"+
+ "Error: items of type {} are not supported\t".format(item.__class__.__name__)+
+ "Unsupported item: {}".format(item))
+ unique_ids = set()
+ return [e for e in segments_set \
+ if id(e) not in unique_ids and not unique_ids.add(id(e))]
+
+ def _check_induced_set_elem_connected(self, item):
+ if not item.is_connected():
+ raise gfapy.RuntimeError(
+ "Cannot compute induced set\n"+
+ "Non-connected element found\n"+
+ "Item: {}\nLine: {}".format(item, self))
+
+ def _compute_induced_edges_set(self, segments_set):
+ edges_set = list()
+ for item in segments_set:
+ for edge in item.edges:
+ if edge.other(item) in segments_set:
+ edges_set.append(edge)
+ unique_ids = set()
+ return [e for e in edges_set \
+ if id(e) not in unique_ids and not unique_ids.add(id(e))]
diff --git a/gfapy/line/group/unordered/references.py b/gfapy/line/group/unordered/references.py
new file mode 100644
index 0000000..f7ccbbb
--- /dev/null
+++ b/gfapy/line/group/unordered/references.py
@@ -0,0 +1,69 @@
+import gfapy
+
+class References:
+
+ def add_item(self, item):
+ """
+ Add an item to the group.
+
+ Parameters
+ ----------
+ item : gfapy.Line, str
+ GFA2 edge, segment, gap or group line to add.
+ """
+ if not self.is_connected():
+ self._add_item_to_unconnected_group(item)
+ else:
+ self._add_item_to_connected_group(item)
+
+ def rm_item(self, item):
+ """
+ Remove an item from the group.
+
+ Parameters
+ ----------
+ item : str, gfapy.Line
+ GFA2 edge, segment, gap or group line to remove.
+ """
+ if not self.is_connected():
+ self._rm_item_from_unconnected_group(item)
+ else:
+ self._rm_item_from_connected_group(item)
+
+ def _rm_item_from_unconnected_group(self, item):
+ if isinstance(item, gfapy.Line):
+ item = item.name
+ self._check_item_included(item)
+ self.items.remove(item)
+ return None
+
+ def _rm_item_from_connected_group(self, item):
+ if isinstance(item, str):
+ item = self._gfa.line(item)
+ self._check_item_included(item)
+ item._delete_reference(self, "sets")
+ self._delete_reference(item, "items")
+ return None
+
+ def _check_item_included(self, item):
+ if item not in self.items:
+ raise gfapy.NotFoundError(
+ "Line: {}\n".format(self)+
+ "Item: {}".format(repr(item))+
+ "Items of the line do not include the item")
+
+ def _add_item_to_unconnected_group(self, item, append = True):
+ if isinstance(item, gfapy.Line):
+ item = item.name
+ if append:
+ self.items.append(item)
+ else:
+ self.items.insert(0, item)
+ return None
+
+ def _add_item_to_connected_group(self, item, append = True):
+ self._add_reference(self.prepare_and_check_ref(item),
+ "items", append = append)
+ return None
+
+ def _initialize_references(self):
+ for i in range(len(self.items)):
+ self.items[i] = self._line_for_ref_symbol(self.items[i])
diff --git a/gfapy/line/group/unordered/unordered.py b/gfapy/line/group/unordered/unordered.py
new file mode 100644
index 0000000..e8d9e06
--- /dev/null
+++ b/gfapy/line/group/unordered/unordered.py
@@ -0,0 +1,20 @@
+from ..gfa2.references import References
+from ..gfa2.same_id import SameID
+from ..unordered.references import References as UnorderedReferences
+from ..unordered.induced_set import InducedSet
+from ..group import Group
+
+class Unordered(UnorderedReferences, InducedSet, References, SameID, Group):
+ """An unordered group line of a GFA2 file"""
+ RECORD_TYPE = "U"
+ POSFIELDS = ["pid", "items"]
+ FIELD_ALIAS = {"name" : "pid"}
+ DATATYPE = {
+ "pid" : "optional_identifier_gfa2",
+ "items" : "identifier_list_gfa2",
+ }
+ NAME_FIELD = "pid"
+ REFERENCE_FIELDS = ["items"]
+ DEPENDENT_LINES = ["sets"]
+
+Unordered._apply_definitions()
diff --git a/gfapy/line/header/__init__.py b/gfapy/line/header/__init__.py
new file mode 100644
index 0000000..7016e75
--- /dev/null
+++ b/gfapy/line/header/__init__.py
@@ -0,0 +1,2 @@
+from ..line import Line
+from .header import Header
diff --git a/gfapy/line/header/connection.py b/gfapy/line/header/connection.py
new file mode 100644
index 0000000..7a9a95c
--- /dev/null
+++ b/gfapy/line/header/connection.py
@@ -0,0 +1,12 @@
+import gfapy
+
+class Connection:
+
+ def connect(self, gfa):
+ if gfa.header is not self:
+ raise gfapy.RuntimeError(
+ "gfapy.line.Header instances cannot be connected\n"+
+ "Use gfa.add_line(this_line) to add the information\n"+
+ "contained in this header line to the header of a GFA instance.")
+ else:
+ self._gfa = gfa
diff --git a/gfapy/line/header/field_data.py b/gfapy/line/header/field_data.py
new file mode 100644
index 0000000..11b077d
--- /dev/null
+++ b/gfapy/line/header/field_data.py
@@ -0,0 +1,15 @@
+import gfapy
+
+class FieldData:
+ """
+ Disallow editing the VN tag in connected header lines
+ """
+
+ def _set_existing_field(self, fieldname, value, set_reference=False):
+ if fieldname == "VN" and self.get("VN") is not None and self.is_connected():
+ raise gfapy.RuntimeError(
+ "The value of the header tag VN cannot be edited\n"+
+ "For version conversion use to_gfa1 or to_gfa2")
+ else:
+ super()._set_existing_field(fieldname, value,
+ set_reference=set_reference)
diff --git a/gfapy/line/header/header.py b/gfapy/line/header/header.py
new file mode 100644
index 0000000..bbb7cb0
--- /dev/null
+++ b/gfapy/line/header/header.py
@@ -0,0 +1,26 @@
+from ..line import Line
+from .connection import Connection
+from .multiline import Multiline
+from .field_data import FieldData
+from .version_conversion import VersionConversion
+
+class Header(VersionConversion, Multiline, Connection, FieldData, Line):
+ """
+ A header line of a GFA file.
+
+ For examples on how to set the header data, see {GFA.Headers}.
+
+ See Also
+ --------
+ gfapy.Line
+ """
+
+ RECORD_TYPE = "H"
+ PREDEFINED_TAGS = ["VN", "TS"]
+ DATATYPE = {
+ "VN" : "Z",
+ "TS" : "i"
+ }
+ STORAGE_KEY = "merge"
+
+Header._apply_definitions()
diff --git a/gfapy/line/header/multiline.py b/gfapy/line/header/multiline.py
new file mode 100644
index 0000000..57fe331
--- /dev/null
+++ b/gfapy/line/header/multiline.py
@@ -0,0 +1,147 @@
+import gfapy
+
+class Multiline:
+ """
+ Implementation of the support for multiple header lines in a
+ GFA file (which may also contain the same tag defined multiple
+ times on different lines).
+ """
+
+ SINGLE_DEFINITION_TAGS = ["VN", "TS"]
+
+ def add(self, tagname, value, datatype = None):
+ """
+ Set a header value (multi-value compatible).
+
+ If a field does not exist yet, set it to value. If it exists and it is a
+ *gfapy.FieldArray*, add the value to the field array. If it exists and it
+ is not a field array, create a field array with the previous value and
+ the new one.
+
+ Parameters
+ ----------
+ tagname : str
+ value : object
+ datatype : gfapy.Field.TAG_DATATYPE, optional
+ The datatype to use.
+ The default is to determine the datatype according to the value or the
+ previous values present in the field.
+ """
+ prev = self.get(tagname)
+ if prev is None:
+ if datatype is not None:
+ self.set_datatype(tagname, datatype)
+ self.set(tagname, value)
+ return
+ elif not isinstance(prev, gfapy.FieldArray):
+ if tagname in self.SINGLE_DEFINITION_TAGS:
+ if self.field_to_s(tagname) == \
+ gfapy.Field._to_gfa_field(value, fieldname=tagname):
+ return
+ else:
+ raise gfapy.InconsistencyError(
+ "Inconsistent values for header tag {} found\n".format(tagname)+
+ "Previous definition: {}\n".format(prev)+
+ "Current definition: {}".format(value))
+ prev = gfapy.FieldArray(self.get_datatype(tagname), [prev])
+ self._set_existing_field(tagname, prev)
+ if self.vlevel > 1:
+ prev.vpush(value, datatype, tagname)
+ else:
+ prev.append(value)
+
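+ # Usage sketch (hypothetical custom tag "xx", assuming an unconnected
+ # header line): calling add() twice with the same tag name turns the
+ # stored value into a gfapy.FieldArray.
+ #
+ #   h = gfapy.Line("H")
+ #   h.add("xx", 1, "i")    # first definition: plain value
+ #   h.add("xx", 2, "i")    # second definition: value becomes a FieldArray
+ #   h.get("xx")            # -> FieldArray containing both values
+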
+ def field_to_s(self, fieldname, tag = False):
+ """
+ Compute the string representation of a field.
+
+ Parameters
+ ----------
+ fieldname : str
+ The tag name of the field.
+ tag : bool
+ *(defaults to: ***False***)*
+ Return the 'tagname:datatype:value' representation.
+
+ Raises
+ ------
+ gfapy.NotFoundError
+ If field is not defined.
+
+ Returns
+ -------
+ str
+ The string representation.
+ """
+ prev = self.get(fieldname)
+ if isinstance(prev, gfapy.FieldArray):
+ if self.vlevel >= 2:
+ prev._validate_gfa_field(None, fieldname)
+ return prev._to_gfa_tag(fieldname=fieldname) if tag else \
+ prev._to_gfa_field(fieldname=fieldname)
+ else:
+ return super(gfapy.line.header.Line, self).field_to_s(fieldname, tag)
+
+ def _n_duptags(self):
+ n = 0
+ for tn in self.tagnames:
+ if isinstance(self.get(tn),gfapy.FieldArray):
+ n+=1
+ return n
+
+ def _split(self):
+ """
+ Split the header line into single-tag lines.
+
+ If a tag is a FieldArray, it is split into multiple fields
+ with the same fieldname.
+
+ Returns
+ -------
+ gfapy.line.Header list
+ """
+ retval = []
+ for tagname, datatype, value in self._tags():
+ h = gfapy.line.Header(["H"], vlevel = self.vlevel)
+ h.set_datatype(tagname, datatype)
+ h.set(tagname, value)
+ retval.append(h)
+ return retval
+
+ def _merge(self, gfa_line):
+ """
+ Merge an additional **gfa.line.Header** line into this header line.
+
+ Parameters
+ ----------
+ gfa_line : gfapy.line.Header
+ The header line to merge.
+
+ Returns
+ -------
+ self
+ """
+ for of in gfa_line.tagnames:
+ self.add(of, gfa_line.get(of), gfa_line.get_datatype(of))
+ return self
+
+ def _tags(self):
+ """
+ List of tags data.
+
+ Returns the tags as a list of [fieldname, datatype, value]
+ lists. If a field is a FieldArray, it is split into multiple fields
+ with the same fieldname.
+
+ Returns
+ -------
+ (str, str, object) list
+ """
+ retval = []
+ for of in self.tagnames:
+ value = self.get(of)
+ if isinstance(value, gfapy.FieldArray):
+ for elem in value:
+ retval.append((of, value.datatype, elem))
+ else:
+ retval.append((of, self.get_datatype(of), value))
+ return retval
diff --git a/gfapy/line/header/version_conversion.py b/gfapy/line/header/version_conversion.py
new file mode 100644
index 0000000..8ebf513
--- /dev/null
+++ b/gfapy/line/header/version_conversion.py
@@ -0,0 +1,37 @@
+class VersionConversion:
+
+ def _to_gfa2_a(self):
+ """
+ Return the string representation of the tags, changing the value
+ of the VN tag to 2.0, if this is present
+
+ Returns
+ -------
+ list of str
+ Array of strings representing the tags.
+ """
+ a = ["H"]
+ if self.VN:
+ a.append("VN:Z:2.0")
+ for fn in self.tagnames:
+ if fn != "VN":
+ a.append(self.field_to_s(fn, tag = True))
+ return a
+
+ def _to_gfa1_a(self):
+ """
+ Return the string representation of the tags, changing the value
+ of the VN tag to 1.0, if this is present
+
+ Returns
+ -------
+ list of str
+ Array of strings representing the tags.
+ """
+ a = ["H"]
+ if self.VN:
+ a.append("VN:Z:1.0")
+ for fn in self.tagnames:
+ if fn != "VN":
+ a.append(self.field_to_s(fn, tag = True))
+ return a
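+
+# Conversion sketch (hypothetical header content): for a header carrying
+# "VN:Z:1.0" and "TS:i:100", _to_gfa2_a() yields ["H", "VN:Z:2.0", "TS:i:100"]
+# and _to_gfa1_a() yields ["H", "VN:Z:1.0", "TS:i:100"].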
diff --git a/gfapy/line/line.py b/gfapy/line/line.py
new file mode 100644
index 0000000..84844b1
--- /dev/null
+++ b/gfapy/line/line.py
@@ -0,0 +1,59 @@
+from .common.construction import Construction
+from .common.dynamic_fields import DynamicFields, DynamicField
+from .common.writer import Writer
+from .common.version_conversion import VersionConversion
+from .common.field_datatype import FieldDatatype
+from .common.field_data import FieldData
+from .common.equivalence import Equivalence
+from .common.cloning import Cloning
+from .common.connection import Connection
+from .common.virtual_to_real import VirtualToReal
+from .common.update_references import UpdateReferences
+from .common.disconnection import Disconnection
+from .common.validate import Validate
+from .common.default_record_definition import DefaultRecordDefinition
+
+import gfapy
+
+class Line(Construction, DynamicFields, Writer, VersionConversion,
+ FieldDatatype, FieldData, Equivalence, Cloning, Connection,
+ VirtualToReal, UpdateReferences, Disconnection, Validate,
+ DefaultRecordDefinition):
+ """
+ A line of a GFA file.
+
+ Parameters:
+ data (str, list of str) : the content of a line in a GFA file, either as
+ a string, or as a list derived from tab-splitting the line string
+ vlevel (int) : an integer from 0 to 3, which specifies the validation level;
+ if 0, no validation is performed (the user can still validate manually if
+ needed); if 1 (the default), validation is performed when the line
+ is constructed, or, for some fields, when the value is accessed
+ for the first time; if 2, the validation is performed also when converting
+ the content of a field to string; if 3, also each time the value
+ of a field is read or written
+ version (str) : one of 'gfa1' and 'gfa2'; the GFA version; if not specified,
+ then the version is guessed from the record type and syntax, or set
+ to 'generic'
+
+ Notes:
+ The private interface to the Line constructor also allows passing a
+ dictionary instead of a list for data. Furthermore, the private parameter
+ virtual allows the creation of virtual line instances, which are useful
+ during parsing.
+
+ Raises:
+ gfapy.error.FormatError: If the line contains a wrong number of positional
+ fields, if non-predefined tags use upcase letters, or if the content of a
+ field has a wrong format.
+ gfapy.error.NotUniqueError: If a tag name is used more than once.
+ gfapy.error.TypeError: If the value of a predefined tag does not
+ respect the datatype specified in the tag.
+
+ Returns:
+ an instance of a subclass of gfapy.line.Line
+ """
+
+ SEPARATOR = "\t"
+ """Separator in the string representation of GFA lines"""
+
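+# Construction sketch (example data; the tag value is hypothetical): the Line
+# factory parses a GFA line string and returns an instance of the appropriate
+# subclass.
+#
+#   line = gfapy.Line("S\tsA\t*\tLN:i:120", vlevel=1)   # a GFA1 segment line
+#   line.record_type   # "S"
+#   line.name          # "sA"
+#   line.LN            # 120 (tags are accessible as attributes)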
diff --git a/gfapy/line/segment/__init__.py b/gfapy/line/segment/__init__.py
new file mode 100644
index 0000000..7d1d38e
--- /dev/null
+++ b/gfapy/line/segment/__init__.py
@@ -0,0 +1,3 @@
+from .segment import Segment
+from .gfa1 import GFA1
+from .gfa2 import GFA2
diff --git a/gfapy/line/segment/coverage.py b/gfapy/line/segment/coverage.py
new file mode 100644
index 0000000..fbbec4e
--- /dev/null
+++ b/gfapy/line/segment/coverage.py
@@ -0,0 +1,37 @@
+import gfapy
+
+class Coverage:
+
+ def coverage(self, count_tag = "RC", unit_length = 1):
+ """Compute the coverage from the value a count_tag (RC, KC or FC).
+
+ If unit_length is provided then: count/(length-unit_length+1),
+ otherwise: count/length. The latter is a good approximation if
+ length >>> unit_length.
+
+ Parameters:
+ count_tag (str): integer tag from which the count shall be
+ taken (defaults to RC)
+ unit_length (int): average length of the sequence which is counted
+ (read for RC, fragment for FC, k-mer for KC).
+
+ Returns:
+ float : Coverage, if count_tag and length are defined.
+ None : Otherwise.
+ """
+ if count_tag in self.tagnames and self.length:
+ return (float(self.get(count_tag)))/(self.length - unit_length + 1)
+ else:
+ return None
+
+ def try_get_coverage(self, count_tag = "RC", unit_length = 1):
+ """
+ As coverage, but raises an exception if the coverage cannot be computed.
+ """
+ c = self.coverage(count_tag = count_tag, unit_length = unit_length)
+ if c is None:
+ self.try_get_length()
+ raise gfapy.NotFoundError(
+ "Tag {} undefined for segment {}".format(count_tag, self.name))
+ else:
+ return c
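+
+# Worked example (hypothetical numbers): a segment of length 1000 with the tag
+# RC:i:2000 has coverage(count_tag="RC") == 2000/1000 == 2.0; with
+# unit_length=100 (e.g. the read length) it is 2000/(1000-100+1), about 2.22.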
diff --git a/gfapy/line/segment/gfa1.py b/gfapy/line/segment/gfa1.py
new file mode 100644
index 0000000..afb60f4
--- /dev/null
+++ b/gfapy/line/segment/gfa1.py
@@ -0,0 +1,35 @@
+from .gfa1_to_gfa2 import GFA1ToGFA2
+from .length_gfa1 import LengthGFA1
+from .coverage import Coverage
+from .references import References
+from .writer_wo_sequence import WriterWoSequence
+from . import Segment
+
+class GFA1(WriterWoSequence, References, Coverage,
+ LengthGFA1, GFA1ToGFA2, Segment):
+ """
+ A segment line of a GFA file
+ """
+
+ VERSION = "gfa1"
+ RECORD_TYPE = "S"
+ POSFIELDS = ["name", "sequence"]
+ PREDEFINED_TAGS = ["LN", "RC", "FC", "KC", "SH", "UR"]
+ DATATYPE = {
+ "name" : "segment_name_gfa1",
+ "sequence" : "sequence_gfa1",
+ "LN" : "i",
+ "RC" : "i",
+ "FC" : "i",
+ "KC" : "i",
+ "SH" : "H",
+ "UR" : "Z",
+ }
+ NAME_FIELD = "name"
+ FIELD_ALIAS = { "sid" : "name" }
+ DEPENDENT_LINES = ["dovetails_L", "dovetails_R",
+ "edges_to_contained", "edges_to_containers", "paths"]
+ gfa2_compatibility = ["gaps_L", "gaps_R", "fragments", "internals", "sets"]
+ OTHER_REFERENCES = gfa2_compatibility
+
+GFA1._apply_definitions()
diff --git a/gfapy/line/segment/gfa1_to_gfa2.py b/gfapy/line/segment/gfa1_to_gfa2.py
new file mode 100644
index 0000000..4dd463f
--- /dev/null
+++ b/gfapy/line/segment/gfa1_to_gfa2.py
@@ -0,0 +1,25 @@
+import gfapy
+
+class GFA1ToGFA2:
+
+ def _to_gfa2_a(self):
+ """
+ Returns
+ -------
+ list of str
+ A list of GFA2 field strings.
+ """
+ try:
+ length = self.try_get_length()
+ except gfapy.NotFoundError:
+ raise gfapy.RuntimeError(
+ "Conversion of GFA1 segment line to GFA2 failed\n"+
+ "GFA2 requires to specify a length\n"+
+ "No length information available in the GFA1 segment:\n"+
+ "Segment line: {}".format(str(self)))
+ a = ["S", self.field_to_s("name", tag = False), str(self.try_get_length()),
+ self.field_to_s("sequence", tag = False)]
+ for fn in self.tagnames:
+ if fn != "LN":
+ a.append(self.field_to_s(fn, tag = True))
+ return a
diff --git a/gfapy/line/segment/gfa2.py b/gfapy/line/segment/gfa2.py
new file mode 100644
index 0000000..f5c9545
--- /dev/null
+++ b/gfapy/line/segment/gfa2.py
@@ -0,0 +1,30 @@
+from .gfa2_to_gfa1 import GFA2ToGFA1
+from .coverage import Coverage
+from .references import References
+from .writer_wo_sequence import WriterWoSequence
+from . import Segment
+
+class GFA2(WriterWoSequence, References, Coverage, GFA2ToGFA1, Segment):
+ """A segment line of a GFA file"""
+
+ VERSION = "gfa2"
+ RECORD_TYPE = "S"
+ POSFIELDS = ["sid", "slen", "sequence"]
+ PREDEFINED_TAGS = ["RC", "FC", "KC", "SH", "UR"]
+ DATATYPE = {
+ "sid" : "identifier_gfa2",
+ "slen" : "i",
+ "sequence" : "sequence_gfa2",
+ "RC" : "i",
+ "FC" : "i",
+ "KC" : "i",
+ "SH" : "H",
+ "UR" : "Z",
+ }
+ NAME_FIELD = "sid"
+ FIELD_ALIAS = { "name" : "sid", "length" : "slen", "LN" : "slen" }
+ DEPENDENT_LINES = ["dovetails_L", "dovetails_R", "gaps_L", "gaps_R",
+ "edges_to_contained", "edges_to_containers",
+ "fragments", "internals", "paths", "sets"]
+
+GFA2._apply_definitions()
diff --git a/gfapy/line/segment/gfa2_to_gfa1.py b/gfapy/line/segment/gfa2_to_gfa1.py
new file mode 100644
index 0000000..03578f5
--- /dev/null
+++ b/gfapy/line/segment/gfa2_to_gfa1.py
@@ -0,0 +1,23 @@
+import gfapy
+
+class GFA2ToGFA1:
+
+ def _to_gfa1_a(self, slen_tag = "LN"):
+ """
+ Notes:
+ According to the GFA2 specification, slen is not necessarily the actual
+ length of the sequence. This is not checked here; the content of slen is
+ stored, by default, in the LN tag.
+
+ Parameters:
+ slen_tag (str) : tag to use in GFA1 to store the content of slen
+
+ Returns:
+ str list : A list of GFA1 field strings.
+ """
+ a = ["S", self.field_to_s("name", tag = False),
+ self.field_to_s("sequence", tag = False)]
+ a.append(gfapy.Field._to_gfa_tag(self.slen, slen_tag, datatype = "i"))
+ for fn in self.tagnames:
+ a.append(self.field_to_s(fn, tag = True))
+ return a
diff --git a/gfapy/line/segment/length_gfa1.py b/gfapy/line/segment/length_gfa1.py
new file mode 100644
index 0000000..4c3033a
--- /dev/null
+++ b/gfapy/line/segment/length_gfa1.py
@@ -0,0 +1,60 @@
+import gfapy
+
+class LengthGFA1:
+
+ @property
+ def length(self):
+ """
+ Returns
+ -------
+ int
+ Value of LN tag, if segment has LN tag.
+ int
+ Sequence length if no LN and sequence not "*".
+ None
+ If sequence is "*".
+
+ See Also
+ --------
+ try_get_length
+ """
+ if self.LN:
+ return self.LN
+ elif not gfapy.is_placeholder(self.sequence):
+ return len(self.sequence)
+ else:
+ return None
+
+ def try_get_length(self):
+ """
+ Raises
+ ------
+ gfapy.NotFoundError
+ If there is no LN tag and the sequence is "*".
+
+ See Also
+ --------
+ __len__
+ """
+ l = self.length
+ if l is None:
+ raise gfapy.NotFoundError("No length information available")
+ return l
+
+ def validate_length(self):
+ """
+ Raises
+ ------
+ gfapy.InconsistencyError
+ If sequence length and LN tag are not consistent.
+ """
+ if not gfapy.is_placeholder(self.sequence) and "LN" in self.tagnames:
+ if self.LN != len(self.sequence):
+ raise gfapy.InconsistencyError(
+ "Segment: {}\n".format(str(self))+
+ "Length in LN tag ({}) ".format(self.LN)+
+ "is different from length of sequence field ({})"
+ .format(len(self.sequence)))
+
+ def _validate_record_type_specific_info(self):
+ self.validate_length()
diff --git a/gfapy/line/segment/references.py b/gfapy/line/segment/references.py
new file mode 100644
index 0000000..cf9a8d6
--- /dev/null
+++ b/gfapy/line/segment/references.py
@@ -0,0 +1,205 @@
+import gfapy
+
+class References:
+
+ @property
+ def dovetails(self):
+ """
+ References to the graph lines which involve the segment as dovetail overlap.
+
+ Returns
+ -------
+ gfapy.line.Edge list
+ A list of lines.
+ The lines themselves can be modified, but the list is immutable.
+ """
+ return self.dovetails_L + self.dovetails_R
+
+ def dovetails_of_end(self, extremity):
+ """
+ References to the graph lines which involve the segment as dovetail overlap.
+
+ Returns
+ -------
+ gfapy.line.Edge list
+ A list of lines.
+ The lines themselves can be modified, but the list is immutable.
+ """
+ return getattr(self, "dovetails_{}".format(extremity))
+
+ @property
+ def gaps(self):
+ """
+ References to the gap lines which involve the segment.
+ """
+ return self.gaps_L + self.gaps_R
+
+ def gaps_of_end(self, extremity):
+ """
+ References to the gap lines which involve the segment.
+ """
+ return getattr(self, "gaps_{}".format(extremity))
+
+ @property
+ def containments(self):
+ """
+ References to graph edges (C lines for GFA1, E for GFA2) which involve the
+ segment in a containment relationship.
+ """
+ return self.edges_to_contained + self.edges_to_containers
+
+ def _connectivity(self):
+ """
+ Computes the connectivity of a segment from its number of dovetail overlaps.
+
+ Returns
+ -------
+ (conn_symbol, conn_symbol) tuple
+
+ Connectivity symbols, respectively, of the L and R ends of the segment.
+
+ Connectivity symbol: let n be the number of links at an end (L or R)
+ of the segment; the symbol is "M" if n > 1, otherwise it is n itself.
+ """
+ if not self.is_connected():
+ raise gfapy.ArgumentError(
+ "Cannot compute the connectivity of {}\n".format(self)+
+ "Segment is not connected to a GFA instance")
+ return self._connectivity_symbols(len(self.dovetails_L),
+ len(self.dovetails_R))
+
+ @property
+ def neighbours(self):
+ """
+ List of dovetail-neighbours of a segment.
+
+ Returns
+ -------
+ gfapy.line.Segment list
+ Segments connected to the current segment by dovetail overlap
+ relationships (L lines for GFA1, dovetail-representing E lines for GFA2)
+ """
+ seen = set()
+ return [l.other(self) for l in self.dovetails \
+ if id(l) not in seen and not seen.add(id(l))]
+
+ @property
+ def neighbours_L(self):
+ """
+ List of dovetail-neighbours of a segment.
+
+ Returns
+ -------
+ gfapy.line.Segment list
+ Segments connected to the current segment by dovetail overlap
+ relationships (L lines for GFA1, dovetail-representing E lines for GFA2)
+ """
+ seen = set()
+ return [l.other(self) for l in self.dovetails_L \
+ if id(l) not in seen and not seen.add(id(l))]
+
+ @property
+ def neighbours_R(self):
+ """
+ List of dovetail-neighbours of a segment.
+
+ Returns
+ -------
+ gfapy.line.Segment list
+ Segments connected to the current segment by dovetail overlap
+ relationships (L lines for GFA1, dovetail-representing E lines for GFA2)
+ """
+ seen = set()
+ return [l.other(self) for l in self.dovetails_R \
+ if id(l) not in seen and not seen.add(id(l))]
+
+ def neighbours_of_end(self, extremity):
+ return getattr(self, "neighbours_{}".format(extremity))
+
+ @property
+ def containers(self):
+ """
+ List of segments which contain the segment.
+
+ Returns
+ -------
+ gfapy.line.Segment list
+ Segments connected to the current segment by containment relationships
+ (C lines for GFA1, containment-representing E lines for GFA2),
+ where the current segment is the contained segment.
+ """
+ seen = set()
+ return [l.from_segment for l in self.edges_to_containers \
+ if id(l) not in seen and not seen.add(id(l))]
+
+ @property
+ def contained(self):
+ """
+ List of segments which are contained in the segment.
+
+ Returns
+ -------
+ gfapy.line.Segment list
+ Segments connected to the current segment by containment relationships
+ (C lines for GFA1, containment-representing E lines for GFA2),
+ where the current segment is the container segment.
+ """
+ seen = set()
+ return [l.to_segment for l in self.edges_to_contained \
+ if id(l) not in seen and not seen.add(id(l))]
+
+ @property
+ def edges(self):
+ """
+ List of edges which refer to the segment
+
+ Returns
+ -------
+ gfapy.line.Edge list
+ """
+ return self.dovetails + self.containments + self.internals
+
+ def relations_to(self, segment, collection="edges"):
+ if isinstance(segment, gfapy.Line):
+ return [e for e in getattr(self, collection) \
+ if (e.other(self) is segment)]
+ else:
+ return [e for e in getattr(self, collection) \
+ if (e.other(self).name == segment)]
+
+ def oriented_relations(self, orientation, oriented_segment, collection="edges"):
+ return [e for e in getattr(self, collection) if \
+ (e.other_oriented_segment(gfapy.OrientedLine(self, orientation), tolerant=True) == \
+ oriented_segment)]
+
+ def end_relations(self, extremity, segment_end, collection ="edges"):
+ return [e for e in getattr(self, collection) if \
+ (e.other_end(gfapy.SegmentEnd(self, extremity), tolerant=True) == \
+ segment_end)]
+
+ def _connectivity_symbols(self, n, m):
+ return (self._connectivity_symbol(n), self._connectivity_symbol(m))
+
+ def _connectivity_symbol(self, n):
+ return "M" if n > 1 else n
+
+ def _backreference_keys(self, ref, key_in_ref):
+ if ref.record_type == "E":
+ return ["dovetails_L", "dovetails_R", "internals",
+ "edges_to_containers", "edges_to_contained"]
+ elif ref.record_type == "L":
+ return ["dovetails_L", "dovetails_R"]
+ elif ref.record_type == "C":
+ return ["edges_to_contained"] if (key_in_ref == "from_segment") \
+ else ["edges_to_containers"]
+ elif ref.record_type == "G":
+ return ["gaps_L", "gaps_R"]
+ elif ref.record_type == "F":
+ return ["fragments"]
+ elif ref.record_type == "P" or ref.record_type == "O":
+ return ["paths"]
+ elif ref.record_type == "U":
+ return ["sets"]
+ else:
+ return []
diff --git a/gfapy/line/segment/segment.py b/gfapy/line/segment/segment.py
new file mode 100644
index 0000000..8c2b7d4
--- /dev/null
+++ b/gfapy/line/segment/segment.py
@@ -0,0 +1,23 @@
+from ..line import Line
+import gfapy
+import re
+
+class Segment(Line):
+ """
+ Parent class for classes representing segment lines
+ """
+
+ @staticmethod
+ def _subclass(data):
+ n_positionals = len(data)-1
+ for i in range(len(data)-1, 0, -1):
+ if not re.search(r"^..:.:.*$", data[i]):
+ break
+ n_positionals = i-1
+ if n_positionals == 2:
+ return gfapy.line.segment.GFA1
+ elif n_positionals == 3:
+ return gfapy.line.segment.GFA2
+ else:
+ raise gfapy.FormatError("Wrong number of positional fields for "
+ "segment line; GFA1=2, GFA2=3, found={}\n".format(n_positionals))
diff --git a/gfapy/line/segment/writer_wo_sequence.py b/gfapy/line/segment/writer_wo_sequence.py
new file mode 100644
index 0000000..da123cf
--- /dev/null
+++ b/gfapy/line/segment/writer_wo_sequence.py
@@ -0,0 +1,22 @@
+class WriterWoSequence:
+
+ def __str__(self, without_sequence = False):
+ """
+ Parameters
+ ----------
+ without_sequence : bool
+ If **True**, output "*" instead of sequence.
+
+ Returns
+ -------
+ str
+ String representation of the segment.
+ """
+ if not without_sequence:
+ return super().__str__()
+ else:
+ saved = self.sequence
+ self.sequence = "*"
+ retval = super().__str__()
+ self.sequence = saved
+ return retval
diff --git a/gfapy/line/unknown/__init__.py b/gfapy/line/unknown/__init__.py
new file mode 100644
index 0000000..d88c2cf
--- /dev/null
+++ b/gfapy/line/unknown/__init__.py
@@ -0,0 +1 @@
+from .unknown import Unknown
diff --git a/gfapy/line/unknown/unknown.py b/gfapy/line/unknown/unknown.py
new file mode 100644
index 0000000..7ed181d
--- /dev/null
+++ b/gfapy/line/unknown/unknown.py
@@ -0,0 +1,22 @@
+from ..line import Line
+
+class Unknown(Line):
+ """
+ A GFA2 line which was referred to only by G or O lines
+ and has not been found yet (ie is always virtual)
+ """
+
+ RECORD_TYPE = "\n"
+ POSFIELDS = ["name"]
+ DATATYPE = {"name": "identifier_gfa2"}
+ NAME_FIELD = "name"
+ DEPENDENT_LINES = ["sets", "paths"]
+
+ def __str__(self):
+ return "?record_type?\t{}\tco:Z:line_created_by_gfapy".format(self.name)
+
+ @property
+ def virtual(self):
+ return True
+
+Unknown._apply_definitions()
diff --git a/gfapy/lines/__init__.py b/gfapy/lines/__init__.py
new file mode 100644
index 0000000..5c4e0d6
--- /dev/null
+++ b/gfapy/lines/__init__.py
@@ -0,0 +1,2 @@
+from .lines import Lines
+import gfapy.line
diff --git a/gfapy/lines/collections.py b/gfapy/lines/collections.py
new file mode 100644
index 0000000..24d01c0
--- /dev/null
+++ b/gfapy/lines/collections.py
@@ -0,0 +1,321 @@
+import gfapy
+
+class Collections:
+ @property
+ def comments(self):
+ """List of the comment lines (lines starting with #).
+
+ Note:
+ Adding or removing elements of the list does not add lines to or
+ remove lines from the Gfa instance; for this, the add_line() and rm()
+ methods shall be used. Calling disconnect() on a line of the list,
+ however, removes the line from the instance.
+ """
+ d = self._records["#"]
+ return list(d.values())
+
+ @property
+ def gaps(self):
+ """List of the gap (G) lines. The list is empty in GFA1.
+
+ Note:
+ Adding or removing elements of the list does not add lines to or
+ remove lines from the Gfa instance; for this, the add_line() and rm()
+ methods shall be used. Calling disconnect() on a line of the list,
+ however, removes the line from the instance.
+ """
+ d = self._records["G"]
+ return list(d.values())
+
+ @property
+ def sets(self):
+ """List of the set (U) lines. The list is empty in GFA1.
+
+ Note:
+ Adding or removing elements of the list does not add lines to or
+ remove lines from the Gfa instance; for this, the add_line() and rm()
+ methods shall be used. Calling disconnect() on a line of the list,
+ however, removes the line from the instance.
+ """
+ d = self._records["U"]
+ return list(d.values())
+
+ @property
+ def segments(self):
+ """List of the segment (S) lines.
+
+ Note:
+ Adding or removing elements of the list does not add lines to or
+ remove lines from the Gfa instance; for this, the add_line() and rm()
+ methods shall be used. Calling disconnect() on a line of the list,
+ however, removes the line from the instance.
+ """
+ d = self._records["S"]
+ return list(d.values())
+
+ @property
+ def edges(self):
+ """List of the edge lines.
+
+ Edge lines are L and C lines in GFA1 and E lines in GFA2.
+
+ Note:
+ Adding or removing elements of the list does not add lines to or
+ remove lines from the Gfa instance; for this, the add_line() and rm()
+ methods shall be used. Calling disconnect() on a line of the list,
+ however, removes the line from the instance.
+ """
+ if self._version == "gfa1":
+ return self._gfa1_links + self._gfa1_containments
+ elif self._version == "gfa2":
+ return self._gfa2_edges
+ else:
+ return self._gfa1_links + self._gfa1_containments + self._gfa2_edges
+
+ @property
+ def dovetails(self):
+ """List of the dovetail edge lines.
+
+ Dovetail edge lines are L lines in GFA1 and E lines representing dovetail
+ overlaps in GFA2.
+
+ Note:
+ Adding or removing elements of the list does not add lines to or
+ remove lines from the Gfa instance; for this, the add_line() and rm()
+ methods shall be used. Calling disconnect() on a line of the list,
+ however, removes the line from the instance.
+ """
+ if self._version == "gfa1":
+ return self._gfa1_links
+ elif self._version == "gfa2":
+ return [ e for e in self._gfa2_edges if e.is_dovetail() ]
+ else:
+ return self._gfa1_links + \
+ [ e for e in self._gfa2_edges if e.is_dovetail() ]
+
+ @property
+ def containments(self):
+ """List of the containment edge lines.
+
+ Containment edge lines are C lines in GFA1 and E lines representing
+ containments in GFA2.
+
+ Note:
+ Adding or removing elements of the list does not add lines to or
+ remove lines from the Gfa instance; for this, the add_line() and rm()
+ methods shall be used. Calling disconnect() on a line of the list,
+ however, removes the line from the instance.
+ """
+ if self._version == "gfa1":
+ return self._gfa1_containments
+ elif self._version == "gfa2":
+ return [ e for e in self._gfa2_edges if e.is_containment() ]
+ else:
+ return self._gfa1_containments + \
+ [ e for e in self._gfa2_edges if e.is_containment() ]
+
+ @property
+ def paths(self):
+ """List of the path lines (P in GFA1, O in GFA2).
+
+ Note:
+ Adding or removing elements of the list does not add lines to or
+ remove lines from the Gfa instance; for this, the add_line() and rm()
+ methods shall be used. Calling disconnect() on a line of the list,
+ however, removes the line from the instance.
+ """
+ return self._gfa1_paths + self._gfa2_paths
+
+ @property
+ def fragments(self):
+ """List of the fragment (F) lines. The list is empty in GFA1.
+
+ Note:
+ Adding or removing elements of the list does not add lines to or
+ remove lines from the Gfa instance; for this, the add_line() and rm()
+ methods shall be used. Calling disconnect() on a line of the list,
+ however, removes the line from the instance.
+ """
+ d = self._records["F"]
+ return [f for e in d.values() for f in e.values()]
+
+ @property
+ def custom_records(self):
+ """List of the custom records of the GFA. The list is empty for GFA1.
+
+ All records with a non-standard first field (record type) are considered
+ custom records.
+
+ Note:
+ Adding or removing elements of the list does not add lines to or
+ remove lines from the Gfa instance; for this, the add_line() and rm()
+ methods shall be used. Calling disconnect() on a line of the list,
+ however, removes the line from the instance.
+ """
+ cr = []
+ for k in self.custom_record_keys:
+ collection = self._records[k]
+ cr += list(collection.values())
+ return cr
+
+ @property
+ def _gfa1_containments(self):
+ d = self._records["C"]
+ return list(d.values())
+
+ @property
+ def _gfa1_links(self):
+ d = self._records["L"]
+ return list(d.values())
+
+ @property
+ def _gfa2_edges(self):
+ d = self._records["E"]
+ return list(d.values())
+
+ @property
+ def _gfa2_paths(self):
+ d = self._records["O"]
+ return list(d.values())
+
+ @property
+ def gap_names(self):
+ """List of the names of the gap (G) lines. The list is empty in GFA1.
+ """
+ d = self._records["G"]
+ return list([k for k in d.keys() if isinstance(k, str)])
+
+ @property
+ def set_names(self):
+ """List of the names of the set (U) lines. The list is empty in GFA1.
+ """
+ d = self._records["U"]
+ return list([k for k in d.keys() if isinstance(k, str)])
+
+ @property
+ def segment_names(self):
+ """List of the names of the segment (S) lines.
+ """
+ d = self._records["S"]
+ return list(d.keys())
+
+ @property
+ def edge_names(self):
+ """List of the names of the edge (E, L, C) lines.
+
+ For the L and C lines, the content of the custom tag id
+ is taken as name.
+ """
+ if self._version == "gfa1":
+ return self._link_names + self._containment_names
+ elif self._version == "gfa2":
+ return self._gfa2_edge_names
+ else:
+ return self._gfa2_edge_names + self._link_names + self._containment_names
+
+ @property
+ def path_names(self):
+ """List of the names of the path lines (P for GFA1, O for GFA2).
+ """
+ return self._gfa1_path_names + self._gfa2_path_names
+
+ @property
+ def names(self):
+ """All identifiers in the GFA identifiers namespace.
+
+ Notes:
+ GFA1: in Gfapy the P and S namespaces are joined (i.e. paths with
+ the same name as segments are not accepted). Furthermore, to simplify
+ the conversion to/from GFA2, the ID tag is used in L and C lines,
+ and their content is also included in the same namespace as the S/P
+ identifiers. GFA2: the namespace for identifiers is described in
+ the specification and includes all the S, E, G, U and O lines; the
+ external sequence identifiers in F lines are not included.
+ """
+ return self.segment_names + \
+ self.edge_names + \
+ self.gap_names + \
+ self.path_names + \
+ self.set_names
+
+ def unused_name(self):
+ """Compute a GFA identifier not yet in use in the Gfa object."""
+ self._max_int_name += 1
+ return str(self._max_int_name)
+
+ @property
+ def external_names(self):
+ """List of the identifiers of external sequences mentioned in F records.
+ The list is empty in GFA1.
+ """
+ return list(self._records["F"].keys())
+
+ @property
+ def _gfa2_edge_names(self):
+ d = self._records["E"]
+ return list([k for k in d.keys() if isinstance(k, str)])
+
+ @property
+ def _link_names(self):
+ d = self._records["L"]
+ return list([k for k in d.keys() if isinstance(k, str)])
+
+ @property
+ def _containment_names(self):
+ d = self._records["C"]
+ return list([k for k in d.keys() if isinstance(k, str)])
+
+ @property
+ def _gfa2_path_names(self):
+ d = self._records["O"]
+ return list([k for k in d.keys() if isinstance(k, str)])
+
+ @property
+ def _gfa1_paths(self):
+ d = self._records["P"]
+ return list(d.values())
+
+ @property
+ def _gfa1_path_names(self):
+ d = self._records["P"]
+ return list(d.keys())
+
+ GFA1_ONLY_KEYS = ["L", "C", "P"]
+
+ NONCUSTOM_GFA2_KEYS = ["H", "#", "F", "S", "E", "G", "U", "O", "\n"]
+
+ @property
+ def custom_record_keys(self):
+ """Record types of the custom records.
+
+ Returns:
+ list of str
+ """
+ if self._version == "gfa1":
+ return []
+ else:
+ keys = [k for k,v in self._records.items() if v]
+ if self._version == "gfa2":
+ return [k for k in keys if k not in self.NONCUSTOM_GFA2_KEYS]
+ else:
+ return [k for k in keys \
+ if k not in self.NONCUSTOM_GFA2_KEYS and k not in self.GFA1_ONLY_KEYS]
+
+ def custom_records_of_type(self, record_type):
+ """List of custom records of the specified type."""
+ if record_type not in self.custom_record_keys:
+ return []
+ return list(self._records[record_type].values())
+
+ @property
+ def lines(self):
+ """All the lines of the GFA"""
+ return self.comments + \
+ self.headers + \
+ self.segments + \
+ self.edges + \
+ self.paths + \
+ self.sets + \
+ self.gaps + \
+ self.fragments + \
+ self.custom_records
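+
+# Access sketch (assuming a connected gfapy.Gfa instance g):
+#
+#   g.segments        # list of the S lines
+#   g.segment_names   # their names
+#   g.dovetails       # L lines (GFA1) or dovetail-representing E lines (GFA2)
+#   g.lines           # every line of the GFA, grouped by record type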
diff --git a/gfapy/lines/creators.py b/gfapy/lines/creators.py
new file mode 100644
index 0000000..7bbcd53
--- /dev/null
+++ b/gfapy/lines/creators.py
@@ -0,0 +1,184 @@
+import gfapy
+
+class Creators:
+
+ def add_line(self, gfa_line):
+ """Add a line to a GFA instance.
+
+ Note:
+ append() is an alias to this method
+
+ Parameters:
+ gfa_line (str, Line): a line instance or a string, containing a line
+ of a GFA file (if a string, a line instance is constructed using
+ the string)
+
+ Raises:
+ gfapy.error.VersionError : if the line type is not allowed for the
+ GFA version of the instance
+ gfapy.error.FormatError : if the content of the line string is
+ not valid
+ """
+ if gfa_line is None:
+ return
+ if self._version == "gfa1":
+ self.__add_line_GFA1(gfa_line)
+ elif self._version == "gfa2":
+ self.__add_line_GFA2(gfa_line)
+ elif self._version is None:
+ self.__add_line_unknown_version(gfa_line)
+ else:
+ raise gfapy.AssertionError("This point should never be reached")
+
+ append = add_line
+
+ def process_line_queue(self):
+ """Process the lines kept by side while parsing GFA of unknown version.
+
+ This method is called after adding the lines, if the GFA version is
+ not specified, at soon as the GFA version becomes clear, from the
+ syntax or type of a line.
+
+ Sometimes it is necessary to call this method, when constructing manually
+ Gfa instances, which are not complete.
+ """
+ if self._version is None:
+ self._version = self._version_guess
+ for i in range(0,len(self._line_queue)):
+ self.add_line(self._line_queue[i])
+ self._line_queue = []
+
+ def _register_line(self, gfa_line):
+ self._api_private_check_gfa_line(gfa_line, "_register_line")
+ storage_key = gfa_line.__class__.STORAGE_KEY
+ if storage_key == "merge":
+ self._records[gfa_line.record_type]._merge(gfa_line)
+ elif storage_key == "name":
+ if gfa_line.record_type not in self._records:
+ self._records[gfa_line.record_type] = {}
+ key = gfa_line.name
+ if gfapy.is_placeholder(key):
+ key = id(gfa_line)
+ elif key.isdigit():
+ keynum = int(key)
+ if keynum > self._max_int_name:
+ self._max_int_name = keynum
+ self._records[gfa_line.record_type][key] = gfa_line
+ elif storage_key == "external":
+ if gfa_line.external.line not in self._records[gfa_line.record_type]:
+ self._records[gfa_line.record_type][gfa_line.external.line] = {}
+ self._records[gfa_line.record_type][\
+ gfa_line.external.line][id(gfa_line)] = gfa_line
+ elif storage_key is None:
+ if gfa_line.record_type not in self._records:
+ self._records[gfa_line.record_type] = {}
+ self._records[gfa_line.record_type][id(gfa_line)] = gfa_line
+
+ def __add_line_unknown_version(self, gfa_line):
+ if isinstance(gfa_line, str):
+ rt = gfa_line[0]
+ elif isinstance(gfa_line, gfapy.Line):
+ rt = gfa_line.record_type
+ else:
+ raise gfapy.ArgumentError(\
+ "Only strings and gfapy.Line instances can be added")
+ if rt == "#":
+ if isinstance(gfa_line, str):
+ gfa_line = gfapy.Line(gfa_line)
+ gfa_line.connect(self)
+ elif rt == "H":
+ if isinstance(gfa_line, str):
+ gfa_line = gfapy.Line(gfa_line, vlevel=self._vlevel)
+ self.header._merge(gfa_line)
+ if gfa_line.VN:
+ if gfa_line.VN == "1.0":
+ self._version = "gfa1"
+ elif gfa_line.VN == "2.0":
+ self._version = "gfa2"
+ else:
+ self._version = gfa_line.VN
+ self._version_explanation = "specified in header VN tag"
+ if self._vlevel > 0:
+ self._validate_version()
+ self.process_line_queue()
+ elif rt == "S":
+ if isinstance(gfa_line, str):
+ gfa_line = gfapy.Line(gfa_line, vlevel=self._vlevel)
+ self._version = gfa_line.version
+ self._version_explanation = \
+ "implied by: syntax of S {} line".format(gfa_line.name)
+ self.process_line_queue()
+ gfa_line.connect(self)
+ elif rt in ["E", "F", "G", "U", "O"]:
+ self._version = "gfa2"
+ self._version_explanation = "implied by: presence of a {} line".format(rt)
+ if isinstance(gfa_line, str):
+ gfa_line = gfapy.Line(gfa_line, vlevel=self._vlevel,
+ version=self._version)
+ self.process_line_queue()
+ gfa_line.connect(self)
+ elif rt in ["L", "C", "P"]:
+ self._version_guess = "gfa1"
+ self._line_queue.append(gfa_line)
+ else:
+ self._line_queue.append(gfa_line)
+
+ def __add_line_GFA1(self, gfa_line):
+ if isinstance(gfa_line, str):
+ if gfa_line[0] == "S":
+ gfa_line = gfapy.Line(gfa_line, vlevel=self._vlevel)
+ else:
+ gfa_line = gfapy.Line(gfa_line, vlevel=self._vlevel,
+ version="gfa1")
+ elif gfa_line.__class__ in gfapy.Lines.GFA2Specific:
+ raise gfapy.VersionError(
+ "Version: 1.0 ({})\n".format(self._version_explanation)+
+ "Cannot add instance of incompatible line type "+
+ str(type(gfa_line)))
+ if gfa_line.record_type == "H":
+ if self._vlevel > 0 and gfa_line.VN and gfa_line.VN != "1.0":
+ raise gfapy.VersionError(
+ "Header line specified wrong version ({})\n".format(gfa_line.VN)+
+ "Line: {}\n".format(gfa_line)+
+ "File version: 1.0 ({})".format(self._version_explanation))
+ self.header._merge(gfa_line)
+ elif gfa_line.record_type == "S":
+ if gfa_line.version == "gfa2":
+ raise gfapy.VersionError(
+ "Version: 1.0 ({})\n".format(self._version_explanation)+
+ "GFA2 segment found: {}".format(gfa_line))
+ gfa_line.connect(self)
+ elif gfa_line.record_type in ["L", "P", "C", "#"]:
+ gfa_line.connect(self)
+ else:
+ raise gfapy.AssertionError(
+ "Invalid record type {}. This should never happen".format(rt))
+
+ def __add_line_GFA2(self, gfa_line):
+ if isinstance(gfa_line, str):
+ if gfa_line[0] == "S":
+ gfa_line = gfapy.Line(gfa_line, vlevel=self._vlevel)
+ else:
+ gfa_line = gfapy.Line(gfa_line, vlevel=self._vlevel,
+ version="gfa2")
+ elif gfa_line.__class__ in gfapy.Lines.GFA1Specific:
+ raise gfapy.VersionError(
+ "Version: 2.0 ({})\n".format(self._version_explanation)+
+ "Cannot add instance of incompatible line type "+
+ str(type(gfa_line)))
+ if gfa_line.record_type == "H":
+ if self._vlevel > 0 and gfa_line.VN and gfa_line.VN != "2.0":
+ raise gfapy.VersionError(
+ "Header line specified wrong version ({})\n".format(gfa_line.VN)+
+ "Line: {}\n".format(gfa_line)+
+ "File version: 2.0 ({})".format(self._version_explanation))
+ self.header._merge(gfa_line)
+ elif gfa_line.record_type == "S":
+ if gfa_line.version == "gfa1":
+ raise gfapy.VersionError(
+ "Version: 2.0 ({})\n".format(self._version_explanation)+
+ "GFA1 segment found: {}".format(gfa_line))
+ gfa_line.connect(self)
+ else:
+ gfa_line.connect(self)
+
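A minimal sketch of the version auto-detection documented in add_line() above (the line contents are illustrative; while the version is still unknown, GFA1-only lines such as L are queued and handled by process_line_queue() once the version becomes clear):

    import gfapy
    gfa = gfapy.Gfa()                     # version not specified yet
    gfa.add_line("H\tVN:Z:1.0")           # the VN tag fixes the version (gfa1)
    gfa.append("S\ts1\t*")                # append() is an alias of add_line()
    gfa.append("S\ts2\t*")
    gfa.append("L\ts1\t+\ts2\t+\t*")
    print(gfa.segment_names)              # e.g. ['s1', 's2']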
diff --git a/gfapy/lines/destructors.py b/gfapy/lines/destructors.py
new file mode 100644
index 0000000..7a07618
--- /dev/null
+++ b/gfapy/lines/destructors.py
@@ -0,0 +1,50 @@
+import gfapy
+
+class Destructors:
+
+ def rm(self, gfa_line):
+ """Remove a line from the Gfa instance.
+
+ Parameters:
+ gfa_line (Line, str): if a Line instance, then disconnect is called
+ on it; otherwise, it is assumed to be a line identifier, the
+ corresponding line is searched and then disconnected.
+
+ Raises:
+ gfapy.error.NotFoundError : if the line is specified using an identifier
+ and no line with that identifier exists in the Gfa instance
+ """
+ self.try_get_line(gfa_line).disconnect()
+
+ def _delete_other_links(self, segment_end, other_end,
+ conserve_components = False):
+ segment_end = gfapy.SegmentEnd(segment_end)
+ other_end = gfapy.SegmentEnd(other_end)
+ s = self.try_get_segment(segment_end.segment)
+ for l in s.dovetails_of_end(segment_end.end_type):
+ if not conserve_components or not self.is_cut_link(l):
+ l.disconnect()
+
+ def _unregister_line(self, gfa_line):
+ self._api_private_check_gfa_line(gfa_line, "unregister_line")
+ rt = gfa_line.record_type
+ if rt == "H":
+ raise gfapy.AssertionError("Bug found, please report\n"+
+ "gfa_line: {}".format(gfa_line))
+ collection = self._records[rt]
+ key = gfa_line
+ delete_if_empty = None
+ storage_key = gfa_line.__class__.STORAGE_KEY
+ if storage_key == "name":
+ name = gfa_line.name
+ if gfapy.is_placeholder(name):
+ name = id(gfa_line)
+ collection.pop(name)
+ elif storage_key == "external":
+ subkey = gfa_line.external.name
+ collection = collection[subkey]
+ collection.pop(id(gfa_line))
+ if not collection:
+ self._records[rt].pop(subkey)
+ else:
+ collection.pop(id(gfa_line))
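A short sketch of rm() (names are illustrative; the comments property is the one used in the tests further below):

    import gfapy
    gfa = gfapy.Gfa(["S\ts1\t*", "S\ts2\t*", "# a note"])
    gfa.rm("s1")                # lookup by identifier, then disconnect
    gfa.rm(gfa.comments[0])     # or pass the Line instance directly
    print(gfa.segment_names)    # e.g. ['s2']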
diff --git a/gfapy/lines/finders.py b/gfapy/lines/finders.py
new file mode 100644
index 0000000..06cd48b
--- /dev/null
+++ b/gfapy/lines/finders.py
@@ -0,0 +1,131 @@
+import gfapy
+
+class Finders:
+ def segment(self, s):
+ """Search a segment in a GFA.
+
+ If the argument is a line, it is returned. If it is a string,
+ it is used as a segment identifier, and the segment with that identifier
+ is returned. If no segment has the identifier, None is returned.
+ Note that None is also returned if a line of another type (not segment)
+ has the identifier (differently from the line() method, which returns
+ the lines independently from the record type).
+
+ Parameters:
+ l (str, gfapy.Line)
+ """
+ if isinstance(s, gfapy.Line):
+ return s
+ else:
+ return self._records["S"].get(s, None)
+
+ def try_get_segment(self, s):
+ """Call segment() and raise an exception is the segment is not found."""
+ seg = self.segment(s)
+ if seg is None:
+ raise gfapy.NotFoundError("No segment has name {}".format(s))
+ else:
+ return seg
+
+ RECORDS_WITH_NAME = ["E", "S", "P", "U", "G", "O", "\n"]
+
+ def line(self, l):
+ """Search a line in a GFA.
+
+ If the argument is a line, it is returned. If it is a string,
+ it is used as a line identifier, and the line with that identifier
+ is returned. If no line has the identifier, None is returned.
+
+ Parameters:
+ l (str, gfapy.Line)
+ """
+ if gfapy.is_placeholder(l):
+ return None
+ elif isinstance(l, gfapy.Line):
+ return l
+ elif isinstance(l, str):
+ return self.__line_by_name(l)
+ else:
+ return None
+
+ def try_get_line(self, l):
+ """Call line() and raise an exception is the line is not found."""
+ gfa_line = self.line(l)
+ if gfa_line is None:
+ if gfapy.is_placeholder(l):
+ raise gfapy.ValueError(
+ "'*' is a placeholder and not a valid name for a line")
+ else:
+ raise gfapy.NotFoundError(
+ "No line found with ID {}".format(l))
+ return gfa_line
+
+ def fragments_for_external(self, external_id):
+ """List of F lines with a given external ID."""
+ return list(self._records["F"].get(external_id,{}).values())
+
+ def select(self, dict_or_line):
+ """Select all lines which respect a chriterion.
+
+ The chriterion is expressed by the argument, which is either a line
+ instance or a dictionary. If it is a dictionary, it shall contain
+ pairs of fieldnames/values and the method returns all lines
+ where the mentioned fieldnames have the corresponding values.
+ If it is a line, it is compared with the lines of the same type
+ in the Gfa instance and lines with the same field values
+ are returned (undefined placeholder values are thereby not compared).
+ """
+ is_dict = isinstance(dict_or_line, dict)
+ name = dict_or_line.get("name",None) if is_dict else dict_or_line.get("name")
+ if name is not None and not gfapy.is_placeholder(name):
+ collection = [self.__line_by_name(name)]
+ else:
+ if is_dict:
+ record_type = dict_or_line.get("record_type",None)
+ else:
+ record_type = dict_or_line.record_type
+ collection = self.__collection_for_select(record_type)
+ method = "_has_field_values" if is_dict else "_has_eql_fields"
+ return [line for line in collection \
+ if getattr(line, method)(dict_or_line, ["record_type","name"])]
+
+ def _search_duplicate(self, gfa_line):
+ if gfa_line.record_type == "L":
+ return self._search_link(gfa_line.oriented_from, gfa_line.oriented_to,
+ gfa_line.alignment)
+ elif gfa_line.record_type in self.RECORDS_WITH_NAME:
+ return self.line(gfa_line.name)
+ else:
+ return None
+
+ def _search_link(self, orseg1, orseg2, cigar):
+ s = self.segment(orseg1.line)
+ if s is None:
+ return None
+ for l in s.dovetails:
+ if isinstance(l, gfapy.line.edge.Link) and \
+ l.is_compatible(orseg1, orseg2, cigar, True):
+ return l
+ return None
+
+ def __line_by_name(self, name):
+ for rt in self.RECORDS_WITH_NAME:
+ if rt not in self._records:
+ continue
+ found = self._records[rt].get(name, None)
+ if found is not None:
+ return found
+ return None
+
+ def __collection_for_select(self, record_type):
+ if record_type is None:
+ return self.lines
+ else:
+ d = self._records[record_type]
+ if record_type == "F":
+ retval = []
+ for v in d.values():
+ retval.extend(list(v.values()))
+ return retval
+ else:
+ return list(d.values())
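A sketch of the finder methods above (illustrative data; the xx tag is made up for the select() example):

    import gfapy
    gfa = gfapy.Gfa(["S\tA\t*\txx:i:1", "S\tB\t*\txx:i:2", "L\tA\t+\tB\t+\t*"])
    print(gfa.segment("A").name)              # 'A'
    print(gfa.segment("C"))                   # None: no such segment
    # gfa.try_get_segment("C")                # would raise gfapy.NotFoundError
    print(gfa.line("B") is gfa.segment("B"))  # True: same S line
    for line in gfa.select({"record_type": "S", "xx": 1}):
        print(line.name)                      # 'A'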
diff --git a/gfapy/lines/headers.py b/gfapy/lines/headers.py
new file mode 100644
index 0000000..040dfe6
--- /dev/null
+++ b/gfapy/lines/headers.py
@@ -0,0 +1,21 @@
+import gfapy
+
+class Headers:
+ @property
+ def header(self):
+ """The header of the Gfa instance.
+
+ For simplicity of access, all tags are summarized in a single
+ Header instance. If the same tag is defined on different H lines,
+ the values are collected into a FieldArray instance.
+ """
+ return self._records["H"]
+
+ @property
+ def headers(self):
+ """The splitted header of the Gfa instance.
+
+ The header of the Gfa instance, splitted into H lines containing
+ each a single tag.
+ """
+ return self._records["H"]._split()
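A sketch of header vs headers (tag values are illustrative):

    import gfapy
    gfa = gfapy.Gfa(["H\tVN:Z:1.0", "H\txx:i:1"])
    print(gfa.header.VN)       # '1.0'
    print(gfa.header.xx)       # 1
    for h in gfa.headers:      # one single-tag H line per tag
        print(str(h))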
diff --git a/gfapy/lines/lines.py b/gfapy/lines/lines.py
new file mode 100644
index 0000000..899997e
--- /dev/null
+++ b/gfapy/lines/lines.py
@@ -0,0 +1,39 @@
+import gfapy
+import gfapy.line
+from .collections import Collections
+from .headers import Headers
+from .collections import Collections
+from .creators import Creators
+from .destructors import Destructors
+from .finders import Finders
+
+class Lines(Collections, Creators, Destructors, Finders, Headers):
+
+ GFA1Specific = [
+ gfapy.line.edge.Link,
+ gfapy.line.edge.Containment,
+ gfapy.line.group.Path,
+ gfapy.line.segment.GFA1
+ ]
+
+ GFA2Specific = [
+ gfapy.line.CustomRecord,
+ gfapy.line.Fragment,
+ gfapy.line.Gap,
+ gfapy.line.edge.GFA2,
+ gfapy.line.segment.GFA2,
+ gfapy.line.group.Unordered,
+ gfapy.line.group.Ordered,
+ gfapy.line.Unknown
+ ]
+
+ def _api_private_check_gfa_line(self, gfa_line, callermeth):
+ if not isinstance(gfa_line, gfapy.Line):
+ raise gfapy.TypeError("Note: {} is API private, ".format(callermeth)+
+ "do not call it directly\n"+
+ "Error: line class is {} and not gfapy.Line")
+ elif not gfa_line._gfa is self:
+ raise gfapy.RuntimeError("Note: {} is API private, ".format(callermeth)+
+ "do not call it directly\n"+
+ "Error: line.gfa is not the expected instance of gfapy.Gfa\n"+
+ repr(gfa_line.gfa)+" != "+repr(self))
diff --git a/gfapy/logger.py b/gfapy/logger.py
new file mode 100644
index 0000000..022490f
--- /dev/null
+++ b/gfapy/logger.py
@@ -0,0 +1,185 @@
+import sys
+import gfapy
+import time
+
+class Logger:
+ """
+ Output messages to the standard error or a logfile and
+ keep track of the progress of long running methods.
+
+ Parameters:
+ verbose_level (int) : 0: no logging; >0: the higher, the more logging
+ (default: 1); messages output using the log() method can be provided
+ with a min_verbose_level, and are output only if that value is equal
+ or higher than verbose_level
+ channel : where to output (default: sys.stderr); it must provide a
+ write() method
+ prefix (str) : output prefix (default: ``#``)
+
+ Returns:
+ gfapy.Logger
+ """
+
+ class ProgressData:
+ """
+ Information about the progress of a computation
+ """
+ def __init__(self, counter, units, partsize, lastpart, total, starttime,
+ strlen):
+ self._counter = counter
+ self._units = units
+ self._partsize = partsize
+ self._lastpart = lastpart
+ self._total = total
+ self._starttime = starttime
+ self._strlen = strlen
+
+ def __init__(self, verbose_level = 1, channel = sys.stderr, prefix = "#"):
+ self._progress = False
+ if not isinstance(verbose_level, int):
+ raise gfapy.ArgumentError("verbose_level must be an Integer")
+ if not(getattr(channel, "write", None) and callable(channel.write)):
+ raise gfapy.TypeError("channel must provide a 'write' method")
+ self._channel = channel
+ self._pfx = prefix
+ self._verbose_level = verbose_level
+ self._data = {}
+
+ def log(self, msg, min_verbose_level=1):
+ """Output a log message to the logger output channel.
+
+ Parameters:
+ msg (str) : message to output
+ min_verbose_level (int) : output the message only if the
+ verbose level of the logger is at least the specified one
+ (default: 1)
+ """
+ if self._verbose_level >= min_verbose_level:
+ self._channel.write("{} {}\n".format(self._pfx, msg))
+
+ def enable_progress(self, part = 0.1):
+ """Enable output of progress of long running methods.
+
+ Parameters:
+ part (float between 0 and 1) : if part = 0, output at every call of
+ progress_log(); if 0 < part < 1, output once per part of the total
+ progress (e.g. 0.001 = log every 0.1% progress); if part = 1, output
+ only total elapsed time at the end of the computation.
+ """
+ if part < 0 or part > 1:
+ raise gfapy.ArgumentError("part must be in range [0..1]")
+ self._progress = True
+ self._part = part
+ if self._verbose_level > 0:
+ self._channel.write("{} Progress logging enabled\n".format(self._pfx))
+
+ def disable_progress(self):
+ """Disable output of progress of long running methods."""
+ self._progress = False
+ if self._verbose_level > 0:
+ self._channel.write("{} Progress logging disabled\n".format(self._pfx))
+
+ def progress_init(self, symbol, units, total, initmsg = None):
+ """Initialize progress logging for a long running computation.
+
+ Parameters:
+ symbol (str) : an identifier assigned to the computation
+ units (str) : the name of the units of computation, in plural, for the
+ output messages
+ total (int) : the total number of units of the computation
+ initmsg (str) : an optional message to output at the beginning of the
+ computation
+ """
+ if not self._progress or total == 0:
+ return
+ string = "{} 0.0% {} processed".format(self._pfx, units)
+ self._data[symbol] = Logger.ProgressData(0, units, int(self._part*total),
+ 1, total, time.time(), len(string))
+ if initmsg:
+ self._channel.write("{} {}\n".format(self._pfx, initmsg))
+ if self._part != 1:
+ self._channel.write(string)
+
+ def progress_log(self, symbol, progress=1, **keyargs):
+ """Updates progress of a computation.
+
+ A logging message is output or not, depending on the part parameter
+ (see the `enable_progress` method).
+
+ Parameters:
+ symbol (str) : the identifier assigned to the computation when
+ `progress_init` was called
+ progress (int) : how many units of computations were completed
+ in the last iteration (default: 1)
+ **keyargs (dict) : additional units of computation to display (keys),
+ together with their current progress value (values);
+ (e.g. segments_processed: 10000)
+ """
+ if not self._progress or self._part == 1:
+ return
+ data = self._data.get(symbol, None)
+ if data is None:
+ return
+ data._counter += progress
+ if data._counter == data._total:
+ self.progress_end(symbol)
+ elif data._partsize == 0 or \
+ int(data._counter / data._partsize) > data._lastpart:
+ if data._partsize == 0 and self._part > 0:
+ return
+ # this means total is very small
+ if data._partsize > 0:
+ data._lastpart = data._counter / data._partsize
+ done = data._counter / data._total
+ t = int(time.time() - data._starttime)
+ eta = int((t / done) - t)
+ tstr= ("Elapsed: {:02d}h {:02d}min {:02d}s"
+ .format(t//3600, t//60%60, t%60))
+ etastr = ("ETA: {:02d}h {:02d}min {:02d}s"
+ .format(eta//3600, eta//60%60, eta%60))
+ donestr = "{:.1f}".format(done*100)
+ keystr = "".join([ "; {}: {}".format(k, v) for k,v in keyargs.items()])
+ string = "{} {}% {} processed " \
+ .format(self._pfx, donestr, data._units) + \
+ "[{}; {}{}]" \
+ .format(tstr, etastr, keystr)
+ if len(string) > data._strlen:
+ data._strlen = len(string)
+ spacediff = ""
+ else:
+ spacediff = " "*(data._strlen-len(string))
+ self._channel.write("\r{}{}".format(string, spacediff))
+ self._channel.flush()
+
+ def progress_end(self, symbol, **keyargs):
+ """Completes progress logging for a computation.
+
+ A message is always output. The progress is set to 100%.
+
+ Parameters:
+ symbol (str) : the identifier assigned to the computation when
+ `progress_init` was called
+ **keyargs (dict) : additional units of computation to display (keys),
+ together with their final value (values);
+ (e.g. segments_processed: 100000)
+ """
+ if not self._progress:
+ return
+ data = self._data.get(symbol, None)
+ if data is None:
+ return
+ t = int(time.time() - data._starttime)
+ tstr= ("Elapsed time: {:02d}h {:02d}min {:02d}s"
+ .format(t//3600, t//60%60, t%60))
+ quantity = str(data._total) if self._part == 1 else "100.0%"
+ keystr = "".join([ "; {}: {}".format(k, v) for k,v in keyargs.items()])
+ string = "{} {} {} processed " \
+ .format(self._pfx, quantity, data._units) + \
+ "[{}{}]" \
+ .format(tstr, keystr)
+ spacediff = " " * (max(data._strlen - len(string),0))
+ if self._part != 1:
+ self._channel.write("\r")
+ self._channel.write("{}{}\n".format(string, spacediff))
+ self._channel.flush()
+ self._data.pop(symbol)
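A sketch of the Logger progress API described above (assuming Logger is exported at the package top level, as the gfapy.Logger return type in the docstring suggests; output goes to sys.stderr):

    import gfapy
    log = gfapy.Logger(verbose_level=1)
    log.log("starting computation")            # printed as '# starting computation'
    log.enable_progress(part=0.25)             # report roughly every 25%
    log.progress_init("scan", "segments", 1000, "scanning segments")
    for i in range(1000):
        log.progress_log("scan")               # progress_end() fires at 100%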
diff --git a/gfapy/numeric_array.py b/gfapy/numeric_array.py
new file mode 100644
index 0000000..442100b
--- /dev/null
+++ b/gfapy/numeric_array.py
@@ -0,0 +1,215 @@
+import gfapy
+
+class NumericArray(list):
+ """
+ A numeric array representable using the data type B of the GFA specification.
+ """
+
+ SIGNED_INT_SUBTYPE = ["c", "s", "i"]
+ """
+ Subtypes for signed integers, from the smallest to the largest
+ """
+
+ UNSIGNED_INT_SUBTYPE = [ st.upper() for st in SIGNED_INT_SUBTYPE ]
+ """
+ Subtypes for unsigned integers, from the smallest to the largest
+ """
+
+ INT_SUBTYPE = UNSIGNED_INT_SUBTYPE + SIGNED_INT_SUBTYPE
+ """
+ Subtypes for integers
+ """
+
+ FLOAT_SUBTYPE = ["f"]
+ """
+ Subtypes for floats
+ """
+
+ SUBTYPE = INT_SUBTYPE + FLOAT_SUBTYPE
+ """
+ Subtypes
+ """
+
+ SUBTYPE_BITS = {"c" : 8, "s" : 16, "i" : 32}
+ """
+ Number of bits of the integer subtypes
+ """
+
+ SUBTYPE_RANGE = {
+ "C" : (0, 2**8),
+ "S" : (0, 2**16),
+ "I" : (0, 2**32),
+ "c" : (-(2**(8 - 1)), 2**(8 - 1)),
+ "s" : (-(2**(16 - 1)), 2**(16 - 1)),
+ "i" : (-(2**(32 - 1)), 2**(32 - 1))
+ }
+ """
+ Range for integer subtypes
+ (Python-style, i.e. range[1] is not included)
+ """
+
+ def validate(self):
+ """
+ Validate the numeric array
+
+ Raises
+ ------
+ gfapy.ValueError
+ If the array is not valid
+ """
+ self.compute_subtype()
+
+ def compute_subtype(self):
+ """
+ Computes the subtype of the array from its content.
+
+ If all elements are float, then the computed subtype is "f".
+ If all elements are integer, the smallest possible numeric subtype
+ is computed; thereby,
+ if all elements are non-negative, an unsigned subtype is selected,
+ otherwise a signed subtype.
+ In all other cases an exception is raised.
+
+ Raises
+ ------
+ gfapy.ValueError
+ If the array is not a valid numeric array
+
+ Returns
+ -------
+ one of gfapy.NumericArray.SUBTYPE
+ """
+ if all([ isinstance(f, float) for f in self]):
+ return "f"
+ else:
+ e_max = None
+ e_min = None
+ for e in self:
+ if not isinstance(e, int):
+ raise gfapy.ValueError(
+ "NumericArray does not contain homogenous numeric values\n"+
+ "Content: {}".format(repr(self)))
+ if (e_max is None or e > e_max): e_max = e
+ if (e_min is None or e < e_min): e_min = e
+ return gfapy.NumericArray.integer_type((e_min,e_max))
+
+ @staticmethod
+ def integer_type(range):
+ """
+ Computes the subtype for integers in a given range.
+
+ If all elements are non-negative, an unsigned subtype is selected,
+ otherwise a signed subtype.
+
+ Parameters
+ ----------
+ range : (int, int)
+ The integer range (min, max)
+
+ Raises
+ ------
+ gfapy.ValueError
+ If the integer range is outside all subtype ranges
+
+ Returns
+ -------
+ one of gfapy.NumericArray.INT_SUBTYPE
+ subtype code
+ """
+ if range[0] < 0:
+ for st in NumericArray.SIGNED_INT_SUBTYPE:
+ st_range = NumericArray.SUBTYPE_RANGE[st]
+ if st_range[0] <= range[0] and st_range[1] > range[1]:
+ return st
+ else:
+ for st in NumericArray.UNSIGNED_INT_SUBTYPE:
+ st_range = NumericArray.SUBTYPE_RANGE[st]
+ if st_range[1] > range[1]:
+ return st
+ raise gfapy.ValueError(
+ "NumericArray: values are outside of all integer subtype ranges\n"+
+ "Range: {}".format(repr(range)))
+
+ def __str__(self):
+ """
+ GFA datatype B representation of the numeric array
+
+ Raises
+ ------
+ gfapy.ValueError
+ if the array is not a valid numeric array
+
+ Returns
+ -------
+ str
+ """
+ subtype = self.compute_subtype()
+ return "{},{}".format(subtype, ",".join([str(v) for v in self]))
+
+ def _default_gfa_tag_datatype(self):
+ """
+ GFA tag datatype to use, if none is provided
+
+ Returns
+ -------
+ one of gfapy.Field.TAG_DATATYPE
+ """
+ return "B"
+
+ @classmethod
+ def from_string(cls, string, valid = False):
+ """
+ Create a numeric array from a string
+
+ Parameters
+ ----------
+ string : str
+ valid : optional bool
+ *(default:* **False** *)*
+ If **False**, validate the range of the numeric values, according
+ to the array subtype. If **True**, the string is assumed to be valid
+ and the range checks are skipped.
+
+ Raises
+ ------
+ gfapy.ValueError
+ If any value is not compatible with the subtype.
+ gfapy.TypeError
+ If the subtype code is invalid.
+
+ Returns
+ -------
+ gfapy.NumericArray
+ The numeric array
+ """
+ if not valid:
+ if len(string) == 0:
+ raise gfapy.FormatError("Numeric array string shall not be empty")
+ if string[-1] == ",":
+ raise gfapy.FormatError("Numeric array string ends with comma\n"+
+ "String: {}".format(string))
+ elems = string.split(",")
+ subtype = elems[0]
+ if subtype not in NumericArray.SUBTYPE:
+ raise gfapy.TypeError("Subtype {} unknown".format(subtype))
+ if subtype != "f":
+ range = NumericArray.SUBTYPE_RANGE[subtype]
+ def gen():
+ for e in elems[1:]:
+ if subtype != "f":
+ try:
+ e = int(e)
+ except:
+ raise gfapy.ValueError("Value is not valid: {}\n".format(e)+
+ "Numeric array string: {}".format(string))
+ if not valid and not (e >= range[0] and e < range[1]):
+ raise gfapy.ValueError((
+ "NumericArray: "+
+ "value is outside of subtype {0} range\n"+
+ "Value: {1}\n"+
+ "Range: {2}\n"+
+ "Content: {3}").format(subtype, e,
+ repr(range), repr(string)))
+ yield e
+ else:
+ yield float(e)
+ return cls(list(gen()))
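A sketch of the NumericArray subtype computation (values are illustrative):

    import gfapy
    a = gfapy.NumericArray.from_string("c,-1,12,127")
    print(list(a))                # [-1, 12, 127]
    print(a.compute_subtype())    # 'c': a signed 8-bit subtype suffices
    b = gfapy.NumericArray([0, 1000])
    print(str(b))                 # 'S,0,1000': unsigned 16-bit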
diff --git a/gfapy/oriented_line.py b/gfapy/oriented_line.py
new file mode 100644
index 0000000..86e49c9
--- /dev/null
+++ b/gfapy/oriented_line.py
@@ -0,0 +1,176 @@
+import gfapy
+import re
+
+class OrientedLine:
+ """A line plus an orientation.
+
+ The line can be an instance of `~gfapy.line.line.Line` or a string (line
+ identifier). The orientation is a string, either ``'+'`` or ``'-'``.
+ Methods not defined in this class are delegated to the line element.
+
+ Parameters:
+ value (str, list, OrientedLine) : a line identifier with a 1-letter
+ orientation suffix + or -, or a list of two elements (identifier
+ or line instance and orientation string), or an OrientedLine instance
+
+ Returns:
+ OrientedLine: if value is an OrientedLine, then
+ it is returned; if it is a string, then an OrientedLine where line
+ is a string (the string without the last char, which is the orientation);
+ if it is a list, then an OrientedLine where line is the first element,
+ orientation the second
+ """
+
+ def __new__(cls, *args):
+ if isinstance(args[0], OrientedLine):
+ return args[0]
+ else:
+ new_instance = object.__new__(cls)
+ return new_instance
+
+ def __init__(self, *args):
+ if len(args) == 1:
+ if isinstance(args[0], OrientedLine):
+ return
+ elif isinstance(args[0], str):
+ self.__line = args[0][0:-1]
+ self.__orient = args[0][-1]
+ elif isinstance(args[0], list):
+ self.__line = args[0][0]
+ self.__orient = args[0][1]
+ else:
+ raise gfapy.ArgumentError("Cannot create an OrientedLine"+
+ " instance from an object of type {}".format(type(args[0])))
+ elif len(args) == 2:
+ self.__line = args[0]
+ self.__orient = args[1]
+ else:
+ raise gfapy.ArgumentError("Wrong number of arguments for OrientedLine()")
+ self.__editable = True
+
+ @property
+ def line(self):
+ """The line.
+
+ Returns:
+ str or `~gfapy.line.line.Line`
+ """
+ return self.__line
+
+ @property
+ def orient(self):
+ """The orientation.
+
+ Returns:
+ str : '+' or '-'
+ """
+ return self.__orient
+
+ @line.setter
+ def line(self, line):
+ if self.__editable:
+ self.__line = line
+ else:
+ raise gfapy.RuntimeError(
+ "gfapy.OrientedLine instance cannot be edited ({})".format(self))
+
+ @orient.setter
+ def orient(self, orient):
+ if self.__editable:
+ self.__orient = orient
+ else:
+ raise gfapy.RuntimeError(
+ "gfapy.OrientedLine instance cannot be edited ({})".format(self))
+
+ @property
+ def name(self):
+ """The name of the line.
+
+ Returns:
+ str : if line is a string, then line; if it is a line instance,
+ then line.name
+ """
+ if isinstance(self.__line, str):
+ return self.__line
+ else:
+ return self.__line.name
+
+ def validate(self):
+ """Validate the content of the instance
+
+ Raises:
+ gfapy.error.ValueError: if the orientation is invalid
+ gfapy.error.TypeError: if the line is not a string or a
+ `gfapy.line.line.Line` instance
+ gfapy.error.FormatError: if the line is a string which is not a valid
+ line identifier, or it is a Line instance with an invalid name
+ """
+ self.__validate_line()
+ self.__validate_orient()
+ return None
+
+ def inverted(self):
+ """An oriented line with the same line element, but inverted orientation.
+
+ Note:
+ the inverted() method returns an OrientedLine with inverted orientation;
+ the invert() method inverts the orientation in place (and returns None)
+ """
+ return OrientedLine(self.line, gfapy.invert(self.orient))
+
+ def invert(self):
+ """Invert the orientation of the OrientedLine instance.
+
+ Note:
+ the inverted() method returns an OrientedLine with inverted orientation;
+ the invert() method inverts the orientation in place (and returns None)
+ """
+ self.orient = gfapy.invert(self.orient)
+
+ def __str__(self):
+ if self.name:
+ return "{}{}".format(self.name, self.orient)
+ else:
+ return "({}){}".format(str(self.line), self.orient)
+
+ def __repr__(self):
+ return "gfapy.OrientedLine({},{})".format(repr(self.line),repr(self.orient))
+
+ def __eq__(self, other):
+ if isinstance(other, OrientedLine):
+ pass
+ elif isinstance(other, list):
+ other = OrientedLine(other)
+ elif isinstance(other, str):
+ other = OrientedLine(other)
+ else:
+ return False
+ return (self.name == other.name) and (self.orient == other.orient)
+
+ # Delegate methods to the line
+ def __getattr__(self, name):
+ return getattr(self.__line, name)
+
+ def _block(self):
+ self.__editable = False
+
+ def _unblock(self):
+ self.__editable = True
+
+ def __validate_orient(self):
+ if not self.orient in ["+", "-"]:
+ raise gfapy.ValueError("Invalid orientation ({})".format(self.orient))
+
+ def __validate_line(self):
+ if isinstance(self.line, gfapy.Line):
+ string = self.line.name
+ elif isinstance(self.line, str):
+ string = self.line
+ else:
+ raise gfapy.TypeError(
+ "Invalid class ({}) for line reference ({})"
+ .format(self.line.__class__, self.line))
+ if not re.match(r"^[!-~]+$", string):
+ raise gfapy.FormatError(
+ "{} is not a valid GFA identifier\n".format(repr(string))+
+ "(it contains spaces or non-printable characters)")
diff --git a/gfapy/placeholder.py b/gfapy/placeholder.py
new file mode 100644
index 0000000..86f0e47
--- /dev/null
+++ b/gfapy/placeholder.py
@@ -0,0 +1,103 @@
+import gfapy
+
+class Placeholder:
+ """
+ A placeholder is used in mandatory fields when a value is not specified.
+
+ Its string representation is an asterisk ``*``.
+ """
+
+ def __str__(self):
+ return "*"
+
+ def __repr__(self):
+ return "gfapy.Placeholder()"
+
+ def __bool__(self):
+ return False
+
+ def complement(self):
+ return self
+
+ def is_empty(self):
+ """
+ A placeholder is always empty.
+
+ Returns
+ -------
+ True : bool
+ Returns always **True**
+ """
+ return True
+
+ def validate(self, *args, **keyargs):
+ """
+ A placeholder is always valid.
+ """
+ return None
+
+ def rc(self):
+ """
+ For compatibility with the rc() operation on sequence strings
+ (gfapy.sequence module).
+
+ Returns
+ -------
+ self : Placeholder
+ """
+ return self
+
+ def __len__(self):
+ """
+ Length/size of a placeholder is always 0.
+
+ Returns
+ -------
+ 0 : int
+ Returns always 0
+ """
+ return 0
+
+ def __getitem__(self, key):
+ """
+ Any slice of the placeholder returns the placeholder itself.
+
+ Parameters
+ ----------
+ key
+ Ignored
+
+ Returns
+ -------
+ self : Placeholder
+ """
+ return self
+
+ def __add__(self, other):
+ """
+ Adding the placeholder to anything returns the placeholder itself.
+
+ Parameters
+ ----------
+ other
+ Ignored
+
+ Returns
+ -------
+ self : Placeholder
+ """
+ return self
+
+ def __eq__(self, other):
+ return gfapy.is_placeholder(other)
+
+def is_placeholder(object):
+ if isinstance(object, Placeholder):
+ return True
+ elif object is None:
+ return True
+ elif object == "*":
+ return True
+ elif isinstance(object, list) and len(object) == 0:
+ return True
+ else:
+ return False
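A sketch of Placeholder and is_placeholder():

    import gfapy
    p = gfapy.Placeholder()
    print(str(p), len(p), bool(p))         # * 0 False
    print(str(p[2:5]), str(p + "ACGT"))    # * *  (every operation returns p itself)
    print(gfapy.is_placeholder("*"))       # True
    print(gfapy.is_placeholder("ACGT"))    # False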
diff --git a/gfapy/segment_end.py b/gfapy/segment_end.py
new file mode 100644
index 0000000..3a8d621
--- /dev/null
+++ b/gfapy/segment_end.py
@@ -0,0 +1,147 @@
+import gfapy
+import re
+
+class SegmentEnd:
+ """A segment plus an end type (L or R).
+
+ The ``segment`` can be an instance of a segment subclass of
+ `~gfapy.line.line.Line` or a string (line identifier). The ``end_type``
+ symbol is a string, either ``'L'`` or ``'R'``. Methods not defined in this
+ class are delegated to the segment element.
+
+ Parameters:
+ value (str, list, SegmentEnd) : a segment identifier with a 1-letter
+ end symbol suffix (L or R), or a list of two elements (identifier
+ or line instance and end symbol), or a SegmentEnd instance
+
+ Returns:
+ SegmentEnd: if value is a SegmentEnd, it is returned; if it is a
+ string, a SegmentEnd where segment is a string (the string without
+ the last char, which is the end symbol); if it is a list, a
+ SegmentEnd where segment is the first element and the end symbol
+ the second
+ """
+
+ def __new__(cls, *args):
+ if isinstance(args[0], SegmentEnd):
+ return args[0]
+ else:
+ new_instance = object.__new__(cls)
+ return new_instance
+
+ def __init__(self, *args):
+ if len(args) == 1:
+ if isinstance(args[0], SegmentEnd):
+ return
+ elif isinstance(args[0], str):
+ self.__segment = args[0][0:-1]
+ self.__end_type = args[0][-1]
+ elif isinstance(args[0], list):
+ if len(args[0]) != 2:
+ raise gfapy.ArgumentError("Cannot create a SegmentEnd "+
+ " from a list of size {}".format(len(args[0])))
+ self.__segment = args[0][0]
+ self.__end_type = args[0][1]
+ else:
+ raise gfapy.ArgumentError("Cannot create an SegmentEnd "+
+ " from an object of type {}".format(type(args[0])))
+ elif len(args) == 2:
+ self.__segment = args[0]
+ self.__end_type = args[1]
+ else:
+ raise gfapy.ArgumentError("Wrong number of arguments for SegmentEnd()")
+
+ def validate(self):
+ """Validate the content of the instance
+
+ Raises:
+ gfapy.error.ValueError: if the end type is invalid
+ gfapy.error.TypeError: if the segment is not a string or
+ an instance of a segment subclass of `gfapy.line.line.Line`
+ gfapy.error.FormatError: if the segment is a string which is not a valid
+ segment identifier, or it is a segment Line instance with an invalid
+ name
+ """
+ self.__validate_segment()
+ self.__validate_end_type()
+ return None
+
+ def __validate_end_type(self):
+ if not self.__end_type in ["L", "R"]:
+ raise gfapy.ValueError(
+ "Invalid end type ({})".format(repr(self.__end_type)))
+
+ def __validate_segment(self):
+ if isinstance(self.segment, gfapy.line.Segment):
+ string = self.segment.name
+ elif isinstance(self.segment, str):
+ string = self.segment
+ else:
+ raise gfapy.TypeError(
+ "Invalid class ({}) for segment reference ({})"
+ .format(self.segment.__class__, self.segment))
+ if not re.match(r"^[!-~]+$", string):
+ raise gfapy.FormatError(
+ "{} is not a valid segment identifier\n".format(repr(string))+
+ "(it contains spaces or non-printable characters)")
+
+ @property
+ def segment(self):
+ """The segment.
+
+ Returns:
+ str or `gfapy.line.segment.gfa1.GFA1` or `gfapy.line.segment.gfa2.GFA2`
+ """
+ return self.__segment
+
+ @segment.setter
+ def segment(self, value):
+ self.__segment=value
+
+ @property
+ def name(self):
+ """The name of the segment.
+
+ Returns:
+ str : if segment is a string, then segment; if it is a segment instance,
+ then segment.name
+ """
+ if isinstance(self.__segment, gfapy.Line):
+ return self.__segment.name
+ else:
+ return str(self.__segment)
+
+ @property
+ def end_type(self):
+ """The end type.
+
+ Returns:
+ str : 'L' or 'R'
+ """
+ return self.__end_type
+
+ @end_type.setter
+ def end_type(self, value):
+ self.__end_type = value
+
+ def inverted(self):
+ return SegmentEnd(self.__segment, gfapy.invert(self.end_type))
+
+ def __str__(self):
+ return "{}{}".format(self.name, self.end_type)
+
+ def __repr__(self):
+ return "gfapy.SegmentEnd({},{})".format(repr(self.segment),
+ repr(self.end_type))
+
+ def __eq__(self, other):
+ if isinstance(other, list):
+ other = SegmentEnd(other)
+ elif isinstance(other, str):
+ other = SegmentEnd(other)
+ elif not isinstance(other, gfapy.SegmentEnd):
+ return False
+ return (self.name == other.name) and (self.end_type == other.end_type)
+
+ def __getattr__(self, name):
+ return getattr(self.__segment, name)
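A sketch of SegmentEnd (the segment name is illustrative):

    import gfapy
    se = gfapy.SegmentEnd("sA", "L")       # segment name plus end type
    print(str(se), str(se.inverted()))     # sAL sAR
    se2 = gfapy.SegmentEnd(["sA", "R"])    # list form
    print(se == se2.inverted())            # True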
diff --git a/gfapy/segment_end_path.py b/gfapy/segment_end_path.py
new file mode 100644
index 0000000..d5779f5
--- /dev/null
+++ b/gfapy/segment_end_path.py
@@ -0,0 +1,19 @@
+class SegmentEndsPath(list):
+ """
+ A list of gfapy.SegmentEnd elements, which defines a path
+ in the graph.
+ """
+
+ def reverse(self):
+ """
+ Reverses the direction of the path in place
+ """
+ self[:] = list(reversed(self))
+
+ def __reversed__(self):
+ """
+ Iterator over the reverse-direction path
+ """
+ for elem in SegmentEndsPath(reversed([segment_end.inverted()
+ for segment_end in self])):
+ yield elem
diff --git a/gfapy/sequence.py b/gfapy/sequence.py
new file mode 100644
index 0000000..5812b4e
--- /dev/null
+++ b/gfapy/sequence.py
@@ -0,0 +1,62 @@
+"""
+Methods for processing strings as nucleotide sequences
+"""
+import gfapy
+
+def rc(sequence, valid = False, rna = False):
+ """Compute the reverse complement of a nucleotidic sequence.
+
+ All characters in the IUPAC extended alphabet are supported
+ (ACGTUBVHDRYKMSWN). The characters ".", "-" and "=" are left as they
+ are; spaces and newlines are removed. The case of each character is
+ preserved.
+
+ Returns:
+ str : reverse complement, without newlines and spaces;
+ "*" if string is "*"
+
+ Parameters:
+ sequence (str) : the sequence to reverse-complement
+ valid (bool) : if True, the reverse complement of any invalid character
+ is the character itself
+ rna (bool) : if True, t/T are substituted by u/U in the output
+
+ Raises:
+ gfapy.error.ValueError : if valid is False and an invalid character
+ (not in the IUPAC extended alphabet for nucleotide sequences, .-=,
+ spaces or newline) is found
+ """
+ if gfapy.is_placeholder(sequence): return sequence
+ def fun(c):
+ wcc = WCC.get(c, c if valid else None)
+ if not wcc:
+ raise gfapy.ValueError("{}: no Watson-Crick complement for {}".format(sequence, c))
+ return wcc
+ retval = "".join(reversed([ fun(c) for c in sequence ]))
+ if rna:
+ retval = retval.translate(str.maketrans("tT", "uU"))
+ return retval
+
+WCC = {"a":"t","t":"a","A":"T","T":"A",
+ "c":"g","g":"c","C":"G","G":"C",
+ "b":"v","B":"V","v":"b","V":"B",
+ "h":"d","H":"D","d":"h","D":"H",
+ "R":"Y","Y":"R","r":"y","y":"r",
+ "K":"M","M":"K","k":"m","m":"k",
+ "S":"S","s":"s","w":"w","W":"W",
+ "n":"n","N":"N","u":"a","U":"A",
+ "-":"-",".":".","=":"=",
+ " ":"","\n":""}
+"""Watson-Crick Complements"""
+
+def Sequence(string):
+ """Parses the content of a sequence field.
+
+ Parameters:
+ string (str) : content of a sequence field
+
+ Returns:
+ str, gfapy.Placeholder : if the string is the placeholder
+ symbol ``*`` then a placeholder, otherwise the string
+ itself
+ """
+ return gfapy.Placeholder() if (string == "*") else string
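A sketch of rc(), importing it directly from the module defined above (the sequences are illustrative):

    from gfapy.sequence import rc
    print(rc("ACGTacgtN"))          # NacgtACGT
    print(rc("AACG", rna=True))     # CGUU
    print(rc("*"))                  # * (placeholders are returned unchanged)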
diff --git a/gfapy/symbol_invert.py b/gfapy/symbol_invert.py
new file mode 100644
index 0000000..3ca0bed
--- /dev/null
+++ b/gfapy/symbol_invert.py
@@ -0,0 +1,25 @@
+import gfapy
+
+def invert(symbol):
+ """Computes the inverted orientation or end_type symbol.
+
+ Parameters:
+ symbol (str) : a one-character string, symbolizing an orientation (+ or -)
+ or an end-type (L or R)
+
+ Returns:
+ str : the other one character string of the same category (e.g. - for +)
+
+ Raises:
+ gfapy.error.ValueError : if a string other than the mentioned ones is used
+ """
+ if symbol == "+":
+ return "-"
+ elif symbol == "-":
+ return "+"
+ elif symbol == "L":
+ return "R"
+ elif symbol == "R":
+ return "L"
+ else:
+ raise gfapy.ValueError("No inverse defined for {}".format(symbol))
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..dd38343
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,2 @@
+[bdist_wheel]
+python-tag = py3
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..f8be775
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,41 @@
+from setuptools import setup, find_packages
+
+def readme():
+ with open('README.rst') as f:
+ return f.read()
+
+import sys
+if not sys.version_info[0] == 3:
+ sys.exit("Sorry, only Python 3 is supported")
+
+setup(name='gfapy',
+ version='1.0.0',
+ description='Library for handling data in the GFA1 and GFA2 formats',
+ long_description=readme(),
+ url='https://github.com/ggonnella/gfapy',
+ keywords="bioinformatics genomics sequences GFA assembly graphs",
+ author='Giorgio Gonnella and others (see CONTRIBUTORS)',
+ author_email='gonnella at zbh.uni-hamburg.de',
+ license='ISC',
+ # see https://pypi.python.org/pypi?%3Aaction=list_classifiers
+ classifiers=[
+ 'Development Status :: 5 - Production/Stable',
+ 'Environment :: Console',
+ 'Intended Audience :: Developers',
+ 'Intended Audience :: End Users/Desktop',
+ 'Intended Audience :: Science/Research',
+ 'License :: OSI Approved :: ISC License (ISCL)',
+ 'Operating System :: MacOS :: MacOS X',
+ 'Operating System :: POSIX :: Linux',
+ 'Programming Language :: Python :: 3 :: Only',
+ 'Topic :: Scientific/Engineering :: Bio-Informatics',
+ 'Topic :: Software Development :: Libraries',
+ ],
+ packages=find_packages(),
+ scripts=['bin/gfapy-convert','bin/gfapy-validate',
+ 'bin/gfapy-mergelinear'],
+ zip_safe=False,
+ test_suite="nose.collector",
+ include_package_data=True,
+ tests_require=['nose'],
+ )
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/extension.py b/tests/extension.py
new file mode 100644
index 0000000..e9c947a
--- /dev/null
+++ b/tests/extension.py
@@ -0,0 +1,50 @@
+import gfapy
+import re
+from collections import OrderedDict
+
+class Taxon(gfapy.Line):
+ RECORD_TYPE = "T"
+ POSFIELDS = OrderedDict([("tid","identifier_gfa2")])
+ TAGS_DATATYPE = {"UL":"Z"}
+ NAME_FIELD = "tid"
+
+Taxon.register_extension()
+
+class MetagenomicAssignment(gfapy.Line):
+ RECORD_TYPE = "M"
+ POSFIELDS = OrderedDict([("mid","optional_identifier_gfa2"),
+ ("tid","identifier_gfa2"),
+ ("sid","identifier_gfa2")])
+ TAGS_DATATYPE = {"SC":"i"}
+ NAME_FIELD = "mid"
+
+MetagenomicAssignment.register_extension(references=
+ [("sid", gfapy.line.segment.GFA2, "metagenomic_assignments"),
+ ("tid", Taxon, "metagenomic_assignments")])
+
+class TaxonID:
+
+ def validate_encoded(string):
+ if not re.match(r"^taxon:(\d+)$",string) and \
+ not re.match(r"^[a-zA-Z0-9_]+$", string):
+ raise gfapy.ValueError("Invalid taxon ID: {}".format(string))
+
+ def decode(string):
+ TaxonID.validate_encoded(string)
+ return string
+
+ def validate_decoded(obj):
+ if isinstance(obj,Taxon):
+ TaxonID.validate_encoded(obj.name)
+ else:
+ raise gfapy.TypeError(
+ "Invalid type for taxon ID: "+"{}".format(repr(obj)))
+
+ def encode(obj):
+ TaxonID.validate_decoded(obj)
+ return obj
+
+gfapy.Field.register_datatype("taxon_id", TaxonID)
+
+Taxon.DATATYPE["tid"] = "taxon_id"
+MetagenomicAssignment.DATATYPE["tid"] = "taxon_id"
diff --git a/tests/test_api_alignment.py b/tests/test_api_alignment.py
new file mode 100644
index 0000000..49729af
--- /dev/null
+++ b/tests/test_api_alignment.py
@@ -0,0 +1,188 @@
+import unittest
+from copy import deepcopy
+import gfapy
+
+class TestApiAlignment(unittest.TestCase):
+
+ cigar_1 = gfapy.CIGAR([
+ gfapy.CIGAR.Operation(12,"M"),
+ gfapy.CIGAR.Operation(1,"D"),
+ gfapy.CIGAR.Operation(2,"I"),
+ gfapy.CIGAR.Operation(0,"M"),
+ gfapy.CIGAR.Operation(1,"P")])
+ cigar_1_s = "12M1D2I0M1P"
+
+ cigar_gfa1_1_s = "1S2M3I4=5X6D7P8N9H"
+ cigar_gfa1_1_c_s = "9H8I7P6I5X4=3D2M1D"
+ cigar_gfa1_1_rlen = 2+4+5+6+8
+ cigar_gfa1_1_qlen = 1+2+3+4+5
+
+ cigar_gfa2_1_s = "1M2I3D4P"
+ cigar_gfa2_1_c_s = "4P3I2D1M"
+ cigar_gfa2_1_rlen = 1+3
+ cigar_gfa2_1_qlen = 1+2
+
+ trace_1 = gfapy.Trace([12,12,0])
+ trace_1_s = "12,12,0"
+
+ cigar_invalid_value_1 = gfapy.CIGAR([
+ gfapy.CIGAR.Operation(-12,"M"),
+ gfapy.CIGAR.Operation(1,"D"),
+ gfapy.CIGAR.Operation(2,"I")])
+ cigar_invalid_value_2 = gfapy.CIGAR([
+ gfapy.CIGAR.Operation(12, "Y"),
+ gfapy.CIGAR.Operation(1,"D"),
+ gfapy.CIGAR.Operation(2,"I")])
+ cigar_invalid_type_1 = gfapy.CIGAR([
+ "x",
+ gfapy.CIGAR.Operation(1,"D"),
+ gfapy.CIGAR.Operation(2,"I")])
+
+ trace_invalid_value_1 = gfapy.Trace([-2,1,12])
+ trace_invalid_type_1 = gfapy.Trace([12.0,1,12])
+
+ cigar_empty = gfapy.CIGAR([])
+ trace_empty = gfapy.Trace([])
+ placeholder = gfapy.AlignmentPlaceholder()
+ placeholder_s = "*"
+
+ string_invalid = [
+ "-12M1D2I", "12Y1D2I", "x1D2I",
+ "-2,1,12", "12.0,1,12", "*x",
+ ]
+
+ cigar_op_1 = gfapy.CIGAR.Operation(1,"D")
+ cigar_op_1_s = "1D"
+ cigar_op_1_len = 1
+ cigar_op_1_code = "D"
+ cigar_op_2 = gfapy.CIGAR.Operation(2,"I")
+ cigar_op_2_s = "2I"
+ cigar_op_2_len = 2
+ cigar_op_2_code = "I"
+
+ def test_to_s(self):
+ self.assertEqual(TestApiAlignment.cigar_1_s, str(TestApiAlignment.cigar_1_s))
+ self.assertEqual(TestApiAlignment.cigar_1_s, str(TestApiAlignment.cigar_1))
+ self.assertEqual(TestApiAlignment.trace_1_s, str(TestApiAlignment.trace_1))
+ self.assertEqual(TestApiAlignment.placeholder_s, str(TestApiAlignment.placeholder))
+ self.assertEqual(TestApiAlignment.placeholder_s, str(TestApiAlignment.cigar_empty))
+ self.assertEqual(TestApiAlignment.placeholder_s, str(TestApiAlignment.trace_empty))
+
+ def test_cigar_clone(self):
+ cigar1_clone = deepcopy(gfapy.Alignment(TestApiAlignment.cigar_1))
+ self.assertEqual(TestApiAlignment.cigar_1_s, str(cigar1_clone))
+ cigar1_clone[0].code = "="
+ # copy is deep, only the clone has changed:
+ self.assertNotEqual(TestApiAlignment.cigar_1_s, str(cigar1_clone))
+ self.assertEqual(TestApiAlignment.cigar_1_s, str(TestApiAlignment.cigar_1))
+
+ def test_to_alignment(self):
+ self.assertEqual(TestApiAlignment.cigar_1, gfapy.Alignment(TestApiAlignment.cigar_1_s))
+ self.assertEqual(TestApiAlignment.trace_1, gfapy.Alignment(TestApiAlignment.trace_1_s))
+ self.assertEqual(TestApiAlignment.placeholder, gfapy.Alignment(TestApiAlignment.placeholder_s))
+ for alignment in [TestApiAlignment.cigar_1, TestApiAlignment.trace_1, TestApiAlignment.cigar_empty,
+ TestApiAlignment.trace_empty, TestApiAlignment.placeholder]:
+ self.assertEqual(alignment, gfapy.Alignment(alignment))
+ for string in TestApiAlignment.string_invalid:
+ self.assertRaises(gfapy.FormatError, gfapy.Alignment, string)
+
+ def test_decode_encode_invariant(self):
+ for string in [TestApiAlignment.trace_1_s, TestApiAlignment.cigar_1_s, TestApiAlignment.placeholder_s]:
+ self.assertEqual(string, str(gfapy.Alignment(string)))
+
+ def test_is_placeholder(self):
+ for a in [TestApiAlignment.cigar_empty, TestApiAlignment.trace_empty, TestApiAlignment.placeholder, TestApiAlignment.placeholder_s]:
+ assert(gfapy.is_placeholder(a))
+ for a in [TestApiAlignment.cigar_1, TestApiAlignment.cigar_1_s, TestApiAlignment.trace_1, TestApiAlignment.trace_1_s]:
+ assert(not gfapy.is_placeholder(a))
+
+ def test_validate(self):
+ TestApiAlignment.trace_1.validate() # nothing raised
+ TestApiAlignment.trace_empty.validate() # nothing raised
+ TestApiAlignment.cigar_1.validate() # nothing raised
+ TestApiAlignment.cigar_empty.validate() # nothing raised
+ TestApiAlignment.placeholder.validate() # nothing raised
+ self.assertRaises(gfapy.ValueError,TestApiAlignment.trace_invalid_value_1.validate)
+ self.assertRaises(gfapy.ValueError,TestApiAlignment.cigar_invalid_value_1.validate)
+ self.assertRaises(gfapy.ValueError,TestApiAlignment.cigar_invalid_value_2.validate)
+ self.assertRaises(gfapy.TypeError,TestApiAlignment.trace_invalid_type_1.validate)
+ self.assertRaises(gfapy.TypeError,TestApiAlignment.cigar_invalid_type_1.validate)
+
+ def test_version_specific_validate(self):
+ gfapy.Alignment(TestApiAlignment.cigar_gfa1_1_s,
+ version="gfa1", valid=False) # nothing raised
+ self.assertRaises(gfapy.FormatError, gfapy.Alignment,
+ TestApiAlignment.cigar_gfa1_1_s, version="gfa2", valid=False)
+ gfapy.Alignment(TestApiAlignment.cigar_gfa2_1_s,
+ version="gfa1", valid=False) # nothing raised
+ gfapy.Alignment(TestApiAlignment.cigar_gfa2_1_s,
+ version="gfa2", valid=False) # nothing raised
+
+ def test_array_methods(self):
+ for a in [TestApiAlignment.cigar_empty, TestApiAlignment.trace_empty]:
+ assert(not a)
+ for a in [TestApiAlignment.cigar_1, TestApiAlignment.trace_1]:
+ assert(a)
+ self.assertEqual(gfapy.CIGAR.Operation(1,"D"), TestApiAlignment.cigar_1[1])
+ self.assertEqual(12, TestApiAlignment.trace_1[1])
+
+
+ def test_cigar_operation_methods(self):
+ self.assertEqual(TestApiAlignment.cigar_op_1_len, TestApiAlignment.cigar_op_1.length)
+ self.assertEqual(TestApiAlignment.cigar_op_1_code, TestApiAlignment.cigar_op_1.code)
+ self.assertEqual(TestApiAlignment.cigar_op_1_s, str(TestApiAlignment.cigar_op_1))
+ TestApiAlignment.cigar_op_1.length = TestApiAlignment.cigar_op_2_len
+ TestApiAlignment.cigar_op_1.code = TestApiAlignment.cigar_op_2_code
+ self.assertEqual(TestApiAlignment.cigar_op_2, TestApiAlignment.cigar_op_1)
+ self.assertEqual(TestApiAlignment.cigar_op_2_len, TestApiAlignment.cigar_op_1.length)
+ self.assertEqual(TestApiAlignment.cigar_op_2_code, TestApiAlignment.cigar_op_1.code)
+ self.assertEqual(TestApiAlignment.cigar_op_2_s, str(TestApiAlignment.cigar_op_2))
+
+ def test_cigar_operation_validation(self):
+ TestApiAlignment.cigar_op_1.validate() # nothing raised
+ TestApiAlignment.cigar_op_1.validate(version="gfa2") # nothing raised
+ TestApiAlignment.cigar_op_2.validate() # nothing raised
+ TestApiAlignment.cigar_op_2.validate(version="gfa2") # nothing raised
+ self.assertRaises(gfapy.VersionError, TestApiAlignment.cigar_op_1.validate, version="gfaX")
+ stringlen = gfapy.CIGAR.Operation("1", "M")
+ stringlen.validate() # nothing raised
+ stringcode = gfapy.CIGAR.Operation(1, "M")
+ stringcode.validate() # nothing raised
+ malformed1 = gfapy.CIGAR.Operation([1], "M")
+ self.assertRaises(gfapy.TypeError, malformed1.validate)
+ malformed2 = gfapy.CIGAR.Operation(-1, "M")
+ self.assertRaises(gfapy.ValueError, malformed2.validate)
+ malformed3 = gfapy.CIGAR.Operation(1, "L")
+ self.assertRaises(gfapy.ValueError, malformed3.validate)
+ gfa1only = gfapy.CIGAR.Operation(1, "X")
+ gfa1only.validate() # nothing raised
+ self.assertRaises(gfapy.ValueError, gfa1only.validate, version="gfa2")
+
+ def test_cigar_complement(self):
+ self.assertEqual(TestApiAlignment.cigar_gfa1_1_c_s,
+ str(gfapy.Alignment(TestApiAlignment.cigar_gfa1_1_s, version="gfa1").complement()))
+ self.assertEqual(TestApiAlignment.cigar_gfa2_1_c_s,
+ str(gfapy.Alignment(TestApiAlignment.cigar_gfa2_1_s).complement()))
+
+ def test_cigar_length_on(self):
+ self.assertEqual(TestApiAlignment.cigar_gfa1_1_rlen,
+ gfapy.Alignment(TestApiAlignment.cigar_gfa1_1_s,version="gfa1").
+ length_on_reference())
+ self.assertEqual(TestApiAlignment.cigar_gfa1_1_qlen,
+ gfapy.Alignment(TestApiAlignment.cigar_gfa1_1_s,version="gfa1").
+ length_on_query())
+ self.assertEqual(TestApiAlignment.cigar_gfa1_1_qlen,
+ gfapy.Alignment(TestApiAlignment.cigar_gfa1_1_c_s,version="gfa1").
+ length_on_reference())
+ self.assertEqual(TestApiAlignment.cigar_gfa1_1_rlen,
+ gfapy.Alignment(TestApiAlignment.cigar_gfa1_1_c_s,version="gfa1").
+ length_on_query())
+ self.assertEqual(TestApiAlignment.cigar_gfa2_1_rlen,
+ gfapy.Alignment(TestApiAlignment.cigar_gfa2_1_s).length_on_reference())
+ self.assertEqual(TestApiAlignment.cigar_gfa2_1_qlen,
+ gfapy.Alignment(TestApiAlignment.cigar_gfa2_1_s).length_on_query())
+ self.assertEqual(TestApiAlignment.cigar_gfa2_1_qlen,
+ gfapy.Alignment(TestApiAlignment.cigar_gfa2_1_c_s).length_on_reference())
+ self.assertEqual(TestApiAlignment.cigar_gfa2_1_rlen,
+ gfapy.Alignment(TestApiAlignment.cigar_gfa2_1_c_s).length_on_query())
+
diff --git a/tests/test_api_comments.py b/tests/test_api_comments.py
new file mode 100644
index 0000000..590c37e
--- /dev/null
+++ b/tests/test_api_comments.py
@@ -0,0 +1,119 @@
+import unittest
+import gfapy
+
+class TestApiComments(unittest.TestCase):
+
+ def test_initialize(self):
+ l = gfapy.line.Comment("# hallo")
+ self.assertEqual("# hallo", str(l))
+ l = gfapy.line.Comment(["#", "hallo", "\t"])
+ self.assertEqual("#\thallo", str(l))
+
+ def test_fields(self):
+ l = gfapy.line.Comment("# hallo")
+ self.assertEqual("hallo", l.content)
+ self.assertEqual(" ", l.spacer)
+ l.content = "hello"
+ self.assertEqual("hello", l.content)
+ self.assertEqual("# hello", str(l))
+ l.spacer = " "
+ self.assertEqual("hello", l.content)
+ self.assertEqual("# hello", str(l))
+
+ def test_validation(self):
+ with self.assertRaises(gfapy.FormatError):
+ gfapy.line.Comment(["#", "hallo\nhallo"])
+ with self.assertRaises(gfapy.FormatError):
+ gfapy.line.Comment(["#", "hallo", "\n"])
+ gfapy.line.Comment(["#", "hallo", "\n"], vlevel=0) # nothing raised
+ l = gfapy.line.Comment(["#", "hallo"])
+ l.content = "hallo\n" # nothing raised
+ with self.assertRaises(gfapy.FormatError): str(l)
+ l.content = "hallo"
+ str(l) # nothing raised
+ l.spacer = "\n" # nothing raised
+ with self.assertRaises(gfapy.FormatError): str(l)
+ l = gfapy.line.Comment(["#", "hallo"], vlevel=3)
+ with self.assertRaises(gfapy.FormatError): l.content = "hallo\n"
+ with self.assertRaises(gfapy.FormatError): l.spacer = "\n"
+
+ def test_from_string(self):
+ s = "# this is a comment"
+ l = gfapy.Line(s)
+ self.assertEqual(gfapy.line.Comment, l.__class__)
+ self.assertEqual(s[2:], l.content)
+ self.assertEqual(" ", l.spacer)
+ s = "#this is another comment"
+ l = gfapy.Line(s)
+ self.assertEqual(gfapy.line.Comment, l.__class__)
+ self.assertEqual(s[1:], l.content)
+ self.assertEqual("", l.spacer)
+ s = "#\t and this too"
+ l = gfapy.Line(s)
+ self.assertEqual(gfapy.line.Comment, l.__class__)
+ self.assertEqual(s[3:], l.content)
+ self.assertEqual(s[1:3], l.spacer)
+ s = "#: and this too"
+ l = gfapy.Line(s)
+ self.assertEqual(gfapy.line.Comment, l.__class__)
+ self.assertEqual(s[1:], l.content)
+ self.assertEqual("", l.spacer)
+
+ def test_to_s(self):
+ s = "# this is a comment"
+ l = gfapy.Line(s)
+ self.assertEqual(s, str(l))
+ s = "#this is another\tcomment"
+ l = gfapy.Line(s)
+ self.assertEqual(s, str(l))
+ s = "#this is another\tcomment"
+ l = gfapy.Line(s)
+ l.spacer = " "
+ self.assertEqual("# "+s[1:], str(l))
+
+ def test_tags(self):
+ with self.assertRaises(gfapy.ValueError):
+ gfapy.line.Comment(["#", "hallo", " ", "zz:Z:hallo"])
+ l = gfapy.Line("# hallo zz:Z:hallo")
+ self.assertEqual("hallo zz:Z:hallo", l.content)
+ self.assertEqual(None, l.zz)
+ with self.assertRaises(gfapy.RuntimeError): l.zz = 1
+ with self.assertRaises(gfapy.RuntimeError): l.set("zz", 1)
+ self.assertEqual(None, l.get("zz"))
+
+ def test_to_gfa1(self):
+ s = "# this is a comment"
+ l = gfapy.Line(s,version="gfa2")
+ self.assertEqual(gfapy.line.Comment, l.__class__)
+ self.assertEqual("gfa2", l.version)
+ self.assertEqual(s, str(l))
+ self.assertEqual("gfa2", l.to_gfa2().version)
+ self.assertEqual(s, str(l.to_gfa2()))
+ self.assertEqual("gfa1", l.to_gfa1().version)
+ self.assertEqual(s, str(l.to_gfa1()))
+
+ def test_to_gfa2(self):
+ s = "# this is a comment"
+ l = gfapy.Line(s,version="gfa1")
+ self.assertEqual(gfapy.line.Comment, l.__class__)
+ self.assertEqual("gfa1", l.version)
+ self.assertEqual(s, str(l))
+ self.assertEqual("gfa1", l.to_gfa1().version)
+ self.assertEqual(s, str(l.to_gfa1()))
+ self.assertEqual("gfa2", l.to_gfa2().version)
+ self.assertEqual(s, str(l.to_gfa2()))
+
+ def test_rgfa_comments(self):
+ gfa = gfapy.Gfa()
+ c1 = "#this is a comment"
+ c2 = "# this is also a comment"
+ c3 = "#and \tthis too!"
+ gfa.add_line(c1) # nothing raised
+ gfa.add_line(c2) # nothing raised
+ gfa.add_line(c3) # nothing raised
+ self.assertEqual([c1,c2,c3], [str(x) for x in gfa.comments])
+ self.assertEqual(c1, str(gfa.comments[0]))
+ gfa.rm(gfa.comments[0])
+ self.assertEqual([c2,c3], [str(x) for x in gfa.comments])
+ gfa.comments[0].disconnect()
+ self.assertEqual([c3], [str(x) for x in gfa.comments])
diff --git a/tests/test_api_custom_records.py b/tests/test_api_custom_records.py
new file mode 100644
index 0000000..6b4d9a6
--- /dev/null
+++ b/tests/test_api_custom_records.py
@@ -0,0 +1,53 @@
+import gfapy
+import unittest
+
+class TestApiCustomRecords(unittest.TestCase):
+
+ def test_from_string(self):
+ str1 = "X\tthis is a\tcustom line"
+ l1 = gfapy.Line(str1)
+ self.assertEqual(gfapy.line.CustomRecord, l1.__class__)
+ self.assertEqual("X", l1.record_type)
+ self.assertEqual("this is a", l1.field1)
+ self.assertEqual("custom line", l1.field2)
+
+ def test_from_string_with_tags(self):
+ str2 = "XX\txx:i:2\txxxxxx\txx:i:1"
+ l2 = gfapy.Line(str2)
+ self.assertEqual(gfapy.line.CustomRecord, l2.__class__)
+ self.assertEqual("XX", l2.record_type)
+ self.assertEqual("xx:i:2", l2.field1)
+ self.assertEqual("xxxxxx", l2.field2)
+ with self.assertRaises(AttributeError): l2.field3
+ self.assertEqual(1, l2.xx)
+ l2.xx = 3
+ self.assertEqual(3, l2.xx)
+ l2.field1 = "blabla"
+ self.assertEqual("blabla", l2.field1)
+
+ def test_to_s(self):
+ str1 = "X\tthis is a\tcustom line"
+ self.assertEqual(str1, str(gfapy.Line(str1)))
+ str2 = "XX\txx:i:2\txxxxxx\txx:i:1"
+ self.assertEqual(str2, str(gfapy.Line(str2)))
+
+ def test_add_custom_records(self):
+ gfa = gfapy.Gfa(version="gfa2")
+ x1 = "X\tthis is a custom record"
+ gfa.append(x1) # nothing raised
+ self.assertEqual(["X"], gfa.custom_record_keys)
+ self.assertEqual([x1], [str(x) for x in gfa.custom_records_of_type("X")])
+
+ def test_delete_custom_records(self):
+ gfa = gfapy.Gfa(version="gfa2")
+ c = "X\tThis is a custom_record"
+ gfa.append(c)
+ self.assertEqual([c], [str(x) for x in gfa.custom_records_of_type("X")])
+ for x in gfa.custom_records_of_type("X"): x.disconnect()
+ self.assertEqual([], gfa.custom_records_of_type("X"))
+
+ def test_custom_records(self):
+ x = ["X\tVN:Z:1.0", "Y\ttesttesttest"]
+ self.assertEqual(x[0], str(gfapy.Gfa(x).custom_records_of_type("X")[0]))
+ self.assertEqual(x[1], str(gfapy.Gfa(x).custom_records_of_type("Y")[0]))
+
diff --git a/tests/test_api_extensions.py b/tests/test_api_extensions.py
new file mode 100644
index 0000000..f36fe89
--- /dev/null
+++ b/tests/test_api_extensions.py
@@ -0,0 +1,36 @@
+import gfapy
+import unittest
+from .extension import *
+
+class TestAPIExtensions(unittest.TestCase):
+
+ def test_extensions(self):
+ g = gfapy.Gfa(version="gfa2", vlevel=0)
+ MetagenomicAssignment(["M", "*","N12","C","SC:i:20"])
+ sA = gfapy.Line("S\tA\t1000\t*")
+ g.append(sA)
+ tB12 = gfapy.Line("T\tB12_c")
+ g.append(tB12)
+ m1 = gfapy.Line("M\t1\ttaxon:123\tA\tSC:i:40\txx:Z:cjaks536")
+ g.append(m1)
+ m2 = gfapy.Line("M\t2\ttaxon:123\tB\txx:Z:cga5r5cs")
+ g.append(m2)
+ sB = gfapy.Line("S\tB\t1000\t*")
+ g.append(sB)
+ mx = gfapy.Line("M\t*\tB12_c\tB\tSC:i:20")
+ g.append(mx)
+ t123 = gfapy.Line(
+ "T\ttaxon:123\tUL:Z:http://www.taxon123.com")
+ g.append(t123)
+ self.assertEqual(MetagenomicAssignment, m1.__class__)
+ self.assertEqual(Taxon, tB12.__class__)
+ self.assertEqual("1", m1.mid)
+ assert(gfapy.is_placeholder(mx.mid))
+ self.assertEqual(t123, m1.tid)
+ self.assertEqual(sA, m1.sid)
+ self.assertEqual("cjaks536", m1.xx)
+ self.assertEqual([m2,mx], sB.metagenomic_assignments)
+ self.assertEqual([m1,m2], t123.metagenomic_assignments)
+ self.assertEqual("taxon:123", t123.tid)
+ self.assertEqual("http://www.taxon123.com", t123.UL)
+
diff --git a/tests/test_api_gfa1_lines.py b/tests/test_api_gfa1_lines.py
new file mode 100644
index 0000000..9e215a3
--- /dev/null
+++ b/tests/test_api_gfa1_lines.py
@@ -0,0 +1,179 @@
+import gfapy
+import unittest
+
+class TestApiGfa1Lines(unittest.TestCase):
+
+ def test_C(self):
+ fields=["C","1","+","2","-","12","12M","MQ:i:1232","NM:i:3","ab:Z:abcd"]
+ s="\t".join(fields)
+ gfapy.Line(s) # nothing raised
+ self.assertEqual(gfapy.line.edge.Containment, gfapy.Line(s).__class__)
+ self.assertEqual(fields[0], gfapy.Line(s).record_type)
+ self.assertEqual(fields[1], gfapy.Line(s).from_segment)
+ self.assertEqual(fields[2], gfapy.Line(s).from_orient)
+ self.assertEqual(fields[3], gfapy.Line(s).to_segment)
+ self.assertEqual(fields[4], gfapy.Line(s).to_orient)
+ self.assertEqual(12, gfapy.Line(s).pos)
+ self.assertEqual([gfapy.CIGAR.Operation(12,"M")], gfapy.Line(s).overlap)
+ self.assertEqual(1232, gfapy.Line(s).MQ)
+ self.assertEqual(3, gfapy.Line(s).NM)
+ self.assertEqual("abcd", gfapy.Line(s).ab)
+ with self.assertRaises(gfapy.FormatError): gfapy.Line(s+"\tH1")
+ with self.assertRaises(gfapy.FormatError): gfapy.Line("C\tH")
+ with self.assertRaises(gfapy.FormatError):
+ f=fields.copy(); f[2]="x"; gfapy.Line("\t".join(f),vlevel=1)
+ with self.assertRaises(gfapy.FormatError):
+ f=fields.copy(); f[4]="x"; gfapy.Line("\t".join(f),vlevel=1)
+ with self.assertRaises(gfapy.FormatError):
+ f=fields.copy(); f[5]="x"; gfapy.Line("\t".join(f),vlevel=1)
+ with self.assertRaises(gfapy.FormatError):
+ f=fields.copy(); f[6]="x"; gfapy.Line("\t".join(f),vlevel=1)
+ with self.assertRaises(gfapy.TypeError):
+ f=fields.copy(); f[7]="MQ:Z:1232"; gfapy.Line("\t".join(f),vlevel=1)
+ with self.assertRaises(gfapy.TypeError):
+ f=fields.copy(); f[8]="NM:Z:1232"; gfapy.Line("\t".join(f),vlevel=1)
+
+ def test_L(self):
+ fields=["L","1","+","2","-","12M","RC:i:1232","NM:i:3","ab:Z:abcd",
+ "FC:i:2321","KC:i:1212","MQ:i:40"]
+ s="\t".join(fields)
+ gfapy.Line(s) # nothing raised
+ self.assertEqual(gfapy.line.edge.Link, gfapy.Line(s).__class__)
+ self.assertEqual(fields[0], gfapy.Line(s).record_type)
+ self.assertEqual(fields[1], gfapy.Line(s).from_segment)
+ self.assertEqual(fields[2], gfapy.Line(s).from_orient)
+ self.assertEqual(fields[3], gfapy.Line(s).to_segment)
+ self.assertEqual(fields[4], gfapy.Line(s).to_orient)
+ self.assertEqual([gfapy.CIGAR.Operation(12,"M")],
+ gfapy.Line(s).overlap)
+ self.assertEqual(1232, gfapy.Line(s).RC)
+ self.assertEqual(3, gfapy.Line(s).NM)
+ self.assertEqual(2321, gfapy.Line(s).FC)
+ self.assertEqual(1212, gfapy.Line(s).KC)
+ self.assertEqual(40, gfapy.Line(s).MQ)
+ self.assertEqual("abcd", gfapy.Line(s).ab)
+ with self.assertRaises(gfapy.FormatError): gfapy.Line(s+"\tH1")
+ with self.assertRaises(gfapy.FormatError): gfapy.Line("L\tH")
+ with self.assertRaises(gfapy.FormatError):
+ f=fields.copy(); f[2]="x"; gfapy.Line("\t".join(f),vlevel=1)
+ with self.assertRaises(gfapy.FormatError):
+ f=fields.copy(); f[4]="x"; gfapy.Line("\t".join(f),vlevel=1)
+ with self.assertRaises(gfapy.FormatError):
+ f=fields.copy(); f[5]="x"; gfapy.Line("\t".join(f),vlevel=1)
+ with self.assertRaises(gfapy.TypeError):
+ f=fields.copy(); f[6]="RC:Z:1232"; gfapy.Line("\t".join(f),vlevel=1)
+ with self.assertRaises(gfapy.TypeError):
+ f=fields.copy(); f[7]="NM:Z:1232"; gfapy.Line("\t".join(f),vlevel=1)
+
+ def test_L_coords(self):
+ g = gfapy.Gfa(version="gfa1")
+ g.append("S\t1\t*\tLN:i:100")
+ g.append("L\t1\t+\t2\t-\t1M2D10M1I")
+ self.assertEqual(["87","100$"], [str(s) for s in g.dovetails[0].from_coords])
+ with self.assertRaises(gfapy.ValueError): g.dovetails[0].to_coords
+ g.append("S\t2\t*\tLN:i:100")
+ self.assertEqual(["88","100$"], [str(s) for s in g.dovetails[0].to_coords])
+ g.append("L\t3\t-\t4\t+\t10M2P3D1M")
+ self.assertEqual(["0","14"], [str(s) for s in g.dovetails[1].from_coords])
+ self.assertEqual(["0","11"], [str(s) for s in g.dovetails[1].to_coords])
+
+ def test_L_other(self):
+ l = gfapy.Line("L\t1\t+\t2\t-\t*")
+ self.assertEqual("2", l.other("1"))
+ self.assertEqual("1", l.other("2"))
+ with self.assertRaises(gfapy.NotFoundError): l.other("0")
+
+ def test_L_circular(self):
+ l = gfapy.Line("L\t1\t+\t2\t-\t*")
+ self.assertEqual(False, l.is_circular())
+ l = gfapy.Line("L\t1\t+\t1\t-\t*")
+ self.assertEqual(True, l.is_circular())
+
+ def test_S(self):
+ fields=["S","1","ACGTCACANNN","RC:i:1232","LN:i:11","ab:Z:abcd",
+ "FC:i:2321","KC:i:1212"]
+ s="\t".join(fields)
+ gfapy.Line(s) # nothing raised
+ self.assertEqual(gfapy.line.segment.GFA1, gfapy.Line(s).__class__)
+ self.assertEqual(fields[0], gfapy.Line(s).record_type)
+ self.assertEqual(fields[1], gfapy.Line(s).name)
+ self.assertEqual(fields[2], gfapy.Line(s).sequence)
+ self.assertEqual(1232, gfapy.Line(s).RC)
+ self.assertEqual(11, gfapy.Line(s).LN)
+ self.assertEqual(2321, gfapy.Line(s).FC)
+ self.assertEqual(1212, gfapy.Line(s).KC)
+ self.assertEqual("abcd", gfapy.Line(s).ab)
+ with self.assertRaises(gfapy.FormatError): gfapy.Line(s+"\tH1")
+ with self.assertRaises(gfapy.FormatError): gfapy.Line("S\tH")
+ with self.assertRaises(gfapy.FormatError):
+ f=fields.copy(); f[2]="!@#?"; gfapy.Line("\t".join(f),vlevel=1)
+ with self.assertRaises(gfapy.TypeError):
+ f=fields.copy(); f[3]="RC:Z:1232"; gfapy.Line("\t".join(f),version="gfa1")
+ f=["S","2","ACGTCACANNN","LN:i:3"]
+ with self.assertRaises(gfapy.InconsistencyError):
+ gfapy.Line("\t".join(f),vlevel=1, version="gfa1")
+ f=["S","2","ACGTCACANNN","LN:i:11"]
+ gfapy.Line("\t".join(f)) # nothing raised
+ f=["S","2","*","LN:i:3"]
+ gfapy.Line("\t".join(f)) # nothing raised
+
+ def test_forbidden_segment_names(self):
+ gfapy.Line("S\tA+B\t*") # nothing raised
+ gfapy.Line("S\tA-B\t*") # nothing raised
+ gfapy.Line("S\tA,B\t*") # nothing raised
+ with self.assertRaises(gfapy.FormatError):
+ gfapy.Line("S\tA+,B\t*",vlevel=1)
+ with self.assertRaises(gfapy.FormatError):
+ gfapy.Line("S\tA-,B\t*",vlevel=1)
+
+ def test_coverage(self):
+ l = gfapy.Line("S\t0\t*\tRC:i:600\tLN:i:100")
+ self.assertEqual(6, l.coverage())
+ self.assertEqual(6, l.try_get_coverage())
+ l = gfapy.Line("S\t0\t*\tRC:i:600")
+ self.assertEqual(None, l.coverage())
+ with self.assertRaises(gfapy.NotFoundError): l.try_get_coverage()
+ l = gfapy.Line("S\t0\t*\tLN:i:100")
+ self.assertEqual(None, l.coverage())
+ with self.assertRaises(gfapy.NotFoundError): l.try_get_coverage()
+ l = gfapy.Line("S\t0\t*\tFC:i:600\tLN:i:100")
+ self.assertEqual(None, l.coverage())
+ with self.assertRaises(gfapy.NotFoundError): l.try_get_coverage()
+ self.assertEqual(6, l.coverage(count_tag="FC"))
+ self.assertEqual(6, l.try_get_coverage(count_tag="FC"))
+
+ def test_P(self):
+ fields=["P","4","1+,2-,3+","9M2I3D1M,12M","ab:Z:abcd"]
+ s="\t".join(fields)
+ gfapy.Line(s) # nothing raised
+ self.assertEqual(gfapy.line.group.Path, gfapy.Line(s).__class__)
+ self.assertEqual(fields[0], gfapy.Line(s).record_type)
+ self.assertEqual(fields[1], gfapy.Line(s).path_name)
+ self.assertEqual([gfapy.OrientedLine("1","+"),gfapy.OrientedLine("2","-"),
+ gfapy.OrientedLine("3","+")],
+ gfapy.Line(s).segment_names)
+ self.assertEqual([[gfapy.CIGAR.Operation(9,"M"),
+ gfapy.CIGAR.Operation(2,"I"),
+ gfapy.CIGAR.Operation(3,"D"),
+ gfapy.CIGAR.Operation(1,"M")],
+ [gfapy.CIGAR.Operation(12,"M")]],
+ gfapy.Line(s).overlaps)
+ self.assertEqual("abcd", gfapy.Line(s).ab)
+ with self.assertRaises(gfapy.FormatError): gfapy.Line(s+"\tH1")
+ with self.assertRaises(gfapy.FormatError): gfapy.Line("P\tH")
+ with self.assertRaises(gfapy.FormatError):
+ f=fields.copy(); f[2]="1,2,3"; gfapy.Line("\t".join(f),vlevel=1)
+ with self.assertRaises(gfapy.InconsistencyError):
+ f=fields.copy(); f[2]="1+,2+"; f[3]="9M,12M,3M"
+ gfapy.Line("\t".join(f),vlevel=1)
+ f=fields.copy(); f[3]="*,*";
+ gfapy.Line("\t".join(f),vlevel=1)
+ f=fields.copy(); f[3]="9M2I3D1M,12M,12M";
+ gfapy.Line("\t".join(f),vlevel=3)
+ f=fields.copy(); f[3]="*";
+ gfapy.Line("\t".join(f),vlevel=1)
+ with self.assertRaises(gfapy.FormatError):
+ f=fields.copy(); f[3]="12,12"; gfapy.Line("\t".join(f),vlevel=1)
+ with self.assertRaises(gfapy.FormatError):
+ f=fields.copy(); f[3]="12M|12M"; gfapy.Line("\t".join(f),vlevel=1)
+
diff --git a/tests/test_api_gfa2_lines.py b/tests/test_api_gfa2_lines.py
new file mode 100644
index 0000000..37ce4d9
--- /dev/null
+++ b/tests/test_api_gfa2_lines.py
@@ -0,0 +1,49 @@
+import gfapy
+import unittest
+
+class TestApiGFA2Lines(unittest.TestCase):
+
+ def test_S(self):
+ fields=["S","1","ACGTCACANNN","RC:i:1232","LN:i:11","ab:Z:abcd",
+ "FC:i:2321","KC:i:1212"]
+ s="\t".join(fields)
+ gfapy.Line(s) # nothing raised
+ self.assertEqual(gfapy.line.segment.GFA1, gfapy.Line(s).__class__)
+ self.assertEqual(fields[0], gfapy.Line(s).record_type)
+ self.assertEqual(fields[1], gfapy.Line(s).name)
+ self.assertEqual(fields[2], gfapy.Line(s).sequence)
+ self.assertEqual(1232, gfapy.Line(s).RC)
+ self.assertEqual(11, gfapy.Line(s).LN)
+ self.assertEqual(2321, gfapy.Line(s).FC)
+ self.assertEqual(1212, gfapy.Line(s).KC)
+ self.assertEqual("abcd", gfapy.Line(s).ab)
+ with self.assertRaises(gfapy.FormatError): gfapy.Line(s+"\tH1")
+ with self.assertRaises(gfapy.FormatError): gfapy.Line("S\tH")
+ with self.assertRaises(gfapy.FormatError):
+ f=fields.copy(); f[2]="!@#?"; gfapy.Line("\t".join(f),vlevel=1)
+ with self.assertRaises(gfapy.TypeError):
+ f=fields.copy(); f[3]="RC:Z:1232"; gfapy.Line("\t".join(f),version="gfa1")
+ f=["S","2","ACGTCACANNN","LN:i:3"]
+ with self.assertRaises(gfapy.InconsistencyError):
+ gfapy.Line("\t".join(f),vlevel=1, version="gfa1")
+ f=["S","2","ACGTCACANNN","LN:i:11"]
+ gfapy.Line("\t".join(f)) # nothing raised
+ f=["S","2","*","LN:i:3"]
+ gfapy.Line("\t".join(f)) # nothing raised
+
+ def test_coverage(self):
+ l = gfapy.Line("S\t0\t*\tRC:i:600\tLN:i:100")
+ self.assertEqual(6, l.coverage())
+ self.assertEqual(6, l.try_get_coverage())
+ l = gfapy.Line("S\t0\t*\tRC:i:600")
+ self.assertEqual(None, l.coverage())
+ with self.assertRaises(gfapy.NotFoundError): l.try_get_coverage()
+ l = gfapy.Line("S\t0\t*\tLN:i:100")
+ self.assertEqual(None, l.coverage())
+ with self.assertRaises(gfapy.NotFoundError): l.try_get_coverage()
+ l = gfapy.Line("S\t0\t*\tFC:i:600\tLN:i:100")
+ self.assertEqual(None, l.coverage())
+ with self.assertRaises(gfapy.NotFoundError): l.try_get_coverage()
+ self.assertEqual(6, l.coverage(count_tag="FC"))
+ self.assertEqual(6, l.try_get_coverage(count_tag="FC"))
+
diff --git a/tests/test_api_gfa_basics.py b/tests/test_api_gfa_basics.py
new file mode 100644
index 0000000..29b9901
--- /dev/null
+++ b/tests/test_api_gfa_basics.py
@@ -0,0 +1,77 @@
+import gfapy
+import unittest
+
+class TestAPIGfaBasics(unittest.TestCase):
+
+ def test_initialize(self):
+ gfapy.Gfa() # nothing raised
+ gfa = gfapy.Gfa()
+ self.assertEqual(gfapy.Gfa, gfa.__class__)
+
+ def test_version_empty(self):
+ gfa = gfapy.Gfa()
+ self.assertIsNone(gfa.version)
+ gfa = gfapy.Gfa(version="gfa1")
+ self.assertEqual("gfa1", gfa.version)
+ gfa = gfapy.Gfa(version="gfa2")
+ self.assertEqual("gfa2", gfa.version)
+ with self.assertRaises(gfapy.VersionError): gfapy.Gfa(version="0.0")
+
+ def test_validate(self):
+ gfa = gfapy.Gfa(version="gfa1")
+ gfa.append("S\t1\t*")
+ gfa.validate() # nothing raised
+ gfa.append("L\t1\t+\t2\t-\t*")
+ with self.assertRaises(gfapy.NotFoundError): gfa.validate()
+ gfa.append("S\t2\t*")
+ gfa.validate() # nothing raised
+ gfa.append("P\t3\t1+,4-\t*")
+ with self.assertRaises(gfapy.NotFoundError): gfa.validate()
+ gfa.append("S\t4\t*")
+ with self.assertRaises(gfapy.NotFoundError): gfa.validate()
+ gfa.append("L\t4\t+\t1\t-\t*")
+ gfa.validate() # nothing raised
+
+ def test_to_s(self):
+ lines = ["H\tVN:Z:1.0","S\t1\t*","S\t2\t*","S\t3\t*",
+ "L\t1\t+\t2\t-\t*","C\t1\t+\t3\t-\t12\t*","P\t4\t1+,2-\t*"]
+ gfa = gfapy.Gfa()
+ for l in lines: gfa.append(l)
+ self.assertEqual(set(lines), set(str(gfa).split("\n")))
+
+ ## def test_from_file(self):
+ ## filename = "tests/testdata/example1.gfa"
+ ## gfa = gfapy.Gfa.from_file(filename)
+ ## assert(gfa)
+ ## with open(filename) as f:
+ ## txt = f.read()
+ ## self.assertEqual(txt, str(gfa))
+
+ ## def test_to_file(self):
+ ## filename = "tests/testdata/example1.gfa"
+ ## gfa = gfapy.Gfa.from_file(filename)
+ ## tmp = Tempfile("example1")
+ ## gfa.to_file(tmp.path)
+ ## tmp.rewind
+ ## self.assertEqual(IO.read(filename), IO.read(tmp))
+
+ def test_from_string(self):
+ lines = ["H\tVN:Z:1.0","S\t1\t*","S\t2\t*","S\t3\t*",
+ "L\t1\t+\t2\t-\t*","C\t1\t+\t3\t-\t12\t*","P\t4\t1+,2-\t*"]
+ gfa1 = gfapy.Gfa()
+ for l in lines: gfa1.append(l)
+ gfa2 = gfapy.Gfa("\n".join(lines))
+ assert(gfa2)
+ self.assertEqual(gfapy.Gfa, gfa2.__class__)
+ self.assertEqual(str(gfa1), str(gfa2))
+
+ def test_from_list(self):
+ lines = ["H\tVN:Z:1.0","S\t1\t*","S\t2\t*","S\t3\t*",
+ "L\t1\t+\t2\t-\t*","C\t1\t+\t3\t-\t12\t*","P\t4\t1+,2-\t*"]
+ gfa1 = gfapy.Gfa()
+ for l in lines: gfa1.append(l)
+ gfa2 = gfapy.Gfa(lines)
+ assert(gfa2)
+ self.assertEqual(gfapy.Gfa, gfa2.__class__)
+ self.assertEqual(str(gfa1), str(gfa2))
+
diff --git a/tests/test_api_header.py b/tests/test_api_header.py
new file mode 100644
index 0000000..d51bbb6
--- /dev/null
+++ b/tests/test_api_header.py
@@ -0,0 +1,69 @@
+import unittest
+import gfapy
+
+class TestApiHeader(unittest.TestCase):
+
+ def test_gfa_header(self):
+ g = gfapy.Gfa()
+ assert(isinstance(g.header, gfapy.line.Header))
+ self.assertEqual([], g.header.tagnames)
+ g.add_line("H\txx:i:1")
+ self.assertEqual(["xx"], g.header.tagnames)
+
+ def test_gfa_header_line_connect(self):
+ g = gfapy.Gfa()
+ line = gfapy.Line("H\txx:i:1")
+ self.assertRaises(gfapy.RuntimeError, line.connect, g)
+ g.add_line(line) # nothing raised
+
+ def test_header_version_editing(self):
+ standalone = gfapy.Line("H\txx:i:1\tVN:Z:1.0")
+ standalone.VN = "2.0" # nothing raised
+ g = gfapy.Gfa()
+ g.add_line("H\txx:i:1\tVN:Z:1.0")
+ g.header.xx = 2 # nothing raised
+ with self.assertRaises(gfapy.RuntimeError):
+ g.header.VN = "2.0"
+
+ def test_error_inconsistent_definitions(self):
+ g = gfapy.Gfa()
+ g.add_line("H\txx:i:1")
+ g.add_line("H\txx:i:2") # nothing raised
+ g.add_line("H\tTS:i:120")
+ g.add_line("H\tTS:i:120") # nothing raised
+ self.assertRaises(gfapy.InconsistencyError, g.add_line, "H\tTS:i:122")
+
+ def test_gfa_multiple_def_tags(self):
+ g = gfapy.Gfa()
+ for i in range(4):
+ g.add_line("H\txx:i:{}".format(i))
+ self.assertEqual(["xx"], g.header.tagnames)
+ self.assertEqual([0,1,2,3], g.header.xx)
+ self.assertEqual([0,1,2,3], g.header.get("xx"))
+ self.assertEqual("i", g.header.get_datatype("xx"))
+ g.header.validate_field("xx") # nothing raised
+ for i in [0,2,3]:
+ g.header.xx.remove(i)
+ g.header.xx = [1, 4]
+ self.assertRaises(gfapy.TypeError, g.header.validate_field, "xx")
+ g.header.xx = gfapy.FieldArray("i", data = g.header.xx)
+ g.header.validate_field("xx") # nothing raised
+ self.assertEqual([1,4], g.header.get("xx"))
+ self.assertEqual("1\t4", g.header.field_to_s("xx"))
+ self.assertEqual("xx:i:1\txx:i:4", g.header.field_to_s("xx", tag=True))
+ self.assertEqual(sorted(["H\txx:i:1","H\txx:i:4"]),
+ sorted([str(h) for h in g.headers]))
+ g.header.add("xx", 12)
+ g.header.add("yy", 13)
+ self.assertEqual([1,4,12], g.header.xx)
+ self.assertEqual(13, g.header.yy)
+
+ def test_gfa_single_def_tags(self):
+ g = gfapy.Gfa()
+ g.add_line("H\txx:i:1")
+ self.assertEqual(["xx"], g.header.tagnames)
+ self.assertEqual(1, g.header.xx)
+ g.header.set("xx", 12)
+ self.assertEqual(12, g.header.xx)
+ g.header.delete("xx")
+ self.assertEqual(None, g.header.xx)
diff --git a/tests/test_api_linear_paths.py b/tests/test_api_linear_paths.py
new file mode 100644
index 0000000..0240f01
--- /dev/null
+++ b/tests/test_api_linear_paths.py
@@ -0,0 +1,49 @@
+import gfapy
+import unittest
+
+class TestAPILinearPaths(unittest.TestCase):
+
+ def test_linear_path_merging(self):
+ for sfx in ["gfa", "gfa2"]:
+ gfa = gfapy.Gfa.from_file("tests/testdata/linear_merging.1."+"{}".format(sfx))
+ with self.assertRaises(gfapy.ValueError):
+ gfa.merge_linear_path([["0", "R"],["1", "R"],["2", "L"],["3", "R"]])
+ gfa = gfapy.Gfa.from_file("tests/testdata/linear_merging.2."+"{}".format(sfx))
+ gfa.merge_linear_path([["0", "R"],["1", "R"],["2", "L"],["3", "R"]])
+ with self.assertRaises(gfapy.NotFoundError): gfa.try_get_segment("0")
+ with self.assertRaises(gfapy.NotFoundError): gfa.try_get_segment("1")
+ with self.assertRaises(gfapy.NotFoundError): gfa.try_get_segment("2")
+ with self.assertRaises(gfapy.NotFoundError): gfa.try_get_segment("3")
+ gfa.try_get_segment("0_1_2_3") # nothing raised
+ self.assertEqual([], gfa.dovetails)
+ self.assertEqual("ACGACGACGTCGA", gfa.segment("0_1_2_3").sequence)
+
+ def test_linear_path_merge_all(self):
+ for sfx in ["gfa", "gfa2"]:
+ gfa = gfapy.Gfa.from_file("tests/testdata/linear_merging.3."+"{}".format(sfx))
+ gfa.merge_linear_paths()
+ gfa.merge_linear_paths() # nothing raised
+ self.assertEqual(len(gfa.segment_names), 1)
+ self.assertIn(gfa.segment_names[0], ["0_1_2_3","3_2_1_0"])
+ self.assertEqual(len(gfa.segments), 1)
+ self.assertEqual(len(gfa.dovetails), 0)
+ gfa = gfapy.Gfa.from_file("tests/testdata/linear_merging.4."+"{}".format(sfx))
+ gfa.merge_linear_paths() # nothing raised
+ self.assertEqual(3, len(gfa.segments))
+ for x in gfa.segments:
+ self.assertIn(x.name, {"0","3","1_2","2_1"})
+ gfa = gfapy.Gfa.from_file("tests/testdata/linear_merging.5."+"{}".format(sfx))
+ gfa.merge_linear_paths() # nothing raised
+ self.assertEqual(3, len(gfa.segments))
+ self.assertEqual({"0", "1", "2_3"}, {x.name for x in gfa.segments})
+
+ def test_linear_path_merge_example1(self):
+ for sfx in ["gfa", "gfa2"]:
+ gfa = gfapy.Gfa.from_file("tests/testdata/example1."+"{}".format(sfx))
+ lps = set()
+ for i, lp in enumerate(gfa.linear_paths()):
+ if int(lp[0].name) > int(lp[-1].name):
+ lp.reverse()
+ lps.add(" ".join([s.name for s in lp]))
+ self.assertEqual({"1 19 18", "11 9 12", "22 16 20 21 23"}, lps)
+
diff --git a/tests/test_api_linear_paths_extended.py b/tests/test_api_linear_paths_extended.py
new file mode 100644
index 0000000..586c612
--- /dev/null
+++ b/tests/test_api_linear_paths_extended.py
@@ -0,0 +1,30 @@
+import gfapy
+import unittest
+
+class TestgfapyGfaToolsLinearPaths(unittest.TestCase):
+
+ def test_linear_path_merging(self):
+ for sfx in ["gfa", "gfa2"]:
+ gfa = gfapy.Gfa.from_file("tests/testdata/linear_merging.2."+"{}".format(sfx))
+ gfa.merge_linear_path([["0", "R"],["1", "R"],["2", "L"],["3", "R"]],
+ enable_tracking=True)
+ gfa.try_get_segment("0_1_2^_3") # nothing raised
+ self.assertEqual("ACGACGACGTCGA", gfa.segment("0_1_2^_3").sequence)
+ gfa = gfapy.Gfa.from_file("tests/testdata/linear_merging.2."+"{}".format(sfx))
+ gfa.merge_linear_path([["0", "R"],["1", "R"],["2", "L"],["3", "R"]],
+ enable_tracking=True)
+ gfa.try_get_segment("0_1_2^_3") # nothing raised
+ self.assertEqual("ACGACGACGTCGA", gfa.segment("0_1_2^_3").sequence)
+
+ def test_linear_path_merge_all(self):
+ for sfx in ["gfa", "gfa2"]:
+ gfa = gfapy.Gfa.from_file("tests/testdata/linear_merging.3.{}".format(sfx))
+ gfa.merge_linear_paths(enable_tracking=True)
+ self.assertIn(gfa.segment_names[0], ["0_1_2^_3","3^_2_1^_0^"])
+ gfa = gfapy.Gfa.from_file("tests/testdata/linear_merging.4.{}".format(sfx))
+ gfa.merge_linear_paths(enable_tracking=True)
+ try:
+ self.assertEqual({"0","3","1_2^"}, {x.name for x in gfa.segments})
+ except AssertionError:
+ self.assertEqual({"0","3","2_1^"}, {x.name for x in gfa.segments})
+
diff --git a/tests/test_api_lines_collections.py b/tests/test_api_lines_collections.py
new file mode 100644
index 0000000..7ca038b
--- /dev/null
+++ b/tests/test_api_lines_collections.py
@@ -0,0 +1,95 @@
+import gfapy
+import unittest
+
+class TestAPILinesCollections(unittest.TestCase):
+
+ def test_gfa1_collections(self):
+ gfa = gfapy.Gfa.from_file("tests/testdata/all_line_types.gfa1.gfa")
+ # comments
+ self.assertEqual(1, len(gfa.comments))
+ self.assertRegex(gfa.comments[0].content, r'collections')
+ # containments
+ self.assertEqual(2, len(gfa.containments))
+ self.assertEqual({"2_to_6", "1_to_5"}, {x.name for x in gfa.containments})
+ # dovetails
+ self.assertEqual(4, len(gfa.dovetails))
+ self.assertEqual(set(["1_to_2", "1_to_3", "11_to_12", "11_to_13"]),
+ set([x.name for x in gfa.dovetails]))
+ # edges
+ self.assertEqual(6, len(gfa.edges))
+ self.assertEqual(set(["1_to_2", "1_to_3", "11_to_12",
+ "11_to_13", "2_to_6", "1_to_5"]),
+ set([x.name for x in gfa.edges]))
+ # segments
+ self.assertSetEqual(set(["1", "3", "5", "13", "11", "12", "4", "6", "2"]),
+ set([x.name for x in gfa.segments]))
+ # segment_names
+ self.assertSetEqual(set(["1", "3", "5", "13", "11", "12", "4", "6", "2"]),
+ set(gfa.segment_names))
+ # paths
+ self.assertSetEqual(set(["14", "15"]), set([x.name for x in gfa.paths]))
+ # path_names
+ self.assertSetEqual(set(["14", "15"]), set(gfa.path_names))
+ # names
+ self.assertSetEqual(set(gfa.segment_names + gfa.path_names +
+ gfa.edge_names), set(gfa.names))
+ # lines
+ self.assertEqual(set([str(x) for x in gfa.comments + gfa.headers + gfa.segments + gfa.edges +
+ gfa.paths]), set([str(x) for x in gfa.lines]))
+
+ def test_gfa2_collections(self):
+ gfa = gfapy.Gfa.from_file("tests/testdata/all_line_types.gfa2.gfa")
+ # comments
+ self.assertEqual(3, len(gfa.comments))
+ self.assertRegex(gfa.comments[0].content, r'collections')
+ # edges
+ self.assertSetEqual(set(["1_to_2", "2_to_6", "1_to_3",
+ "11_to_12", "11_to_13", "1_to_5"]),
+ set([x.name for x in gfa.edges]))
+ # edge_names
+ self.assertSetEqual(set(["1_to_2", "2_to_6", "1_to_3",
+ "11_to_12", "11_to_13", "1_to_5"]),
+ set(gfa.edge_names))
+ # dovetails
+ self.assertSetEqual(set(["1_to_2", "1_to_3", "11_to_12", "11_to_13"]),
+ set([x.name for x in gfa.dovetails]))
+ # containments
+ self.assertSetEqual(set(["2_to_6", "1_to_5"]),
+ set([x.name for x in gfa.containments]))
+ # gaps
+ self.assertSetEqual(set(["1_to_11", "2_to_12"]), set([x.name for x in gfa.gaps]))
+ # gap_names
+ self.assertSetEqual(set(["1_to_11", "2_to_12"]), set(gfa.gap_names))
+ # sets
+ self.assertSetEqual(set(["16", "16sub"]), set([x.name for x in gfa.sets]))
+ # set_names
+ self.assertSetEqual(set(["16", "16sub"]), set(gfa.set_names))
+ # paths
+ self.assertSetEqual(set(["14", "15"]), set([x.name for x in gfa.paths]))
+ # path_names
+ self.assertSetEqual(set(["14", "15"]), set(gfa.path_names))
+ # segments
+ self.assertSetEqual(set(["1", "3", "5", "13", "11", "12", "4", "6", "2"]),
+ set([x.name for x in gfa.segments]))
+ # segment_names
+ self.assertSetEqual(set(["1", "3", "5", "13", "11", "12", "4", "6", "2"]),
+ set(gfa.segment_names))
+ # fragments
+ self.assertSetEqual(set(["read1_in_2", "read2_in_2"]),
+ set([x.get("id") for x in gfa.fragments]))
+ # external_names
+ self.assertSetEqual(set(["read1", "read2"]), set(gfa.external_names))
+ # custom_record_keys
+ self.assertSetEqual(set(["X", "Y"]), set(gfa.custom_record_keys))
+ # custom_records
+ self.assertEqual(3, len(gfa.custom_records))
+ self.assertSetEqual(set(["X", "X", "Y"]), set([x.record_type for x in gfa.custom_records]))
+ # custom_records("X")
+ self.assertSetEqual(set(["X", "X"]), set([x.record_type for x in gfa.custom_records_of_type("X")]))
+ # names
+ self.assertSetEqual(set(gfa.segment_names + gfa.edge_names + gfa.gap_names +
+ gfa.path_names + gfa.set_names), set(gfa.names))
+ # lines
+ self.assertSetEqual(set([str(x) for x in gfa.comments + gfa.headers + gfa.segments + gfa.edges +
+ gfa.paths + gfa.sets + gfa.gaps + gfa.fragments +
+ gfa.custom_records]), set([str(x) for x in gfa.lines]))
diff --git a/tests/test_api_lines_creators.py b/tests/test_api_lines_creators.py
new file mode 100644
index 0000000..8006e31
--- /dev/null
+++ b/tests/test_api_lines_creators.py
@@ -0,0 +1,132 @@
+import gfapy
+import unittest
+
+class TestAPILinesCreators(unittest.TestCase):
+
+ def test_add_headers(self):
+ gfa = gfapy.Gfa()
+ h = "H\tVN:Z:1.0"
+ gfa.append(h) # nothing raised
+ self.assertEqual([h], [str(x) for x in gfa.headers])
+
+ def test_add_segments(self):
+ gfa = gfapy.Gfa()
+ s1 = gfapy.Line("S\t1\t*")
+ s2 = gfapy.Line("S\t2\t*")
+ s2c = gfapy.Line("S\t2\t*")
+ gfa.append(s1) # nothing raised
+ gfa.append(s2) # nothing raised
+ self.assertSetEqual(set([str(s) for s in [s1, s2]]), set([str(s) for s in gfa.segments]))
+ self.assertSetEqual(set(["1", "2"]), set(gfa.segment_names))
+ self.assertEqual(s1, gfa.segment("1"))
+ self.assertEqual(None, gfa.segment("0"))
+ gfa.try_get_segment("1") # nothing raised
+ with self.assertRaises(gfapy.NotFoundError): gfa.try_get_segment("0")
+ with self.assertRaises(gfapy.NotUniqueError): gfa.append(s2c)
+
+ def test_add_links(self):
+ s1 = "S\t1\t*"
+ s2 = "S\t2\t*"
+ l1 = gfapy.Line("L\t1\t+\t2\t+\t12M")
+ l2 = "L\t1\t+\t3\t+\t12M"
+ gfa = gfapy.Gfa()
+ gfa.append(s1)
+ gfa.append(s2)
+ gfa.append(l1) # nothing raised
+ self.assertEqual([l1], gfa.dovetails)
+ self.assertEqual([l1], gfa.segment("1").end_relations("R", ["2", "L"]))
+ self.assertEqual([l1], gfa.segment("2").end_relations("L", ["1", "R"]))
+ self.assertEqual([], gfa.segment("2").end_relations("R", gfapy.SegmentEnd("1", "L")))
+ gfa.append(l2) # nothing raised
+
+ def test_add_containments(self):
+ s1 = "S\t1\t*"
+ s2 = "S\t2\t*"
+ c1 = gfapy.Line("C\t1\t+\t2\t+\t12\t12M")
+ c2 = "C\t1\t+\t3\t+\t12\t12M"
+ gfa = gfapy.Gfa()
+ gfa.append(s1)
+ gfa.append(s2)
+ gfa.append(c1) # nothing raised
+ self.assertEqual([c1], gfa.containments)
+ self.assertEqual([c1],
+ gfa.segment("1").relations_to("2", "edges_to_contained"))
+ self.assertEqual([],
+ gfa.segment("2").relations_to("1", "edges_to_contained"))
+ gfa.append(c2) # nothing raised
+
+ def test_add_paths(self):
+ s1 = "S\t1\t*"
+ s2 = "S\t2\t*"
+ p1 = gfapy.Line("P\t4\t1+,2+\t122M")
+ p2 = "P\t1\t1+,2+\t122M"
+ p3 = "P\t5\t1+,2+,3+\t122M,120M"
+ gfa = gfapy.Gfa()
+ gfa.append(s1)
+ gfa.append(s2)
+ gfa.append(p1) # nothing raised
+ self.assertEqual([p1], gfa.paths)
+ self.assertEqual(["4"], gfa.path_names)
+ self.assertEqual(p1, gfa.line("4"))
+ self.assertEqual(None, gfa.line("5"))
+ with self.assertRaises(gfapy.NotUniqueError): gfa.append(p2)
+ gfa.append(p3) # nothing raised
+
+## def test_segments_first_order(self):
+## s1 = "S\t1\t*"
+## s2 = "S\t2\t*"
+## l1 = "L\t1\t+\t2\t+\t122M"
+## l2 = "L\t1\t+\t3\t+\t122M"
+## c1 = "C\t1\t+\t2\t+\t12\t12M"
+## c2 = "C\t1\t+\t3\t+\t12\t12M"
+## p1 = "P\t4\t1+,2+\t122M"
+## p2 = "P\t1\t1+,2+\t122M"
+## p3 = "P\t5\t1+,3+\t122M"
+## gfa = gfapy.Gfa()
+## gfa.append(s1)
+## gfa.append(s2)
+## gfa.append(l1) # nothing raised
+## with self.assertRaises(gfapy.NotFoundError): gfa.append(l2)
+## gfa.append(c1) # nothing raised
+## with self.assertRaises(gfapy.NotFoundError): gfa.append(c2)
+## gfa.append(p1) # nothing raised
+## with self.assertRaises(gfapy.NotUniqueError): gfa.append(p2)
+## with self.assertRaises(gfapy.NotFoundError): gfa.append(p3)
+
+ def test_header_add(self):
+ gfa = gfapy.Gfa()
+ gfa.append("H\tVN:Z:1.0")
+ gfa.append("H\taa:i:12\tab:Z:test1")
+ gfa.append("H\tac:Z:test2")
+ gfa.header.add("aa", 15)
+ self.assertSetEqual(
+ set([
+ "H\tVN:Z:1.0",
+ "H\taa:i:12",
+ "H\taa:i:15",
+ "H\tab:Z:test1",
+ "H\tac:Z:test2",
+ ]),
+ set([str(x) for x in gfa.headers]))
+ gfa.header.add("aa", 16)
+ self.assertSetEqual(
+ set([
+ "H\tVN:Z:1.0",
+ "H\taa:i:12",
+ "H\taa:i:15",
+ "H\taa:i:16",
+ "H\tab:Z:test1",
+ "H\tac:Z:test2",
+ ]),
+ set([str(x) for x in gfa.headers]))
+ gfa.header.delete("aa")
+ gfa.header.aa = 26
+ self.assertEqual(
+ set([
+ "H\tVN:Z:1.0",
+ "H\taa:i:26",
+ "H\tab:Z:test1",
+ "H\tac:Z:test2",
+ ]),
+ set([str(x) for x in gfa.headers]))
+
diff --git a/tests/test_api_lines_destructors.py b/tests/test_api_lines_destructors.py
new file mode 100644
index 0000000..f60327d
--- /dev/null
+++ b/tests/test_api_lines_destructors.py
@@ -0,0 +1,76 @@
+import gfapy
+import unittest
+
+class TestAPILinesDestructors(unittest.TestCase):
+
+ def test_delete_links(self):
+ gfa = gfapy.Gfa()
+ s = ["S\t0\t*", "S\t1\t*", "S\t2\t*"]
+ l = "L\t1\t+\t2\t+\t12M"
+ c = "C\t1\t+\t0\t+\t12\t12M"
+ for line in (s + [l,c]): gfa.append(line)
+ self.assertEqual([l], [str(x) for x in gfa.dovetails])
+ self.assertEqual([l], [str(x) for x in \
+ gfa.segment("1").end_relations("R", ["2", "L"])])
+ for x in gfa.segment("1").oriented_relations("+", \
+ gfapy.OrientedLine("2", "+")):
+ x.disconnect()
+ self.assertEqual([], gfa.dovetails)
+ self.assertEqual([], gfa.segment("1").end_relations("R", ["2", "L"]))
+ self.assertEqual([c], [str(x) for x in gfa.containments])
+ self.assertEqual(c,
+ str(gfa.segment("1").relations_to(gfa.segment("0"),
+ "edges_to_contained")[0]))
+ gfa.append(l)
+ self.assertNotEqual([], gfa.dovetails)
+ for x in gfa.segment("1").oriented_relations("+", \
+ gfapy.OrientedLine("2", "+")):
+ x.disconnect()
+ self.assertEqual([], gfa.dovetails)
+
+ def test_delete_containments(self):
+ gfa = gfapy.Gfa()
+ s = ["S\t0\t*", "S\t1\t*", "S\t2\t*"]
+ l = "L\t1\t+\t2\t+\t12M"
+ c = "C\t1\t+\t0\t+\t12\t12M"
+ for line in (s + [l,c]): gfa.append(line)
+ for x in gfa.segment("1").relations_to(gfa.segment("0"), "edges_to_contained"):
+ x.disconnect()
+ self.assertEqual([], gfa.containments)
+ self.assertEqual(0, len(gfa.segment("1").relations_to("0",
+ "edges_to_contained")))
+ gfa.append(c)
+ self.assertNotEqual([], gfa.containments)
+ self.assertEqual(c, str(gfa.segment("1").relations_to("0",
+ "edges_to_contained")[0]))
+ for x in gfa.segment("1").relations_to(gfa.segment("0"), "edges_to_contained"):
+ x.disconnect()
+ self.assertEqual([], gfa.containments)
+
+ def test_delete_segment(self):
+ gfa = gfapy.Gfa()
+ gfa.append("H\tVN:Z:1.0")
+ s = ["S\t0\t*", "S\t1\t*", "S\t2\t*"]
+ l = "L\t1\t+\t2\t+\t12M"
+ c = "C\t1\t+\t0\t+\t12\t12M"
+ p = "P\t4\t2+,0-\t12M"
+ for line in (s + [l,c,p]): gfa.append(line)
+ self.assertEqual(set(s), set([str(x) for x in gfa.segments]))
+ self.assertEqual(set(["0", "1", "2"]), set(gfa.segment_names))
+ self.assertEqual([l], [str(x) for x in gfa.dovetails if not x.virtual])
+ self.assertEqual([c], [str(x) for x in gfa.containments])
+ self.assertEqual([p], [str(x) for x in gfa.paths])
+ self.assertEqual(["4"], gfa.path_names)
+ gfa.segment("0").disconnect()
+ self.assertEqual(set([s[1],s[2]]), set([str(x) for x in gfa.segments]))
+ self.assertEqual(set(["1", "2"]), set(gfa.segment_names))
+ self.assertEqual([l], [str(x) for x in gfa.dovetails if not x.virtual])
+ self.assertEqual([], [str(x) for x in gfa.containments])
+ self.assertEqual([], [str(x) for x in gfa.paths])
+ self.assertEqual([], gfa.path_names)
+ gfa.segment("1").disconnect()
+ self.assertEqual([s[2]], [str(x) for x in gfa.segments])
+ self.assertEqual([], gfa.dovetails)
+ gfa.rm("2")
+ self.assertEqual([], gfa.segments)
+
diff --git a/tests/test_api_lines_finders.py b/tests/test_api_lines_finders.py
new file mode 100644
index 0000000..cfeeb24
--- /dev/null
+++ b/tests/test_api_lines_finders.py
@@ -0,0 +1,170 @@
+import gfapy
+import unittest
+
+class TestAPILinesFinders(unittest.TestCase):
+
+ l_gfa1_a = ["S\t1\t*",
+ "S\t2\t*",
+ "S\t3\t*",
+ "S\t4\tCGAT",
+ "L\t1\t+\t2\t+\t*",
+ "L\t1\t-\t3\t+\t*",
+ "C\t1\t-\t4\t-\t1\t*",
+ "P\tp1\t1+,2+\t*"]
+ l_gfa1 = [gfapy.Line(x) for x in l_gfa1_a]
+ l_gfa2_a = ["S\t1\t100\t*",
+ "S\t2\t110\t*",
+ "E\te1\t1+\t2-\t0\t100$\t10\t110$\t*",
+ "G\tg1\t1-\t2-\t1000\t*",
+ "O\to1\t1+ 2-",
+ "U\tu1\t1 e1",
+ "F\t1\tread1-\t0\t10\t102\t122\t*",
+ "F\t1\tread1-\t30\t100$\t180\t255\t*",
+ "F\t2\tread1-\t40\t50\t52\t64\t*",
+ "X\tx1\txx:Z:A",
+ "X\tx2",
+ "G\t*\t1+\t2+\t2000\t*"]
+ l_gfa2 = [gfapy.Line(x) for x in l_gfa2_a]
+ gfa1 = gfapy.Gfa(l_gfa1)
+ gfa2 = gfapy.Gfa(l_gfa2)
+
+ def test_segment_gfa1(self):
+ # existing name as argument
+ self.assertEqual(TestAPILinesFinders.l_gfa1[0],TestAPILinesFinders.gfa1.segment("1"))
+ self.assertEqual(TestAPILinesFinders.l_gfa1[0],TestAPILinesFinders.gfa1.try_get_segment("1"))
+ # not existing name as argument
+ self.assertEqual(None,TestAPILinesFinders.gfa1.segment("0"))
+ with self.assertRaises(gfapy.NotFoundError): TestAPILinesFinders.gfa1.try_get_segment("0")
+ # line as argument
+ self.assertEqual(TestAPILinesFinders.l_gfa1[0],TestAPILinesFinders.gfa1.segment(TestAPILinesFinders.l_gfa1[0]))
+ self.assertEqual(TestAPILinesFinders.l_gfa1[0],TestAPILinesFinders.gfa1.try_get_segment(TestAPILinesFinders.l_gfa1[0]))
+ # connection to rgfa is not checked if argument is line
+ self.assertEqual(TestAPILinesFinders.l_gfa2[0],TestAPILinesFinders.gfa1.segment(TestAPILinesFinders.l_gfa2[0]))
+ self.assertEqual(TestAPILinesFinders.l_gfa2[0],TestAPILinesFinders.gfa1.try_get_segment(TestAPILinesFinders.l_gfa2[0]))
+
+ def test_segment_gfa2(self):
+ # existing name as argument
+ self.assertEqual(TestAPILinesFinders.l_gfa2[0],TestAPILinesFinders.gfa2.segment("1"))
+ self.assertEqual(TestAPILinesFinders.l_gfa2[0],TestAPILinesFinders.gfa2.try_get_segment("1"))
+ # not existing name as argument
+ self.assertEqual(None,TestAPILinesFinders.gfa2.segment("0"))
+ with self.assertRaises(gfapy.NotFoundError): TestAPILinesFinders.gfa2.try_get_segment("0")
+ # line as argument
+ self.assertEqual(TestAPILinesFinders.l_gfa2[0],TestAPILinesFinders.gfa2.segment(TestAPILinesFinders.l_gfa2[0]))
+ self.assertEqual(TestAPILinesFinders.l_gfa2[0],TestAPILinesFinders.gfa2.try_get_segment(TestAPILinesFinders.l_gfa2[0]))
+ # connection to rgfa is not checked if argument is line
+ self.assertEqual(TestAPILinesFinders.l_gfa1[0],TestAPILinesFinders.gfa2.segment(TestAPILinesFinders.l_gfa1[0]))
+ self.assertEqual(TestAPILinesFinders.l_gfa1[0],TestAPILinesFinders.gfa2.try_get_segment(TestAPILinesFinders.l_gfa1[0]))
+
+ def test_line_gfa1(self):
+ # segment name as argument
+ self.assertEqual(TestAPILinesFinders.l_gfa1[0],TestAPILinesFinders.gfa1.line("1"))
+ self.assertEqual(TestAPILinesFinders.l_gfa1[0],TestAPILinesFinders.gfa1.try_get_line("1"))
+ # path name as argument
+ self.assertEqual(TestAPILinesFinders.l_gfa1[7],TestAPILinesFinders.gfa1.line("p1"))
+ self.assertEqual(TestAPILinesFinders.l_gfa1[7],TestAPILinesFinders.gfa1.try_get_line("p1"))
+ # not existing name as argument
+ self.assertEqual(None,TestAPILinesFinders.gfa1.line("0"))
+ with self.assertRaises(gfapy.NotFoundError): TestAPILinesFinders.gfa1.try_get_line("0")
+ # line as argument
+ self.assertEqual(TestAPILinesFinders.l_gfa1[0],TestAPILinesFinders.gfa1.line(TestAPILinesFinders.l_gfa1[0]))
+ self.assertEqual(TestAPILinesFinders.l_gfa1[0],TestAPILinesFinders.gfa1.try_get_line(TestAPILinesFinders.l_gfa1[0]))
+ # connection to rgfa is not checked if argument is line
+ self.assertEqual(TestAPILinesFinders.l_gfa2[0],TestAPILinesFinders.gfa1.line(TestAPILinesFinders.l_gfa2[0]))
+ self.assertEqual(TestAPILinesFinders.l_gfa2[0],TestAPILinesFinders.gfa1.try_get_line(TestAPILinesFinders.l_gfa2[0]))
+
+ def test_line_gfa2(self):
+ # segment name as argument
+ self.assertEqual(TestAPILinesFinders.l_gfa2[0],TestAPILinesFinders.gfa2.line("1"))
+ self.assertEqual(TestAPILinesFinders.l_gfa2[0],TestAPILinesFinders.gfa2.try_get_line("1"))
+ # edge name as argument
+ self.assertEqual(TestAPILinesFinders.l_gfa2[2],TestAPILinesFinders.gfa2.line("e1"))
+ self.assertEqual(TestAPILinesFinders.l_gfa2[2],TestAPILinesFinders.gfa2.try_get_line("e1"))
+ # gap name as argument
+ self.assertEqual(TestAPILinesFinders.l_gfa2[3],TestAPILinesFinders.gfa2.line("g1"))
+ self.assertEqual(TestAPILinesFinders.l_gfa2[3],TestAPILinesFinders.gfa2.try_get_line("g1"))
+ # path name as argument
+ self.assertEqual(TestAPILinesFinders.l_gfa2[4],TestAPILinesFinders.gfa2.line("o1"))
+ self.assertEqual(TestAPILinesFinders.l_gfa2[4],TestAPILinesFinders.gfa2.try_get_line("o1"))
+ # set name as argument
+ self.assertEqual(TestAPILinesFinders.l_gfa2[5],TestAPILinesFinders.gfa2.line("u1"))
+ self.assertEqual(TestAPILinesFinders.l_gfa2[5],TestAPILinesFinders.gfa2.try_get_line("u1"))
+ # not existing name as argument
+ self.assertIsNone(TestAPILinesFinders.gfa2.line("0"))
+ with self.assertRaises(gfapy.NotFoundError): TestAPILinesFinders.gfa2.try_get_line("0")
+ # line as argument
+ self.assertEqual(TestAPILinesFinders.l_gfa2[0],TestAPILinesFinders.gfa2.line(TestAPILinesFinders.l_gfa2[0]))
+ self.assertEqual(TestAPILinesFinders.l_gfa2[0],TestAPILinesFinders.gfa2.try_get_line(TestAPILinesFinders.l_gfa2[0]))
+ # connection to rgfa is not checked if argument is line
+ self.assertEqual(TestAPILinesFinders.l_gfa1[0],TestAPILinesFinders.gfa2.line(TestAPILinesFinders.l_gfa1[0]))
+ self.assertEqual(TestAPILinesFinders.l_gfa1[0],TestAPILinesFinders.gfa2.try_get_line(TestAPILinesFinders.l_gfa1[0]))
+
+ def test_fragments_for_external(self):
+ self.assertEqual(TestAPILinesFinders.l_gfa2[6:9], TestAPILinesFinders.gfa2.fragments_for_external("read1"))
+ self.assertEqual([], TestAPILinesFinders.gfa2.fragments_for_external("read2"))
+
+ def test_select_by_hash_gfa1(self):
+ # search segments
+ self.assertEqual(set(TestAPILinesFinders.l_gfa1_a[0:4]),
+ set([str(x) for x in TestAPILinesFinders.gfa1.select({"record_type":"S",
+ "sequence":"CGAT"})]))
+ self.assertEqual(TestAPILinesFinders.l_gfa1[0:1], TestAPILinesFinders.gfa1.select({"record_type":"S",
+ "name":"1"}))
+ # search links
+ self.assertEqual(TestAPILinesFinders.l_gfa1[4:5], TestAPILinesFinders.gfa1.select({"record_type":"L",
+ "from":"1",
+ "from_orient":"+"}))
+ # search containments
+ self.assertEqual(TestAPILinesFinders.l_gfa1[6:7], TestAPILinesFinders.gfa1.select({"record_type":"C",
+ "from":"1",
+ "pos":1}))
+ # search paths
+ self.assertEqual(TestAPILinesFinders.l_gfa1[7:8], TestAPILinesFinders.gfa1.select({"record_type":"P",
+ "segment_names":"1+,2+"}))
+ # no record type specified
+ self.assertEqual(TestAPILinesFinders.l_gfa1[0:1], TestAPILinesFinders.gfa1.select({"name":"1"}))
+ self.assertEqual(TestAPILinesFinders.l_gfa1[4:7], TestAPILinesFinders.gfa1.select({"from":"1"}))
+ # reference as value
+ self.assertEqual(TestAPILinesFinders.l_gfa1[4:7], TestAPILinesFinders.gfa1.select({"from":TestAPILinesFinders.l_gfa1[0]}))
+ # placeholder is equal to any value
+ self.assertEqual(set(TestAPILinesFinders.l_gfa1_a[0:3]),
+ set([str(x) for x in TestAPILinesFinders.gfa1.select({"sequence":"ACC"})]))
+
+ def test_select_by_line_gfa1(self):
+ for i in range(len(TestAPILinesFinders.l_gfa1)):
+ self.assertEqual(TestAPILinesFinders.l_gfa1[i:i+1],
+ TestAPILinesFinders.gfa1.select(TestAPILinesFinders.l_gfa1[i]))
+
+ def test_select_by_hash_gfa2(self):
+ # search segments
+ self.assertEqual(set(TestAPILinesFinders.l_gfa2_a[0:2]),
+ set([str(x) for x in TestAPILinesFinders.gfa2.select({"record_type":"S",
+ "sequence":"CGAT"})]))
+ self.assertEqual(TestAPILinesFinders.l_gfa2[1:2], TestAPILinesFinders.gfa2.select({"record_type":"S",
+ "slen":110}))
+ # search edges
+ self.assertEqual(TestAPILinesFinders.l_gfa2[2:3], TestAPILinesFinders.gfa2.select({"record_type":"E",
+ "sid1":gfapy.OrientedLine("1","+")}))
+ # search gaps
+ self.assertEqual(TestAPILinesFinders.l_gfa2[3:4], TestAPILinesFinders.gfa2.select({"record_type":"G",
+ "sid1":gfapy.OrientedLine("1","-")}))
+ self.assertEqual(TestAPILinesFinders.l_gfa2[11:12], TestAPILinesFinders.gfa2.select({"record_type":"G",
+ "disp":2000}))
+ # search paths
+ self.assertEqual(TestAPILinesFinders.l_gfa2[4:5], TestAPILinesFinders.gfa2.select({"record_type":"O",
+ "items":"1+ 2-"}))
+ # search sets
+ self.assertEqual(TestAPILinesFinders.l_gfa2[5:6], TestAPILinesFinders.gfa2.select({"record_type":"U",
+ "name":"u1"}))
+ # search fragments
+ self.assertEqual(TestAPILinesFinders.l_gfa2[6:9], TestAPILinesFinders.gfa2.select({"record_type":"F",
+ "external":"read1-"}))
+ # search custom records
+ self.assertEqual(TestAPILinesFinders.l_gfa2[9:10], TestAPILinesFinders.gfa2.select({"record_type":"X",
+ "xx":"A"}))
+
+ def test_select_by_line_gfa2(self):
+ for i in range(len(TestAPILinesFinders.l_gfa2)):
+ self.assertEqual(TestAPILinesFinders.l_gfa2[i:i+1],
+ TestAPILinesFinders.gfa2.select(TestAPILinesFinders.l_gfa2[i]))
+
diff --git a/tests/test_api_multiplication.py b/tests/test_api_multiplication.py
new file mode 100644
index 0000000..7401777
--- /dev/null
+++ b/tests/test_api_multiplication.py
@@ -0,0 +1,198 @@
+import gfapy
+import unittest
+
+class TestAPIMultiplication(unittest.TestCase):
+
+ def test_multiply_segment(self):
+ gfa = gfapy.Gfa()
+ gfa.append("H\tVN:Z:1.0")
+ s = {"S\t0\t*\tRC:i:600",
+ "S\t1\t*\tRC:i:6000",
+ "S\t2\t*\tRC:i:60000"}
+ l = "L\t1\t+\t2\t+\t12M"
+ c = "C\t1\t+\t0\t+\t12\t12M"
+ p = "P\t3\t2+,0-\t12M"
+ for line in (list(s) + [l,c,p]): gfa.append(line)
+ self.assertEqual(s, {str(x) for x in gfa.segments})
+ self.assertEqual([l], [str(x) for x in gfa.dovetails if not x.virtual])
+ self.assertEqual([c], [str(x) for x in gfa.containments])
+ self.assertEqual([l], [str(x) for x in gfa.segment("1").end_relations("R", ["2", "L"])])
+ self.assertEqual([c], [str(x) for x in gfa.segment("1").relations_to("0")])
+ self.assertEqual(6000, gfa.segment("1").RC)
+ gfa.multiply("1", 2)
+ self.assertEqual([l], [str(x) for x in gfa.segment("1").end_relations("R", ["2", "L"])])
+ self.assertEqual([c], [str(x) for x in gfa.segment("1").relations_to("0")])
+ self.assertNotEqual([], [str(x) for x in gfa.segment("1*2").end_relations("R", ["2", "L"])])
+ self.assertNotEqual([], [str(x) for x in gfa.segment("1*2").relations_to("0")])
+ self.assertEqual(3000, gfa.segment("1").RC)
+ self.assertEqual(3000, gfa.segment("1*2").RC)
+ gfa.multiply("1*2", 3, copy_names=["6","7"])
+ self.assertEqual([l], [str(x) for x in gfa.segment("1").end_relations("R", ["2", "L"])])
+ self.assertNotEqual([], [str(x) for x in gfa.segment("1*2").end_relations("R", ["2", "L"])])
+ self.assertNotEqual([], [str(x) for x in gfa.segment("6").end_relations("R", ["2", "L"])])
+ self.assertNotEqual([], [str(x) for x in gfa.segment("7").end_relations("R", ["2", "L"])])
+ self.assertNotEqual([], gfa.segment("1*2").relations_to("0"))
+ self.assertNotEqual([], gfa.segment("6").relations_to("0"))
+ self.assertNotEqual([], gfa.segment("7").relations_to("0"))
+ self.assertEqual(3000, gfa.segment("1").RC)
+ self.assertEqual(1000, gfa.segment("1*2").RC)
+ self.assertEqual(1000, gfa.segment("6").RC)
+ self.assertEqual(1000, gfa.segment("7").RC)
+
+ def test_multiply_segment_copy_names(self):
+ gfa = gfapy.Gfa(["H\tVN:Z:1.0",
+ "S\t1\t*\tRC:i:600",
+ "S\t1b\t*\tRC:i:6000",
+ "S\t2\t*\tRC:i:60000",
+ "S\t3\t*\tRC:i:60000"])
+ gfa.multiply("2", 2)
+ gfa.try_get_segment("2*2") # nothing raised
+ gfa.multiply("2*2", 2)
+ gfa.try_get_segment("2*3") # nothing raised
+ gfa.multiply("2*2", 2, copy_names = ["x"])
+ gfa.try_get_segment("x") # nothing raised
+
+ def test_links_distribution_l1_m2(self):
+ for sfx in ["gfa", "gfa2"]:
+ g1 = gfapy.Gfa.from_file("tests/testdata/links_distri.l1.{}".format(sfx))
+ g2 = gfapy.Gfa.from_file("tests/testdata/links_distri.l1.m2.{}".format(sfx))
+ self.assertNotEqual(set(g2.segment_names),set(g1.segment_names))
+ self.assertNotEqual(set([str(x) for x in g2.dovetails]),
+ set([str(x) for x in g1.dovetails]))
+ g1.multiply("1", 2, extended=True)
+ self.assertEqual(set(g2.segment_names),set(g1.segment_names))
+ self.assertEqual(set([str(x) for x in g2.dovetails]),
+ set([str(x) for x in g1.dovetails]))
+
+ def test_links_distribution_l2_m2(self):
+ for sfx in ["gfa", "gfa2"]:
+ g1 = gfapy.Gfa.from_file("tests/testdata/links_distri.l2.{}".format(sfx))
+ g2 = gfapy.Gfa.from_file("tests/testdata/links_distri.l2.m2.{}".format(sfx))
+ self.assertNotEqual(set(g2.segment_names),set(g1.segment_names))
+ self.assertNotEqual(set([str(x) for x in g2.dovetails]),
+ set([str(x) for x in g1.dovetails]))
+ g1.multiply("1", 2, extended=True)
+ self.assertEqual(set(g2.segment_names),set(g1.segment_names))
+ self.assertEqual(set([str(x) for x in g2.dovetails]),
+ set([str(x) for x in g1.dovetails]))
+
+ def test_no_links_distribution_l2_m2(self):
+ for sfx in ["gfa", "gfa2"]:
+ g1 = gfapy.Gfa.from_file("tests/testdata/links_distri.l2.{}".format(sfx))
+ g2 = gfapy.Gfa.from_file("tests/testdata/links_distri.l2.m2.no_ld.{}".format(sfx))
+ self.assertNotEqual(set(g2.segment_names),set(g1.segment_names))
+ self.assertNotEqual(set([str(x) for x in g2.dovetails]),
+ set([str(x) for x in g1.dovetails]))
+ g1.multiply("1", 2, extended=True, distribute="off")
+ self.assertEqual(set(g2.segment_names),set(g1.segment_names))
+ self.assertEqual(set([str(x) for x in g2.dovetails]),
+ set([str(x) for x in g1.dovetails]))
+
+ def test_links_distribution_l2_m3(self):
+ for sfx in ["gfa", "gfa2"]:
+ g1 = gfapy.Gfa.from_file("tests/testdata/links_distri.l2.{}".format(sfx))
+ g2 = gfapy.Gfa.from_file("tests/testdata/links_distri.l2.m3.{}".format(sfx))
+ self.assertNotEqual(set(g2.segment_names),set(g1.segment_names))
+ self.assertNotEqual(set([str(x) for x in g2.dovetails]),
+ set([str(x) for x in g1.dovetails]))
+ g1.multiply("1", 3, extended=True)
+ self.assertEqual(set(g2.segment_names),set(g1.segment_names))
+ self.assertEqual(set([str(x) for x in g2.dovetails]),
+ set([str(x) for x in g1.dovetails]))
+
+ def test_no_links_distribution_l2_m3(self):
+ for sfx in ["gfa", "gfa2"]:
+ g1 = gfapy.Gfa.from_file("tests/testdata/links_distri.l2.{}".format(sfx))
+ g2 = gfapy.Gfa.from_file("tests/testdata/links_distri.l2.m3.no_ld.{}".format(sfx))
+ self.assertNotEqual(set(g2.segment_names),set(g1.segment_names))
+ self.assertNotEqual(set([str(x) for x in g2.dovetails]),
+ set([str(x) for x in g1.dovetails]))
+ g1.multiply("1", 3, extended=True, distribute="off")
+ self.assertEqual(set(g2.segment_names),set(g1.segment_names))
+ self.assertEqual(set([str(x) for x in g2.dovetails]),
+ set([str(x) for x in g1.dovetails]))
+
+ def test_links_distribution_l3_m2(self):
+ for sfx in ["gfa", "gfa2"]:
+ g1 = gfapy.Gfa.from_file("tests/testdata/links_distri.l3.{}".format(sfx))
+ g2 = gfapy.Gfa.from_file("tests/testdata/links_distri.l3.m2.{}".format(sfx))
+ self.assertNotEqual(set(g2.segment_names),set(g1.segment_names))
+ self.assertNotEqual(set([str(x) for x in g2.dovetails]),
+ set([str(x) for x in g1.dovetails]))
+ g1.multiply("1", 2, extended=True)
+ self.assertEqual(set(g2.segment_names),set(g1.segment_names))
+ self.assertEqual(set([str(x) for x in g2.dovetails]),
+ set([str(x) for x in g1.dovetails]))
+
+ def test_no_links_distribution_l3_m2(self):
+ for sfx in ["gfa", "gfa2"]:
+ g1 = gfapy.Gfa.from_file("tests/testdata/links_distri.l3.{}".format(sfx))
+ g2 = gfapy.Gfa.from_file("tests/testdata/links_distri.l3.m2.no_ld.{}".format(sfx))
+ self.assertNotEqual(set(g2.segment_names),set(g1.segment_names))
+ self.assertNotEqual(set([str(x) for x in g2.dovetails]),
+ set([str(x) for x in g1.dovetails]))
+ g1.multiply("1", 2, extended=True, distribute="off")
+ self.assertEqual(set(g2.segment_names),set(g1.segment_names))
+ self.assertEqual(set([str(x) for x in g2.dovetails]),
+ set([str(x) for x in g1.dovetails]))
+
+ def test_multiply_without_rgfatools(self):
+ for sfx in ["gfa", "gfa2"]:
+ g1 = gfapy.Gfa.from_file("tests/testdata/links_distri.l3.{}".format(sfx))
+ g2 = gfapy.Gfa.from_file("tests/testdata/links_distri.l3.m2.no_ld.{}".format(sfx))
+ self.assertNotEqual(set(g2.segment_names),set(g1.segment_names))
+ self.assertNotEqual(set([str(x) for x in g2.dovetails]),
+ set([str(x) for x in g1.dovetails]))
+ g1.multiply("1", 2)
+ self.assertEqual(set(g2.segment_names),set(g1.segment_names))
+ self.assertEqual(set([str(x) for x in g2.dovetails]),
+ set([str(x) for x in g1.dovetails]))
+
+ def test_distribution_policy_equal_with_equal(self):
+ for sfx in ["gfa", "gfa2"]:
+ g1 = gfapy.Gfa.from_file("tests/testdata/links_distri.l2.{}".format(sfx))
+ g2 = gfapy.Gfa.from_file("tests/testdata/links_distri.l2.m2.{}".format(sfx))
+ self.assertNotEqual(set(g2.segment_names),set(g1.segment_names))
+ self.assertNotEqual(set([str(x) for x in g2.dovetails]),
+ set([str(x) for x in g1.dovetails]))
+ g1.multiply("1", 2, extended=True, distribute="equal")
+ self.assertEqual(set(g2.segment_names),set(g1.segment_names))
+ self.assertEqual(set([str(x) for x in g2.dovetails]),
+ set([str(x) for x in g1.dovetails]))
+
+ def test_distribution_policy_equal_with_not_equal(self):
+ for sfx in ["gfa", "gfa2"]:
+ g1 = gfapy.Gfa.from_file("tests/testdata/links_distri.l3.{}".format(sfx))
+ g2 = gfapy.Gfa.from_file("tests/testdata/links_distri.l3.m2.no_ld.{}".format(sfx))
+ self.assertNotEqual(set(g2.segment_names),set(g1.segment_names))
+ self.assertNotEqual(set([str(x) for x in g2.dovetails]),
+ set([str(x) for x in g1.dovetails]))
+ g1.multiply("1", 2, extended=True, distribute="equal")
+ self.assertEqual(set(g2.segment_names),set(g1.segment_names))
+ self.assertEqual(set([str(x) for x in g2.dovetails]),
+ set([str(x) for x in g1.dovetails]))
+
+ def test_distribution_policy_L(self):
+ for sfx in ["gfa", "gfa2"]:
+ g1 = gfapy.Gfa.from_file("tests/testdata/links_distri.l2.{}".format(sfx))
+ g2 = gfapy.Gfa.from_file("tests/testdata/links_distri.l2.m2.no_ld.{}".format(sfx))
+ self.assertNotEqual(set(g2.segment_names),set(g1.segment_names))
+ self.assertNotEqual(set([str(x) for x in g2.dovetails]),
+ set([str(x) for x in g1.dovetails]))
+ g1.multiply("1", 2, extended=True, distribute="L")
+ self.assertEqual(set(g2.segment_names),set(g1.segment_names))
+ self.assertEqual(set([str(x) for x in g2.dovetails]),
+ set([str(x) for x in g1.dovetails]))
+
+ def test_distribution_policy_R(self):
+ for sfx in ["gfa", "gfa2"]:
+ g1 = gfapy.Gfa.from_file("tests/testdata/links_distri.l2.{}".format(sfx))
+ g2 = gfapy.Gfa.from_file("tests/testdata/links_distri.l2.m2.{}".format(sfx))
+ self.assertNotEqual(set(g2.segment_names),set(g1.segment_names))
+ self.assertNotEqual(set([str(x) for x in g2.dovetails]),
+ set([str(x) for x in g1.dovetails]))
+ g1.multiply("1", 2, extended=True, distribute="R")
+ self.assertEqual(set(g2.segment_names),set(g1.segment_names))
+ self.assertEqual(set([str(x) for x in g2.dovetails]),
+ set([str(x) for x in g1.dovetails]))
+
diff --git a/tests/test_api_placeholders.py b/tests/test_api_placeholders.py
new file mode 100644
index 0000000..c0e8432
--- /dev/null
+++ b/tests/test_api_placeholders.py
@@ -0,0 +1,26 @@
+import unittest
+from gfapy import *
+
+class TestApiPlaceholders(unittest.TestCase):
+
+ def test_str(self):
+ self.assertEqual("*", str(Placeholder()))
+
+ def test_is_placeholder(self):
+ self.assertTrue(is_placeholder(Placeholder()))
+ self.assertTrue(is_placeholder("*"))
+ self.assertTrue(is_placeholder([]))
+ self.assertFalse(is_placeholder("a"))
+ self.assertFalse(is_placeholder("**"))
+ self.assertFalse(is_placeholder(1))
+ self.assertFalse(is_placeholder(1.0))
+ self.assertFalse(is_placeholder(["x"]))
+
+ def test_compatibility_methods(self):
+ p = Placeholder()
+ self.assertTrue(p.is_empty())
+ self.assertTrue(is_placeholder(p))
+ self.assertEqual(0, len(p))
+ self.assertTrue(is_placeholder(p.rc()))
+ self.assertTrue(is_placeholder(p + 1))
+ self.assertTrue(is_placeholder(p[0]))
diff --git a/tests/test_api_positionals.py b/tests/test_api_positionals.py
new file mode 100644
index 0000000..c3e20c6
--- /dev/null
+++ b/tests/test_api_positionals.py
@@ -0,0 +1,249 @@
+import gfapy
+import unittest
+
+class TestAPIPositionals(unittest.TestCase):
+
+ s = {
+ "S1": "S\t1\t*",
+ "L": "L\t1\t+\t2\t+\t*",
+ "C": "C\t1\t+\t2\t+\t10\t*",
+ "P": "P\tx\t1+,2+\t*",
+ "S2": "S\t2\t100\t*",
+ "E": "E\t*\t1+\t2+\t10\t20\t30\t40\t*",
+ "F": "F\t1\t5+\t11\t21\t31\t41\t*",
+ "G": "G\t*\t1+\t2+\t1000\t1",
+ "U": "U\t*\t1 2 3",
+ "O": "O\t*\t1+ 2+ 3+",
+ }
+ f = {k:v.split("\t") for k, v in s.items()}
+ l = {k:gfapy.Line(v) for k, v in s.items()}
+
+ fieldnames = {
+ "S1":["name", "sequence"],
+ "L" :["from_segment", "from_orient", "to_segment", "to_orient", "overlap"],
+ "C" :["from_segment", "from_orient", "to_segment", "to_orient", "pos", "overlap"],
+ "P" :["path_name", "segment_names", "overlaps"],
+ "S2":["sid", "slen", "sequence"],
+ "E" :["eid", "sid1", "sid2", "beg1", "end1", "beg2", "end2", "alignment"],
+ "F" :["sid", "external", "s_beg", "s_end", "f_beg", "f_end", "alignment"],
+ "G" :["gid", "sid1", "sid2", "disp", "var"],
+ "U" :["pid", "items"],
+ "O" :["pid", "items"],
+ }
+
+ # alternative values to set tests
+ v1 = {
+ "S1":{"name":"sx", "sequence":"accg"},
+ "L":{"from_segment":"a1", "from_orient":"-", "to_segment":"a2", "to_orient":"-",
+ "overlap": gfapy.Alignment("12M")},
+ "C":{"from_segment":"cx", "from_orient":"-", "to_segment":"cy", "to_orient":"-",
+ "pos":123, "overlap": gfapy.Alignment("120M")},
+ "P":{"path_name":"px", "segment_names":[gfapy.OrientedLine("x","+"), gfapy.OrientedLine("y","-")],
+ "overlaps":[gfapy.Alignment("10M")]},
+ "S2":{"sid":"s2s", "slen":999, "sequence":"gggg"},
+ "E" :{"eid":"e2e", "sid1":gfapy.OrientedLine("s2s","-"),
+ "sid2":gfapy.OrientedLine("t2t","-"),
+ "beg1":0, "end1":gfapy.LastPos("100$"),
+ "beg2":10, "end2":gfapy.LastPos("110$"),
+ "alignment":gfapy.Alignment("10M1I10M1D80M")},
+ "F" :{"sid":"s2s", "external":gfapy.OrientedLine("ex2ex","-"),
+ "s_beg":0, "s_end":gfapy.LastPos("100$"),
+ "f_beg":10, "f_end":gfapy.LastPos("110$"),
+ "alignment":gfapy.Alignment("10M1I10M1D80M")},
+ "G" :{"gid":"g2g", "sid1":gfapy.OrientedLine("s2s","+"), "sid2":gfapy.OrientedLine("t2t","-"),
+ "disp":2000, "var":100},
+ "O" :{"pid":"O100", "items":[gfapy.OrientedLine("x1","+"),
+ gfapy.OrientedLine("x2","+"),
+ gfapy.OrientedLine("x3","-")]},
+ "U" :{"pid":"U100", "items":["x1", "x2", "x3"]},
+ }
+ v2 = {
+ "S1":{"name":"xs", "sequence":"aggc"},
+ "L":{"from_segment":"a5", "from_orient":"+", "to_segment":"a7", "to_orient":"+",
+ "overlap":gfapy.Alignment("9M3I3M")},
+ "C":{"from_segment":"cp", "from_orient":"+", "to_segment":"cl", "to_orient":"+",
+ "pos":213, "overlap":gfapy.Alignment("110M4D10M")},
+ "P":{"path_name":"pu", "segment_names":[gfapy.OrientedLine("k","-"),
+ gfapy.OrientedLine("l","+")], "overlaps":[gfapy.Alignment("11M")]},
+ "S2":{"sid":"s4s", "slen":1999, "sequence":"aaaa"},
+ "E" :{"eid":"e4e", "sid1":gfapy.OrientedLine("s4s","+"),
+ "sid2":gfapy.OrientedLine("t4t","+"),
+ "beg1":10, "end1":gfapy.LastPos("110$"),
+ "beg2":0, "end2":gfapy.LastPos("100$"),
+ "alignment":gfapy.Alignment("10M1I20M1D80M")},
+ "F" :{"sid":"s4s", "external":gfapy.OrientedLine("ex4ex", "+"),
+ "s_beg":10, "s_end":gfapy.LastPos("110$"),
+ "f_beg":0, "f_end":gfapy.LastPos("100$"),
+ "alignment":gfapy.Alignment("10M1I20M1D80M")},
+ "G" :{"gid":"g4g", "sid1":gfapy.OrientedLine("s4s","-"), "sid2":gfapy.OrientedLine("t4t","+"),
+ "disp":3000, "var":200},
+ "O" :{"pid":"O200", "items":[gfapy.OrientedLine("x7","-"),
+ gfapy.OrientedLine("x6","+"),
+ gfapy.OrientedLine("x3","+")]},
+ "U" :{"pid":"U200", "items":["x6", "x7", "x4"]},
+ }
+ aliases = {
+ "S1":{"name":"sid"}, "P":{"path_name":"name"},
+ "S2":{"sid":"name"}, "E":{"eid":"name"}, "G":{"gid":"name"},
+ "U":{"pid":"name"}, "O":{"pid":"name"},
+ "L":{"from_segment": "from", "to_segment": "to"},
+ "C":{"from_segment":"container", "from_orient":"container_orient",
+ "to_segment":"contained", "to_orient":"contained_orient"}
+ }
+
+ def test_number_of_positionals(self):
+ for rt, fields in TestAPIPositionals.f.items():
+ gfapy.Line(fields) # nothing raised
+ too_less = fields.copy(); too_less.pop()
+ with self.assertRaises(gfapy.FormatError): gfapy.Line(too_less)
+ too_many = fields.copy(); too_many.append("*")
+ with self.assertRaises(gfapy.FormatError): gfapy.Line(too_many)
+
+ def test_positional_fieldnames(self):
+ for rt, line in TestAPIPositionals.l.items():
+ self.assertEqual(TestAPIPositionals.fieldnames[rt], line.positional_fieldnames)
+
+ def test_field_getters_and_setters(self):
+ for rt, fn_list in TestAPIPositionals.fieldnames.items():
+ for i, fn in enumerate(fn_list):
+ i+=1 # skip record_type
+ # field_to_s()
+ self.assertEqual(TestAPIPositionals.f[rt][i], TestAPIPositionals.l[rt].field_to_s(fn))
+ # validate_field/validate
+ TestAPIPositionals.l[rt].validate_field(fn) # nothing raised
+ TestAPIPositionals.l[rt].validate() # nothing raised
+ # fieldname() == get(fieldname)
+ self.assertEqual(getattr(TestAPIPositionals.l[rt], fn), TestAPIPositionals.l[rt].get(fn))
+ # fieldname=() and fieldname()
+ l = TestAPIPositionals.l[rt].clone()
+ setattr(l,fn,TestAPIPositionals.v1[rt][fn])
+ self.assertEqual(TestAPIPositionals.v1[rt][fn], getattr(l, fn))
+ # set() and get()
+ l.set(fn, TestAPIPositionals.v2[rt][fn])
+ self.assertEqual(TestAPIPositionals.v2[rt][fn], l.get(fn))
+
+ def test_aliases(self):
+ for rt, aliasmap in TestAPIPositionals.aliases.items():
+ for orig, al in aliasmap.items():
+ # get(orig) == get(alias)
+ self.assertEqual(getattr(TestAPIPositionals.l[rt], orig), getattr(TestAPIPositionals.l[rt],al))
+ self.assertEqual(TestAPIPositionals.l[rt].get(orig), TestAPIPositionals.l[rt].get(al))
+ # validate_field/validate
+ TestAPIPositionals.l[rt].validate_field(al) # nothing raised
+ TestAPIPositionals.l[rt].validate() # nothing raised
+ # field_to_s(orig) == field_to_s(alias)
+ self.assertEqual(TestAPIPositionals.l[rt].field_to_s(orig), TestAPIPositionals.l[rt].field_to_s(al))
+ # set(al, value) + get(orig)
+ l = TestAPIPositionals.l[rt].clone()
+ self.assertNotEqual(TestAPIPositionals.v1[rt][orig], getattr(l,orig))
+ l.set(al, TestAPIPositionals.v1[rt][orig])
+ self.assertEqual(TestAPIPositionals.v1[rt][orig], getattr(l,orig))
+ # alias=value + orig()
+ self.assertNotEqual(TestAPIPositionals.v2[rt][orig], getattr(l,orig))
+ setattr(l, al, TestAPIPositionals.v2[rt][orig])
+ self.assertEqual(TestAPIPositionals.v2[rt][orig], getattr(l,orig))
+ # set(orig, value) + get(alias)
+ self.assertNotEqual(TestAPIPositionals.v1[rt][orig], getattr(l,al))
+ l.set(orig, TestAPIPositionals.v1[rt][orig])
+ self.assertEqual(TestAPIPositionals.v1[rt][orig], getattr(l,al))
+ # orig=value + alias()
+ self.assertNotEqual(TestAPIPositionals.v2[rt][orig], getattr(l,al))
+ setattr(l, orig, TestAPIPositionals.v2[rt][orig])
+ self.assertEqual(TestAPIPositionals.v2[rt][orig], getattr(l,al))
+
+ def test_array_fields(self):
+ assert(isinstance(TestAPIPositionals.l["P"].segment_names, list))
+ assert(isinstance(TestAPIPositionals.l["P"].segment_names[0], gfapy.OrientedLine))
+ assert(isinstance(TestAPIPositionals.l["P"].overlaps, list))
+ assert(isinstance(TestAPIPositionals.l["P"].overlaps[0], gfapy.AlignmentPlaceholder))
+ assert(isinstance(TestAPIPositionals.l["O"].items, list))
+ assert(isinstance(TestAPIPositionals.l["O"].items[0], gfapy.OrientedLine))
+ assert(isinstance(TestAPIPositionals.l["U"].items, list))
+ assert(isinstance(TestAPIPositionals.l["U"].items[0], str))
+
+ def test_orientation(self):
+ # orientation is a string ("+" or "-")
+ self.assertEqual("+", TestAPIPositionals.l["L"].from_orient)
+ self.assertEqual("+", TestAPIPositionals.l["L"].to_orient)
+ # invert
+ self.assertEqual("-", gfapy.invert(TestAPIPositionals.l["L"].to_orient))
+ self.assertEqual("+", gfapy.invert("-"))
+ self.assertEqual("-", gfapy.invert("+"))
+ # string representation
+ self.assertEqual("+", TestAPIPositionals.l["L"].field_to_s("from_orient"))
+ # assigning the string representation
+ l = TestAPIPositionals.l["L"].clone()
+ l.from_orient = "+"
+ self.assertEqual("+", l.from_orient)
+ self.assertEqual("-", gfapy.invert(l.from_orient))
+ # non "+"/"-" symbols is an error
+ with self.assertRaises(gfapy.FormatError):
+ l.from_orient = "x"
+ l.validate()
+ # only "+"/"-" and their string representations are accepted
+ with self.assertRaises(gfapy.FormatError):
+ l.from_orient = "x"
+ l.validate()
+ with self.assertRaises(gfapy.FormatError):
+ l.from_orient = 1
+ l.validate()
+
+ def test_oriented_segment(self):
+ os = TestAPIPositionals.l["P"].segment_names[0]
+ # getter methods
+ self.assertEqual("1", os.line)
+ self.assertEqual("+", os.orient)
+ # invert
+ self.assertEqual("1", os.inverted().line)
+ self.assertEqual("-", os.inverted().orient)
+ self.assertEqual("-", gfapy.invert(os.orient))
+ # setter methods
+ os.line = "one"
+ os.orient = "-"
+ self.assertEqual("one", os.line)
+ self.assertEqual("-", os.orient)
+ # name
+ self.assertEqual("one", os.name)
+ os.line = TestAPIPositionals.l["S1"]
+ self.assertEqual(TestAPIPositionals.l["S1"], os.line)
+ self.assertEqual(TestAPIPositionals.l["S1"].name, os.name)
+
+ def test_sequence(self):
+ # placeholder
+ assert(gfapy.is_placeholder(TestAPIPositionals.l["S1"].sequence))
+ assert(gfapy.is_placeholder(TestAPIPositionals.l["S2"].sequence))
+ s = TestAPIPositionals.l["S1"].clone()
+ s.sequence = "ACCT"
+ assert(not gfapy.is_placeholder(s.sequence))
+ # sequence is string
+ self.assertEqual("ACCT", s.sequence)
+ # rc
+ self.assertEqual("AGGT", gfapy.sequence.rc(s.sequence))
+ # the GFA2 allowed alphabet is larger than that of GFA1
+ s.validate() # nothing raised
+ s.sequence = ";;;{}"
+ with self.assertRaises(gfapy.FormatError): s.validate()
+ s = TestAPIPositionals.l["S2"].clone()
+ s.sequence = ";;;{}"
+ s.validate() # nothing raised
+ # Sequence
+ assert(isinstance(gfapy.sequence.Sequence("*"),gfapy.Placeholder))
+ assert(isinstance(gfapy.sequence.Sequence("ACG"),str))
+
+ def test_sequence_rc(self):
+ self.assertEqual("gcatcgatcgt",gfapy.sequence.rc("acgatcgatgc"))
+ # case
+ self.assertEqual("gCaTCgatcgt",gfapy.sequence.rc("acgatcGAtGc"))
+ # wildcards
+ self.assertEqual("gcatcnatcgt",gfapy.sequence.rc("acgatngatgc"))
+ self.assertEqual("gcatcYatcgt",gfapy.sequence.rc("acgatRgatgc"))
+ # RNA
+ self.assertEqual("gcaucgaucgu",gfapy.sequence.rc("acgaucgaugc",rna=True))
+ self.assertEqual("===.",gfapy.sequence.rc(".==="))
+ # valid
+ with self.assertRaises(gfapy.ValueError): gfapy.sequence.rc("acgatXgatgc")
+ gfapy.sequence.rc("acgatXgatgc",valid=True) # nothing raised
+ # placeholder
+ self.assertEqual("*",gfapy.sequence.rc("*"))
+ with self.assertRaises(gfapy.ValueError): gfapy.sequence.rc("**")
+
diff --git a/tests/test_api_positions.py b/tests/test_api_positions.py
new file mode 100644
index 0000000..7508fc3
--- /dev/null
+++ b/tests/test_api_positions.py
@@ -0,0 +1,60 @@
+import gfapy
+import unittest
+
+class TestAPIPositions(unittest.TestCase):
+
+ def test_positions(self):
+ # from string and integer
+ pos1 = gfapy.LastPos(12); pos2 = gfapy.LastPos("12$")
+ self.assertEqual(pos1, pos2)
+ assert(isinstance(pos1, gfapy.LastPos))
+ assert(isinstance(pos2, gfapy.LastPos))
+ # value
+ self.assertEqual(12, gfapy.posvalue(pos1))
+ self.assertEqual(12, gfapy.posvalue(pos2))
+ self.assertEqual(12, gfapy.posvalue(12))
+ # to_pos on string without dollar
+ self.assertEqual(12, gfapy.LastPos("12"))
+ assert(isinstance(gfapy.LastPos("12"), int))
+ # to pos: wrong format
+ with self.assertRaises(gfapy.FormatError): gfapy.LastPos("12=")
+ # 0$ is allowed, although unclear if useful
+ assert(gfapy.islastpos(gfapy.LastPos("0$")))
+ # comparison with integer and string
+ self.assertEqual(gfapy.LastPos(10), 10)
+ self.assertEqual(10, gfapy.LastPos(10))
+ # to_s
+ self.assertEqual("12$", str(pos1))
+ # to_i
+ self.assertEqual(12, int(pos1))
+
+ def test_positions_negative(self):
+ # negative values
+ with self.assertRaises (gfapy.ValueError): gfapy.LastPos("-1")
+ with self.assertRaises (gfapy.ValueError): gfapy.LastPos("-1$")
+ # negative values, valid: True
+ self.assertEqual(-1, gfapy.LastPos("-1",valid=True))
+ assert(isinstance(gfapy.LastPos("-1",valid=True), int))
+ self.assertEqual(gfapy.LastPos(-1, valid=True), gfapy.LastPos("-1$",valid=True))
+ self.assertEqual(gfapy.LastPos(-1, valid=True), gfapy.LastPos(-1,valid=True))
+ # validate
+ with self.assertRaises (gfapy.ValueError): gfapy.LastPos("-1$",valid=True).validate()
+ with self.assertRaises (gfapy.ValueError): gfapy.LastPos(-1,valid=True).validate()
+
+ def test_positions_first_last(self):
+ assert(not gfapy.islastpos(gfapy.LastPos("0")))
+ assert(not gfapy.islastpos(gfapy.LastPos("12")))
+ assert(gfapy.islastpos(gfapy.LastPos("12$")))
+ assert(gfapy.isfirstpos(gfapy.LastPos("0")))
+ assert(not gfapy.isfirstpos(gfapy.LastPos("12")))
+ assert(not gfapy.isfirstpos(gfapy.LastPos("12$")))
+
+ def test_positions_subtract(self):
+ a = gfapy.LastPos("13$")
+ a1 = a - 0
+ a2 = a - 1
+ self.assertEqual(13, a1)
+ self.assertEqual(12, a2)
+ assert(gfapy.islastpos(a1))
+ assert(not gfapy.islastpos(a2))
+
diff --git a/tests/test_api_references_edge_gfa1.py b/tests/test_api_references_edge_gfa1.py
new file mode 100644
index 0000000..00b7d35
--- /dev/null
+++ b/tests/test_api_references_edge_gfa1.py
@@ -0,0 +1,125 @@
+import gfapy
+import unittest
+
+class TestAPIReferencesEdgesGFA1(unittest.TestCase):
+
+ def test_links_references(self):
+ g = gfapy.Gfa()
+ lab = gfapy.Line("L\ta\t+\tb\t+\t*")
+ self.assertEqual("a", lab.from_segment)
+ self.assertEqual("b", lab.to_segment)
+ g.append(lab)
+ sa = gfapy.Line("S\ta\t*")
+ g.append(sa)
+ sb = gfapy.Line("S\tb\t*")
+ g.append(sb)
+ self.assertEqual(sa, lab.from_segment)
+ self.assertEqual(sb, lab.to_segment)
+ lab.disconnect()
+ self.assertEqual("a", lab.from_segment)
+ self.assertEqual("b", lab.to_segment)
+ # disconnection of segment cascades on links
+ g.append(lab)
+ assert(lab.is_connected())
+ self.assertEqual(sa, lab.from_segment)
+ sa.disconnect()
+ assert(not lab.is_connected())
+ self.assertEqual("a", lab.from_segment)
+
+ def test_links_backreferences(self):
+ g = gfapy.Gfa()
+ sa = gfapy.Line("S\ta\t*")
+ g.append(sa)
+ # links
+ s = {}; l = {}
+ for name in ["b", "c", "d", "e", "f", "g", "h", "i"]:
+ s[name] = gfapy.Line("S\t{}\t*".format(name))
+ g.append(s[name])
+ for name in \
+ ["a+b+", "a+c-", "a-d+", "a-e-", "f+a+", "g+a-", "h-a+", "i-a-"]:
+ l[name] = gfapy.Line("\t".join(list("L"+name+"*")))
+ g.append(l[name])
+ # dovetails_[LR]()
+ self.assertEqual([l["a+b+"], l["a+c-"],
+ l["g+a-"], l["i-a-"]], sa.dovetails_R)
+ self.assertEqual([l["a-d+"], l["a-e-"],
+ l["f+a+"], l["h-a+"]], sa.dovetails_L)
+ # dovetails()
+ self.assertEqual(sa.dovetails_R, sa.dovetails_of_end("R"))
+ self.assertEqual(sa.dovetails_L, sa.dovetails_of_end("L"))
+ self.assertEqual(sa.dovetails_L + sa.dovetails_R, sa.dovetails)
+ # neighbours
+ self.assertEqual(set(["b", "c", "d", "e", "f", "g", "h", "i"]),
+ set([x.name for x in sa.neighbours]))
+ # gfa2 specific collections are empty in gfa1
+ self.assertEqual([], sa.gaps)
+ self.assertEqual([], sa.fragments)
+ self.assertEqual([], sa.internals)
+ # upon disconnection
+ sa.disconnect()
+ self.assertEqual([], sa.dovetails_R)
+ self.assertEqual([], sa.dovetails_L)
+ self.assertEqual([], sa.dovetails_of_end("L"))
+ self.assertEqual([], sa.dovetails_of_end("R"))
+ self.assertEqual([], sa.dovetails)
+ self.assertEqual([], sa.neighbours)
+
+ def test_containments_references(self):
+ g = gfapy.Gfa()
+ cab = gfapy.Line("C\ta\t+\tb\t+\t10\t*")
+ self.assertEqual("a", cab.from_segment)
+ self.assertEqual("b", cab.to_segment)
+ sa = gfapy.Line("S\ta\t*")
+ g.append(sa)
+ sb = gfapy.Line("S\tb\t*")
+ g.append(sb)
+ g.append(cab)
+ self.assertEqual(sa, cab.from_segment)
+ self.assertEqual(sb, cab.to_segment)
+ cab.disconnect()
+ self.assertEqual("a", cab.from_segment)
+ self.assertEqual("b", cab.to_segment)
+ # disconnection of segment cascades on containments
+ g.append(cab)
+ assert(cab.is_connected())
+ self.assertEqual(sa, cab.from_segment)
+ sa.disconnect()
+ assert(not cab.is_connected())
+ self.assertEqual("a", cab.from_segment)
+
+ def test_containments_backreferences(self):
+ g = gfapy.Gfa()
+ sa = gfapy.Line("S\ta\t*")
+ g.append(sa)
+ # containments:
+ s = {}; c = {}
+ for name in ["b", "c", "d", "e", "f", "g", "h", "i"]:
+ s[name] = gfapy.Line("S\t"+"{}".format(name)+"\t*")
+ g.append(s[name])
+ for name in \
+ ["a+b+", "a+c-", "a-d+", "a-e-", "f+a+", "g+a-", "h-a+", "i-a-"]:
+ c[name] = gfapy.Line("\t".join(list("C{}9*".format(name))))
+ g.append(c[name])
+ # edges to contained/containers
+ self.assertEqual([c["a+b+"], c["a+c-"], c["a-d+"], c["a-e-"]],
+ sa.edges_to_contained)
+ self.assertEqual([c["f+a+"], c["g+a-"], c["h-a+"], c["i-a-"]],
+ sa.edges_to_containers)
+ # containments
+ self.assertEqual(sa.edges_to_contained + sa.edges_to_containers,
+ sa.containments)
+ # contained/containers
+ self.assertEqual([s["b"], s["c"], s["d"], s["e"]], sa.contained)
+ self.assertEqual([s["f"], s["g"], s["h"], s["i"]], sa.containers)
+ # gfa2 specific collections are empty in gfa1
+ self.assertEqual([], sa.gaps)
+ self.assertEqual([], sa.fragments)
+ self.assertEqual([], sa.internals)
+ # upon disconnection
+ sa.disconnect()
+ self.assertEqual([], sa.edges_to_contained)
+ self.assertEqual([], sa.edges_to_containers)
+ self.assertEqual([], sa.containments)
+ self.assertEqual([], sa.contained)
+ self.assertEqual([], sa.containers)
+
diff --git a/tests/test_api_references_edge_gfa2.py b/tests/test_api_references_edge_gfa2.py
new file mode 100644
index 0000000..c61c12f
--- /dev/null
+++ b/tests/test_api_references_edge_gfa2.py
@@ -0,0 +1,181 @@
+import gfapy
+import unittest
+
+class TestAPIReferencesEdgesGFA2(unittest.TestCase):
+
+ def test_edges_references(self):
+ g = gfapy.Gfa()
+ lab = gfapy.Line("E\t*\ta+\tb+\t0\t10\t90\t100$\t*")
+ self.assertEqual(gfapy.OrientedLine("a","+"), lab.sid1)
+ self.assertEqual(gfapy.OrientedLine("b","+"), lab.sid2)
+ sa = gfapy.Line("S\ta\t100\t*")
+ g.append(sa)
+ sb = gfapy.Line("S\tb\t100\t*")
+ g.append(sb)
+ g.append(lab)
+ self.assertEqual(sa, lab.sid1.line)
+ self.assertEqual(sb, lab.sid2.line)
+ lab.disconnect()
+ self.assertEqual("a", lab.sid1.line)
+ self.assertEqual("b", lab.sid2.line)
+ # disconnection of segment cascades on edges
+ g.append(lab)
+ assert(lab.is_connected())
+ self.assertEqual(sa, lab.sid1.line)
+ sa.disconnect()
+ assert(not lab.is_connected())
+ self.assertEqual("a", lab.sid1.line)
+
+ def test_edges_backreferences(self):
+ g = gfapy.Gfa()
+ sa = gfapy.Line("S\ta\t100\t*")
+ g.append(sa)
+ s = {}
+ for sbeg1, beg1 in {"0":0,"1":30,"2":70,"$":gfapy.LastPos("100$")}.items():
+ for send1, end1 in {"0":0,"1":30,"2":70,"$":gfapy.LastPos("100$")}.items():
+ if beg1 > end1:
+ continue
+ for sbeg2, beg2 in {"0":0,"1":30,"2":70,"$":gfapy.LastPos("100$")}.items():
+ for send2, end2 in {"0":0,"1":30,"2":70,"$":gfapy.LastPos("100$")}.items():
+ if beg2 > end2:
+ continue
+ for or1 in ["+","-"]:
+ for or2 in ["+","-"]:
+ eid = "<{}".format(or1)+"{}".format(or2)+"{}".format(sbeg1)+"{}".format(send1)+"{}".format(sbeg2)+"{}".format(send2)
+ other = "s{}".format(eid)
+ g.append("\t".join(["E",eid,"a{}".format(or1),"{}".format(other)+"{}".format(or2),
+ str(beg1),str(end1),str(beg2),str(end2),"*"]))
+ s[other] = gfapy.Line("S\t{}".format(other)+"\t100\t*")
+ g.append(s[other])
+ eid = ">{}".format(or1)+"{}".format(or2)+"{}".format(sbeg1)+"{}".format(send1)+"{}".format(sbeg2)+"{}".format(send2)
+ other = "s{}".format(eid)
+ g.append("\t".join(["E",eid,"{}".format(other)+"{}".format(or1),"a{}".format(or2),
+ str(beg1),str(end1),str(beg2),str(end2),"*"]))
+ s[other] = gfapy.Line("S\t{}".format(other)+"\t100\t*")
+ g.append(s[other])
+ exp_sa_d_L = []
+ exp_sa_d_R = []
+ exp_sa_e_cr = []
+ exp_sa_e_cd = []
+ exp_sa_i = []
+ # a from 0 to non-$, other from non-0 to $;
+ # same orientation;"d_L"
+ # opposite orientations;"internals"
+ for e_a in ["0","1","2"]:
+ for b_other in ["1","2","$"]:
+ for ors in ["++","--"]:
+ exp_sa_d_L.append("<{}".format(ors)+"0{}".format(e_a)+"{}".format(b_other)+"$")
+ exp_sa_d_L.append(">{}".format(ors)+"{}".format(b_other)+"$0{}".format(e_a))
+ for ors in ["+-","-+"]:
+ exp_sa_i.append("<{}".format(ors)+"0{}".format(e_a)+"{}".format(b_other)+"$")
+ exp_sa_i.append(">{}".format(ors)+"{}".format(b_other)+"$0{}".format(e_a))
+ # one from non-0 to non-$, other non-0 to non-$;"internals"
+ for pos_one in ["11","12","22"]:
+ for pos_other in ["11","12","22"]:
+ for ors in ["++","--","+-","-+"]:
+ for d in ["<",">"]:
+ exp_sa_i.append("{}".format(d)+"{}".format(ors)+"{}".format(pos_one)+"{}".format(pos_other))
+ # one from non-0 to non-$, other 0 to non-$;"internals"
+ for pos_one in ["11","12","22"]:
+ for pos_other in ["00","01","02"]:
+ for ors in ["++","--","+-","-+"]:
+ for d in ["<",">"]:
+ exp_sa_i.append("{}".format(d)+"{}".format(ors)+"{}".format(pos_one)+"{}".format(pos_other))
+ exp_sa_i.append("{}".format(d)+"{}".format(ors)+"{}".format(pos_other)+"{}".format(pos_one))
+ # one from non-0 to non-$, other non-0 to $;"internals"
+ for pos_one in ["11","12","22"]:
+ for pos_other in ["1$","2$","$$"]:
+ for ors in ["++","--","+-","-+"]:
+ for d in ["<",">"]:
+ exp_sa_i.append("{}".format(d)+"{}".format(ors)+"{}".format(pos_one)+"{}".format(pos_other))
+ exp_sa_i.append("{}".format(d)+"{}".format(ors)+"{}".format(pos_other)+"{}".format(pos_one))
+ # other from 0 to non-$, a from non-0 to $
+ # same orientation;"d_R"
+ # opposite orientations;"internals"
+ for e_other in ["0","1","2"]:
+ for b_a in ["1","2","$"]:
+ for ors in ["++","--"]:
+ exp_sa_d_R.append("<"+"{}".format(ors)+"{}".format(b_a)+"$0"+"{}".format(e_other))
+ exp_sa_d_R.append(">"+"{}".format(ors)+"0"+"{}".format(e_other)+"{}".format(b_a)+"$")
+ for ors in ["+-","-+"]:
+ exp_sa_i.append("<"+"{}".format(ors)+"{}".format(b_a)+"$0"+"{}".format(e_other))
+ exp_sa_i.append(">"+"{}".format(ors)+"0"+"{}".format(e_other)+"{}".format(b_a)+"$")
+ # both from 0 to non-$,
+ # opposite orientations;"d_L"
+ # same orientation;"internals"
+ for e1 in ["0","1","2"]:
+ for e2 in ["0","1","2"]:
+ pos = "0"+"{}".format(e1)+"0"+"{}".format(e2)
+ for ors in ["+-","-+"]:
+ for d in ["<",">"]:
+ exp_sa_d_L.append("{}".format(d)+"{}".format(ors)+"{}".format(pos))
+ for ors in ["++","--"]:
+ for d in ["<",">"]:
+ exp_sa_i.append("{}".format(d)+"{}".format(ors)+"{}".format(pos))
+ # both from non-0 to $,
+ # opposite orientations;"d_R"
+ # same orientation;"internals"
+ for e1 in ["1","2","$"]:
+ for e2 in ["1","2","$"]:
+ pos = "{}".format(e1)+"$"+"{}".format(e2)+"$"
+ for ors in ["+-","-+"]:
+ for d in ["<",">"]:
+ exp_sa_d_R.append("{}".format(d)+"{}".format(ors)+"{}".format(pos))
+ for ors in ["++","--"]:
+ for d in ["<",">"]:
+ exp_sa_i.append("{}".format(d)+"{}".format(ors)+"{}".format(pos))
+ # a whole; other non-whole:edges_to_containers
+ for pos_other in ["00","01","02","11","12","1$","22","2$","$$"]:
+ for ors in ["++","--","+-","-+"]:
+ exp_sa_e_cr.append("<{}".format(ors)+"0${}".format(pos_other))
+ exp_sa_e_cr.append(">{}".format(ors)+"{}".format(pos_other)+"0$")
+ # a not-whole; other whole:edges_to_contained
+ for pos_a in ["00","01","02","11","12","1$","22","2$","$$"]:
+ for ors in ["++","--","+-","-+"]:
+ exp_sa_e_cd.append("<{}".format(ors)+"{}".format(pos_a)+"0$")
+ exp_sa_e_cd.append(">{}".format(ors)+"0${}".format(pos_a))
+ # a sid1; both whole:edges_to_contained
+ for ors in ["++","--","+-","-+"]:
+ exp_sa_e_cd.append("<{}".format(ors)+"0$0$")
+ # a sid2; both whole:edges_to_containers
+ for ors in ["++","--","+-","-+"]:
+ exp_sa_e_cr.append(">{}".format(ors)+"0$0$")
+ # dovetails_[LR]
+ self.assertEqual(set(exp_sa_d_L), set([x.name for x in sa.dovetails_L]))
+ self.assertEqual(set(exp_sa_d_R), set([x.name for x in sa.dovetails_R]))
+ # dovetails()
+ self.assertEqual(sa.dovetails_L, sa.dovetails_of_end("L"))
+ self.assertEqual(sa.dovetails_R, sa.dovetails_of_end("R"))
+ self.assertEqual((sa.dovetails_L + sa.dovetails_R), sa.dovetails)
+ # neighbours
+ self.assertEqual(set(["s"+x for x in (exp_sa_d_L+exp_sa_d_R)]),
+ set([x.name for x in sa.neighbours]))
+ # edges_to_containers/contained
+ self.assertEqual(set(exp_sa_e_cr),
+ set([x.name for x in sa.edges_to_containers]))
+ self.assertEqual(set(exp_sa_e_cd),
+ set([x.name for x in sa.edges_to_contained]))
+ # containments
+ self.assertEqual(set(exp_sa_e_cr+exp_sa_e_cd),
+ set([x.name for x in sa.containments]))
+ # contained/containers
+ self.assertEqual(set(["s"+x for x in exp_sa_e_cr]),
+ set([x.name for x in sa.containers]))
+ self.assertEqual(set(["s"+x for x in exp_sa_e_cd]),
+ set([x.name for x in sa.contained]))
+ # internals
+ self.assertEqual(set(exp_sa_i), set([x.name for x in sa.internals]))
+ # upon disconnection
+ sa.disconnect()
+ self.assertEqual([], sa.dovetails_L)
+ self.assertEqual([], sa.dovetails_R)
+ self.assertEqual([], sa.dovetails_of_end("L"))
+ self.assertEqual([], sa.dovetails_of_end("R"))
+ self.assertEqual([], sa.neighbours)
+ self.assertEqual([], sa.edges_to_containers)
+ self.assertEqual([], sa.edges_to_contained)
+ self.assertEqual([], sa.containments)
+ self.assertEqual([], sa.contained)
+ self.assertEqual([], sa.containers)
+ self.assertEqual([], sa.internals)
+
diff --git a/tests/test_api_references_f_g_lines.py b/tests/test_api_references_f_g_lines.py
new file mode 100644
index 0000000..90875ec
--- /dev/null
+++ b/tests/test_api_references_f_g_lines.py
@@ -0,0 +1,96 @@
+import gfapy
+import unittest
+
+class TestAPIReferencesFGLines(unittest.TestCase):
+
+ def test_fragments_references(self):
+ g = gfapy.Gfa()
+ f = gfapy.Line("F\ta\tf+\t0\t200\t281\t502$\t*")
+ self.assertEqual("a", f.sid)
+ self.assertEqual(gfapy.OrientedLine("f","+"), f.external)
+ sa = gfapy.Line("S\ta\t100\t*")
+ g.append(sa)
+ g.append(f)
+ self.assertEqual(sa, f.sid)
+ f.disconnect()
+ self.assertEqual("a", f.sid)
+ # disconnection of segment cascades on fragments
+ g.append(f)
+ assert(f.is_connected())
+ self.assertEqual(sa, f.sid)
+ sa.disconnect()
+ assert(not f.is_connected())
+ self.assertEqual("a", f.sid)
+
+ def test_fragments_backreferences(self):
+ g = gfapy.Gfa()
+ f1 = gfapy.Line("F\ta\tf+\t0\t200\t281\t502$\t*")
+ f2 = gfapy.Line("F\ta\tf+\t240\t440$\t0\t210\t*")
+ sa = gfapy.Line("S\ta\t100\t*")
+ g.append(sa)
+ g.append(f1)
+ g.append(f2)
+ self.assertEqual([f1,f2], sa.fragments)
+ # disconnection effects
+ f1.disconnect()
+ self.assertEqual([f2], sa.fragments)
+ sa.disconnect()
+ self.assertEqual([], sa.fragments)
+
+ def test_gap_references(self):
+ g = gfapy.Gfa()
+ gap = gfapy.Line("G\t*\ta+\tb+\t90\t*")
+ self.assertEqual(gfapy.OrientedLine("a","+"), gap.sid1)
+ self.assertEqual(gfapy.OrientedLine("b","+"), gap.sid2)
+ sa = gfapy.Line("S\ta\t100\t*");
+ g.append(sa)
+ sb = gfapy.Line("S\tb\t100\t*");
+ g.append(sb)
+ g.append(gap)
+ self.assertEqual(sa, gap.sid1.line)
+ self.assertEqual(sb, gap.sid2.line)
+ gap.disconnect()
+ self.assertEqual("a", gap.sid1.line)
+ self.assertEqual("b", gap.sid2.line)
+ # disconnection of segment cascades on gaps
+ g.append(gap)
+ assert(gap.is_connected())
+ self.assertEqual(sa, gap.sid1.line)
+ sa.disconnect()
+ assert(not gap.is_connected())
+ self.assertEqual("a", gap.sid1.line)
+
+ def test_gaps_backreferences(self):
+ g = gfapy.Gfa()
+ sa = gfapy.Line("S\ta\t100\t*")
+ g.append(sa)
+ # gaps
+ s = {}
+ gap = {}
+ for name in ["b", "c", "d", "e", "f", "g", "h", "i"]:
+ s[name] = gfapy.Line("S\t{}\t100\t*".format(name))
+ g.append(s[name])
+ for name in \
+ ["a+b+", "a+c-", "a-d+", "a-e-", "f+a+", "g+a-", "h-a+", "i-a-"]:
+ gap[name] = gfapy.Line("\t".join(
+ ["G","*",name[0:2],name[2:4],"200","*"]))
+ g.append(gap[name])
+ # gaps_[LR]()
+ self.assertEqual([gap["a-d+"], gap["a-e-"], gap["f+a+"], gap["h-a+"]],
+ sa.gaps_L)
+ self.assertEqual([gap["a+b+"], gap["a+c-"], gap["g+a-"], gap["i-a-"]],
+ sa.gaps_R)
+ # gaps()
+ self.assertEqual(sa.gaps_L, sa.gaps_of_end("L"))
+ self.assertEqual(sa.gaps_R, sa.gaps_of_end("R"))
+ self.assertEqual(sa.gaps_L + sa.gaps_R, sa.gaps)
+ # disconnection effects
+ gap["a-d+"].disconnect()
+ self.assertEqual([gap["a-e-"], gap["f+a+"], gap["h-a+"]], sa.gaps_L)
+ sa.disconnect()
+ self.assertEqual([], sa.gaps_L)
+ self.assertEqual([], sa.gaps_R)
+ self.assertEqual([], sa.gaps_of_end("L"))
+ self.assertEqual([], sa.gaps_of_end("R"))
+ self.assertEqual([], sa.gaps)
+
diff --git a/tests/test_api_references_groups.py b/tests/test_api_references_groups.py
new file mode 100644
index 0000000..210deeb
--- /dev/null
+++ b/tests/test_api_references_groups.py
@@ -0,0 +1,259 @@
+import gfapy
+import unittest
+
+class TestAPIReferencesGroups(unittest.TestCase):
+
+ def test_paths_references(self):
+ g = gfapy.Gfa()
+ s = {}; l = {}
+ for name in ["a", "b", "c", "d", "e", "f"]:
+ s[name] = gfapy.Line("S\t{}\t*".format(name))
+ g.append(s[name])
+ path = gfapy.Line("P\tp1\tf+,a+,b+,c-,e+\t*")
+ self.assertEqual([gfapy.OrientedLine("f","+"), gfapy.OrientedLine("a","+"),
+ gfapy.OrientedLine("b","+"), gfapy.OrientedLine("c","-"),
+ gfapy.OrientedLine("e","+")], path.segment_names)
+ self.assertEqual([], path.links)
+ # connection
+ g.append(path)
+ # add links
+ for name in ["a+b+", "b+c-", "c-d+", "e-c+", "a-f-"]:
+ l[name] = gfapy.Line("\t".join((list("L{}*".format(name)))))
+ g.append(l[name])
+ # segment_names
+ self.assertEqual([gfapy.OrientedLine(s["f"],"+"),
+ gfapy.OrientedLine(s["a"],"+"),
+ gfapy.OrientedLine(s["b"],"+"),
+ gfapy.OrientedLine(s["c"],"-"),
+ gfapy.OrientedLine(s["e"],"+")], path.segment_names)
+ # links
+ self.assertEqual([gfapy.OrientedLine(l["a-f-"],"-"),
+ gfapy.OrientedLine(l["a+b+"],"+"),
+ gfapy.OrientedLine(l["b+c-"],"+"),
+ gfapy.OrientedLine(l["e-c+"],"-")],
+ path.links)
+ # path disconnection
+ path.disconnect()
+ self.assertEqual([gfapy.OrientedLine("f","+"),
+ gfapy.OrientedLine("a","+"),
+ gfapy.OrientedLine("b","+"),
+ gfapy.OrientedLine("c","-"),
+ gfapy.OrientedLine("e","+")], path.segment_names)
+ self.assertEqual([], path.links)
+ g.append(path)
+ # links disconnection cascades on paths:
+ assert(path.is_connected())
+ l["a-f-"].disconnect()
+ assert(not path.is_connected())
+ self.assertEqual([gfapy.OrientedLine("f","+"),
+ gfapy.OrientedLine("a","+"),
+ gfapy.OrientedLine("b","+"),
+ gfapy.OrientedLine("c","-"),
+ gfapy.OrientedLine("e","+")], path.segment_names)
+ g.append(path)
+ g.append(l["a-f-"])
+ # segment disconnection cascades on links and then paths:
+ assert(path.is_connected())
+ s["a"].disconnect()
+ assert(not path.is_connected())
+ self.assertEqual([gfapy.OrientedLine("f","+"),
+ gfapy.OrientedLine("a","+"),
+ gfapy.OrientedLine("b","+"),
+ gfapy.OrientedLine("c","-"),
+ gfapy.OrientedLine("e","+")], path.segment_names)
+ self.assertEqual([], path.links)
+
+ def test_paths_backreferences(self):
+ g = gfapy.Gfa()
+ s = {}; l = {}
+ for name in ["a", "b", "c", "d", "e", "f"]:
+ s[name] = gfapy.Line("S\t{}\t*".format(name))
+ g.append(s[name])
+ path = gfapy.Line("P\tp1\tf+,a+,b+,c-,e+\t*")
+ g.append(path)
+ for sname in ["a", "b", "c", "e", "f"]:
+ self.assertEqual([path], s[sname].paths)
+ self.assertEqual([], s["d"].paths)
+ for name in ["a+b+", "b+c-", "c-d+", "e-c+", "a-f-"]:
+ l[name] = gfapy.Line("\t".join(list("L{}*".format(name))))
+ g.append(l[name])
+ for lname in ["a+b+", "b+c-", "e-c+", "a-f-"]:
+ self.assertEqual([path], l[lname].paths)
+ self.assertEqual([], l["c-d+"].paths)
+ # disconnection effects
+ path.disconnect()
+ for lname in ["a+b+", "b+c-", "c-d+", "e-c+", "a-f-"]:
+ self.assertEqual([], l[lname].paths)
+ for sname in ["a", "b", "c", "d", "e", "f"]:
+ self.assertEqual([], s[sname].paths)
+ # reconnection
+ path.connect(g)
+ for sname in ["a", "b", "c", "e", "f"]:
+ self.assertEqual([path], s[sname].paths)
+ self.assertEqual([], s["d"].paths)
+ for lname in ["a+b+", "b+c-", "e-c+", "a-f-"]:
+ self.assertEqual([path], l[lname].paths)
+ self.assertEqual([], l["c-d+"].paths)
+
+ def test_gfa2_paths_references(self):
+ g = gfapy.Gfa()
+ s = {}
+ for name in ["a", "b", "c", "d", "e", "f"]:
+ s[name] = gfapy.Line("S\t{}\t1000\t*".format(name))
+ g.append(s[name])
+ path1_part1 = gfapy.Line("O\tp1\tp2- b+")
+ path1_part2 = gfapy.Line("O\tp1\tc- e-c+-")
+ path1 = path1_part2
+ path2 = gfapy.Line("O\tp2\tf+ a+")
+ self.assertEqual([gfapy.OrientedLine("p2","-"),
+ gfapy.OrientedLine("b","+")], path1_part1.items)
+ self.assertEqual([gfapy.OrientedLine("c","-"),
+ gfapy.OrientedLine("e-c+","-")], path1_part2.items)
+ self.assertEqual([gfapy.OrientedLine("f","+"),
+ gfapy.OrientedLine("a","+")], path2.items)
+ with self.assertRaises(gfapy.RuntimeError): path1.captured_path
+ with self.assertRaises(gfapy.RuntimeError): path2.captured_path
+ # connection
+ g.append(path1_part1)
+ g.append(path1_part2)
+ g.append(path2)
+ # edges
+ e = {}
+ for name in ["a+b+", "b+c-", "c-d+", "e-c+", "a-f-", "f-b+"]:
+ coord1 = "900\t1000$" if (name[1] == "+") else "0\t100"
+ coord2 = "0\t100" if (name[3] == "+") else "900\t1000$"
+ e[name] = gfapy.Line("E\t{}\t{}\t{}\t{}\t{}\t100M".format(name,name[0:2],name[2:4],coord1,coord2))
+ g.append(e[name])
+ # items
+ self.assertEqual([gfapy.OrientedLine(path2,"-"),
+ gfapy.OrientedLine(s["b"],"+"),
+ gfapy.OrientedLine(s["c"],"-"),
+ gfapy.OrientedLine(e["e-c+"],"-")],
+ path1.items)
+ self.assertEqual([gfapy.OrientedLine(s["f"],"+"),
+ gfapy.OrientedLine(s["a"],"+")],
+ path2.items)
+ # induced set
+ self.assertEqual([gfapy.OrientedLine(s["f"],"+"),
+ gfapy.OrientedLine(e["a-f-"],"-"),
+ gfapy.OrientedLine(s["a"],"+")],
+ path2.captured_path)
+ self.assertEqual([gfapy.OrientedLine(s["a"],"-"),
+ gfapy.OrientedLine(e["a-f-"],"+"),
+ gfapy.OrientedLine(s["f"],"-"),
+ gfapy.OrientedLine(e["f-b+"],"+"),
+ gfapy.OrientedLine(s["b"],"+"),
+ gfapy.OrientedLine(e["b+c-"],"+"),
+ gfapy.OrientedLine(s["c"],"-"),
+ gfapy.OrientedLine(e["e-c+"],"-"),
+ gfapy.OrientedLine(s["e"],"+")],
+ path1.captured_path)
+ # backreferences
+ for line in [path2, s["b"], s["c"], e["e-c+"]]:
+ self.assertEqual([path1], line.paths)
+ for line in [s["f"], s["a"]]:
+ self.assertEqual([path2], line.paths)
+ # group disconnection
+ path1.disconnect()
+ self.assertEqual([gfapy.OrientedLine("p2","-"), gfapy.OrientedLine("b","+"), gfapy.OrientedLine("c","-"), gfapy.OrientedLine("e-c+","-")],
+ path1.items)
+ with self.assertRaises(gfapy.RuntimeError):
+ path1.captured_path
+ self.assertEqual([gfapy.OrientedLine(s["f"],"+"), gfapy.OrientedLine(s["a"],"+")], path2.items)
+ for line in [path2, s["b"], s["c"], e["e-c+"]]:
+ self.assertEqual([], line.paths)
+ # group reconnection
+ g.append(path1)
+ self.assertEqual([gfapy.OrientedLine(path2,"-"), gfapy.OrientedLine(s["b"],"+"), gfapy.OrientedLine(s["c"],"-"), gfapy.OrientedLine(e["e-c+"],"-")],
+ path1.items)
+ self.assertEqual([gfapy.OrientedLine(s["f"],"+"), gfapy.OrientedLine(s["a"],"+")], path2.items)
+ for line in [path2, s["b"], s["c"], e["e-c+"]]:
+ self.assertEqual([path1], line.paths)
+ # item disconnection cascades on group
+ assert(path1.is_connected())
+ assert(path2.is_connected())
+ e["e-c+"].disconnect()
+ assert(not path1.is_connected())
+ assert(path2.is_connected())
+ g.append(e["e-c+"])
+ g.append(path1)
+ # two-level disconnection cascade
+ assert(path1.is_connected())
+ assert(path2.is_connected())
+ s["f"].disconnect()
+ assert(not path2.is_connected())
+ assert(not path1.is_connected())
+
+ def test_sets_references(self):
+ g = gfapy.Gfa()
+ s = {}
+ set1 = gfapy.Line("U\tset1\tb set2 c e-c+")
+ set2 = gfapy.Line("U\tset2\tg c-d+ path1")
+ path1 = gfapy.Line("O\tpath1\tf+ a+")
+ self.assertEqual(["b", "set2", "c", "e-c+"], set1.items)
+ self.assertEqual(["g", "c-d+", "path1"], set2.items)
+ # induced set of non-connected cannot be computed
+ with self.assertRaises(gfapy.RuntimeError): set1.induced_set
+ with self.assertRaises(gfapy.RuntimeError): set2.induced_set
+ # connection
+ g.append(set1)
+ g.append(set2)
+ # induced set cannot be computed, as long as not all references are solved
+ with self.assertRaises(gfapy.RuntimeError): set1.induced_set
+ # connect items
+ g.append(path1)
+ for name in ["a", "b", "c", "d", "e", "f", "g"]:
+ s[name] = gfapy.Line("S\t"+"{}".format(name)+"\t1000\t*")
+ g.append(s[name])
+ e = {}
+ for name in ["a+b+", "b+c-", "c-d+", "e-c+", "a-f-"]:
+ coord1 = "900\t1000$" if (name[1] == "+") else "0\t100"
+ coord2 = "0\t100" if (name[3] == "+") else "900\t1000$"
+ e[name] = gfapy.Line("E\t{}\t{}\t{}\t{}\t{}\t100M".format(name,name[0:2],name[2:4],coord1,coord2))
+ g.append(e[name])
+ # items
+ self.assertEqual([s["b"], set2, s["c"], e["e-c+"]], set1.items)
+ self.assertEqual([s["g"], e["c-d+"], path1], set2.items)
+ # induced set
+ self.assertEqual([gfapy.OrientedLine(s["f"],"+"), gfapy.OrientedLine(s["a"],"+")],
+ path1.captured_segments)
+ self.assertEqual(set([x.name for x in [s["g"], s["c"], s["d"], s["f"], s["a"]]]),
+ set([x.name for x in set2.induced_segments_set]))
+ self.assertEqual(set([x.name for x in [s["b"], s["g"], s["c"], s["d"], s["f"], s["a"], s["e"]]]),
+ set([x.name for x in set1.induced_segments_set]))
+ self.assertEqual(set([x.name for x in [e["c-d+"], e["a-f-"]]]),
+ set([x.name for x in set2.induced_edges_set]))
+ self.assertEqual([e["a+b+"],e["b+c-"],e["c-d+"],e["e-c+"],e["a-f-"]],
+ set1.induced_edges_set)
+ self.assertEqual(set([x.name for x in set1.induced_segments_set + set1.induced_edges_set]),
+ set([x.name for x in set1.induced_set]))
+ # backreferences
+ for line in [s["b"], set2, s["c"], e["e-c+"]]:
+ self.assertEqual([set1], line.sets)
+ for line in [s["g"], e["c-d+"], path1]:
+ self.assertEqual([set2], line.sets)
+ # group disconnection
+ set1.disconnect()
+ self.assertEqual(["b", "set2", "c", "e-c+"], set1.items)
+ for line in [s["b"], set2, s["c"], e["e-c+"]]:
+ self.assertEqual([], line.sets)
+ # group reconnection
+ g.append(set1)
+ self.assertEqual([s["b"], set2, s["c"], e["e-c+"]], set1.items)
+ for line in [s["b"], set2, s["c"], e["e-c+"]]:
+ self.assertEqual([set1], line.sets)
+ # item disconnection cascades on group
+ assert(set1.is_connected())
+ e["e-c+"].disconnect()
+ assert(not set1.is_connected())
+ g.append(e["e-c+"])
+ g.append(set1)
+ # multilevel disconnection cascade
+ assert(path1.is_connected())
+ assert(set2.is_connected())
+ assert(set1.is_connected())
+ s["f"].disconnect()
+ assert(not path1.is_connected())
+ assert(not set2.is_connected())
+ assert(not set1.is_connected())
+
diff --git a/tests/test_api_references_virtual.py b/tests/test_api_references_virtual.py
new file mode 100644
index 0000000..f66d458
--- /dev/null
+++ b/tests/test_api_references_virtual.py
@@ -0,0 +1,131 @@
+import gfapy
+import unittest
+
+class TestAPIReferencesVirtual(unittest.TestCase):
+
+ def test_edges_gaps_create_virtual_segments(self):
+ data = [
+ ["gfa1", {"lines":["L\ta\t+\tb\t-\t*", "C\ta\t-\tb\t+\t100\t*"],
+ "m1":"oriented_from", "m2":"oriented_to",
+ "sA":"S\ta\t*", "sB":"S\tb\t*",
+ "collection":"edges"}],
+ ["gfa2", {"lines":["E\t*\ta+\tb-\t0\t100\t900\t1000$\t*"],
+ "m1":"sid1", "m2":"sid2",
+ "sA":"S\ta\t1000\t*", "sB":"S\tb\t1000\t*",
+ "collection":"edges"}],
+ ["gfa2", {"lines":["G\t*\ta+\tb-\t1000\t100"],
+ "m1":"sid1", "m2":"sid2",
+ "sA":"S\ta\t1000\t*", "sB":"S\tb\t1000\t*",
+ "collection":"gaps"}]
+ ]
+ for v,values in data:
+ for linestr in values["lines"]:
+ g = gfapy.Gfa(version=v)
+ line = gfapy.Line(linestr)
+ g.append(line)
+ self.assertEqual(set(["a", "b"]), set([x.name for x in g.segments]))
+ for s in g.segments: assert(s.virtual)
+ sA = gfapy.Line(values["sA"])
+ g.append(sA)
+ self.assertEqual(set(["a", "b"]), set([x.name for x in g.segments]))
+ assert(not g.segment("a").virtual)
+ assert(g.segment("b").virtual)
+ self.assertEqual(sA, getattr(line,values["m1"]).line)
+ self.assertEqual(sA, g.segment("a"))
+ self.assertEqual([line], getattr(sA,values["collection"]))
+ sB = gfapy.Line(values["sB"])
+ g.append(sB)
+ self.assertEqual(set(["a", "b"]), set([x.name for x in g.segments]))
+ assert(not g.segment("b").virtual)
+ self.assertEqual(sB, getattr(line,values["m2"]).line)
+ self.assertEqual(sB, g.segment("b"))
+ self.assertEqual([line], getattr(sB,values["collection"]))
+
+ def test_fragments_create_virtual_segments(self):
+ g = gfapy.Gfa(version="gfa2")
+ fr = gfapy.Line("F\ta\tread10-\t0\t10\t990\t1000$\t*")
+ g.append(fr)
+ self.assertEqual(["a"], [x.name for x in g.segments])
+ assert(g.segment("a").virtual)
+ sA = gfapy.Line("S\ta\t1000\t*")
+ g.append(sA)
+ self.assertEqual(["a"], [x.name for x in g.segments])
+ assert(not g.segment("a").virtual)
+ self.assertEqual(sA, fr.sid)
+ self.assertEqual(sA, g.segment("a"))
+ self.assertEqual([fr], sA.fragments)
+
+ def test_paths_create_virtual_links(self):
+ g = gfapy.Gfa(version="gfa1")
+ path = gfapy.Line("P\tp1\tb+,ccc-,e+\t10M1I2M,15M")
+ g.append(path)
+ for i in path.segment_names: assert(i.line.virtual)
+ self.assertEqual(set(["b", "ccc", "e"]), set([x.name for x in g.segments]))
+ sB = gfapy.Line("S\tb\t*")
+ g.append(sB)
+ assert(not path.segment_names[0].line.virtual)
+ self.assertEqual(sB, path.segment_names[0].line)
+ self.assertEqual([path], sB.paths)
+ for i in path.links: assert(i.line.virtual)
+ l = gfapy.Line("L\tccc\t+\tb\t-\t2M1D10M")
+ g.append(l)
+ assert(not path.links[0].line.virtual)
+ self.assertEqual(l, path.links[0].line)
+ self.assertEqual([path], l.paths)
+ l = gfapy.Line("L\tccc\t-\te\t+\t15M")
+ g.append(l)
+ assert(not path.links[1].line.virtual)
+ self.assertEqual(l, path.links[1].line)
+ self.assertEqual([path], l.paths)
+
+ def test_ordered_groups_create_virtual_unknown_records(self):
+ g = gfapy.Gfa(version="gfa2")
+ path = gfapy.Line("O\tp1\tchildpath- b+ c- edge-")
+ g.append(path)
+ for i in path.items:
+ assert(i.line.virtual)
+ self.assertEqual("\n", i.line.record_type)
+ childpath = gfapy.Line("O\tchildpath\tf+ a+")
+ g.append(childpath)
+ assert(not path.items[0].line.virtual)
+ self.assertEqual(childpath, path.items[0].line)
+ self.assertEqual([path], childpath.paths)
+ sB = gfapy.Line("S\tb\t1000\t*")
+ g.append(sB)
+ assert(not path.items[1].line.virtual)
+ self.assertEqual(sB, path.items[1].line)
+ self.assertEqual([path], sB.paths)
+ edge = gfapy.Line("E\tedge\te-\tc+\t0\t100\t900\t1000$\t*")
+ g.append(edge)
+ assert(not path.items[-1].line.virtual)
+ self.assertEqual(edge, path.items[-1].line)
+ self.assertEqual([path], edge.paths)
+
+ def test_unordered_groups_create_virtual_unknown_records(self):
+ g = gfapy.Gfa(version="gfa2")
+ set = gfapy.Line("U\tset\tchildpath b childset edge")
+ g.append(set)
+ for i in set.items:
+ assert(i.virtual)
+ self.assertEqual("\n", i.record_type)
+ childpath = gfapy.Line("O\tchildpath\tf+ a+")
+ g.append(childpath)
+ assert(not set.items[0].virtual)
+ self.assertEqual(childpath, set.items[0])
+ self.assertEqual([set], childpath.sets)
+ sB = gfapy.Line("S\tb\t1000\t*")
+ g.append(sB)
+ assert(not set.items[1].virtual)
+ self.assertEqual(sB, set.items[1])
+ self.assertEqual([set], sB.sets)
+ childset = gfapy.Line("U\tchildset\tg edge2")
+ g.append(childset)
+ assert(not set.items[2].virtual)
+ self.assertEqual(childset, set.items[2])
+ self.assertEqual([set], childset.sets)
+ edge = gfapy.Line("E\tedge\te-\tc+\t0\t100\t900\t1000$\t*")
+ g.append(edge)
+ assert(not set.items[3].virtual)
+ self.assertEqual(edge, set.items[3])
+ self.assertEqual([set], edge.sets)
+
diff --git a/tests/test_api_rename_lines.py b/tests/test_api_rename_lines.py
new file mode 100644
index 0000000..44448cf
--- /dev/null
+++ b/tests/test_api_rename_lines.py
@@ -0,0 +1,23 @@
+import gfapy
+import unittest
+
+class TestAPIRenameLines(unittest.TestCase):
+
+ def test_rename(self):
+ gfa = gfapy.Gfa(["S\t0\t*", "S\t1\t*", "S\t2\t*",
+ "L\t0\t+\t2\t-\t12M", "C\t1\t+\t0\t+\t12\t12M", "P\t4\t2+,0-\t12M"])
+ gfa.segment("0").name = "X"
+ with self.assertRaises(gfapy.NotFoundError): gfa.try_get_segment("0")
+ self.assertEqual(set(["X", "1", "2"]), set(gfa.segment_names))
+ self.assertEqual("L\tX\t+\t2\t-\t12M", str(gfa.dovetails[0]))
+ self.assertEqual("C\t1\t+\tX\t+\t12\t12M", str(gfa.containments[0]))
+ self.assertEqual("P\t4\t2+,X-\t12M", str(gfa.paths[0]))
+ with self.assertRaises(gfapy.NotFoundError): gfa.try_get_segment("0").dovetails_of_end("R")
+ self.assertEqual("L\tX\t+\t2\t-\t12M", str(gfa.segment("X").dovetails_of_end("R")[0]))
+ self.assertEqual("C\t1\t+\tX\t+\t12\t12M",
+ str(gfa.try_get_segment("1").edges_to_contained[0]))
+ with self.assertRaises(gfapy.NotFoundError): gfa.try_get_segment("0").containers
+ self.assertEqual("C\t1\t+\tX\t+\t12\t12M",
+ str(gfa.try_get_segment("X").edges_to_containers[0]))
+ self.assertEqual("P\t4\t2+,X-\t12M", str(gfa.try_get_segment("X").paths[0]))
+
diff --git a/tests/test_api_tags.py b/tests/test_api_tags.py
new file mode 100644
index 0000000..b03c318
--- /dev/null
+++ b/tests/test_api_tags.py
@@ -0,0 +1,376 @@
+import unittest
+import gfapy
+
+class TestApiTags(unittest.TestCase):
+
+ def test_predefined_tags(self):
+ # correct type:
+ gfapy.line.Header(["H", "VN:Z:1"], vlevel=3) # nothing raised
+ # custom tags with the same letters as predefined tags but lower case
+ gfapy.line.Header(["H", "vn:i:1"], vlevel=3) # nothing raised
+ # wrong type
+ gfapy.line.Header(["H", "VN:i:1"], vlevel=0) # nothing raised
+ for level in [1,2,3]:
+ self.assertRaises(gfapy.TypeError,
+ gfapy.line.Header, ["H", "VN:i:1"], vlevel=level)
+
+ def test_custom_tags(self):
+ for version in ["gfa1","gfa2"]:
+ # upper case
+ gfapy.line.Header(["H", "ZZ:Z:1"], version=version, vlevel=0) # nothing raised
+ gfapy.line.Header("H\tZZ:Z:1", version=version, vlevel=0) # nothing raised
+ gfapy.line.Header("H\tZZ:Z:1", version=version, vlevel=0) # nothing raised
+ gfapy.Gfa("H\tZZ:Z:1", version=version, vlevel=0) # nothing raised
+ for level in [1,2,3]:
+ self.assertRaises(gfapy.FormatError,
+ gfapy.line.Header,["H", "ZZ:Z:1"], version=version, vlevel=level)
+ self.assertRaises(gfapy.FormatError,
+ gfapy.Line, "H\tZZ:Z:1", version=version, vlevel=level)
+ self.assertRaises(gfapy.FormatError,
+ gfapy.Gfa, "H\tZZ:Z:1", version=version, vlevel=level)
+ # lower case
+ for level in [0,1,2,3]:
+ gfapy.line.Header(["H", "zz:Z:1"], version=version, vlevel=0) # nothing raised
+ gfapy.Line("H\tzz:Z:1", version=version, vlevel=0) # nothing raised
+ gfapy.Gfa("H\tzz:Z:1", version=version, vlevel=0) # nothing raised
+
+ def test_wrong_tag_format(self):
+ self.assertRaises(gfapy.FormatError, gfapy.line.Header, ["H", "VN i:1"])
+ self.assertRaises(gfapy.FormatError, gfapy.line.Header, ["H", "vna:i:1"])
+ self.assertRaises(gfapy.FormatError, gfapy.line.Header, ["H", "VN:ZZ:1"])
+ # the tag value itself may contain ':', so additional colons are not an error
+ self.assertEqual("1:1:1", gfapy.line.Header(["H", "VN:Z:1:1:1"]).VN)
+
+ def test_wrong_tag_data(self):
+ # validation level 0
+ # - some wrong data passes through
+ gfapy.line.Header(["H", "zz:B:i,1,1,A"], vlevel=0) # nothing raised
+ gfapy.line.Header(["H", "zz:Z:i,\t1,1,A"], vlevel=0) # nothing raised
+ # - some errors are caught
+ self.assertRaises(gfapy.FormatError, gfapy.line.Header, ["H", "zz:i:1A"], vlevel=0)
+ # level > 0: wrong data is caught
+ for level in [1,2,3]:
+ self.assertRaises(gfapy.ValueError,
+ gfapy.line.Header,["H", "zz:B:i,1,1,A"],vlevel=level)
+ self.assertRaises(gfapy.FormatError,
+ gfapy.line.Header,["H", "zz:i:1A"],vlevel=level)
+
+ def test_duplicate_tag(self):
+ for version in ["gfa1","gfa2"]:
+ gfapy.line.Header(["H", "zz:i:1", "VN:Z:1", "zz:i:2"],
+ version=version, vlevel=0) # nothing raised
+ gfapy.Line("H\tzz:i:1\tVN:Z:0\tzz:i:2",version=version,
+ vlevel=0) # nothing raised
+ gfapy.Line("H\tzz:i:1\tVN:Z:0\tzz:i:2",version=version,
+ vlevel=0) # nothing raised
+ for level in [1,2,3]:
+ self.assertRaises(gfapy.NotUniqueError, gfapy.line.Header,
+ ["H", "zz:i:1", "VN:Z:0", "zz:i:2"], version=version, vlevel=level)
+ self.assertRaises(gfapy.NotUniqueError, gfapy.Line,
+ "H\tzz:i:1\tVN:Z:0\tzz:i:2",version=version, vlevel=level)
+ self.assertRaises(gfapy.NotUniqueError, gfapy.Gfa,
+ "H\tzz:i:1\tVN:Z:#{version}\tzz:i:2", version=version, vlevel=level)
+
+ def test_validate_field(self):
+ l = gfapy.line.Header(["H", "zz:i:1", "VN:Z:1.0"], version="gfa1", vlevel=0)
+ l.zz = "x"
+ self.assertRaises(gfapy.FormatError, l.validate_field, "zz")
+ l.set_datatype("zz", "Z")
+ l.validate_field("zz") # nothing raised
+
+ def test_validate(self):
+ # wrong tag value
+ l = gfapy.line.Header(["H", "zz:i:1", "VN:Z:1.0"], version="gfa1", vlevel=0)
+ l.zz = "x"
+ self.assertRaises(gfapy.FormatError, l.validate)
+ # wrong predefined tag name
+ l = gfapy.line.Header(["H", "zz:i:1", "VZ:Z:1.0"], version="gfa1", vlevel=0)
+ self.assertRaises(gfapy.FormatError, l.validate)
+ # wrong predefined tag datatype
+ l = gfapy.line.Header(["H", "zz:i:1", "VN:i:1"], version="gfa1", vlevel=0)
+ self.assertRaises(gfapy.TypeError, l.validate)
+
+ # tags used in the get/set tests:
+ # - KC -> predefined, set
+ # - RC -> predefined, not set
+ # - XX -> custom, invalid (upper case)
+ # - xx -> custom, set
+ # - zz -> custom, not set
+
+ def test_get_tag_content(self):
+ for version in ["gfa1","gfa2"]:
+ for level in [0,1,2,3]:
+ l = gfapy.Line(["S", "12","*","xx:f:1.3","KC:i:10"], vlevel=level)
+ # tagnames
+ self.assertEqual(sorted(["xx", "KC"]), sorted(l.tagnames))
+ # test presence of tag
+ assert(l.KC)
+ assert(not l.RC)
+ with self.assertRaises(AttributeError): l.XX
+ assert(l.xx)
+ assert(not l.zz)
+ # get tag content, fieldname methods
+ self.assertEqual(10, l.KC)
+ self.assertEqual(None, l.RC)
+ with self.assertRaises(AttributeError): l.XX
+ self.assertEqual(1.3, l.xx)
+ self.assertEqual(None, l.zz)
+ # get tag content, get()
+ self.assertEqual(10, l.get("KC"))
+ self.assertEqual(None, l.get("RC"))
+ self.assertEqual(None, l.get("XX"))
+ self.assertEqual(1.3, l.get("xx"))
+ self.assertEqual(None, l.get("zz"))
+ # banged version, fieldname methods
+ self.assertEqual(10, l.try_get_KC())
+ self.assertRaises(gfapy.NotFoundError, l.try_get_RC)
+ with self.assertRaises(AttributeError): l.try_get_XX()
+ self.assertEqual(1.3, l.try_get_xx())
+ with self.assertRaises(gfapy.NotFoundError):
+ l.try_get_zz()
+ # banged version, get()
+ self.assertEqual(10, l.try_get("KC"))
+ self.assertRaises(gfapy.NotFoundError, l.try_get, "RC")
+ self.assertRaises(gfapy.NotFoundError, l.try_get, "XX")
+ self.assertEqual(1.3, l.try_get("xx"))
+ self.assertRaises(gfapy.NotFoundError, l.try_get, "zz")
+ # get tag datatype
+ self.assertEqual("i", l.get_datatype("KC"))
+ self.assertEqual("i", l.get_datatype("RC"))
+ self.assertEqual(None, l.get_datatype("XX"))
+ self.assertEqual("f", l.get_datatype("xx"))
+ self.assertEqual(None, l.get_datatype("zz"))
+ # as string: content only
+ self.assertEqual("10", l.field_to_s("KC"))
+ self.assertRaises(gfapy.NotFoundError, l.field_to_s, "RC")
+ self.assertRaises(gfapy.NotFoundError, l.field_to_s, "XX")
+ self.assertEqual("1.3", l.field_to_s("xx"))
+ self.assertRaises(gfapy.NotFoundError, l.field_to_s, "zz")
+ # as string: complete
+ self.assertEqual("KC:i:10", l.field_to_s("KC", tag=True))
+ self.assertEqual("xx:f:1.3", l.field_to_s("xx", tag=True))
+ ## # respond_to? normal version
+ ## assert(l.respond_to?("KC"))
+ ## assert(l.respond_to?("RC"))
+ ## assert(not l.respond_to?("XX"))
+ ## assert(l.respond_to?("xx"))
+ ## assert(l.respond_to?("zz"))
+ ## # respond_to? banged version
+ ## assert(l.respond_to?("KC"!))
+ ## assert(l.respond_to?("RC"!))
+ ## assert(not l.respond_to?("XX"!))
+ ## assert(l.respond_to?("xx"!))
+ ## assert(l.respond_to?("zz"!))
+
+ def test_set_tag_content(self):
+ for version in ["gfa1","gfa2"]:
+ for level in [0,1,2,3]:
+ l = gfapy.Line(["S", "12","*","xx:f:13","KC:i:10"], vlevel=level)
+ # set tag content, fieldname methods
+ l.KC = 12; self.assertEqual(12, l.KC) # nothing raised
+ l.RC = 12; self.assertEqual(12, l.RC) # nothing raised
+ l.xx = 1.2; self.assertEqual(1.2, l.xx) # nothing raised
+ l.zz = 1.2; self.assertEqual(1.2, l.zz) # nothing raised
+ # set tag content, set()
+ l.set("KC", 14) # nothing raised; self.assertEqual(14, l.KC)
+ l.set("RC", 14) # nothing raised; self.assertEqual(14, l.RC)
+ l.set("xx", 1.4) # nothing raised; self.assertEqual(1.4, l.xx)
+ l.set("zz", 1.4) # nothing raised; self.assertEqual(1.4, l.zz)
+ # check respond_to method
+ ### assert(l.has_attr("KC"))
+ ### assert(l.has_attr("RC"))
+ ### assert(not l.respond_to?("XX"=))
+ ### assert(l.respond_to?("xx"=))
+ ### assert(l.respond_to?("zz"=))
+ # set datatype for predefined field
+ self.assertRaises(gfapy.RuntimeError, l.set_datatype, "KC", "Z")
+ self.assertRaises(gfapy.RuntimeError, l.set_datatype, "RC","Z")
+ # set datatype for non-existing custom tag
+ l.set_datatype("zz", "i") # nothing raised
+ if level == 0:
+ l.set_datatype("XX", "Z") # nothing raised
+ elif level >= 1:
+ self.assertRaises(gfapy.FormatError, l.set_datatype, "XX", "Z")
+ # change datatype for existing custom tag
+ l.xx = 1.1 # nothing raised
+ l.xx = "1.1" # nothing raised
+ if level == 2:
+ l.xx = "1A" # nothing raised
+ with self.assertRaises(gfapy.Error):
+ str(l)
+ elif level == 3:
+ with self.assertRaises(gfapy.FormatError):
+ l.xx = "1A"
+ l.set_datatype("xx", "Z"); l.xx = "1A" # nothing raised
+ # unknown datatype
+ self.assertRaises(gfapy.ArgumentError, l.set_datatype, "xx", "P")
+
+ def test_delete_tag(self):
+ for version in ["gfa1","gfa2"]:
+ for level in [0,1,2,3]:
+ l = gfapy.Line(["S", "12","*","xx:f:13","KC:i:10"], vlevel=level)
+ # delete method
+ l.delete("KC") # nothing raised
+ self.assertEqual(None, l.KC)
+ self.assertEqual(["xx"], l.tagnames)
+ l.delete("RC") # nothing raised
+ l.delete("XX") # nothing raised
+ l.delete("xx") # nothing raised
+ self.assertEqual([], l.tagnames)
+ l.delete("zz") # nothing raised
+ l = gfapy.Line(["S", "12","*","xx:f:13","KC:i:10"], vlevel=level)
+ # set to None
+ l.set("KC",None) # nothing raised
+ self.assertEqual(None, l.KC)
+ self.assertEqual(["xx"], l.tagnames)
+ l.set("RC",None) # nothing raised
+ if level == 0:
+ l.set("XX",None) # nothing raised
+ else:
+ self.assertRaises(gfapy.FormatError,l.set,"XX",None)
+ l.set("xx",None) # nothing raised
+ self.assertEqual([], l.tagnames)
+ l.set("zz",None) # nothing raised
+
+ def test_datatype_to_python_objects(self):
+ l = gfapy.line.Header(["H", "a1:A:1", "z1:Z:hallo",
+ "b1:B:c,12,12", "b2:B:f,1E-2,3.0,3",
+ "h1:H:00A1",
+ "j1:J:[12,\"a\"]", "j2:J:{\"a\":1,\"b\":[2,3]}",
+ "f1:f:-1.23E-04", "i1:i:-123"])
+ self.assertEqual(str, l.a1.__class__)
+ self.assertEqual(str, l.z1.__class__)
+ self.assertEqual(gfapy.NumericArray, l.b1.__class__)
+ self.assertEqual(gfapy.NumericArray, l.b2.__class__)
+ self.assertEqual(gfapy.ByteArray, l.h1.__class__)
+ self.assertEqual(list, l.j1.__class__)
+ self.assertEqual(dict, l.j2.__class__)
+ self.assertEqual(int, l.i1.__class__)
+ self.assertEqual(float, l.f1.__class__)
+
+
+ def test_python_object_to_datatype(self):
+ l = gfapy.line.Header(["H"])
+ # String
+ l.zz="1" # nothing raised
+ self.assertEqual("1", l.zz)
+ self.assertEqual("Z", l.get_datatype("zz"))
+ self.assertEqual("1", l.field_to_s("zz"))
+ self.assertEqual("1", gfapy.Line(str(l)).zz)
+ # Integer
+ l.ii=1 # nothing raised
+ self.assertEqual(1, l.ii)
+ self.assertEqual("i", l.get_datatype("ii"))
+ self.assertEqual("1", l.field_to_s("ii"))
+ self.assertEqual(1, gfapy.Line(str(l)).ii)
+ # Float
+ l.ff=1.0 # nothing raised
+ self.assertEqual(1.0, l.ff)
+ self.assertEqual("f", l.get_datatype("ff"))
+ self.assertEqual("1.0", l.field_to_s("ff"))
+ self.assertEqual(1.0, gfapy.Line(str(l)).ff)
+ # Array: all floats
+ l.af=[1.0,1.0] # nothing raised
+ self.assertEqual([1.0,1.0], l.af)
+ self.assertEqual("B", l.get_datatype("af"))
+ self.assertEqual("f,1.0,1.0", l.field_to_s("af"))
+ self.assertEqual([1.0,1.0], gfapy.Line(str(l)).af)
+ # Array: all integers
+ l.ai=[1,1] # nothing raised
+ self.assertEqual([1,1], l.ai)
+ self.assertEqual("B", l.get_datatype("ai"))
+ self.assertEqual("C,1,1", l.field_to_s("ai"))
+ self.assertEqual([1,1], gfapy.Line(str(l)).ai)
+ # Array: anything else
+ l.aa=[1,1.0,"X"] # nothing raised
+ self.assertEqual([1,1.0,"X"], l.aa)
+ self.assertEqual("J", l.get_datatype("aa"))
+ self.assertEqual('[1, 1.0, "X"]', l.field_to_s("aa"))
+ self.assertEqual([1,1.0,"X"], gfapy.Line(str(l)).aa)
+ # Hash
+ l.hh={"a":1.0, "b":1} # nothing raised
+ self.assertEqual({"a":1.0,"b":1}, l.hh)
+ self.assertEqual("J", l.get_datatype("hh"))
+ try:
+ self.assertEqual('{"a": 1.0, "b": 1}', l.field_to_s("hh"))
+ except AssertionError: # dict key order may vary
+ self.assertEqual('{"b": 1, "a": 1.0}', l.field_to_s("hh"))
+ self.assertEqual({"a":1.0,"b":1}, gfapy.Line(str(l)).hh)
+ # gfapy.ByteArray
+ l.ba=gfapy.ByteArray([0,255]) # nothing raised
+ self.assertEqual(gfapy.ByteArray([0,255]), l.ba)
+ self.assertEqual("H", l.get_datatype("ba"))
+ self.assertEqual('00FF', l.field_to_s("ba"))
+ self.assertEqual(gfapy.ByteArray([0,255]), gfapy.Line(str(l)).ba)
+
+ def test_byte_arrays(self):
+ # creation:, from array, from string
+ a = gfapy.ByteArray([1,2,3,4,5]) # nothing raised
+ b = gfapy.ByteArray([1,2,3,4,5]) # nothing raised
+ self.assertEqual(a, b)
+ c = gfapy.ByteArray("12ACF4AA601C1F") # nothing raised
+ self.assertEqual(gfapy.ByteArray([18, 172, 244, 170, 96, 28, 31]), c)
+ # validation
+ a.validate() # nothing raised
+ with self.assertRaises(gfapy.ValueError):
+ gfapy.ByteArray([1,2,3,4,356])
+ with self.assertRaises(gfapy.FormatError):
+ gfapy.ByteArray("12ACF4AA601C1")
+ with self.assertRaises(gfapy.FormatError):
+ gfapy.ByteArray("")
+ with self.assertRaises(gfapy.FormatError):
+ gfapy.ByteArray("12ACG4AA601C1F")
+ # to string
+ a = gfapy.ByteArray([18, 172, 244, 170, 96, 28, 31])
+ self.assertEqual("12ACF4AA601C1F", str(a))
+ a = list(a)
+ a[2] = 280
+ with self.assertRaises(gfapy.ValueError):
+ a = gfapy.ByteArray(a)
+
+ def test_numeric_arrays(self):
+ # creation:, from array, from string
+ a = gfapy.NumericArray([1,2,3,4,5]) # nothing raised
+ b = gfapy.NumericArray([1,2,3,4,5]) # nothing raised
+ self.assertEqual(a, b)
+ c = gfapy.NumericArray.from_string("i,1,2,3,4,5") # nothing raised
+ self.assertEqual(gfapy.NumericArray([1, 2, 3, 4, 5]), c)
+ # validation
+ a.validate() # nothing raised
+ gfapy.NumericArray([1,2,3,4,356]).validate() # nothing raised
+ self.assertRaises(gfapy.ValueError,
+ gfapy.NumericArray([1,2.0,3,4,356]).validate)
+ self.assertRaises(gfapy.ValueError,
+ gfapy.NumericArray([1.0,2.0,3,4,356]).validate)
+ self.assertRaises(gfapy.ValueError,
+ gfapy.NumericArray([1,"x",3,4,356]).validate)
+ with self.assertRaises(gfapy.ValueError):
+ a = gfapy.NumericArray.from_string("i,1,X,2")
+ with self.assertRaises(gfapy.FormatError):
+ a = gfapy.NumericArray.from_string("")
+ with self.assertRaises(gfapy.FormatError):
+ a = gfapy.NumericArray.from_string("i,1,2,")
+ with self.assertRaises(gfapy.TypeError):
+ a = gfapy.NumericArray.from_string("x,1,2")
+ # to string
+ a = gfapy.NumericArray([18, 72, 244, 70, 96, 38, 31])
+ self.assertEqual("C", a.compute_subtype())
+ self.assertEqual("C,18,72,244,70,96,38,31", str(a))
+ a[2] = -2
+ self.assertEqual("c", a.compute_subtype())
+ self.assertEqual("c,18,72,-2,70,96,38,31", str(a))
+ a[2] = 280
+ self.assertEqual("S", a.compute_subtype())
+ self.assertEqual("S,18,72,280,70,96,38,31", str(a))
+ a[2] = -280
+ self.assertEqual("s", a.compute_subtype())
+ self.assertEqual("s,18,72,-280,70,96,38,31", str(a))
+ a[2] = 280000
+ self.assertEqual("I", a.compute_subtype())
+ self.assertEqual("I,18,72,280000,70,96,38,31", str(a))
+ a[2] = -280000
+ self.assertEqual("i", a.compute_subtype())
+ self.assertEqual("i,18,72,-280000,70,96,38,31", str(a))
+ a = gfapy.NumericArray([float(x) for x in a])
+ self.assertEqual("f", a.compute_subtype())
+ self.assertEqual("f,18.0,72.0,-280000.0,70.0,96.0,38.0,31.0", str(a))
diff --git a/tests/test_api_version.py b/tests/test_api_version.py
new file mode 100644
index 0000000..a307515
--- /dev/null
+++ b/tests/test_api_version.py
@@ -0,0 +1,241 @@
+import gfapy
+import unittest
+
+class TestApiVersion(unittest.TestCase):
+
+ def test_init_without_version_by_init(self):
+ gfa = gfapy.Gfa()
+ self.assertEqual(None, gfa.version)
+
+ def test_init_GFA1(self):
+ gfa = gfapy.Gfa(version="gfa1")
+ self.assertEqual("gfa1", gfa.version)
+
+ def test_init_GFA2(self):
+ gfa = gfapy.Gfa(version="gfa2")
+ self.assertEqual("gfa2", gfa.version)
+
+ def test_init_invalid_version(self):
+ self.assertRaises(gfapy.VersionError, gfapy.Gfa, version="x.x")
+
+ def test_GFA1_header(self):
+ hother = "H\taa:A:a\tff:f:1.1"
+ hv1 = "H\tzz:Z:test\tVN:Z:1.0\tii:i:11"
+ gfa = gfapy.Gfa()
+ gfa.add_line(hother)
+ self.assertEqual(None, gfa.version)
+ gfa.add_line(hv1)
+ self.assertEqual("gfa1", gfa.version)
+
+
+ def test_GFA2_header(self):
+ hother = "H\taa:A:a\tff:f:1.1"
+ hv2 = "H\tzz:Z:test\tVN:Z:2.0\tii:i:11"
+ gfa = gfapy.Gfa()
+ gfa.add_line(hother)
+ self.assertEqual(None, gfa.version)
+ gfa.add_line(hv2)
+ self.assertEqual("gfa2", gfa.version)
+
+
+ def test_unknown_version_in_header(self):
+ hother = "H\taa:A:a\tff:f:1.1"
+ hvx = "H\tzz:Z:test\tVN:Z:x.x\tii:i:11"
+ gfa = gfapy.Gfa()
+ gfa.add_line(hother)
+ self.assertEqual(None, gfa.version)
+ self.assertRaises(gfapy.VersionError, gfa.add_line, hvx)
+
+
+ def test_wrong_version_in_header(self):
+ hother = "H\taa:A:a\tff:f:1.1"
+ hv2 = "H\tzz:Z:test\tVN:Z:2.0\tii:i:11"
+ gfa = gfapy.Gfa(version="gfa1")
+ gfa.add_line(hother)
+ self.assertEqual("gfa1", gfa.version)
+ self.assertRaises(gfapy.VersionError, gfa.add_line, hv2)
+
+ def test_conflicting_versions_in_header(self):
+ hother = "H\taa:A:a\tff:f:1.1"
+ hv1 = "H\tzz:Z:test\tVN:Z:1.0\tii:i:11"
+ hv2 = "H\tzz:Z:test\tVN:Z:2.0\tii:i:11"
+ gfa = gfapy.Gfa()
+ gfa.add_line(hother)
+ gfa.add_line(hv1)
+ self.assertRaises(gfapy.VersionError, gfa.add_line, hv2)
+
+ def test_version_by_segment_GFA1_syntax(self):
+ sv1 = "S\tA\t*"
+ gfa = gfapy.Gfa()
+ gfa.add_line(sv1)
+ self.assertEqual("gfa1", gfa.version)
+
+ def test_version_by_segment_GFA2_syntax(self):
+ sv2 = "S\tB\t100\t*"
+ gfa = gfapy.Gfa()
+ gfa.add_line(sv2)
+ self.assertEqual("gfa2", gfa.version)
+
+ def test_GFA2_segment_in_GFA1(self):
+ sv1 = "S\tA\t*"
+ sv2 = "S\tB\t100\t*"
+ gfa = gfapy.Gfa()
+ gfa.add_line(sv1)
+ self.assertRaises(gfapy.VersionError, gfa.add_line, sv2)
+
+ def test_GFA1_segment_in_GFA2(self):
+ sv1 = "S\tA\t*"
+ sv2 = "S\tB\t100\t*"
+ gfa = gfapy.Gfa()
+ gfa.add_line(sv2)
+ self.assertRaises(gfapy.VersionError, gfa.add_line, sv1)
+
+ def test_version_by_GFA2_specific_line_E(self):
+ e = "E\t*\tA+\tB+\t0\t10\t20\t30\t*"
+ gfa = gfapy.Gfa()
+ gfa.add_line(e)
+ self.assertEqual("gfa2", gfa.version)
+
+ def test_version_by_GFA2_specific_line_G(self):
+ g = "G\t*\tA+\tB-\t1000\t*"
+ gfa = gfapy.Gfa()
+ gfa.add_line(g)
+ self.assertEqual("gfa2", gfa.version)
+
+ def test_version_by_GFA2_specific_line_F(self):
+ f = "F\tX\tID+\t10\t100\t0\t90$\t*"
+ gfa = gfapy.Gfa()
+ gfa.add_line(f)
+ self.assertEqual("gfa2", gfa.version)
+
+ def test_version_by_GFA2_specific_line_O(self):
+ o = "O\tX\tA+ B- C+"
+ gfa = gfapy.Gfa()
+ gfa.add_line(o)
+ self.assertEqual("gfa2", gfa.version)
+
+ def test_version_by_GFA2_specific_line_U(self):
+ u = "U\tX\tA B C"
+ gfa = gfapy.Gfa()
+ gfa.add_line(u)
+ self.assertEqual("gfa2", gfa.version)
+
+ def test_version_guess_GFA1_specific_line_L(self):
+ string = "L\tA\t-\tB\t+\t*"
+ gfa = gfapy.Gfa()
+ gfa.add_line(string)
+ gfa.process_line_queue()
+ self.assertEqual("gfa1", gfa.version)
+
+ def test_version_guess_GFA1_specific_line_C(self):
+ string = "C\tA\t+\tB\t-\t10\t*"
+ gfa = gfapy.Gfa()
+ gfa.add_line(string)
+ gfa.process_line_queue()
+ self.assertEqual("gfa1", gfa.version)
+
+ def test_version_guess_GFA1_specific_line_P(self):
+ string = "P\t1\ta-,b+\t*"
+ gfa = gfapy.Gfa()
+ gfa.add_line(string)
+ gfa.process_line_queue()
+ self.assertEqual("gfa1", gfa.version)
+
+ def test_version_guess_default(self):
+ gfa = gfapy.Gfa()
+ gfa.process_line_queue()
+ self.assertEqual("gfa2", gfa.version)
+
+ def test_header_version(self):
+ self.assertEqual("generic", gfapy.Line("H\tVN:Z:1.0").version)
+ self.assertEqual("gfa1", gfapy.Line("H\tVN:Z:1.0", version="gfa1").version)
+ self.assertEqual("gfa2", gfapy.Line("H\tVN:Z:1.0", version="gfa2").version)
+
+ def test_comment_version(self):
+ self.assertEqual("generic", gfapy.Line("# VN:Z:1.0").version)
+ self.assertEqual("gfa1", gfapy.Line("# VN:Z:1.0", version="gfa1").version)
+ self.assertEqual("gfa2", gfapy.Line("# VN:Z:1.0", version="gfa2").version)
+
+ def test_segment_version(self):
+ self.assertEqual("gfa1", gfapy.Line("S\tA\tNNNN").version)
+ self.assertEqual("gfa2", gfapy.Line("S\tA\t1\tNNNN").version)
+ self.assertEqual("gfa1", gfapy.Line("S\tA\tNNNN", version="gfa1").version)
+ self.assertEqual("gfa2", gfapy.Line("S\tA\t1\tNNNN", version="gfa2").version)
+ self.assertRaises(gfapy.FormatError,
+ gfapy.Line, "S\tA\t1\tNNNN", version="gfa1")
+ self.assertRaises(gfapy.FormatError,
+ gfapy.Line, "S\tA\tNNNN", version="gfa2")
+
+ def test_link_version(self):
+ string = "L\tA\t+\tB\t-\t*"
+ self.assertEqual("gfa1", gfapy.Line(string).version)
+ self.assertEqual("gfa1", gfapy.Line(string, version="gfa1").version)
+ self.assertRaises(gfapy.VersionError, gfapy.Line, string, version="gfa2")
+ self.assertRaises(gfapy.VersionError,
+ gfapy.line.edge.Link, ["A","+","B","-","*"], version="gfa2")
+
+ def test_containment_version(self):
+ string = "C\tA\t+\tB\t-\t10\t*"
+ self.assertEqual("gfa1", gfapy.Line(string).version)
+ self.assertEqual("gfa1", gfapy.Line(string,version="gfa1").version)
+ self.assertRaises(gfapy.VersionError, gfapy.Line,string,version="gfa2")
+ self.assertRaises(gfapy.VersionError, gfapy.line.edge.Containment,
+ ["A","+","B","-","10","*"], version="gfa2")
+
+ def test_edge_version(self):
+ self.assertEqual("gfa2", gfapy.Line("E\t*\tA-\tB+\t0\t100\t0\t100\t*").version)
+ self.assertEqual("gfa2", gfapy.Line("E\t*\tA-\tB+\t0\t100\t0\t100\t*",version=\
+ "gfa2").version)
+ self.assertRaises(gfapy.VersionError,
+ gfapy.Line, "E\t*\tA-\tB+\t0\t100\t0\t100\t*", version="gfa1")
+ self.assertRaises(gfapy.VersionError,
+ gfapy.line.edge.GFA2, ["A-","B+", "0", "100", "0", "100", "*"],
+ version="gfa1")
+
+ def test_gap_version(self):
+ self.assertEqual("gfa2", gfapy.Line("G\t*\tA-\tB+\t100\t*").version)
+ self.assertEqual("gfa2", gfapy.Line("G\t*\tA-\tB+\t100\t*",
+ version="gfa2").version)
+ self.assertRaises(gfapy.VersionError,
+ gfapy.Line, "G\t*\tA-\tB+\t100\t*", version="gfa1")
+ self.assertRaises(gfapy.VersionError,
+ gfapy.line.Gap,["A-","B+", "100", "*"], version="gfa1")
+
+ def test_fragment_version(self):
+ self.assertEqual("gfa2", gfapy.Line("F\tA\tread1-\t0\t100\t0\t100\t*").version)
+ self.assertEqual("gfa2", gfapy.Line("F\tA\tread1-\t0\t100\t0\t100\t*", version=\
+ "gfa2").version)
+ self.assertRaises(gfapy.VersionError,
+ gfapy.Line, "F\tA\tread1-\t0\t100\t0\t100\t*", version="gfa1")
+ self.assertRaises(gfapy.VersionError,
+ gfapy.line.Fragment,["A","read-", "0", "100", "0", "100", "*"],
+ version="gfa1")
+
+ def test_custom_record_version(self):
+ self.assertEqual("gfa2", gfapy.Line("X\tVN:Z:1.0").version)
+ self.assertEqual("gfa2", gfapy.Line("X\tVN:Z:1.0", version="gfa2").version)
+ self.assertRaises(gfapy.VersionError,
+ gfapy.Line, "X\tVN:Z:1.0", version="gfa1")
+ self.assertRaises(gfapy.VersionError,
+ gfapy.line.CustomRecord, ["X","VN:Z:1.0"], version="gfa1")
+
+ def test_path_version(self):
+ string = "P\t1\tA+,B-\t*"
+ self.assertEqual("gfa1", gfapy.Line(string).version)
+ self.assertEqual("gfa1", gfapy.Line(string, version="gfa1").version)
+ self.assertRaises(gfapy.VersionError, gfapy.Line, string, version="gfa2")
+ string = "O\t1\tA+ B-"
+ self.assertEqual("gfa2", gfapy.Line(string).version)
+ self.assertEqual("gfa2", gfapy.Line(string, version="gfa2").version)
+ self.assertRaises(gfapy.VersionError, gfapy.Line, string, version="gfa1")
+
+ def test_set_version(self):
+ string = "U\t1\tA B C"
+ self.assertEqual("gfa2", gfapy.Line(string).version)
+ self.assertEqual("gfa2", gfapy.Line(string, version="gfa2").version)
+ self.assertRaises(gfapy.VersionError, gfapy.Line, string, version="gfa1")
+
+ def test_unknown_record_version(self):
+ self.assertEqual("gfa2", gfapy.line.Unknown([None, "A"]).version)
+ self.assertEqual("gfa2", gfapy.line.Unknown([None, "A"], version="gfa2").version)
+ self.assertRaises(gfapy.VersionError, gfapy.line.Unknown,["\n","A"], version="gfa1")
diff --git a/tests/test_api_version_conversion.py b/tests/test_api_version_conversion.py
new file mode 100644
index 0000000..0584d27
--- /dev/null
+++ b/tests/test_api_version_conversion.py
@@ -0,0 +1,217 @@
+import gfapy
+import unittest
+
+class TestApiVersion(unittest.TestCase):
+
+ def test_header_conversion(self):
+ gfa1str = "H\tVN:Z:1.0"
+ gfa2str = "H\tVN:Z:2.0"
+ self.assertEqual(gfa1str, str(gfapy.Line(gfa1str).to_gfa1()))
+ self.assertEqual(gfa1str, str(gfapy.Line(gfa2str).to_gfa1()))
+ self.assertEqual(gfa2str, str(gfapy.Line(gfa1str).to_gfa2()))
+ self.assertEqual(gfa2str, str(gfapy.Line(gfa2str).to_gfa2()))
+
+ def test_comment_conversion(self):
+ self.assertEqual("# comment",
+ str(gfapy.Line("# comment",version="gfa1").to_gfa1()))
+ self.assertEqual("# comment",
+ str(gfapy.Line("# comment",version="gfa2").to_gfa1()))
+ self.assertEqual("# comment",
+ str(gfapy.Line("# comment",version="gfa1").to_gfa2()))
+ self.assertEqual("# comment",
+ str(gfapy.Line("# comment",version="gfa2").to_gfa2()))
+
+ def test_segment_conversion(self):
+ self.assertEqual("S\tA\tNNNN",
+ str(gfapy.Line("S\tA\tNNNN").to_gfa1()))
+ self.assertEqual("S\tA\t4\tNNNN",
+ str(gfapy.Line("S\tA\tNNNN").to_gfa2()))
+ self.assertEqual("S\tA\tNNNN\tLN:i:4",str(gfapy.Line("S\tA\t4\tNNNN").to_gfa1()))
+ self.assertEqual("S\tA\t*\tLN:i:4",str(gfapy.Line("S\tA\t4\t*").to_gfa1()))
+ self.assertEqual("S\tA\t4\tNNNN",str(gfapy.Line("S\tA\t4\tNNNN").to_gfa2()))
+ # wrong sequence alphabet for GFA2->GFA1
+ self.assertEqual("S\tA\t4\t[[]]",str(gfapy.Line("S\tA\t4\t[[]]").to_gfa2()))
+ self.assertRaises(gfapy.RuntimeError,gfapy.Line("S\tA\t4\t[[]]").to_gfa1)
+ # wrong identifier for GFA2->GFA1
+ self.assertEqual("S\tA+,\t3\tNNN", str(gfapy.Line("S\tA+,\t3\tNNN").to_gfa2()))
+ self.assertRaises(gfapy.RuntimeError,gfapy.Line("S\tA+,\t3\tNNN").to_gfa1)
+ # sequence not available but LN for GFA1->GFA2
+ self.assertEqual("S\tA\t4\t*",str(gfapy.Line("S\tA\t*\tLN:i:4").to_gfa2()))
+ # both sequence and LN not available for GFA1->GFA2
+ self.assertRaises(gfapy.RuntimeError,gfapy.Line("S\tA\t*").to_gfa2)
+
+ def test_link_conversion(self):
+ gfa1str = "L\tA\t+\tB\t-\t100M"
+ gfa1str_noov = "L\tA\t+\tB\t+\t*"
+ gfa2str = "E\t1\tA+\tB-\t100\t200$\t100\t200$\t100M"
+ # not connected
+ self.assertRaises(gfapy.RuntimeError,gfapy.Line(gfa1str).to_gfa2)
+ # connected
+ g = gfapy.Gfa()
+ g.add_line("S\tA\t*\tLN:i:200")
+ g.add_line("S\tB\t*\tLN:i:200")
+ gfa1line = gfapy.Line(gfa1str)
+ g.add_line(gfa1line)
+ gfa1line_noov = gfapy.Line(gfa1str_noov)
+ g.add_line(gfa1line_noov)
+ self.assertEqual(gfa2str,str(gfa1line.to_gfa2()))
+ self.assertEqual(gfa1str+"\tID:Z:1",str(gfa1line.to_gfa1()))
+ # placeholder overlap
+ self.assertRaises(gfapy.ValueError,gfa1line_noov.to_gfa2)
+ # TODO check if the alignment is compatible with the segment length
+
+ def test_containment_conversion(self):
+ gfa1str = "C\tA\t+\tB\t-\t20\t100M"
+ gfa1str_noov = "C\tA\t+\tB\t+\t20\t*"
+ gfa2str = "E\t1\tA+\tB-\t20\t120\t0\t100$\t100M"
+ # not connected
+ self.assertRaises(gfapy.RuntimeError,gfapy.Line(gfa1str).to_gfa2)
+ # connected
+ g = gfapy.Gfa()
+ g.add_line("S\tA\t*\tLN:i:200")
+ g.add_line("S\tB\t*\tLN:i:100")
+ gfa1line = gfapy.Line(gfa1str)
+ g.add_line(gfa1line)
+ gfa1line_noov = gfapy.Line(gfa1str_noov)
+ g.add_line(gfa1line_noov)
+ self.assertEqual(gfa2str,str(gfa1line.to_gfa2()))
+ self.assertEqual(gfa1str+"\tID:Z:1",str(gfa1line.to_gfa1()))
+ # placeholder overlap
+ self.assertRaises(gfapy.ValueError,gfa1line_noov.to_gfa2)
+ # TODO check if the alignment is compatible with the segment length
+
+ def test_edge_conversion(self):
+ dovetail = "E\t*\tA+\tB-\t100\t200$\t100\t200$\t100M"
+ dovetail_gfa1 = "L\tA\t+\tB\t-\t100M"
+ containment = "E\t*\tA+\tB-\t20\t120\t0\t100$\t100M"
+ containment_gfa1 = "C\tA\t+\tB\t-\t20\t100M"
+ internal = "E\t*\tA+\tB-\t20\t110\t10\t100$\t90M"
+ self.assertEqual(dovetail_gfa1,str( gfapy.Line(dovetail).to_gfa1()))
+ self.assertEqual(containment_gfa1,str( gfapy.Line(containment).to_gfa1()))
+ self.assertRaises(gfapy.RuntimeError,gfapy.Line(internal).to_gfa1)
+
+ def test_L_to_E(self):
+ g = gfapy.Gfa(version="gfa1")
+ g.add_line("S\t1\t*\tLN:i:100")
+ g.add_line("S\t2\t*\tLN:i:100")
+ g.add_line("S\t3\t*\tLN:i:100")
+ g.add_line("S\t4\t*\tLN:i:100")
+ g.add_line("L\t1\t+\t2\t+\t10M")
+ g.add_line("L\t1\t-\t2\t-\t20M")
+ g.add_line("L\t3\t-\t4\t+\t30M")
+ g.add_line("L\t3\t+\t4\t-\t40M")
+ dovetails_gfa1 = g.dovetails
+ dovetails_gfa2 = {dovetails_gfa1[0].to_gfa2_s(),
+ dovetails_gfa1[1].to_gfa2_s(),
+ dovetails_gfa1[2].to_gfa2_s(),
+ dovetails_gfa1[3].to_gfa2_s()}
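+ # the expected coordinates follow from LN:i:100 and the CIGAR lengths, e.g. for
+ # "L 1 + 2 + 10M": 90..100$ on segment 1 and 0..10 on segment 2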
+ expected_dovetails_gfa2 = {
+ "E 5 1+ 2+ 90 100$ 0 10 10M",
+ "E 6 1- 2- 0 20 80 100$ 20M",
+ "E 7 3- 4+ 0 30 0 30 30M",
+ "E 8 3+ 4- 60 100$ 60 100$ 40M"}
+ try:
+ self.assertEqual(expected_dovetails_gfa2, dovetails_gfa2)
+ except AssertionError:
+ # sometimes the IDs 7 and 8 are assigned in a different order,
+ # despite using a fixed hash seed in the tests
+ expected_dovetails_gfa2 = {
+ "E 5 1+ 2+ 90 100$ 0 10 10M",
+ "E 6 1- 2- 0 20 80 100$ 20M",
+ "E 8 3- 4+ 0 30 0 30 30M",
+ "E 7 3+ 4- 60 100$ 60 100$ 40M"}
+ self.assertEqual(expected_dovetails_gfa2, dovetails_gfa2)
+ assert(isinstance(g.dovetails[0].to_gfa1(),gfapy.line.edge.Link))
+ assert(isinstance(g.dovetails[0].to_gfa2(),gfapy.line.edge.GFA2))
+
+ def test_E_to_L(self):
+ e1 = gfapy.Line("E\t*\t1+\t2+\t90\t100$\t0\t10\t10M")
+ l1 = "L\t1\t+\t2\t+\t10M"
+ self.assertEqual(l1, e1.to_gfa1_s())
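+ # when the overlap lies at the end of the second segment, the segments are
+ # swapped in the GFA1 link, since L lines join end-of-from to start-of-to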
+ e2 = gfapy.Line("E\t*\t1+\t2+\t0\t20\t80\t100$\t20M")
+ l2 = "L\t2\t+\t1\t+\t20M"
+ self.assertEqual(l2, e2.to_gfa1_s())
+ e3 = gfapy.Line("E\t*\t3-\t4+\t0\t30\t0\t30\t30M")
+ l3 = "L\t3\t-\t4\t+\t30M"
+ self.assertEqual(l3, e3.to_gfa1_s())
+ e4 = gfapy.Line("E\t*\t3+\t4-\t60\t100$\t60\t100$\t40M")
+ l4 = "L\t3\t+\t4\t-\t40M"
+ self.assertEqual(l4, e4.to_gfa1_s())
+
+ def test_path_conversion(self):
+ path_gfa1 = "P\t1\ta+,b-\t100M"
+ path_gfa2 = "O\t1\ta+ a_to_b+ b-"
+ # gfa1 => gfa2
+ l1 = "L\ta\t+\tb\t-\t100M\tID:Z:a_to_b"
+ g1 = gfapy.Gfa()
+ path_gfa1_line = gfapy.Line(path_gfa1)
+ g1.add_line(path_gfa1_line)
+ g1.add_line(l1)
+ g1.process_line_queue()
+ # not connected
+ self.assertRaises(gfapy.RuntimeError,
+ gfapy.Line(path_gfa1).to_gfa2)
+ # connected
+ self.assertEqual(path_gfa1,str(path_gfa1_line.to_gfa1()))
+ self.assertEqual(path_gfa2,str(path_gfa1_line.to_gfa2()))
+ # gfa2 => gfa1
+ e = "E\ta_to_b\ta+\tb-\t100\t200$\t100\t200$\t100M"
+ sA = "S\ta\t200\t*"
+ sB = "S\tb\t200\t*"
+ g2 = gfapy.Gfa()
+ path_gfa2_line = gfapy.Line(path_gfa2)
+ g2.add_line(path_gfa2_line)
+ g2.add_line(e)
+ g2.add_line(sA)
+ g2.add_line(sB)
+ # not connected
+ self.assertRaises(gfapy.RuntimeError,
+ gfapy.Line(path_gfa2).to_gfa1)
+ # connected
+ self.assertEqual(path_gfa1,str( path_gfa2_line.to_gfa1()))
+ self.assertEqual(path_gfa2,str( path_gfa2_line.to_gfa2()))
+
+ def test_gap_conversion(self):
+ s = "G\t*\tA-\tB+\t100\t*"
+ self.assertEqual(s, str(gfapy.Line(s).to_gfa2()))
+ self.assertRaises(gfapy.VersionError,gfapy.Line(s).to_gfa1)
+
+ def test_fragment_conversion(self):
+ s = "F\tA\tread1-\t0\t100\t0\t100\t*"
+ self.assertEqual(s,str( gfapy.Line(s).to_gfa2()))
+ self.assertRaises(gfapy.VersionError,gfapy.Line(s).to_gfa1)
+
+ def test_set_conversion(self):
+ s = "U\t1\tA B C"
+ self.assertEqual(s,str( gfapy.Line(s).to_gfa2()))
+ self.assertRaises(gfapy.VersionError,gfapy.Line(s).to_gfa1)
+
+ def test_custom_record_conversion(self):
+ s = "X\tx1\tA\tC"
+ self.assertEqual(s,str( gfapy.Line(s).to_gfa2()))
+ self.assertRaises(gfapy.VersionError,gfapy.Line(s).to_gfa1)
+
+ def test_unknown_record_conversion(self):
+ record = gfapy.line.Unknown([None, "A"])
+ self.assertEqual(record, record.to_gfa2())
+ self.assertRaises(gfapy.VersionError,record.to_gfa1)
+
+ def test_gfa_conversion(self):
+ gfa1_str ='''# comment
+H\tVN:Z:1.0
+S\tB\t*\tLN:i:200
+S\tC\t*\tLN:i:100
+S\tA\t*\tLN:i:200
+L\tA\t+\tB\t-\t100M\tID:Z:a_to_b
+C\tA\t+\tC\t-\t20\t100M\tID:Z:2
+P\t1\tA+,B-\t100M'''
+ gfa2_str ='''# comment
+H\tVN:Z:2.0
+S\tB\t200\t*
+S\tC\t100\t*
+S\tA\t200\t*
+E\ta_to_b\tA+\tB-\t100\t200$\t100\t200$\t100M
+E\t2\tA+\tC-\t20\t120\t0\t100$\t100M
+O\t1\tA+ a_to_b+ B-'''
+ self.assertEqual(gfa2_str, str(gfapy.Gfa(gfa1_str).to_gfa2()))
+ self.assertEqual(gfa1_str, str(gfapy.Gfa(gfa2_str).to_gfa1()))
diff --git a/tests/test_gfapy_alignment.py b/tests/test_gfapy_alignment.py
new file mode 100644
index 0000000..ebaeee5
--- /dev/null
+++ b/tests/test_gfapy_alignment.py
@@ -0,0 +1,40 @@
+import unittest
+import gfapy
+
+class TestAlignment(unittest.TestCase):
+
+ def test_string_to_cigar(self):
+ self.assertEqual(gfapy.CIGAR([
+ gfapy.CIGAR.Operation(12, "M"),
+ gfapy.CIGAR.Operation(1, "D"),
+ gfapy.CIGAR.Operation(2, "I"),
+ ]), gfapy.Alignment("12M1D2I"))
+
+ def test_string_to_placeholder(self):
+ self.assertIsInstance(gfapy.Alignment("*"), gfapy.Placeholder)
+
+ def test_string_to_trace(self):
+ self.assertEqual(gfapy.Trace([12,14,15]),
+ gfapy.Alignment("12,14,15"))
+
+ def test_string_invalid(self):
+ self.assertRaises(gfapy.FormatError,
+ gfapy.Alignment, "12x1,D2I")
+
+ def test_list_to_cigar(self):
+ self.assertEqual(gfapy.CIGAR([
+ gfapy.CIGAR.Operation(12, "M"),
+ gfapy.CIGAR.Operation(1, "D"),
+ gfapy.CIGAR.Operation(2, "I")]),
+ gfapy.Alignment(
+ [gfapy.CIGAR.Operation(12, "M"),
+ gfapy.CIGAR.Operation(1, "D"),
+ gfapy.CIGAR.Operation(2, "I")]))
+
+ def test_list_to_trace(self):
+ self.assertEqual(gfapy.Trace([12,14,15]),
+ gfapy.Alignment([12,14,15]))
+
+ def test_list_invalid(self):
+ self.assertRaises(gfapy.FormatError,
+ gfapy.Alignment,["12x1", "2I"])
diff --git a/tests/test_gfapy_byte_array.py b/tests/test_gfapy_byte_array.py
new file mode 100644
index 0000000..c409c31
--- /dev/null
+++ b/tests/test_gfapy_byte_array.py
@@ -0,0 +1,28 @@
+import unittest
+import gfapy
+
+class TestByteArray(unittest.TestCase):
+
+ def test_byte_arrays(self):
+ # creation: from list, from string
+ a_lst = [18, 172, 244, 170, 96, 28, 31]
+ a = gfapy.ByteArray(a_lst)
+ for i in range(0,len(a_lst)):
+ self.assertEqual(a[i], a_lst[i])
+ a_str = "12ACF4AA601C1F"
+ b = gfapy.ByteArray(a_str)
+ self.assertEqual(a,b)
+ # validation
+ self.assertRaises(gfapy.ValueError, gfapy.ByteArray, [1,2,3,4,356])
+ self.assertRaises(gfapy.FormatError, gfapy.ByteArray, "12ACF4AA601C1")
+ self.assertRaises(gfapy.FormatError, gfapy.ByteArray, "")
+ self.assertRaises(gfapy.FormatError, gfapy.ByteArray, "12ACG4AA601C1")
+ # to_s
+ self.assertEqual(str(b), a_str)
+ self.assertEqual(str(a), a_str)
+ # read only; transform to list to edit a value
+ tmp = list(a)
+ tmp[3]=1
+ a = gfapy.ByteArray(tmp)
+ self.assertEqual(a, gfapy.ByteArray([18,172,244,1,96,28,31]))
+
diff --git a/tests/test_gfapy_cigar.py b/tests/test_gfapy_cigar.py
new file mode 100644
index 0000000..0d1b1b3
--- /dev/null
+++ b/tests/test_gfapy_cigar.py
@@ -0,0 +1,23 @@
+import unittest
+import gfapy
+
+class TestCigar(unittest.TestCase):
+
+ def test_from_string_nonempty(self):
+ self.assertEqual(gfapy.CIGAR([gfapy.CIGAR.Operation(12, "M"),
+ gfapy.CIGAR.Operation( 1, "D"),
+ gfapy.CIGAR.Operation( 2, "I")]),
+ gfapy.CIGAR._from_string("12M1D2I"))
+
+ def test_from_string_empty(self):
+ self.assertEqual(gfapy.Placeholder, gfapy.CIGAR._from_string("*"))
+
+ def test_from_string_invalid(self):
+ with self.assertRaises(gfapy.FormatError):
+ gfapy.CIGAR._from_string("12x1D2I")
+
+ def test__str__noempty(self):
+ self.assertEqual("12M1D2I",
+ str(gfapy.CIGAR([gfapy.CIGAR.Operation(12, "M"),
+ gfapy.CIGAR.Operation( 1, "D"),
+ gfapy.CIGAR.Operation( 2, "I")])))
diff --git a/tests/test_gfapy_line_containment.py b/tests/test_gfapy_line_containment.py
new file mode 100644
index 0000000..43e0fbd
--- /dev/null
+++ b/tests/test_gfapy_line_containment.py
@@ -0,0 +1,51 @@
+import unittest
+import gfapy
+
+class TestLineContainment(unittest.TestCase):
+
+ def test_from_string(self):
+ fields = ["C","1","+","2","-","12","12M","MQ:i:1232","NM:i:3","ab:Z:abcd"]
+ string="\t".join(fields)
+ gfapy.Line(string)
+ self.assertIsInstance(gfapy.Line(string), gfapy.line.edge.Containment)
+ self.assertEqual(fields[0], gfapy.Line(string).record_type)
+ self.assertEqual(fields[1], gfapy.Line(string).from_segment)
+ self.assertEqual(fields[2], gfapy.Line(string).from_orient)
+ self.assertEqual(fields[3], gfapy.Line(string).to_segment)
+ self.assertEqual(fields[4], gfapy.Line(string).to_orient)
+ self.assertEqual(12, gfapy.Line(string).pos)
+ self.assertEqual([gfapy.alignment.cigar.CIGAR.Operation(12, "M")],
+ gfapy.Line(string).overlap)
+ self.assertEqual(1232, gfapy.Line(string).MQ)
+ self.assertEqual(3, gfapy.Line(string).NM)
+ self.assertEqual("abcd", gfapy.Line(string).ab)
+ with self.assertRaises(gfapy.FormatError):
+ gfapy.Line(string+"\tH1")
+ with self.assertRaises(gfapy.FormatError):
+ gfapy.Line("C\tH")
+ with self.assertRaises(gfapy.FormatError):
+ f=fields[:]
+ f[2]="x"
+ gfapy.Line("\t".join(f), vlevel = 2)
+ with self.assertRaises(gfapy.FormatError):
+ f=fields[:]
+ f[4]="x"
+ gfapy.Line("\t".join(f), vlevel = 2)
+ with self.assertRaises(gfapy.FormatError):
+ f=fields[:]
+ f[5]="x"
+ gfapy.Line("\t".join(f), vlevel = 2)
+ with self.assertRaises(gfapy.FormatError):
+ f=fields[:]
+ f[6]="x"
+ gfapy.Line("\t".join(f), vlevel = 2)
+ with self.assertRaises(gfapy.TypeError):
+ f=fields[:]
+ f[7]="MQ:Z:1232"
+ gfapy.Line("\t".join(f), vlevel = 2)
+ with self.assertRaises(gfapy.TypeError):
+ f=fields[:]
+ f[8]="NM:Z:1232"
+ gfapy.Line("\t".join(f), vlevel = 2)
diff --git a/tests/test_gfapy_line_edge.py b/tests/test_gfapy_line_edge.py
new file mode 100644
index 0000000..83a5a5b
--- /dev/null
+++ b/tests/test_gfapy_line_edge.py
@@ -0,0 +1,18 @@
+import unittest
+import gfapy
+
+class TestLineEdge(unittest.TestCase):
+
+ def test_to_gfa1(self):
+ e1 = gfapy.Line("E\t*\t1+\t2+\t90\t100$\t0\t10\t10M")
+ l1 = "L\t1\t+\t2\t+\t10M"
+ self.assertEqual(l1, e1.to_gfa1_s())
+ e2 = gfapy.Line("E\t*\t1+\t2+\t0\t20\t80\t100$\t20M")
+ l2 = "L\t2\t+\t1\t+\t20M"
+ self.assertEqual(l2, e2.to_gfa1_s())
+ e3 = gfapy.Line("E\t*\t3-\t4+\t0\t30\t0\t30\t30M")
+ l3 = "L\t3\t-\t4\t+\t30M"
+ self.assertEqual(l3, e3.to_gfa1_s())
+ e4 = gfapy.Line("E\t*\t3+\t4-\t60\t100$\t60\t100$\t40M")
+ l4 = "L\t3\t+\t4\t-\t40M"
+ self.assertEqual(l4, e4.to_gfa1_s())
diff --git a/tests/test_gfapy_line_header.py b/tests/test_gfapy_line_header.py
new file mode 100644
index 0000000..1adcb58
--- /dev/null
+++ b/tests/test_gfapy_line_header.py
@@ -0,0 +1,12 @@
+import unittest
+import gfapy
+
+class TestLineHeader(unittest.TestCase):
+
+ def test_from_string(self):
+ gfapy.Line("H\tVN:Z:1.0")
+ self.assertIsInstance(gfapy.Line("H\tVN:Z:1.0"), gfapy.line.Header)
+ with self.assertRaises(gfapy.FormatError):
+ gfapy.Line("H\tH2\tVN:Z:1.0")
+ with self.assertRaises(gfapy.TypeError):
+ gfapy.Line("H\tVN:i:1.0")
diff --git a/tests/test_gfapy_line_link.py b/tests/test_gfapy_line_link.py
new file mode 100644
index 0000000..f4c6795
--- /dev/null
+++ b/tests/test_gfapy_line_link.py
@@ -0,0 +1,84 @@
+import gfapy
+import unittest
+
+class TestLineLink(unittest.TestCase):
+
+ def test_from_string(self):
+ fields=["L","1","+","2","-","12M","RC:i:1232","NM:i:3","ab:Z:abcd",
+ "FC:i:2321","KC:i:1212","MQ:i:40"]
+ string = "\t".join(fields)
+ gfapy.Line(string)
+ self.assertIsInstance(gfapy.Line(string), gfapy.line.edge.Link)
+ self.assertEqual(str(fields[0]), gfapy.Line(string).record_type)
+ self.assertEqual(str(fields[1]), gfapy.Line(string).from_segment)
+ self.assertEqual(str(fields[2]), gfapy.Line(string).from_orient)
+ self.assertEqual(str(fields[3]), gfapy.Line(string).to_segment)
+ self.assertEqual(str(fields[4]), gfapy.Line(string).to_orient)
+ self.assertEqual([gfapy.alignment.CIGAR.Operation(12, "M")],
+ gfapy.Line(string).overlap)
+ self.assertEqual(1232, gfapy.Line(string).RC)
+ self.assertEqual(3, gfapy.Line(string).NM)
+ self.assertEqual(2321, gfapy.Line(string).FC)
+ self.assertEqual(1212, gfapy.Line(string).KC)
+ self.assertEqual(40, gfapy.Line(string).MQ)
+ self.assertEqual("abcd", gfapy.Line(string).ab)
+ with self.assertRaises(gfapy.FormatError):
+ gfapy.Line((string+"\tH1"))
+ with self.assertRaises(gfapy.FormatError):
+ gfapy.Line("L\tH")
+ with self.assertRaises(gfapy.FormatError):
+ f=fields[:]
+ f[2]="x"
+ gfapy.Line("\t".join(f), vlevel = 2)
+ with self.assertRaises(gfapy.FormatError):
+ f=fields[:]
+ f[4]="x"
+ gfapy.Line("\t".join(f), vlevel = 2)
+ with self.assertRaises(gfapy.FormatError):
+ f=fields[:]
+ f[5]="x"
+ gfapy.Line("\t".join(f), vlevel = 2)
+ with self.assertRaises(gfapy.TypeError):
+ f=fields[:]
+ f[6]="RC:Z:1232"
+ gfapy.Line("\t".join(f), vlevel = 2)
+ with self.assertRaises(gfapy.TypeError):
+ f=fields[:]
+ f[7]="NM:Z:1232"
+ gfapy.Line("\t".join(f), vlevel = 2)
+
+ #TODO
+ #def test_coords
+ # g = RGFA.new(version: :gfa1)
+ # g << "S\t1\t*\tLN:i:100"
+ # g << "L\t1\t+\t2\t-\t1M2D10M1I"
+ # assert_equal([87,100], g.links[0].from_coords)
+ # assert_raises(RGFA::ValueError) {g.links[0].to_coords}
+ # g << "S\t2\t*\tLN:i:100"
+ # assert_equal([88,100], g.links[0].to_coords)
+ # g << "L\t3\t-\t4\t+\t10M2P3D1M"
+ # assert_equal([0,14], g.links[1].from_coords)
+ # assert_equal([0,11], g.links[1].to_coords)
+ #end
+
+ #def test_to_gfa2
+ # g = RGFA.new(version: :gfa1)
+ # g << "S\t1\t*\tLN:i:100"
+ # g << "S\t2\t*\tLN:i:100"
+ # g << "S\t3\t*\tLN:i:100"
+ # g << "S\t4\t*\tLN:i:100"
+ # g << "L\t1\t+\t2\t+\t10M"
+ # g << "L\t1\t-\t2\t-\t20M"
+ # g << "L\t3\t-\t4\t+\t30M"
+ # g << "L\t3\t+\t4\t-\t40M"
+ # assert_equal("E * 1+ 2+ 90 100$ 0 10 10M",
+ # g.links[0].to_gfa2_s)
+ # assert_equal("E * 1- 2- 0 20 80 100$ 20M",
+ # g.links[1].to_gfa2_s)
+ # assert_equal("E * 3- 4+ 0 30 0 30 30M",
+ # g.links[2].to_gfa2_s)
+ # assert_equal("E * 3+ 4- 60 100$ 60 100$ 40M",
+ # g.links[3].to_gfa2_s)
+ # assert_equal(RGFA::Line::Edge::Link, g.links[0].to_gfa1.class)
+ # assert_equal(RGFA::Line::Edge::GFA2, g.links[0].to_gfa2.class)
+ #end
diff --git a/tests/test_gfapy_line_path.py b/tests/test_gfapy_line_path.py
new file mode 100644
index 0000000..17d42d1
--- /dev/null
+++ b/tests/test_gfapy_line_path.py
@@ -0,0 +1,57 @@
+import gfapy
+import unittest
+
+class TestLinePath(unittest.TestCase):
+
+ def test_from_string(self):
+ fields = ["P","4","1+,2-,3+","9M2I3D1M,12M","ab:Z:abcd"]
+ string = "\t".join(fields)
+ gfapy.Line(string)
+ self.assertIsInstance(gfapy.Line(string), gfapy.line.group.Path)
+ self.assertEqual(str(fields[0]), gfapy.Line(string).record_type)
+ self.assertEqual(str(fields[1]), gfapy.Line(string).path_name)
+ self.assertEqual([gfapy.OrientedLine("1","+"),
+ gfapy.OrientedLine("2","-"),
+ gfapy.OrientedLine("3","+")],
+ gfapy.Line(string).segment_names)
+ self.assertEqual([[gfapy.alignment.cigar.Operation(9,"M"),
+ gfapy.alignment.cigar.Operation(2,"I"),
+ gfapy.alignment.cigar.Operation(3,"D"),
+ gfapy.alignment.cigar.Operation(1,"M")],
+ [gfapy.alignment.cigar.Operation(12,"M")]],
+ gfapy.Line(string).overlaps)
+ self.assertEqual("abcd", gfapy.Line(string).ab)
+ with self.assertRaises(gfapy.FormatError):
+ gfapy.Line(string+"\tH1")
+ with self.assertRaises(gfapy.FormatError):
+ gfapy.Line("P\tH")
+ with self.assertRaises(gfapy.FormatError):
+ f=fields[:]
+ f[2]="1,2,3"
+ gfapy.Line("\t".join(f), vlevel = 2)
+ with self.assertRaises(gfapy.InconsistencyError):
+ f=fields[:]
+ f[2]="1+,2+"
+ f[3]="9M,12M,3M"
+ gfapy.Line("\t".join(f), vlevel = 2)
+
+ f=fields[:]
+ f[3]="*,*"
+ gfapy.Line("\t".join(f), vlevel = 2)
+
+ f=fields[:]
+ f[3]="9M2I3D1M,12M,12M"
+ gfapy.Line("\t".join(f), vlevel = 2)
+
+ f=fields[:]
+ f[3]="*"
+ gfapy.Line("\t".join(f), vlevel = 2)
+
+ with self.assertRaises(gfapy.FormatError):
+ f=fields[:]
+ f[3]="12,12"
+ gfapy.Line("\t".join(f), vlevel = 2)
+ with self.assertRaises(gfapy.FormatError):
+ f=fields[:]
+ f[3]="12M|12M"
+ gfapy.Line("\t".join(f), vlevel = 2)
diff --git a/tests/test_gfapy_line_segment.py b/tests/test_gfapy_line_segment.py
new file mode 100644
index 0000000..99174f0
--- /dev/null
+++ b/tests/test_gfapy_line_segment.py
@@ -0,0 +1,63 @@
+import gfapy
+import unittest
+
+class TestLineSegment(unittest.TestCase):
+
+ def test_from_string(self):
+ fields = ["S","1","ACGTCACANNN","RC:i:1232","LN:i:11","ab:Z:abcd",
+ "FC:i:2321","KC:i:1212"]
+ string = "\t".join(fields)
+ gfapy.Line(string)
+ self.assertIsInstance(gfapy.Line(string), gfapy.line.segment.GFA1)
+ self.assertEqual(str(fields[0]), gfapy.Line(string).record_type)
+ self.assertEqual(str(fields[1]), gfapy.Line(string).name)
+ self.assertEqual(fields[2], gfapy.Line(string).sequence)
+ self.assertEqual(1232, gfapy.Line(string).RC)
+ self.assertEqual(11, gfapy.Line(string).LN)
+ self.assertEqual(2321, gfapy.Line(string).FC)
+ self.assertEqual(1212, gfapy.Line(string).KC)
+ self.assertEqual("abcd", gfapy.Line(string).ab)
+ with self.assertRaises(gfapy.FormatError):
+ gfapy.Line(string + "\tH1")
+ with self.assertRaises(gfapy.FormatError):
+ gfapy.Line("S\tH")
+ with self.assertRaises(gfapy.FormatError):
+ f = fields[:]
+ f[2]="!@#?"
+ gfapy.Line("\t".join(f), vlevel = 2)
+ with self.assertRaises(gfapy.TypeError):
+ f=fields[:]
+ f[3]="RC:Z:1232"
+ gfapy.Line("\t".join(f), version = "gfa1")
+ f=["S","2","ACGTCACANNN","LN:i:3"]
+ with self.assertRaises(gfapy.InconsistencyError):
+ gfapy.Line("\t".join(f), version = "gfa1", vlevel = 2)
+ f=["S","2","ACGTCACANNN","LN:i:11"]
+ gfapy.Line("\t".join(f))
+ f=["S","2","*","LN:i:3"]
+ gfapy.Line("\t".join(f))
+
+ def test_forbidden_segment_names(self):
+ gfapy.Line("S\tA+B\t*")
+ gfapy.Line("S\tA-B\t*")
+ gfapy.Line("S\tA,B\t*")
+ with self.assertRaises(gfapy.FormatError):
+ gfapy.Line("S\tA+,B\t*", vlevel = 2)
+ with self.assertRaises(gfapy.FormatError):
+ gfapy.Line("S\tA-,B\t*", vlevel = 2)
+
+ def test_coverage(self):
+ l = gfapy.Line("S\t0\t*\tRC:i:600\tLN:i:100")
+ self.assertEqual(6, l.coverage())
+ self.assertEqual(6, l.try_get_coverage())
+ l = gfapy.Line("S\t0\t*\tRC:i:600")
+ self.assertEqual(None, l.coverage())
+ self.assertRaises(gfapy.NotFoundError, l.try_get_coverage)
+ l = gfapy.Line("S\t0\t*\tLN:i:100")
+ self.assertEqual(None, l.coverage())
+ self.assertRaises(gfapy.NotFoundError, l.try_get_coverage)
+ l = gfapy.Line("S\t0\t*\tFC:i:600\tLN:i:100")
+ self.assertEqual(None, l.coverage())
+ self.assertRaises(gfapy.NotFoundError, l.try_get_coverage)
+ self.assertEqual(6, l.coverage(count_tag = "FC"))
+ self.assertEqual(6, l.try_get_coverage(count_tag = "FC"))
diff --git a/tests/test_gfapy_line_version.py b/tests/test_gfapy_line_version.py
new file mode 100644
index 0000000..963091a
--- /dev/null
+++ b/tests/test_gfapy_line_version.py
@@ -0,0 +1,50 @@
+import gfapy
+import unittest
+
+class TestLineVersion(unittest.TestCase):
+
+ def test_header(self):
+ self.assertEqual("generic", gfapy.Line("H\tVN:Z:1.0").version)
+ self.assertEqual("gfa1", gfapy.Line("H\tVN:Z:1.0", version = "gfa1").version)
+ self.assertEqual("gfa2", gfapy.Line("H\tVN:Z:1.0", version = "gfa2").version)
+
+ def test_comment(self):
+ self.assertEqual("generic", gfapy.Line("# VN:Z:1.0").version)
+ self.assertEqual("gfa1", gfapy.Line("# VN:Z:1.0", version = "gfa1").version)
+ self.assertEqual("gfa2", gfapy.Line("# VN:Z:1.0", version = "gfa2").version)
+
+ def test_segment(self):
+ self.assertEqual("gfa1", gfapy.Line("S\tA\tNNNN").version)
+ self.assertEqual("gfa2", gfapy.Line("S\tA\t1\tNNNN").version)
+ self.assertEqual("gfa1", gfapy.Line("S\tA\tNNNN", version = "gfa1").version)
+ self.assertEqual("gfa2", gfapy.Line("S\tA\t1\tNNNN", version = "gfa2").version)
+ with self.assertRaises(gfapy.FormatError):
+ gfapy.Line("S\tA\t1\tNNNN", version = "gfa1")
+ with self.assertRaises(gfapy.FormatError):
+ gfapy.Line("S\tA\tNNNN", version = "gfa2")
+
+ def test_link(self):
+ self.assertEqual("gfa1", gfapy.Line("L\tA\t+\tB\t-\t*").version)
+ self.assertEqual("gfa1",
+ gfapy.Line("L\tA\t+\tB\t-\t*", version = "gfa1").version)
+ with self.assertRaises(gfapy.VersionError):
+ gfapy.Line("L\tA\t+\tB\t-\t*", version = "gfa2")
+ with self.assertRaises(gfapy.VersionError):
+ gfapy.line.edge.Link(["A","+","B","-","*"], version = "gfa2")
+
+ def test_containment(self):
+ self.assertEqual("gfa1", gfapy.Line("C\tA\t+\tB\t-\t10\t*").version)
+ self.assertEqual("gfa1",
+ gfapy.Line("C\tA\t+\tB\t-\t10\t*", version = "gfa1").version)
+ with self.assertRaises(gfapy.VersionError):
+ gfapy.Line("C\tA\t+\tB\t-\t10\t*", version = "gfa2")
+ with self.assertRaises(gfapy.VersionError):
+ gfapy.line.edge.Containment(["A","+","B","-","10","*"], version = "gfa2")
+
+ def test_custom_record(self):
+ self.assertEqual("gfa2", gfapy.Line("X\tVN:Z:1.0").version)
+ self.assertEqual("gfa2", gfapy.Line("X\tVN:Z:1.0", version = "gfa2").version)
+ with self.assertRaises(gfapy.VersionError):
+ gfapy.Line("X\tVN:Z:1.0", version = "gfa1")
+ with self.assertRaises(gfapy.VersionError):
+ gfapy.line.CustomRecord(["X","VN:Z:1.0"], version = "gfa1")
diff --git a/tests/test_gfapy_numeric_array.py b/tests/test_gfapy_numeric_array.py
new file mode 100644
index 0000000..1b45f3a
--- /dev/null
+++ b/tests/test_gfapy_numeric_array.py
@@ -0,0 +1,49 @@
+import unittest
+import gfapy
+
+class TestNumericArray(unittest.TestCase):
+
+ def test_numeric_arrays(self):
+ # creation: from array, from string
+ a = gfapy.NumericArray([1,2,3,4,5])
+ b = gfapy.NumericArray.from_string("i,1,2,3,4,5")
+ self.assertEqual(a, b)
+ # validation
+ a.validate() # nothing raised
+ gfapy.NumericArray([1,2,3,4,356]).validate() # nothing raised
+ self.assertRaises(gfapy.ValueError,
+ gfapy.NumericArray([1,2.0,3,4,356]).validate)
+ self.assertRaises(gfapy.ValueError,
+ gfapy.NumericArray([1.0,2.0,3.0,4.0,356]).validate)
+ self.assertRaises(gfapy.ValueError,
+ gfapy.NumericArray([1,"x",3,4,356]).validate)
+ self.assertRaises(gfapy.ValueError,
+ gfapy.NumericArray.from_string, "i,1,X,2")
+ self.assertRaises(gfapy.FormatError,
+ gfapy.NumericArray.from_string, "")
+ self.assertRaises(gfapy.FormatError,
+ gfapy.NumericArray.from_string, "i,1,2,")
+ self.assertRaises(gfapy.TypeError,
+ gfapy.NumericArray.from_string, "x,1,2")
+ # to string
+ a = gfapy.NumericArray([18, 72, 244, 70, 96, 38, 31])
+ self.assertEqual("C", a.compute_subtype())
+ self.assertEqual("C,18,72,244,70,96,38,31", str(a))
+ a[2] = -2
+ self.assertEqual("c", a.compute_subtype())
+ self.assertEqual("c,18,72,-2,70,96,38,31", str(a))
+ a[2] = 280
+ self.assertEqual("S", a.compute_subtype())
+ self.assertEqual("S,18,72,280,70,96,38,31", str(a))
+ a[2] = -280
+ self.assertEqual("s", a.compute_subtype())
+ self.assertEqual("s,18,72,-280,70,96,38,31", str(a))
+ a[2] = 280000
+ self.assertEqual("I", a.compute_subtype())
+ self.assertEqual("I,18,72,280000,70,96,38,31", str(a))
+ a[2] = -280000
+ self.assertEqual("i", a.compute_subtype())
+ self.assertEqual("i,18,72,-280000,70,96,38,31", str(a))
+ a = gfapy.NumericArray([18.0, 72.0, -280000.0, 70.0, 96.0, 38.0, 31.0])
+ self.assertEqual("f", a.compute_subtype())
+ self.assertEqual("f,18.0,72.0,-280000.0,70.0,96.0,38.0,31.0", str(a))
diff --git a/tests/test_gfapy_segment_references.py b/tests/test_gfapy_segment_references.py
new file mode 100644
index 0000000..e294d10
--- /dev/null
+++ b/tests/test_gfapy_segment_references.py
@@ -0,0 +1,16 @@
+import gfapy
+import unittest
+
+class TestSegmentReferences(unittest.TestCase):
+
+ def test_link_other(self):
+ l = gfapy.Line("L\t1\t+\t2\t-\t*")
+ self.assertEqual("2", l.other("1"))
+ self.assertEqual("1", l.other("2"))
+ self.assertRaises(gfapy.NotFoundError, l.other, "0")
+
+ def test_link_circular(self):
+ l = gfapy.Line("L\t1\t+\t2\t-\t*")
+ self.assertEqual(False, l.is_circular())
+ l = gfapy.Line("L\t1\t+\t1\t-\t*")
+ self.assertEqual(True, l.is_circular())
diff --git a/tests/test_gfapy_sequence.py b/tests/test_gfapy_sequence.py
new file mode 100644
index 0000000..475c533
--- /dev/null
+++ b/tests/test_gfapy_sequence.py
@@ -0,0 +1,18 @@
+import unittest
+import gfapy
+
+class TestSequence(unittest.TestCase):
+ pass
+
+ #TODO: fix test
+ #def test_rc(self):
+ # self.assertEqual("gcatcgatcgt", gfapy.sequence.rc("acgatcgatgc"))
+ # self.assertEqual("gCaTCgatcgt", gfapy.sequence.rc("acgatcGAtGc"))
+ # self.assertEqual("gcatcnatcgt", gfapy.sequence.rc("acgatngatgc"))
+ # self.assertEqual("gcatcYatcgt", gfapy.sequence.rc("acgatRgatgc"))
+ # self.assertRaises(gfapy.InconsistencyError, gfapy.sequence.rc, "acgatUgatgc")
+ # self.assertEqual("gcaucgaucgu", gfapy.sequence.rc("acgaucgaugc"))
+ # self.assertEqual("===.", gfapy.sequence.rc(".==="))
+ # self.assertRaises(gfapy.ValueError, gfapy.sequence.rc, "acgatXgatgc")
+ # self.assertEqual("*", gfapy.sequence.rc("*"))
+ # self.assertRaises(gfapy.ValueError, gfapy.sequence.rc, "**")
diff --git a/tests/test_gfapy_trace.py b/tests/test_gfapy_trace.py
new file mode 100644
index 0000000..710e718
--- /dev/null
+++ b/tests/test_gfapy_trace.py
@@ -0,0 +1,17 @@
+import unittest
+import gfapy
+
+class TestTrace(unittest.TestCase):
+ def test_from_string(self):
+ self.assertEqual(gfapy.Trace([12,14,15]), gfapy.Trace._from_string("12,14,15"))
+ with self.assertRaises(gfapy.FormatError):
+ gfapy.Trace._from_string("12x,12,12")
+
+ def test_validation(self):
+ gfapy.Trace._from_string("12,12,12").validate()
+ self.assertRaises(gfapy.ValueError, gfapy.Trace._from_string("12,12,12").validate, ts = 10)
+ self.assertRaises(gfapy.ValueError, gfapy.Trace._from_string("12,-12,12").validate, ())
+ self.assertRaises(gfapy.TypeError, gfapy.Trace(["12x",12,12]).validate, ())
+
+ def test_str(self):
+ self.assertEqual("12,12,12", str(gfapy.Trace([12,12,12])))
diff --git a/tests/test_graphop_artifacts.py b/tests/test_graphop_artifacts.py
new file mode 100644
index 0000000..e2cb1a1
--- /dev/null
+++ b/tests/test_graphop_artifacts.py
@@ -0,0 +1,32 @@
+import gfapy
+import unittest
+
+class TestGraphOpArtifacts(unittest.TestCase):
+
+ def test_remove_small_components(self):
+ for sfx in ["gfa", "gfa2"]:
+ g = gfapy.Gfa.from_file("tests/testdata/two_components.{}".format(sfx))
+ self.assertEqual(2, len(g.connected_components()))
+ g.remove_small_components(1000)
+ self.assertEqual(2, len(g.connected_components()))
+ g.remove_small_components(3000)
+ self.assertEqual(1, len(g.connected_components()))
+ g.remove_small_components(10000)
+ self.assertEqual(0, len(g.connected_components()))
+
+ def test_remove_dead_ends(self):
+ for sfx in ["gfa", "gfa2"]:
+ g = gfapy.Gfa.from_file("tests/testdata/dead_ends.{}".format(sfx))
+ self.assertEqual(6, len(g.segments))
+ g.remove_dead_ends(100)
+ self.assertEqual(6, len(g.segments))
+ g.remove_dead_ends(1500)
+ self.assertEqual(5, len(g.segments))
+ g.remove_dead_ends(1500)
+ self.assertEqual(5, len(g.segments))
+ g.remove_dead_ends(150000)
+ g.remove_dead_ends(150000)
+ self.assertEqual(2, len(g.segments))
+ g.remove_dead_ends(1500000)
+ self.assertEqual(0, len(g.segments))
+
diff --git a/tests/test_graphop_copy_number.py b/tests/test_graphop_copy_number.py
new file mode 100644
index 0000000..34d8d6c
--- /dev/null
+++ b/tests/test_graphop_copy_number.py
@@ -0,0 +1,35 @@
+import gfapy
+import unittest
+
+class TestGraphopCopyNumber(unittest.TestCase):
+
+ def test_delete_low_coverage_segments(self):
+ for sfx in ["gfa", "gfa2"]:
+ gfa = gfapy.Gfa.from_file("tests/testdata/copynum.1.{}".format(sfx))
+ self.assertEqual({"0","1","2"}, set(gfa.segment_names))
+ gfa.delete_low_coverage_segments(10)
+ self.assertEqual({"1","2"}, set(gfa.segment_names))
+ gfa.delete_low_coverage_segments(100)
+ self.assertEqual({"2"}, set(gfa.segment_names))
+ gfa.delete_low_coverage_segments(1000)
+ self.assertEqual(set(), set(gfa.segment_names))
+
+ def test_compute_copy_numbers(self):
+ for sfx in ["gfa", "gfa2"]:
+ gfa = gfapy.Gfa.from_file("tests/testdata/copynum.2.{}".format(sfx))
+ gfa.compute_copy_numbers(9) # nothing raised
+ self.assertEqual(0, gfa.try_get_segment("0").cn)
+ self.assertEqual(1, gfa.try_get_segment("1").cn)
+ self.assertEqual(2, gfa.try_get_segment("2").cn)
+ self.assertEqual(3, gfa.try_get_segment("3").cn)
+
+ def test_apply_copy_number(self):
+ for sfx in ["gfa", "gfa2"]:
+ gfa = gfapy.Gfa.from_file("tests/testdata/copynum.2.{}".format(sfx))
+ self.assertEqual({"0","1","2","3"}, set(gfa.segment_names))
+ gfa.compute_copy_numbers(9)
+ gfa.apply_copy_numbers()
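+ # segments with cn == 0 are removed, those with cn > 1 are duplicated and the
+ # copies get a *2, *3, ... suffix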
+ self.assertEqual({"1","2","3","2*2","3*2","3*3"}, set(gfa.segment_names))
+ gfa.compute_copy_numbers(9)
+ assert(all(x.cn == 1 for x in gfa.segments))
+
diff --git a/tests/test_internals_field_parser.py b/tests/test_internals_field_parser.py
new file mode 100644
index 0000000..2bd20bc
--- /dev/null
+++ b/tests/test_internals_field_parser.py
@@ -0,0 +1,48 @@
+import gfapy
+import unittest
+
+class TestInternalsFieldParser(unittest.TestCase):
+
+ def test_parse_gfa_tag(self):
+ o = "AA:i:1"
+ self.assertEqual(["AA","i","1"], gfapy.Field._parse_gfa_tag(o))
+ with self.assertRaises(gfapy.FormatError):
+ gfapy.Field._parse_gfa_tag("1A:A:A")
+ with self.assertRaises(gfapy.FormatError):
+ gfapy.Field._parse_gfa_tag("_A:A:A")
+ with self.assertRaises(gfapy.FormatError):
+ gfapy.Field._parse_gfa_tag("A:A:A")
+ with self.assertRaises(gfapy.FormatError):
+ gfapy.Field._parse_gfa_tag("AAA:A:A")
+ with self.assertRaises(gfapy.FormatError):
+ gfapy.Field._parse_gfa_tag("AA:C:1")
+ with self.assertRaises(gfapy.FormatError):
+ gfapy.Field._parse_gfa_tag("AA:AA:1")
+ with self.assertRaises(gfapy.FormatError):
+ gfapy.Field._parse_gfa_tag("AA:a:1")
+
+ def test_parse_gfa_field_A(self):
+ self.assertEqual("1", gfapy.Field._parse_gfa_field("1", "A"))
+
+ def test_parse_gfa_field_i(self):
+ self.assertEqual(12, gfapy.Field._parse_gfa_field("12", "i"))
+
+ def test_parse_gfa_field_f(self):
+ self.assertEqual(1.2, gfapy.Field._parse_gfa_field("1.2", "f"))
+
+ def test_parse_gfa_field_Z(self):
+ self.assertEqual("1.2", gfapy.Field._parse_gfa_field("1.2", "Z"))
+
+ def test_parse_gfa_field_H(self):
+ self.assertEqual(gfapy.ByteArray([26]),
+ gfapy.Field._parse_gfa_field("1A", "H"))
+
+ def test_parse_gfa_field_B(self):
+ self.assertEqual([12,12,12],
+ gfapy.Field._parse_gfa_field("c,12,12,12", "B"))
+ self.assertEqual([1.2,1.2,1.2],
+ gfapy.Field._parse_gfa_field("f,1.2,1.2,1.2", "B"))
+
+ def test_parse_gfa_field_J(self):
+ self.assertEqual({"1" : 2},
+ gfapy.Field._parse_gfa_field("{\"1\":2}", "J"))
diff --git a/tests/test_internals_field_validator.py b/tests/test_internals_field_validator.py
new file mode 100644
index 0000000..f93e325
--- /dev/null
+++ b/tests/test_internals_field_validator.py
@@ -0,0 +1,42 @@
+import gfapy
+import unittest
+
+class TestInternalsFieldValidator(unittest.TestCase):
+
+ def test_field_gfa_field_validate_i(self):
+ gfapy.Field._validate_gfa_field("1" , "i")
+ gfapy.Field._validate_gfa_field("12" , "i")
+ gfapy.Field._validate_gfa_field("-12", "i")
+ self.assertRaises(gfapy.FormatError, gfapy.Field._validate_gfa_field, "1A", "i")
+ self.assertRaises(gfapy.FormatError, gfapy.Field._validate_gfa_field, "A1", "i")
+ self.assertRaises(gfapy.FormatError, gfapy.Field._validate_gfa_field, "2.1", "i")
+
+ def test_field_gfa_field_validate_A(self):
+ gfapy.Field._validate_gfa_field("A", "A")
+ self.assertRaises(gfapy.FormatError, gfapy.Field._validate_gfa_field, "AA", "A")
+
+ def test_field_gfa_field_validate_f(self):
+ gfapy.Field._validate_gfa_field("-12.1", "f")
+ gfapy.Field._validate_gfa_field("-12.1E-2", "f")
+ with self.assertRaises(gfapy.FormatError):
+ gfapy.Field._validate_gfa_field("2.1X", "f")
+
+ def test_field_gfa_field_validate_Z(self):
+ gfapy.Field._validate_gfa_field("-12.1E-2", "Z")
+
+ def test_field_gfa_field_validate_H(self):
+ gfapy.Field._validate_gfa_field("0A12121EFF", "H")
+ with self.assertRaises(gfapy.FormatError):
+ gfapy.Field._validate_gfa_field("21X1", "H")
+
+ def test_field_gfa_field_validate_B(self):
+ gfapy.Field._validate_gfa_field("i,12,-5", "B")
+ with self.assertRaises(gfapy.FormatError):
+ gfapy.Field._validate_gfa_field("C,X1", "B")
+ with self.assertRaises(gfapy.FormatError):
+ gfapy.Field._validate_gfa_field("f.1.1", "B")
+
+ def test_field_gfa_field_validate_J(self):
+ gfapy.Field._validate_gfa_field("{\"1\":2}", "J")
+ with self.assertRaises(gfapy.FormatError):
+ gfapy.Field._validate_gfa_field("1\t2", "J")
diff --git a/tests/test_internals_field_writer.py b/tests/test_internals_field_writer.py
new file mode 100644
index 0000000..acca0a4
--- /dev/null
+++ b/tests/test_internals_field_writer.py
@@ -0,0 +1,33 @@
+import gfapy
+import unittest
+
+class TestInternalsFieldWriter(unittest.TestCase):
+ def test_field_writer_i(self):
+ self.assertEqual("13", gfapy.Field._to_gfa_field(13))
+
+ def test_field_writer_f(self):
+ self.assertEqual("1.3", gfapy.Field._to_gfa_field(1.3))
+
+ def test_field_writer_Z(self):
+ self.assertEqual("1B", gfapy.Field._to_gfa_field("1B"))
+
+ def test_field_writer_H(self):
+ self.assertEqual("0D0D0D",
+ gfapy.Field._to_gfa_field(gfapy.ByteArray([13,13,13])))
+ with self.assertRaises(gfapy.ValueError):
+ gfapy.Field._to_gfa_field(gfapy.ByteArray([13,13,1.3]))
+ with self.assertRaises(gfapy.ValueError):
+ gfapy.Field._to_gfa_field(gfapy.ByteArray([13,13,350]))
+
+ def test_field_writer_B(self):
+ self.assertEqual("C,13,13,13", gfapy.Field._to_gfa_field([13,13,13]))
+ self.assertEqual("f,1.3,1.3,1.3", gfapy.Field._to_gfa_field([1.3,1.3,1.3]))
+ with self.assertRaises(gfapy.ValueError):
+ gfapy.Field._to_gfa_field([13,1.3,1.3], "B")
+
+ def test_field_writer_J(self):
+ self.assertEqual("[\"A\", 12]", gfapy.Field._to_gfa_field(["A", 12]))
+ self.assertEqual("{\"A\": 12}", gfapy.Field._to_gfa_field({"A" : 12}))
+
+ def test_field_writer_as_tag(self):
+ self.assertEqual("AA:i:13", gfapy.Field._to_gfa_tag(13, "AA"))
diff --git a/tests/test_internals_tag_datatype.py b/tests/test_internals_tag_datatype.py
new file mode 100644
index 0000000..9e1cca6
--- /dev/null
+++ b/tests/test_internals_tag_datatype.py
@@ -0,0 +1,22 @@
+import unittest
+import gfapy
+
+class TestInternalsTagDatatype(unittest.TestCase):
+
+ def test_datatype_value_independent(self):
+ self.assertEqual("Z", gfapy.Field._get_default_gfa_tag_datatype("string"))
+ self.assertEqual("i", gfapy.Field._get_default_gfa_tag_datatype(1))
+ self.assertEqual("f", gfapy.Field._get_default_gfa_tag_datatype(1.0))
+ self.assertEqual("H", gfapy.Field._get_default_gfa_tag_datatype(gfapy.ByteArray([])))
+ self.assertEqual("B", gfapy.Field._get_default_gfa_tag_datatype(gfapy.NumericArray([])))
+ self.assertEqual("J", gfapy.Field._get_default_gfa_tag_datatype({}))
+
+ def test_datatype_arrays(self):
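+ # homogeneous lists of ints or floats default to B (numeric array);
+ # anything mixed or non-numeric falls back to J (JSON)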
+ self.assertEqual("B", gfapy.Field._get_default_gfa_tag_datatype([1,1]))
+ self.assertEqual("B", gfapy.Field._get_default_gfa_tag_datatype([1.0,1.0]))
+ self.assertEqual("J", gfapy.Field._get_default_gfa_tag_datatype([1,1.0]))
+ self.assertEqual("J", gfapy.Field._get_default_gfa_tag_datatype(["1",1]))
+ self.assertEqual("J", gfapy.Field._get_default_gfa_tag_datatype([1.0,"1.0"]))
+ self.assertEqual("J", gfapy.Field._get_default_gfa_tag_datatype(["z","z"]))
+ self.assertEqual("J", gfapy.Field._get_default_gfa_tag_datatype(
+ [[1,2,3],[3,4,5]]))
diff --git a/tests/test_unit_alignment.py b/tests/test_unit_alignment.py
new file mode 100644
index 0000000..2e47ec8
--- /dev/null
+++ b/tests/test_unit_alignment.py
@@ -0,0 +1,55 @@
+import unittest
+import gfapy
+
+class TestUnitAlignment(unittest.TestCase):
+
+ cigar_1 = gfapy.CIGAR([
+ gfapy.CIGAR.Operation(12,"M"),
+ gfapy.CIGAR.Operation(1,"D"),
+ gfapy.CIGAR.Operation(2,"I"),
+ gfapy.CIGAR.Operation(0,"M"),
+ gfapy.CIGAR.Operation(1,"P")])
+
+ cigar_1_a = [
+ gfapy.CIGAR.Operation(12,"M"),
+ gfapy.CIGAR.Operation(1,"D"),
+ gfapy.CIGAR.Operation(2,"I"),
+ gfapy.CIGAR.Operation(0,"M"),
+ gfapy.CIGAR.Operation(1,"P")]
+
+ cigar_1_s = "12M1D2I0M1P"
+
+ trace_1 = gfapy.Trace([12,12,0])
+ trace_1_s = "12,12,0"
+ trace_1_a = [12,12,0]
+
+ def test_list_to_alignment(self):
+ assert(isinstance(gfapy.Alignment([]),gfapy.AlignmentPlaceholder))
+ self.assertEqual(TestUnitAlignment.cigar_1, gfapy.Alignment(TestUnitAlignment.cigar_1_a))
+ self.assertRaises(gfapy.VersionError, gfapy.Alignment, TestUnitAlignment.trace_1_a, version="gfa1")
+ self.assertEqual(TestUnitAlignment.trace_1, gfapy.Alignment(TestUnitAlignment.trace_1_a, version="gfa2"))
+ self.assertRaises(gfapy.VersionError, gfapy.Alignment, TestUnitAlignment.cigar_1_a, version="gfaX")
+ self.assertRaises(gfapy.FormatError, gfapy.Alignment, ["x",2,1])
+ # only the first element is checked, therefore:
+ malformed1 = [1,2,"x"]
+ gfapy.Alignment(malformed1, version="gfa2") # nothing raised
+ assert(isinstance(gfapy.Alignment(malformed1, version="gfa2"), gfapy.Trace))
+ self.assertRaises(gfapy.TypeError, gfapy.Alignment(malformed1, version="gfa2").validate)
+ malformed2 = [gfapy.CIGAR.Operation(12,"M"),2,"x"]
+ gfapy.Alignment(malformed2) # nothing raised
+ assert(isinstance(gfapy.Alignment(malformed2),gfapy.CIGAR))
+ self.assertRaises(gfapy.TypeError, gfapy.Alignment(malformed2).validate)
+
+ def test_cigar_from_string(self):
+ self.assertEqual(TestUnitAlignment.cigar_1,
+ gfapy.CIGAR._from_string(TestUnitAlignment.cigar_1_s))
+ assert(isinstance(gfapy.CIGAR._from_string("*"),
+ gfapy.AlignmentPlaceholder))
+ self.assertEqual(TestUnitAlignment.cigar_1,
+ gfapy.CIGAR(TestUnitAlignment.cigar_1_a))
+
+ def test_trace_from_string(self):
+ self.assertEqual(TestUnitAlignment.trace_1,
+ gfapy.Trace._from_string(TestUnitAlignment.trace_1_s))
+ self.assertRaises(gfapy.FormatError, gfapy.Trace._from_string, "A,1,2")
+
diff --git a/tests/test_unit_field_array.py b/tests/test_unit_field_array.py
new file mode 100644
index 0000000..b2bef72
--- /dev/null
+++ b/tests/test_unit_field_array.py
@@ -0,0 +1,50 @@
+import unittest
+import gfapy
+
+class TestUnitFieldArray(unittest.TestCase):
+
+ def test_initialize(self):
+ a = gfapy.FieldArray("i", [1,2,3])
+ # from a FieldArray:
+ z = gfapy.FieldArray("Z", a)
+ # no validations by default:
+ gfapy.FieldArray("i", [1,2,"a"])
+ gfapy.FieldArray("wrong", [1,2])
+
+ def test_datatype(self):
+ fa = gfapy.FieldArray("i", [1,2,3])
+ self.assertEqual("i", fa.datatype)
+
+ def test_validate(self):
+ f1 = gfapy.FieldArray("i", [1,2,3])
+ f2 = gfapy.FieldArray("i", [1,2,"a"])
+ f3 = gfapy.FieldArray("wrong", [1,2])
+ f1.validate()
+ self.assertRaises(gfapy.FormatError, f2.validate)
+ self.assertRaises(gfapy.TypeError, f3.validate)
+
+ def test_validate_gfa_field(self):
+ gfapy.FieldArray("i", [1,2,3])._validate_gfa_field("i")
+ self.assertRaises(gfapy.TypeError,
+ gfapy.FieldArray("i", [1,2,3])._validate_gfa_field, "J")
+ self.assertRaises(gfapy.FormatError,
+ gfapy.FieldArray("i", [1,2,"a"])._validate_gfa_field, "i")
+ gfapy.FieldArray("wrong", [1,2])._validate_gfa_field("i")
+
+ def test_to_gfa_field(self):
+ f = gfapy.FieldArray("i", [1,2,3])
+ self.assertEqual("1\t2\t3", f._to_gfa_field())
+
+ def test_to_gfa_tag(self):
+ f = gfapy.FieldArray("i", [1,2,3])
+ self.assertEqual("xx:i:1\txx:i:2\txx:i:3", f._to_gfa_tag("xx"))
+
+ def test_vpush(self):
+ self.assertRaises(gfapy.FormatError,
+ gfapy.FieldArray("i", [1,2,3])._vpush, "x")
+ self.assertRaises(gfapy.TypeError,
+ gfapy.FieldArray("i", [1,2,3])._vpush, 2.0)
+ self.assertRaises(gfapy.InconsistencyError,
+ gfapy.FieldArray("i", [1,2,3])._vpush, "z", "Z")
+ gfapy.FieldArray("i", [1,2,3])._vpush("z", "i")
+
diff --git a/tests/test_unit_gfa_lines.py b/tests/test_unit_gfa_lines.py
new file mode 100644
index 0000000..5f5a659
--- /dev/null
+++ b/tests/test_unit_gfa_lines.py
@@ -0,0 +1,61 @@
+import unittest
+import gfapy
+
+class TestUnitGfaLines(unittest.TestCase):
+
+ def test_register_line_merge(self):
+ g = gfapy.Gfa(version="gfa1")
+ l = gfapy.line.Header({"xx": 1}, version="gfa1")
+ l._gfa = g
+ g._register_line(l)
+ self.assertEqual(1, g.header.xx)
+ self.assertRaises(gfapy.AssertionError, g._unregister_line, l)
+
+ def test_register_line_name_present(self):
+ g = gfapy.Gfa(version="gfa1")
+ l = gfapy.line.segment.GFA1({"name": "sx"}, version="gfa1")
+ l._gfa = g
+ g._register_line(l)
+ self.assertEqual([l], g.segments)
+ self.assertEqual(l, g.line("sx"))
+ self.assertEqual(["sx"], g.segment_names)
+ g._unregister_line(l)
+ self.assertEqual([], g.segments)
+ self.assertEqual(None, g.line("sx"))
+ self.assertEqual([], g.segment_names)
+
+ def test_register_line_name_absent(self):
+ g = gfapy.Gfa(version="gfa2")
+ l = gfapy.line.edge.GFA2({"eid": gfapy.Placeholder()},
+ version="gfa2")
+ l._gfa = g
+ g._register_line(l)
+ self.assertEqual([l], g.edges)
+ self.assertEqual([], g.edge_names)
+ g._unregister_line(l)
+ self.assertEqual([], g.edges)
+
+ def test_register_line_external(self):
+ g = gfapy.Gfa(version="gfa2")
+ l = gfapy.line.Fragment({"external": gfapy.OrientedLine("x","+")},
+ version="gfa2")
+ l._gfa = g
+ g._register_line(l)
+ self.assertEqual([l], g.fragments)
+ self.assertEqual([l], g.fragments_for_external("x"))
+ self.assertEqual(["x"], g.external_names)
+ g._unregister_line(l)
+ self.assertEqual([], g.fragments)
+ self.assertEqual([], g.fragments_for_external("x"))
+ self.assertEqual([], g.external_names)
+
+ def test_register_line_unnamed(self):
+ g = gfapy.Gfa(version="gfa1")
+ l = gfapy.line.edge.Link({}, version="gfa1")
+ l._gfa = g
+ g._register_line(l)
+ self.assertEqual([l], g.dovetails)
+ g._unregister_line(l)
+ self.assertEqual([], g.dovetails)
+
+
diff --git a/tests/test_unit_header.py b/tests/test_unit_header.py
new file mode 100644
index 0000000..372b13e
--- /dev/null
+++ b/tests/test_unit_header.py
@@ -0,0 +1,110 @@
+import unittest
+import gfapy
+
+class TestUnitHeader(unittest.TestCase):
+
+ def test_new(self):
+ gfapy.line.Header(["H", "VN:Z:1.0", "xx:i:11"])
+
+ def test_string_to_gfa_line(self):
+ gfapy.Line("H\tVN:Z:1.0")
+ assert(isinstance(gfapy.Line("H\tVN:Z:1.0"),gfapy.line.Header))
+ self.assertEqual(gfapy.line.Header(["H", "VN:Z:1.0", "xx:i:11"]),
+ gfapy.Line("H\tVN:Z:1.0\txx:i:11"))
+ self.assertRaises(gfapy.FormatError,
+ gfapy.Line, "H\tH2\tVN:Z:1.0")
+ self.assertRaises(gfapy.TypeError,
+ gfapy.Line, "H\tVN:i:1.0")
+
+ def test_to_s(self):
+ try:
+ self.assertEqual("H\tVN:Z:1.0\txx:i:11",
+ str(gfapy.line.Header(["H", "VN:Z:1.0", "xx:i:11"])))
+ except AssertionError:
+ self.assertEqual("H\txx:i:11\tVN:Z:1.0",
+ str(gfapy.line.Header(["H", "VN:Z:1.0", "xx:i:11"])))
+
+ def test_tag_reading(self):
+ self.assertEqual("1.0",
+ gfapy.line.Header(["H", "VN:Z:1.0", "xx:i:11"]).VN)
+
+ def test_tag_writing(self):
+ gfapy.line.Header(["H", "VN:Z:1.0", "xx:i:11"]).VN = "2.0"
+
+ def test_connection(self):
+ assert(not gfapy.line.Header(["H"]).is_connected())
+ assert(gfapy.Gfa().header.is_connected())
+ self.assertRaises(gfapy.RuntimeError,
+ gfapy.line.Header(["H"]).connect, gfapy.Gfa())
+
+ def test_to_gfa1_a(self):
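+ # conversion rewrites the VN tag to the target version and keeps the other tags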
+ line = gfapy.Line("H\tVN:Z:1.0\txx:i:1")
+ self.assertEqual("H", line._to_gfa1_a()[0])
+ self.assertEqual(sorted(["VN:Z:1.0", "xx:i:1"]), sorted(line._to_gfa1_a()[1:]))
+ line = gfapy.Line("H\tVN:Z:2.0\txx:i:1")
+ self.assertEqual("H", line._to_gfa1_a()[0])
+ self.assertEqual(sorted(["VN:Z:1.0", "xx:i:1"]), sorted(line._to_gfa1_a()[1:]))
+
+ def test_to_gfa2_a(self):
+ line = gfapy.Line("H\tVN:Z:1.0\txx:i:1")
+ self.assertEqual("H", line._to_gfa2_a()[0])
+ self.assertEqual(sorted(["VN:Z:2.0", "xx:i:1"]), sorted(line._to_gfa2_a()[1:]))
+ line = gfapy.Line("H\tVN:Z:2.0\txx:i:1")
+ self.assertEqual("H", line._to_gfa2_a()[0])
+ self.assertEqual(sorted(["VN:Z:2.0", "xx:i:1"]), sorted(line._to_gfa2_a()[1:]))
+
+ def test_add(self):
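+ # add() collects repeated tags into a list of values; for predefined
+ # single-value tags (VN, TS) a new value must agree with the existing one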
+ line = gfapy.Line("H\tVN:Z:2.0\txx:i:1")
+ line.add("yy", "test")
+ self.assertEqual("test", line.yy)
+ line.add("yy", "test")
+ self.assertEqual(["test","test"], line.yy)
+ line.add("yy", "test")
+ self.assertEqual(["test","test","test"], line.yy)
+ line.add("VN", "2.0")
+ self.assertEqual("2.0", line.VN)
+ self.assertRaises(gfapy.InconsistencyError, line.add, "VN", "1.0")
+ line.add("TS", "120")
+ self.assertEqual(120, line.TS)
+ line.add("TS", 120)
+ line.add("TS", "120")
+ self.assertRaises(gfapy.InconsistencyError, line.add, "TS", 130)
+ self.assertRaises(gfapy.InconsistencyError, line.add, "TS", "140")
+
+ def test_field_to_s(self):
+ line = gfapy.Line("H\tVN:Z:1.0\txx:i:1")
+ line.add("xx", 2)
+ self.assertEqual("1.0", line.field_to_s("VN"))
+ self.assertEqual("1\t2", line.field_to_s("xx"))
+ self.assertEqual("VN:Z:1.0", line.field_to_s("VN", tag=True))
+ self.assertEqual("xx:i:1\txx:i:2", line.field_to_s("xx", tag=True))
+
+ def test_n_duptags(self):
+ line = gfapy.Line("H\tVN:Z:1.0\txx:i:1")
+ self.assertEqual(0, line._n_duptags())
+ line.add("xx", 2)
+ self.assertEqual(1, line._n_duptags())
+ line.add("xx", 2)
+ self.assertEqual(1, line._n_duptags())
+ line.add("zz", 2)
+ self.assertEqual(1, line._n_duptags())
+ line.add("zz", 2)
+ self.assertEqual(2, line._n_duptags())
+
+ def test_split(self):
+ line = gfapy.Line("H\tVN:Z:1.0\txx:i:1")
+ line.add("xx", 2)
+ self.assertEqual(3, len(line._split()))
+ for s in line._split():
+ assert(isinstance(s, gfapy.line.Header))
+ self.assertEqual(sorted(["H\tVN:Z:1.0", "H\txx:i:1", "H\txx:i:2"]),
+ sorted([str(x) for x in line._split()]))
+
+ def test_merge(self):
+ line1 = gfapy.Line("H\tVN:Z:1.0\txx:i:1")
+ line2 = gfapy.Line("H\txx:i:2\tyy:f:1.0")
+ line1._merge(line2)
+ self.assertEqual("1.0", line1.VN)
+ self.assertEqual([1,2], line1.xx)
+ self.assertEqual(1.0, line1.yy)
+
diff --git a/tests/test_unit_line.py b/tests/test_unit_line.py
new file mode 100644
index 0000000..4d2e2f5
--- /dev/null
+++ b/tests/test_unit_line.py
@@ -0,0 +1,89 @@
+import unittest
+import gfapy
+
+class TestUnitLine(unittest.TestCase):
+
+ def test_initialize_not_enough_positional_fields(self):
+ gfapy.Line(["S", "1", "*"])
+ with self.assertRaises(gfapy.FormatError):
+ gfapy.Line(["S", "1"])
+
+ def test_initialize_too_many_positionals(self):
+ with self.assertRaises(gfapy.FormatError):
+ gfapy.Line(["S", "1", "*", "*"])
+
+ def test_initialize_predefined_tag_wrong_type(self):
+ gfapy.line.Header(["H", "VN:Z:1"])
+ with self.assertRaises(gfapy.TypeError):
+ gfapy.line.Header(["H", "VN:i:1"])
+
+ def test_initialize_wrong_tag_format(self):
+ with self.assertRaises(gfapy.FormatError):
+ gfapy.line.Header(["H", "VN i:1"])
+
+ def test_initialize_positional_field_type_error(self):
+ with self.assertRaises(gfapy.FormatError):
+ gfapy.line.segment.GFA1(["S", "1\t1", "*", "*"])
+
+ def test_initialize_tag_type_error(self):
+ with self.assertRaises(gfapy.FormatError):
+ gfapy.line.Header(["H", "zz:i:1A"])
+
+ def test_initialize_duplicate_tag(self):
+ with self.assertRaises(gfapy.NotUniqueError):
+ gfapy.line.Header(["H", "zz:i:1", "zz:i:2"])
+ with self.assertRaises(gfapy.NotUniqueError):
+ gfapy.line.Header(["H", "zz:i:1", "VN:Z:1", "zz:i:2"])
+
+ def test_initialize_custom_tag(self):
+ with self.assertRaises(gfapy.FormatError):
+ gfapy.line.Header(["H", "ZZ:Z:1"])
+
+ def test_record_type(self):
+ l = gfapy.line.Header(["H", "xx:i:13", "VN:Z:HI"])
+ self.assertEqual("H", l.record_type)
+ with self.assertRaises(AttributeError):
+ l.record_type = "S"
+
+ def test_add_tag(self):
+ l = gfapy.line.Header(["H", "xx:i:13", "VN:Z:HI"])
+ self.assertEqual(None, l.xy)
+ l.set("xy", "HI")
+ self.assertEqual("HI", l.xy)
+
+ def test_unknown_record_type(self):
+ with self.assertRaises(gfapy.VersionError):
+ gfapy.Line("Z\txxx", version = "gfa1")
+ gfapy.Line("Z\txxx", version = "gfa2")
+ gfapy.Line("Z\txxx")
+
+ def test_field_alias(self):
+ s = gfapy.Line("S\tA\t*")
+ self.assertEqual(s.name, s.get("name"))
+ self.assertEqual("A", s.name)
+ self.assertEqual("A", s.sid)
+ self.assertEqual("A", s.get("name"))
+ self.assertEqual("A", s.get("sid"))
+ s.set("name", "B")
+ self.assertEqual("B", s.get("sid"))
+ s.set("sid", "C")
+ self.assertEqual("C", s.name)
+
+ def test_to_s(self):
+ fields = ["H", "VN:Z:HI", "xx:i:13"]
+ l = gfapy.line.Header(fields[:])
+ lstr = str(l)
+ self.assertEqual("\t".join(fields), lstr)
+
+ def test_clone(self):
+ l = gfapy.Line("H\tVN:Z:1.0")
+ l1 = l
+ l2 = l.clone()
+ self.assertIsInstance(l, gfapy.line.Header)
+ self.assertIsInstance(l2, gfapy.line.Header)
+ l2.VN = "2.0"
+ self.assertEqual("2.0", l2.VN)
+ self.assertEqual("1.0", l.VN)
+ l1.VN = "2.0"
+ self.assertEqual("2.0", l.VN)
+
diff --git a/tests/test_unit_line_cloning.py b/tests/test_unit_line_cloning.py
new file mode 100644
index 0000000..6fc8a55
--- /dev/null
+++ b/tests/test_unit_line_cloning.py
@@ -0,0 +1,56 @@
+import unittest
+import gfapy
+
+class TestUnitLineCloning(unittest.TestCase):
+
+ def test_clone_tags(self):
+ l = gfapy.Line("H\tVN:Z:1.0")
+ l1 = l
+ l2 = l.clone()
+ self.assertIsInstance(l, gfapy.line.Header)
+ self.assertIsInstance(l2, gfapy.line.Header)
+ l2.VN = "2.0"
+ self.assertEqual("2.0", l2.VN)
+ self.assertEqual("1.0", l.VN)
+ l1.VN = "2.0"
+ self.assertEqual("2.0", l.VN)
+
+ def test_clone_deep_string(self):
+ s = gfapy.Line("S\t1\tCAGCTTG")
+ s_clone = s.clone()
+ s_clone.sequence += "CCC"
+ self.assertNotEqual(s_clone.sequence, s.sequence)
+
+ def test_clone_deep_posfield_array(self):
+ u = gfapy.Line("U\t*\t1 2 3")
+ u_clone = u.clone()
+ self.assertEqual(u_clone.items, u.items)
+ self.assertNotEqual(id(u_clone.items), id(u.items))
+ u_clone.items.append("4")
+ self.assertNotEqual(u_clone.items, u.items)
+
+ def test_clone_deep_J_field(self):
+ h = gfapy.Line("H\txx:J:[1,2,3]")
+ h_clone = h.clone()
+ self.assertEqual(h_clone.xx, h.xx)
+ self.assertNotEqual(id(h_clone.xx), id(h.xx))
+ h_clone.xx[0] += 1
+ self.assertNotEqual(h_clone.xx, h.xx)
+
+ def test_clone_disconnected(self):
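+ # cloning a connected line is expected to yield a disconnected copy, with references to other lines replaced by their names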
+ g = gfapy.Gfa()
+ sA = gfapy.Line("S\tA\t7\tCAGCTTG")
+ u = gfapy.Line("U\tU12\tA B C")
+ g.add_line(sA)
+ g.add_line(u)
+ assert(u.is_connected())
+ self.assertEqual([u], sA.sets)
+ self.assertEqual([u], g.sets)
+ u_clone = u.clone()
+ assert(not u_clone.is_connected())
+ self.assertEqual([u], sA.sets)
+ self.assertEqual([u], g.sets)
+ assert(all(isinstance(i,gfapy.Line) for i in u.items))
+ assert(not any(isinstance(i,gfapy.Line) for i in u_clone.items))
+ self.assertEqual(["A", "B", "C"], [e.name for e in u.items])
+ self.assertEqual(["A", "B", "C"], u_clone.items)
diff --git a/tests/test_unit_line_connection.py b/tests/test_unit_line_connection.py
new file mode 100644
index 0000000..e26464f
--- /dev/null
+++ b/tests/test_unit_line_connection.py
@@ -0,0 +1,154 @@
+import unittest
+import gfapy
+
+class TestUnitLineConnection(unittest.TestCase):
+
+ def test_connected_and_gfa(self):
+ s1 = gfapy.Line("S\t1\tACCAT")
+ assert(not s1.is_connected())
+ self.assertEqual(None, s1.gfa)
+ g = gfapy.Gfa()
+ g.append(s1)
+ assert(s1.is_connected())
+ assert(g is s1.gfa)
+
+ def test_connect(self):
+ s2 = gfapy.Line("S\t2\tACCAT")
+ assert(not s2.is_connected())
+ self.assertEqual(None, s2.gfa)
+ g = gfapy.Gfa()
+ s2.connect(g)
+ assert(s2.is_connected())
+ assert(g is s2.gfa)
+
+ def test_connect_registers_line(self):
+ s2 = gfapy.Line("S\t2\tACCAT")
+ g = gfapy.Gfa()
+ self.assertEqual([], g.segments)
+ s2.connect(g)
+ self.assertEqual([s2], g.segments)
+
+ def test_disconnect(self):
+ s1 = gfapy.Line("S\t1\tACCAT")
+ g = gfapy.Gfa()
+ g.append(s1)
+ assert(s1.is_connected())
+ assert(g is s1.gfa)
+ s1.disconnect()
+ assert(not s1.is_connected())
+ self.assertEqual(None, s1.gfa)
+
+ def test_disconnect_unregisters_line(self):
+ s1 = gfapy.Line("S\t1\tACCAT")
+ g = gfapy.Gfa()
+ g.append(s1)
+ self.assertEqual([s1], g.segments)
+ s1.disconnect()
+ self.assertEqual([], g.segments)
+
+ def test_disconnect_removes_field_backreferences(self):
+ s1 = gfapy.Line("S\t1\tACCAT")
+ l = gfapy.Line("L\t1\t+\t2\t-\t*")
+ g = gfapy.Gfa()
+ g.append(s1)
+ g.append(l)
+ self.assertEqual([l], s1.dovetails)
+ l.disconnect()
+ self.assertEqual([], s1.dovetails)
+
+ def test_disconnect_removes_field_references(self):
+ s1 = gfapy.Line("S\t1\tACCAT")
+ l = gfapy.Line("L\t1\t+\t2\t-\t*")
+ g = gfapy.Gfa()
+ g.append(s1)
+ g.append(l)
+ assert(l.get("from") is s1)
+ l.disconnect()
+ assert(not l.get("from") is s1)
+ self.assertEqual("1", l.get("from"))
+
+ def test_disconnect_disconnects_dependent_lines(self):
+ s1 = gfapy.Line("S\t1\tACCAT")
+ l = gfapy.Line("L\t1\t+\t2\t-\t*")
+ g = gfapy.Gfa()
+ g.append(s1)
+ g.append(l)
+ assert(l.is_connected())
+ s1.disconnect()
+ assert(not l.is_connected())
+
+ def test_disconnect_removes_nonfield_backreferences(self):
+ s1 = gfapy.Line("S\t1\tACCAT")
+ s2 = gfapy.Line("S\t2\tCATGG")
+ s3 = gfapy.Line("S\t3\tTGGAA")
+ l12 = gfapy.Line("L\t1\t+\t2\t+\t*")
+ l23 = gfapy.Line("L\t2\t+\t3\t+\t*")
+ p4 = gfapy.Line("P\t4\t1+,2+,3+\t*")
+ g = gfapy.Gfa()
+ for line in [s1, s2, s3, l12, l23, p4]:
+ g.append(line)
+ self.assertEqual([p4], l12.paths)
+ p4.disconnect()
+ self.assertEqual([], l12.paths)
+
+ def test_disconnect_removes_nonfield_references(self):
+ s1 = gfapy.Line("S\t1\tACCAT")
+ s2 = gfapy.Line("S\t2\tCATGG")
+ s3 = gfapy.Line("S\t3\tTGGAA")
+ l12 = gfapy.Line("L\t1\t+\t2\t+\t*")
+ l23 = gfapy.Line("L\t2\t+\t3\t+\t*")
+ p4 = gfapy.Line("P\t4\t1+,2+,3+\t*")
+ g = gfapy.Gfa()
+ for line in [s1, s2, s3, l12, l23, p4]:
+ g.append(line)
+ self.assertEqual([gfapy.OrientedLine(l12,"+"),gfapy.OrientedLine(l23,"+")], p4.links)
+ p4.disconnect()
+ self.assertEqual([], p4.links)
+
+ def test_add_reference(self):
+ s1 = gfapy.Line("S\t1\tACCAT")
+ self.assertEqual([], s1.gaps_L)
+ s1._add_reference("X", "gaps_L")
+ self.assertEqual(["X"], s1.gaps_L)
+ s1._add_reference("Y", "gaps_L")
+ self.assertEqual(["X", "Y"], s1.gaps_L)
+ s1._add_reference("Z", "gaps_L", append=False)
+ self.assertEqual(["Z", "X", "Y"], s1.gaps_L)
+
+ def test_delete_reference(self):
+ s1 = gfapy.Line("S\t1\tACCAT")
+ s1._add_reference("A", "gaps_L")
+ s1._add_reference("B", "gaps_L")
+ s1._add_reference("C", "gaps_L")
+ s1._add_reference("D", "gaps_L")
+ s1._add_reference("E", "gaps_L")
+ self.assertEqual(["A", "B", "C", "D", "E"], s1.gaps_L)
+ s1._delete_reference("C", "gaps_L")
+ self.assertEqual(["A", "B", "D", "E"], s1.gaps_L)
+ s1._delete_first_reference("gaps_L")
+ self.assertEqual(["B", "D", "E"], s1.gaps_L)
+ s1._delete_last_reference("gaps_L")
+ self.assertEqual(["B", "D"], s1.gaps_L)
+
+ def test_update_references(self):
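+ # _update_references replaces a matching reference with the new value, removes it if the new value is None, and ignores references which are not present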
+ s1 = gfapy.Line("S\t1\tACCAT")
+ gA = gfapy.line.Gap({})
+ gnewA = gfapy.line.Gap({})
+ gB = gfapy.line.Gap({})
+ gC = gfapy.line.Gap({})
+ gD = gfapy.line.Gap({})
+ gE = gfapy.line.Gap({})
+ gX = gfapy.line.Gap({})
+ s1._add_reference(gA, "gaps_L")
+ s1._add_reference(gB, "gaps_L")
+ s1._add_reference(gC, "gaps_L")
+ s1._add_reference(gD, "gaps_L")
+ s1._add_reference(gE, "gaps_L")
+ self.assertEqual([gA, gB, gC, gD, gE], s1.gaps_L)
+ s1._update_references(gA, gnewA, "sid1")
+ self.assertEqual([gnewA, gB, gC, gD, gE], s1.gaps_L)
+ s1._update_references(gX, "newX", "sid1")
+ self.assertEqual([gnewA, gB, gC, gD, gE], s1.gaps_L)
+ s1._update_references(gB, None, "sid1")
+ self.assertEqual([gnewA, gC, gD, gE], s1.gaps_L)
+
diff --git a/tests/test_unit_line_dynamic_fields.py b/tests/test_unit_line_dynamic_fields.py
new file mode 100644
index 0000000..38fab18
--- /dev/null
+++ b/tests/test_unit_line_dynamic_fields.py
@@ -0,0 +1,96 @@
+import unittest
+import gfapy
+
+class TestUnitLineDynamicFields(unittest.TestCase):
+
+ def test_respond_to(self):
+ l = gfapy.line.edge.Link(["L", "1", "+", "2", "-", "*", "zz:Z:yes", "KC:i:100"])
+ # record_type
+ self.assertTrue(hasattr(l, "record_type"))
+ # reqfields
+ self.assertTrue(hasattr(l, "from"))
+ self.assertIsInstance(object.__getattribute__(l, "from"),
+ gfapy.line.common.dynamic_fields.DynamicField)
+ # predefined tags
+ self.assertTrue(hasattr(l, "KC"))
+ self.assertTrue(hasattr(l, "try_get_KC"))
+ self.assertIsInstance(object.__getattribute__(l, "KC"),
+ gfapy.line.common.dynamic_fields.DynamicField)
+ # custom tags
+ self.assertTrue(hasattr(l, "zz"))
+ self.assertTrue(hasattr(l, "try_get_zz"))
+ # not-yet-existing tags
+ self.assertTrue(hasattr(l, "aa"))
+ # commented out: hasattr calls getattr, which raises an exception here in Python
+ #self.assertTrue(hasattr(l, "try_get_aa"))
+
+ def test_field_getters_positional_fields(self):
+ l = gfapy.Line(["S", "12", "*", "xx:i:13", "KC:i:10"])
+ self.assertEqual("12", l.name)
+ with self.assertRaises(AttributeError):
+ l.zzz
+
+ def test_field_getters_existing_tags(self):
+ l = gfapy.Line(["S", "12", "*", "xx:i:13", "KC:i:10"])
+ self.assertEqual("xx", sorted(l.tagnames)[1])
+ self.assertEqual("13", l.field_to_s("xx"))
+ self.assertEqual(13, l.xx)
+ self.assertEqual(13, l.try_get_xx())
+ self.assertEqual("10", l.field_to_s("KC"))
+ self.assertEqual(10, l.KC)
+ self.assertEqual(10, l.try_get_KC())
+
+ def test_field_getters_not_existing_tags(self):
+ l = gfapy.line.Header(["H", "xx:i:13", "VN:Z:HI"])
+ self.assertEqual(None, l.zz)
+ with self.assertRaises(gfapy.NotFoundError):
+ l.try_get_zz()
+
+ def test_field_setters_positional_fields(self):
+ l = gfapy.Line(["S", "12", "*", "xx:i:13", "KC:i:1200"])
+ with self.assertRaises(gfapy.FormatError):
+ l.name = "A\t1"
+ l.validate_field("name")
+ l.name = "14"
+ self.assertEqual("14", l.name)
+
+ def test_field_setters_existing_tags(self):
+ l = gfapy.line.Header(["H", "xx:i:13", "VN:Z:HI"], vlevel = 3)
+ self.assertEqual(13, l.xx)
+ l.xx = 15
+ self.assertEqual(15, l.xx)
+ with self.assertRaises(gfapy.FormatError):
+ l.xx = "1A"
+ l.set_datatype("xx", "Z")
+ l.xx = "1A"
+ self.assertEqual("HI", l.VN)
+ l.VN = "HO"
+ self.assertEqual("HO", l.VN)
+
+ def test_field_setters_not_existing_tags(self):
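+ # for new tags, the default GFA datatype is inferred from the Python type of the value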
+ l = gfapy.line.Header(["H", "xx:i:13", "VN:Z:HI"])
+ l.zz="1"
+ self.assertEqual("1", l.zz)
+ self.assertEqual("Z", gfapy.Field._get_default_gfa_tag_datatype(l.zz))
+ l.zi=1
+ self.assertEqual(1, l.zi)
+ self.assertEqual("i", gfapy.Field._get_default_gfa_tag_datatype(l.zi))
+ l.zf=1.0
+ self.assertEqual(1.0, l.zf)
+ self.assertEqual("f", gfapy.Field._get_default_gfa_tag_datatype(l.zf))
+ l.bf=[1.0, 1.0]
+ self.assertEqual([1.0, 1.0], l.bf)
+ self.assertEqual("B", gfapy.Field._get_default_gfa_tag_datatype(l.bf))
+ l.bi=[1.0, 1.0]
+ self.assertEqual([1, 1], l.bi)
+ self.assertEqual("B", gfapy.Field._get_default_gfa_tag_datatype(l.bi))
+ l.ba=[1.0, 1]
+ self.assertEqual([1.0, 1], l.ba)
+ self.assertEqual("J", gfapy.Field._get_default_gfa_tag_datatype(l.ba))
+ l.bh={"a" : 1.0, "b" : 1}
+ self.assertEqual({"a" : 1.0, "b" : 1}, gfapy.Line(str(l)).bh)
+ self.assertEqual("J", gfapy.Field._get_default_gfa_tag_datatype(l.bh))
+ # Assignment of new attributes is possible in Python.
+ #with self.assertRaises(AttributeError):
+ # l.zzz="1"
+
diff --git a/tests/test_unit_line_equivalence.py b/tests/test_unit_line_equivalence.py
new file mode 100644
index 0000000..5e2a9d3
--- /dev/null
+++ b/tests/test_unit_line_equivalence.py
@@ -0,0 +1,150 @@
+import unittest
+import gfapy
+
+class TestUnitLineEquivalence(unittest.TestCase):
+
+ a = gfapy.Line("S\tA\t*\tLN:i:8\txx:Z:a")
+ b = gfapy.Line("S\tB\t*\tLN:i:10")
+ c = gfapy.Line("C\tA\t+\tB\t+\t10\t*")
+ l = gfapy.Line("L\tA\t+\tB\t+\t*")
+ e = gfapy.Line("E\t1\tA+\tB-\t0\t100$\t20\t121\t*")
+
+ a_ln = gfapy.Line("S\tA\t*\tLN:i:10\txx:Z:a")
+ a_seq = gfapy.Line("S\tA\tACCTTCGT\tLN:i:8\txx:Z:a")
+ a_gfa2 = gfapy.Line("S\tA\t8\tACCTTCGT\txx:Z:a")
+ a_noxx = gfapy.Line("S\tA\t*\tLN:i:8")
+ a_yy = gfapy.Line("S\tA\t*\tLN:i:8\txx:Z:a\tyy:Z:b")
+ l_from = gfapy.Line("L\tC\t+\tB\t+\t*")
+ e_name = gfapy.Line("E\t2\tA+\tB-\t0\t100$\t20\t121\t*")
+
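+ # dict fixtures for _has_field_values(): h_a matches segment a; each variant below differs in a single field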
+ h_a = {"record_type": "S",
+ "name": "A",
+ "LN": 8,
+ "xx": "a"}
+ h_a_rt = h_a.copy()
+ h_a_rt["record_type"] = "X"
+ h_a_pl = h_a.copy()
+ h_a_pl["name"] = gfapy.Placeholder()
+ h_a_name = h_a.copy()
+ h_a_name["name"] = "B"
+ h_a_seq = h_a.copy()
+ h_a_seq["sequence"] = "ACCTTCGT"
+ h_a_ln = h_a.copy()
+ h_a_ln["LN"] = 10
+ h_a_LNstr = h_a.copy()
+ h_a_LNstr["LN"] = "8"
+ h_a_noxx = h_a.copy()
+ h_a_noxx.pop("xx")
+ h_a_yy = h_a.copy()
+ h_a_yy["yy"] = "b"
+ h_a_gfa2 = {"record_type": "S",
+ "sid": "A",
+ "slen": 8,
+ "xx": "a"}
+
+
+ def test_line_placeholder(self):
+ assert(not gfapy.is_placeholder(TestUnitLineEquivalence.a))
+ assert(not gfapy.is_placeholder(TestUnitLineEquivalence.b))
+
+ def test_line_diff_two_segments(self):
+ adiffb = [("different", "positional_field", "name", "A", "B"),
+ ("exclusive", "<", "tag", "xx", "Z", "a"),
+ ("different", "tag", "LN", "i", "8", "i", "10")]
+ self.assertEqual(sorted(adiffb), sorted(TestUnitLineEquivalence.a.diff(TestUnitLineEquivalence.b)))
+ bdiffa = [("different", "positional_field", "name", "B", "A"),
+ ("exclusive", ">", "tag", "xx", "Z", "a"),
+ ("different", "tag", "LN", "i", "10", "i", "8")]
+ self.assertEqual(sorted(bdiffa), sorted(TestUnitLineEquivalence.b.diff(TestUnitLineEquivalence.a)))
+ self.assertEqual([], TestUnitLineEquivalence.a.diff(TestUnitLineEquivalence.a))
+ self.assertEqual([], TestUnitLineEquivalence.b.diff(TestUnitLineEquivalence.b))
+
+ def test_line_diffscript_two_segments(self):
+ acpy = TestUnitLineEquivalence.a.clone()
+ exec(acpy.diffscript(TestUnitLineEquivalence.b, "acpy"))
+ self.assertNotEqual(str(TestUnitLineEquivalence.b), str(TestUnitLineEquivalence.a))
+ self.assertEqual(str(TestUnitLineEquivalence.b), str(acpy))
+ bcpy = TestUnitLineEquivalence.b.clone()
+ exec(bcpy.diffscript(TestUnitLineEquivalence.a, "bcpy"))
+ self.assertNotEqual(str(TestUnitLineEquivalence.a), str(TestUnitLineEquivalence.b))
+ self.assertEqual(str(TestUnitLineEquivalence.a), str(bcpy))
+
+ def test_equal(self):
+ assert(TestUnitLineEquivalence.a == TestUnitLineEquivalence.a)
+ assert(TestUnitLineEquivalence.b == TestUnitLineEquivalence.b)
+ assert(TestUnitLineEquivalence.c == TestUnitLineEquivalence.c)
+ assert(TestUnitLineEquivalence.l == TestUnitLineEquivalence.l)
+ assert(TestUnitLineEquivalence.e == TestUnitLineEquivalence.e)
+ assert(not (TestUnitLineEquivalence.a == TestUnitLineEquivalence.b))
+ assert(not (TestUnitLineEquivalence.a == TestUnitLineEquivalence.a_ln))
+ assert(not (TestUnitLineEquivalence.a == TestUnitLineEquivalence.a_seq))
+ assert(not (TestUnitLineEquivalence.a == TestUnitLineEquivalence.a_gfa2))
+ assert(not (TestUnitLineEquivalence.a == TestUnitLineEquivalence.a_noxx))
+ assert(TestUnitLineEquivalence.b == TestUnitLineEquivalence.b.clone())
+ assert(TestUnitLineEquivalence.a == TestUnitLineEquivalence.a.clone())
+
+ def test_pointer_equality(self):
+ assert(TestUnitLineEquivalence.a is TestUnitLineEquivalence.a)
+ assert(not TestUnitLineEquivalence.a is TestUnitLineEquivalence.a.clone())
+
+ def test_has_eql_fields(self):
+ # same object
+ assert(TestUnitLineEquivalence.a._has_eql_fields(TestUnitLineEquivalence.a))
+ # clone
+ assert(TestUnitLineEquivalence.a._has_eql_fields(TestUnitLineEquivalence.a.clone()))
+ # positional field difference
+ assert(not TestUnitLineEquivalence.l._has_eql_fields(TestUnitLineEquivalence.l_from))
+ assert(TestUnitLineEquivalence.l._has_eql_fields(TestUnitLineEquivalence.l_from, ["from"]))
+ # positional field difference: name alias
+ assert(not TestUnitLineEquivalence.e._has_eql_fields(TestUnitLineEquivalence.e_name))
+ assert(TestUnitLineEquivalence.e._has_eql_fields(TestUnitLineEquivalence.e_name, ["eid"]))
+ assert(TestUnitLineEquivalence.e._has_eql_fields(TestUnitLineEquivalence.e_name, ["name"]))
+ # positional field difference: placeholder in line
+ assert(TestUnitLineEquivalence.a._has_eql_fields(TestUnitLineEquivalence.a_seq))
+ # positional field difference: placeholder in reference
+ assert(TestUnitLineEquivalence.a_seq._has_eql_fields(TestUnitLineEquivalence.a))
+ # tag difference
+ assert(not TestUnitLineEquivalence.a._has_eql_fields(TestUnitLineEquivalence.a_ln))
+ assert(TestUnitLineEquivalence.a._has_eql_fields(TestUnitLineEquivalence.a_ln, ["LN"]))
+ # additional tag in line
+ assert(TestUnitLineEquivalence.a._has_eql_fields(TestUnitLineEquivalence.a_noxx))
+ assert(not TestUnitLineEquivalence.a_noxx._has_eql_fields(TestUnitLineEquivalence.a))
+ # missing tag in line
+ assert(not TestUnitLineEquivalence.a._has_eql_fields(TestUnitLineEquivalence.a_yy))
+ assert(TestUnitLineEquivalence.a_yy._has_eql_fields(TestUnitLineEquivalence.a))
+ assert(TestUnitLineEquivalence.a._has_eql_fields(TestUnitLineEquivalence.a_yy, ["yy"]))
+ # gfa1 vs gfa2
+ assert(TestUnitLineEquivalence.a._has_eql_fields(TestUnitLineEquivalence.a_gfa2, ["slen"]))
+ assert(TestUnitLineEquivalence.a_gfa2._has_eql_fields(TestUnitLineEquivalence.a, ["LN"]))
+ # record_type
+ assert(not TestUnitLineEquivalence.c._has_eql_fields(TestUnitLineEquivalence.l))
+ assert(not TestUnitLineEquivalence.l._has_eql_fields(TestUnitLineEquivalence.c))
+ assert(TestUnitLineEquivalence.c._has_eql_fields(TestUnitLineEquivalence.l, ["record_type"]))
+ assert(TestUnitLineEquivalence.l._has_eql_fields(TestUnitLineEquivalence.c, ["record_type", "pos"]))
+
+ def test_has_field_values(self):
+ assert(TestUnitLineEquivalence.a._has_field_values(TestUnitLineEquivalence.h_a))
+ # record_type difference
+ assert(not TestUnitLineEquivalence.a._has_field_values(TestUnitLineEquivalence.h_a_rt))
+ assert(TestUnitLineEquivalence.a._has_field_values(TestUnitLineEquivalence.h_a_rt, ["record_type"]))
+ # positional field difference
+ assert(not TestUnitLineEquivalence.a._has_field_values(TestUnitLineEquivalence.h_a_name))
+ assert(TestUnitLineEquivalence.a._has_field_values(TestUnitLineEquivalence.h_a_name, ["name"]))
+ # positional field difference: placeholder in line
+ assert(TestUnitLineEquivalence.a._has_field_values(TestUnitLineEquivalence.h_a_seq))
+ # positional field difference: placeholder in hash is compared
+ assert(not TestUnitLineEquivalence.a._has_field_values(TestUnitLineEquivalence.h_a_pl))
+ assert(TestUnitLineEquivalence.a._has_field_values(TestUnitLineEquivalence.h_a_pl, ["name"]))
+ # tag difference
+ assert(not TestUnitLineEquivalence.a._has_field_values(TestUnitLineEquivalence.h_a_ln))
+ assert(TestUnitLineEquivalence.a._has_field_values(TestUnitLineEquivalence.h_a_ln, ["LN"]))
+ # encoded value
+ assert(TestUnitLineEquivalence.a._has_field_values(TestUnitLineEquivalence.h_a_LNstr))
+ # additional tag in line
+ assert(TestUnitLineEquivalence.a._has_field_values(TestUnitLineEquivalence.h_a_noxx))
+ # missing tag in line
+ assert(not TestUnitLineEquivalence.a._has_field_values(TestUnitLineEquivalence.h_a_yy))
+ assert(TestUnitLineEquivalence.a._has_field_values(TestUnitLineEquivalence.h_a_yy, ["yy"]))
+ # gfa1 vs gfa2
+ assert(TestUnitLineEquivalence.a._has_field_values(TestUnitLineEquivalence.h_a_gfa2, ["slen"]))
+
diff --git a/tests/test_unit_lines_finders.py b/tests/test_unit_lines_finders.py
new file mode 100644
index 0000000..9a18664
--- /dev/null
+++ b/tests/test_unit_lines_finders.py
@@ -0,0 +1,74 @@
+import unittest
+import gfapy
+
+class TestUnitLineFinders(unittest.TestCase):
+
+
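+ # the tests below refer to these lines by their index in l_gfa1 / l_gfa2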
+ l_gfa1_str = ["S\t1\t*",
+ "S\t2\t*",
+ "S\t3\t*",
+ "S\t4\tCGAT",
+ "L\t1\t+\t2\t+\t*",
+ "L\t1\t-\t3\t+\t10M",
+ "C\t1\t-\t4\t-\t1\t*",
+ "P\tp1\t1+,2+\t*"]
+ l_gfa1 = [gfapy.Line(s) for s in l_gfa1_str]
+ gfa1 = gfapy.Gfa(l_gfa1)
+
+ l_gfa2_str = ["S\t5\t100\t*",
+ "S\t6\t110\t*",
+ "E\te1\t5+\t6-\t0\t100$\t10\t110$\t*",
+ "G\tg1\t5-\t6-\t1000\t*",
+ "O\to1\t5+ 6-",
+ "U\tu1\t5 e1",
+ "F\t5\tread1-\t0\t10\t102\t122\t*",
+ "F\t5\tread1-\t30\t100$\t180\t255\t*",
+ "F\t6\tread1-\t40\t50\t52\t64\t*",
+ "X\tx1\txx:Z:A",
+ "X\tx2",
+ "G\t*\t5+\t6+\t2000\t*"]
+ l_gfa2 = [gfapy.Line(s) for s in l_gfa2_str]
+ gfa2 = gfapy.Gfa(l_gfa2)
+
+ def test_search_link(self):
+ # search using the direct link
+ self.assertEqual(TestUnitLineFinders.l_gfa1[4], TestUnitLineFinders.gfa1._search_link(gfapy.OrientedLine("1","+"), gfapy.OrientedLine("2","+"), "*"))
+ # search using the complement link
+ self.assertEqual(TestUnitLineFinders.l_gfa1[4], TestUnitLineFinders.gfa1._search_link(gfapy.OrientedLine("2","-"), gfapy.OrientedLine("1","-"), "*"))
+ # with cigar parameter, but placeholder in line
+ self.assertEqual(TestUnitLineFinders.l_gfa1[4],
+ TestUnitLineFinders.gfa1._search_link(gfapy.OrientedLine("1","+"), gfapy.OrientedLine("2","+"), "10M"))
+ # with cigar parameter, and cigar in line
+ self.assertEqual(TestUnitLineFinders.l_gfa1[5],
+ TestUnitLineFinders.gfa1._search_link(gfapy.OrientedLine("1","-"), gfapy.OrientedLine("3","+"), "10M"))
+ self.assertEqual(None,
+ TestUnitLineFinders.gfa1._search_link(gfapy.OrientedLine("1","-"), gfapy.OrientedLine("3","+"), "12M"))
+ # with placeholder parameter, and cigar in line
+ self.assertEqual(TestUnitLineFinders.l_gfa1[5],
+ TestUnitLineFinders.gfa1._search_link(gfapy.OrientedLine("1","-"), gfapy.OrientedLine("3","+"), "*"))
+
+ def test_search_duplicate_gfa1(self):
+ # link
+ self.assertEqual(TestUnitLineFinders.l_gfa1[4], TestUnitLineFinders.gfa1._search_duplicate(TestUnitLineFinders.l_gfa1[4]))
+ # complement link
+ self.assertEqual(TestUnitLineFinders.l_gfa1[4], TestUnitLineFinders.gfa1._search_duplicate(TestUnitLineFinders.l_gfa1[4].complement()))
+ # containment
+ self.assertEqual(None, TestUnitLineFinders.gfa1._search_duplicate(TestUnitLineFinders.l_gfa1[6]))
+ # segment
+ self.assertEqual(TestUnitLineFinders.l_gfa1[0], TestUnitLineFinders.gfa1._search_duplicate(TestUnitLineFinders.l_gfa1[0]))
+ # path
+ self.assertEqual(TestUnitLineFinders.l_gfa1[7], TestUnitLineFinders.gfa1._search_duplicate(TestUnitLineFinders.l_gfa1[7]))
+
+ def test_search_duplicate_gfa2(self):
+ # line with mandatory name
+ self.assertEqual(TestUnitLineFinders.l_gfa2[0], TestUnitLineFinders.gfa2._search_duplicate(TestUnitLineFinders.l_gfa2[0]))
+ # line with optional name, present
+ self.assertEqual(TestUnitLineFinders.l_gfa2[2], TestUnitLineFinders.gfa2._search_duplicate(TestUnitLineFinders.l_gfa2[2]))
+ self.assertEqual(TestUnitLineFinders.l_gfa2[3], TestUnitLineFinders.gfa2._search_duplicate(TestUnitLineFinders.l_gfa2[3]))
+ self.assertEqual(TestUnitLineFinders.l_gfa2[4], TestUnitLineFinders.gfa2._search_duplicate(TestUnitLineFinders.l_gfa2[4]))
+ self.assertEqual(TestUnitLineFinders.l_gfa2[5], TestUnitLineFinders.gfa2._search_duplicate(TestUnitLineFinders.l_gfa2[5]))
+ # line with optional name, not present
+ self.assertEqual(None, TestUnitLineFinders.gfa2._search_duplicate(TestUnitLineFinders.l_gfa2[11]))
+ # line with no name
+ self.assertEqual(None, TestUnitLineFinders.gfa2._search_duplicate(TestUnitLineFinders.l_gfa2[6]))
+ self.assertEqual(None, TestUnitLineFinders.gfa2._search_duplicate(TestUnitLineFinders.l_gfa2[9]))
diff --git a/tests/test_unit_multiplication.py b/tests/test_unit_multiplication.py
new file mode 100644
index 0000000..fee1ca0
--- /dev/null
+++ b/tests/test_unit_multiplication.py
@@ -0,0 +1,46 @@
+import gfapy
+import unittest
+
+class TestUnitMultiplication(unittest.TestCase):
+
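+ # presumably: the arguments are (factor, lB, lE, flag), with lB/lE the numbers of links at the two segment ends; the return value is the end at which to distribute the links ("L"/"R") or None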
+ def test_auto_select_distribute_end_lB_eq_lE(self):
+ g = gfapy.Gfa()
+ # lB == lE == 1
+ self.assertEqual(None, g._auto_select_distribute_end( 4, 1, 1, False))
+ # lB == lE == factor
+ self.assertEqual("R", g._auto_select_distribute_end( 4, 4, 4, False))
+ # lB == lE; </> factor
+ self.assertEqual("R", g._auto_select_distribute_end( 4, 2, 2, False))
+ self.assertEqual("L", g._auto_select_distribute_end( 4, 6, 6, False))
+
+ def test_auto_select_distribute_end_l_1(self):
+ g = gfapy.Gfa()
+ # lB or lE == 1, other </==/> factor
+ self.assertEqual("L", g._auto_select_distribute_end( 4, 2, 1, False))
+ self.assertEqual("L", g._auto_select_distribute_end( 4, 4, 1, False))
+ self.assertEqual("L", g._auto_select_distribute_end( 4, 6, 1, False))
+ self.assertEqual("R", g._auto_select_distribute_end( 4, 1, 2, False))
+ self.assertEqual("R", g._auto_select_distribute_end( 4, 1, 4, False))
+ self.assertEqual("R", g._auto_select_distribute_end( 4, 1, 6, False))
+
+ def test_auto_select_distribute_end_eq_factor(self):
+ g = gfapy.Gfa()
+ # one =, one > factor
+ self.assertEqual("L", g._auto_select_distribute_end( 4, 4, 5, False))
+ self.assertEqual("R", g._auto_select_distribute_end( 4, 5, 4, False))
+ # one =, one < factor
+ self.assertEqual("L", g._auto_select_distribute_end( 4, 4, 3, False))
+ self.assertEqual("R", g._auto_select_distribute_end( 4, 3, 4, False))
+
+ def test_auto_select_distribute_end_diff_factor(self):
+ g = gfapy.Gfa()
+ # both > 1; both < factor
+ self.assertEqual("L", g._auto_select_distribute_end( 4, 3, 2, False))
+ self.assertEqual("R", g._auto_select_distribute_end( 4, 2, 3, False))
+ # both > 1; both > factor
+ self.assertEqual("L", g._auto_select_distribute_end( 4, 5, 6, False))
+ self.assertEqual("R", g._auto_select_distribute_end( 4, 6, 5, False))
+ # both > 1; one <, one > factor
+ self.assertEqual("L", g._auto_select_distribute_end( 4, 3, 5, False))
+ self.assertEqual("R", g._auto_select_distribute_end( 4, 5, 3, False))
+
diff --git a/tests/test_unit_numeric_array.py b/tests/test_unit_numeric_array.py
new file mode 100644
index 0000000..8de7f24
--- /dev/null
+++ b/tests/test_unit_numeric_array.py
@@ -0,0 +1,21 @@
+import unittest
+import gfapy
+
+class TestUnitNumericArray(unittest.TestCase):
+
+ def test_integer_type(self):
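+ # v[b] = 2**(b//2): a value which fits in b bits but not in b//2 bits; no numeric array subtype is larger than 32 bits, hence the errors for 64 and 128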
+ v = {b: 2**(b//2) for b in [8,16,32,64,128]}
+ self.assertEqual("C", gfapy.NumericArray.integer_type((0,v[8])))
+ self.assertEqual("c", gfapy.NumericArray.integer_type((-1,v[8])))
+ self.assertEqual("S", gfapy.NumericArray.integer_type((0,v[16])))
+ self.assertEqual("s", gfapy.NumericArray.integer_type((-1,v[16])))
+ self.assertEqual("I", gfapy.NumericArray.integer_type((0,v[32])))
+ self.assertEqual("i", gfapy.NumericArray.integer_type((-1,v[32])))
+ self.assertRaises(gfapy.ValueError,
+ gfapy.NumericArray.integer_type, (0,v[64]))
+ self.assertRaises(gfapy.ValueError,
+ gfapy.NumericArray.integer_type, (-1,v[64]))
+ self.assertRaises(gfapy.ValueError,
+ gfapy.NumericArray.integer_type, (0,v[128]))
+ self.assertRaises(gfapy.ValueError,
+ gfapy.NumericArray.integer_type, (-1,v[128]))
diff --git a/tests/test_unit_oriented_line.py b/tests/test_unit_oriented_line.py
new file mode 100644
index 0000000..9b8b331
--- /dev/null
+++ b/tests/test_unit_oriented_line.py
@@ -0,0 +1,100 @@
+import unittest
+import gfapy
+
+class TestUnitOrientedLine(unittest.TestCase):
+
+ def test_init(self):
+ a = gfapy.OrientedLine("a","+")
+ # no validation on creation: (invalid orientation)
+ gfapy.OrientedLine("a","*")
+ # no validation on creation: (invalid line name)
+ gfapy.OrientedLine("a\ta","+")
+ b = gfapy.OrientedLine("a+")
+ self.assertEqual(a, b)
+ c = gfapy.OrientedLine(["a","+"])
+ self.assertEqual(a, c)
+ self.assertRaises(IndexError, gfapy.OrientedLine, [])
+ self.assertRaises(IndexError, gfapy.OrientedLine, ["a"])
+ # nothing is raised if too many args are provided (extra elements are ignored)
+ gfapy.OrientedLine(["a", "+", 1])
+
+ def test_properties(self):
+ a = gfapy.OrientedLine("a", "+")
+ self.assertEqual("a", a.line)
+ self.assertEqual("+", a.orient)
+ self.assertEqual("a", a.name)
+ s = gfapy.Line("S\tb\t*\txx:Z:1.0")
+ a.line = s
+ self.assertEqual(s, a.line)
+ self.assertEqual("b", a.name)
+ self.assertEqual("+", a.orient)
+ a.orient = "-"
+ self.assertEqual(s, a.line)
+ self.assertEqual("-", a.orient)
+
+ def test_validate(self):
+ gfapy.OrientedLine("a","+").validate()
+ gfapy.OrientedLine(gfapy.Line("S\tb\t*\txx:Z:1.0"),
+ "-").validate()
+ self.assertRaises(gfapy.ValueError,
+ gfapy.OrientedLine("a","*").validate)
+ self.assertRaises(gfapy.TypeError,
+ gfapy.OrientedLine([],"+").validate)
+ self.assertRaises(gfapy.FormatError,
+ gfapy.OrientedLine("a\ta","+").validate)
+
+
+ def test_inverted(self):
+ os = gfapy.OrientedLine("a", "+")
+ inv_os = os.inverted()
+ self.assertEqual("a", inv_os.line)
+ self.assertEqual("+", os.orient)
+ self.assertEqual("-", inv_os.orient)
+ s = gfapy.Line("S\tb\t*\txx:Z:1.0")
+ os = gfapy.OrientedLine(s, "-")
+ inv_os = os.inverted()
+ self.assertEqual(s, inv_os.line)
+ self.assertEqual("-", os.orient)
+ self.assertEqual("+", inv_os.orient)
+ os = gfapy.OrientedLine("a", "*")
+ self.assertRaises(gfapy.ValueError, os.invert)
+
+ def test_str(self):
+ self.assertEqual("a-", str(gfapy.OrientedLine("a","-")))
+ s = gfapy.Line("S\tb\t*\txx:Z:1.0")
+ self.assertEqual("b+", str(gfapy.OrientedLine(s,"+")))
+
+ def test_equal(self):
+ a = gfapy.OrientedLine("a", "+")
+ b = gfapy.OrientedLine(gfapy.Line("S\ta\t*"), "+")
+ c = gfapy.OrientedLine("a", "-")
+ self.assertEqual(a, b)
+ self.assertNotEqual(a, c)
+ # the line itself is not checked for equivalence, only the name:
+ b2 = gfapy.OrientedLine(gfapy.Line("S\ta\tCACAC"), "+")
+ self.assertEqual(b, b2)
+ # equivalence to string:
+ self.assertEqual("a+", a)
+ self.assertEqual("a+", b)
+ self.assertEqual(a, "a+")
+ self.assertEqual(b, "a+")
+ # equivalence to list:
+ self.assertEqual(a, ["a", "+"])
+ self.assertEqual(b, ["a", "+"])
+ self.assertEqual(["a", "+"], a)
+ self.assertEqual(["a", "+"], b)
+
+ def test_block(self):
+ a = gfapy.OrientedLine("a", "+")
+ a._block()
+ with self.assertRaises(gfapy.RuntimeError):
+ a.line = "b"
+ a._unblock()
+ a.line = "b"
+
+ def test_delegate_methods(self):
+ ol = gfapy.OrientedLine(gfapy.Line("S\ta\tCACAC"), "+")
+ self.assertEqual("CACAC", ol.sequence)
+ self.assertEqual("CACAC", ol.field_to_s("sequence"))
+ ol.set("xx", 1)
+ self.assertEqual("xx:i:1", ol.field_to_s("xx", True))
diff --git a/tests/test_unit_segment_end.py b/tests/test_unit_segment_end.py
new file mode 100644
index 0000000..e6217ee
--- /dev/null
+++ b/tests/test_unit_segment_end.py
@@ -0,0 +1,96 @@
+import unittest
+import gfapy
+
+class TestUnitSegmentEnd(unittest.TestCase):
+
+ sym = "a"
+ ref = gfapy.Line("S\ta\t*\txx:Z:1.0")
+ invalid_sym = "a\ta"
+ invalid_ref = []
+ se_s = gfapy.SegmentEnd(sym, "L")
+ se_r = gfapy.SegmentEnd(ref, "R")
+ se_s_str = "aL"
+ se_r_str = "aR"
+ se_s_sym = "aL"
+ se_r_sym = "aR"
+
+ def test_new(self):
+ gfapy.SegmentEnd(TestUnitSegmentEnd.sym, "L")
+ # no validation on creation
+ gfapy.SegmentEnd(TestUnitSegmentEnd.invalid_sym, "X")
+
+ def test_from_list(self):
+ self.assertEqual(TestUnitSegmentEnd.se_s,
+ gfapy.SegmentEnd(["a", "L"]))
+ self.assertEqual(gfapy.SegmentEnd,
+ gfapy.SegmentEnd(["a", "L"]).__class__)
+ self.assertRaises(gfapy.ArgumentError, gfapy.SegmentEnd,
+ ["a", "L", "L"])
+ gfapy.SegmentEnd(["a", "X"]) # no validation
+
+ def test_segment(self):
+ self.assertEqual(TestUnitSegmentEnd.sym, TestUnitSegmentEnd.se_s.segment)
+ self.assertEqual(TestUnitSegmentEnd.ref, TestUnitSegmentEnd.se_r.segment)
+ se2 = gfapy.SegmentEnd(TestUnitSegmentEnd.sym, "R")
+ se2.segment = TestUnitSegmentEnd.ref
+ self.assertEqual(TestUnitSegmentEnd.ref, se2.segment)
+
+ def test_end_type(self):
+ self.assertEqual("L", TestUnitSegmentEnd.se_s.end_type)
+ self.assertEqual("R", TestUnitSegmentEnd.se_r.end_type)
+ se2 = gfapy.SegmentEnd(TestUnitSegmentEnd.sym, "L")
+ se2.end_type = "R"
+ self.assertEqual("R", se2.end_type)
+
+ def test_name(self):
+ self.assertEqual(TestUnitSegmentEnd.sym, TestUnitSegmentEnd.se_s.name)
+ self.assertEqual(TestUnitSegmentEnd.sym, TestUnitSegmentEnd.se_r.name)
+
+ def test_validate(self):
+ TestUnitSegmentEnd.se_s.validate()
+ TestUnitSegmentEnd.se_r.validate()
+ se1 = gfapy.SegmentEnd("a", "X")
+ self.assertRaises(gfapy.ValueError, se1.validate)
+
+ def test_inverted(self):
+ inv_s = TestUnitSegmentEnd.se_s.inverted()
+ self.assertEqual(TestUnitSegmentEnd.se_s.segment, inv_s.segment)
+ self.assertEqual("R", inv_s.end_type)
+ inv_r = TestUnitSegmentEnd.se_r.inverted()
+ self.assertEqual(TestUnitSegmentEnd.se_r.segment, inv_r.segment)
+ self.assertEqual("L", inv_r.end_type)
+
+ def test_to_s(self):
+ self.assertEqual(TestUnitSegmentEnd.se_s_str, str(TestUnitSegmentEnd.se_s))
+ self.assertEqual(TestUnitSegmentEnd.se_r_str, str(TestUnitSegmentEnd.se_r))
+
+ def test_equal(self):
+ se2 = gfapy.SegmentEnd(TestUnitSegmentEnd.sym, "L")
+ se3 = gfapy.SegmentEnd(TestUnitSegmentEnd.ref, "R")
+ self.assertEqual(TestUnitSegmentEnd.se_s, se2)
+ self.assertEqual(TestUnitSegmentEnd.se_r, se3)
+ # only name and end_type equivalence is checked, not segment
+ assert(TestUnitSegmentEnd.se_r != TestUnitSegmentEnd.se_s)
+ assert(TestUnitSegmentEnd.se_r.inverted() == TestUnitSegmentEnd.se_s)
+ # equivalence to array
+ assert(TestUnitSegmentEnd.se_s == ["a","L"])
+ assert(TestUnitSegmentEnd.se_r == ["a","R"])
+
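+ # comparison test from the Ruby version, not ported (Python has no <=> operator):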
+ #def test_comparison(self):
+ # self.assertEqual(-1, ["a","L"].to_segment_end() <=> ["b","L"].to_segment_end())
+ # self.assertEqual(0, ["a","L"].to_segment_end() <=> ["a","L"].to_segment_end())
+ # self.assertEqual(1, ["b","L"].to_segment_end() <=> ["a","L"].to_segment_end())
+ # self.assertEqual(-1, ["a","L"].to_segment_end() <=> ["a","R"].to_segment_end())
+ # self.assertEqual(0, ["a","R"].to_segment_end() <=> ["a","R"].to_segment_end())
+ # self.assertEqual(1, ["a","R"].to_segment_end() <=> ["a","L"].to_segment_end())
+
+ def test_segment_ends_path(self):
+ sep = gfapy.SegmentEndsPath([gfapy.SegmentEnd("a","L"),
+ gfapy.SegmentEnd("b","R")])
+ self.assertEqual([gfapy.SegmentEnd("b","L"),gfapy.SegmentEnd("a","R")],
+ list(reversed(sep)))
+ self.assertNotEqual([gfapy.SegmentEnd("b","L"),gfapy.SegmentEnd("a","R")],
+ sep)
+ sep.reverse()
+ self.assertEqual([gfapy.SegmentEnd("b","L"),gfapy.SegmentEnd("a","R")],
+ sep)
diff --git a/tests/test_unit_symbol_invert.py b/tests/test_unit_symbol_invert.py
new file mode 100644
index 0000000..1e147db
--- /dev/null
+++ b/tests/test_unit_symbol_invert.py
@@ -0,0 +1,16 @@
+import unittest
+import gfapy
+
+class TestUnitSymbolInvert(unittest.TestCase):
+
+ def test_invert_orientations(self):
+ self.assertEqual("+", gfapy.invert("-"))
+ self.assertEqual("-", gfapy.invert("+"))
+
+ def test_invert_segment_ends(self):
+ self.assertEqual("L", gfapy.invert("R"))
+ self.assertEqual("R", gfapy.invert("L"))
+
+ def test_invert_invalid(self):
+ self.assertRaises(gfapy.ValueError, gfapy.invert, "xx")
+
diff --git a/tests/test_unit_unknown.py b/tests/test_unit_unknown.py
new file mode 100644
index 0000000..c903770
--- /dev/null
+++ b/tests/test_unit_unknown.py
@@ -0,0 +1,25 @@
+import unittest
+import gfapy
+
+class TestUnitUnknown(unittest.TestCase):
+
+ u = gfapy.line.Unknown([None, "a"])
+
+ def test_new(self):
+ assert(isinstance(TestUnitUnknown.u, gfapy.line.Unknown))
+
+ def test_str(self):
+ self.assertEqual("?record_type?\ta\tco:Z:line_created_by_gfapy",
+ str(TestUnitUnknown.u))
+
+ def test_tags(self):
+ with self.assertRaises(AttributeError):
+ TestUnitUnknown.u.xx
+ self.assertEqual(None, TestUnitUnknown.u.get("xx"))
+ with self.assertRaises(gfapy.RuntimeError):
+ TestUnitUnknown.u.xx = 1
+ self.assertRaises(gfapy.RuntimeError,
+ TestUnitUnknown.u.set,"xx",1)
+
+ def test_virtual(self):
+ assert(TestUnitUnknown.u.virtual)
diff --git a/tests/testdata/all_line_types.gfa1.gfa b/tests/testdata/all_line_types.gfa1.gfa
new file mode 100644
index 0000000..4e30854
--- /dev/null
+++ b/tests/testdata/all_line_types.gfa1.gfa
@@ -0,0 +1,22 @@
+# File used for the collections test
+S 1 *
+S 3 CGATGCTAGCTGACTGTCGATGCTGTGTG
+L 1 + 2 + 12M ID:Z:1_to_2
+S 5 *
+S 13 *
+C 2 + 6 + 10 122M ID:Z:2_to_6
+P 14 11+,12+ 122M
+S 11 *
+H ac:Z:test2
+S 12 *
+S 4 *
+H VN:Z:1.0
+L 1 + 3 + 12M ID:Z:1_to_3
+L 11 + 12 + 122M ID:Z:11_to_12
+S 6 *
+L 11 + 13 + 120M ID:Z:11_to_13
+P 15 11+,13+ 120M
+S 2 * xx:Z:sometag
+H aa:i:12 ab:Z:test1
+H aa:i:15
+C 1 + 5 + 12 120M ID:Z:1_to_5
diff --git a/tests/testdata/all_line_types.gfa2.gfa b/tests/testdata/all_line_types.gfa2.gfa
new file mode 100644
index 0000000..8fdd667
--- /dev/null
+++ b/tests/testdata/all_line_types.gfa2.gfa
@@ -0,0 +1,33 @@
+# File used for the collections test
+# similar but NOT equivalent to the gfa1 file!
+S 1 122 *
+S 3 29 TGCTAGCTGACTGTCGATGCTGTGTG
+E 1_to_2 1+ 2+ 110 122$ 0 12 12M
+S 5 130 *
+S 13 150 *
+E 2_to_6 2+ 6+ 0 122$ 10 132 122M
+O 14 11+ 12+
+S 11 140 * xx:i:11
+F 2 read1+ 0 42 12 55 * id:Z:read1_in_2
+F 2 read2+ 45 62 0 18 * id:Z:read2_in_2
+U 16 1 3 15 2_to_6 16sub
+H ac:Z:test2
+# another comment
+S 12 150 *
+S 4 120 *
+H VN:Z:2.0
+E 1_to_3 1+ 3+ 112 122$ 0 12 10M
+G 1_to_11 1+ 11- 120 *
+E 11_to_12 11+ 12+ 18 140$ 0 122 122M
+S 6 150 *
+X custom_record xx:Z:testtag
+X custom_record X2
+E 11_to_13 11+ 13+ 20 140$ 0 120 120M
+G 2_to_12 2- 12+ 500 50
+O 15 11+ 11_to_13+ 13+ xx:i:-1
+Y another_custom_record
+U 16sub 2 3
+S 2 120 * xx:Z:sometag
+H aa:i:12 ab:Z:test1
+H aa:i:15
+E 1_to_5 1+ 5+ 0 122$ 2 124 * zz:Z:tag
diff --git a/tests/testdata/copynum.1.gfa b/tests/testdata/copynum.1.gfa
new file mode 100644
index 0000000..580260b
--- /dev/null
+++ b/tests/testdata/copynum.1.gfa
@@ -0,0 +1,3 @@
+S 0 * RC:i:600 LN:i:100
+S 1 * RC:i:6000 LN:i:100
+S 2 * RC:i:60000 LN:i:100
diff --git a/tests/testdata/copynum.1.gfa2 b/tests/testdata/copynum.1.gfa2
new file mode 100644
index 0000000..689c9d4
--- /dev/null
+++ b/tests/testdata/copynum.1.gfa2
@@ -0,0 +1,3 @@
+S 0 100 * RC:i:600
+S 1 100 * RC:i:6000
+S 2 100 * RC:i:60000
diff --git a/tests/testdata/copynum.2.gfa b/tests/testdata/copynum.2.gfa
new file mode 100644
index 0000000..4bf90b2
--- /dev/null
+++ b/tests/testdata/copynum.2.gfa
@@ -0,0 +1,4 @@
+S 0 * RC:i:10 LN:i:100
+S 1 * RC:i:1000 LN:i:100
+S 2 * RC:i:2000 LN:i:100
+S 3 * RC:i:3000 LN:i:100
diff --git a/tests/testdata/copynum.2.gfa2 b/tests/testdata/copynum.2.gfa2
new file mode 100644
index 0000000..7202e4a
--- /dev/null
+++ b/tests/testdata/copynum.2.gfa2
@@ -0,0 +1,4 @@
+S 0 100 * RC:i:10
+S 1 100 * RC:i:1000
+S 2 100 * RC:i:2000
+S 3 100 * RC:i:3000
diff --git a/tests/testdata/dead_ends.gfa b/tests/testdata/dead_ends.gfa
new file mode 100644
index 0000000..5b90c7e
--- /dev/null
+++ b/tests/testdata/dead_ends.gfa
@@ -0,0 +1,12 @@
+H VN:Z:1.0
+S 1 * LN:i:1000000
+S 2 * LN:i:1000000
+S 3 * LN:i:100000
+S 3b * LN:i:100000
+S 4 * LN:i:10000
+S 4b * LN:i:1000
+L 1 + 2 + 1000M
+L 2 + 3 + 1000M
+L 2 + 3b + 1000M
+L 3 + 4 + 100M
+L 3 + 4b + 100M
diff --git a/tests/testdata/dead_ends.gfa2 b/tests/testdata/dead_ends.gfa2
new file mode 100644
index 0000000..c0a639d
--- /dev/null
+++ b/tests/testdata/dead_ends.gfa2
@@ -0,0 +1,12 @@
+H VN:Z:2.0
+S 1 1000000 *
+S 2 1000000 *
+S 3 100000 *
+S 3b 100000 *
+S 4 10000 *
+S 4b 1000 *
+E * 1+ 2+ 999000 1000000$ 0 1000 1000M
+E * 2+ 3+ 999000 1000000$ 0 1000 1000M
+E * 2+ 3b+ 999000 1000000$ 0 1000 1000M
+E * 3+ 4+ 99900 100000$ 0 100 100M
+E * 3+ 4b+ 99900 100000$ 0 100 100M
diff --git a/tests/testdata/example1.gfa b/tests/testdata/example1.gfa
new file mode 100644
index 0000000..cc1f42f
--- /dev/null
+++ b/tests/testdata/example1.gfa
@@ -0,0 +1,45 @@
+H VN:Z:1.0
+S 1 * LN:i:6871 RC:i:2200067
+S 10 * LN:i:251 RC:i:82006
+S 11 * LN:i:208 RC:i:39533
+S 12 * LN:i:186 RC:i:34457
+S 16 * LN:i:157 RC:i:15334
+S 18 * LN:i:145 RC:i:55632
+S 19 * LN:i:134 RC:i:49274
+S 2 * LN:i:4589 RC:i:6428225
+S 20 * LN:i:134 RC:i:20521
+S 21 * LN:i:133 RC:i:28174
+S 22 * LN:i:132 RC:i:17846
+S 23 * LN:i:132 RC:i:24658
+S 24 * LN:i:107 RC:i:22256
+S 3 * LN:i:2044 RC:i:2727166
+S 4 * LN:i:1744 RC:i:1729157
+S 5 * LN:i:1378 RC:i:1071246
+S 6 * LN:i:1356 RC:i:422793
+S 7 * LN:i:920 RC:i:630822
+S 8 * LN:i:876 RC:i:794734
+S 9 * LN:i:255 RC:i:40589
+L 1 + 2 + 10M
+L 1 - 19 - 10M
+L 10 + 3 - 10M
+L 10 - 4 + 10M
+L 11 - 6 - 10M
+L 11 + 9 - 10M
+L 12 + 9 + 10M
+L 12 - 18 + 10M
+L 16 + 20 + 10M
+L 16 - 22 - 10M
+L 18 + 19 + 10M
+L 18 - 23 + 10M
+L 2 + 5 + 10M
+L 2 + 5 - 10M
+L 2 - 8 + 10M
+L 20 + 21 + 10M
+L 21 + 23 - 10M
+L 22 - 6 - 10M
+L 24 + 7 + 10M
+L 24 - 7 + 10M
+L 3 + 4 - 10M
+L 3 - 6 + 10M
+L 3 - 8 - 10M
+L 4 - 7 - 10M
diff --git a/tests/testdata/example1.gfa2 b/tests/testdata/example1.gfa2
new file mode 100644
index 0000000..f89e75e
--- /dev/null
+++ b/tests/testdata/example1.gfa2
@@ -0,0 +1,45 @@
+H VN:Z:2.0
+S 1 6871 * RC:i:2200067
+S 10 251 * RC:i:82006
+S 11 208 * RC:i:39533
+S 12 186 * RC:i:34457
+S 16 157 * RC:i:15334
+S 18 145 * RC:i:55632
+S 19 134 * RC:i:49274
+S 2 4589 * RC:i:6428225
+S 20 134 * RC:i:20521
+S 21 133 * RC:i:28174
+S 22 132 * RC:i:17846
+S 23 132 * RC:i:24658
+S 24 107 * RC:i:22256
+S 3 2044 * RC:i:2727166
+S 4 1744 * RC:i:1729157
+S 5 1378 * RC:i:1071246
+S 6 1356 * RC:i:422793
+S 7 920 * RC:i:630822
+S 8 876 * RC:i:794734
+S 9 255 * RC:i:40589
+E * 1+ 2+ 6861 6871$ 0 10 10M
+E * 1- 19- 0 10 124 134$ 10M
+E * 10+ 3- 241 251$ 2034 2044$ 10M
+E * 10- 4+ 0 10 0 10 10M
+E * 11- 6- 0 10 1346 1356$ 10M
+E * 11+ 9- 198 208$ 245 255$ 10M
+E * 12+ 9+ 176 186$ 0 10 10M
+E * 12- 18+ 0 10 0 10 10M
+E * 16+ 20+ 147 157$ 0 10 10M
+E * 16- 22- 0 10 122 132$ 10M
+E * 18+ 19+ 135 145$ 0 10 10M
+E * 18- 23+ 0 10 0 10 10M
+E * 2+ 5+ 4579 4589$ 0 10 10M
+E * 2+ 5- 4579 4589$ 1368 1378$ 10M
+E * 2- 8+ 0 10 0 10 10M
+E * 20+ 21+ 124 134$ 0 10 10M
+E * 21+ 23- 123 133$ 122 132$ 10M
+E * 22- 6- 0 10 1346 1356$ 10M
+E * 24+ 7+ 97 107$ 0 10 10M
+E * 24- 7+ 0 10 0 10 10M
+E * 3+ 4- 2034 2044$ 1734 1744$ 10M
+E * 3- 6+ 0 10 0 10 10M
+E * 3- 8- 0 10 866 876$ 10M
+E * 4- 7- 0 10 910 920$ 10M
diff --git a/tests/testdata/example_from_spec.gfa b/tests/testdata/example_from_spec.gfa
new file mode 100644
index 0000000..12b7646
--- /dev/null
+++ b/tests/testdata/example_from_spec.gfa
@@ -0,0 +1,9 @@
+H VN:Z:1.0
+H ul:Z:https://github.com/pmelsted/GFA-spec/blob/master/GFA-spec.md#example
+S 11 ACCTT
+S 12 TCAAGG
+S 13 CTTGATT
+L 11 + 12 - 4M ID:Z:11+_12-
+L 12 - 13 + 5M ID:Z:12-_13+
+L 11 + 13 + 3M ID:Z:11+_13+
+P 14 11+,12-,13+ 4M,5M
diff --git a/tests/testdata/example_from_spec.gfa2 b/tests/testdata/example_from_spec.gfa2
new file mode 100644
index 0000000..2adee00
--- /dev/null
+++ b/tests/testdata/example_from_spec.gfa2
@@ -0,0 +1,9 @@
+H VN:Z:2.0
+H ul:Z:https://github.com/pmelsted/GFA-spec/blob/master/GFA-spec.md#example
+S 11 5 ACCTT
+S 12 6 TCAAGG
+S 13 7 CTTGATT
+E 11+_12- 11+ 12- 1 5$ 2 6$ 4M
+E 12-_13+ 12- 13+ 0 5 0 5 5M
+E 11+_13+ 11+ 13+ 2 5$ 0 3 3M
+O 14 11+ 11+_12-+ 12- 12-_13++ 13+
diff --git a/tests/testdata/example_from_spec.path14.seq b/tests/testdata/example_from_spec.path14.seq
new file mode 100644
index 0000000..65069d9
--- /dev/null
+++ b/tests/testdata/example_from_spec.path14.seq
@@ -0,0 +1 @@
+ACCTTGATT
diff --git a/tests/testdata/example_from_spec2.gfa b/tests/testdata/example_from_spec2.gfa
new file mode 100644
index 0000000..7c28d13
--- /dev/null
+++ b/tests/testdata/example_from_spec2.gfa
@@ -0,0 +1,13 @@
+H VN:Z:1.0
+H ul:Z:https://github.com/pmelsted/GFA-spec/blob/master/GFA-spec.md#first-update-on-gfa
+S 1 CGATGCAA
+L 1 + 2 + 5M
+S 2 TGCAAAGTAC
+L 3 + 2 + 0M
+S 3 TGCAACGTATAGACTTGTCAC RC:i:4
+L 3 + 4 - 1M1D2M
+S 4 GCATATA
+L 4 - 5 + 0M
+S 5 CGATGATA
+S 6 ATGA
+C 5 + 6 + 2 4M
diff --git a/tests/testdata/example_from_spec2.gfa2 b/tests/testdata/example_from_spec2.gfa2
new file mode 100644
index 0000000..ecba05d
--- /dev/null
+++ b/tests/testdata/example_from_spec2.gfa2
@@ -0,0 +1,13 @@
+H VN:Z:2.0
+H ul:Z:https://github.com/pmelsted/GFA-spec/blob/master/GFA-spec.md#first-update-on-gfa
+S 1 8 CGATGCAA
+S 2 10 TGCAAAGTAC
+S 3 21 TGCAACGTATAGACTTGTCAC RC:i:4
+S 4 7 GCATATA
+S 5 8 CGATGATA
+S 6 4 ATGA
+E * 1+ 2+ 3 8$ 0 5 5M
+E * 3+ 2+ 21$ 21$ 0 0 0M
+E * 3+ 4- 17 21$ 3 7$ 1M1D2M
+E * 4- 5+ 0 0 0 0 0M
+E * 5+ 6+ 2 6 0 4$ 4M
diff --git a/tests/testdata/gfa2_edges_classification.gfa b/tests/testdata/gfa2_edges_classification.gfa
new file mode 100644
index 0000000..0e24860
--- /dev/null
+++ b/tests/testdata/gfa2_edges_classification.gfa
@@ -0,0 +1,1619 @@
+#
+# a is the segment according to which the edges are classified
+# (all edges involve a)
+#
+S a 100 *
+#
+# The other segments are named according to the name of the edge
+# which connects them to a, preceded by an "s"
+#
+# Naming scheme for the edges
+# letter 1: < if the segment in sid1 is a, > if the segment in sid2 is a
+# letters 2/3: the orientations in sid1 and sid2
+# letters 4/5/6/7: symbols for the coordinates, with the following convention:
+# 0 => 0; 1 => 30; 2 => 70; $ => 100$
+#
+# The edges contain an "at" string tag, which describes the edge from the
+# perspective of the segment a. The tag value is one of: "dovetail_L",
+# "dovetail_R", "internal", "to_container", "to_contained".
+#
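+# For example, the segment s<++0012 is connected to a by the edge named
+# <++0012: a is sid1 (<), both orientations are +, and the coordinate
+# symbols 0012 stand for 0, 0, 30 and 70 (presumably in the E-line order
+# beg1, end1, beg2, end2).
+#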
+S s<++0000 100 *
+S s>++0000 100 *
+S s<+-0000 100 *
+S s>+-0000 100 *
+S s<-+0000 100 *
+S s>-+0000 100 *
+S s<--0000 100 *
+S s>--0000 100 *
+S s<++0001 100 *
+S s>++0001 100 *
+S s<+-0001 100 *
+S s>+-0001 100 *
+S s<-+0001 100 *
+S s>-+0001 100 *
+S s<--0001 100 *
+S s>--0001 100 *
+S s<++0002 100 *
+S s>++0002 100 *
+S s<+-0002 100 *
+S s>+-0002 100 *
+S s<-+0002 100 *
+S s>-+0002 100 *
+S s<--0002 100 *
+S s>--0002 100 *
+S s<++000$ 100 *
+S s>++000$ 100 *
+S s<+-000$ 100 *
+S s>+-000$ 100 *
+S s<-+000$ 100 *
+S s>-+000$ 100 *
+S s<--000$ 100 *
+S s>--000$ 100 *
+S s<++0011 100 *
+S s>++0011 100 *
+S s<+-0011 100 *
+S s>+-0011 100 *
+S s<-+0011 100 *
+S s>-+0011 100 *
+S s<--0011 100 *
+S s>--0011 100 *
+S s<++0012 100 *
+S s>++0012 100 *
+S s<+-0012 100 *
+S s>+-0012 100 *
+S s<-+0012 100 *
+S s>-+0012 100 *
+S s<--0012 100 *
+S s>--0012 100 *
+S s<++001$ 100 *
+S s>++001$ 100 *
+S s<+-001$ 100 *
+S s>+-001$ 100 *
+S s<-+001$ 100 *
+S s>-+001$ 100 *
+S s<--001$ 100 *
+S s>--001$ 100 *
+S s<++0022 100 *
+S s>++0022 100 *
+S s<+-0022 100 *
+S s>+-0022 100 *
+S s<-+0022 100 *
+S s>-+0022 100 *
+S s<--0022 100 *
+S s>--0022 100 *
+S s<++002$ 100 *
+S s>++002$ 100 *
+S s<+-002$ 100 *
+S s>+-002$ 100 *
+S s<-+002$ 100 *
+S s>-+002$ 100 *
+S s<--002$ 100 *
+S s>--002$ 100 *
+S s<++00$$ 100 *
+S s>++00$$ 100 *
+S s<+-00$$ 100 *
+S s>+-00$$ 100 *
+S s<-+00$$ 100 *
+S s>-+00$$ 100 *
+S s<--00$$ 100 *
+S s>--00$$ 100 *
+S s<++0100 100 *
+S s>++0100 100 *
+S s<+-0100 100 *
+S s>+-0100 100 *
+S s<-+0100 100 *
+S s>-+0100 100 *
+S s<--0100 100 *
+S s>--0100 100 *
+S s<++0101 100 *
+S s>++0101 100 *
+S s<+-0101 100 *
+S s>+-0101 100 *
+S s<-+0101 100 *
+S s>-+0101 100 *
+S s<--0101 100 *
+S s>--0101 100 *
+S s<++0102 100 *
+S s>++0102 100 *
+S s<+-0102 100 *
+S s>+-0102 100 *
+S s<-+0102 100 *
+S s>-+0102 100 *
+S s<--0102 100 *
+S s>--0102 100 *
+S s<++010$ 100 *
+S s>++010$ 100 *
+S s<+-010$ 100 *
+S s>+-010$ 100 *
+S s<-+010$ 100 *
+S s>-+010$ 100 *
+S s<--010$ 100 *
+S s>--010$ 100 *
+S s<++0111 100 *
+S s>++0111 100 *
+S s<+-0111 100 *
+S s>+-0111 100 *
+S s<-+0111 100 *
+S s>-+0111 100 *
+S s<--0111 100 *
+S s>--0111 100 *
+S s<++0112 100 *
+S s>++0112 100 *
+S s<+-0112 100 *
+S s>+-0112 100 *
+S s<-+0112 100 *
+S s>-+0112 100 *
+S s<--0112 100 *
+S s>--0112 100 *
+S s<++011$ 100 *
+S s>++011$ 100 *
+S s<+-011$ 100 *
+S s>+-011$ 100 *
+S s<-+011$ 100 *
+S s>-+011$ 100 *
+S s<--011$ 100 *
+S s>--011$ 100 *
+S s<++0122 100 *
+S s>++0122 100 *
+S s<+-0122 100 *
+S s>+-0122 100 *
+S s<-+0122 100 *
+S s>-+0122 100 *
+S s<--0122 100 *
+S s>--0122 100 *
+S s<++012$ 100 *
+S s>++012$ 100 *
+S s<+-012$ 100 *
+S s>+-012$ 100 *
+S s<-+012$ 100 *
+S s>-+012$ 100 *
+S s<--012$ 100 *
+S s>--012$ 100 *
+S s<++01$$ 100 *
+S s>++01$$ 100 *
+S s<+-01$$ 100 *
+S s>+-01$$ 100 *
+S s<-+01$$ 100 *
+S s>-+01$$ 100 *
+S s<--01$$ 100 *
+S s>--01$$ 100 *
+S s<++0200 100 *
+S s>++0200 100 *
+S s<+-0200 100 *
+S s>+-0200 100 *
+S s<-+0200 100 *
+S s>-+0200 100 *
+S s<--0200 100 *
+S s>--0200 100 *
+S s<++0201 100 *
+S s>++0201 100 *
+S s<+-0201 100 *
+S s>+-0201 100 *
+S s<-+0201 100 *
+S s>-+0201 100 *
+S s<--0201 100 *
+S s>--0201 100 *
+S s<++0202 100 *
+S s>++0202 100 *
+S s<+-0202 100 *
+S s>+-0202 100 *
+S s<-+0202 100 *
+S s>-+0202 100 *
+S s<--0202 100 *
+S s>--0202 100 *
+S s<++020$ 100 *
+S s>++020$ 100 *
+S s<+-020$ 100 *
+S s>+-020$ 100 *
+S s<-+020$ 100 *
+S s>-+020$ 100 *
+S s<--020$ 100 *
+S s>--020$ 100 *
+S s<++0211 100 *
+S s>++0211 100 *
+S s<+-0211 100 *
+S s>+-0211 100 *
+S s<-+0211 100 *
+S s>-+0211 100 *
+S s<--0211 100 *
+S s>--0211 100 *
+S s<++0212 100 *
+S s>++0212 100 *
+S s<+-0212 100 *
+S s>+-0212 100 *
+S s<-+0212 100 *
+S s>-+0212 100 *
+S s<--0212 100 *
+S s>--0212 100 *
+S s<++021$ 100 *
+S s>++021$ 100 *
+S s<+-021$ 100 *
+S s>+-021$ 100 *
+S s<-+021$ 100 *
+S s>-+021$ 100 *
+S s<--021$ 100 *
+S s>--021$ 100 *
+S s<++0222 100 *
+S s>++0222 100 *
+S s<+-0222 100 *
+S s>+-0222 100 *
+S s<-+0222 100 *
+S s>-+0222 100 *
+S s<--0222 100 *
+S s>--0222 100 *
+S s<++022$ 100 *
+S s>++022$ 100 *
+S s<+-022$ 100 *
+S s>+-022$ 100 *
+S s<-+022$ 100 *
+S s>-+022$ 100 *
+S s<--022$ 100 *
+S s>--022$ 100 *
+S s<++02$$ 100 *
+S s>++02$$ 100 *
+S s<+-02$$ 100 *
+S s>+-02$$ 100 *
+S s<-+02$$ 100 *
+S s>-+02$$ 100 *
+S s<--02$$ 100 *
+S s>--02$$ 100 *
+S s<++0$00 100 *
+S s>++0$00 100 *
+S s<+-0$00 100 *
+S s>+-0$00 100 *
+S s<-+0$00 100 *
+S s>-+0$00 100 *
+S s<--0$00 100 *
+S s>--0$00 100 *
+S s<++0$01 100 *
+S s>++0$01 100 *
+S s<+-0$01 100 *
+S s>+-0$01 100 *
+S s<-+0$01 100 *
+S s>-+0$01 100 *
+S s<--0$01 100 *
+S s>--0$01 100 *
+S s<++0$02 100 *
+S s>++0$02 100 *
+S s<+-0$02 100 *
+S s>+-0$02 100 *
+S s<-+0$02 100 *
+S s>-+0$02 100 *
+S s<--0$02 100 *
+S s>--0$02 100 *
+S s<++0$0$ 100 *
+S s>++0$0$ 100 *
+S s<+-0$0$ 100 *
+S s>+-0$0$ 100 *
+S s<-+0$0$ 100 *
+S s>-+0$0$ 100 *
+S s<--0$0$ 100 *
+S s>--0$0$ 100 *
+S s<++0$11 100 *
+S s>++0$11 100 *
+S s<+-0$11 100 *
+S s>+-0$11 100 *
+S s<-+0$11 100 *
+S s>-+0$11 100 *
+S s<--0$11 100 *
+S s>--0$11 100 *
+S s<++0$12 100 *
+S s>++0$12 100 *
+S s<+-0$12 100 *
+S s>+-0$12 100 *
+S s<-+0$12 100 *
+S s>-+0$12 100 *
+S s<--0$12 100 *
+S s>--0$12 100 *
+S s<++0$1$ 100 *
+S s>++0$1$ 100 *
+S s<+-0$1$ 100 *
+S s>+-0$1$ 100 *
+S s<-+0$1$ 100 *
+S s>-+0$1$ 100 *
+S s<--0$1$ 100 *
+S s>--0$1$ 100 *
+S s<++0$22 100 *
+S s>++0$22 100 *
+S s<+-0$22 100 *
+S s>+-0$22 100 *
+S s<-+0$22 100 *
+S s>-+0$22 100 *
+S s<--0$22 100 *
+S s>--0$22 100 *
+S s<++0$2$ 100 *
+S s>++0$2$ 100 *
+S s<+-0$2$ 100 *
+S s>+-0$2$ 100 *
+S s<-+0$2$ 100 *
+S s>-+0$2$ 100 *
+S s<--0$2$ 100 *
+S s>--0$2$ 100 *
+S s<++0$$$ 100 *
+S s>++0$$$ 100 *
+S s<+-0$$$ 100 *
+S s>+-0$$$ 100 *
+S s<-+0$$$ 100 *
+S s>-+0$$$ 100 *
+S s<--0$$$ 100 *
+S s>--0$$$ 100 *
+S s<++1100 100 *
+S s>++1100 100 *
+S s<+-1100 100 *
+S s>+-1100 100 *
+S s<-+1100 100 *
+S s>-+1100 100 *
+S s<--1100 100 *
+S s>--1100 100 *
+S s<++1101 100 *
+S s>++1101 100 *
+S s<+-1101 100 *
+S s>+-1101 100 *
+S s<-+1101 100 *
+S s>-+1101 100 *
+S s<--1101 100 *
+S s>--1101 100 *
+S s<++1102 100 *
+S s>++1102 100 *
+S s<+-1102 100 *
+S s>+-1102 100 *
+S s<-+1102 100 *
+S s>-+1102 100 *
+S s<--1102 100 *
+S s>--1102 100 *
+S s<++110$ 100 *
+S s>++110$ 100 *
+S s<+-110$ 100 *
+S s>+-110$ 100 *
+S s<-+110$ 100 *
+S s>-+110$ 100 *
+S s<--110$ 100 *
+S s>--110$ 100 *
+S s<++1111 100 *
+S s>++1111 100 *
+S s<+-1111 100 *
+S s>+-1111 100 *
+S s<-+1111 100 *
+S s>-+1111 100 *
+S s<--1111 100 *
+S s>--1111 100 *
+S s<++1112 100 *
+S s>++1112 100 *
+S s<+-1112 100 *
+S s>+-1112 100 *
+S s<-+1112 100 *
+S s>-+1112 100 *
+S s<--1112 100 *
+S s>--1112 100 *
+S s<++111$ 100 *
+S s>++111$ 100 *
+S s<+-111$ 100 *
+S s>+-111$ 100 *
+S s<-+111$ 100 *
+S s>-+111$ 100 *
+S s<--111$ 100 *
+S s>--111$ 100 *
+S s<++1122 100 *
+S s>++1122 100 *
+S s<+-1122 100 *
+S s>+-1122 100 *
+S s<-+1122 100 *
+S s>-+1122 100 *
+S s<--1122 100 *
+S s>--1122 100 *
+S s<++112$ 100 *
+S s>++112$ 100 *
+S s<+-112$ 100 *
+S s>+-112$ 100 *
+S s<-+112$ 100 *
+S s>-+112$ 100 *
+S s<--112$ 100 *
+S s>--112$ 100 *
+S s<++11$$ 100 *
+S s>++11$$ 100 *
+S s<+-11$$ 100 *
+S s>+-11$$ 100 *
+S s<-+11$$ 100 *
+S s>-+11$$ 100 *
+S s<--11$$ 100 *
+S s>--11$$ 100 *
+S s<++1200 100 *
+S s>++1200 100 *
+S s<+-1200 100 *
+S s>+-1200 100 *
+S s<-+1200 100 *
+S s>-+1200 100 *
+S s<--1200 100 *
+S s>--1200 100 *
+S s<++1201 100 *
+S s>++1201 100 *
+S s<+-1201 100 *
+S s>+-1201 100 *
+S s<-+1201 100 *
+S s>-+1201 100 *
+S s<--1201 100 *
+S s>--1201 100 *
+S s<++1202 100 *
+S s>++1202 100 *
+S s<+-1202 100 *
+S s>+-1202 100 *
+S s<-+1202 100 *
+S s>-+1202 100 *
+S s<--1202 100 *
+S s>--1202 100 *
+S s<++120$ 100 *
+S s>++120$ 100 *
+S s<+-120$ 100 *
+S s>+-120$ 100 *
+S s<-+120$ 100 *
+S s>-+120$ 100 *
+S s<--120$ 100 *
+S s>--120$ 100 *
+S s<++1211 100 *
+S s>++1211 100 *
+S s<+-1211 100 *
+S s>+-1211 100 *
+S s<-+1211 100 *
+S s>-+1211 100 *
+S s<--1211 100 *
+S s>--1211 100 *
+S s<++1212 100 *
+S s>++1212 100 *
+S s<+-1212 100 *
+S s>+-1212 100 *
+S s<-+1212 100 *
+S s>-+1212 100 *
+S s<--1212 100 *
+S s>--1212 100 *
+S s<++121$ 100 *
+S s>++121$ 100 *
+S s<+-121$ 100 *
+S s>+-121$ 100 *
+S s<-+121$ 100 *
+S s>-+121$ 100 *
+S s<--121$ 100 *
+S s>--121$ 100 *
+S s<++1222 100 *
+S s>++1222 100 *
+S s<+-1222 100 *
+S s>+-1222 100 *
+S s<-+1222 100 *
+S s>-+1222 100 *
+S s<--1222 100 *
+S s>--1222 100 *
+S s<++122$ 100 *
+S s>++122$ 100 *
+S s<+-122$ 100 *
+S s>+-122$ 100 *
+S s<-+122$ 100 *
+S s>-+122$ 100 *
+S s<--122$ 100 *
+S s>--122$ 100 *
+S s<++12$$ 100 *
+S s>++12$$ 100 *
+S s<+-12$$ 100 *
+S s>+-12$$ 100 *
+S s<-+12$$ 100 *
+S s>-+12$$ 100 *
+S s<--12$$ 100 *
+S s>--12$$ 100 *
+S s<++1$00 100 *
+S s>++1$00 100 *
+S s<+-1$00 100 *
+S s>+-1$00 100 *
+S s<-+1$00 100 *
+S s>-+1$00 100 *
+S s<--1$00 100 *
+S s>--1$00 100 *
+S s<++1$01 100 *
+S s>++1$01 100 *
+S s<+-1$01 100 *
+S s>+-1$01 100 *
+S s<-+1$01 100 *
+S s>-+1$01 100 *
+S s<--1$01 100 *
+S s>--1$01 100 *
+S s<++1$02 100 *
+S s>++1$02 100 *
+S s<+-1$02 100 *
+S s>+-1$02 100 *
+S s<-+1$02 100 *
+S s>-+1$02 100 *
+S s<--1$02 100 *
+S s>--1$02 100 *
+S s<++1$0$ 100 *
+S s>++1$0$ 100 *
+S s<+-1$0$ 100 *
+S s>+-1$0$ 100 *
+S s<-+1$0$ 100 *
+S s>-+1$0$ 100 *
+S s<--1$0$ 100 *
+S s>--1$0$ 100 *
+S s<++1$11 100 *
+S s>++1$11 100 *
+S s<+-1$11 100 *
+S s>+-1$11 100 *
+S s<-+1$11 100 *
+S s>-+1$11 100 *
+S s<--1$11 100 *
+S s>--1$11 100 *
+S s<++1$12 100 *
+S s>++1$12 100 *
+S s<+-1$12 100 *
+S s>+-1$12 100 *
+S s<-+1$12 100 *
+S s>-+1$12 100 *
+S s<--1$12 100 *
+S s>--1$12 100 *
+S s<++1$1$ 100 *
+S s>++1$1$ 100 *
+S s<+-1$1$ 100 *
+S s>+-1$1$ 100 *
+S s<-+1$1$ 100 *
+S s>-+1$1$ 100 *
+S s<--1$1$ 100 *
+S s>--1$1$ 100 *
+S s<++1$22 100 *
+S s>++1$22 100 *
+S s<+-1$22 100 *
+S s>+-1$22 100 *
+S s<-+1$22 100 *
+S s>-+1$22 100 *
+S s<--1$22 100 *
+S s>--1$22 100 *
+S s<++1$2$ 100 *
+S s>++1$2$ 100 *
+S s<+-1$2$ 100 *
+S s>+-1$2$ 100 *
+S s<-+1$2$ 100 *
+S s>-+1$2$ 100 *
+S s<--1$2$ 100 *
+S s>--1$2$ 100 *
+S s<++1$$$ 100 *
+S s>++1$$$ 100 *
+S s<+-1$$$ 100 *
+S s>+-1$$$ 100 *
+S s<-+1$$$ 100 *
+S s>-+1$$$ 100 *
+S s<--1$$$ 100 *
+S s>--1$$$ 100 *
+S s<++2200 100 *
+S s>++2200 100 *
+S s<+-2200 100 *
+S s>+-2200 100 *
+S s<-+2200 100 *
+S s>-+2200 100 *
+S s<--2200 100 *
+S s>--2200 100 *
+S s<++2201 100 *
+S s>++2201 100 *
+S s<+-2201 100 *
+S s>+-2201 100 *
+S s<-+2201 100 *
+S s>-+2201 100 *
+S s<--2201 100 *
+S s>--2201 100 *
+S s<++2202 100 *
+S s>++2202 100 *
+S s<+-2202 100 *
+S s>+-2202 100 *
+S s<-+2202 100 *
+S s>-+2202 100 *
+S s<--2202 100 *
+S s>--2202 100 *
+S s<++220$ 100 *
+S s>++220$ 100 *
+S s<+-220$ 100 *
+S s>+-220$ 100 *
+S s<-+220$ 100 *
+S s>-+220$ 100 *
+S s<--220$ 100 *
+S s>--220$ 100 *
+S s<++2211 100 *
+S s>++2211 100 *
+S s<+-2211 100 *
+S s>+-2211 100 *
+S s<-+2211 100 *
+S s>-+2211 100 *
+S s<--2211 100 *
+S s>--2211 100 *
+S s<++2212 100 *
+S s>++2212 100 *
+S s<+-2212 100 *
+S s>+-2212 100 *
+S s<-+2212 100 *
+S s>-+2212 100 *
+S s<--2212 100 *
+S s>--2212 100 *
+S s<++221$ 100 *
+S s>++221$ 100 *
+S s<+-221$ 100 *
+S s>+-221$ 100 *
+S s<-+221$ 100 *
+S s>-+221$ 100 *
+S s<--221$ 100 *
+S s>--221$ 100 *
+S s<++2222 100 *
+S s>++2222 100 *
+S s<+-2222 100 *
+S s>+-2222 100 *
+S s<-+2222 100 *
+S s>-+2222 100 *
+S s<--2222 100 *
+S s>--2222 100 *
+S s<++222$ 100 *
+S s>++222$ 100 *
+S s<+-222$ 100 *
+S s>+-222$ 100 *
+S s<-+222$ 100 *
+S s>-+222$ 100 *
+S s<--222$ 100 *
+S s>--222$ 100 *
+S s<++22$$ 100 *
+S s>++22$$ 100 *
+S s<+-22$$ 100 *
+S s>+-22$$ 100 *
+S s<-+22$$ 100 *
+S s>-+22$$ 100 *
+S s<--22$$ 100 *
+S s>--22$$ 100 *
+S s<++2$00 100 *
+S s>++2$00 100 *
+S s<+-2$00 100 *
+S s>+-2$00 100 *
+S s<-+2$00 100 *
+S s>-+2$00 100 *
+S s<--2$00 100 *
+S s>--2$00 100 *
+S s<++2$01 100 *
+S s>++2$01 100 *
+S s<+-2$01 100 *
+S s>+-2$01 100 *
+S s<-+2$01 100 *
+S s>-+2$01 100 *
+S s<--2$01 100 *
+S s>--2$01 100 *
+S s<++2$02 100 *
+S s>++2$02 100 *
+S s<+-2$02 100 *
+S s>+-2$02 100 *
+S s<-+2$02 100 *
+S s>-+2$02 100 *
+S s<--2$02 100 *
+S s>--2$02 100 *
+S s<++2$0$ 100 *
+S s>++2$0$ 100 *
+S s<+-2$0$ 100 *
+S s>+-2$0$ 100 *
+S s<-+2$0$ 100 *
+S s>-+2$0$ 100 *
+S s<--2$0$ 100 *
+S s>--2$0$ 100 *
+S s<++2$11 100 *
+S s>++2$11 100 *
+S s<+-2$11 100 *
+S s>+-2$11 100 *
+S s<-+2$11 100 *
+S s>-+2$11 100 *
+S s<--2$11 100 *
+S s>--2$11 100 *
+S s<++2$12 100 *
+S s>++2$12 100 *
+S s<+-2$12 100 *
+S s>+-2$12 100 *
+S s<-+2$12 100 *
+S s>-+2$12 100 *
+S s<--2$12 100 *
+S s>--2$12 100 *
+S s<++2$1$ 100 *
+S s>++2$1$ 100 *
+S s<+-2$1$ 100 *
+S s>+-2$1$ 100 *
+S s<-+2$1$ 100 *
+S s>-+2$1$ 100 *
+S s<--2$1$ 100 *
+S s>--2$1$ 100 *
+S s<++2$22 100 *
+S s>++2$22 100 *
+S s<+-2$22 100 *
+S s>+-2$22 100 *
+S s<-+2$22 100 *
+S s>-+2$22 100 *
+S s<--2$22 100 *
+S s>--2$22 100 *
+S s<++2$2$ 100 *
+S s>++2$2$ 100 *
+S s<+-2$2$ 100 *
+S s>+-2$2$ 100 *
+S s<-+2$2$ 100 *
+S s>-+2$2$ 100 *
+S s<--2$2$ 100 *
+S s>--2$2$ 100 *
+S s<++2$$$ 100 *
+S s>++2$$$ 100 *
+S s<+-2$$$ 100 *
+S s>+-2$$$ 100 *
+S s<-+2$$$ 100 *
+S s>-+2$$$ 100 *
+S s<--2$$$ 100 *
+S s>--2$$$ 100 *
+S s<++$$00 100 *
+S s>++$$00 100 *
+S s<+-$$00 100 *
+S s>+-$$00 100 *
+S s<-+$$00 100 *
+S s>-+$$00 100 *
+S s<--$$00 100 *
+S s>--$$00 100 *
+S s<++$$01 100 *
+S s>++$$01 100 *
+S s<+-$$01 100 *
+S s>+-$$01 100 *
+S s<-+$$01 100 *
+S s>-+$$01 100 *
+S s<--$$01 100 *
+S s>--$$01 100 *
+S s<++$$02 100 *
+S s>++$$02 100 *
+S s<+-$$02 100 *
+S s>+-$$02 100 *
+S s<-+$$02 100 *
+S s>-+$$02 100 *
+S s<--$$02 100 *
+S s>--$$02 100 *
+S s<++$$0$ 100 *
+S s>++$$0$ 100 *
+S s<+-$$0$ 100 *
+S s>+-$$0$ 100 *
+S s<-+$$0$ 100 *
+S s>-+$$0$ 100 *
+S s<--$$0$ 100 *
+S s>--$$0$ 100 *
+S s<++$$11 100 *
+S s>++$$11 100 *
+S s<+-$$11 100 *
+S s>+-$$11 100 *
+S s<-+$$11 100 *
+S s>-+$$11 100 *
+S s<--$$11 100 *
+S s>--$$11 100 *
+S s<++$$12 100 *
+S s>++$$12 100 *
+S s<+-$$12 100 *
+S s>+-$$12 100 *
+S s<-+$$12 100 *
+S s>-+$$12 100 *
+S s<--$$12 100 *
+S s>--$$12 100 *
+S s<++$$1$ 100 *
+S s>++$$1$ 100 *
+S s<+-$$1$ 100 *
+S s>+-$$1$ 100 *
+S s<-+$$1$ 100 *
+S s>-+$$1$ 100 *
+S s<--$$1$ 100 *
+S s>--$$1$ 100 *
+S s<++$$22 100 *
+S s>++$$22 100 *
+S s<+-$$22 100 *
+S s>+-$$22 100 *
+S s<-+$$22 100 *
+S s>-+$$22 100 *
+S s<--$$22 100 *
+S s>--$$22 100 *
+S s<++$$2$ 100 *
+S s>++$$2$ 100 *
+S s<+-$$2$ 100 *
+S s>+-$$2$ 100 *
+S s<-+$$2$ 100 *
+S s>-+$$2$ 100 *
+S s<--$$2$ 100 *
+S s>--$$2$ 100 *
+S s<++$$$$ 100 *
+S s>++$$$$ 100 *
+S s<+-$$$$ 100 *
+S s>+-$$$$ 100 *
+S s<-+$$$$ 100 *
+S s>-+$$$$ 100 *
+S s<--$$$$ 100 *
+S s>--$$$$ 100 *
+E <++0000 a+ s<++0000+ 0 0 0 0 * at:Z:internal
+E >++0000 s>++0000+ a+ 0 0 0 0 * at:Z:internal
+E <+-0000 a+ s<+-0000- 0 0 0 0 * at:Z:dovetail_L
+E >+-0000 s>+-0000+ a- 0 0 0 0 * at:Z:dovetail_L
+E <-+0000 a- s<-+0000+ 0 0 0 0 * at:Z:dovetail_L
+E >-+0000 s>-+0000- a+ 0 0 0 0 * at:Z:dovetail_L
+E <--0000 a- s<--0000- 0 0 0 0 * at:Z:internal
+E >--0000 s>--0000- a- 0 0 0 0 * at:Z:internal
+E <++0001 a+ s<++0001+ 0 0 0 30 * at:Z:internal
+E >++0001 s>++0001+ a+ 0 0 0 30 * at:Z:internal
+E <+-0001 a+ s<+-0001- 0 0 0 30 * at:Z:dovetail_L
+E >+-0001 s>+-0001+ a- 0 0 0 30 * at:Z:dovetail_L
+E <-+0001 a- s<-+0001+ 0 0 0 30 * at:Z:dovetail_L
+E >-+0001 s>-+0001- a+ 0 0 0 30 * at:Z:dovetail_L
+E <--0001 a- s<--0001- 0 0 0 30 * at:Z:internal
+E >--0001 s>--0001- a- 0 0 0 30 * at:Z:internal
+E <++0002 a+ s<++0002+ 0 0 0 70 * at:Z:internal
+E >++0002 s>++0002+ a+ 0 0 0 70 * at:Z:internal
+E <+-0002 a+ s<+-0002- 0 0 0 70 * at:Z:dovetail_L
+E >+-0002 s>+-0002+ a- 0 0 0 70 * at:Z:dovetail_L
+E <-+0002 a- s<-+0002+ 0 0 0 70 * at:Z:dovetail_L
+E >-+0002 s>-+0002- a+ 0 0 0 70 * at:Z:dovetail_L
+E <--0002 a- s<--0002- 0 0 0 70 * at:Z:internal
+E >--0002 s>--0002- a- 0 0 0 70 * at:Z:internal
+E <++000$ a+ s<++000$+ 0 0 0 100$ * at:Z:to_contained
+E >++000$ s>++000$+ a+ 0 0 0 100$ * at:Z:to_container
+E <+-000$ a+ s<+-000$- 0 0 0 100$ * at:Z:to_contained
+E >+-000$ s>+-000$+ a- 0 0 0 100$ * at:Z:to_container
+E <-+000$ a- s<-+000$+ 0 0 0 100$ * at:Z:to_contained
+E >-+000$ s>-+000$- a+ 0 0 0 100$ * at:Z:to_container
+E <--000$ a- s<--000$- 0 0 0 100$ * at:Z:to_contained
+E >--000$ s>--000$- a- 0 0 0 100$ * at:Z:to_container
+E <++0011 a+ s<++0011+ 0 0 30 30 * at:Z:internal
+E >++0011 s>++0011+ a+ 0 0 30 30 * at:Z:internal
+E <+-0011 a+ s<+-0011- 0 0 30 30 * at:Z:internal
+E >+-0011 s>+-0011+ a- 0 0 30 30 * at:Z:internal
+E <-+0011 a- s<-+0011+ 0 0 30 30 * at:Z:internal
+E >-+0011 s>-+0011- a+ 0 0 30 30 * at:Z:internal
+E <--0011 a- s<--0011- 0 0 30 30 * at:Z:internal
+E >--0011 s>--0011- a- 0 0 30 30 * at:Z:internal
+E <++0012 a+ s<++0012+ 0 0 30 70 * at:Z:internal
+E >++0012 s>++0012+ a+ 0 0 30 70 * at:Z:internal
+E <+-0012 a+ s<+-0012- 0 0 30 70 * at:Z:internal
+E >+-0012 s>+-0012+ a- 0 0 30 70 * at:Z:internal
+E <-+0012 a- s<-+0012+ 0 0 30 70 * at:Z:internal
+E >-+0012 s>-+0012- a+ 0 0 30 70 * at:Z:internal
+E <--0012 a- s<--0012- 0 0 30 70 * at:Z:internal
+E >--0012 s>--0012- a- 0 0 30 70 * at:Z:internal
+E <++001$ a+ s<++001$+ 0 0 30 100$ * at:Z:dovetail_L
+E >++001$ s>++001$+ a+ 0 0 30 100$ * at:Z:dovetail_R
+E <+-001$ a+ s<+-001$- 0 0 30 100$ * at:Z:internal
+E >+-001$ s>+-001$+ a- 0 0 30 100$ * at:Z:internal
+E <-+001$ a- s<-+001$+ 0 0 30 100$ * at:Z:internal
+E >-+001$ s>-+001$- a+ 0 0 30 100$ * at:Z:internal
+E <--001$ a- s<--001$- 0 0 30 100$ * at:Z:dovetail_L
+E >--001$ s>--001$- a- 0 0 30 100$ * at:Z:dovetail_R
+E <++0022 a+ s<++0022+ 0 0 70 70 * at:Z:internal
+E >++0022 s>++0022+ a+ 0 0 70 70 * at:Z:internal
+E <+-0022 a+ s<+-0022- 0 0 70 70 * at:Z:internal
+E >+-0022 s>+-0022+ a- 0 0 70 70 * at:Z:internal
+E <-+0022 a- s<-+0022+ 0 0 70 70 * at:Z:internal
+E >-+0022 s>-+0022- a+ 0 0 70 70 * at:Z:internal
+E <--0022 a- s<--0022- 0 0 70 70 * at:Z:internal
+E >--0022 s>--0022- a- 0 0 70 70 * at:Z:internal
+E <++002$ a+ s<++002$+ 0 0 70 100$ * at:Z:dovetail_L
+E >++002$ s>++002$+ a+ 0 0 70 100$ * at:Z:dovetail_R
+E <+-002$ a+ s<+-002$- 0 0 70 100$ * at:Z:internal
+E >+-002$ s>+-002$+ a- 0 0 70 100$ * at:Z:internal
+E <-+002$ a- s<-+002$+ 0 0 70 100$ * at:Z:internal
+E >-+002$ s>-+002$- a+ 0 0 70 100$ * at:Z:internal
+E <--002$ a- s<--002$- 0 0 70 100$ * at:Z:dovetail_L
+E >--002$ s>--002$- a- 0 0 70 100$ * at:Z:dovetail_R
+E <++00$$ a+ s<++00$$+ 0 0 100$ 100$ * at:Z:dovetail_L
+E >++00$$ s>++00$$+ a+ 0 0 100$ 100$ * at:Z:dovetail_R
+E <+-00$$ a+ s<+-00$$- 0 0 100$ 100$ * at:Z:internal
+E >+-00$$ s>+-00$$+ a- 0 0 100$ 100$ * at:Z:internal
+E <-+00$$ a- s<-+00$$+ 0 0 100$ 100$ * at:Z:internal
+E >-+00$$ s>-+00$$- a+ 0 0 100$ 100$ * at:Z:internal
+E <--00$$ a- s<--00$$- 0 0 100$ 100$ * at:Z:dovetail_L
+E >--00$$ s>--00$$- a- 0 0 100$ 100$ * at:Z:dovetail_R
+E <++0100 a+ s<++0100+ 0 30 0 0 * at:Z:internal
+E >++0100 s>++0100+ a+ 0 30 0 0 * at:Z:internal
+E <+-0100 a+ s<+-0100- 0 30 0 0 * at:Z:dovetail_L
+E >+-0100 s>+-0100+ a- 0 30 0 0 * at:Z:dovetail_L
+E <-+0100 a- s<-+0100+ 0 30 0 0 * at:Z:dovetail_L
+E >-+0100 s>-+0100- a+ 0 30 0 0 * at:Z:dovetail_L
+E <--0100 a- s<--0100- 0 30 0 0 * at:Z:internal
+E >--0100 s>--0100- a- 0 30 0 0 * at:Z:internal
+E <++0101 a+ s<++0101+ 0 30 0 30 * at:Z:internal
+E >++0101 s>++0101+ a+ 0 30 0 30 * at:Z:internal
+E <+-0101 a+ s<+-0101- 0 30 0 30 * at:Z:dovetail_L
+E >+-0101 s>+-0101+ a- 0 30 0 30 * at:Z:dovetail_L
+E <-+0101 a- s<-+0101+ 0 30 0 30 * at:Z:dovetail_L
+E >-+0101 s>-+0101- a+ 0 30 0 30 * at:Z:dovetail_L
+E <--0101 a- s<--0101- 0 30 0 30 * at:Z:internal
+E >--0101 s>--0101- a- 0 30 0 30 * at:Z:internal
+E <++0102 a+ s<++0102+ 0 30 0 70 * at:Z:internal
+E >++0102 s>++0102+ a+ 0 30 0 70 * at:Z:internal
+E <+-0102 a+ s<+-0102- 0 30 0 70 * at:Z:dovetail_L
+E >+-0102 s>+-0102+ a- 0 30 0 70 * at:Z:dovetail_L
+E <-+0102 a- s<-+0102+ 0 30 0 70 * at:Z:dovetail_L
+E >-+0102 s>-+0102- a+ 0 30 0 70 * at:Z:dovetail_L
+E <--0102 a- s<--0102- 0 30 0 70 * at:Z:internal
+E >--0102 s>--0102- a- 0 30 0 70 * at:Z:internal
+E <++010$ a+ s<++010$+ 0 30 0 100$ * at:Z:to_contained
+E >++010$ s>++010$+ a+ 0 30 0 100$ * at:Z:to_container
+E <+-010$ a+ s<+-010$- 0 30 0 100$ * at:Z:to_contained
+E >+-010$ s>+-010$+ a- 0 30 0 100$ * at:Z:to_container
+E <-+010$ a- s<-+010$+ 0 30 0 100$ * at:Z:to_contained
+E >-+010$ s>-+010$- a+ 0 30 0 100$ * at:Z:to_container
+E <--010$ a- s<--010$- 0 30 0 100$ * at:Z:to_contained
+E >--010$ s>--010$- a- 0 30 0 100$ * at:Z:to_container
+E <++0111 a+ s<++0111+ 0 30 30 30 * at:Z:internal
+E >++0111 s>++0111+ a+ 0 30 30 30 * at:Z:internal
+E <+-0111 a+ s<+-0111- 0 30 30 30 * at:Z:internal
+E >+-0111 s>+-0111+ a- 0 30 30 30 * at:Z:internal
+E <-+0111 a- s<-+0111+ 0 30 30 30 * at:Z:internal
+E >-+0111 s>-+0111- a+ 0 30 30 30 * at:Z:internal
+E <--0111 a- s<--0111- 0 30 30 30 * at:Z:internal
+E >--0111 s>--0111- a- 0 30 30 30 * at:Z:internal
+E <++0112 a+ s<++0112+ 0 30 30 70 * at:Z:internal
+E >++0112 s>++0112+ a+ 0 30 30 70 * at:Z:internal
+E <+-0112 a+ s<+-0112- 0 30 30 70 * at:Z:internal
+E >+-0112 s>+-0112+ a- 0 30 30 70 * at:Z:internal
+E <-+0112 a- s<-+0112+ 0 30 30 70 * at:Z:internal
+E >-+0112 s>-+0112- a+ 0 30 30 70 * at:Z:internal
+E <--0112 a- s<--0112- 0 30 30 70 * at:Z:internal
+E >--0112 s>--0112- a- 0 30 30 70 * at:Z:internal
+E <++011$ a+ s<++011$+ 0 30 30 100$ * at:Z:dovetail_L
+E >++011$ s>++011$+ a+ 0 30 30 100$ * at:Z:dovetail_R
+E <+-011$ a+ s<+-011$- 0 30 30 100$ * at:Z:internal
+E >+-011$ s>+-011$+ a- 0 30 30 100$ * at:Z:internal
+E <-+011$ a- s<-+011$+ 0 30 30 100$ * at:Z:internal
+E >-+011$ s>-+011$- a+ 0 30 30 100$ * at:Z:internal
+E <--011$ a- s<--011$- 0 30 30 100$ * at:Z:dovetail_L
+E >--011$ s>--011$- a- 0 30 30 100$ * at:Z:dovetail_R
+E <++0122 a+ s<++0122+ 0 30 70 70 * at:Z:internal
+E >++0122 s>++0122+ a+ 0 30 70 70 * at:Z:internal
+E <+-0122 a+ s<+-0122- 0 30 70 70 * at:Z:internal
+E >+-0122 s>+-0122+ a- 0 30 70 70 * at:Z:internal
+E <-+0122 a- s<-+0122+ 0 30 70 70 * at:Z:internal
+E >-+0122 s>-+0122- a+ 0 30 70 70 * at:Z:internal
+E <--0122 a- s<--0122- 0 30 70 70 * at:Z:internal
+E >--0122 s>--0122- a- 0 30 70 70 * at:Z:internal
+E <++012$ a+ s<++012$+ 0 30 70 100$ * at:Z:dovetail_L
+E >++012$ s>++012$+ a+ 0 30 70 100$ * at:Z:dovetail_R
+E <+-012$ a+ s<+-012$- 0 30 70 100$ * at:Z:internal
+E >+-012$ s>+-012$+ a- 0 30 70 100$ * at:Z:internal
+E <-+012$ a- s<-+012$+ 0 30 70 100$ * at:Z:internal
+E >-+012$ s>-+012$- a+ 0 30 70 100$ * at:Z:internal
+E <--012$ a- s<--012$- 0 30 70 100$ * at:Z:dovetail_L
+E >--012$ s>--012$- a- 0 30 70 100$ * at:Z:dovetail_R
+E <++01$$ a+ s<++01$$+ 0 30 100$ 100$ * at:Z:dovetail_L
+E >++01$$ s>++01$$+ a+ 0 30 100$ 100$ * at:Z:dovetail_R
+E <+-01$$ a+ s<+-01$$- 0 30 100$ 100$ * at:Z:internal
+E >+-01$$ s>+-01$$+ a- 0 30 100$ 100$ * at:Z:internal
+E <-+01$$ a- s<-+01$$+ 0 30 100$ 100$ * at:Z:internal
+E >-+01$$ s>-+01$$- a+ 0 30 100$ 100$ * at:Z:internal
+E <--01$$ a- s<--01$$- 0 30 100$ 100$ * at:Z:dovetail_L
+E >--01$$ s>--01$$- a- 0 30 100$ 100$ * at:Z:dovetail_R
+E <++0200 a+ s<++0200+ 0 70 0 0 * at:Z:internal
+E >++0200 s>++0200+ a+ 0 70 0 0 * at:Z:internal
+E <+-0200 a+ s<+-0200- 0 70 0 0 * at:Z:dovetail_L
+E >+-0200 s>+-0200+ a- 0 70 0 0 * at:Z:dovetail_L
+E <-+0200 a- s<-+0200+ 0 70 0 0 * at:Z:dovetail_L
+E >-+0200 s>-+0200- a+ 0 70 0 0 * at:Z:dovetail_L
+E <--0200 a- s<--0200- 0 70 0 0 * at:Z:internal
+E >--0200 s>--0200- a- 0 70 0 0 * at:Z:internal
+E <++0201 a+ s<++0201+ 0 70 0 30 * at:Z:internal
+E >++0201 s>++0201+ a+ 0 70 0 30 * at:Z:internal
+E <+-0201 a+ s<+-0201- 0 70 0 30 * at:Z:dovetail_L
+E >+-0201 s>+-0201+ a- 0 70 0 30 * at:Z:dovetail_L
+E <-+0201 a- s<-+0201+ 0 70 0 30 * at:Z:dovetail_L
+E >-+0201 s>-+0201- a+ 0 70 0 30 * at:Z:dovetail_L
+E <--0201 a- s<--0201- 0 70 0 30 * at:Z:internal
+E >--0201 s>--0201- a- 0 70 0 30 * at:Z:internal
+E <++0202 a+ s<++0202+ 0 70 0 70 * at:Z:internal
+E >++0202 s>++0202+ a+ 0 70 0 70 * at:Z:internal
+E <+-0202 a+ s<+-0202- 0 70 0 70 * at:Z:dovetail_L
+E >+-0202 s>+-0202+ a- 0 70 0 70 * at:Z:dovetail_L
+E <-+0202 a- s<-+0202+ 0 70 0 70 * at:Z:dovetail_L
+E >-+0202 s>-+0202- a+ 0 70 0 70 * at:Z:dovetail_L
+E <--0202 a- s<--0202- 0 70 0 70 * at:Z:internal
+E >--0202 s>--0202- a- 0 70 0 70 * at:Z:internal
+E <++020$ a+ s<++020$+ 0 70 0 100$ * at:Z:to_contained
+E >++020$ s>++020$+ a+ 0 70 0 100$ * at:Z:to_container
+E <+-020$ a+ s<+-020$- 0 70 0 100$ * at:Z:to_contained
+E >+-020$ s>+-020$+ a- 0 70 0 100$ * at:Z:to_container
+E <-+020$ a- s<-+020$+ 0 70 0 100$ * at:Z:to_contained
+E >-+020$ s>-+020$- a+ 0 70 0 100$ * at:Z:to_container
+E <--020$ a- s<--020$- 0 70 0 100$ * at:Z:to_contained
+E >--020$ s>--020$- a- 0 70 0 100$ * at:Z:to_container
+E <++0211 a+ s<++0211+ 0 70 30 30 * at:Z:internal
+E >++0211 s>++0211+ a+ 0 70 30 30 * at:Z:internal
+E <+-0211 a+ s<+-0211- 0 70 30 30 * at:Z:internal
+E >+-0211 s>+-0211+ a- 0 70 30 30 * at:Z:internal
+E <-+0211 a- s<-+0211+ 0 70 30 30 * at:Z:internal
+E >-+0211 s>-+0211- a+ 0 70 30 30 * at:Z:internal
+E <--0211 a- s<--0211- 0 70 30 30 * at:Z:internal
+E >--0211 s>--0211- a- 0 70 30 30 * at:Z:internal
+E <++0212 a+ s<++0212+ 0 70 30 70 * at:Z:internal
+E >++0212 s>++0212+ a+ 0 70 30 70 * at:Z:internal
+E <+-0212 a+ s<+-0212- 0 70 30 70 * at:Z:internal
+E >+-0212 s>+-0212+ a- 0 70 30 70 * at:Z:internal
+E <-+0212 a- s<-+0212+ 0 70 30 70 * at:Z:internal
+E >-+0212 s>-+0212- a+ 0 70 30 70 * at:Z:internal
+E <--0212 a- s<--0212- 0 70 30 70 * at:Z:internal
+E >--0212 s>--0212- a- 0 70 30 70 * at:Z:internal
+E <++021$ a+ s<++021$+ 0 70 30 100$ * at:Z:dovetail_L
+E >++021$ s>++021$+ a+ 0 70 30 100$ * at:Z:dovetail_R
+E <+-021$ a+ s<+-021$- 0 70 30 100$ * at:Z:internal
+E >+-021$ s>+-021$+ a- 0 70 30 100$ * at:Z:internal
+E <-+021$ a- s<-+021$+ 0 70 30 100$ * at:Z:internal
+E >-+021$ s>-+021$- a+ 0 70 30 100$ * at:Z:internal
+E <--021$ a- s<--021$- 0 70 30 100$ * at:Z:dovetail_L
+E >--021$ s>--021$- a- 0 70 30 100$ * at:Z:dovetail_R
+E <++0222 a+ s<++0222+ 0 70 70 70 * at:Z:internal
+E >++0222 s>++0222+ a+ 0 70 70 70 * at:Z:internal
+E <+-0222 a+ s<+-0222- 0 70 70 70 * at:Z:internal
+E >+-0222 s>+-0222+ a- 0 70 70 70 * at:Z:internal
+E <-+0222 a- s<-+0222+ 0 70 70 70 * at:Z:internal
+E >-+0222 s>-+0222- a+ 0 70 70 70 * at:Z:internal
+E <--0222 a- s<--0222- 0 70 70 70 * at:Z:internal
+E >--0222 s>--0222- a- 0 70 70 70 * at:Z:internal
+E <++022$ a+ s<++022$+ 0 70 70 100$ * at:Z:dovetail_L
+E >++022$ s>++022$+ a+ 0 70 70 100$ * at:Z:dovetail_R
+E <+-022$ a+ s<+-022$- 0 70 70 100$ * at:Z:internal
+E >+-022$ s>+-022$+ a- 0 70 70 100$ * at:Z:internal
+E <-+022$ a- s<-+022$+ 0 70 70 100$ * at:Z:internal
+E >-+022$ s>-+022$- a+ 0 70 70 100$ * at:Z:internal
+E <--022$ a- s<--022$- 0 70 70 100$ * at:Z:dovetail_L
+E >--022$ s>--022$- a- 0 70 70 100$ * at:Z:dovetail_R
+E <++02$$ a+ s<++02$$+ 0 70 100$ 100$ * at:Z:dovetail_L
+E >++02$$ s>++02$$+ a+ 0 70 100$ 100$ * at:Z:dovetail_R
+E <+-02$$ a+ s<+-02$$- 0 70 100$ 100$ * at:Z:internal
+E >+-02$$ s>+-02$$+ a- 0 70 100$ 100$ * at:Z:internal
+E <-+02$$ a- s<-+02$$+ 0 70 100$ 100$ * at:Z:internal
+E >-+02$$ s>-+02$$- a+ 0 70 100$ 100$ * at:Z:internal
+E <--02$$ a- s<--02$$- 0 70 100$ 100$ * at:Z:dovetail_L
+E >--02$$ s>--02$$- a- 0 70 100$ 100$ * at:Z:dovetail_R
+E <++0$00 a+ s<++0$00+ 0 100$ 0 0 * at:Z:to_container
+E >++0$00 s>++0$00+ a+ 0 100$ 0 0 * at:Z:to_contained
+E <+-0$00 a+ s<+-0$00- 0 100$ 0 0 * at:Z:to_container
+E >+-0$00 s>+-0$00+ a- 0 100$ 0 0 * at:Z:to_contained
+E <-+0$00 a- s<-+0$00+ 0 100$ 0 0 * at:Z:to_container
+E >-+0$00 s>-+0$00- a+ 0 100$ 0 0 * at:Z:to_contained
+E <--0$00 a- s<--0$00- 0 100$ 0 0 * at:Z:to_container
+E >--0$00 s>--0$00- a- 0 100$ 0 0 * at:Z:to_contained
+E <++0$01 a+ s<++0$01+ 0 100$ 0 30 * at:Z:to_container
+E >++0$01 s>++0$01+ a+ 0 100$ 0 30 * at:Z:to_contained
+E <+-0$01 a+ s<+-0$01- 0 100$ 0 30 * at:Z:to_container
+E >+-0$01 s>+-0$01+ a- 0 100$ 0 30 * at:Z:to_contained
+E <-+0$01 a- s<-+0$01+ 0 100$ 0 30 * at:Z:to_container
+E >-+0$01 s>-+0$01- a+ 0 100$ 0 30 * at:Z:to_contained
+E <--0$01 a- s<--0$01- 0 100$ 0 30 * at:Z:to_container
+E >--0$01 s>--0$01- a- 0 100$ 0 30 * at:Z:to_contained
+E <++0$02 a+ s<++0$02+ 0 100$ 0 70 * at:Z:to_container
+E >++0$02 s>++0$02+ a+ 0 100$ 0 70 * at:Z:to_contained
+E <+-0$02 a+ s<+-0$02- 0 100$ 0 70 * at:Z:to_container
+E >+-0$02 s>+-0$02+ a- 0 100$ 0 70 * at:Z:to_contained
+E <-+0$02 a- s<-+0$02+ 0 100$ 0 70 * at:Z:to_container
+E >-+0$02 s>-+0$02- a+ 0 100$ 0 70 * at:Z:to_contained
+E <--0$02 a- s<--0$02- 0 100$ 0 70 * at:Z:to_container
+E >--0$02 s>--0$02- a- 0 100$ 0 70 * at:Z:to_contained
+E <++0$0$ a+ s<++0$0$+ 0 100$ 0 100$ * at:Z:to_contained
+E >++0$0$ s>++0$0$+ a+ 0 100$ 0 100$ * at:Z:to_container
+E <+-0$0$ a+ s<+-0$0$- 0 100$ 0 100$ * at:Z:to_contained
+E >+-0$0$ s>+-0$0$+ a- 0 100$ 0 100$ * at:Z:to_container
+E <-+0$0$ a- s<-+0$0$+ 0 100$ 0 100$ * at:Z:to_contained
+E >-+0$0$ s>-+0$0$- a+ 0 100$ 0 100$ * at:Z:to_container
+E <--0$0$ a- s<--0$0$- 0 100$ 0 100$ * at:Z:to_contained
+E >--0$0$ s>--0$0$- a- 0 100$ 0 100$ * at:Z:to_container
+E <++0$11 a+ s<++0$11+ 0 100$ 30 30 * at:Z:to_container
+E >++0$11 s>++0$11+ a+ 0 100$ 30 30 * at:Z:to_contained
+E <+-0$11 a+ s<+-0$11- 0 100$ 30 30 * at:Z:to_container
+E >+-0$11 s>+-0$11+ a- 0 100$ 30 30 * at:Z:to_contained
+E <-+0$11 a- s<-+0$11+ 0 100$ 30 30 * at:Z:to_container
+E >-+0$11 s>-+0$11- a+ 0 100$ 30 30 * at:Z:to_contained
+E <--0$11 a- s<--0$11- 0 100$ 30 30 * at:Z:to_container
+E >--0$11 s>--0$11- a- 0 100$ 30 30 * at:Z:to_contained
+E <++0$12 a+ s<++0$12+ 0 100$ 30 70 * at:Z:to_container
+E >++0$12 s>++0$12+ a+ 0 100$ 30 70 * at:Z:to_contained
+E <+-0$12 a+ s<+-0$12- 0 100$ 30 70 * at:Z:to_container
+E >+-0$12 s>+-0$12+ a- 0 100$ 30 70 * at:Z:to_contained
+E <-+0$12 a- s<-+0$12+ 0 100$ 30 70 * at:Z:to_container
+E >-+0$12 s>-+0$12- a+ 0 100$ 30 70 * at:Z:to_contained
+E <--0$12 a- s<--0$12- 0 100$ 30 70 * at:Z:to_container
+E >--0$12 s>--0$12- a- 0 100$ 30 70 * at:Z:to_contained
+E <++0$1$ a+ s<++0$1$+ 0 100$ 30 100$ * at:Z:to_container
+E >++0$1$ s>++0$1$+ a+ 0 100$ 30 100$ * at:Z:to_contained
+E <+-0$1$ a+ s<+-0$1$- 0 100$ 30 100$ * at:Z:to_container
+E >+-0$1$ s>+-0$1$+ a- 0 100$ 30 100$ * at:Z:to_contained
+E <-+0$1$ a- s<-+0$1$+ 0 100$ 30 100$ * at:Z:to_container
+E >-+0$1$ s>-+0$1$- a+ 0 100$ 30 100$ * at:Z:to_contained
+E <--0$1$ a- s<--0$1$- 0 100$ 30 100$ * at:Z:to_container
+E >--0$1$ s>--0$1$- a- 0 100$ 30 100$ * at:Z:to_contained
+E <++0$22 a+ s<++0$22+ 0 100$ 70 70 * at:Z:to_container
+E >++0$22 s>++0$22+ a+ 0 100$ 70 70 * at:Z:to_contained
+E <+-0$22 a+ s<+-0$22- 0 100$ 70 70 * at:Z:to_container
+E >+-0$22 s>+-0$22+ a- 0 100$ 70 70 * at:Z:to_contained
+E <-+0$22 a- s<-+0$22+ 0 100$ 70 70 * at:Z:to_container
+E >-+0$22 s>-+0$22- a+ 0 100$ 70 70 * at:Z:to_contained
+E <--0$22 a- s<--0$22- 0 100$ 70 70 * at:Z:to_container
+E >--0$22 s>--0$22- a- 0 100$ 70 70 * at:Z:to_contained
+E <++0$2$ a+ s<++0$2$+ 0 100$ 70 100$ * at:Z:to_container
+E >++0$2$ s>++0$2$+ a+ 0 100$ 70 100$ * at:Z:to_contained
+E <+-0$2$ a+ s<+-0$2$- 0 100$ 70 100$ * at:Z:to_container
+E >+-0$2$ s>+-0$2$+ a- 0 100$ 70 100$ * at:Z:to_contained
+E <-+0$2$ a- s<-+0$2$+ 0 100$ 70 100$ * at:Z:to_container
+E >-+0$2$ s>-+0$2$- a+ 0 100$ 70 100$ * at:Z:to_contained
+E <--0$2$ a- s<--0$2$- 0 100$ 70 100$ * at:Z:to_container
+E >--0$2$ s>--0$2$- a- 0 100$ 70 100$ * at:Z:to_contained
+E <++0$$$ a+ s<++0$$$+ 0 100$ 100$ 100$ * at:Z:to_container
+E >++0$$$ s>++0$$$+ a+ 0 100$ 100$ 100$ * at:Z:to_contained
+E <+-0$$$ a+ s<+-0$$$- 0 100$ 100$ 100$ * at:Z:to_container
+E >+-0$$$ s>+-0$$$+ a- 0 100$ 100$ 100$ * at:Z:to_contained
+E <-+0$$$ a- s<-+0$$$+ 0 100$ 100$ 100$ * at:Z:to_container
+E >-+0$$$ s>-+0$$$- a+ 0 100$ 100$ 100$ * at:Z:to_contained
+E <--0$$$ a- s<--0$$$- 0 100$ 100$ 100$ * at:Z:to_container
+E >--0$$$ s>--0$$$- a- 0 100$ 100$ 100$ * at:Z:to_contained
+E <++1100 a+ s<++1100+ 30 30 0 0 * at:Z:internal
+E >++1100 s>++1100+ a+ 30 30 0 0 * at:Z:internal
+E <+-1100 a+ s<+-1100- 30 30 0 0 * at:Z:internal
+E >+-1100 s>+-1100+ a- 30 30 0 0 * at:Z:internal
+E <-+1100 a- s<-+1100+ 30 30 0 0 * at:Z:internal
+E >-+1100 s>-+1100- a+ 30 30 0 0 * at:Z:internal
+E <--1100 a- s<--1100- 30 30 0 0 * at:Z:internal
+E >--1100 s>--1100- a- 30 30 0 0 * at:Z:internal
+E <++1101 a+ s<++1101+ 30 30 0 30 * at:Z:internal
+E >++1101 s>++1101+ a+ 30 30 0 30 * at:Z:internal
+E <+-1101 a+ s<+-1101- 30 30 0 30 * at:Z:internal
+E >+-1101 s>+-1101+ a- 30 30 0 30 * at:Z:internal
+E <-+1101 a- s<-+1101+ 30 30 0 30 * at:Z:internal
+E >-+1101 s>-+1101- a+ 30 30 0 30 * at:Z:internal
+E <--1101 a- s<--1101- 30 30 0 30 * at:Z:internal
+E >--1101 s>--1101- a- 30 30 0 30 * at:Z:internal
+E <++1102 a+ s<++1102+ 30 30 0 70 * at:Z:internal
+E >++1102 s>++1102+ a+ 30 30 0 70 * at:Z:internal
+E <+-1102 a+ s<+-1102- 30 30 0 70 * at:Z:internal
+E >+-1102 s>+-1102+ a- 30 30 0 70 * at:Z:internal
+E <-+1102 a- s<-+1102+ 30 30 0 70 * at:Z:internal
+E >-+1102 s>-+1102- a+ 30 30 0 70 * at:Z:internal
+E <--1102 a- s<--1102- 30 30 0 70 * at:Z:internal
+E >--1102 s>--1102- a- 30 30 0 70 * at:Z:internal
+E <++110$ a+ s<++110$+ 30 30 0 100$ * at:Z:to_contained
+E >++110$ s>++110$+ a+ 30 30 0 100$ * at:Z:to_container
+E <+-110$ a+ s<+-110$- 30 30 0 100$ * at:Z:to_contained
+E >+-110$ s>+-110$+ a- 30 30 0 100$ * at:Z:to_container
+E <-+110$ a- s<-+110$+ 30 30 0 100$ * at:Z:to_contained
+E >-+110$ s>-+110$- a+ 30 30 0 100$ * at:Z:to_container
+E <--110$ a- s<--110$- 30 30 0 100$ * at:Z:to_contained
+E >--110$ s>--110$- a- 30 30 0 100$ * at:Z:to_container
+E <++1111 a+ s<++1111+ 30 30 30 30 * at:Z:internal
+E >++1111 s>++1111+ a+ 30 30 30 30 * at:Z:internal
+E <+-1111 a+ s<+-1111- 30 30 30 30 * at:Z:internal
+E >+-1111 s>+-1111+ a- 30 30 30 30 * at:Z:internal
+E <-+1111 a- s<-+1111+ 30 30 30 30 * at:Z:internal
+E >-+1111 s>-+1111- a+ 30 30 30 30 * at:Z:internal
+E <--1111 a- s<--1111- 30 30 30 30 * at:Z:internal
+E >--1111 s>--1111- a- 30 30 30 30 * at:Z:internal
+E <++1112 a+ s<++1112+ 30 30 30 70 * at:Z:internal
+E >++1112 s>++1112+ a+ 30 30 30 70 * at:Z:internal
+E <+-1112 a+ s<+-1112- 30 30 30 70 * at:Z:internal
+E >+-1112 s>+-1112+ a- 30 30 30 70 * at:Z:internal
+E <-+1112 a- s<-+1112+ 30 30 30 70 * at:Z:internal
+E >-+1112 s>-+1112- a+ 30 30 30 70 * at:Z:internal
+E <--1112 a- s<--1112- 30 30 30 70 * at:Z:internal
+E >--1112 s>--1112- a- 30 30 30 70 * at:Z:internal
+E <++111$ a+ s<++111$+ 30 30 30 100$ * at:Z:internal
+E >++111$ s>++111$+ a+ 30 30 30 100$ * at:Z:internal
+E <+-111$ a+ s<+-111$- 30 30 30 100$ * at:Z:internal
+E >+-111$ s>+-111$+ a- 30 30 30 100$ * at:Z:internal
+E <-+111$ a- s<-+111$+ 30 30 30 100$ * at:Z:internal
+E >-+111$ s>-+111$- a+ 30 30 30 100$ * at:Z:internal
+E <--111$ a- s<--111$- 30 30 30 100$ * at:Z:internal
+E >--111$ s>--111$- a- 30 30 30 100$ * at:Z:internal
+E <++1122 a+ s<++1122+ 30 30 70 70 * at:Z:internal
+E >++1122 s>++1122+ a+ 30 30 70 70 * at:Z:internal
+E <+-1122 a+ s<+-1122- 30 30 70 70 * at:Z:internal
+E >+-1122 s>+-1122+ a- 30 30 70 70 * at:Z:internal
+E <-+1122 a- s<-+1122+ 30 30 70 70 * at:Z:internal
+E >-+1122 s>-+1122- a+ 30 30 70 70 * at:Z:internal
+E <--1122 a- s<--1122- 30 30 70 70 * at:Z:internal
+E >--1122 s>--1122- a- 30 30 70 70 * at:Z:internal
+E <++112$ a+ s<++112$+ 30 30 70 100$ * at:Z:internal
+E >++112$ s>++112$+ a+ 30 30 70 100$ * at:Z:internal
+E <+-112$ a+ s<+-112$- 30 30 70 100$ * at:Z:internal
+E >+-112$ s>+-112$+ a- 30 30 70 100$ * at:Z:internal
+E <-+112$ a- s<-+112$+ 30 30 70 100$ * at:Z:internal
+E >-+112$ s>-+112$- a+ 30 30 70 100$ * at:Z:internal
+E <--112$ a- s<--112$- 30 30 70 100$ * at:Z:internal
+E >--112$ s>--112$- a- 30 30 70 100$ * at:Z:internal
+E <++11$$ a+ s<++11$$+ 30 30 100$ 100$ * at:Z:internal
+E >++11$$ s>++11$$+ a+ 30 30 100$ 100$ * at:Z:internal
+E <+-11$$ a+ s<+-11$$- 30 30 100$ 100$ * at:Z:internal
+E >+-11$$ s>+-11$$+ a- 30 30 100$ 100$ * at:Z:internal
+E <-+11$$ a- s<-+11$$+ 30 30 100$ 100$ * at:Z:internal
+E >-+11$$ s>-+11$$- a+ 30 30 100$ 100$ * at:Z:internal
+E <--11$$ a- s<--11$$- 30 30 100$ 100$ * at:Z:internal
+E >--11$$ s>--11$$- a- 30 30 100$ 100$ * at:Z:internal
+E <++1200 a+ s<++1200+ 30 70 0 0 * at:Z:internal
+E >++1200 s>++1200+ a+ 30 70 0 0 * at:Z:internal
+E <+-1200 a+ s<+-1200- 30 70 0 0 * at:Z:internal
+E >+-1200 s>+-1200+ a- 30 70 0 0 * at:Z:internal
+E <-+1200 a- s<-+1200+ 30 70 0 0 * at:Z:internal
+E >-+1200 s>-+1200- a+ 30 70 0 0 * at:Z:internal
+E <--1200 a- s<--1200- 30 70 0 0 * at:Z:internal
+E >--1200 s>--1200- a- 30 70 0 0 * at:Z:internal
+E <++1201 a+ s<++1201+ 30 70 0 30 * at:Z:internal
+E >++1201 s>++1201+ a+ 30 70 0 30 * at:Z:internal
+E <+-1201 a+ s<+-1201- 30 70 0 30 * at:Z:internal
+E >+-1201 s>+-1201+ a- 30 70 0 30 * at:Z:internal
+E <-+1201 a- s<-+1201+ 30 70 0 30 * at:Z:internal
+E >-+1201 s>-+1201- a+ 30 70 0 30 * at:Z:internal
+E <--1201 a- s<--1201- 30 70 0 30 * at:Z:internal
+E >--1201 s>--1201- a- 30 70 0 30 * at:Z:internal
+E <++1202 a+ s<++1202+ 30 70 0 70 * at:Z:internal
+E >++1202 s>++1202+ a+ 30 70 0 70 * at:Z:internal
+E <+-1202 a+ s<+-1202- 30 70 0 70 * at:Z:internal
+E >+-1202 s>+-1202+ a- 30 70 0 70 * at:Z:internal
+E <-+1202 a- s<-+1202+ 30 70 0 70 * at:Z:internal
+E >-+1202 s>-+1202- a+ 30 70 0 70 * at:Z:internal
+E <--1202 a- s<--1202- 30 70 0 70 * at:Z:internal
+E >--1202 s>--1202- a- 30 70 0 70 * at:Z:internal
+E <++120$ a+ s<++120$+ 30 70 0 100$ * at:Z:to_contained
+E >++120$ s>++120$+ a+ 30 70 0 100$ * at:Z:to_container
+E <+-120$ a+ s<+-120$- 30 70 0 100$ * at:Z:to_contained
+E >+-120$ s>+-120$+ a- 30 70 0 100$ * at:Z:to_container
+E <-+120$ a- s<-+120$+ 30 70 0 100$ * at:Z:to_contained
+E >-+120$ s>-+120$- a+ 30 70 0 100$ * at:Z:to_container
+E <--120$ a- s<--120$- 30 70 0 100$ * at:Z:to_contained
+E >--120$ s>--120$- a- 30 70 0 100$ * at:Z:to_container
+E <++1211 a+ s<++1211+ 30 70 30 30 * at:Z:internal
+E >++1211 s>++1211+ a+ 30 70 30 30 * at:Z:internal
+E <+-1211 a+ s<+-1211- 30 70 30 30 * at:Z:internal
+E >+-1211 s>+-1211+ a- 30 70 30 30 * at:Z:internal
+E <-+1211 a- s<-+1211+ 30 70 30 30 * at:Z:internal
+E >-+1211 s>-+1211- a+ 30 70 30 30 * at:Z:internal
+E <--1211 a- s<--1211- 30 70 30 30 * at:Z:internal
+E >--1211 s>--1211- a- 30 70 30 30 * at:Z:internal
+E <++1212 a+ s<++1212+ 30 70 30 70 * at:Z:internal
+E >++1212 s>++1212+ a+ 30 70 30 70 * at:Z:internal
+E <+-1212 a+ s<+-1212- 30 70 30 70 * at:Z:internal
+E >+-1212 s>+-1212+ a- 30 70 30 70 * at:Z:internal
+E <-+1212 a- s<-+1212+ 30 70 30 70 * at:Z:internal
+E >-+1212 s>-+1212- a+ 30 70 30 70 * at:Z:internal
+E <--1212 a- s<--1212- 30 70 30 70 * at:Z:internal
+E >--1212 s>--1212- a- 30 70 30 70 * at:Z:internal
+E <++121$ a+ s<++121$+ 30 70 30 100$ * at:Z:internal
+E >++121$ s>++121$+ a+ 30 70 30 100$ * at:Z:internal
+E <+-121$ a+ s<+-121$- 30 70 30 100$ * at:Z:internal
+E >+-121$ s>+-121$+ a- 30 70 30 100$ * at:Z:internal
+E <-+121$ a- s<-+121$+ 30 70 30 100$ * at:Z:internal
+E >-+121$ s>-+121$- a+ 30 70 30 100$ * at:Z:internal
+E <--121$ a- s<--121$- 30 70 30 100$ * at:Z:internal
+E >--121$ s>--121$- a- 30 70 30 100$ * at:Z:internal
+E <++1222 a+ s<++1222+ 30 70 70 70 * at:Z:internal
+E >++1222 s>++1222+ a+ 30 70 70 70 * at:Z:internal
+E <+-1222 a+ s<+-1222- 30 70 70 70 * at:Z:internal
+E >+-1222 s>+-1222+ a- 30 70 70 70 * at:Z:internal
+E <-+1222 a- s<-+1222+ 30 70 70 70 * at:Z:internal
+E >-+1222 s>-+1222- a+ 30 70 70 70 * at:Z:internal
+E <--1222 a- s<--1222- 30 70 70 70 * at:Z:internal
+E >--1222 s>--1222- a- 30 70 70 70 * at:Z:internal
+E <++122$ a+ s<++122$+ 30 70 70 100$ * at:Z:internal
+E >++122$ s>++122$+ a+ 30 70 70 100$ * at:Z:internal
+E <+-122$ a+ s<+-122$- 30 70 70 100$ * at:Z:internal
+E >+-122$ s>+-122$+ a- 30 70 70 100$ * at:Z:internal
+E <-+122$ a- s<-+122$+ 30 70 70 100$ * at:Z:internal
+E >-+122$ s>-+122$- a+ 30 70 70 100$ * at:Z:internal
+E <--122$ a- s<--122$- 30 70 70 100$ * at:Z:internal
+E >--122$ s>--122$- a- 30 70 70 100$ * at:Z:internal
+E <++12$$ a+ s<++12$$+ 30 70 100$ 100$ * at:Z:internal
+E >++12$$ s>++12$$+ a+ 30 70 100$ 100$ * at:Z:internal
+E <+-12$$ a+ s<+-12$$- 30 70 100$ 100$ * at:Z:internal
+E >+-12$$ s>+-12$$+ a- 30 70 100$ 100$ * at:Z:internal
+E <-+12$$ a- s<-+12$$+ 30 70 100$ 100$ * at:Z:internal
+E >-+12$$ s>-+12$$- a+ 30 70 100$ 100$ * at:Z:internal
+E <--12$$ a- s<--12$$- 30 70 100$ 100$ * at:Z:internal
+E >--12$$ s>--12$$- a- 30 70 100$ 100$ * at:Z:internal
+E <++1$00 a+ s<++1$00+ 30 100$ 0 0 * at:Z:dovetail_R
+E >++1$00 s>++1$00+ a+ 30 100$ 0 0 * at:Z:dovetail_L
+E <+-1$00 a+ s<+-1$00- 30 100$ 0 0 * at:Z:internal
+E >+-1$00 s>+-1$00+ a- 30 100$ 0 0 * at:Z:internal
+E <-+1$00 a- s<-+1$00+ 30 100$ 0 0 * at:Z:internal
+E >-+1$00 s>-+1$00- a+ 30 100$ 0 0 * at:Z:internal
+E <--1$00 a- s<--1$00- 30 100$ 0 0 * at:Z:dovetail_R
+E >--1$00 s>--1$00- a- 30 100$ 0 0 * at:Z:dovetail_L
+E <++1$01 a+ s<++1$01+ 30 100$ 0 30 * at:Z:dovetail_R
+E >++1$01 s>++1$01+ a+ 30 100$ 0 30 * at:Z:dovetail_L
+E <+-1$01 a+ s<+-1$01- 30 100$ 0 30 * at:Z:internal
+E >+-1$01 s>+-1$01+ a- 30 100$ 0 30 * at:Z:internal
+E <-+1$01 a- s<-+1$01+ 30 100$ 0 30 * at:Z:internal
+E >-+1$01 s>-+1$01- a+ 30 100$ 0 30 * at:Z:internal
+E <--1$01 a- s<--1$01- 30 100$ 0 30 * at:Z:dovetail_R
+E >--1$01 s>--1$01- a- 30 100$ 0 30 * at:Z:dovetail_L
+E <++1$02 a+ s<++1$02+ 30 100$ 0 70 * at:Z:dovetail_R
+E >++1$02 s>++1$02+ a+ 30 100$ 0 70 * at:Z:dovetail_L
+E <+-1$02 a+ s<+-1$02- 30 100$ 0 70 * at:Z:internal
+E >+-1$02 s>+-1$02+ a- 30 100$ 0 70 * at:Z:internal
+E <-+1$02 a- s<-+1$02+ 30 100$ 0 70 * at:Z:internal
+E >-+1$02 s>-+1$02- a+ 30 100$ 0 70 * at:Z:internal
+E <--1$02 a- s<--1$02- 30 100$ 0 70 * at:Z:dovetail_R
+E >--1$02 s>--1$02- a- 30 100$ 0 70 * at:Z:dovetail_L
+E <++1$0$ a+ s<++1$0$+ 30 100$ 0 100$ * at:Z:to_contained
+E >++1$0$ s>++1$0$+ a+ 30 100$ 0 100$ * at:Z:to_container
+E <+-1$0$ a+ s<+-1$0$- 30 100$ 0 100$ * at:Z:to_contained
+E >+-1$0$ s>+-1$0$+ a- 30 100$ 0 100$ * at:Z:to_container
+E <-+1$0$ a- s<-+1$0$+ 30 100$ 0 100$ * at:Z:to_contained
+E >-+1$0$ s>-+1$0$- a+ 30 100$ 0 100$ * at:Z:to_container
+E <--1$0$ a- s<--1$0$- 30 100$ 0 100$ * at:Z:to_contained
+E >--1$0$ s>--1$0$- a- 30 100$ 0 100$ * at:Z:to_container
+E <++1$11 a+ s<++1$11+ 30 100$ 30 30 * at:Z:internal
+E >++1$11 s>++1$11+ a+ 30 100$ 30 30 * at:Z:internal
+E <+-1$11 a+ s<+-1$11- 30 100$ 30 30 * at:Z:internal
+E >+-1$11 s>+-1$11+ a- 30 100$ 30 30 * at:Z:internal
+E <-+1$11 a- s<-+1$11+ 30 100$ 30 30 * at:Z:internal
+E >-+1$11 s>-+1$11- a+ 30 100$ 30 30 * at:Z:internal
+E <--1$11 a- s<--1$11- 30 100$ 30 30 * at:Z:internal
+E >--1$11 s>--1$11- a- 30 100$ 30 30 * at:Z:internal
+E <++1$12 a+ s<++1$12+ 30 100$ 30 70 * at:Z:internal
+E >++1$12 s>++1$12+ a+ 30 100$ 30 70 * at:Z:internal
+E <+-1$12 a+ s<+-1$12- 30 100$ 30 70 * at:Z:internal
+E >+-1$12 s>+-1$12+ a- 30 100$ 30 70 * at:Z:internal
+E <-+1$12 a- s<-+1$12+ 30 100$ 30 70 * at:Z:internal
+E >-+1$12 s>-+1$12- a+ 30 100$ 30 70 * at:Z:internal
+E <--1$12 a- s<--1$12- 30 100$ 30 70 * at:Z:internal
+E >--1$12 s>--1$12- a- 30 100$ 30 70 * at:Z:internal
+E <++1$1$ a+ s<++1$1$+ 30 100$ 30 100$ * at:Z:internal
+E >++1$1$ s>++1$1$+ a+ 30 100$ 30 100$ * at:Z:internal
+E <+-1$1$ a+ s<+-1$1$- 30 100$ 30 100$ * at:Z:dovetail_R
+E >+-1$1$ s>+-1$1$+ a- 30 100$ 30 100$ * at:Z:dovetail_R
+E <-+1$1$ a- s<-+1$1$+ 30 100$ 30 100$ * at:Z:dovetail_R
+E >-+1$1$ s>-+1$1$- a+ 30 100$ 30 100$ * at:Z:dovetail_R
+E <--1$1$ a- s<--1$1$- 30 100$ 30 100$ * at:Z:internal
+E >--1$1$ s>--1$1$- a- 30 100$ 30 100$ * at:Z:internal
+E <++1$22 a+ s<++1$22+ 30 100$ 70 70 * at:Z:internal
+E >++1$22 s>++1$22+ a+ 30 100$ 70 70 * at:Z:internal
+E <+-1$22 a+ s<+-1$22- 30 100$ 70 70 * at:Z:internal
+E >+-1$22 s>+-1$22+ a- 30 100$ 70 70 * at:Z:internal
+E <-+1$22 a- s<-+1$22+ 30 100$ 70 70 * at:Z:internal
+E >-+1$22 s>-+1$22- a+ 30 100$ 70 70 * at:Z:internal
+E <--1$22 a- s<--1$22- 30 100$ 70 70 * at:Z:internal
+E >--1$22 s>--1$22- a- 30 100$ 70 70 * at:Z:internal
+E <++1$2$ a+ s<++1$2$+ 30 100$ 70 100$ * at:Z:internal
+E >++1$2$ s>++1$2$+ a+ 30 100$ 70 100$ * at:Z:internal
+E <+-1$2$ a+ s<+-1$2$- 30 100$ 70 100$ * at:Z:dovetail_R
+E >+-1$2$ s>+-1$2$+ a- 30 100$ 70 100$ * at:Z:dovetail_R
+E <-+1$2$ a- s<-+1$2$+ 30 100$ 70 100$ * at:Z:dovetail_R
+E >-+1$2$ s>-+1$2$- a+ 30 100$ 70 100$ * at:Z:dovetail_R
+E <--1$2$ a- s<--1$2$- 30 100$ 70 100$ * at:Z:internal
+E >--1$2$ s>--1$2$- a- 30 100$ 70 100$ * at:Z:internal
+E <++1$$$ a+ s<++1$$$+ 30 100$ 100$ 100$ * at:Z:internal
+E >++1$$$ s>++1$$$+ a+ 30 100$ 100$ 100$ * at:Z:internal
+E <+-1$$$ a+ s<+-1$$$- 30 100$ 100$ 100$ * at:Z:dovetail_R
+E >+-1$$$ s>+-1$$$+ a- 30 100$ 100$ 100$ * at:Z:dovetail_R
+E <-+1$$$ a- s<-+1$$$+ 30 100$ 100$ 100$ * at:Z:dovetail_R
+E >-+1$$$ s>-+1$$$- a+ 30 100$ 100$ 100$ * at:Z:dovetail_R
+E <--1$$$ a- s<--1$$$- 30 100$ 100$ 100$ * at:Z:internal
+E >--1$$$ s>--1$$$- a- 30 100$ 100$ 100$ * at:Z:internal
+E <++2200 a+ s<++2200+ 70 70 0 0 * at:Z:internal
+E >++2200 s>++2200+ a+ 70 70 0 0 * at:Z:internal
+E <+-2200 a+ s<+-2200- 70 70 0 0 * at:Z:internal
+E >+-2200 s>+-2200+ a- 70 70 0 0 * at:Z:internal
+E <-+2200 a- s<-+2200+ 70 70 0 0 * at:Z:internal
+E >-+2200 s>-+2200- a+ 70 70 0 0 * at:Z:internal
+E <--2200 a- s<--2200- 70 70 0 0 * at:Z:internal
+E >--2200 s>--2200- a- 70 70 0 0 * at:Z:internal
+E <++2201 a+ s<++2201+ 70 70 0 30 * at:Z:internal
+E >++2201 s>++2201+ a+ 70 70 0 30 * at:Z:internal
+E <+-2201 a+ s<+-2201- 70 70 0 30 * at:Z:internal
+E >+-2201 s>+-2201+ a- 70 70 0 30 * at:Z:internal
+E <-+2201 a- s<-+2201+ 70 70 0 30 * at:Z:internal
+E >-+2201 s>-+2201- a+ 70 70 0 30 * at:Z:internal
+E <--2201 a- s<--2201- 70 70 0 30 * at:Z:internal
+E >--2201 s>--2201- a- 70 70 0 30 * at:Z:internal
+E <++2202 a+ s<++2202+ 70 70 0 70 * at:Z:internal
+E >++2202 s>++2202+ a+ 70 70 0 70 * at:Z:internal
+E <+-2202 a+ s<+-2202- 70 70 0 70 * at:Z:internal
+E >+-2202 s>+-2202+ a- 70 70 0 70 * at:Z:internal
+E <-+2202 a- s<-+2202+ 70 70 0 70 * at:Z:internal
+E >-+2202 s>-+2202- a+ 70 70 0 70 * at:Z:internal
+E <--2202 a- s<--2202- 70 70 0 70 * at:Z:internal
+E >--2202 s>--2202- a- 70 70 0 70 * at:Z:internal
+E <++220$ a+ s<++220$+ 70 70 0 100$ * at:Z:to_contained
+E >++220$ s>++220$+ a+ 70 70 0 100$ * at:Z:to_container
+E <+-220$ a+ s<+-220$- 70 70 0 100$ * at:Z:to_contained
+E >+-220$ s>+-220$+ a- 70 70 0 100$ * at:Z:to_container
+E <-+220$ a- s<-+220$+ 70 70 0 100$ * at:Z:to_contained
+E >-+220$ s>-+220$- a+ 70 70 0 100$ * at:Z:to_container
+E <--220$ a- s<--220$- 70 70 0 100$ * at:Z:to_contained
+E >--220$ s>--220$- a- 70 70 0 100$ * at:Z:to_container
+E <++2211 a+ s<++2211+ 70 70 30 30 * at:Z:internal
+E >++2211 s>++2211+ a+ 70 70 30 30 * at:Z:internal
+E <+-2211 a+ s<+-2211- 70 70 30 30 * at:Z:internal
+E >+-2211 s>+-2211+ a- 70 70 30 30 * at:Z:internal
+E <-+2211 a- s<-+2211+ 70 70 30 30 * at:Z:internal
+E >-+2211 s>-+2211- a+ 70 70 30 30 * at:Z:internal
+E <--2211 a- s<--2211- 70 70 30 30 * at:Z:internal
+E >--2211 s>--2211- a- 70 70 30 30 * at:Z:internal
+E <++2212 a+ s<++2212+ 70 70 30 70 * at:Z:internal
+E >++2212 s>++2212+ a+ 70 70 30 70 * at:Z:internal
+E <+-2212 a+ s<+-2212- 70 70 30 70 * at:Z:internal
+E >+-2212 s>+-2212+ a- 70 70 30 70 * at:Z:internal
+E <-+2212 a- s<-+2212+ 70 70 30 70 * at:Z:internal
+E >-+2212 s>-+2212- a+ 70 70 30 70 * at:Z:internal
+E <--2212 a- s<--2212- 70 70 30 70 * at:Z:internal
+E >--2212 s>--2212- a- 70 70 30 70 * at:Z:internal
+E <++221$ a+ s<++221$+ 70 70 30 100$ * at:Z:internal
+E >++221$ s>++221$+ a+ 70 70 30 100$ * at:Z:internal
+E <+-221$ a+ s<+-221$- 70 70 30 100$ * at:Z:internal
+E >+-221$ s>+-221$+ a- 70 70 30 100$ * at:Z:internal
+E <-+221$ a- s<-+221$+ 70 70 30 100$ * at:Z:internal
+E >-+221$ s>-+221$- a+ 70 70 30 100$ * at:Z:internal
+E <--221$ a- s<--221$- 70 70 30 100$ * at:Z:internal
+E >--221$ s>--221$- a- 70 70 30 100$ * at:Z:internal
+E <++2222 a+ s<++2222+ 70 70 70 70 * at:Z:internal
+E >++2222 s>++2222+ a+ 70 70 70 70 * at:Z:internal
+E <+-2222 a+ s<+-2222- 70 70 70 70 * at:Z:internal
+E >+-2222 s>+-2222+ a- 70 70 70 70 * at:Z:internal
+E <-+2222 a- s<-+2222+ 70 70 70 70 * at:Z:internal
+E >-+2222 s>-+2222- a+ 70 70 70 70 * at:Z:internal
+E <--2222 a- s<--2222- 70 70 70 70 * at:Z:internal
+E >--2222 s>--2222- a- 70 70 70 70 * at:Z:internal
+E <++222$ a+ s<++222$+ 70 70 70 100$ * at:Z:internal
+E >++222$ s>++222$+ a+ 70 70 70 100$ * at:Z:internal
+E <+-222$ a+ s<+-222$- 70 70 70 100$ * at:Z:internal
+E >+-222$ s>+-222$+ a- 70 70 70 100$ * at:Z:internal
+E <-+222$ a- s<-+222$+ 70 70 70 100$ * at:Z:internal
+E >-+222$ s>-+222$- a+ 70 70 70 100$ * at:Z:internal
+E <--222$ a- s<--222$- 70 70 70 100$ * at:Z:internal
+E >--222$ s>--222$- a- 70 70 70 100$ * at:Z:internal
+E <++22$$ a+ s<++22$$+ 70 70 100$ 100$ * at:Z:internal
+E >++22$$ s>++22$$+ a+ 70 70 100$ 100$ * at:Z:internal
+E <+-22$$ a+ s<+-22$$- 70 70 100$ 100$ * at:Z:internal
+E >+-22$$ s>+-22$$+ a- 70 70 100$ 100$ * at:Z:internal
+E <-+22$$ a- s<-+22$$+ 70 70 100$ 100$ * at:Z:internal
+E >-+22$$ s>-+22$$- a+ 70 70 100$ 100$ * at:Z:internal
+E <--22$$ a- s<--22$$- 70 70 100$ 100$ * at:Z:internal
+E >--22$$ s>--22$$- a- 70 70 100$ 100$ * at:Z:internal
+E <++2$00 a+ s<++2$00+ 70 100$ 0 0 * at:Z:dovetail_R
+E >++2$00 s>++2$00+ a+ 70 100$ 0 0 * at:Z:dovetail_L
+E <+-2$00 a+ s<+-2$00- 70 100$ 0 0 * at:Z:internal
+E >+-2$00 s>+-2$00+ a- 70 100$ 0 0 * at:Z:internal
+E <-+2$00 a- s<-+2$00+ 70 100$ 0 0 * at:Z:internal
+E >-+2$00 s>-+2$00- a+ 70 100$ 0 0 * at:Z:internal
+E <--2$00 a- s<--2$00- 70 100$ 0 0 * at:Z:dovetail_R
+E >--2$00 s>--2$00- a- 70 100$ 0 0 * at:Z:dovetail_L
+E <++2$01 a+ s<++2$01+ 70 100$ 0 30 * at:Z:dovetail_R
+E >++2$01 s>++2$01+ a+ 70 100$ 0 30 * at:Z:dovetail_L
+E <+-2$01 a+ s<+-2$01- 70 100$ 0 30 * at:Z:internal
+E >+-2$01 s>+-2$01+ a- 70 100$ 0 30 * at:Z:internal
+E <-+2$01 a- s<-+2$01+ 70 100$ 0 30 * at:Z:internal
+E >-+2$01 s>-+2$01- a+ 70 100$ 0 30 * at:Z:internal
+E <--2$01 a- s<--2$01- 70 100$ 0 30 * at:Z:dovetail_R
+E >--2$01 s>--2$01- a- 70 100$ 0 30 * at:Z:dovetail_L
+E <++2$02 a+ s<++2$02+ 70 100$ 0 70 * at:Z:dovetail_R
+E >++2$02 s>++2$02+ a+ 70 100$ 0 70 * at:Z:dovetail_L
+E <+-2$02 a+ s<+-2$02- 70 100$ 0 70 * at:Z:internal
+E >+-2$02 s>+-2$02+ a- 70 100$ 0 70 * at:Z:internal
+E <-+2$02 a- s<-+2$02+ 70 100$ 0 70 * at:Z:internal
+E >-+2$02 s>-+2$02- a+ 70 100$ 0 70 * at:Z:internal
+E <--2$02 a- s<--2$02- 70 100$ 0 70 * at:Z:dovetail_R
+E >--2$02 s>--2$02- a- 70 100$ 0 70 * at:Z:dovetail_L
+E <++2$0$ a+ s<++2$0$+ 70 100$ 0 100$ * at:Z:to_contained
+E >++2$0$ s>++2$0$+ a+ 70 100$ 0 100$ * at:Z:to_container
+E <+-2$0$ a+ s<+-2$0$- 70 100$ 0 100$ * at:Z:to_contained
+E >+-2$0$ s>+-2$0$+ a- 70 100$ 0 100$ * at:Z:to_container
+E <-+2$0$ a- s<-+2$0$+ 70 100$ 0 100$ * at:Z:to_contained
+E >-+2$0$ s>-+2$0$- a+ 70 100$ 0 100$ * at:Z:to_container
+E <--2$0$ a- s<--2$0$- 70 100$ 0 100$ * at:Z:to_contained
+E >--2$0$ s>--2$0$- a- 70 100$ 0 100$ * at:Z:to_container
+E <++2$11 a+ s<++2$11+ 70 100$ 30 30 * at:Z:internal
+E >++2$11 s>++2$11+ a+ 70 100$ 30 30 * at:Z:internal
+E <+-2$11 a+ s<+-2$11- 70 100$ 30 30 * at:Z:internal
+E >+-2$11 s>+-2$11+ a- 70 100$ 30 30 * at:Z:internal
+E <-+2$11 a- s<-+2$11+ 70 100$ 30 30 * at:Z:internal
+E >-+2$11 s>-+2$11- a+ 70 100$ 30 30 * at:Z:internal
+E <--2$11 a- s<--2$11- 70 100$ 30 30 * at:Z:internal
+E >--2$11 s>--2$11- a- 70 100$ 30 30 * at:Z:internal
+E <++2$12 a+ s<++2$12+ 70 100$ 30 70 * at:Z:internal
+E >++2$12 s>++2$12+ a+ 70 100$ 30 70 * at:Z:internal
+E <+-2$12 a+ s<+-2$12- 70 100$ 30 70 * at:Z:internal
+E >+-2$12 s>+-2$12+ a- 70 100$ 30 70 * at:Z:internal
+E <-+2$12 a- s<-+2$12+ 70 100$ 30 70 * at:Z:internal
+E >-+2$12 s>-+2$12- a+ 70 100$ 30 70 * at:Z:internal
+E <--2$12 a- s<--2$12- 70 100$ 30 70 * at:Z:internal
+E >--2$12 s>--2$12- a- 70 100$ 30 70 * at:Z:internal
+E <++2$1$ a+ s<++2$1$+ 70 100$ 30 100$ * at:Z:internal
+E >++2$1$ s>++2$1$+ a+ 70 100$ 30 100$ * at:Z:internal
+E <+-2$1$ a+ s<+-2$1$- 70 100$ 30 100$ * at:Z:dovetail_R
+E >+-2$1$ s>+-2$1$+ a- 70 100$ 30 100$ * at:Z:dovetail_R
+E <-+2$1$ a- s<-+2$1$+ 70 100$ 30 100$ * at:Z:dovetail_R
+E >-+2$1$ s>-+2$1$- a+ 70 100$ 30 100$ * at:Z:dovetail_R
+E <--2$1$ a- s<--2$1$- 70 100$ 30 100$ * at:Z:internal
+E >--2$1$ s>--2$1$- a- 70 100$ 30 100$ * at:Z:internal
+E <++2$22 a+ s<++2$22+ 70 100$ 70 70 * at:Z:internal
+E >++2$22 s>++2$22+ a+ 70 100$ 70 70 * at:Z:internal
+E <+-2$22 a+ s<+-2$22- 70 100$ 70 70 * at:Z:internal
+E >+-2$22 s>+-2$22+ a- 70 100$ 70 70 * at:Z:internal
+E <-+2$22 a- s<-+2$22+ 70 100$ 70 70 * at:Z:internal
+E >-+2$22 s>-+2$22- a+ 70 100$ 70 70 * at:Z:internal
+E <--2$22 a- s<--2$22- 70 100$ 70 70 * at:Z:internal
+E >--2$22 s>--2$22- a- 70 100$ 70 70 * at:Z:internal
+E <++2$2$ a+ s<++2$2$+ 70 100$ 70 100$ * at:Z:internal
+E >++2$2$ s>++2$2$+ a+ 70 100$ 70 100$ * at:Z:internal
+E <+-2$2$ a+ s<+-2$2$- 70 100$ 70 100$ * at:Z:dovetail_R
+E >+-2$2$ s>+-2$2$+ a- 70 100$ 70 100$ * at:Z:dovetail_R
+E <-+2$2$ a- s<-+2$2$+ 70 100$ 70 100$ * at:Z:dovetail_R
+E >-+2$2$ s>-+2$2$- a+ 70 100$ 70 100$ * at:Z:dovetail_R
+E <--2$2$ a- s<--2$2$- 70 100$ 70 100$ * at:Z:internal
+E >--2$2$ s>--2$2$- a- 70 100$ 70 100$ * at:Z:internal
+E <++2$$$ a+ s<++2$$$+ 70 100$ 100$ 100$ * at:Z:internal
+E >++2$$$ s>++2$$$+ a+ 70 100$ 100$ 100$ * at:Z:internal
+E <+-2$$$ a+ s<+-2$$$- 70 100$ 100$ 100$ * at:Z:dovetail_R
+E >+-2$$$ s>+-2$$$+ a- 70 100$ 100$ 100$ * at:Z:dovetail_R
+E <-+2$$$ a- s<-+2$$$+ 70 100$ 100$ 100$ * at:Z:dovetail_R
+E >-+2$$$ s>-+2$$$- a+ 70 100$ 100$ 100$ * at:Z:dovetail_R
+E <--2$$$ a- s<--2$$$- 70 100$ 100$ 100$ * at:Z:internal
+E >--2$$$ s>--2$$$- a- 70 100$ 100$ 100$ * at:Z:internal
+E <++$$00 a+ s<++$$00+ 100$ 100$ 0 0 * at:Z:dovetail_R
+E >++$$00 s>++$$00+ a+ 100$ 100$ 0 0 * at:Z:dovetail_L
+E <+-$$00 a+ s<+-$$00- 100$ 100$ 0 0 * at:Z:internal
+E >+-$$00 s>+-$$00+ a- 100$ 100$ 0 0 * at:Z:internal
+E <-+$$00 a- s<-+$$00+ 100$ 100$ 0 0 * at:Z:internal
+E >-+$$00 s>-+$$00- a+ 100$ 100$ 0 0 * at:Z:internal
+E <--$$00 a- s<--$$00- 100$ 100$ 0 0 * at:Z:dovetail_R
+E >--$$00 s>--$$00- a- 100$ 100$ 0 0 * at:Z:dovetail_L
+E <++$$01 a+ s<++$$01+ 100$ 100$ 0 30 * at:Z:dovetail_R
+E >++$$01 s>++$$01+ a+ 100$ 100$ 0 30 * at:Z:dovetail_L
+E <+-$$01 a+ s<+-$$01- 100$ 100$ 0 30 * at:Z:internal
+E >+-$$01 s>+-$$01+ a- 100$ 100$ 0 30 * at:Z:internal
+E <-+$$01 a- s<-+$$01+ 100$ 100$ 0 30 * at:Z:internal
+E >-+$$01 s>-+$$01- a+ 100$ 100$ 0 30 * at:Z:internal
+E <--$$01 a- s<--$$01- 100$ 100$ 0 30 * at:Z:dovetail_R
+E >--$$01 s>--$$01- a- 100$ 100$ 0 30 * at:Z:dovetail_L
+E <++$$02 a+ s<++$$02+ 100$ 100$ 0 70 * at:Z:dovetail_R
+E >++$$02 s>++$$02+ a+ 100$ 100$ 0 70 * at:Z:dovetail_L
+E <+-$$02 a+ s<+-$$02- 100$ 100$ 0 70 * at:Z:internal
+E >+-$$02 s>+-$$02+ a- 100$ 100$ 0 70 * at:Z:internal
+E <-+$$02 a- s<-+$$02+ 100$ 100$ 0 70 * at:Z:internal
+E >-+$$02 s>-+$$02- a+ 100$ 100$ 0 70 * at:Z:internal
+E <--$$02 a- s<--$$02- 100$ 100$ 0 70 * at:Z:dovetail_R
+E >--$$02 s>--$$02- a- 100$ 100$ 0 70 * at:Z:dovetail_L
+E <++$$0$ a+ s<++$$0$+ 100$ 100$ 0 100$ * at:Z:to_contained
+E >++$$0$ s>++$$0$+ a+ 100$ 100$ 0 100$ * at:Z:to_container
+E <+-$$0$ a+ s<+-$$0$- 100$ 100$ 0 100$ * at:Z:to_contained
+E >+-$$0$ s>+-$$0$+ a- 100$ 100$ 0 100$ * at:Z:to_container
+E <-+$$0$ a- s<-+$$0$+ 100$ 100$ 0 100$ * at:Z:to_contained
+E >-+$$0$ s>-+$$0$- a+ 100$ 100$ 0 100$ * at:Z:to_container
+E <--$$0$ a- s<--$$0$- 100$ 100$ 0 100$ * at:Z:to_contained
+E >--$$0$ s>--$$0$- a- 100$ 100$ 0 100$ * at:Z:to_container
+E <++$$11 a+ s<++$$11+ 100$ 100$ 30 30 * at:Z:internal
+E >++$$11 s>++$$11+ a+ 100$ 100$ 30 30 * at:Z:internal
+E <+-$$11 a+ s<+-$$11- 100$ 100$ 30 30 * at:Z:internal
+E >+-$$11 s>+-$$11+ a- 100$ 100$ 30 30 * at:Z:internal
+E <-+$$11 a- s<-+$$11+ 100$ 100$ 30 30 * at:Z:internal
+E >-+$$11 s>-+$$11- a+ 100$ 100$ 30 30 * at:Z:internal
+E <--$$11 a- s<--$$11- 100$ 100$ 30 30 * at:Z:internal
+E >--$$11 s>--$$11- a- 100$ 100$ 30 30 * at:Z:internal
+E <++$$12 a+ s<++$$12+ 100$ 100$ 30 70 * at:Z:internal
+E >++$$12 s>++$$12+ a+ 100$ 100$ 30 70 * at:Z:internal
+E <+-$$12 a+ s<+-$$12- 100$ 100$ 30 70 * at:Z:internal
+E >+-$$12 s>+-$$12+ a- 100$ 100$ 30 70 * at:Z:internal
+E <-+$$12 a- s<-+$$12+ 100$ 100$ 30 70 * at:Z:internal
+E >-+$$12 s>-+$$12- a+ 100$ 100$ 30 70 * at:Z:internal
+E <--$$12 a- s<--$$12- 100$ 100$ 30 70 * at:Z:internal
+E >--$$12 s>--$$12- a- 100$ 100$ 30 70 * at:Z:internal
+E <++$$1$ a+ s<++$$1$+ 100$ 100$ 30 100$ * at:Z:internal
+E >++$$1$ s>++$$1$+ a+ 100$ 100$ 30 100$ * at:Z:internal
+E <+-$$1$ a+ s<+-$$1$- 100$ 100$ 30 100$ * at:Z:dovetail_R
+E >+-$$1$ s>+-$$1$+ a- 100$ 100$ 30 100$ * at:Z:dovetail_R
+E <-+$$1$ a- s<-+$$1$+ 100$ 100$ 30 100$ * at:Z:dovetail_R
+E >-+$$1$ s>-+$$1$- a+ 100$ 100$ 30 100$ * at:Z:dovetail_R
+E <--$$1$ a- s<--$$1$- 100$ 100$ 30 100$ * at:Z:internal
+E >--$$1$ s>--$$1$- a- 100$ 100$ 30 100$ * at:Z:internal
+E <++$$22 a+ s<++$$22+ 100$ 100$ 70 70 * at:Z:internal
+E >++$$22 s>++$$22+ a+ 100$ 100$ 70 70 * at:Z:internal
+E <+-$$22 a+ s<+-$$22- 100$ 100$ 70 70 * at:Z:internal
+E >+-$$22 s>+-$$22+ a- 100$ 100$ 70 70 * at:Z:internal
+E <-+$$22 a- s<-+$$22+ 100$ 100$ 70 70 * at:Z:internal
+E >-+$$22 s>-+$$22- a+ 100$ 100$ 70 70 * at:Z:internal
+E <--$$22 a- s<--$$22- 100$ 100$ 70 70 * at:Z:internal
+E >--$$22 s>--$$22- a- 100$ 100$ 70 70 * at:Z:internal
+E <++$$2$ a+ s<++$$2$+ 100$ 100$ 70 100$ * at:Z:internal
+E >++$$2$ s>++$$2$+ a+ 100$ 100$ 70 100$ * at:Z:internal
+E <+-$$2$ a+ s<+-$$2$- 100$ 100$ 70 100$ * at:Z:dovetail_R
+E >+-$$2$ s>+-$$2$+ a- 100$ 100$ 70 100$ * at:Z:dovetail_R
+E <-+$$2$ a- s<-+$$2$+ 100$ 100$ 70 100$ * at:Z:dovetail_R
+E >-+$$2$ s>-+$$2$- a+ 100$ 100$ 70 100$ * at:Z:dovetail_R
+E <--$$2$ a- s<--$$2$- 100$ 100$ 70 100$ * at:Z:internal
+E >--$$2$ s>--$$2$- a- 100$ 100$ 70 100$ * at:Z:internal
+E <++$$$$ a+ s<++$$$$+ 100$ 100$ 100$ 100$ * at:Z:internal
+E >++$$$$ s>++$$$$+ a+ 100$ 100$ 100$ 100$ * at:Z:internal
+E <+-$$$$ a+ s<+-$$$$- 100$ 100$ 100$ 100$ * at:Z:dovetail_R
+E >+-$$$$ s>+-$$$$+ a- 100$ 100$ 100$ 100$ * at:Z:dovetail_R
+E <-+$$$$ a- s<-+$$$$+ 100$ 100$ 100$ 100$ * at:Z:dovetail_R
+E >-+$$$$ s>-+$$$$- a+ 100$ 100$ 100$ 100$ * at:Z:dovetail_R
+E <--$$$$ a- s<--$$$$- 100$ 100$ 100$ 100$ * at:Z:internal
+E >--$$$$ s>--$$$$- a- 100$ 100$ 100$ 100$ * at:Z:internal
diff --git a/tests/testdata/invalid/edge_missing.gfa2 b/tests/testdata/invalid/edge_missing.gfa2
new file mode 100644
index 0000000..8b40e8d
--- /dev/null
+++ b/tests/testdata/invalid/edge_missing.gfa2
@@ -0,0 +1,32 @@
+# File used for the collections test
+# similar but NOT equivalent to the gfa1 file!
+S 1 122 *
+S 3 29 TGCTAGCTGACTGTCGATGCTGTGTG
+E 1_to_2 1+ 2+ 110 122$ 0 12 12M
+S 5 130 *
+S 13 150 *
+E 2_to_6 2+ 6+ 0 122$ 10 132 122M
+O 14 11+ 12+
+S 11 140 * xx:i:11
+F 2 read1+ 0 42 12 55 * id:Z:read1_in_2
+F 2 read2+ 45 62 0 18 * id:Z:read2_in_2
+U 16 1 3 15 2_to_6 16sub
+H ac:Z:test2
+# another comment
+S 12 150 *
+S 4 120 *
+H VN:Z:2.0
+E 1_to_3 1+ 3+ 112 122$ 0 12 10M
+G 1_to_11 1+ 11- 120 *
+E 11_to_12 11+ 12+ 18 140$ 0 122 122M
+S 6 150 *
+X custom_record xx:Z:testtag
+X custom_record X2
+G 2_to_12 2- 12+ 500 50
+O 15 11+ 11_to_13+ 13+ xx:i:-1
+Y another_custom_record
+U 16sub 2 3
+S 2 120 * xx:Z:sometag
+H aa:i:12 ab:Z:test1
+H aa:i:15
+E 1_to_5 1+ 5+ 0 122$ 2 124 * zz:Z:tag
diff --git a/tests/testdata/invalid/edge_wrong_lastpos.gfa2 b/tests/testdata/invalid/edge_wrong_lastpos.gfa2
new file mode 100644
index 0000000..90cd30b
--- /dev/null
+++ b/tests/testdata/invalid/edge_wrong_lastpos.gfa2
@@ -0,0 +1,12 @@
+H VN:Z:2.0
+H ul:Z:https://github.com/sjackman/assembly-graph/blob/master/sample.gfa
+S 1 8 CGATGCAA
+S 2 10 TGCAAAGTAC
+S 3 21 TGCAACGTATAGACTTGTCAC RC:i:4
+S 4 7 GCATATA
+S 5 8 CGATGATA
+S 6 4 ATGA
+E * 1+ 2+ 3 9$ 0 5 5M
+E * 3+ 2+ 21$ 21$ 0 0 0M
+E * 3+ 4- 17 21$ 3 7$ 1M1D2M
+E * 4- 5+ 0 0 0 0 0M
diff --git a/tests/testdata/invalid/fragment_wrong_lastpos.gfa2 b/tests/testdata/invalid/fragment_wrong_lastpos.gfa2
new file mode 100644
index 0000000..9018c52
--- /dev/null
+++ b/tests/testdata/invalid/fragment_wrong_lastpos.gfa2
@@ -0,0 +1,33 @@
+# File used for the collections test
+# similar but NOT equivalent to the gfa1 file!
+S 1 122 *
+S 3 29 TGCTAGCTGACTGTCGATGCTGTGTG
+E 1_to_2 1+ 2+ 110 122$ 0 12 12M
+S 5 130 *
+S 13 150 *
+E 2_to_6 2+ 6+ 0 122$ 10 132 122M
+O 14 11+ 12+
+S 11 140 * xx:i:11
+F 3 read1+ 0 42$ 12 55 * id:Z:read1_in_3
+F 2 read2+ 45 62 0 18 * id:Z:read2_in_2
+U 16 1 3 15 2_to_6 16sub
+H ac:Z:test2
+# another comment
+S 12 150 *
+S 4 120 *
+H VN:Z:2.0
+E 1_to_3 1+ 3+ 112 122$ 0 12 10M
+G 1_to_11 1+ 11- 120 *
+E 11_to_12 11+ 12+ 18 140$ 0 122 122M
+S 6 150 *
+X custom_record xx:Z:testtag
+X custom_record X2
+E 11_to_13 11+ 13+ 20 140$ 0 120 120M
+G 2_to_12 2- 12+ 500 50
+O 15 11+ 11_to_13+ 13+ xx:i:-1
+Y another_custom_record
+U 16sub 2 3
+S 2 120 * xx:Z:sometag
+H aa:i:12 ab:Z:test1
+H aa:i:15
+E 1_to_5 1+ 5+ 0 122$ 2 124 * zz:Z:tag
diff --git a/tests/testdata/invalid/inconsistent_length.gfa1 b/tests/testdata/invalid/inconsistent_length.gfa1
new file mode 100644
index 0000000..ad912e0
--- /dev/null
+++ b/tests/testdata/invalid/inconsistent_length.gfa1
@@ -0,0 +1,12 @@
+H VN:Z:1.0
+H ul:Z:https://github.com/sjackman/assembly-graph/blob/master/sample.gfa
+S 1 CGATGCAA LN:i:12
+S 2 TGCAAAGTAC
+S 3 TGCAACGTATAGACTTGTCAC RC:i:4
+S 4 GCATATA
+S 5 CGATGATA
+S 6 ATGA
+L 1 + 2 + 5M
+L 3 + 2 + 0M
+L 3 + 4 - 1M1D2M1S
+L 4 - 5 + 0M
diff --git a/tests/testdata/invalid/link_missing.gfa1 b/tests/testdata/invalid/link_missing.gfa1
new file mode 100644
index 0000000..4c9a490
--- /dev/null
+++ b/tests/testdata/invalid/link_missing.gfa1
@@ -0,0 +1,21 @@
+# File used for the collections test
+S 1 *
+S 3 CGATGCTAGCTGACTGTCGATGCTGTGTG
+L 1 + 2 + 12M ID:Z:1_to_2
+S 5 *
+S 13 *
+C 2 + 6 + 10 122M ID:Z:2_to_6
+P 14 11+,12+ 122M
+S 11 *
+H ac:Z:test2
+S 12 *
+S 4 *
+H VN:Z:1.0
+L 1 + 3 + 12M ID:Z:1_to_3
+S 6 *
+L 11 + 13 + 120M ID:Z:11_to_13
+P 15 11+,13+ 120M
+S 2 * xx:Z:sometag
+H aa:i:12 ab:Z:test1
+H aa:i:15
+C 1 + 5 + 12 120M ID:Z:1_to_5
diff --git a/tests/testdata/invalid/segment_missing.gfa1 b/tests/testdata/invalid/segment_missing.gfa1
new file mode 100644
index 0000000..e417141
--- /dev/null
+++ b/tests/testdata/invalid/segment_missing.gfa1
@@ -0,0 +1,21 @@
+# comment
+S 3 CGATGCTAGCTGACTGTCGATGCTGTGTG
+L 1 + 2 + 12M ID:Z:1_to_2
+S 5 *
+S 13 *
+C 2 + 6 + 10 122M ID:Z:2_to_6
+P 14 11+,12+ 122M
+S 11 *
+H ac:Z:test2
+S 12 *
+S 4 *
+H VN:Z:1.0
+L 1 + 3 + 12M ID:Z:1_to_3
+L 11 + 12 + 122M ID:Z:11_to_12
+S 6 *
+L 11 + 13 + 120M ID:Z:11_to_13
+P 15 11+,13+ 120M
+S 2 * xx:Z:sometag
+H aa:i:12 ab:Z:test1
+H aa:i:15
+C 1 + 5 + 12 120M ID:Z:1_to_5
diff --git a/tests/testdata/invalid/segment_missing.gfa2 b/tests/testdata/invalid/segment_missing.gfa2
new file mode 100644
index 0000000..42247aa
--- /dev/null
+++ b/tests/testdata/invalid/segment_missing.gfa2
@@ -0,0 +1,32 @@
+# File used for the collections test
+# similar but NOT equivalent to the gfa1 file!
+S 3 29 TGCTAGCTGACTGTCGATGCTGTGTG
+E 1_to_2 1+ 2+ 110 122$ 0 12 12M
+S 5 130 *
+S 13 150 *
+E 2_to_6 2+ 6+ 0 122$ 10 132 122M
+O 14 11+ 12+
+S 11 140 * xx:i:11
+F 2 read1+ 0 42 12 55 * id:Z:read1_in_2
+F 2 read2+ 45 62 0 18 * id:Z:read2_in_2
+U 16 1 3 15 2_to_6 16sub
+H ac:Z:test2
+# another comment
+S 12 150 *
+S 4 120 *
+H VN:Z:2.0
+E 1_to_3 1+ 3+ 112 122$ 0 12 10M
+G 1_to_11 1+ 11- 120 *
+E 11_to_12 11+ 12+ 18 140$ 0 122 122M
+S 6 150 *
+X custom_record xx:Z:testtag
+X custom_record X2
+E 11_to_13 11+ 13+ 20 140$ 0 120 120M
+G 2_to_12 2- 12+ 500 50
+O 15 11+ 11_to_13+ 13+ xx:i:-1
+Y another_custom_record
+U 16sub 2 3
+S 2 120 * xx:Z:sometag
+H aa:i:12 ab:Z:test1
+H aa:i:15
+E 1_to_5 1+ 5+ 0 122$ 2 124 * zz:Z:tag
diff --git a/tests/testdata/linear_merging.1.gfa b/tests/testdata/linear_merging.1.gfa
new file mode 100644
index 0000000..3324cb8
--- /dev/null
+++ b/tests/testdata/linear_merging.1.gfa
@@ -0,0 +1,8 @@
+H VN:Z:1.0
+S 0 ACGA
+S 1 ACGA
+S 2 ACGA
+S 3 ACGA
+L 0 + 1 + 1M
+L 1 + 2 - 1M
+L 2 + 3 + 1M
diff --git a/tests/testdata/linear_merging.1.gfa2 b/tests/testdata/linear_merging.1.gfa2
new file mode 100644
index 0000000..93c8a2d
--- /dev/null
+++ b/tests/testdata/linear_merging.1.gfa2
@@ -0,0 +1,8 @@
+H VN:Z:2.0
+S 0 4 ACGA
+S 1 4 ACGA
+S 2 4 ACGA
+S 3 4 ACGA
+E * 0+ 1+ 3 4$ 0 1 1M
+E * 1+ 2- 3 4$ 3 4$ 1M
+E * 2+ 3+ 3 4$ 0 1 1M
diff --git a/tests/testdata/linear_merging.2.gfa b/tests/testdata/linear_merging.2.gfa
new file mode 100644
index 0000000..916ac50
--- /dev/null
+++ b/tests/testdata/linear_merging.2.gfa
@@ -0,0 +1,8 @@
+H VN:Z:1.0
+S 0 ACGA
+S 1 ACGA
+S 2 ACGT
+S 3 TCGA
+L 0 + 1 + 1M
+L 1 + 2 - 1M
+L 2 - 3 + 1M
diff --git a/tests/testdata/linear_merging.2.gfa2 b/tests/testdata/linear_merging.2.gfa2
new file mode 100644
index 0000000..0da9437
--- /dev/null
+++ b/tests/testdata/linear_merging.2.gfa2
@@ -0,0 +1,8 @@
+H VN:Z:2.0
+S 0 4 ACGA
+S 1 4 ACGA
+S 2 4 ACGT
+S 3 4 TCGA
+E * 0+ 1+ 3 4$ 0 1 1M
+E * 1+ 2- 3 4$ 3 4$ 1M
+E * 2- 3+ 0 1 0 1 1M
diff --git a/tests/testdata/linear_merging.3.gfa b/tests/testdata/linear_merging.3.gfa
new file mode 100644
index 0000000..659949e
--- /dev/null
+++ b/tests/testdata/linear_merging.3.gfa
@@ -0,0 +1,8 @@
+H VN:Z:1.0
+S 0 * LN:i:10
+S 1 * LN:i:10
+S 2 * LN:i:10
+S 3 * LN:i:10
+L 0 + 1 + 1M
+L 1 + 2 - 1M
+L 2 - 3 + 1M
diff --git a/tests/testdata/linear_merging.3.gfa2 b/tests/testdata/linear_merging.3.gfa2
new file mode 100644
index 0000000..610fb79
--- /dev/null
+++ b/tests/testdata/linear_merging.3.gfa2
@@ -0,0 +1,8 @@
+H VN:Z:2.0
+S 0 10 *
+S 1 10 *
+S 2 10 *
+S 3 10 *
+E * 0+ 1+ 9 10$ 0 1 1M
+E * 1+ 2- 9 10$ 9 10$ 1M
+E * 2- 3+ 0 1 0 1 1M
diff --git a/tests/testdata/linear_merging.4.gfa b/tests/testdata/linear_merging.4.gfa
new file mode 100644
index 0000000..c977c36
--- /dev/null
+++ b/tests/testdata/linear_merging.4.gfa
@@ -0,0 +1,9 @@
+H VN:Z:1.0
+S 0 * LN:i:10
+S 1 * LN:i:10
+S 2 * LN:i:10
+S 3 * LN:i:10
+L 0 + 1 + 1M
+L 0 + 2 + 1M
+L 1 + 2 - 1M
+L 2 - 3 + 1M
diff --git a/tests/testdata/linear_merging.4.gfa2 b/tests/testdata/linear_merging.4.gfa2
new file mode 100644
index 0000000..59a59ef
--- /dev/null
+++ b/tests/testdata/linear_merging.4.gfa2
@@ -0,0 +1,9 @@
+H VN:Z:2.0
+S 0 10 *
+S 1 10 *
+S 2 10 *
+S 3 10 *
+E * 0+ 1+ 9 10$ 0 1 1M
+E * 0+ 2+ 9 10$ 0 1 1M
+E * 1+ 2- 9 10$ 9 10$ 1M
+E * 2- 3+ 0 1 0 1 1M
diff --git a/tests/testdata/linear_merging.5.gfa b/tests/testdata/linear_merging.5.gfa
new file mode 100644
index 0000000..5b5f2e0
--- /dev/null
+++ b/tests/testdata/linear_merging.5.gfa
@@ -0,0 +1,9 @@
+H VN:Z:1.0
+S 0 * LN:i:10
+S 1 * LN:i:10
+S 2 * LN:i:10
+S 3 * LN:i:10
+L 0 + 1 + 1M
+L 0 + 2 + 1M
+L 1 + 2 + 1M
+L 2 + 3 + 1M
diff --git a/tests/testdata/linear_merging.5.gfa2 b/tests/testdata/linear_merging.5.gfa2
new file mode 100644
index 0000000..3a4797f
--- /dev/null
+++ b/tests/testdata/linear_merging.5.gfa2
@@ -0,0 +1,9 @@
+H VN:Z:2.0
+S 0 10 *
+S 1 10 *
+S 2 10 *
+S 3 10 *
+E * 0+ 1+ 9 10$ 0 1 1M
+E * 0+ 2+ 9 10$ 0 1 1M
+E * 1+ 2+ 9 10$ 0 1 1M
+E * 2+ 3+ 9 10$ 0 1 1M
diff --git a/tests/testdata/links_distri.l1.gfa b/tests/testdata/links_distri.l1.gfa
new file mode 100644
index 0000000..11e72f4
--- /dev/null
+++ b/tests/testdata/links_distri.l1.gfa
@@ -0,0 +1,4 @@
+H VN:Z:1.0
+S 1 * LN:i:100
+S 2 * LN:i:100
+L 1 + 2 + 10M
diff --git a/tests/testdata/links_distri.l1.gfa2 b/tests/testdata/links_distri.l1.gfa2
new file mode 100644
index 0000000..6f2fff4
--- /dev/null
+++ b/tests/testdata/links_distri.l1.gfa2
@@ -0,0 +1,4 @@
+H VN:Z:2.0
+S 1 100 *
+S 2 100 *
+E * 1+ 2+ 90 100$ 0 10 10M
diff --git a/tests/testdata/links_distri.l1.m2.gfa b/tests/testdata/links_distri.l1.m2.gfa
new file mode 100644
index 0000000..2f09eaf
--- /dev/null
+++ b/tests/testdata/links_distri.l1.m2.gfa
@@ -0,0 +1,6 @@
+H VN:Z:1.0
+S 1 * LN:i:100
+S 1*2 * LN:i:100
+S 2 * LN:i:100
+L 1 + 2 + 10M
+L 1*2 + 2 + 10M
diff --git a/tests/testdata/links_distri.l1.m2.gfa2 b/tests/testdata/links_distri.l1.m2.gfa2
new file mode 100644
index 0000000..611d43d
--- /dev/null
+++ b/tests/testdata/links_distri.l1.m2.gfa2
@@ -0,0 +1,6 @@
+H VN:Z:2.0
+S 1 100 *
+S 1*2 100 *
+S 2 100 *
+E * 1+ 2+ 90 100$ 0 10 10M
+E * 1*2+ 2+ 90 100$ 0 10 10M
diff --git a/tests/testdata/links_distri.l2.gfa b/tests/testdata/links_distri.l2.gfa
new file mode 100644
index 0000000..94b8217
--- /dev/null
+++ b/tests/testdata/links_distri.l2.gfa
@@ -0,0 +1,6 @@
+H VN:Z:1.0
+S 1 * LN:i:100
+S 2*2 * LN:i:100
+S 2*3 * LN:i:100
+L 1 + 2*2 + 10M
+L 1 + 2*3 + 10M
diff --git a/tests/testdata/links_distri.l2.gfa2 b/tests/testdata/links_distri.l2.gfa2
new file mode 100644
index 0000000..2cb0750
--- /dev/null
+++ b/tests/testdata/links_distri.l2.gfa2
@@ -0,0 +1,6 @@
+H VN:Z:2.0
+S 1 100 *
+S 2*2 100 *
+S 2*3 100 *
+E * 1+ 2*2+ 90 100$ 0 10 10M
+E * 1+ 2*3+ 90 100$ 0 10 10M
diff --git a/tests/testdata/links_distri.l2.m2.gfa b/tests/testdata/links_distri.l2.m2.gfa
new file mode 100644
index 0000000..bdd3364
--- /dev/null
+++ b/tests/testdata/links_distri.l2.m2.gfa
@@ -0,0 +1,7 @@
+H VN:Z:1.0
+S 1 * LN:i:100 or:Z:1
+S 1*2 * LN:i:100 or:Z:1
+S 2*2 * LN:i:100
+S 2*3 * LN:i:100
+L 1 + 2*2 + 10M
+L 1*2 + 2*3 + 10M
diff --git a/tests/testdata/links_distri.l2.m2.gfa2 b/tests/testdata/links_distri.l2.m2.gfa2
new file mode 100644
index 0000000..26eb7d8
--- /dev/null
+++ b/tests/testdata/links_distri.l2.m2.gfa2
@@ -0,0 +1,7 @@
+H VN:Z:2.0
+S 1 100 * or:Z:1
+S 1*2 100 * or:Z:1
+S 2*2 100 *
+S 2*3 100 *
+E * 1+ 2*2+ 90 100$ 0 10 10M
+E * 1*2+ 2*3+ 90 100$ 0 10 10M
diff --git a/tests/testdata/links_distri.l2.m2.no_ld.gfa b/tests/testdata/links_distri.l2.m2.no_ld.gfa
new file mode 100644
index 0000000..285717f
--- /dev/null
+++ b/tests/testdata/links_distri.l2.m2.no_ld.gfa
@@ -0,0 +1,9 @@
+H VN:Z:1.0
+S 1 * LN:i:100
+S 2*2 * LN:i:100
+S 2*3 * LN:i:100
+S 1*2 * LN:i:100
+L 1 + 2*2 + 10M
+L 1 + 2*3 + 10M
+L 1*2 + 2*2 + 10M
+L 1*2 + 2*3 + 10M
diff --git a/tests/testdata/links_distri.l2.m2.no_ld.gfa2 b/tests/testdata/links_distri.l2.m2.no_ld.gfa2
new file mode 100644
index 0000000..63e6d7d
--- /dev/null
+++ b/tests/testdata/links_distri.l2.m2.no_ld.gfa2
@@ -0,0 +1,9 @@
+H VN:Z:2.0
+S 1 100 *
+S 2*2 100 *
+S 2*3 100 *
+S 1*2 100 *
+E * 1+ 2*2+ 90 100$ 0 10 10M
+E * 1+ 2*3+ 90 100$ 0 10 10M
+E * 1*2+ 2*2+ 90 100$ 0 10 10M
+E * 1*2+ 2*3+ 90 100$ 0 10 10M
diff --git a/tests/testdata/links_distri.l2.m3.gfa b/tests/testdata/links_distri.l2.m3.gfa
new file mode 100644
index 0000000..bf8f4f4
--- /dev/null
+++ b/tests/testdata/links_distri.l2.m3.gfa
@@ -0,0 +1,8 @@
+H VN:Z:1.0
+S 1 * LN:i:100 or:Z:1
+S 2*2 * LN:i:100
+S 2*3 * LN:i:100
+S 1*2 * LN:i:100 or:Z:1
+S 1*3 * LN:i:100 or:Z:1
+L 1 + 2*2 + 10M
+L 1*2 + 2*3 + 10M
diff --git a/tests/testdata/links_distri.l2.m3.gfa2 b/tests/testdata/links_distri.l2.m3.gfa2
new file mode 100644
index 0000000..e99c8cf
--- /dev/null
+++ b/tests/testdata/links_distri.l2.m3.gfa2
@@ -0,0 +1,8 @@
+H VN:Z:2.0
+S 1 100 * or:Z:1
+S 2*2 100 *
+S 2*3 100 *
+S 1*2 100 * or:Z:1
+S 1*3 100 * or:Z:1
+E * 1+ 2*2+ 90 100$ 0 10 10M
+E * 1*2+ 2*3+ 90 100$ 0 10 10M
diff --git a/tests/testdata/links_distri.l2.m3.no_ld.gfa b/tests/testdata/links_distri.l2.m3.no_ld.gfa
new file mode 100644
index 0000000..619d6bd
--- /dev/null
+++ b/tests/testdata/links_distri.l2.m3.no_ld.gfa
@@ -0,0 +1,12 @@
+H VN:Z:1.0
+S 1 * LN:i:100
+S 2*2 * LN:i:100
+S 2*3 * LN:i:100
+S 1*2 * LN:i:100
+S 1*3 * LN:i:100
+L 1 + 2*2 + 10M
+L 1 + 2*3 + 10M
+L 1*2 + 2*2 + 10M
+L 1*2 + 2*3 + 10M
+L 1*3 + 2*2 + 10M
+L 1*3 + 2*3 + 10M
diff --git a/tests/testdata/links_distri.l2.m3.no_ld.gfa2 b/tests/testdata/links_distri.l2.m3.no_ld.gfa2
new file mode 100644
index 0000000..13534df
--- /dev/null
+++ b/tests/testdata/links_distri.l2.m3.no_ld.gfa2
@@ -0,0 +1,12 @@
+H VN:Z:2.0
+S 1 100 *
+S 2*2 100 *
+S 2*3 100 *
+S 1*2 100 *
+S 1*3 100 *
+E * 1+ 2*2+ 90 100$ 0 10 10M
+E * 1+ 2*3+ 90 100$ 0 10 10M
+E * 1*2+ 2*2+ 90 100$ 0 10 10M
+E * 1*2+ 2*3+ 90 100$ 0 10 10M
+E * 1*3+ 2*2+ 90 100$ 0 10 10M
+E * 1*3+ 2*3+ 90 100$ 0 10 10M
diff --git a/tests/testdata/links_distri.l3.gfa b/tests/testdata/links_distri.l3.gfa
new file mode 100644
index 0000000..25f9b64
--- /dev/null
+++ b/tests/testdata/links_distri.l3.gfa
@@ -0,0 +1,8 @@
+H VN:Z:1.0
+S 1 * LN:i:100
+S 2*2 * LN:i:100
+S 2*3 * LN:i:100
+S 2*4 * LN:i:100
+L 1 + 2*2 + 10M
+L 1 + 2*3 + 10M
+L 1 + 2*4 + 10M
diff --git a/tests/testdata/links_distri.l3.gfa2 b/tests/testdata/links_distri.l3.gfa2
new file mode 100644
index 0000000..a27a893
--- /dev/null
+++ b/tests/testdata/links_distri.l3.gfa2
@@ -0,0 +1,8 @@
+H VN:Z:2.0
+S 1 100 *
+S 2*2 100 *
+S 2*3 100 *
+S 2*4 100 *
+E * 1+ 2*2+ 90 100$ 0 10 10M
+E * 1+ 2*3+ 90 100$ 0 10 10M
+E * 1+ 2*4+ 90 100$ 0 10 10M
diff --git a/tests/testdata/links_distri.l3.m2.gfa b/tests/testdata/links_distri.l3.m2.gfa
new file mode 100644
index 0000000..1c40c1a
--- /dev/null
+++ b/tests/testdata/links_distri.l3.m2.gfa
@@ -0,0 +1,10 @@
+H VN:Z:1.0
+S 1 * LN:i:100 or:Z:1
+S 2*2 * LN:i:100
+S 2*3 * LN:i:100
+S 2*4 * LN:i:100
+S 1*2 * LN:i:100 or:Z:1
+L 1 + 2*2 + 10M
+L 1 + 2*3 + 10M
+L 1*2 + 2*3 + 10M
+L 1*2 + 2*4 + 10M
diff --git a/tests/testdata/links_distri.l3.m2.gfa2 b/tests/testdata/links_distri.l3.m2.gfa2
new file mode 100644
index 0000000..9c4ff3d
--- /dev/null
+++ b/tests/testdata/links_distri.l3.m2.gfa2
@@ -0,0 +1,10 @@
+H VN:Z:2.0
+S 1 100 * or:Z:1
+S 2*2 100 *
+S 2*3 100 *
+S 2*4 100 *
+S 1*2 100 * or:Z:1
+E * 1+ 2*2+ 90 100$ 0 10 10M
+E * 1+ 2*3+ 90 100$ 0 10 10M
+E * 1*2+ 2*3+ 90 100$ 0 10 10M
+E * 1*2+ 2*4+ 90 100$ 0 10 10M
diff --git a/tests/testdata/links_distri.l3.m2.no_ld.gfa b/tests/testdata/links_distri.l3.m2.no_ld.gfa
new file mode 100644
index 0000000..a12d083
--- /dev/null
+++ b/tests/testdata/links_distri.l3.m2.no_ld.gfa
@@ -0,0 +1,12 @@
+H VN:Z:1.0
+S 1 * LN:i:100
+S 2*2 * LN:i:100
+S 2*3 * LN:i:100
+S 2*4 * LN:i:100
+S 1*2 * LN:i:100
+L 1 + 2*2 + 10M
+L 1 + 2*3 + 10M
+L 1 + 2*4 + 10M
+L 1*2 + 2*2 + 10M
+L 1*2 + 2*3 + 10M
+L 1*2 + 2*4 + 10M
diff --git a/tests/testdata/links_distri.l3.m2.no_ld.gfa2 b/tests/testdata/links_distri.l3.m2.no_ld.gfa2
new file mode 100644
index 0000000..b2ee07a
--- /dev/null
+++ b/tests/testdata/links_distri.l3.m2.no_ld.gfa2
@@ -0,0 +1,12 @@
+H VN:Z:2.0
+S 1 100 *
+S 2*2 100 *
+S 2*3 100 *
+S 2*4 100 *
+S 1*2 100 *
+E * 1+ 2*2+ 90 100$ 0 10 10M
+E * 1+ 2*3+ 90 100$ 0 10 10M
+E * 1+ 2*4+ 90 100$ 0 10 10M
+E * 1*2+ 2*2+ 90 100$ 0 10 10M
+E * 1*2+ 2*3+ 90 100$ 0 10 10M
+E * 1*2+ 2*4+ 90 100$ 0 10 10M
diff --git a/tests/testdata/loop.gfa b/tests/testdata/loop.gfa
new file mode 100644
index 0000000..1fa6e88
--- /dev/null
+++ b/tests/testdata/loop.gfa
@@ -0,0 +1,10 @@
+H VN:Z:1.0
+H ul:Z:https://github.com/sjackman/assembly-graph/raw/master/loop.gfa
+S 1 AAA
+S 2 ACG
+S 3 CAT
+S 4 TTT
+L 1 + 1 + 2M
+L 2 + 2 - 2M
+L 3 - 3 + 2M
+L 4 - 4 - 2M
diff --git a/tests/testdata/loop.gfa2 b/tests/testdata/loop.gfa2
new file mode 100644
index 0000000..d185390
--- /dev/null
+++ b/tests/testdata/loop.gfa2
@@ -0,0 +1,10 @@
+H VN:Z:2.0
+H ul:Z:https://github.com/sjackman/assembly-graph/raw/master/loop.gfa
+S 1 3 AAA
+S 2 3 ACG
+S 3 3 CAT
+S 4 3 TTT
+E * 1+ 1+ 1 3$ 0 2 2M
+E * 2+ 2- 1 3$ 1 3$ 2M
+E * 3- 3+ 0 2 0 2 2M
+E * 4- 4- 0 2 1 3$ 2M
diff --git a/tests/testdata/sample.gfa b/tests/testdata/sample.gfa
new file mode 100644
index 0000000..ff9c477
--- /dev/null
+++ b/tests/testdata/sample.gfa
@@ -0,0 +1,12 @@
+H VN:Z:1.0
+H ul:Z:https://github.com/sjackman/assembly-graph/blob/master/sample.gfa
+S 1 CGATGCAA
+S 2 TGCAAAGTAC
+S 3 TGCAACGTATAGACTTGTCAC RC:i:4
+S 4 GCATATA
+S 5 CGATGATA
+S 6 ATGA
+L 1 + 2 + 5M
+L 3 + 2 + 0M
+L 3 + 4 - 1M1D2M1S
+L 4 - 5 + 0M
diff --git a/tests/testdata/sample.gfa2 b/tests/testdata/sample.gfa2
new file mode 100644
index 0000000..fccbe26
--- /dev/null
+++ b/tests/testdata/sample.gfa2
@@ -0,0 +1,12 @@
+H VN:Z:2.0
+H ul:Z:https://github.com/sjackman/assembly-graph/blob/master/sample.gfa
+S 1 8 CGATGCAA
+S 2 10 TGCAAAGTAC
+S 3 21 TGCAACGTATAGACTTGTCAC RC:i:4
+S 4 7 GCATATA
+S 5 8 CGATGATA
+S 6 4 ATGA
+E * 1+ 2+ 3 8$ 0 5 5M
+E * 3+ 2+ 21$ 21$ 0 0 0M
+E * 3+ 4- 17 21$ 3 7$ 1M1D2M
+E * 4- 5+ 0 0 0 0 0M
diff --git a/tests/testdata/spec_q1.gfa b/tests/testdata/spec_q1.gfa
new file mode 100644
index 0000000..6edd3df
--- /dev/null
+++ b/tests/testdata/spec_q1.gfa
@@ -0,0 +1,8 @@
+H VN:Z:1.0
+S 1 AGCGTA
+S 2 TAACAG
+L 1 + 2 + 0M ID:Z:1+_2+
+P A 1+,2+ 0M
+P B 1+,2+ 0M st:i:4 en:i:3
+#invalid: P C 1+ * st:i:5
+#invalid: P D 2+ * st:i:2 en:i:5
diff --git a/tests/testdata/spec_q1.gfa2 b/tests/testdata/spec_q1.gfa2
new file mode 100644
index 0000000..cf9f9e5
--- /dev/null
+++ b/tests/testdata/spec_q1.gfa2
@@ -0,0 +1,8 @@
+#invalid: P C 1+ * st:i:5
+#invalid: P D 2+ * st:i:2 en:i:5
+H VN:Z:2.0
+S 1 6 AGCGTA
+S 2 6 TAACAG
+E 1+_2+ 1+ 2+ 6$ 6$ 0 0 0M
+O A 1+ 1+_2++ 2+
+O B 1+ 1+_2++ 2+
diff --git a/tests/testdata/spec_q2.gfa b/tests/testdata/spec_q2.gfa
new file mode 100644
index 0000000..4ef16b7
--- /dev/null
+++ b/tests/testdata/spec_q2.gfa
@@ -0,0 +1,9 @@
+H VN:Z:1.0
+H ul:Z:https://github.com/pmelsted/GFA-spec/issues/23
+H co:Z:modified: the final "," of circular was eliminated
+S 1 AGCGTA
+S 2 TAACAG
+L 1 + 2 + 2M ID:Z:1+_2+
+L 2 + 1 + 2M ID:Z:2+_1+
+P linear 1+,2+ 2M
+P circular 1+,2+ 2M,2M
diff --git a/tests/testdata/spec_q2.gfa2 b/tests/testdata/spec_q2.gfa2
new file mode 100644
index 0000000..c9b06aa
--- /dev/null
+++ b/tests/testdata/spec_q2.gfa2
@@ -0,0 +1,9 @@
+H VN:Z:2.0
+H ul:Z:https://github.com/pmelsted/GFA-spec/issues/23
+H co:Z:modified: the final "," of circular was eliminated
+S 1 6 AGCGTA
+S 2 6 TAACAG
+E 1+_2+ 1+ 2+ 4 6$ 0 2 2M
+E 2+_1+ 2+ 1+ 4 6$ 0 2 2M
+O linear 1+ 1+_2++ 2+
+O circular 1+ 1+_2++ 2+ 2+_1++ 1+
diff --git a/tests/testdata/spec_q2.path_circular.seq b/tests/testdata/spec_q2.path_circular.seq
new file mode 100644
index 0000000..bc4533e
--- /dev/null
+++ b/tests/testdata/spec_q2.path_circular.seq
@@ -0,0 +1 @@
+AGCGTAAC
diff --git a/tests/testdata/spec_q2.path_linear.seq b/tests/testdata/spec_q2.path_linear.seq
new file mode 100644
index 0000000..3bc7a6f
--- /dev/null
+++ b/tests/testdata/spec_q2.path_linear.seq
@@ -0,0 +1 @@
+AGCGTAACAG
diff --git a/tests/testdata/spec_q3.gfa b/tests/testdata/spec_q3.gfa
new file mode 100644
index 0000000..a92c430
--- /dev/null
+++ b/tests/testdata/spec_q3.gfa
@@ -0,0 +1,13 @@
+H VN:Z:1.0
+H ul:Z:https://github.com/pmelsted/GFA-spec/issues/23#issuecomment-166806423
+H co:Z:is_this_okay is OK and shall be interpreted as a linear path
+S 1 AGCGTA
+S 2 TAACAG
+S 3 GTCATC
+L 1 + 2 + 2M ID:Z:1+_2+
+L 2 + 3 + 2M ID:Z:2+_3+
+L 3 + 1 + 0M ID:Z:3+_1+
+P linear_path 1+,2+,3+ *,*
+P circular_path 1+,2+,3+ *,*,*
+P more_than_circular 1+,2+,3+,1+ *,*,0M
+P is_this_okay 1+,2+,3+ *
diff --git a/tests/testdata/spec_q3.gfa2 b/tests/testdata/spec_q3.gfa2
new file mode 100644
index 0000000..9256421
--- /dev/null
+++ b/tests/testdata/spec_q3.gfa2
@@ -0,0 +1,13 @@
+H VN:Z:2.0
+H ul:Z:https://github.com/pmelsted/GFA-spec/issues/23#issuecomment-166806423
+H co:Z:is_this_okay is OK and shall be interpreted as a linear path
+S 1 6 AGCGTA
+S 2 6 TAACAG
+S 3 6 GTCATC
+E 1+_2+ 1+ 2+ 4 6$ 0 2 2M
+E 2+_3+ 2+ 3+ 4 6$ 0 2 2M
+E 3+_1+ 3+ 1+ 6$ 6$ 0 0 0M
+O linear_path 1+ 1+_2++ 2+ 2+_3++ 3+
+O circular_path 1+ 1+_2++ 2+ 2+_3++ 3+ 3+_1++ 1+
+O more_than_circular 1+ 1+_2++ 2+ 2+_3++ 3+ 3+_1++ 1+
+O is_this_okay 1+ 1+_2++ 2+ 2+_3++ 3+
diff --git a/tests/testdata/spec_q4.gfa b/tests/testdata/spec_q4.gfa
new file mode 100644
index 0000000..4a99ab3
--- /dev/null
+++ b/tests/testdata/spec_q4.gfa
@@ -0,0 +1,14 @@
+H VN:Z:1.0
+H ul:Z:https://github.com/pmelsted/GFA-spec/issues/23#issuecomment-172869000
+H co:Z:similar to spec_q3
+H co:Z:more_than_circular should be invalid, 3+/1+/0M is not supported by a link
+S 1 AGCGTA
+S 2 TAACAG
+S 3 GTCATC
+L 1 + 2 + 2M ID:Z:1+_2+
+L 2 + 3 + 2M ID:Z:2+_3+
+L 3 + 1 + 2M ID:Z:3+_1+
+P linear_path 1+,2+,3+ *,*
+P circular_path 1+,2+,3+ *,*,*
+# P more_than_circular 1+,2+,3+,1+ *,*,0M
+P is_this_okay 1+,2+,3+ *
diff --git a/tests/testdata/spec_q4.gfa2 b/tests/testdata/spec_q4.gfa2
new file mode 100644
index 0000000..8f018d3
--- /dev/null
+++ b/tests/testdata/spec_q4.gfa2
@@ -0,0 +1,14 @@
+# P more_than_circular 1+,2+,3+,1+ *,*,0M
+H VN:Z:2.0
+H ul:Z:https://github.com/pmelsted/GFA-spec/issues/23#issuecomment-172869000
+H co:Z:similar to spec_q3
+H co:Z:more_than_circular should be invalid, 3+/1+/0M is not supported by a link
+S 1 6 AGCGTA
+S 2 6 TAACAG
+S 3 6 GTCATC
+E 1+_2+ 1+ 2+ 4 6$ 0 2 2M
+E 2+_3+ 2+ 3+ 4 6$ 0 2 2M
+E 3+_1+ 3+ 1+ 4 6$ 0 2 2M
+O linear_path 1+ 1+_2++ 2+ 2+_3++ 3+
+O circular_path 1+ 1+_2++ 2+ 2+_3++ 3+ 3+_1++ 1+
+O is_this_okay 1+ 1+_2++ 2+ 2+_3++ 3+
diff --git a/tests/testdata/spec_q4.path_more_than_circular.seq b/tests/testdata/spec_q4.path_more_than_circular.seq
new file mode 100644
index 0000000..c4f6e83
--- /dev/null
+++ b/tests/testdata/spec_q4.path_more_than_circular.seq
@@ -0,0 +1 @@
+AGCGTAACAGCATCAGCGTA
diff --git a/tests/testdata/spec_q5.gfa b/tests/testdata/spec_q5.gfa
new file mode 100644
index 0000000..268a4bf
--- /dev/null
+++ b/tests/testdata/spec_q5.gfa
@@ -0,0 +1,11 @@
+H ul:Z:https://github.com/pmelsted/GFA-spec/issues/8
+H co:Z:the last 3 links are equivalent to the first 3
+S read0 * LN:i:5500
+S read1 * LN:i:4000
+S read2 * LN:i:5500
+L read0 + read1 - 2000M
+L read1 - read2 + 3000M
+L read0 + read2 + 1000M
+L read1 + read0 - 2000M
+L read2 - read1 + 3000M
+L read2 - read0 - 1000M
diff --git a/tests/testdata/spec_q5.gfa2 b/tests/testdata/spec_q5.gfa2
new file mode 100644
index 0000000..50f86f1
--- /dev/null
+++ b/tests/testdata/spec_q5.gfa2
@@ -0,0 +1,8 @@
+H ul:Z:https://github.com/pmelsted/GFA-spec/issues/8
+H co:Z:the last 3 links are equivalent to the first 3
+S read0 5500 *
+S read1 4000 *
+S read2 5500 *
+E * read0+ read1- 3500 5500$ 2000 4000$ 2000M
+E * read1- read2+ 0 3000 0 3000 3000M
+E * read0+ read2+ 4500 5500$ 0 1000 1000M
diff --git a/tests/testdata/spec_q6.gfa b/tests/testdata/spec_q6.gfa
new file mode 100644
index 0000000..cdb7ab5
--- /dev/null
+++ b/tests/testdata/spec_q6.gfa
@@ -0,0 +1,9 @@
+S 0 GAT tn:Z:Human
+S 1 CCC tn:Z:Mouse
+S 2 TAC tn:Z:Human,Mouse
+S 3 A tn:Z:Human
+S 4 TTA tn:Z:Mouse
+L 0 + 2 + 0M tn:Z:Human
+L 1 + 2 + 0M tn:Z:Mouse
+L 2 + 3 + 0M tn:Z:Human
+L 2 + 4 + 0M tn:Z:Mouse
diff --git a/tests/testdata/spec_q6.gfa2 b/tests/testdata/spec_q6.gfa2
new file mode 100644
index 0000000..04bb825
--- /dev/null
+++ b/tests/testdata/spec_q6.gfa2
@@ -0,0 +1,9 @@
+S 0 3 GAT tn:Z:Human
+S 1 3 CCC tn:Z:Mouse
+S 2 3 TAC tn:Z:Human,Mouse
+S 3 1 A tn:Z:Human
+S 4 3 TTA tn:Z:Mouse
+E * 0+ 2+ 3$ 3$ 0 0 0M tn:Z:Human
+E * 1+ 2+ 3$ 3$ 0 0 0M tn:Z:Mouse
+E * 2+ 3+ 3$ 3$ 0 0 0M tn:Z:Human
+E * 2+ 4+ 3$ 3$ 0 0 0M tn:Z:Mouse
diff --git a/tests/testdata/spec_q7.gfa b/tests/testdata/spec_q7.gfa
new file mode 100644
index 0000000..71d4501
--- /dev/null
+++ b/tests/testdata/spec_q7.gfa
@@ -0,0 +1,9 @@
+H VN:Z:1.0
+H ul:Z:https://github.com/pmelsted/GFA-spec/issues/7#issuecomment-219685552
+S 11 ACCTT pg:J:{"Human":[{"target":"chr1","pos":1500,"strand":true}],"Mouse":[{"target":"chr2","pos":2000,"strand":false}],"ecoli":[{"target":"chr1","pos":2000,"strand":false},{"target":"chr1","pos":3000,"strand":true}]}
+S 12 TCAAGG
+S 13 CTTGATT
+L 11 + 12 - 4M ID:Z:11+_12-
+L 12 - 13 + 5M ID:Z:12-_13+
+L 11 + 13 + 3M ID:Z:11+_13+
+P 14 11+,12-,13+ 4M,5M
diff --git a/tests/testdata/spec_q7.gfa2 b/tests/testdata/spec_q7.gfa2
new file mode 100644
index 0000000..f160e5c
--- /dev/null
+++ b/tests/testdata/spec_q7.gfa2
@@ -0,0 +1,9 @@
+H VN:Z:2.0
+H ul:Z:https://github.com/pmelsted/GFA-spec/issues/7#issuecomment-219685552
+S 11 5 ACCTT pg:J:{"Human":[{"target":"chr1","pos":1500,"strand":true}],"Mouse":[{"target":"chr2","pos":2000,"strand":false}],"ecoli":[{"target":"chr1","pos":2000,"strand":false},{"target":"chr1","pos":3000,"strand":true}]}
+S 12 6 TCAAGG
+S 13 7 CTTGATT
+E 11+_12- 11+ 12- 1 5$ 2 6$ 4M
+E 12-_13+ 12- 13+ 0 5 0 5 5M
+E 11+_13+ 11+ 13+ 2 5$ 0 3 3M
+O 14 11+ 11+_12-+ 12- 12-_13++ 13+
diff --git a/tests/testdata/two_components.gfa b/tests/testdata/two_components.gfa
new file mode 100644
index 0000000..7c2f6fb
--- /dev/null
+++ b/tests/testdata/two_components.gfa
@@ -0,0 +1,11 @@
+H VN:Z:1.0
+S 1 * LN:i:1000
+S 2 * LN:i:1000
+S 3 * LN:i:1000
+S 4 * LN:i:1000
+S 5 * LN:i:1000
+S 6 * LN:i:1000
+L 1 + 2 + 100M
+L 1 + 3 + 100M
+L 1 + 4 + 100M
+L 5 + 6 + 100M
diff --git a/tests/testdata/two_components.gfa2 b/tests/testdata/two_components.gfa2
new file mode 100644
index 0000000..774805f
--- /dev/null
+++ b/tests/testdata/two_components.gfa2
@@ -0,0 +1,11 @@
+H VN:Z:2.0
+S 1 1000 *
+S 2 1000 *
+S 3 1000 *
+S 4 1000 *
+S 5 1000 *
+S 6 1000 *
+E * 1+ 2+ 900 1000$ 0 100 100M
+E * 1+ 3+ 900 1000$ 0 100 100M
+E * 1+ 4+ 900 1000$ 0 100 100M
+E * 5+ 6+ 900 1000$ 0 100 100M
diff --git a/tests/testdata/unnamed_and_named_links.gfa b/tests/testdata/unnamed_and_named_links.gfa
new file mode 100644
index 0000000..3f4a064
--- /dev/null
+++ b/tests/testdata/unnamed_and_named_links.gfa
@@ -0,0 +1,8 @@
+S A AAAAAAACGT
+S B ACGTCCACGT
+S C CACGTCCGGG
+S D GGGGGGGGGG
+L A + B + 4M ID:Z:2
+L B + C + 5M
+L C + D + 3M
+P P1 A+,B+ 4M
diff --git a/tests/testdata/unnamed_link.gfa b/tests/testdata/unnamed_link.gfa
new file mode 100644
index 0000000..6fa9dea
--- /dev/null
+++ b/tests/testdata/unnamed_link.gfa
@@ -0,0 +1,4 @@
+S A AAAAAAACGT
+S B ACGTCCACGT
+L A + B + 4M
+P P1 A+,B+ 4M
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/gfapy.git