[med-svn] [ruby-rgfa] 02/09: New upstream version 1.3
Sascha Steinbiss
satta at debian.org
Mon Sep 26 21:25:19 UTC 2016
This is an automated email from the git hooks/post-receive script.
satta pushed a commit to branch master
in repository ruby-rgfa.
commit 95b8e41a6a556e6d81a7f8403f1e023ea55163d6
Author: Sascha Steinbiss <satta at debian.org>
Date: Mon Sep 26 20:38:59 2016 +0000
New upstream version 1.3
---
.gitignore | 2 +
.travis.yml | 4 +
CHANGELOG | 19 +++
README.md | 26 ++++-
Rakefile | 15 ++-
bin/{gfadiff.rb => gfadiff} | 0
bin/{rgfa-findcrisprs.rb => rgfa-findcrisprs} | 0
bin/{rgfa-mergelinear.rb => rgfa-mergelinear} | 0
bin/{rgfa-simdebruijn.rb => rgfa-simdebruijn} | 0
...rgfa-cheatsheet-1.2.tex => rgfa-cheatsheet.tex} | 37 +++---
lib/rgfa.rb | 2 +-
lib/rgfa/cigar.rb | 11 +-
lib/rgfa/field_parser.rb | 8 +-
lib/rgfa/field_validator.rb | 14 +++
lib/rgfa/line.rb | 8 +-
lib/rgfa/line/comment.rb | 13 +++
lib/rgfa/line/containment.rb | 16 ++-
lib/rgfa/line/link.rb | 127 ++++++++++-----------
lib/rgfa/line/path.rb | 30 ++---
lib/rgfa/line/segment.rb | 6 +-
lib/rgfa/lines.rb | 3 +
lib/rgfa/links.rb | 2 +-
lib/rgfa/segment_ends_path.rb | 2 +
pdfdoc/{cover.html => cover.html.erb} | 2 +-
rgfa.gemspec | 12 +-
test/test_rgfa_line_comment.rb | 13 +++
test/test_rgfa_line_creators.rb | 10 ++
test/test_rgfa_line_path.rb | 2 +-
test/test_rgfa_line_segment.rb | 12 ++
29 files changed, 259 insertions(+), 137 deletions(-)
diff --git a/.gitignore b/.gitignore
index ec77918..c3eefd4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,8 +3,10 @@ doc
.yardoc
pdfdoc/index.html
pdfdoc/rgfa*.pdf
+pdfdoc/cover.html
cheatsheet/*.aux
cheatsheet/*_latexmk
cheatsheet/*.fls
cheatsheet/*.log
cheatsheet/*.pdf
+cheatsheet/version
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..736c079
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,4 @@
+language: ruby
+rvm:
+ - 2.0
+ - 2.2
diff --git a/CHANGELOG b/CHANGELOG
index 04c0812..c5100c2 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,22 @@
+== 1.3 ==
+
+major changes:
+- changes in GFA specification:
+-- P lines: cigars field is now overlaps
+-- comment lines
+-- forbid +, and -, in segment names
+- .rb suffix removed from bin/* scripts
+
+minor changes:
+- improved links terminology
+ (normal link -> canonical; reverse link/CIGAR -> complement)
+- definition of canonical link simplified
+
+== 1.2.1 ==
+
+- support new segment tags SH and UR
+- update cheatsheet
+
== 1.2 ==
- merge RGFATools into the main RGFA gem
diff --git a/README.md b/README.md
index 60e527d..3dab85d 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
The Graphical Fragment Assembly (GFA) is a proposed format which allow
to describe the product of sequence assembly.
This gem implements the proposed specifications for the GFA format
-described under https://github.com/pmelsted/GFA-spec/blob/master/GFA-spec.md
+described under https://github.com/GFA-spec/GFA-spec/blob/master/GFA-spec.md
as close as possible.
The library allows to create a RGFA object from a file in the GFA format
@@ -12,20 +12,34 @@ elements (e.g. which links connect two segments) and to manipulate the
graph (e.g. to eliminate a link or a segment or to duplicate a segment
distributing the read counts evenly on the copies).
+## Installation
+
+The latest release of the gem can be installed from the rubygems repository
+using:
+```gem install rgfa```
+
+Alternatively this git repository can be cloned or the source code
+installed from a release archive, and then the gem created and installed
+using:
+```rake install```
+
## Usage
-After installation of the gem (rake install), the library can be included
-in the own scripts with require "rgfa". Additional functionality, which
+To use the library in your Ruby scripts, just require it as follows:
+```require "rgfa"```
+
+Additional functionality, which
requires custom tags and additional conventions, is included in a separate
-part of the code named "RGFATools" and can be accessed with require "rgfatools".
+part of the code named {RGFATools} and can be accessed with:
+```require "rgfatools"```
## Documentation
A cheatsheet is available as pdf under
-https://github.com/ggonnella/rgfa/blob/master/cheatsheet/rgfa-cheatsheet-1.2.pdf
+https://github.com/ggonnella/rgfa/blob/master/cheatsheet/rgfa-cheatsheet-1.3.pdf
The full API documentation is available as pdf under
-https://github.com/ggonnella/rgfa/blob/master/pdfdoc/rgfa-api-1.2.pdf
+https://github.com/ggonnella/rgfa/blob/master/pdfdoc/rgfa-api-1.3.pdf
or in HTML format (http://www.rubydoc.info/github/ggonnella/rgfa/master/RGFA).
The main class of the library is {RGFA}, which is a good starting point
diff --git a/Rakefile b/Rakefile
index 40ba406..0822714 100644
--- a/Rakefile
+++ b/Rakefile
@@ -1,6 +1,6 @@
require "rake/testtask"
-$rgfaversion="1.2"
+$rgfaversion=Gem::Specification.load("rgfa.gemspec").version.to_s
Rake::TestTask.new do |t|
t.libs << 'test'
@@ -36,14 +36,21 @@ begin
rescue LoadError
end
-desc "Create cheatsheet"
-task :cs do
- system("latexmk cheatsheet/rgfa-cheatsheet-#$rgfaversion.tex "+
+desc "Typeset cheatsheet"
+task :cheatsheet do
+ system("echo #$rgfaversion > cheatsheet/version")
+ system("latexmk cheatsheet/rgfa-cheatsheet.tex "+
"-pdf -outdir=cheatsheet")
+ system("mv cheatsheet/rgfa-cheatsheet.pdf"+
+ " cheatsheet/rgfa-cheatsheet-#$rgfaversion.pdf")
end
desc "Create a PDF documentation"
task :pdf do
+ require "erb"
+ File.open("pdfdoc/cover.html", "w") do |f|
+ f.puts ERB.new(IO.read("pdfdoc/cover.html.erb")).result(binding)
+ end
system("yard2.0 --one-file -o pdfdoc")
system("wkhtmltopdf cover pdfdoc/cover.html "+
"toc "+
diff --git a/bin/gfadiff.rb b/bin/gfadiff
similarity index 100%
rename from bin/gfadiff.rb
rename to bin/gfadiff
diff --git a/bin/rgfa-findcrisprs.rb b/bin/rgfa-findcrisprs
similarity index 100%
rename from bin/rgfa-findcrisprs.rb
rename to bin/rgfa-findcrisprs
diff --git a/bin/rgfa-mergelinear.rb b/bin/rgfa-mergelinear
similarity index 100%
rename from bin/rgfa-mergelinear.rb
rename to bin/rgfa-mergelinear
diff --git a/bin/rgfa-simdebruijn.rb b/bin/rgfa-simdebruijn
similarity index 100%
rename from bin/rgfa-simdebruijn.rb
rename to bin/rgfa-simdebruijn
diff --git a/cheatsheet/rgfa-cheatsheet-1.2.tex b/cheatsheet/rgfa-cheatsheet.tex
similarity index 84%
rename from cheatsheet/rgfa-cheatsheet-1.2.tex
rename to cheatsheet/rgfa-cheatsheet.tex
index e17ab77..d1f299f 100644
--- a/cheatsheet/rgfa-cheatsheet-1.2.tex
+++ b/cheatsheet/rgfa-cheatsheet.tex
@@ -3,12 +3,15 @@
\usepackage{comment}
\usepackage{fancyhdr}
\usepackage{lastpage}
+\usepackage{catchfile}
\pagestyle{fancy}
+\CatchFileDef{\RGFAver}{version}{}
+
\usepackage{array}
\fancyhf{}
\renewcommand{\headrulewidth}{0pt}
-\rhead{\bfseries RGFA/RGFATools v.1.2 Cheatsheet (\thepage/\pageref{LastPage})
+\rhead{\bfseries RGFA \RGFAver Cheatsheet (\thepage/\pageref{LastPage})
\hspace{1.1cm}}
\lfoot{\tiny \ \ \ \ \ \ \ \ Copyright (c) 2016, Giorgio Gonnella, ZBH, University of
Hamburg, Germany. This document is under CC-BY-SA license.}
@@ -54,17 +57,16 @@ Hamburg, Germany. This document is under CC-BY-SA license.}
Write GFA to file & gfa.to\_file(filename) \\
Write GFA to standard output & puts gfa \\
Create deep copy & gfa.clone \\
- Validate after manual edits & gfa.validate! \\
Output statistics (normal/compact) & puts gfa.info; puts gfa.info(true) \\
\hline
Turn off validations & gfa.turn\_off\_validations \\
- Segments first & gfa.require\_segments\_first\_order \\
+ Validate line references & gfa.validate! \\
Enable progress logging & gfa.enable\_progress\_logging \\
\hline
Name of all segments & gfa.segment\_names \\
Name of all paths & gfa.path\_names \\
All segments, links, paths, etc & gfa.segments; gfa.links; gfa.paths; \ldots \\
- Iterate over segments, links, etc & gfa.each\_segment \verb/{|s|...}/ \\
+ Iterate over segments, links, etc & gfa.segments.each \verb/{|s|...}/ \\
\hline
Find segment & gfa.segment(segment\_name) \\
\ldots exception if does not exist & gfa.segment!(segment\_name) \\
@@ -87,7 +89,7 @@ Hamburg, Germany. This document is under CC-BY-SA license.}
\hline
Add line (examples) & gfa << "H\verb/\t/VZ:i:1.0" \\
& gfa << "S\verb/\t/a\verb/\t/*\verb/\t/LN:i:1200" \\
- Rename segment or path & gfa.rename("old", "new") \\
+ Rename segment or path & gfa.rename(:old, :new) \\
\hline
Segment coverage & s.coverage\\
Segment coverage (more accurate) & s.coverage(unit\_length:~avreadlen)\\
@@ -129,11 +131,8 @@ Hamburg, Germany. This document is under CC-BY-SA license.}
Delete segments contained in s & gfa.rm(\verb/:contained_in,:s/) \\
Delete s1-E links except to s2-B & gfa.delete\_other\_links(\verb/[s1,:E],[s2,:B]/)\\
\hline
- Content of headers field & gfa.header.xx \\
- Replace header field content & gfa.set\_header\_field(:xx, 12, \\
- & \hspace{4.3cm})\\
- Append to header field & gfa.set\_header\_field(:xx, 12,\\
- & \hspace{4.3cm}, existing: :add)\\
+ Access headers field & gfa.header.xx \\
+ Add new header field & gfa.header.add(:xx, 12)\\
\hline
Sum of read counts & \verb/gfa.segments.map(&:RC).inject(:+)/ \\
Highest coverage & \verb/gfa.segments.map(&:coverage).max/ \\
@@ -150,7 +149,7 @@ Hamburg, Germany. This document is under CC-BY-SA license.}
Split components & gfa.split\_connected\_components \\
Number of dead ends & gfa.n\_dead\_ends \\
\hline
- \textit{(with RGFATools only)} & \\
+ \textit{(require "rgfatools")} & \\
Multiply segment, distribute links & gfa.multiply("A", 4) \\
Compute copy numbers & gfa.compute\_copy\_numbers \\
Apply copy numbers & gfa.apply\_copy\_numbers \\
@@ -160,14 +159,14 @@ Hamburg, Germany. This document is under CC-BY-SA license.}
Remove small components & gfa.remove\_small\_components(minlen) \\
\hline
\textit{(Command line tools)} & \\
- Compare two GFA files & gfadiff.rb 1.gfa 2.gfa \\
- \ldots only segments and links & gfadiff.rb -s -l 1.gfa 2.gfa \\
- \ldots output as ruby script & gfadiff.rb -script 1.gfa 2.gfa \\
- Merge linear paths in graph & simplify.rb 2.gfa > 3.gfa \\
- \hline
- \textit{(Experimental command line tools)} & \\
- Simulate de Bruijn graph & simulate\_debruijn.rb 27 gnm.fas > 1.gfa \\
- \ldots and find CRISPRs candidates & find\_crisprs.rb 1.gfa \\
+ Compare two GFA files & gfadiff 1.gfa 2.gfa \\
+ \ldots only segments and links & gfadiff -s -l 1.gfa 2.gfa \\
+ \ldots output as ruby script & gfadiff -script 1.gfa 2.gfa \\
+ Merge linear paths in graph & rgfa-mergelinear 2.gfa > 3.gfa \\
+ \hline
+ \textit{(Case studies CLI tools)} & \\
+ Simulate de Bruijn graph & rgfa-simdebruijn 27 gnm.fas > 1.gfa \\
+ \ldots and find CRISPRs candidates & rgfa-findcrisprs 1.gfa \\
\hline
\end{tabular}
diff --git a/lib/rgfa.rb b/lib/rgfa.rb
index bf87ccf..013b944 100644
--- a/lib/rgfa.rb
+++ b/lib/rgfa.rb
@@ -15,7 +15,7 @@
# removing lines belonging to a RGFA instance. Specialized modules exist
# for each kind of line:
# - {RGFA::Headers}: accessing and creating header information is done
-# using a single header line object ({headers RGFA#header})
+# using a single header line object ({#header RGFA#header})
# - {RGFA::Segments}
# - {RGFA::Links}
# - {RGFA::Containments}
diff --git a/lib/rgfa/cigar.rb b/lib/rgfa/cigar.rb
index 4d07b7a..250ead0 100644
--- a/lib/rgfa/cigar.rb
+++ b/lib/rgfa/cigar.rb
@@ -4,11 +4,11 @@ require_relative "error.rb"
# Represents the contents of a CIGAR string.
class RGFA::CIGAR < Array
- # Compute the CIGAR for the segments in reverse direction.
+ # Compute the CIGAR for the segments when these are switched.
#
- # @example Reversing a CIGAR
+ # @example Computing the complement CIGAR
#
- # RGFA::CIGAR.from_string("2M1D3M").reverse.to_s
+ # RGFA::CIGAR.from_string("2M1D3M").complement.to_s
# # => "3M1I2M"
#
# # S1 + S2 + 2M1D3M
@@ -22,8 +22,8 @@ class RGFA::CIGAR < Array
# # S2 - S1 - 3M1I2M
#
# @return [RGFA::CIGAR] (empty if CIGAR string is *)
- def reverse
- super.map do |op|
+ def complement
+ reverse.map do |op|
if op.code == :I
op.code = :D
elsif op.code == :D
@@ -94,6 +94,7 @@ class RGFA::CIGAR::Operation
attr_accessor :len
attr_accessor :code
+ # CIGAR operation code
CODE = [:M, :I, :D, :N, :S, :H, :P, :X, :"="]
# @param len [Integer] length of the operation
diff --git a/lib/rgfa/field_parser.rb b/lib/rgfa/field_parser.rb
index 14ef2dc..605eb0c 100644
--- a/lib/rgfa/field_parser.rb
+++ b/lib/rgfa/field_parser.rb
@@ -19,11 +19,17 @@ module RGFA::FieldParser
fieldname: nil,
frozen: false)
case datatype
+ when :cmt
+ return self
when :A, :Z, :seq
validate_gfa_field!(datatype, fieldname: fieldname) if validate_strings
self.freeze if frozen
return self
- when :lbl, :orn
+ when :lbl
+ validate_segment_name!
+ validate_gfa_field!(datatype, fieldname: fieldname) if validate_strings
+ return to_sym.freeze
+ when :orn
validate_gfa_field!(datatype, fieldname: fieldname) if validate_strings
return to_sym.freeze
when :i
diff --git a/lib/rgfa/field_validator.rb b/lib/rgfa/field_validator.rb
index 3890e68..fe3c430 100644
--- a/lib/rgfa/field_validator.rb
+++ b/lib/rgfa/field_validator.rb
@@ -27,6 +27,7 @@ module RGFA::FieldValidator
:cig => /^(\*|(([0-9]+[MIDNSHPX=])+))$/, # CIGAR string
:cgs => /^(\*|(([0-9]+[MIDNSHPX=])+))(,(\*|(([0-9]+[MIDNSHPX=])+)))*$/,
# multiple CIGARs, comma-sep
+ :cmt => /.*/, # content of comment line, everything is allowed
}
# Validates the string according to the provided datatype
@@ -49,6 +50,19 @@ module RGFA::FieldValidator
end
end
+ # Validates segment names, to check that they do not contain + or -
+ # followed by comma
+ # @raise [RGFA::FieldParser::FormatError] if the segment name is invalid
+ # @return [void]
+ # @api private
+ def validate_segment_name!
+ if self =~ /.*[+-],.*/
+ raise RGFA::FieldParser::FormatError,
+ "Segment names are not allowed to contain +/- followed by comma "+
+ "(found: #{self})"
+ end
+ end
+
end
class String
diff --git a/lib/rgfa/line.rb b/lib/rgfa/line.rb
index 287d315..f2cca91 100644
--- a/lib/rgfa/line.rb
+++ b/lib/rgfa/line.rb
@@ -126,6 +126,7 @@ class RGFA::Line
when :L then RGFA::Line::Link
when :C then RGFA::Line::Containment
when :P then RGFA::Line::Path
+ when :"#" then RGFA::Line::Comment
else
raise RGFA::Line::UnknownRecordTypeError,
"Record type unknown: '#{record_type}'"
@@ -684,6 +685,7 @@ require_relative "line/segment.rb"
require_relative "line/path.rb"
require_relative "line/link.rb"
require_relative "line/containment.rb"
+require_relative "line/comment.rb"
# Extensions to the String core class.
#
@@ -696,7 +698,11 @@ class String
# @param validate [Integer] <i>(defaults to: 2)</i>
# see RGFA::Line#initialize
def to_rgfa_line(validate: 2)
- split(RGFA::Line::SEPARATOR).to_rgfa_line(validate: validate)
+ if self[0] == "#"
+ return RGFA::Line::Comment.new([self[1..-1]], validate: 0)
+ else
+ split(RGFA::Line::SEPARATOR).to_rgfa_line(validate: validate)
+ end
end
end
diff --git a/lib/rgfa/line/comment.rb b/lib/rgfa/line/comment.rb
new file mode 100644
index 0000000..f3eb5dd
--- /dev/null
+++ b/lib/rgfa/line/comment.rb
@@ -0,0 +1,13 @@
+# A comment line of a RGFA file
+class RGFA::Line::Comment < RGFA::Line
+
+ RECORD_TYPE = :"#"
+ REQFIELDS = [:content]
+ PREDEFINED_OPTFIELDS = []
+ DATATYPE = {
+ :content => :cmt,
+ }
+
+ define_field_methods!
+
+end
diff --git a/lib/rgfa/line/containment.rb b/lib/rgfa/line/containment.rb
index df57a41..01f7dea 100644
--- a/lib/rgfa/line/containment.rb
+++ b/lib/rgfa/line/containment.rb
@@ -56,11 +56,15 @@ class RGFA::Line::Containment < RGFA::Line
return rpos
end
- # Returns true if the containment is normal, false otherwise
+ # Returns true if the containment is canonical, false otherwise
#
- # <b> Definition of normal containment </b>
+ # == Definition of canonical containment
#
- # Each containment has an equivalent reverse containment.
+ # A containment is canonical if the from orientation is +
+ #
+ # === Details
+ #
+ # Each containment has an equivalent complement containment.
# Consider a containment of B (length:8) in A (length:100) at position 9 of A
# with a cigar 1M1I2M3D4M (i.e. rpos = 19).
#
@@ -69,18 +73,18 @@ class RGFA::Line::Containment < RGFA::Line
# A- B+ 1M1I2M3D4M 9 == A+ B- 4M3D2M1I1M 80
# A- B- 1M1I2M3D4M 9 == A+ B+ 4M3D2M1I1M 80
#
- # Pos in the reverse is equal to the length of A minus the right pos
+ # Pos in the complement is equal to the length of A minus the right pos
# of B before reversing.
#
# We require here that A != B as A == B makes no sense for containments.
# Thus it is always possible to express the containment using a positive
# from orientation.
#
- # For this reason the normality is simply defined as + from orientation.
+ # For this reason the canon is simply defined as + from orientation.
#
# @return [Boolean]
#
- def normal?
+ def canonical?
from_orient == :+
end
diff --git a/lib/rgfa/line/link.rb b/lib/rgfa/line/link.rb
index 8796fef..e437c48 100644
--- a/lib/rgfa/line/link.rb
+++ b/lib/rgfa/line/link.rb
@@ -112,108 +112,99 @@ class RGFA::Line::Link < RGFA::Line
to.to_sym
end
- # Returns true if the link is normal, false otherwise
+ # Returns true if the link is canonical, false otherwise
#
- # == Definition of normal link
+ # == Definition of canonical link
#
- # Each link has an equivalent reverse link. Consider a link of A to B
- # with a overlap 1M1I2M:
+ # A link is canonical if:
+ # - from != to and from < to (lexicographically); or
+ # - from == to and at least one of from_orient or to_orient is +
#
- # from+ to to+ (1M1I2M) == to- to from- (2M1D1M)
- # from- to to- (1M1I2M) == to+ to from+ (2M1D1M)
- # from+ to to- (1M1I2M) == to+ to from- (2M1D1M)
- # from- to to+ (1M1I2M) == to- to from+ (2M1D1M)
+ # === Details
#
- # Consider also the special case, where from == to and the overlap is not
- # specified, or equal to its reverse:
+ # In the special case in which from == to (== s) we have the
+ # following equivalences:
#
- # from+ to from+ (*) == from- to from- (*) # left has a +; right has no +
- # from- to from- (*) == from+ to from+ (*) # left has no +; right has a +
- # from+ to from- (*) == from+ to from- (*) # left == right
- # from- to from+ (*) == from- to from+ (*) # left == right
+ # s + s + == s - s -
+ # s - s - == s + s + (same as previous case)
+ # s + s - == s + s - (equivalent to itself)
+ # s - s + == s - s + (equivalent to itself)
#
- # Thus we define a link as normal if:
- # - from < to (lexicographical comparison of segments)
- # - from == to and overlap.to_s < reverse_overlap.to_s
- # - from == to, overlap == reverse_overlap and at least one orientation is +
+ # Considering the values on the left, the first one can be taken as
+ # canonical, the second not, because it can be transformed in the first
+ # one; the other two values are canonical, as they are only equivalent
+ # to themselves.
#
# @return [Boolean]
#
- def normal?
+ def canonical?
if from_name < to_name
return true
elsif from_name > to_name
return false
else
- overlap_s = overlap.to_s
- reverse_overlap_s = reverse_overlap.to_s
- if overlap_s < reverse_overlap_s
- return true
- elsif overlap_s > reverse_overlap_s
- return false
- else
- return [from_orient, to_orient].include?(:+)
- end
+ return [from_orient, to_orient].include?(:+)
end
end
- # Returns the unchanged link if the link is normal,
- # otherwise reverses the link and returns it.
+ # Returns the unchanged link if the link is canonical,
+ # otherwise complements the link and returns it.
#
# @note The path references are not corrected by this method; therefore
# the method shall be used before the link is embedded in a graph.
#
# @return [RGFA::Line::Link] self
- def normalize!
- reverse! if !normal?
+ def canonicize!
+ complement! if !canonical?
end
- # Creates a link with both strands of the sequences inverted.
+ # Creates the equivalent link with from/to inverted.
+ #
# The CIGAR operations (order/type) are inverted as well.
# Optional fields are left unchanged.
#
- # @note The path references are not copied to the reverse link.
+ # @note The path references are not copied to the complement link.
#
# @note This method shall be overridden if custom optional fields
- # are defined, which have a ``reverse'' operation which determines
- # their value in the equivalent but reverse link.
+ # are defined, which have a ``complementation'' operation which determines
+ # their value in the equivalent complement link.
#
# @return [RGFA::Line::Link] the inverted link.
- def reverse
+ def complement
l = self.clone
l.from = to
l.from_orient = (to_orient == :+ ? :- : :+)
l.to = from
l.to_orient = (from_orient == :+ ? :- : :+)
- l.overlap = reverse_overlap
+ l.overlap = complement_overlap
l
end
- # Reverses the link inplace, i.e. sets:
+ # Complements the link inplace, i.e. sets:
# from = to
# from_orient = other_orient(to_orient)
# to = from
# to_orient = other_orient(from_orient)
- # overlap = reverse_overlap.
+ # overlap = complement_overlap.
#
# The optional fields are left unchanged.
#
- # @note The path references are not reversed by this method; therefore
+ # @note The path references are not complemented by this method; therefore
# the method shall be used before the link is embedded in a graph.
#
# @note This method shall be overridden if custom optional fields
- # are defined, which have a ``reverse'' operation which determines
- # their value in the equivalent but reverse link.
+ # are defined, which have a ``complementation'' operation which determines
+ # their value in the complement link.
#
# @return [RGFA::Line::Link] self
- def reverse!
+ def complement!
tmp = self.from
self.from = self.to
self.to = tmp
tmp = self.from_orient
self.from_orient = (self.to_orient == :+) ? :- : :+
self.to_orient = (tmp == :+) ? :- : :+
- self.overlap = self.reverse_overlap
+ self.overlap = self.complement_overlap
return self
end
@@ -224,7 +215,7 @@ class RGFA::Line::Link < RGFA::Line
#
# Otherwise, an array of tuples path/boolean is returned.
# The boolean value tells
- # if the link is used in direct (true) or reverse direction (false)
+ # if the link is used (true) or its complement (false)
# in the path.
# @return [Array<Array<(RGFA::Line::Path, Boolean)>>]
def paths
@@ -235,8 +226,8 @@ class RGFA::Line::Link < RGFA::Line
# Compute the overlap when the strand of both sequences is inverted.
#
# @return [RGFA::CIGAR]
- def reverse_overlap
- self.overlap.reverse
+ def complement_overlap
+ self.overlap.to_cigar.complement
end
#
@@ -244,23 +235,23 @@ class RGFA::Line::Link < RGFA::Line
# Thereby, optional fields are not considered.
#
# @note Inverting the strand of both links and reversing
- # the CIGAR operations (order/type), one obtains a
- # reverse but equivalent link.
+ # the CIGAR operations (order/type), one obtains an
+ # equivalent complement link.
#
# @param other [RGFA::Line::Link] a link
# @return [Boolean] are self and other equivalent?
# @see #==
# @see #same?
- # @see #reverse?
+ # @see #complement?
def eql?(other)
- same?(other) or reverse?(other)
+ same?(other) or complement?(other)
end
# Compares the optional fields of two links.
#
# @note This method shall be overridden if custom optional fields
- # are defined, which have a ``reverse'' operation which determines
- # their value in the equivalent but reverse link.
+ # are defined, which have a ``complementation'' operation which determines
+ # their value in the equivalent but complement link.
#
# @param other [RGFA::Line::Link] a link
# @return [Boolean] are self and other equivalent?
@@ -291,7 +282,7 @@ class RGFA::Line::Link < RGFA::Line
# @param other [RGFA::Line::Link] a link
# @return [Boolean] are self and other equivalent?
# @see #eql?
- # @see #reverse?
+ # @see #complement?
# @see #==
def same?(other)
(from_end == other.from_end and
@@ -299,37 +290,37 @@ class RGFA::Line::Link < RGFA::Line
overlap == other.overlap)
end
- # Compares the reverse of the link to another link
+ # Compares the link to the complement of another link
# and determine their equivalence.
# Thereby, optional fields are not considered.
#
# @param other [RGFA::Line::Link] the other link
- # @return [Boolean] are the reverse of self and other equivalent?
+ # @return [Boolean] are self and the complement of other equivalent?
# @see #eql?
# @see #same?
# @see #==
- def reverse?(other)
+ def complement?(other)
(from_end == other.to_end and
to_end == other.from_end and
- overlap == other.reverse_overlap)
+ overlap == other.complement_overlap)
end
# Computes an hash for including a link in an Hash tables,
- # so that the hash of a link and its reverse is the same.
+ # so that the hash of a link and its complement is the same.
# Thereby, optional fields are not considered.
# @see #eql?
def hash
- from_end.hash + to_end.hash + overlap.hash + reverse_overlap.to_s.hash
+ from_end.hash + to_end.hash + overlap.hash + complement_overlap.to_s.hash
end
- # Compares a link and optionally the reverse link,
+ # Compares a link and optionally the complement link,
# with two oriented_segments and optionally an overlap.
# @param [RGFA::OrientedSegment] other_oriented_from
# @param [RGFA::OrientedSegment] other_oriented_to
- # @param equivalent [Boolean] shall the reverse link also be considered?
+ # @param equivalent [Boolean] shall the complement link also be considered?
# @param [RGFA::CIGAR] other_overlap compared only if not empty
# @return [Boolean] does the link or, if +equivalent+,
- # the reverse link go from the first
+ # the complement link go from the first
# oriented segment to the second with an overlap equal to the provided one
# (if not empty)?
def compatible?(other_oriented_from, other_oriented_to, other_overlap = [],
@@ -340,7 +331,7 @@ class RGFA::Line::Link < RGFA::Line
if is_direct
return true
elsif equivalent
- return compatible_reverse?(other_oriented_from, other_oriented_to,
+ return compatible_complement?(other_oriented_from, other_oriented_to,
other_overlap)
else
return false
@@ -361,15 +352,15 @@ class RGFA::Line::Link < RGFA::Line
(overlap.empty? or other_overlap.empty? or (overlap == other_overlap))
end
- # Compares the reverse link with two oriented segments and optionally an
+ # Compares the complement link with two oriented segments and optionally an
# overlap.
# @param [RGFA::OrientedSegment] other_oriented_from
# @param [RGFA::OrientedSegment] other_oriented_to
# @param [RGFA::CIGAR] other_overlap compared only if not empty
- # @return [Boolean] does the reverse link go from the first
+ # @return [Boolean] does the complement link go from the first
# oriented segment to the second with an overlap equal to the provided one
# (if not empty)?
- def compatible_reverse?(other_oriented_from, other_oriented_to,
+ def compatible_complement?(other_oriented_from, other_oriented_to,
other_overlap = [])
(oriented_to == other_oriented_from.invert_orient and
oriented_from == other_oriented_to.invert_orient) and
diff --git a/lib/rgfa/line/path.rb b/lib/rgfa/line/path.rb
index 9ae9c5f..dc25904 100644
--- a/lib/rgfa/line/path.rb
+++ b/lib/rgfa/line/path.rb
@@ -2,12 +2,12 @@
class RGFA::Line::Path < RGFA::Line
RECORD_TYPE = :P
- REQFIELDS = [:path_name, :segment_names, :cigars]
+ REQFIELDS = [:path_name, :segment_names, :overlaps]
PREDEFINED_OPTFIELDS = []
DATATYPE = {
:path_name => :lbl,
:segment_names => :lbs,
- :cigars => :cgs,
+ :overlaps => :cgs,
}
define_field_methods!
@@ -30,7 +30,7 @@ class RGFA::Line::Path < RGFA::Line
# equal to the number of segments.
# @return [Boolean]
def circular?
- self.cigars.size == self.segment_names.size
+ self.overlaps.size == self.segment_names.size
end
# Is the path linear? This is the case when the number of CIGARs
@@ -40,11 +40,11 @@ class RGFA::Line::Path < RGFA::Line
!circular?
end
- # Are the cigars a single "*"? This is a compact representation of
+ # Are the overlaps a single "*"? This is a compact representation of
# a linear path where all CIGARs are "*"
# @return [Boolean]
- def undef_cigars?
- self.cigars.size == 1 and self.cigars[0].empty?
+ def undef_overlaps?
+ self.overlaps.size == 1 and self.overlaps[0].empty?
end
# The links to which the path refers; it can be an empty array
@@ -62,14 +62,14 @@ class RGFA::Line::Path < RGFA::Line
# an array, which elements are 3-tuples (from oriented segment,
# to oriented segment, cigar)
def required_links
- has_undef_cigars = self.undef_cigars?
+ has_undef_overlaps = self.undef_overlaps?
retval = []
self.segment_names.size.times do |i|
j = i+1
if j == self.segment_names.size
circular? ? j = 0 : break
end
- cigar = has_undef_cigars ? [] : self.cigars[i]
+ cigar = has_undef_overlaps ? [] : self.overlaps[i]
retval << [self.segment_names[i], self.segment_names[j], cigar]
end
retval
@@ -78,20 +78,20 @@ class RGFA::Line::Path < RGFA::Line
private
def validate_lists_size!
- n_cigars = self.cigars.size
+ n_overlaps = self.overlaps.size
n_segments = self.segment_names.size
- if n_cigars == n_segments - 1
+ if n_overlaps == n_segments - 1
# case 1: linear path
return true
- elsif n_cigars == 1 and self.cigars[0].empty?
- # case 2: linear path, single "*" to represent cigars which are all "*"
+ elsif n_overlaps == 1 and self.overlaps[0].empty?
+ # case 2: linear path, single "*" to represent overlaps which are all "*"
return true
- elsif n_cigars == n_segments
+ elsif n_overlaps == n_segments
# case 3: circular path
else
raise RGFA::Line::Path::ListLengthsError,
"Path has #{n_segments} oriented segments, "+
- "but #{n_cigars} CIGARs"
+ "but #{n_overlaps} overlaps"
end
end
@@ -102,5 +102,5 @@ class RGFA::Line::Path < RGFA::Line
end
-# Error raised if number of segments and cigars are not consistent
+# Error raised if number of segments and overlaps are not consistent
class RGFA::Line::Path::ListLengthsError < RGFA::Error; end
diff --git a/lib/rgfa/line/segment.rb b/lib/rgfa/line/segment.rb
index 1b59226..dc3deda 100644
--- a/lib/rgfa/line/segment.rb
+++ b/lib/rgfa/line/segment.rb
@@ -3,14 +3,16 @@ class RGFA::Line::Segment < RGFA::Line
RECORD_TYPE = :S
REQFIELDS = [:name, :sequence]
- PREDEFINED_OPTFIELDS = [:LN, :RC, :FC, :KC]
+ PREDEFINED_OPTFIELDS = [:LN, :RC, :FC, :KC, :SH, :UR]
DATATYPE = {
:name => :lbl,
:sequence => :seq,
:LN => :i,
:RC => :i,
:FC => :i,
- :KC => :i
+ :KC => :i,
+ :SH => :H,
+ :UR => :Z
}
define_field_methods!
diff --git a/lib/rgfa/lines.rb b/lib/rgfa/lines.rb
index 8a719f8..4cbd172 100644
--- a/lib/rgfa/lines.rb
+++ b/lib/rgfa/lines.rb
@@ -28,6 +28,9 @@ module RGFA::Lines
add_containment(gfa_line)
when :P
add_path(gfa_line)
+ when :"#"
+ # do nothing, as the spec says these shall be ignored
+ # maybe we want to store them and output them again in a future version
else
raise # this never happens, as already caught by gfa_line init
end
diff --git a/lib/rgfa/links.rb b/lib/rgfa/links.rb
index 824b75f..28b4962 100644
--- a/lib/rgfa/links.rb
+++ b/lib/rgfa/links.rb
@@ -7,7 +7,7 @@ module RGFA::Links
def add_link(gfa_line)
gfa_line = gfa_line.to_rgfa_line(validate: @validate)
- gfa_line.normalize!
+ gfa_line.canonicize!
l = nil
if segment(gfa_line.from) and segment(gfa_line.to)
l = link_from_to(gfa_line.oriented_from,
diff --git a/lib/rgfa/segment_ends_path.rb b/lib/rgfa/segment_ends_path.rb
index d06dd2b..4871836 100644
--- a/lib/rgfa/segment_ends_path.rb
+++ b/lib/rgfa/segment_ends_path.rb
@@ -1,6 +1,8 @@
# An array containing {RGFA::SegmentEnd} elements, which defines a path
# in the graph
class RGFA::SegmentEndsPath < Array
+ # Create a reverse direction path
+ # @return [RGFA::SegmentEndsPath]
def reverse
super.map {|segment_end| segment_end.to_segment_end.invert_end_type}
end
diff --git a/pdfdoc/cover.html b/pdfdoc/cover.html.erb
similarity index 87%
rename from pdfdoc/cover.html
rename to pdfdoc/cover.html.erb
index 1a8d244..fe53daa 100644
--- a/pdfdoc/cover.html
+++ b/pdfdoc/cover.html.erb
@@ -6,7 +6,7 @@
<div class="page-middle-inner">
<h3>Giorgio Gonnella</h3>
<h1>RGFA library - API documentation</h1>
- <h2>Version 1.1</h2>
+ <h2>Version <%=$rgfaversion%> </h2>
</div>
</div>
</div>
diff --git a/rgfa.gemspec b/rgfa.gemspec
index e15ecf3..3efe4dd 100644
--- a/rgfa.gemspec
+++ b/rgfa.gemspec
@@ -1,7 +1,7 @@
Gem::Specification.new do |s|
s.name = 'rgfa'
- s.version = '1.2'
- s.date = '2016-09-20'
+ s.version = '1.3'
+ s.date = '2016-09-26'
s.summary = 'Parse, edit and write GFA-format graphs in Ruby'
s.description = <<-EOF
The Graphical Fragment Assembly (GFA) is a proposed format which allow
@@ -57,10 +57,10 @@ Gem::Specification.new do |s|
'lib/rgfatools/superfluous_links.rb',
'lib/rgfatools/linear_paths.rb',
'lib/rgfatools/p_bubbles.rb',
- 'bin/gfadiff.rb',
- 'bin/rgfa-mergelinear.rb',
- 'bin/rgfa-simdebruijn.rb',
- 'bin/rgfa-findcrisprs.rb',
+ 'bin/gfadiff',
+ 'bin/rgfa-mergelinear',
+ 'bin/rgfa-simdebruijn',
+ 'bin/rgfa-findcrisprs',
]
s.homepage = 'http://github.com/ggonnella/rgfa'
s.license = 'CC-BY-SA'
diff --git a/test/test_rgfa_line_comment.rb b/test/test_rgfa_line_comment.rb
new file mode 100644
index 0000000..e365358
--- /dev/null
+++ b/test/test_rgfa_line_comment.rb
@@ -0,0 +1,13 @@
+require_relative "../lib/rgfa.rb"
+require "test/unit"
+
+class TestRGFALineComment < Test::Unit::TestCase
+
+ def test_from_string
+ str = "#this is a comment"
+ l = str.to_rgfa_line
+ assert_equal(RGFA::Line::Comment, l.class)
+ assert_equal(str[1..-1], l.content)
+ end
+
+end
diff --git a/test/test_rgfa_line_creators.rb b/test/test_rgfa_line_creators.rb
index 4fd61f6..05c19d9 100644
--- a/test/test_rgfa_line_creators.rb
+++ b/test/test_rgfa_line_creators.rb
@@ -10,6 +10,16 @@ class TestRGFALineCreators < Test::Unit::TestCase
assert_equal([h], gfa.headers.map(&:to_s))
end
+ def test_add_comments
+ gfa = RGFA.new
+ c1 = "#this is a comment"
+ c2 = "# this is also a comment"
+ c3 = "#and \tthis too!"
+ assert_nothing_raised { gfa << c1 }
+ assert_nothing_raised { gfa << c2 }
+ assert_nothing_raised { gfa << c3 }
+ end
+
def test_add_segments
gfa = RGFA.new
s1 = "S\t1\t*".to_rgfa_line
diff --git a/test/test_rgfa_line_path.rb b/test/test_rgfa_line_path.rb
index beb8ee9..772c22b 100644
--- a/test/test_rgfa_line_path.rb
+++ b/test/test_rgfa_line_path.rb
@@ -17,7 +17,7 @@ class TestRGFALinePath < Test::Unit::TestCase
RGFA::CIGAR::Operation.new(3,:D),
RGFA::CIGAR::Operation.new(1,:M)],
[RGFA::CIGAR::Operation.new(12,:M)]],
- str.to_rgfa_line.cigars)
+ str.to_rgfa_line.overlaps)
assert_equal("abcd", str.to_rgfa_line.ab)
assert_raises(RGFA::FieldParser::FormatError) { (str+"\tH1").to_rgfa_line }
assert_raises(RGFA::Line::RequiredFieldMissingError) { "P\tH".to_rgfa_line }
diff --git a/test/test_rgfa_line_segment.rb b/test/test_rgfa_line_segment.rb
index 1c21888..a0b186c 100644
--- a/test/test_rgfa_line_segment.rb
+++ b/test/test_rgfa_line_segment.rb
@@ -35,6 +35,18 @@ class TestRGFALineSegment < Test::Unit::TestCase
assert_nothing_raised { f.join("\t").to_rgfa_line }
end
+ def test_forbidden_segment_names
+ assert_nothing_raised { "S\tA+B\t*".to_rgfa_line }
+ assert_nothing_raised { "S\tA-B\t*".to_rgfa_line }
+ assert_nothing_raised { "S\tA,B\t*".to_rgfa_line }
+ assert_raises(RGFA::FieldParser::FormatError) do
+ "S\tA+,B\t*".to_rgfa_line
+ end
+ assert_raises(RGFA::FieldParser::FormatError) do
+ "S\tA-,B\t*".to_rgfa_line
+ end
+ end
+
def test_coverage
l = "S\t0\t*\tRC:i:600\tLN:i:100".to_rgfa_line
assert_equal(6, l.coverage)
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/ruby-rgfa.git
More information about the debian-med-commit
mailing list