[med-svn] [Git][med-team/tantan][upstream] New upstream version 26

Wed Jun 2 20:09:16 BST 2021


Nilesh Patra pushed to branch upstream at Debian Med / tantan


Commits:
3505195e by Nilesh Patra at 2021-06-03T00:20:23+05:30
New upstream version 26
- - - - -


12 changed files:

- + .gitattributes
- − ChangeLog.txt
- Makefile
- − README.html
- README.txt → README.rst
- src/LambdaCalculator.cc
- src/LambdaCalculator.hh
- src/Makefile
- + src/cbrc_linalg.cc
- + src/cbrc_linalg.hh
- src/mcf_tantan_options.cc
- − src/version.hh


Changes:

=====================================
.gitattributes
=====================================
@@ -0,0 +1 @@
+src/Makefile export-subst


=====================================
ChangeLog.txt deleted
=====================================
@@ -1,140 +0,0 @@
-2020-05-07  Martin C. Frith  <Martin C. Frith>
-
-	* src/tantan.cc:
-	Make it faster
-	[6b7981c6d602] [tip]
-
-2018-12-19  Martin C. Frith  <Martin C. Frith>
-
-	* src/tantan.cc:
-	Make it faster
-	[3523060bcfb9]
-
-	* src/tantan_repeat_finder.cc, src/tantan_repeat_finder.hh:
-	Make -f4 a bit faster
-	[1fccdf6e21be]
-
-	* README.txt, src/mcf_tantan_options.cc, src/mcf_tantan_options.hh,
-	src/tantan_app.cc, src/tantan_repeat_finder.cc,
-	src/tantan_repeat_finder.hh, test/hard.fa, test/tantan_test.out,
-	test/tantan_test.sh:
-	Add option to find straightforward tandem repeats
-	[1bae60144712]
-
-	* README.txt, src/mcf_tantan_options.cc, src/tantan.cc,
-	src/tantan_app.cc:
-	Tweak the help message
-	[fc1ca32a72aa]
-
-2018-12-10  Martin C. Frith  <Martin C. Frith>
-
-	* README.txt, src/mcf_tantan_options.cc, src/mcf_tantan_options.hh,
-	src/tantan_app.cc, test/tantan_test.out, test/tantan_test.sh:
-	Add match score, mismatch cost options
-	[2383404c795a]
-
-	* src/tantan.cc:
-	Refactor
-	[d61af119db30]
-
-	* src/tantan.cc:
-	Refactor
-	[81bdfe23217d]
-
-	* src/CA_code/lambda_calculator.c, src/CA_code/lambda_calculator.h,
-	src/CA_code/lubksb.c, src/CA_code/ludcmp.c, src/CA_code/nrutil.c,
-	src/CA_code/nrutil.h, src/LambdaCalculator.cc,
-	src/LambdaCalculator.hh, src/Makefile,
-	src/mcf_score_matrix_probs.cc, src/mcf_score_matrix_probs.hh,
-	src/tantan_app.cc:
-	Use Konta-san's code for matrix lambda
-	[fccc8e5e9c1b]
-
-	* src/tantan.cc, src/tantan_app.cc, test/tantan_test.sh:
-	Refactor
-	[f9a9da99553d]
-
-2012-10-16  Martin C. Frith  <Martin C. Frith>
-
-	* README.txt:
-	Expanded the documentation: installation & FAQ
-	[946a951b1a06]
-
-	* src/CA_code/nrutil.c:
-	Just fixed a compiler warning
-	[fa79ce69a581]
-
-2012-08-08  Martin C. Frith  <Martin C. Frith>
-
-	* Makefile:
-	Avoid getting extra, unwanted files in the zip archive
-	[d1d55af0e693]
-
-	* test/tantan_test.sh:
-	Made the test script less likely to use an unexpected version of
-	tantan
-	[894f9fc63c3e]
-
-	* Makefile, README.txt:
-	Added "make install", and HTML-ization of the README
-	[674f2f7da185]
-
-	* src/Makefile:
-	Made the Makefile check the version number
-	[ea0be7d72912]
-
-	* README.txt:
-	Converted the README to reStructuredText
-	[25b631ab86b4]
-
-	* src/Makefile:
-	Generalized the Makefile
-	[9c324ee14279]
-
-2011-07-07  Martin C. Frith  <Martin C. Frith>
-
-	* README.txt:
-	Just changed the documentation a bit
-	[2d6dfe34fe33]
-
-2011-03-22  Martin C. Frith  <Martin C. Frith>
-
-	* README.txt, src/mcf_tantan_options.cc, src/mcf_tantan_options.hh,
-	src/tantan_app.cc:
-	Added BED output option. Clarified the README (thanks: Joe Ryan).
-	[db4af8e37a20]
-
-2011-01-07  Martin C. Frith  <Martin C. Frith>
-
-	* README.txt, src/mcf_fasta_sequence.cc, src/mcf_fasta_sequence.hh,
-	test/panda.fastq, test/tantan_test.out, test/tantan_test.sh:
-	Made tantan work on fastq format, and improved the README
-	[e7d454a5982d]
-
-2010-11-16  Martin C. Frith  <Martin C. Frith>
-
-	* README.txt, src/Makefile, src/mcf_alphabet.cc, src/mcf_alphabet.hh,
-	src/mcf_tantan_options.cc:
-	Fixed compile error with g++ 4.0.1
-	[97a2b94e5835]
-
-2010-10-13  Martin C. Frith  <Martin C. Frith>
-
-	* src/tantan_app.cc:
-	Boost the version number to 1
-	[c5a20b01a563]
-
-	* COPYING.txt, README.txt, src/CA_code/lambda_calculator.c,
-	src/CA_code/lambda_calculator.h, src/CA_code/lubksb.c,
-	src/CA_code/ludcmp.c, src/CA_code/nrutil.c, src/CA_code/nrutil.h,
-	src/Makefile, src/mcf_alphabet.cc, src/mcf_alphabet.hh,
-	src/mcf_fasta_sequence.cc, src/mcf_fasta_sequence.hh,
-	src/mcf_score_matrix.cc, src/mcf_score_matrix.hh,
-	src/mcf_score_matrix_probs.cc, src/mcf_score_matrix_probs.hh,
-	src/mcf_tantan_options.cc, src/mcf_tantan_options.hh,
-	src/mcf_util.cc, src/mcf_util.hh, src/tantan.cc, src/tantan.hh,
-	src/tantan_app.cc, test/atMask.mat, test/hg19_chrM.fa,
-	test/tantan_test.out, test/tantan_test.sh, test/titin_human.fa:
-	First commit
-	[4f9b0ba31588]
-


=====================================
Makefile
=====================================
@@ -12,14 +12,5 @@ install: all
 clean:
 	@cd src && ${MAKE} clean
 
-README.html: README.txt
-	rst2html README.txt > README.html
-
-log:
-	hg log --style changelog > ChangeLog.txt
-
-distdir = tantan-`hg id -n`
-dist: README.html log
-	@cd src && ${MAKE} version.hh
-	rsync -rC --exclude tantan src test Makefile *.txt README.html ${distdir}
-	zip -qrm ${distdir} ${distdir}
+tag:
+	git tag -m "" `git rev-list HEAD^ | grep -c .`


=====================================
README.html deleted
=====================================
@@ -1,555 +0,0 @@
-<?xml version="1.0" encoding="utf-8" ?>
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
-<head>
-<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
-<meta name="generator" content="Docutils 0.6: http://docutils.sourceforge.net/" />
-<title>tantan</title>
-<style type="text/css">
-
-/*
-:Author: David Goodger (goodger at python.org)
-:Id: $Id: html4css1.css 5951 2009-05-18 18:03:10Z milde $
-:Copyright: This stylesheet has been placed in the public domain.
-
-Default cascading style sheet for the HTML output of Docutils.
-
-See http://docutils.sf.net/docs/howto/html-stylesheets.html for how to
-customize this style sheet.
-*/
-
-/* used to remove borders from tables and images */
-.borderless, table.borderless td, table.borderless th {
-  border: 0 }
-
-table.borderless td, table.borderless th {
-  /* Override padding for "table.docutils td" with "! important".
-     The right padding separates the table cells. */
-  padding: 0 0.5em 0 0 ! important }
-
-.first {
-  /* Override more specific margin styles with "! important". */
-  margin-top: 0 ! important }
-
-.last, .with-subtitle {
-  margin-bottom: 0 ! important }
-
-.hidden {
-  display: none }
-
-a.toc-backref {
-  text-decoration: none ;
-  color: black }
-
-blockquote.epigraph {
-  margin: 2em 5em ; }
-
-dl.docutils dd {
-  margin-bottom: 0.5em }
-
-/* Uncomment (and remove this text!) to get bold-faced definition list terms
-dl.docutils dt {
-  font-weight: bold }
-*/
-
-div.abstract {
-  margin: 2em 5em }
-
-div.abstract p.topic-title {
-  font-weight: bold ;
-  text-align: center }
-
-div.admonition, div.attention, div.caution, div.danger, div.error,
-div.hint, div.important, div.note, div.tip, div.warning {
-  margin: 2em ;
-  border: medium outset ;
-  padding: 1em }
-
-div.admonition p.admonition-title, div.hint p.admonition-title,
-div.important p.admonition-title, div.note p.admonition-title,
-div.tip p.admonition-title {
-  font-weight: bold ;
-  font-family: sans-serif }
-
-div.attention p.admonition-title, div.caution p.admonition-title,
-div.danger p.admonition-title, div.error p.admonition-title,
-div.warning p.admonition-title {
-  color: red ;
-  font-weight: bold ;
-  font-family: sans-serif }
-
-/* Uncomment (and remove this text!) to get reduced vertical space in
-   compound paragraphs.
-div.compound .compound-first, div.compound .compound-middle {
-  margin-bottom: 0.5em }
-
-div.compound .compound-last, div.compound .compound-middle {
-  margin-top: 0.5em }
-*/
-
-div.dedication {
-  margin: 2em 5em ;
-  text-align: center ;
-  font-style: italic }
-
-div.dedication p.topic-title {
-  font-weight: bold ;
-  font-style: normal }
-
-div.figure {
-  margin-left: 2em ;
-  margin-right: 2em }
-
-div.footer, div.header {
-  clear: both;
-  font-size: smaller }
-
-div.line-block {
-  display: block ;
-  margin-top: 1em ;
-  margin-bottom: 1em }
-
-div.line-block div.line-block {
-  margin-top: 0 ;
-  margin-bottom: 0 ;
-  margin-left: 1.5em }
-
-div.sidebar {
-  margin: 0 0 0.5em 1em ;
-  border: medium outset ;
-  padding: 1em ;
-  background-color: #ffffee ;
-  width: 40% ;
-  float: right ;
-  clear: right }
-
-div.sidebar p.rubric {
-  font-family: sans-serif ;
-  font-size: medium }
-
-div.system-messages {
-  margin: 5em }
-
-div.system-messages h1 {
-  color: red }
-
-div.system-message {
-  border: medium outset ;
-  padding: 1em }
-
-div.system-message p.system-message-title {
-  color: red ;
-  font-weight: bold }
-
-div.topic {
-  margin: 2em }
-
-h1.section-subtitle, h2.section-subtitle, h3.section-subtitle,
-h4.section-subtitle, h5.section-subtitle, h6.section-subtitle {
-  margin-top: 0.4em }
-
-h1.title {
-  text-align: center }
-
-h2.subtitle {
-  text-align: center }
-
-hr.docutils {
-  width: 75% }
-
-img.align-left, .figure.align-left{
-  clear: left ;
-  float: left ;
-  margin-right: 1em }
-
-img.align-right, .figure.align-right {
-  clear: right ;
-  float: right ;
-  margin-left: 1em }
-
-.align-left {
-  text-align: left }
-
-.align-center {
-  clear: both ;
-  text-align: center }
-
-.align-right {
-  text-align: right }
-
-/* reset inner alignment in figures */
-div.align-right {
-  text-align: left }
-
-/* div.align-center * { */
-/*   text-align: left } */
-
-ol.simple, ul.simple {
-  margin-bottom: 1em }
-
-ol.arabic {
-  list-style: decimal }
-
-ol.loweralpha {
-  list-style: lower-alpha }
-
-ol.upperalpha {
-  list-style: upper-alpha }
-
-ol.lowerroman {
-  list-style: lower-roman }
-
-ol.upperroman {
-  list-style: upper-roman }
-
-p.attribution {
-  text-align: right ;
-  margin-left: 50% }
-
-p.caption {
-  font-style: italic }
-
-p.credits {
-  font-style: italic ;
-  font-size: smaller }
-
-p.label {
-  white-space: nowrap }
-
-p.rubric {
-  font-weight: bold ;
-  font-size: larger ;
-  color: maroon ;
-  text-align: center }
-
-p.sidebar-title {
-  font-family: sans-serif ;
-  font-weight: bold ;
-  font-size: larger }
-
-p.sidebar-subtitle {
-  font-family: sans-serif ;
-  font-weight: bold }
-
-p.topic-title {
-  font-weight: bold }
-
-pre.address {
-  margin-bottom: 0 ;
-  margin-top: 0 ;
-  font: inherit }
-
-pre.literal-block, pre.doctest-block {
-  margin-left: 2em ;
-  margin-right: 2em }
-
-span.classifier {
-  font-family: sans-serif ;
-  font-style: oblique }
-
-span.classifier-delimiter {
-  font-family: sans-serif ;
-  font-weight: bold }
-
-span.interpreted {
-  font-family: sans-serif }
-
-span.option {
-  white-space: nowrap }
-
-span.pre {
-  white-space: pre }
-
-span.problematic {
-  color: red }
-
-span.section-subtitle {
-  /* font-size relative to parent (h1..h6 element) */
-  font-size: 80% }
-
-table.citation {
-  border-left: solid 1px gray;
-  margin-left: 1px }
-
-table.docinfo {
-  margin: 2em 4em }
-
-table.docutils {
-  margin-top: 0.5em ;
-  margin-bottom: 0.5em }
-
-table.footnote {
-  border-left: solid 1px black;
-  margin-left: 1px }
-
-table.docutils td, table.docutils th,
-table.docinfo td, table.docinfo th {
-  padding-left: 0.5em ;
-  padding-right: 0.5em ;
-  vertical-align: top }
-
-table.docutils th.field-name, table.docinfo th.docinfo-name {
-  font-weight: bold ;
-  text-align: left ;
-  white-space: nowrap ;
-  padding-left: 0 }
-
-h1 tt.docutils, h2 tt.docutils, h3 tt.docutils,
-h4 tt.docutils, h5 tt.docutils, h6 tt.docutils {
-  font-size: 100% }
-
-ul.auto-toc {
-  list-style-type: none }
-
-</style>
-</head>
-<body>
-<div class="document" id="tantan">
-<h1 class="title">tantan</h1>
-
-<div class="section" id="introduction">
-<h1>Introduction</h1>
-<p>tantan is a tool for masking simple regions (low complexity and
-short-period tandem repeats) in biological sequences.</p>
-<p>The aim of tantan is to prevent false predictions when searching for
-homologous regions between two sequences.  Simple repeats often align
-strongly to each other, causing false homology predictions.</p>
-</div>
-<div class="section" id="setup">
-<h1>Setup</h1>
-<p>You need to have a C++ compiler.  On Linux, you might need to install
-a package called "g++".  On Mac, you might need to install
-command-line developer tools.  On Windows, you might need to install
-Cygwin.</p>
-<p>Using the command line, go into the tantan directory.  To compile it,
-type:</p>
-<pre class="literal-block">
-make
-</pre>
-<p>Optionally, copy tantan to a standard "bin" directory (using "sudo" to
-request administrator permissions):</p>
-<pre class="literal-block">
-sudo make install
-</pre>
-<p>Or copy it to your personal bin directory:</p>
-<pre class="literal-block">
-make install prefix=~
-</pre>
-<p>You might need to log out and back in for your computer to recognize
-the new program.</p>
-</div>
-<div class="section" id="normal-usage">
-<h1>Normal usage</h1>
-<ul>
-<li><p class="first">Suppose you have some nucleotide sequences (DNA or RNA) in a
-FASTA-format file called "ntseqs.fa".  You can identify simple
-repeats like this:</p>
-<pre class="literal-block">
-tantan ntseqs.fa > masked.fa
-</pre>
-<p>This will create a new FASTA file called "masked.fa" that replaces
-all masked regions with lowercase letters.  (tantan also works on
-FASTQ-format, though it does not use the quality data.)</p>
-</li>
-<li><p class="first">To mask proteins effectively, tantan needs to use different
-algorithm parameters than for nucleotides.  You have to tell it when
-you have proteins, using the "-p" option:</p>
-<pre class="literal-block">
-tantan -p aaseqs.fa > masked.fa
-</pre>
-<p>If you omit "-p" and the sequences look proteinaceous, tantan will
-print a warning message.</p>
-</li>
-</ul>
-</div>
-<div class="section" id="advanced-usage">
-<h1>Advanced usage</h1>
-<ul>
-<li><p class="first">By default, tantan indicates repetitive regions with lowercase
-letters.  You can make it replace repetitive letters with (say) "N"
-by using the "-x" option:</p>
-<pre class="literal-block">
-tantan -x N ntseqs.fa > masked.fa
-</pre>
-</li>
-<li><p class="first">By default, tantan does not preserve lowercase letters in the input
-sequences.  You can tell it to preserve them by using the "-c"
-option.  So the output will have the union of the lowercase in the
-input and the lowercase assigned by tantan:</p>
-<pre class="literal-block">
-tantan -c ntseqs.fa > masked.fa
-</pre>
-</li>
-<li><p class="first">tantan's masking rate is usually OK, but you can alter it by
-changing the "-r" parameter from its default value of 0.005.  Higher
-values increase the amount of masking, and lower values decrease it.
-This increases the masking rate:</p>
-<pre class="literal-block">
-tantan -r 0.02 ntseqs.fa > masked.fa
-</pre>
-</li>
-<li><p class="first">Finally, to mask extremely AT-rich DNA, you should change tantan's
-scoring matrix.  The "test" directory contains a matrix "atMask.mat"
-that works well for DNA with ~80% A+T, such as Plasmodium and
-Dictyostelium genomes.  We recommend masking such DNA like this:</p>
-<pre class="literal-block">
-tantan -m atMask.mat -r 0.01 atrich.fa > masked.fa
-</pre>
-</li>
-</ul>
-<p>The preceding examples cover all of tantan's options that you should
-ever need.</p>
-</div>
-<div class="section" id="recommendations-for-homology-search">
-<h1>Recommendations for homology search</h1>
-<ol class="arabic simple">
-<li>Mask <em>both</em> (sets of) sequences.</li>
-<li>If for some reason you wish to mask only one (set of) sequence(s),
-increase "-r" to 0.02 (0.05 for AT-rich DNA).</li>
-<li>For DNA-versus-protein alignment, increase "-r" for the proteins to
-0.02.  If for some reason you mask only one (set of) sequence(s),
-make sure it's the proteins.</li>
-<li>Some alignment tools exclude lowercase from their initial seeding
-phase, but treat lowercase identically to uppercase during their
-subsequent extension phase.  Unfortunately, this does not reliably
-prevent false homology predictions.  It is OK to re-align without
-masking after homology has been decided: FASTA and LAST can do
-this.</li>
-</ol>
-<p>For more information, please read the tantan publication (see below).</p>
-</div>
-<div class="section" id="faq">
-<h1>FAQ</h1>
-<div class="section" id="why-does-tantan-sometimes-mask-isolated-bases">
-<h2>Why does tantan sometimes mask isolated bases?</h2>
-<p>This happens when a region is borderline repetitive, and a single base
-creeps just over the threshold.  Don't worry about it, it's not a
-problem (at least for tantan's aim of preventing false homology).</p>
-</div>
-<div class="section" id="does-tantan-mask-functional-sequence">
-<h2>Does tantan mask functional sequence?</h2>
-<p>Of course.  Proteins and protein-coding exons can contain simple
-repeats.  Repeats can be functional.  If we want to avoid false
-homology we have to mask them.  Remember that tantan merely lowercases
-repeats, so it's easy to lift the masking after determining homology.</p>
-</div>
-</div>
-<div class="section" id="options">
-<h1>Options</h1>
-<table class="docutils option-list" frame="void" rules="none">
-<col class="option" />
-<col class="description" />
-<tbody valign="top">
-<tr><td class="option-group">
-<kbd><span class="option">-p</span></kbd></td>
-<td>interpret the sequences as proteins</td></tr>
-<tr><td class="option-group">
-<kbd><span class="option">-x</span></kbd></td>
-<td>letter to use for masking, instead of lowercase</td></tr>
-<tr><td class="option-group">
-<kbd><span class="option">-c</span></kbd></td>
-<td>preserve uppercase/lowercase in non-masked regions</td></tr>
-<tr><td class="option-group">
-<kbd><span class="option">-m</span></kbd></td>
-<td>file for letter-pair score matrix</td></tr>
-<tr><td class="option-group">
-<kbd><span class="option">-r</span></kbd></td>
-<td>probability of a repeat starting per position</td></tr>
-<tr><td class="option-group">
-<kbd><span class="option">-e</span></kbd></td>
-<td>probability of a repeat ending per position</td></tr>
-<tr><td class="option-group">
-<kbd><span class="option">-w</span></kbd></td>
-<td>maximum tandem repeat period to consider</td></tr>
-<tr><td class="option-group">
-<kbd><span class="option">-d</span></kbd></td>
-<td>probability decay per period (period-(i+1) / period-i)</td></tr>
-<tr><td class="option-group">
-<kbd><span class="option">-i</span></kbd></td>
-<td>match score</td></tr>
-<tr><td class="option-group">
-<kbd><span class="option">-j</span></kbd></td>
-<td>mismatch cost</td></tr>
-<tr><td class="option-group">
-<kbd><span class="option">-a</span></kbd></td>
-<td>gap existence cost</td></tr>
-<tr><td class="option-group">
-<kbd><span class="option">-b</span></kbd></td>
-<td>gap extension cost</td></tr>
-<tr><td class="option-group">
-<kbd><span class="option">-s</span></kbd></td>
-<td>minimum repeat probability for masking</td></tr>
-<tr><td class="option-group">
-<kbd><span class="option">-n</span></kbd></td>
-<td>minimum copy number, affects -f4 only</td></tr>
-<tr><td class="option-group">
-<kbd><span class="option">-f</span></kbd></td>
-<td>output type: 0=masked sequence, 1=repeat probabilities,
-2=repeat counts, 3=BED, 4=tandem repeats</td></tr>
-<tr><td class="option-group">
-<kbd><span class="option">-h</span>, <span class="option">--help</span></kbd></td>
-<td>show help message, then exit</td></tr>
-<tr><td class="option-group">
-<kbd><span class="option">--version</span></kbd></td>
-<td>show version information, then exit</td></tr>
-</tbody>
-</table>
-</div>
-<div class="section" id="advanced-issues">
-<h1>Advanced issues</h1>
-<p>When tantan masks tandem repeats, it tends to leave the first
-(left-most) repeat unit unmasked.  This sometimes allows us to find
-homologs we would otherwise miss:</p>
-<pre class="literal-block">
-TGCAAGCTA TTAGGCTTAGGTCAGTGC ttaagcttaggtcagtgc AACATA
-||| ||| | |||||||||||||||||| ||| |||||||||||||| ||| ||
-TGCTAGCAA TTAGGCTTAGGTCAGTGC ttaggcttaggtcagtgc AACGTA
-</pre>
-<p>However, there is a danger of non-equivalent repeat units being
-unmasked.  This happens especially if we mask DNA on one strand but
-align it on the other strand:</p>
-<pre class="literal-block">
-                   TGCAAGCTA TTAGGCTTAGGTCAGTGC ttaagcttaggtcagtgc AACATA
-                             ||||||||||||||||||
-TGCTAGCAA ttaggcttaggtcagtgc TTAGGCTTAGGTCAGTGC AACGTA
-</pre>
-<p>(My thanks to Junko Tsuji and Paul Horton for finding these issues.)</p>
-</div>
-<div class="section" id="finding-straightforward-tandem-repeats">
-<h1>Finding straightforward tandem repeats</h1>
-<p>Option -f4 runs tantan in a different mode, where it finds
-straightforward tandem repeats only.  (Technically, it uses a Viterbi
-algorithm instead of a Forward-Backward algorithm.)  This is <em>not</em>
-recommended for avoiding false homologs!  But it might be useful for
-studying tandem repeats.  The output looks like this:</p>
-<pre class="literal-block">
-mySeq   14765   14780   6       2.5     GTCATG  GTCATG,GTCATG,GTC
-mySeq   632362  632377  2       6       GC      GC,GC,GC,GCt,GCT,GCT
-mySeq   1278353 1278369 3       6.5     TCA     TCA,TCA,TCA,TC-,TC,TC,T
-mySeq   3616084 3616100 3       5.33333 TGG     TGA,TGA,TGG,TGG,TGG,T
-</pre>
-<p>The first 3 columns show the start and end coordinates of the
-repetitive region, in <a class="reference external" href="https://genome.ucsc.edu/FAQ/FAQformat.html#format1">BED</a> format.  Column
-4 shows the length of the repeating unit (which might vary due to
-insertions and deletions, so this column shows the most common
-length).  Column 5 shows the number of repeat units.  Column 6 shows
-the repeating unit (which again might vary, so this is just a
-representative).  Column 7 shows the whole repeat: lowercase letters
-are insertions relative to the previous repeat unit, and dashes are
-deletions relative to the previous repeat unit.</p>
-</div>
-<div class="section" id="miscellaneous">
-<h1>Miscellaneous</h1>
-<p>tantan is distributed under the GNU General Public License, either
-version 3 of the License, or (at your option) any later version.  For
-details, see COPYING.txt.</p>
-<p>If you use tantan in your research, please cite:
-"A new repeat-masking method enables specific detection of homologous
-sequences", MC Frith, Nucleic Acids Research 2011 39(4):e23.</p>
-<p>tantan's website is: <a class="reference external" href="http://www.cbrc.jp/tantan/">http://www.cbrc.jp/tantan/</a></p>
-<p>If you have any questions, comments, or problems concerning tantan,
-please email: tantan (ATmark) cbrc (dot) jp.</p>
-</div>
-</div>
-</body>
-</html>


=====================================
README.txt → README.rst
=====================================
@@ -208,8 +208,3 @@ details, see COPYING.txt.
 If you use tantan in your research, please cite:
 "A new repeat-masking method enables specific detection of homologous
 sequences", MC Frith, Nucleic Acids Research 2011 39(4):e23.
-
-tantan's website is: http://www.cbrc.jp/tantan/
-
-If you have any questions, comments, or problems concerning tantan,
-please email: tantan (ATmark) cbrc (dot) jp.


=====================================
src/LambdaCalculator.cc
=====================================
@@ -1,186 +1,29 @@
 // Copyright 2015 Yutaro Konta
 
 #include "LambdaCalculator.hh"
-#include <vector>
+#include "cbrc_linalg.hh"
 #include <cassert>
-#include <cstdio>  // sprintf
-#include <cstdlib>
 #include <cfloat>
 #include <cmath>
+#include <numeric>
 //#include <iostream>
 using namespace std;
 
-static double roundToFewDigits(double x)
+static bool calculate_inv_sum(double **matrix, int alpha_size, double tau, double* inv_sum, double **tmpMat, double *tmpVec)
 {
-  // This rounding fixes some inaccuracies, e.g. for DNA with a simple
-  // match/mismatch matrix it's likely to make all the probabilities
-  // exactly 0.25, as they should be.
-  char buffer[32];
-  sprintf(buffer, "%g", x);
-  return atof(buffer);
-}
-
-static double** makematrix(int m, int n, double val)
-{
-  double** mat = new double* [m];
-  for (int i=0; i<m; i++)
-    {
-      mat[i] = new double [n];
-      for (int j=0; j<n; j++)
-        mat[i][j] = val;
-    }
-  return mat;
-}
-
-static void deletematrix(double** a, int m)
-{
-  for (int i=0; i<m; i++)
-    delete[] a[i];
-  delete[] a;
-}
-
-static double summatrix(double** a, int m, int n)
-{
-  double s = 0;
-  for (int i=0; i<m; i++)
-    for (int j=0; j<n; j++)
-      s += a[i][j];
-  return s;
-}
-
-static int max_index(double** a, int n, int i)
-{
-  double max = -DBL_MAX;
-  int maxindex = -1;
-
-  for (int j=i; j<n; j++)
-    {
-      if (fabs(a[j][i]) > max)
-        {
-          max = fabs(a[j][i]);
-          maxindex = j;
-        }
-    }
-  return maxindex;
-}
-
-static void swap_matrix(double** a, int i, int j)
-{
-  double* v = a[i];
-  a[i] = a[j];
-  a[j] = v;
-}
-
-static void swap_vector(int* a, int i, int j)
-{
-  int v = a[i];
-  a[i] = a[j];
-  a[j] = v;
-}
-
-static bool lu_pivoting(double** a, int* idx, int n)
-{
-  int v;
-
-  for (int i=0; i<n; i++)
-    idx[i] = i;
-
-  for (int i=0; i<n; i++)
-    {
-      v = max_index(a, n, i);
-      assert(v >= 0);
-      if (fabs(a[v][i]) < 1e-10)
-        {
-          return false; // singular matrix!
-        }
-
-      swap_matrix(a, i, v);
-      swap_vector(idx, i, v);
-
-      a[i][i] = 1.0/a[i][i];
-      for (int j=i+1; j<n; j++)
-        {
-          a[j][i] = a[j][i] * a[i][i];
-          for (int k=i+1; k<n; k++)
-            a[j][k] = a[j][k] - a[j][i] * a[i][k];
-        }
-    }
-  return true;
-}
-
-static void solvep(double **a, double *x, double *b, int n)
-{
-  double *y = new double [n];
-
-  for (int i=0; i<n; i++)
-    {
-      y[i] = b[i];
-      for (int j=0; j<i; j++)
-        y[i] -= a[i][j] * y[j];
-    }
-
-  for (int i=n-1; i>=0; i--)
-    {
-      x[i] = y[i];
-      for (int j=i+1; j<n; j++)
-        x[i] -= a[i][j] * x[j];
-      x[i] *= a[i][i]; // needed because a[i][i] is inverted
-    }
-  delete[] y;
-}
-
-static void transpose(double **a, int n)
-{
-  double v;
-  for (int i=0; i<n; i++)
-    {
-      for (int j=0; j<i; j++)
-        {
-          v = a[i][j];
-          a[i][j] = a[j][i];
-          a[j][i] = v;
-        }
-    }
-}
-
-static bool invert(double **a, double **inv, int n)
-{
-  int* idx = new int [n];
-
-  double **e = makematrix(n,n,0);
-
-  if(!lu_pivoting(a, idx, n))
-    return false;
-
-  for (int i=0; i<n; i++)
-    e[idx[i]][i] = 1; // transposed
-
-  delete[] idx;
-
-  for (int i=0; i<n; i++)
-    solvep(a, inv[i], e[i], n);
-
-  deletematrix(e, n);
-  transpose(inv, n); // transpose inv to make the inverted matrix of a
-  return true;
-}
-
-static bool calculate_inv_sum(double **matrix, int alpha_size, double tau, double* inv_sum)
-{
-  double **m = makematrix(alpha_size, alpha_size, 0);
-  double **y = makematrix(alpha_size, alpha_size, 0);
-
   for (int i=0; i<alpha_size; i++)
     for (int j=0; j<alpha_size; j++)
-      m[i][j] = exp(tau * matrix[i][j]);
+      tmpMat[i][j] = exp(tau * matrix[i][j]);
 
-  if(!invert(m, y, alpha_size))
-    return false;
+  std::fill_n(tmpVec, alpha_size, 1.0);
 
-  *inv_sum = summatrix(y, alpha_size, alpha_size);
+  try {
+    cbrc::linalgSolve(tmpMat, tmpVec, alpha_size);
+  } catch(...) {
+    return false;
+  }
 
-  deletematrix(m, alpha_size);
-  deletematrix(y, alpha_size);
+  *inv_sum = std::accumulate(tmpVec, tmpVec + alpha_size, 0.0);
   return true;
 }
 
@@ -265,12 +108,21 @@ bool LambdaCalculator::binary_search(double** matrix, int alpha_size, double lb,
   double r_sum=0;
   int iter=0;
 
+  std::vector<double> tmpVals(alpha_size * alpha_size);
+  std::vector<double *> tmpPtrs(alpha_size);
+  for (int i = 0; i < alpha_size; ++i)
+    tmpPtrs[i] = &tmpVals[i * alpha_size];
+  double **tmpMat = &tmpPtrs[0];
+
+  double *tmpVec = &letprob2[0];
+
   while (iter < maxiter && (l>=r || (l_sum < 1.0 && r_sum < 1.0) || (l_sum > 1.0 && r_sum > 1.0)))
     {
       l = lb + (ub - lb)*rand()/RAND_MAX;
       r = lb + (ub - lb)*rand()/RAND_MAX;
 
-      if (!calculate_inv_sum(matrix, alpha_size, l, &l_sum) || !calculate_inv_sum(matrix, alpha_size, r, &r_sum))
+      if (!calculate_inv_sum(matrix, alpha_size, l, &l_sum, tmpMat, tmpVec) ||
+	  !calculate_inv_sum(matrix, alpha_size, r, &r_sum, tmpMat, tmpVec))
         {
           l = 0;
           r = 0;
@@ -285,7 +137,7 @@ bool LambdaCalculator::binary_search(double** matrix, int alpha_size, double lb,
     {
       double mid = (l + r)/2.0;
       double mid_sum;
-      if (!calculate_inv_sum(matrix, alpha_size, mid, &mid_sum))
+      if (!calculate_inv_sum(matrix, alpha_size, mid, &mid_sum, tmpMat, tmpVec))
         return false;
 
       if (fabs(mid_sum) >= DBL_MAX)
@@ -309,7 +161,7 @@ bool LambdaCalculator::binary_search(double** matrix, int alpha_size, double lb,
 
   if (fabs(l_sum - 1.0) < fabs(r_sum - 1.0))
     {
-      if (check_lambda(matrix, l, alpha_size, letprob1, letprob2))
+      if (check_lambda(matrix, l, alpha_size, letprob1, letprob2, tmpMat))
         {
           *lambda = l;
           return true;
@@ -317,7 +169,7 @@ bool LambdaCalculator::binary_search(double** matrix, int alpha_size, double lb,
       return false;
     }
 
-  if (check_lambda(matrix, r, alpha_size, letprob1, letprob2))
+  if (check_lambda(matrix, r, alpha_size, letprob1, letprob2, tmpMat))
     {
       *lambda = r;
       return true;
@@ -346,63 +198,62 @@ double LambdaCalculator::calculate_lambda(double** matrix, int alpha_size, vecto
   return lambda;
 }
 
-bool LambdaCalculator::check_lambda(double** matrix, double lambda, int alpha_size, vector<double>& letprob1, vector<double>& letprob2)
+bool LambdaCalculator::check_lambda(double** matrix, double lambda, int alpha_size, vector<double>& letprob1, vector<double>& letprob2, double** tmpMat)
 {
-  double **m = makematrix(alpha_size, alpha_size, 0);
-  double **y = makematrix(alpha_size, alpha_size, 0);
-
   for (int i=0; i<alpha_size; i++)
     for (int j=0; j<alpha_size; j++)
-      m[i][j] = exp(lambda * matrix[i][j]);
+      tmpMat[i][j] = exp(lambda * matrix[i][j]);
 
-  invert(m, y, alpha_size);
+  std::fill_n(&letprob2[0], alpha_size, 1.0);
+  cbrc::linalgSolve(tmpMat, &letprob2[0], alpha_size);
 
   for (int i=0; i<alpha_size; i++)
     {
-      double p = 0;
-      for (int j=0;j<alpha_size; j++)
-        p += y[i][j];
+      double p = letprob2[i];
       if (p < 0 || p > 1)
-        {
-          letprob2.clear();
-          return false;
-        }
-      letprob2.push_back(roundToFewDigits(p));
+	return false;
+      letprob2[i] = roundToFewDigits(p);
     }
 
+  for (int i=0; i<alpha_size; i++)
+    for (int j=0; j<alpha_size; j++)
+      tmpMat[i][j] = exp(lambda * matrix[j][i]);
+
+  std::fill_n(&letprob1[0], alpha_size, 1.0);
+  cbrc::linalgSolve(tmpMat, &letprob1[0], alpha_size);
+
   for (int j=0; j<alpha_size; j++)
     {
-      double q = 0;
-      for (int i=0; i<alpha_size; i++)
-        q += y[i][j];
+      double q = letprob1[j];
       if (q < 0 || q > 1)
-        {
-          letprob2.clear();
-          letprob1.clear();
-          return false;
-        }
-      letprob1.push_back(roundToFewDigits(q));
+	return false;
+      letprob1[j] = roundToFewDigits(q);
     }
 
-  deletematrix(m, alpha_size);
-  deletematrix(y, alpha_size);
-
   return true;
 }
 
 void LambdaCalculator::calculate(const const_int_ptr *matrix, int alphSize) {
   assert(alphSize >= 0);
-  setBad();
+  lambda_ = -1;
+  letterProbs1_.resize(alphSize);
+  letterProbs2_.resize(alphSize);
 
   int maxiter = 1000;
   int max_boundary_search_iter = 100;
   double lb_ratio = 1e-6;
 
-  double** mat = makematrix(alphSize, alphSize, 0);
+  std::vector<double> matVals(alphSize * alphSize);
+  std::vector<double *> matPtrs(alphSize);
+  for (int i = 0; i < alphSize; ++i)
+    matPtrs[i] = &matVals[i * alphSize];
+  double** mat = &matPtrs[0];
+
   for (int i=0; i<alphSize; i++)
     for (int j=0; j<alphSize; j++)
       mat[i][j] = matrix[i][j];
+
+  // xxx srand ?
   lambda_ = calculate_lambda(mat, alphSize, letterProbs1_, letterProbs2_, maxiter, max_boundary_search_iter, lb_ratio);
-  deletematrix(mat, alphSize);
 }
 }


=====================================
src/LambdaCalculator.hh
=====================================
@@ -14,11 +14,23 @@
 #define LAMBDA_CALCULATOR_HH
 
 #include <vector>
+#include <stdio.h>  // sprintf
+#include <stdlib.h>  // atof
 
 namespace cbrc{
 
 typedef const int *const_int_ptr;
 
+inline double roundToFewDigits(double x)
+{
+  // This rounding fixes some inaccuracies, e.g. for DNA with a simple
+  // match/mismatch matrix it's likely to make all the probabilities
+  // exactly 0.25, as they should be.
+  char buffer[32];
+  sprintf(buffer, "%g", x);
+  return atof(buffer);
+}
+
 class LambdaCalculator{
  public:
   LambdaCalculator() { setBad(); }
@@ -50,7 +62,7 @@ class LambdaCalculator{
   bool find_ub(double **matrix, int alpha_size, double *ub);
   bool binary_search(double** matrix, int alpha_size, double lb, double ub, std::vector<double>& letprob1, std::vector<double>& letprob2, double* lambda, int maxiter);
   double calculate_lambda(double** matrix, int alpha_size, std::vector<double>& letprob1, std::vector<double>& letprob2, int maxiter, int max_boundary_search_iter, double lb_ratio);
-  bool check_lambda(double** matrix, double lambda, int alpha_size, std::vector<double>& letprob1, std::vector<double>& letprob2);
+  bool check_lambda(double** matrix, double lambda, int alpha_size, std::vector<double>& letprob1, std::vector<double>& letprob2, double** tmpMat);
 };
 
 }  // end namespace


=====================================
src/Makefile
=====================================
@@ -1,6 +1,4 @@
-CXXFLAGS = -O3 -Wall -W -Wcast-qual -Wswitch-enum -Wundef	\
--Wcast-align -Wold-style-cast
-# -Wconversion
+CXXFLAGS = -O3 -Wall
 
 all: tantan
 
@@ -10,11 +8,12 @@ tantan: *.cc *.hh version.hh Makefile
 clean:
 	rm -f tantan
 
-VERSION = \"`hg id -n`\"
+VERSION1 = git describe --dirty
+VERSION2 = echo ' (HEAD -> main, tag: 26) ' | sed -e 's/.*tag: *//' -e 's/[,) ].*//'
+
+VERSION = \"`test -e ../.git && $(VERSION1) || $(VERSION2)`\"
 
 version.hh: FORCE
-	if test -e ../.hg; \
-	then echo $(VERSION) | cmp -s $@ - || echo $(VERSION) > $@ ; \
-	fi
+	echo $(VERSION) | cmp -s $@ - || echo $(VERSION) > $@
 
 FORCE:


=====================================
src/cbrc_linalg.cc
=====================================
@@ -0,0 +1,38 @@
+// Copyright 2013 Martin C. Frith
+
+#include "cbrc_linalg.hh"
+#include <algorithm>
+#include <cmath>
+#include <stdexcept>
+
+namespace cbrc {
+
+void linalgSolve(double **m, double *v, unsigned s) {
+  for (unsigned k = 0; k < s; ++k) {
+    // partial pivoting:
+    unsigned iMax = k;
+    for (unsigned i = k; i < s; ++i)
+      if (std::fabs(m[i][k]) > std::fabs(m[iMax][k]))
+	iMax = i;
+    if (iMax > k) {
+      std::swap_ranges(m[k], m[k] + s, m[iMax]);
+      std::swap(v[k], v[iMax]);
+    }
+
+    if (m[k][k] == 0.0)
+      throw std::runtime_error("singular matrix");
+
+    for (unsigned i = 0; i < s; ++i) {
+      if (i == k) continue;
+      double q = m[i][k] / m[k][k];
+      for (unsigned j = k; j < s; ++j)
+	m[i][j] -= q * m[k][j];
+      v[i] -= q * v[k];
+    }
+  }
+
+  for (unsigned k = 0; k < s; ++k)
+    v[k] /= m[k][k];
+}
+
+}


=====================================
src/cbrc_linalg.hh
=====================================
@@ -0,0 +1,27 @@
+// Copyright 2013 Martin C. Frith
+
+// This routine solves simultaneous linear equations, such as:
+//
+//   m11 * x1  +  m12 * x2  +  m13 * x3   =   v1
+//   m21 * x1  +  m22 * x2  +  m23 * x3   =   v2
+//   m31 * x1  +  m32 * x2  +  m33 * x3   =   v3
+//
+// We know m and v, and we wish to determine x.
+//
+// Input: m should be a matrix of size s*s, and v should be a vector
+// of length s.
+//
+// Output: the result is written into v.  The calculation modifies m.
+//
+// If the matrix is singular, an error is thrown.
+
+#ifndef CBRC_LINALG_HH
+#define CBRC_LINALG_HH
+
+namespace cbrc {
+
+void linalgSolve(double **m, double *v, unsigned s);
+
+}
+
+#endif


=====================================
src/mcf_tantan_options.cc
=====================================
@@ -92,9 +92,6 @@ Options (default settings):\n\
       + stringify(outputType) + ")\n\
  -h, --help  show help message, then exit\n\
  --version   show version information, then exit\n\
-\n\
-Report bugs to: tantan at cbrc.jp\n\
-Home page: http://www.cbrc.jp/tantan/\n\
 ";
   // -k for transition cost?
 


=====================================
src/version.hh deleted
=====================================
@@ -1 +0,0 @@
-"23"



View it on GitLab: https://salsa.debian.org/med-team/tantan/-/commit/3505195e287dd21c761e437b5e1d2c967decfa34

-- 
View it on GitLab: https://salsa.debian.org/med-team/tantan/-/commit/3505195e287dd21c761e437b5e1d2c967decfa34
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20210602/85fbc068/attachment-0001.htm>