[med-svn] [hinge] 02/04: Imported Upstream version 0.42

Afif Elghraoui afif at moszumanska.debian.org
Tue May 23 05:08:10 UTC 2017


This is an automated email from the git hooks/post-receive script.

afif pushed a commit to branch master
in repository hinge.

commit e62bdef68f61721583efabc993006f06b257cd3d
Author: Afif Elghraoui <afif at debian.org>
Date:   Mon May 22 21:50:40 2017 -0400

    Imported Upstream version 0.42
---
 .gitignore                                         |    1 +
 .travis.yml                                        |   17 +-
 CMakeLists.txt                                     |   10 +-
 README.md                                          |   60 +-
 demo/NCTC9657_demo/run.sh                          |   37 +-
 demo/ecoli_P4_demo/run.sh                          |   40 +
 demo/ecoli_demo/run.sh                             |   29 +-
 demo/ecoli_demo/run_norevcomp.sh                   |   40 +
 demo/ecoli_nanopore/run.sh                         |   41 +
 demo/yeast_W303_demo/nominal.ini                   |   33 +
 demo/yeast_W303_demo/run.sh                        |   42 +
 docker/README.md                                   |    3 +-
 LICENSE => licence.txt                             |    2 +-
 misc/param_description1.png                        |  Bin 0 -> 10988 bytes
 parameter_description.md                           |   78 ++
 scripts/add_groundtruth.py                         |    0
 scripts/condense_graph.py                          |    0
 scripts/condense_graph_and_annotate.py             |    0
 scripts/condense_graph_annotate_clip_ends.py       |    0
 scripts/condense_graph_create_gfa_compute_n50.py   |    0
 scripts/condense_graph_with_gt.py                  |    0
 scripts/create_hgraph.py                           |    0
 scripts/draw_pileup_region_find_bridges.py         |  158 +++
 scripts/fasta_to_fastq.py                          |   20 +
 scripts/get_draft_path.py                          |  692 +++++-----
 scripts/get_draft_path_norevcomp.py                |  516 +-------
 scripts/get_single_strand.py                       |   16 +
 scripts/merge_hinges.py                            |    0
 scripts/pipeline_consensus.py                      |    0
 scripts/pipeline_consensus_norevcomp.py            |    0
 scripts/pipeline_nctc.py                           |    0
 scripts/pruning_and_clipping.py                    |  375 ++++--
 ...ipping2.py => pruning_and_clipping_nanopore.py} |  231 ++--
 scripts/random_condensation.py                     |    0
 scripts/repeat_annotate_reads.py                   |    0
 scripts/split_las.py                               |   21 +
 src/CMakeLists.txt                                 |   17 +
 src/consensus/CMakeLists.txt                       |    3 +-
 src/consensus/consensus.cpp                        |  104 +-
 src/consensus/draft.cpp                            | 1361 +++++++++++---------
 src/consensus/draft_chopper.cpp                    |   24 +-
 src/filter/CMakeLists.txt                          |    2 +
 src/filter/filter.cpp                              |  287 ++---
 src/hinge                                          |   58 +
 src/hinge.1.md                                     |   46 +
 src/include/LAInterface.h                          |   32 +-
 src/layout/CMakeLists.txt                          |    2 +
 src/layout/hinging.cpp                             |  717 ++++++-----
 src/lib/LAInterface.cpp                            |  393 ++++--
 src/maximal/CMakeLists.txt                         |    6 +
 src/maximal/maximal.cpp                            |  860 +++++++++++++
 utils/build.sh                                     |    3 +-
 utils/setup.sh                                     |    7 +-
 53 files changed, 3945 insertions(+), 2439 deletions(-)

diff --git a/.gitignore b/.gitignore
index 1f5e5ee..cdcac1d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,3 +9,4 @@ build
 notebook
 demo
 .idea
+inst/
diff --git a/.travis.yml b/.travis.yml
index 2ec13a2..0a4f0d8 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -4,10 +4,14 @@ compiler: gcc
 
 sudo: required
 
-install: sudo apt-get install build-essential &&
+install: sudo apt-get update && sudo apt-get install build-essential &&
     sudo apt-get install libboost-dev &&
-         sudo apt-get install libboost-all-dev && 
-         wget http://www.cmake.org/files/v3.2/cmake-3.2.2.tar.gz --no-check-certificate &&
+         sudo apt-get install libboost-all-dev &&
+         sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y &&
+         sudo apt-get update; sudo apt-get install gcc-4.8 g++-4.8 -y &&
+         gcc-4.8 --version &&
+         which gcc-4.8 &&
+         wget http://www.cmake.org/files/v3.2/cmake-3.2.2.tar.gz  &&
          tar xf cmake-3.2.2.tar.gz &&
          cd cmake-3.2.2 &&
          ./configure &&
@@ -15,12 +19,7 @@ install: sudo apt-get install build-essential &&
          sudo make install &&
          cmake --version &&
          pwd &&
-         cd .. &&
-         sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y &&
-         sudo apt-get update; sudo apt-get install gcc-4.8 g++-4.8 -y --force-yes&&
-         gcc-4.8 --version &&
-         which gcc-4.8
-
+         cd ..
 script: ./utils/build.sh
 
 notifications:
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1bae40c..f91b4b6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,3 +1,11 @@
 cmake_minimum_required(VERSION 3.2)
 project(AwAssembler)
-add_subdirectory(src bin)
\ No newline at end of file
+
+set(libexec "lib/hinge")
+add_subdirectory(src bin)
+
+install(
+DIRECTORY scripts/
+DESTINATION ${libexec}
+USE_SOURCE_PERMISSIONS
+)
diff --git a/README.md b/README.md
index a3234fb..cbec182 100644
--- a/README.md
+++ b/README.md
@@ -3,9 +3,11 @@ Software accompanying  "HINGE: Long-Read Assembly Achieves Optimal Repeat Resolu
 
 - Preprint: http://biorxiv.org/content/early/2016/08/01/062117
 
+- Paper: http://genome.cshlp.org/content/early/2017/03/20/gr.216465.116.abstract
+
 - An ipython notebook to reproduce results in the paper can be found in this [repository](https://github.com/govinda-kamath/HINGE-analyses).
 
-CI Status: ![image](https://travis-ci.org/fxia22/HINGE.svg?branch=master)
+CI Status: ![image](https://travis-ci.org/HingeAssembler/HINGE.svg?branch=master)
 
 
 
@@ -47,35 +49,9 @@ It can be analyzed and visualized, etc.
 
 ## Parameters
 
-In the pipeline described above, most programs not only takes the input file and output file as arguments, but also require a configuration file in ini format. This consists parameters for each step in the pipeline, and their usage and effects are explained below:
-
-
-###[filter]
-- length_threshold = 6500; // Length threshold for reads to be considered in the backbone
-- quality_threshold = 0.23; // Quality threshold for edges to be considered in the backbone 
-- n_iter = 2; // iterations of filtering, the filtering needs several iterations, because when filter reads, you got rid of some edges; when filter edges, you got rid of some reads (if the last edge is filtered.) Typically 2-3 iterations will be enough.
-- aln_threshold = 2500; // Length of alignment for edges to be considered in the backbone
-- min_cov = 5; // Minimal coverage for a segment to be considered not chimer/adaptor
-- cut_off = 200; // A parameter for identifying long chimer in the middle of a read
-- theta = 300; // A parameter for tolerance of the overhang length when looking for right extension.
-
-
-###[running]
-- n_proc = 12; // number of CPUs for layout step
-
-###[draft]
-- min_cov = 10; //obsolete
-- trim = 200; //obsolete
-- edge_safe = 100; //obsolete
-- tspace = 900; //space between new "trace points"
+In the pipeline described above, several programs load their parameters from a configuration file in the ini format.  All tunable parameters are described in [this document](parameter_description.md).
 
 
-###[consensus]
-- min_length = 2000; // Minimal length of reads used for final consensus
-- trim_end = 200; // Trim ends for alignments for final consensus
-- best_n = 1; // If one read has multiple alignments with the bacbone assembly, choose the longest n segments for consensus.
-- quality_threshold = 0.23; // alignment quality threshold
-
 # Installation
 
 ## Dependencies
@@ -88,6 +64,7 @@ In the pipeline described above, most programs not only takes the input file and
 The following python packages are necessary:
 - numpy
 - ujson
+- configparser
 - colormap
 - easydev.tools
 
@@ -102,6 +79,8 @@ git submodule update
 ./utils/build.sh
 ```
 
+Alternatively, you can use docker to build and use HINGE, see [this guide](https://github.com/HingeAssembler/HINGE/tree/master/docker) for more information. 
+
 # Running
 
 In order to call the programs from anywhere, I suggest one export the directory of binary file to system environment, you can do that by using the script `setup.sh`. The parameters are initialised in `utils/nominal.ini`. The path to nominal.ini has to be specified to run the scripts.
@@ -125,35 +104,40 @@ DASqv -c100 ecoli ecoli.las
 # Run filter
 
 mkdir log
-Reads_filter --db ecoli --las ecoli.las -x ecoli --config <path-to-nominal.ini>
+hinge filter --db ecoli --las ecoli.las -x ecoli --config <path-to-nominal.ini>
+
+# Get maximal reads
+
+hinge maximal --db ecoli --las ecoli.las -x ecoli --config <path-to-nominal.ini>
 
 # Run layout
 
-hinging --db ecoli --las ecoli.las -x ecoli --config <path-to-nominal.ini> -o ecoli
+hinge layout --db ecoli --las ecoli.las -x ecoli --config <path-to-nominal.ini> -o ecoli
 
 # Run postprocessing
 
-python pruning_and_clipping.py ecoli.edges.hinges ecoli.hinge.list <identifier-of-run>
+hinge clip ecoli.edges.hinges ecoli.hinge.list <identifier-of-run>
 
 
 # get draft assembly 
 
-get_draft_path.py <working directory> ecoli ecoli<identifier-of-run>.G2.graphml
-draft_assembly --db ecoli --las ecoli.las --prefix ecoli --config <path-to-nominal.ini> --out ecoli.draft
+hinge draft-path <working directory> ecoli ecoli<identifier-of-run>.G2.graphml
+hinge draft --db ecoli --las ecoli.las --prefix ecoli --config <path-to-nominal.ini> --out ecoli.draft
 
 
 # get consensus assembly
 
-correct_head.py ecoli.draft.fasta ecoli.draft.pb.fasta draft_map.txt
+hinge correct-head ecoli.draft.fasta ecoli.draft.pb.fasta draft_map.txt
 fasta2DB draft ecoli.draft.pb.fasta 
-HPC.daligner draft ecoli | zsh -v  
-LAmerge draft.ecoli.las draft.ecoli.*.las
-consensus draft ecoli draft.ecoli.las ecoli.consensus.fasta utils/nominal.ini
-get_consensus_gfa.py <working directory> ecoli ecoli.consensus.fasta
+HPC.daligner ecoli draft | zsh -v  
+hinge consensus draft ecoli draft.ecoli.las ecoli.consensus.fasta <path-to-nominal.ini>
+hinge gfa <working directory> ecoli ecoli.consensus.fasta
 
 #results should be in ecoli_consensus.gfa
 ```
 
+
+
 ## Analysis of Results
 
 ### showing ground truth on graph
diff --git a/demo/NCTC9657_demo/run.sh b/demo/NCTC9657_demo/run.sh
index 2c36c03..9bdbf72 100644
--- a/demo/NCTC9657_demo/run.sh
+++ b/demo/NCTC9657_demo/run.sh
@@ -1,4 +1,4 @@
-correct_head.py NCTC9657_reads.fasta reads.pb.fasta map.txt
+hinge correct-head NCTC9657_reads.fasta reads.pb.fasta map.txt
 fasta2DB NCTC9657 reads.pb.fasta
 
 DBsplit NCTC9657
@@ -11,20 +11,33 @@ DASqv -c100 NCTC9657 NCTC9657.las
 
 mkdir log
 
-Reads_filter --db NCTC9657 --las NCTC9657.las -x NCTC9657 --config ../../utils/nominal.ini
-hinging --db NCTC9657 --las NCTC9657.las -x NCTC9657 --config ../../utils/nominal.ini -o NCTC9657
 
-pruning_and_clipping.py NCTC9657.edges.hinges NCTC9657.hinge.list demo
 
-get_draft_path.py $PWD NCTC9657 NCTC9657demo.G2.graphml
-draft_assembly --db NCTC9657 --las NCTC9657.las --prefix NCTC9657 --config ../../utils/nominal.ini --out NCTC9657.draft
+hinge filter --db NCTC9657 --las NCTC9657 --mlas -x NCTC9657 --config ../../utils/nominal.ini
+hinge layout --db NCTC9657 --las NCTC9657.las -x NCTC9657 --config ../../utils/nominal.ini -o NCTC9657
 
-correct_head.py NCTC9657.draft.fasta NCTC9657.draft.pb.fasta draft_map.txt 
+hinge clip NCTC9657.edges.hinges NCTC9657.hinge.list demo
+
+hinge draft-path $PWD NCTC9657 NCTC9657demo.G2.graphml
+hinge draft --db NCTC9657 --las NCTC9657.las --prefix NCTC9657 --config ../../utils/nominal.ini --out NCTC9657.draft
+
+
+
+
+
+
+hinge correct-head NCTC9657.draft.fasta NCTC9657.draft.pb.fasta draft_map.txt 
 fasta2DB draft NCTC9657.draft.pb.fasta
-HPC.daligner draft NCTC9657 | bash -v 
+HPC.daligner NCTC9657 draft | bash -v 
+
+# rm draft.*.NCTC9657.*.las
+# LAmerge draft.NCTC9657.las draft.NCTC9657.*.las
+
+hinge consensus draft NCTC9657 draft.NCTC9657.las NCTC9657.consensus.fasta ../../utils/nominal.ini
+
+hinge gfa $PWD NCTC9657 NCTC9657.consensus.fasta
+
+
+
 
-rm draft.*.NCTC9657.*.las
-LAmerge draft.NCTC9657.las draft.NCTC9657.*.las
-consensus draft NCTC9657 draft.NCTC9657.las NCTC9657.consensus.fasta ../../utils/nominal.ini
 
-get_consensus_gfa.py $PWD NCTC9657 NCTC9657demo.G2.graphml NCTC9657.consensus.fasta
diff --git a/demo/ecoli_P4_demo/run.sh b/demo/ecoli_P4_demo/run.sh
new file mode 100644
index 0000000..5b9e9b3
--- /dev/null
+++ b/demo/ecoli_P4_demo/run.sh
@@ -0,0 +1,40 @@
+wget -nv http://files.pacb.com/datasets/secondary-analysis/ecoli-k12-P4C2-20KSS/ecoliK12.tar.gz
+tar -zxf ecoliK12.tar.gz
+
+dextract -o ecoliK12/Analysis_Results/*.bax.h5
+fasta2DB ecoli m130404_014004_sidney_c100506902550000001823076808221337_s1_p0.fasta
+
+
+DBsplit ecoli
+
+HPC.daligner ecoli | bash -v
+
+rm ecoli.*.ecoli.*.las
+LAmerge ecoli.las ecoli.[0-9].las
+DASqv -c100 ecoli ecoli.las
+
+mkdir log
+
+
+
+hinge filter --db ecoli --las "ecoli.*.las" -x ecoli --config ../../utils/nominal.ini
+hinge layout --db ecoli --las ecoli.las -x ecoli --config ../../utils/nominal.ini -o ecoli
+
+hinge clip ecoli.edges.hinges ecoli.hinge.list demo
+
+hinge draft-path $PWD ecoli ecolidemo.G2.graphml
+hinge draft --db ecoli --las ecoli.las --prefix ecoli --config ../../utils/nominal.ini --out ecoli.draft
+
+
+
+hinge correct-head ecoli.draft.fasta ecoli.draft.pb.fasta draft_map.txt 
+fasta2DB draft ecoli.draft.pb.fasta
+
+HPC.daligner ecoli draft | bash -v 
+
+#rm draft.*.ecoli.*.las
+#LAmerge draft.ecoli.las draft.ecoli.*.las
+
+hinge consensus draft ecoli draft.ecoli.las ecoli.consensus.fasta ../../utils/nominal.ini
+
+hinge gfa $PWD ecoli  ecoli.consensus.fasta
diff --git a/demo/ecoli_demo/run.sh b/demo/ecoli_demo/run.sh
index 52105a5..9640f26 100644
--- a/demo/ecoli_demo/run.sh
+++ b/demo/ecoli_demo/run.sh
@@ -1,8 +1,8 @@
-#wget http://gembox.cbcb.umd.edu/mhap/raw/ecoli_p4_filtered.fastq.gz
-#gunzip ecoli_p4_filtered.fastq.gz
+wget http://gembox.cbcb.umd.edu/mhap/raw/ecoli_p4_filtered.fastq.gz
+gunzip ecoli_p4_filtered.fastq.gz
 
 seqtk seq -a ecoli_p4_filtered.fastq > reads.fasta
-correct_head.py reads.fasta reads.pb.fasta map.txt
+hinge correct-head reads.fasta reads.pb.fasta map.txt
 fasta2DB ecoli reads.pb.fasta
 
 
@@ -18,24 +18,27 @@ mkdir log
 
 
 
-Reads_filter --db ecoli --las "ecoli.*.las" -x ecoli --config ../../utils/nominal.ini
-hinging --db ecoli --las ecoli.las -x ecoli --config ../../utils/nominal.ini -o ecoli
+hinge filter --db ecoli --las ecoli --mlas -x ecoli --config ../../utils/nominal.ini
 
-pruning_and_clipping.py ecoli.edges.hinges ecoli.hinge.list demo
+hinge maximal --db ecoli --las ecoli --mlas -x ecoli --config ../../utils/nominal.ini
 
-get_draft_path.py $PWD ecoli ecolidemo.G2.graphml
-draft_assembly --db ecoli --las ecoli.las --prefix ecoli --config ../../utils/nominal.ini --out ecoli.draft
+hinge layout --db ecoli --las ecoli.las -x ecoli --config ../../utils/nominal.ini -o ecoli
 
+hinge clip ecoli.edges.hinges ecoli.hinge.list demo
 
+hinge draft-path $PWD ecoli ecolidemo.G2.graphml
+hinge draft --db ecoli --las ecoli.las --prefix ecoli --config ../../utils/nominal.ini --out ecoli.draft
 
-correct_head.py ecoli.draft.fasta ecoli.draft.pb.fasta draft_map.txt 
+
+
+hinge correct-head ecoli.draft.fasta ecoli.draft.pb.fasta draft_map.txt 
 fasta2DB draft ecoli.draft.pb.fasta
 
 HPC.daligner ecoli draft | bash -v 
 
-# rm draft.*.ecoli.*.las
-# LAmerge draft.ecoli.las draft.ecoli.*.las
+#rm draft.*.ecoli.*.las
+#LAmerge draft.ecoli.las draft.ecoli.*.las
 
-consensus draft ecoli draft.ecoli.las ecoli.consensus.fasta ../../utils/nominal.ini
+hinge consensus draft ecoli draft.ecoli.las ecoli.consensus.fasta ../../utils/nominal.ini
 
-get_consensus_gfa.py $PWD ecoli ecolidemo.G2.graphml ecoli.consensus.fasta
+hinge gfa $PWD ecoli  ecoli.consensus.fasta
diff --git a/demo/ecoli_demo/run_norevcomp.sh b/demo/ecoli_demo/run_norevcomp.sh
new file mode 100644
index 0000000..3e792f2
--- /dev/null
+++ b/demo/ecoli_demo/run_norevcomp.sh
@@ -0,0 +1,40 @@
+wget http://gembox.cbcb.umd.edu/mhap/raw/ecoli_p4_filtered.fastq.gz
+gunzip ecoli_p4_filtered.fastq.gz
+
+seqtk seq -a ecoli_p4_filtered.fastq > reads.fasta
+correct_head.py reads.fasta reads.pb.fasta map.txt
+fasta2DB ecoli reads.pb.fasta
+
+
+DBsplit ecoli
+
+HPC.daligner ecoli | bash -v
+
+rm ecoli.*.ecoli.*.las
+LAmerge ecoli.las ecoli.[0-9].las
+DASqv -c100 ecoli ecoli.las
+
+mkdir -p log
+
+
+
+Reads_filter --db ecoli --las ecoli --mlas -x ecoli --config ~/AwesomeAssembler/utils/nominal.ini
+hinging --db ecoli --las ecoli.las -x ecoli --config ~/AwesomeAssembler/utils/nominal.ini -o ecoli
+
+pruning_and_clipping.py ecoli.edges.hinges ecoli.hinge.list demo
+
+get_draft_path.py $PWD ecoli ecolidemo.G2.graphml
+draft_assembly --db ecoli --las ecoli.las --prefix ecoli --confi ~/AwesomeAssembler/utils/nominal.ini --out ecoli.draft
+
+get_draft_path_norevcomp.py ecoli.draft.fasta ecoli.draft.norevcomp.fasta
+
+correct_head.py ecoli.draft.norevcomp.fasta ecoli.draft.pb.fasta draft_map.txt 
+fasta2DB draft ecoli.draft.pb.fasta
+
+HPC.daligner ecoli draft | bash -v 
+
+rm draft.*.ecoli.*.las
+LAmerge draft.ecoli.las draft.ecoli.*.las
+
+consensus draft ecoli draft.ecoli.las ecoli.consensus.fasta ~/AwesomeAssembler/utils/nominal.ini
+
diff --git a/demo/ecoli_nanopore/run.sh b/demo/ecoli_nanopore/run.sh
new file mode 100644
index 0000000..9953ae4
--- /dev/null
+++ b/demo/ecoli_nanopore/run.sh
@@ -0,0 +1,41 @@
+wget http://s3.climb.ac.uk/nanopore/R9_Ecoli_K12_MG1655_lambda_MinKNOW_0.51.1.62.all.fasta
+#gunzip ecoli_p4_filtered.fastq.gz
+
+#seqtk seq -a ecoli_p4_filtered.fastq > reads.fasta
+hinge correct-head R9_Ecoli_K12_MG1655_lambda_MinKNOW_0.51.1.62.all.fasta reads.pb.fasta map.txt
+fasta2DB ecoli reads.pb.fasta
+
+
+DBsplit ecoli
+
+HPC.daligner ecoli | bash -v
+
+rm ecoli.*.ecoli.*.las
+LAmerge ecoli.las ecoli.[0-9].las
+DASqv -c100 ecoli ecoli.las
+
+mkdir log
+
+
+
+hinge filter --db ecoli --las ecoli --mlas -x ecoli --config ../../utils/nominal.ini
+hinge layout --db ecoli --las ecoli.las -x ecoli --config ../../utils/nominal.ini -o ecoli
+
+hinge clip-nanopore ecoli.edges.hinges ecoli.hinge.list demo
+
+hinge draft-path $PWD ecoli ecolidemo.G2.graphml
+hinge draft --db ecoli --las ecoli.las --prefix ecoli --config ../../utils/nominal.ini --out ecoli.draft
+
+
+
+hinge correct-head ecoli.draft.fasta ecoli.draft.pb.fasta draft_map.txt 
+fasta2DB draft ecoli.draft.pb.fasta
+
+HPC.daligner ecoli draft | bash -v 
+
+#rm draft.*.ecoli.*.las
+#LAmerge draft.ecoli.las draft.ecoli.*.las
+
+hinge consensus draft ecoli draft.ecoli.las ecoli.consensus.fasta ../../utils/nominal.ini
+
+hinge gfa $PWD ecoli  ecoli.consensus.fasta
diff --git a/demo/yeast_W303_demo/nominal.ini b/demo/yeast_W303_demo/nominal.ini
new file mode 100644
index 0000000..7c40384
--- /dev/null
+++ b/demo/yeast_W303_demo/nominal.ini
@@ -0,0 +1,33 @@
+
+[filter]
+length_threshold = 1000;
+quality_threshold = 0.23;
+n_iter = 3; // filter iteration
+aln_threshold = 1000;
+min_cov = 5;
+cut_off = 300;
+theta = 300;
+use_qv = true;
+
+[running]
+n_proc = 12;
+
+[draft]
+min_cov = 10;
+trim = 200;
+edge_safe = 100;
+tspace = 900;
+step = 50;
+
+
+[consensus]
+min_length = 4000;
+trim_end = 200;
+best_n = 1;
+quality_threshold = 0.23;
+
+[layout]
+hinge_slack = 1000
+min_connected_component_size = 8
+del_telomere = 1
+aggressive_pruning = 1
diff --git a/demo/yeast_W303_demo/run.sh b/demo/yeast_W303_demo/run.sh
new file mode 100644
index 0000000..d995b06
--- /dev/null
+++ b/demo/yeast_W303_demo/run.sh
@@ -0,0 +1,42 @@
+wget -nc -i https://gist.githubusercontent.com/pb-jchin/6359919/raw/9c172c7ff7cbc0193ce89e715215ce912f3f30e6/gistfile1.txt
+dextract -o *.bax.h5
+
+
+fasta2DB yeast m130605_000141_42207_c100515142550000001823076608221372_s1_p0.fasta 
+
+
+
+DBsplit yeast
+
+HPC.daligner yeast | bash -v
+
+rm yeast.*.yeast.*.las
+LAmerge yeast.las yeast.[0-9].las
+DASqv -c100 yeast yeast.las
+
+mkdir log
+
+
+hinge filter --db yeast --las yeast --mlas -x yeast --config nominal.ini
+hinge maximal --db yeast --las yeast --mlas -x yeast --config nominal.ini
+
+hinge layout --db yeast --las yeast -x yeast --config nominal.ini -o yeast
+
+hinge clip yeast.edges.hinges yeast.hinge.list demo
+
+hinge draft-path $PWD yeast yeastdemo.G3.graphml
+hinge draft --db yeast --las yeast.las --prefix yeast --config nominal.ini --out yeast.draft
+
+
+
+hinge correct-head yeast.draft.fasta yeast.draft.pb.fasta draft_map.txt 
+fasta2DB draft yeast.draft.pb.fasta
+
+HPC.daligner yeast draft | bash -v 
+
+rm draft.*.yeast.*.las
+LAmerge draft.yeast.las draft.yeast.*.las
+
+hinge consensus draft yeast draft.yeast.las yeast.consensus.fasta nominal.ini
+
+hinge gfa $PWD yeast  yeast.consensus.fasta
diff --git a/docker/README.md b/docker/README.md
index 40c2d3f..8b6e6a6 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -1,3 +1,4 @@
-#Docker Image Build Guide
+# Docker Image Build Guide
+
 This folder contains dockerfiles to build hinge for certain linux distributions. To use copy the dockerfile to root directory of the repository and run `docker build .`
 
diff --git a/LICENSE b/licence.txt
similarity index 98%
rename from LICENSE
rename to licence.txt
index fdd3efc..0a4ee5c 100644
--- a/LICENSE
+++ b/licence.txt
@@ -30,4 +30,4 @@ David Tse. All rights reserved.
   OR TORT (INCLUDING  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 
   THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.                                      
                                                                                      
-  For any issues regarding this software and its use, contact Fei Xia at xf1280 at gmail.com.
\ No newline at end of file
+  For any issues regarding this software and its use, contact Fei Xia at xf1280 at gmail.com.
diff --git a/misc/param_description1.png b/misc/param_description1.png
new file mode 100644
index 0000000..c17ce90
Binary files /dev/null and b/misc/param_description1.png differ
diff --git a/parameter_description.md b/parameter_description.md
new file mode 100644
index 0000000..92bda7d
--- /dev/null
+++ b/parameter_description.md
@@ -0,0 +1,78 @@
+## Parameters used by HINGE
+
+All the parameters below can be set using the .ini file read by the HINGE programs.
+
+
+
+###[filter]
+- length_threshold = 1000; // Minimum read length
+- aln_threshold = 2500; // Minimum alignment length between two reads to be considered when building graph
+- min_cov = 5; // Minimum coverage depth for a segment on a read to not be considered erroneous/chimeric
+- cut_off = 300; // When looking for chimeric segments, we look for coverage gaps on a read, after reducing all matches by cut_off in the beginning and in the end
+- theta = 300; // When classifying a match between two reads as a right/left overlap, internal match, etc., overhangs of length up to theta are ignored
+- use_qv = true; // Use qv scores provided by DAligner when creating the read masks (i.e., the part of the read that will actually be used for assembly)
+- coverage = true; // Use coverage values when creating the read masks. If both use_qv and coverage are set to true, an intersection of the two masks is taken.
+- coverage_frac_repeat_annotation = 3; 
+- min_repeat_annotation_threshold = 10;
+- max_repeat_annotation_threshold = 20; 
+
+// A repeat annotation is placed on the read at position i+reso if 
+
+``` |coverage[i]-coverage[i+reso]| > min( max( coverage[i+reso]/coverage_frac_repeat_annotation, min_repeat_annotation_threshold), max_repeat_annotation_threshold) ```
+
+- repeat_annotation_gap_threshold = 300; // How far two hinges of the same type can be on a read
+- no_hinge_region = 500; // Hinges cannot be placed within no_hinge_region of the start and end of the read
+- hinge_min_support = 7; // Minimum number of reads that have to start in a `reso` (default 40) length interval to be considered in hinge calling
+- hinge_unbridged = 6; // Number of reads that one has to see before a pileup to declare a potential hinge unbridged
+- hinge_bin = 100; // Physical length of the bins considered
+- hinge_tolerance_length = 100; // Matches starting within hinge_tolerance_length of a hinge are considered to be starting at the hinge
+
+<!-- - quality_threshold = 0.23; // Quality threshold for edges to be considered in the backbone -->
+<!--- n_iter = 2; // iterations of filtering, the filtering needs several iterations, because when filter reads, you got rid of some edges; when filter edges, you got rid of some reads (if the last edge is filtered.) Typically 2-3 iterations will be enough.-->
+<!-- - theta2 = 0; // When classifying a match between two reads as a right/left overlap, internal match, etc., an overhang must have length at least theta2 for the match to be seen as internal -->
+
+
+###[running]
+- n_proc = 12; // number of CPUs for layout step
+
+
+
+
+###[layout]
+
+- hinge_tolerance = 150; // This is how far an overlap must start from a hinge to be considered an internal overlap.
+- hinge_slack = 1000; // This is the amount by which  a forward overlap must be longer than a forward internal overlap to be preferred while building a graph.
+
+- matching_hinge_slack = 200; // We identify two in-hinges (out-hinges) on two different reads as corresponding to the same repeat event, if the reads match in the repeat part, and the two hinges are within matching_hinge_slack of each other
+- min_connected_component_size = 8; // In order to actually add a hinge to a read, we require that at least min_connected_component_size reads have a repeat annotation and they are all identified as the beginning (or end) of the same repeat
+- kill_hinge_overlap = 300; 
+- kill_hinge_internal = 40; 
+
+// When filtering hinges (so that only one in-hinge and one out-hinge are left for each repeat), we kill an in-hinge (out-hinge) if there is a forward (backward) extension read that starts at least kill_hinge_overlap before (after) the hinge, 
+or if there is a forward_internal (backward_internal) extension read that starts at most kill_hinge_internal after (before) the hinge, as illustrated below. 
+
+<img src="misc/param_description1.png" width=600px/>
+
+- num_events_telomere = 7; 
+- del_telomeres = 0; // If set to 1, any read with more than num_events_telomere repeat annotations will be classified as a telomere read and will be deleted.
+- aggressive_pruning = 0; //If set to 1, the pruning will be more aggressive. We recommend it be set to 1 for large
+genomes.
+- use_two_matches = 1; // Allow the HINGE algorithm to consider the top two matches between a pair of reads (as opposed to just the longest match)
+
+
+###[draft]
+<!--- min_cov = 10; //obsolete-->
+<!--- trim = 200; //obsolete-->
+<!--- edge_safe = 100; //obsolete-->
+- tspace = 900; //space between new "trace points"
+- step = 50;
+
+
+
+###[consensus]
+- min_length = 4000; // Minimal length of reads used for final consensus
+- trim_end = 200; // Trim ends for alignments for final consensus
+- best_n = 1; // If one read has multiple alignments with the backbone assembly, choose the longest n segments for consensus.
+- quality_threshold = 0.23; // alignment quality threshold
+
+
diff --git a/scripts/add_groundtruth.py b/scripts/add_groundtruth.py
old mode 100644
new mode 100755
diff --git a/scripts/condense_graph.py b/scripts/condense_graph.py
old mode 100644
new mode 100755
diff --git a/scripts/condense_graph_and_annotate.py b/scripts/condense_graph_and_annotate.py
old mode 100644
new mode 100755
diff --git a/scripts/condense_graph_annotate_clip_ends.py b/scripts/condense_graph_annotate_clip_ends.py
old mode 100644
new mode 100755
diff --git a/scripts/condense_graph_create_gfa_compute_n50.py b/scripts/condense_graph_create_gfa_compute_n50.py
old mode 100644
new mode 100755
diff --git a/scripts/condense_graph_with_gt.py b/scripts/condense_graph_with_gt.py
old mode 100644
new mode 100755
diff --git a/scripts/create_hgraph.py b/scripts/create_hgraph.py
old mode 100644
new mode 100755
diff --git a/scripts/draw_pileup_region_find_bridges.py b/scripts/draw_pileup_region_find_bridges.py
new file mode 100755
index 0000000..64b6a90
--- /dev/null
+++ b/scripts/draw_pileup_region_find_bridges.py
@@ -0,0 +1,158 @@
+#!/usr/bin/env python
+import numpy as np
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib.pyplot as plt
+from ipywidgets.widgets import interact
+import interface_utils as util
+import sys
+
+import os
+os.environ['PATH'] += ':~/AwesomeAssembler/DALIGNER'
+#print os.popen("export").read()
+
+left = int(sys.argv[1])
+right = int(sys.argv[2])
+
+ref = sys.argv[3]
+read = sys.argv[4]
+las = sys.argv[5]
+contig = sys.argv[6]
+length_th = int(sys.argv[7])
+bridge_begin = int(sys.argv[8])
+bridge_end = int(sys.argv[9])
+
+#path = '/data/pacbio_assembly/AwesomeAssembler/data/ecoli/'
+aln = []
+
+#bb = []
+#with open('ecoli.linear.edges') as f:
+#    for line in f:
+#        e = line.split(" ")[0]
+#        if e[-1] == '\'':
+#            e = e[:-1]
+#
+#        bb.append(int(e))
+#
+#print bb
+#
+#bb = set(bb)
+
+
+for i,item in enumerate(util.get_alignments_mapping3(ref, read, las, contig)):
+    #if i%2000 == 0:
+    #    print i, item
+
+    if item[3] >= left and item[4] <= right and item[4] - item[3] > length_th:
+        aln.append(item)
+
+
+
+covy = np.zeros((right - left, ))
+for item in aln:
+    covy[item[3] - left : item[4] - left] += 1
+
+covx = np.arange(left, right)
+
+#for i in range(0, len(covx), 10):
+#    print covx[i], covy[i]
+
+print 'number:',len(aln)
+aln.sort(key = lambda x:x[2])
+
+alns = []
+current_b = aln[0][2]
+aln_group = []
+
+for item in aln:
+    if current_b != item[2]:
+        alns.append(aln_group)
+        aln_group = []
+        aln_group.append(item)
+        current_b = item[2]
+    else:
+        aln_group.append(item)
+
+num = len(alns)
+
+print len(aln), len(alns)
+
+alns.sort(key = lambda x:min([item[3] for item in x]))
+
+
+
+fig = plt.figure(figsize = (15,10))
+plt.axes()
+ax1 = plt.subplot2grid((6,6), (0, 0), colspan=6, rowspan=4)
+ax2 = plt.subplot2grid((6,6), (4, 0), colspan=6, rowspan=1, sharex = ax1)
+
+
+#plt.gca().axes.get_yaxis().set_visible(False)
+#l = aln[0][5]
+tip = (right-left)/5000
+ed = (right-left)/2000
+grid_size = 1.0
+ax1.set_xlim(left-2000,right+2000)
+ax1.set_ylim(-5,num*grid_size)
+
+points = [[left,0], [right,0], [right+tip,grid_size/4], [right,grid_size/2], [left,grid_size/2]]
+#rectangle = plt.Rectangle((0, 0), l, 5, fc='r',ec = 'none')
+polygon = plt.Polygon(points,fc = 'r', ec = 'none', alpha = 0.6)
+ax1.add_patch(polygon)
+
+dotted_line = plt.Line2D((left, left), (0, num*grid_size ),ls='-.')
+ax1.add_line(dotted_line)
+
+dotted_line2 = plt.Line2D((right, right), (0, num*grid_size ),ls='-.')
+ax1.add_line(dotted_line2)
+
+for i,aln_group in enumerate(alns):
+    for item in aln_group:
+        abpos = item[3]
+        aepos = item[4]
+        if abpos < bridge_begin+200 and aepos > bridge_end-200:
+            print item
+        bbpos = item[6]
+        bepos = item[7]
+        blen = item[8]
+        strand = item[0]
+        points_start = []
+        points_end = []
+
+        if strand == 'n':
+            points = [[abpos, (i+1)*grid_size], [aepos, (i+1)*grid_size], [aepos + tip, (i+1)*grid_size + grid_size/4], [aepos, (i+1)*grid_size+grid_size/2], [abpos, (i+1)*grid_size+grid_size/2]]
+            if (bepos < blen):
+                points_end = [[aepos, (i+1)*grid_size], [aepos + tip, (i+1)*grid_size + grid_size/4], [aepos, (i+1)*grid_size+grid_size/2], [aepos+ed, (i+1)*grid_size+grid_size/2], [aepos + ed+ tip, (i+1)*grid_size + grid_size/4],  [aepos+ed, (i+1)*grid_size]]
+            if (bbpos > 0):
+                points_start = [[abpos, (i+1)*grid_size], [abpos, (i+1)*grid_size+grid_size/2], [abpos-ed, (i+1)*grid_size+grid_size/2], [abpos-ed, (i+1)*grid_size]]
+        else:
+            points = [[abpos, (i+1)*grid_size], [aepos, (i+1)*grid_size], [aepos, (i+1)*grid_size+grid_size/2], [abpos, (i+1)*grid_size+grid_size/2], [abpos - tip, (i+1)*grid_size + grid_size/4]]
+            if (bepos < blen):
+                points_end = [[aepos, (i+1)*grid_size],  [aepos, (i+1)*grid_size+grid_size/2], [aepos+ed, (i+1)*grid_size+grid_size/2], [aepos+ed, (i+1)*grid_size]]
+            if (bbpos > 0):
+                points_start = [[abpos, (i+1)*grid_size],[abpos-tip, (i+1)*grid_size+grid_size/4], [abpos, (i+1)*grid_size+grid_size/2], [abpos-ed, (i+1)*grid_size+grid_size/2],[abpos-ed-tip, (i+1)*grid_size+grid_size/4], [abpos-ed, (i+1)*grid_size]]
+
+        #if item[2] in bb:
+        #    polygon = plt.Polygon(points,fc = 'r', ec = 'none', alpha = 0.8)
+        #else:
+        polygon = plt.Polygon(points,fc = 'b', ec = 'none', alpha = 0.6)
+
+        polygon.set_url("http://shannon.stanford.edu:5000/aln" + str(item[2]+1) + ".pdf")
+        ax1.add_patch(polygon)
+
+        if points_end != []:
+            polygon2 = plt.Polygon(points_end,fc = 'g', ec = 'none', alpha = 0.6)
+            ax1.add_patch(polygon2)
+
+        if points_start != []:
+            polygon2 = plt.Polygon(points_start,fc = 'g', ec = 'none', alpha = 0.6)
+            ax1.add_patch(polygon2)
+
+
+ax2.plot(covx, covy)
+plt.xlabel('position')
+ax1.set_ylabel('pile-o-gram')
+ax2.set_ylabel('coverage')
+
+
+plt.savefig('mapping/map.' + str(contig) + '_' + str(left) +'_'+ str(right)+ '.svg')
diff --git a/scripts/fasta_to_fastq.py b/scripts/fasta_to_fastq.py
new file mode 100755
index 0000000..81c5474
--- /dev/null
+++ b/scripts/fasta_to_fastq.py
@@ -0,0 +1,20 @@
+#!/usr/bin/env python
+"""
+Convert FASTA to FASTQ file with a static quality score (Phred 40 for every base)
+
+Usage:
+$ ./fasta_to_fastq.py NAME.fasta NAME.fastq
+"""
+
+import sys, os
+from Bio import SeqIO
+
+# Get inputs
+fa_path = sys.argv[1]
+fq_path = sys.argv[2]
+
+# make fastq
+with open(fa_path, "r") as fasta, open(fq_path, "w") as fastq:
+    for record in SeqIO.parse(fasta, "fasta"):
+        record.letter_annotations["phred_quality"] = [40] * len(record)
+        SeqIO.write(record, fastq, "fastq")
diff --git a/scripts/get_draft_path.py b/scripts/get_draft_path.py
index 14a8f73..9dba102 100755
--- a/scripts/get_draft_path.py
+++ b/scripts/get_draft_path.py
@@ -9,12 +9,62 @@ import networkx as nx
 import itertools
 from pbcore.io import FastaIO
 
+
+
+
+def rev_node(node):
+    node_id = node.split('_')[0]
+    return node_id + '_' + str(1-int(node.split('_')[1]))
+
+
+
+def merge_nodes(g,in_node,out_node):
+
+    weight = str(g.edge[in_node][out_node]['length'])
+
+    if 'path' in g.node[in_node]:
+        path1 = g.node[in_node]['path']
+        weightspath1 = g.node[in_node]['weightspath']
+    else:
+        path1 = in_node
+        weightspath1 = ''
+
+    if 'path' in g.node[out_node]:
+        path2 = g.node[out_node]['path']
+        weightspath2 = ';' + g.node[out_node]['weightspath']
+    else:
+        path2 = out_node
+        weightspath2 = ''
+
+    g.node[in_node]['path'] = path1 + ';' + path2
+
+    if weightspath1 == '':
+        g.node[in_node]['weightspath'] = weight + weightspath2
+    else:
+        g.node[in_node]['weightspath'] = weightspath1 + ';' + weight + weightspath2
+
+    for nodeB in g.successors(out_node):
+        g.add_edge(in_node,nodeB,length=g.edge[out_node][nodeB]['length'])
+
+
+    g.node[in_node]['cut_end'] = g.node[out_node]['cut_end']
+    g.remove_node(out_node)
+
+
+
 filedir = sys.argv[1]
 filename = sys.argv[2]
 graphml_path = sys.argv[3]
 
 in_graph = nx.read_graphml(graphml_path)
 
+# debug output
+#for node in in_graph.nodes():
+#    print node
+
+#for edge in in_graph.edges():
+#    print len(in_graph.edge[edge[0]][edge[1]])
+
 reads = sorted(list(set([int(x.split("_")[0].lstrip("B")) for x in in_graph.nodes()])))
 
 dbshow_reads = ' '.join([str(x+1) for x in reads])
@@ -29,491 +79,369 @@ for read_id,read in itertools.izip(reads,reads_queried):
 #     print read
     read_dict[read_id] = read
 
+# to simulate reads
+
+# read_dict = {}
+# for vertex in in_graph.nodes():
+#     read_dict[int(vertex.split('_')[0])] = ['A','A'*50000]
+
+
 complement = {'A':'T','C': 'G','T':'A', 'G':'C','a':'t','t':'a','c':'g','g':'c'}
 
+# out_graphml_name = 'test.graphml'
+out_graphml_name = filedir + '/' + filename +'_draft.graphml'
 
-def rev_node(node):
-    node_id = node.split('_')[0]
-    return node_id + '_' + str(1-int(node.split('_')[1]))
+# outfile = 'test.edges.list'
+outfile = filedir + '/' + filename + ".edges.list"
+
+
+rev_comp_contig = True
+
+out_graph = in_graph.copy()
+
+
+# first we add some info to the graph for the cutting of contigs
+for vert in out_graph.nodes():
+
+    vert_id, vert_or = vert.split("_")
+    vert_id = vert_id.lstrip("B")
+
+    vert_len = len(read_dict[int(vert_id)][1])
 
+    out_graph.node[vert]['cut_start'] = 0
+    out_graph.node[vert]['cut_end'] = vert_len
 
 
-def reverse_complement(string):
-    return "".join(map(lambda x:complement[x],reversed(string)))
+    # SHOULD THIS USE THE RAW MATCHES?
 
-def get_string(path):
-    # print path
-    ret_str = ''
-    for itm in path:
-        # print itm
-        read_id,rd_orientation = itm[0].split("_")
-        if rd_orientation == '1':
-            assert itm[1][0] >= itm[1][1]
-            str_st = itm[1][1]
-            str_end = itm[1][0]
-            read_str = read_dict[int(read_id.lstrip("B"))][1][str_st:str_end]
+    if out_graph.in_degree(vert) > 1:
+        if vert_or == '0':
+            out_graph.node[vert]['cut_start'] = max([out_graph.edge[x][vert]['read_b_match_start'] for x in out_graph.predecessors(vert)])
         else:
+            out_graph.node[vert]['cut_start'] = vert_len - min([out_graph.edge[vert_id+'_0'][x]['read_a_match_start'] for x in out_graph.successors(vert_id+'_0')])
 
-            assert itm[1][0] <= itm[1][1]
-            str_st = itm[1][0]
-            str_end = itm[1][1]
-            read_str = reverse_complement(read_dict[int(read_id.lstrip("B"))][1][str_st:str_end])
-#         print str_st,str_end
-#         print read_id
-#         print read_dict[int(read_id)][str_st:str_end]
-#         print read_str
-        # print 'read len',len(read_str)
-        ret_str += read_str
-    # print len(path), len(ret_str)
-    return ret_str
+    if out_graph.out_degree(vert) > 1:
 
+        if vert_or == '0':
+            out_graph.node[vert]['cut_end'] = min([out_graph.edge[vert][x]['read_a_match_start'] for x in out_graph.successors(vert)])
+        else:
+            out_graph.node[vert]['cut_end'] = vert_len - max([out_graph.edge[x][vert_id+'_0']['read_b_match_start'] for x in out_graph.predecessors(vert_id+'_0')])
 
 
-# the following loop removes start/end inconsistencies created in pruning and clipping
-for vert in in_graph:
 
-    vert_id, vert_or = vert.split("_")
-    if vert_or == '1':
-        continue
 
-    read_starts = [(in_graph.edge[x][vert]['read_b_start']) for x in in_graph.predecessors(vert)]
-    read_starts.append(0)
-    read_ends = [(in_graph.edge[vert][x]['read_a_start']) for x in in_graph.successors(vert)]    
-    read_ends.append(100000)
+# next we merge the nodes in out_graph to form the contigs
 
-    if max(read_starts) > min(read_ends):
+nodes_to_merge = [x for x in out_graph.nodes() if out_graph.in_degree(x) == 1 and out_graph.out_degree(out_graph.predecessors(x)[0]) == 1]
 
-        for pred in in_graph.predecessors(vert):
-            in_graph.remove_edge(pred,vert)
-            in_graph.remove_edge(rev_node(vert),rev_node(pred))
 
 
+# print len(read_dict[41260][1])
+# print len(read_dict[4697][1])
 
-vertices_of_interest = set([x for x in in_graph if in_graph.in_degree(x) != 1 or in_graph.out_degree(x) != 1])
 
-read_tuples = {}
+while nodes_to_merge:
 
-for vert in vertices_of_interest:
+    cur_node = nodes_to_merge[0]
 
-    vert_id, vert_or = vert.split("_")
-    if vert_or == '1':
-        continue
-    vert_len = len(read_dict[int(vert_id)][1])
-#     print vert_len
-    read_starts = [(in_graph.edge[x][vert]['read_b_start']) for x in in_graph.predecessors(vert)]
-    read_ends = [(in_graph.edge[vert][x]['read_a_start']) for x in in_graph.successors(vert)]
-    if read_starts:
-        read_start = max(read_starts)
-    else:
-        read_start = 0
-    if read_ends:
-        read_end = min(read_ends)
+    prev_node = out_graph.predecessors(cur_node)[0]
+
+
+    if prev_node != cur_node:
+        merge_nodes(out_graph,prev_node,cur_node)
     else:
-        read_end = vert_len
-    read_tuples[vert] = (read_start,read_end)
-    # print read_starts, read_ends, vert
+        out_graph.node[cur_node]['path'] = out_graph.node[cur_node]['path'] + ';' + cur_node
+        out_graph.node[cur_node]['weightspath'] = out_graph.node[cur_node]['weightspath'] + ';' + str(out_graph.edge[prev_node][cur_node]['length'])
+        out_graph.node[cur_node]['cut_end'] = len(read_dict[int(cur_node.split('_')[0])][1])
 
 
-for vert in vertices_of_interest:
+    nodes_to_merge.pop(0)
 
-    vert_id, vert_or = vert.split("_")
-    if vert_or == '1':
-        read_tuples[vert] = read_tuples[vert_id+"_0"]
+    # print len(nodes_to_merge)
 
 
-start_vertices = [x for x in vertices_of_interest if in_graph.in_degree(x) == 0 or in_graph.out_degree(x) > 1]
-h = nx.DiGraph()
 
-read_tuples_raw = {}
-for vertex in vertices_of_interest:
-    successors = in_graph.successors(vertex)
-    if successors:
-        succ = successors[0]
-        d =  in_graph.get_edge_data(vertex,succ)
-        read_tuples_raw[vertex] = (d['read_a_start_raw'], d['read_a_end_raw'])
-    else:
-        predecessors = in_graph.predecessors(vertex)
-        if not len(predecessors) == 0:
-            pred = predecessors[0]
-            d =  in_graph.get_edge_data(pred,vertex)
-            read_tuples_raw[vertex] = (d['read_b_start_raw'], d['read_b_end_raw'])
-        else:
-            read_tuples_raw[vertex] = (0,0)
 
 
-for vertex in vertices_of_interest:
-    h.add_node(vertex)
-    if vertex.split("_")[1] == '0':
-        path_var = [(vertex,(read_tuples[vertex][0], read_tuples[vertex][1]))]
-    else:
-        path_var = [(vertex,(read_tuples[vertex][1], read_tuples[vertex][0]))]
-    #print path_var
-    segment = get_string(path_var)
-    h.node[vertex]['start_read'] = path_var[0][1][0]
-    h.node[vertex]['end_read'] = path_var[0][1][1]
-    h.node[vertex]['path'] = [vertex]
-    h.node[vertex]['segment'] = segment
-
-vertices_used = set([x for x in h.nodes()])
-contig_no = 1
-for start_vertex in vertices_of_interest:
-    first_out_vertices = in_graph.successors(start_vertex)
-    # print start_vertex, first_out_vertices
-    for vertex in first_out_vertices:
-        predecessor = start_vertex
-        start_vertex_id,start_vertex_or = start_vertex.split("_")
-        cur_vertex = vertex
-        if start_vertex_or == '0':
-            cur_path = [(start_vertex,(read_tuples[start_vertex][1],
-                                       in_graph.edge[start_vertex][cur_vertex]['read_a_start']))]
-        elif start_vertex_or == '1':
-            cur_path = [(start_vertex,(read_tuples[start_vertex][0],
-                                       in_graph.edge[start_vertex][cur_vertex]['read_a_start']))]
-
-        while cur_vertex not in vertices_of_interest:
-            successor = in_graph.successors(cur_vertex)[0]
-            start_point = in_graph.edge[predecessor][cur_vertex]['read_b_start']
-            end_point = in_graph.edge[cur_vertex][successor]['read_a_start']
-            cur_path.append((cur_vertex,(start_point,end_point)))
-            vertices_used.add(cur_vertex)
-            predecessor = cur_vertex
-            cur_vertex = successor
-
-        stop_vertex_id, stop_vertex_or = cur_vertex.split("_")
-        if stop_vertex_or == '0':
-            cur_path.append((cur_vertex,(in_graph.edge[predecessor][cur_vertex]['read_b_start'],
-                        read_tuples[cur_vertex][0])))
-        elif stop_vertex_or == '1':
-            cur_path.append((cur_vertex,(in_graph.edge[predecessor][cur_vertex]['read_b_start'],
-                        read_tuples[cur_vertex][1])))
-
-
-        node_name = str(contig_no)
-        h.add_node(node_name)
-        contig_no += 1
-#         print cur_path
-        node_path = [x[0] for x in cur_path]
-        h.node[node_name]['path'] = node_path
-        h.node[node_name]['start_read'] = cur_path[0][1][0]
-        h.node[node_name]['end_read'] = cur_path[-1][1][1]
-        h.node[node_name]['segment'] = get_string(cur_path)
-        h.add_edges_from([(start_vertex,node_name),(node_name,cur_vertex)])
-#         paths.append(cur_path)
-
-#print read_tuples
-
-while set(in_graph.nodes())-vertices_used:
-    vert = list(set(in_graph.nodes())-vertices_used)[0]
-    vert_id,vert_or = vert.split("_")
-    if vert_or == '0':
-        read_start = min( min([(in_graph.edge[x][vert]['read_b_start']) for x in in_graph.predecessors(vert)]),
-                         max([(in_graph.edge[vert][x]['read_a_start']) for x in in_graph.successors(vert)]))
-        read_end = max( min([(in_graph.edge[x][vert]['read_b_start']) for x in in_graph.predecessors(vert)]),
-                         max([(in_graph.edge[vert][x]['read_a_start']) for x in in_graph.successors(vert)]))
-        vertRC = vert_id+"_1"
-    else:
-        read_start = max( min([(in_graph.edge[x][vert]['read_b_start']) for x in in_graph.predecessors(vert)]),
-                         max([(in_graph.edge[vert][x]['read_a_start']) for x in in_graph.successors(vert)]))
-        read_end = min( min([(in_graph.edge[x][vert]['read_b_start']) for x in in_graph.predecessors(vert)]),
-                         max([(in_graph.edge[vert][x]['read_a_start']) for x in in_graph.successors(vert)]))
-        vertRC = vert_id+"_0"
-
-    successor_start = in_graph.successors(vert)[0]
-    d =  in_graph.get_edge_data(vert,successor_start)
-    read_tuples_raw[vert] = (d['read_a_start_raw'], d['read_a_end_raw'])
-
-    successor_start = in_graph.successors(vertRC)[0]
-    d =  in_graph.get_edge_data(vertRC,successor_start)
-    read_tuples_raw[vertRC] = (d['read_a_start_raw'], d['read_a_end_raw'])
-
-    h.add_node(vert)
-    node_path = [vert]
-    h.node[vert]['path'] = node_path
-    h.node[vert]['start_read'] = read_start
-    h.node[vert]['end_read'] = read_end
-    h.node[vert]['segment'] = get_string([(vert,(read_start, read_end))])
-    vertices_used.add(vert)
-
-    first_out_vertices = in_graph.successors(vert)
-    for vertex in first_out_vertices:
-        predecessor = vert
-        cur_vertex = vertex
-        cur_path = []
-        while cur_vertex != vert:
-            successor = in_graph.successors(cur_vertex)[0]
-            start_point = in_graph.edge[predecessor][cur_vertex]['read_b_start']
-            end_point = in_graph.edge[cur_vertex][successor]['read_a_start']
-            cur_path.append((cur_vertex,(start_point,end_point)))
-            vertices_used.add(cur_vertex)
-            predecessor = cur_vertex
-            cur_vertex = successor
-        node_name = str(contig_no)
-        h.add_node(node_name)
-        contig_no += 1
-#         print cur_path
-
-        node_path = [x[0] for x in cur_path]
-        h.node[node_name]['path'] = node_path
-        try:
-            h.node[node_name]['start_read'] = cur_path[0][1][0]
-            h.node[node_name]['end_read'] = cur_path[-1][1][1]
-        except:
-            print path_var
-            raise
-        h.node[node_name]['segment'] = get_string(cur_path)
-        h.add_edges_from([(vert,node_name),(node_name,vert)])
-
-    if vertRC not in vertices_used:
-        h.add_node(vertRC)
-        h.node[vertRC]['segment'] = get_string([(vertRC,(read_end, read_start))])
-        h.node[vertRC]['path'] = [vertRC]
-        h.node[vertRC]['start_read'] = read_end
-        h.node[vertRC]['end_read'] = read_start
-
-        vertices_used.add(vertRC)
-        first_out_vertices = in_graph.successors(vertRC)
-        for vertex in first_out_vertices:
-            predecessor = vertRC
-            cur_vertex = vertex
-            cur_path = []
-            while cur_vertex != vertRC:
-                successor = in_graph.successors(cur_vertex)[0]
-                start_point = in_graph.edge[predecessor][cur_vertex]['read_b_start']
-                end_point = in_graph.edge[cur_vertex][successor]['read_a_start']
-                cur_path.append((cur_vertex,(start_point,end_point)))
-                vertices_used.add(cur_vertex)
-                predecessor = cur_vertex
-                cur_vertex = successor
-            node_name = str(contig_no)
-            h.add_node(node_name)
+
+# next we print the contigs out to the .edges.list file
+contig_no = 0
+# print "Writing out_graph with "+str(len(out_graph.nodes()))+" contigs/nodes"
+
+
+# we keep track of the already printed nodes so that reverse complement pairs are printed together
+# we don't add to printed_nodes the "border" nodes so that we still have a partition of the nodes into contigs
+# printed_nodes = set()
+printed_nodes = {}
+
+# debug output
+
+# for node in out_graph.nodes():
+#     print node
+#     print out_graph.node[node]
+
+
+# for edge in out_graph.edges():
+#     print edge
+#     print out_graph.edge[edge[0]][edge[1]]
+
+
+
+
+with open(outfile, 'w') as f:
+
+    for vertex in out_graph.nodes():
+
+        if rev_node(vertex) in printed_nodes:
+            out_graph.node[vertex]['contig_id'] = printed_nodes[rev_node(vertex)] + 1
+
+            continue
+
+        # single-node contig
+        if 'path' not in out_graph.node[vertex]:
+
+            out_graph.node[vertex]['contig_id'] = contig_no + 1
+            f.write('>Unitig%d\n'%(contig_no))
+
+            # printed_nodes = printed_nodes | set([vertex])
+            printed_nodes[vertex] = contig_no
+
             contig_no += 1
-    #         print cur_path
 
-            node_path = [x[0] for x in cur_path]
-            h.node[node_name]['path'] = node_path
-            h.node[node_name]['start_read'] = cur_path[0][1][0]
-            h.node[node_name]['end_read'] = cur_path[-1][1][1]
-            h.node[node_name]['segment'] = get_string(cur_path)
-            # print len(cur_path)
-            h.add_edges_from([(vertRC,node_name),(node_name,vertRC)])
+            # we repeat the same node twice so that the line is easily distinguishable (6 numbers)
+            f.write('O %s %s %s %s %d %d\n'%(vertex.split('_')[0].lstrip('B'), vertex.split('_')[1]  , vertex.split('_')[0].lstrip('B'),
+                vertex.split('_')[1], out_graph.node[vertex]['cut_start'], out_graph.node[vertex]['cut_end']) )
 
 
 
-outfile = filedir + '/' + filename + ".edges.list"
-# outfile_norevcomp = filedir + '/' + filename + ".norevcomp.edges.list"
 
+            f.write('>Unitig%d\n'%(contig_no))
+            contig_no += 1
 
-vert_to_merge = [x for x in h.nodes() if len(h.successors(x)) == 1 and len(h.predecessors(h.successors(x)[0])) == 1 and
-    x != h.successors(x)[0] ]
+            vertex_rc = rev_node(vertex)
+            f.write('O %s %s %s %s %d %d\n'%(vertex_rc.split('_')[0].lstrip('B'), vertex_rc.split('_')[1]  , vertex_rc.split('_')[0].lstrip('B'),
+                vertex_rc.split('_')[1], out_graph.node[vertex_rc]['cut_start'], out_graph.node[vertex_rc]['cut_end']) )
+            continue
 
-# while True:
 
 
-for vert in vert_to_merge:
 
-    # and
-    # len(nx.node_connected_component(h.to_undirected(), x)) > 2]
+        node_list = out_graph.node[vertex]['path'].split(';')
+        weights_list = out_graph.node[vertex]['weightspath'].split(';')
 
-    if len(h.successors(x)) != 1 or len(h.predecessors(h.successors(x)[0])) != 1 or x == h.successors(x)[0]:
-        continue
 
 
-    succ = h.successors(vert)[0]
-    preds = h.predecessors(vert)
 
-    if succ in preds:
-        continue
+        # double-node contig
+        if out_graph.in_degree(vertex) != 1 and out_graph.out_degree(vertex) != 1 and len(node_list) == 2:
 
-    h.node[succ]['segment'] =  h.node[vert]['segment'] + h.node[succ]['segment']
-    h.node[succ]['path'] = h.node[vert]['path'] + h.node[succ]['path'][1:]
+            out_graph.node[vertex]['contig_id'] = contig_no
+            f.write('>Unitig%d\n'%(contig_no))
 
-    for pred in preds:
-        #print pred, succ
-        h.add_edges_from([(pred,succ)])
-        h.remove_edge(pred,vert)
+            # printed_nodes = printed_nodes | set(node_list)
+            printed_nodes[node_list[0]] = contig_no
+            printed_nodes[node_list[1]] = contig_no
 
-    h.remove_edge(vert,succ)
-    h.remove_node(vert)
+            contig_no += 1
 
+            nodeA = node_list[0]
+            nodeB = node_list[1]
+            f.write('D %s %s %s %s %s %d %d\n'%(nodeA.split('_')[0].lstrip('B'), nodeA.split('_')[1]  , nodeB.split('_')[0].lstrip('B'), nodeB.split('_')[1],
+                weights_list[0], out_graph.node[vertex]['cut_start'], out_graph.node[vertex]['cut_end']) )
 
 
-path_to_vert = {}
-RCmap = {}
 
-for i, vert in enumerate(h.nodes()):
-    path =  h.node[vert]['path']
-    path_to_vert[':'.join(path)] = vert 
+            f.write('>Unitig%d\n'%(contig_no))
+            contig_no += 1
 
-for path in path_to_vert:
-    path_to_search = ':'.join(list(reversed([ x.split('_')[0]+'_'+str(1-int(x.split('_')[1])) for x in path.split(':')])))
-    RCmap[path_to_vert[path]] = path_to_vert[path_to_search]
+            nodeA = rev_node(node_list[1])
+            nodeB = rev_node(node_list[0])
+            f.write('D %s %s %s %s %s %d %d\n'%(nodeA.split('_')[0].lstrip('B'), nodeA.split('_')[1]  , nodeB.split('_')[0].lstrip('B'), nodeB.split('_')[1],
+                weights_list[0], len(read_dict[int(nodeA.split('_')[0])][1]) - out_graph.node[vertex]['cut_end'], len(read_dict[int(nodeB.split('_')[0])][1]) - out_graph.node[vertex]['cut_start'] ) )
 
-# print path_to_vert        
+            continue
 
 
-# print RCmap
-# print [x for x in h.edges()]
 
 
-vert_to_merge = [x for x in h.nodes() if len(h.successors(x)) == 1 and len(h.predecessors(h.successors(x)[0])) == 1 and
-    x != h.successors(x)[0] and len(h.successors(h.successors(x)[0])) == 1 and h.successors(h.successors(x)[0])[0]== x 
-    and len(nx.node_connected_component(h.to_undirected(), x)) == 2]
 
+        # print out_graph.node[vertex]['path']
+        # print node_list
+        # print out_graph.node[vertex]['weightspath']
+        # print weights_list
 
-for vert in vert_to_merge:
+        # print len(node_list),len(weights_list)
 
-    if vert not in h.nodes():
-        continue
+        if len(node_list) != len(weights_list)+1:
+            print 'Something went wrong with contig '+str(contig_no)
+            continue
 
-    if len(h.successors(vert)) == 1 and h.successors(vert)[0] == vert:
-        continue
+        # printed_nodes = printed_nodes | set(node_list)
+        for curnode in node_list:
+            printed_nodes[curnode] = contig_no
 
-    succ = h.successors(vert)[0]
 
-    # print vert, succ
+        # print 'Unitig ' +str(contig_no) + ' ('+str(len(node_list))+' nodes)'
 
-    vertRC = RCmap[vert]
-    # print vert, vertRC
+        out_graph.node[vertex]['contig_id'] = contig_no
+        f.write('>Unitig%d\n'%(contig_no))
+        contig_no += 1
 
-    predRC = h.predecessors(vertRC)[0]
 
-    # print h.node[vert]['path']
-    # print h.node[succ]['path']
+        # prev_vert = out_graph.node[node_list[0]]['prev_node']
+        # if prev_vert != '':
 
-    h.node[succ]['segment'] =  h.node[vert]['segment'] + h.node[succ]['segment']
-    h.node[predRC]['segment'] =  h.node[predRC]['segment'] + h.node[vertRC]['segment']
+        if out_graph.in_degree(vertex) == 1 and out_graph.predecessors(vertex)[0] != vertex:
 
-    h.node[succ]['path'] = h.node[vert]['path'] + h.node[succ]['path']
-    h.node[predRC]['path'] = h.node[predRC]['path'] + h.node[vertRC]['path']
+            prev_contig = out_graph.predecessors(vertex)[0]
+            cut_start = out_graph.node[prev_contig]['cut_end']
+            if out_graph.node[prev_contig].has_key('path'):
+                nodeA = out_graph.node[prev_contig]['path'].split(';')[-1]
+            else:
+                nodeA = prev_contig
 
-    # print vert, succ, predRC, vertRC
+            nodeB = node_list[0]
+            f.write('S %s %s %s %s %s %d\n'%(nodeA.split('_')[0].lstrip('B'), nodeA.split('_')[1]  , nodeB.split('_')[0].lstrip('B'),
+                nodeB.split('_')[1], out_graph.edge[prev_contig][vertex]['length'], cut_start) )
 
-    h.add_edges_from([(succ,succ)])
-    h.add_edges_from([(predRC,predRC)])
 
-    h.remove_node(vert)
-    h.remove_node(vertRC)
+            if len(node_list) > 2:
 
+                nodeA = node_list[0]
+                nodeB = node_list[1]
+                f.write('T %s %s %s %s %s\n'%(nodeA.split('_')[0].lstrip('B'),nodeA.split('_')[1]  , nodeB.split('_')[0].lstrip('B'), nodeB.split('_')[1], weights_list[0]) )
 
 
+        else:
+            nodeA = node_list[0]
+            nodeB = node_list[1]
+            f.write('S %s %s %s %s %s %d\n'%(nodeA.split('_')[0].lstrip('B'), nodeA.split('_')[1]  , nodeB.split('_')[0].lstrip('B'), nodeB.split('_')[1],
+                weights_list[0], out_graph.node[vertex]['cut_start']) )
 
+        for i in range(1,len(weights_list)-1):
+            nodeA = node_list[i]
+            nodeB = node_list[i+1]
+            f.write('T %s %s %s %s %s\n'%(nodeA.split('_')[0].lstrip('B'), nodeA.split('_')[1]  , nodeB.split('_')[0].lstrip('B'), nodeB.split('_')[1], weights_list[i]) )
 
-for  i, vert in enumerate(h.nodes()):
-    pass
-    #print i,len(h.node[vert]['path'])
 
-cnt = 0
-with open(outfile, 'w') as f:
-    for i,node in enumerate(h.nodes()):
-        #print node
-        #print h.node[node]
-        path = h.node[node]['path']
-        h.node[node]['contig_id'] = cnt
-        cnt += 1
-        f.write('>Unitig%d\n'%(i))
-        if len(path) == 1:
-            #print path[0]
-            f.write(' '.join([path[0].split('_')[0], path[0].split('_')[1], str(read_tuples_raw[path[0]][0]), str(read_tuples_raw[path[0]][1])]) + '\n')
-        for j in range(len(path)-1):
-            nodeA = path[j].lstrip("B")
-            nodeB = path[j+1].lstrip("B")
-
-            d =  in_graph.get_edge_data(path[j],path[j+1])
-            try:
-                f.write('%s %s %s %s %d %d %d %d %d\n'%(nodeA.split('_')[0],nodeA.split('_')[1]  , nodeB.split('_')[0],
-                    nodeB.split('_')[1], -d['read_a_start_raw'] + d['read_a_end_raw'] - d['read_b_start_raw'] + d['read_b_end_raw'],
-                    d['read_a_start_raw'], d['read_a_end_raw'], d['read_b_start_raw'], d['read_b_end_raw']))
-            except:
-                print "in error"
-                print nodeB
-                print node
-                print  h.node[node]['start_read']
-                print  h.node[node]['end_read']
-                print  h.node[node]['path']
-                print  len(h.node[node]['segment'])
-                print d
-                raise
-
-
-# one_sided_contigs = []
-
-observed_paths = []
-cnt = 0
 
-out_graphml_name = filedir + '/' + filename +'_draft.graphml'
+        if out_graph.out_degree(vertex) == 1 and out_graph.successors(vertex)[0] != vertex:
 
+            if len(node_list) > 2:
 
-gfaname = filedir + '/' + filename+ '_draft_python.gfa'
-if len(sys.argv) > 3:
-    consensus_name = sys.argv[3]
-else:
-    consensus_name = ''
+                nodeA = node_list[len(weights_list)-1]
+                nodeB = node_list[len(weights_list)]
+                f.write('T %s %s %s %s %s\n'%(nodeA.split('_')[0].lstrip('B'), nodeA.split('_')[1]  , nodeB.split('_')[0].lstrip('B'), nodeB.split('_')[1], weights_list[-1]) )
 
-consensus_contigs = []
-try:
-    with open(consensus_name) as f:
-        for line in f:
-            if line[0] != '>':
-                consensus_contigs.append(line.strip())
-except:
-    pass
-# for  i, vert in enumerate(h.nodes()):
-#    print i,len(h.node[vert]['path']), len(h.node[vert]['segment']), len(consensus_contigs[i])
+            next_contig = out_graph.successors(vertex)[0]
+            # we end this contig where the next one begins
+            cut_end = out_graph.node[next_contig]['cut_start']
 
+            nodeA = node_list[len(weights_list)]
+            if out_graph.node[next_contig].has_key('path'):
+                nodeB = out_graph.node[next_contig]['path'].split(';')[0]
+            else:
+                nodeB = next_contig
 
-one_sided_contigs = []
+            f.write('E %s %s %s %s %s %d\n'%(nodeA.split('_')[0].lstrip('B'),nodeA.split('_')[1]  , nodeB.split('_')[0].lstrip('B'),
+                nodeB.split('_')[1], out_graph.edge[vertex][next_contig]['length'], cut_end) )
 
-observed_paths = []
 
-for i, vert in enumerate(h.nodes()):
-    path =  [x.split('_')[0] for x in h.node[vert]['path']]
-    path_to_search = list(reversed(path))
-    if path_to_search not in observed_paths:
-        observed_paths.append(path)
-        one_sided_contigs.append(h.node[vert]['segment'])
 
+        else:
 
+            nodeA = node_list[len(weights_list)-1]
+            nodeB = node_list[len(weights_list)]
+            f.write('E %s %s %s %s %s %d\n'%(nodeA.split('_')[0].lstrip('B'),nodeA.split('_')[1]  , nodeB.split('_')[0].lstrip('B'), nodeB.split('_')[1],
+                weights_list[-1], out_graph.node[vertex]['cut_end']) )
 
-# commented out the block below so that the non-reverse-complemented contigs are not produced here
 
-# out_nonrevcomp_name = filedir + '/' + filename +'_nonrevcomp.fasta'
-# writer = FastaIO.FastaWriter(out_nonrevcomp_name)
-# for i, ctg in enumerate(one_sided_contigs):
-#     print i, len(ctg)
-#     new_header = str(i)
-#     writer.writeRecord(new_header, ctg)
 
 
+        # if we want reverse complement contigs, we print them next to each other
 
+        if rev_comp_contig == False:
+            continue
 
 
+        f.write('>Unitig%d\n'%(contig_no))
+        contig_no += 1
+
+
+
+        if out_graph.out_degree(vertex) == 1 and out_graph.successors(vertex)[0] != vertex:
+
+            next_contig = out_graph.successors(vertex)[0]
+
+            nodeB = rev_node(node_list[len(weights_list)])
+            if out_graph.node[next_contig].has_key('path'):
+                nodeA = rev_node(out_graph.node[next_contig]['path'].split(';')[0])
+            else:
+                nodeA = rev_node(next_contig)
+
+            # we start this contig where the previous (rc: next) one ended
+            cut_start = len(read_dict[int(nodeA.split('_')[0])][1]) - out_graph.node[next_contig]['cut_start']
+
+            f.write('S %s %s %s %s %s %d\n'%(nodeA.split('_')[0].lstrip('B'), nodeA.split('_')[1]  , nodeB.split('_')[0].lstrip('B'),
+                nodeB.split('_')[1], out_graph.edge[vertex][next_contig]['length'], cut_start) )
+
+
+            if len(node_list) > 2:
+
+                nodeA = rev_node(node_list[len(weights_list)])
+                nodeB = rev_node(node_list[len(weights_list)-1])
+                f.write('T %s %s %s %s %s\n'%(nodeA.split('_')[0].lstrip('B'), nodeA.split('_')[1]  , nodeB.split('_')[0].lstrip('B'), nodeB.split('_')[1], weights_list[-1]) )
+
+        else:
+
+            nodeA = rev_node(node_list[len(weights_list)])
+            nodeB = rev_node(node_list[len(weights_list)-1])
+
+            f.write('S %s %s %s %s %s %d\n'%(nodeA.split('_')[0].lstrip('B'), nodeA.split('_')[1]  , nodeB.split('_')[0].lstrip('B'), nodeB.split('_')[1],
+                weights_list[-1], len(read_dict[int(nodeA.split('_')[0])][1]) - out_graph.node[vertex]['cut_end']) )
+
+
+
+        for i in range(len(weights_list)-1,1,-1):
+            nodeA = rev_node(node_list[i])
+            nodeB = rev_node(node_list[i-1])
+            f.write('T %s %s %s %s %s\n'%(nodeA.split('_')[0].lstrip('B'), nodeA.split('_')[1]  , nodeB.split('_')[0].lstrip('B'), nodeB.split('_')[1], weights_list[i-1]) )
+
+        if out_graph.in_degree(vertex) == 1 and out_graph.predecessors(vertex)[0] != vertex:
+
+            if len(node_list) > 2:
+
+                nodeA = rev_node(node_list[1])
+                nodeB = rev_node(node_list[0])
+                f.write('T %s %s %s %s %s\n'%(nodeA.split('_')[0].lstrip('B'), nodeA.split('_')[1]  , nodeB.split('_')[0].lstrip('B'), nodeB.split('_')[1], weights_list[0]) )
+
+            prev_contig = out_graph.predecessors(vertex)[0]
+
+            nodeA = rev_node(node_list[0])
+
+            if out_graph.node[prev_contig].has_key('path'):
+                nodeB = rev_node(out_graph.node[prev_contig]['path'].split(';')[-1])
+            else:
+                nodeB = rev_node(prev_contig)
+            cut_end = len(read_dict[int(nodeB.split('_')[0])][1]) - out_graph.node[prev_contig]['cut_end']
+
+            f.write('E %s %s %s %s %s %d\n'%(nodeA.split('_')[0].lstrip('B'),nodeA.split('_')[1]  , nodeB.split('_')[0].lstrip('B'),
+                nodeB.split('_')[1], out_graph.edge[prev_contig][vertex]['length'], cut_end) )
+
+        else:
+            nodeB = rev_node(node_list[0])
+            nodeA = rev_node(node_list[1])
+            f.write('E %s %s %s %s %s %d\n'%(nodeA.split('_')[0].lstrip('B'),nodeA.split('_')[1]  , nodeB.split('_')[0].lstrip('B'), nodeB.split('_')[1],
+                weights_list[0], len(read_dict[int(nodeB.split('_')[0])][1]) - out_graph.node[vertex]['cut_start']) )
 
-#last =  h.nodes()[-1]
-#print h.node[last]
-#path_last = h.node[last]['path']
 
 
 
-#for i in range(len(path_last)-1):
-#    read_a = path_last[i]
-#    read_b = path_last[i+1]
-#    print read_a, read_b, in_graph.edge[read_a][read_b]
+print "Number of contigs: "+str(contig_no)
 
-for i,node in enumerate(h.nodes()):
-     h.node[node]['path'] = ';'.join(h.node[node]['path'])
-nx.write_graphml(h,out_graphml_name)
+nx.write_graphml(out_graph,out_graphml_name)
 
 
-# with open(gfaname,'w') as f:
-#     f.write("H\tVN:Z:1.0\n")
-#     for i,vert in enumerate(h.nodes()):
-#         seg = h.node[vert]['segment']
-#         print len(seg)
 
-#         seg_line = "S\t"+vert+"\t"+seg + '\n'
-#         f.write(seg_line)
-#     for edge in h.edges():
-#         edge_line = "L\t"+edge[0]+"\t+\t"+edge[1]+"\t+\t0M\n"
-#         f.write(edge_line)
 
 
diff --git a/scripts/get_draft_path_norevcomp.py b/scripts/get_draft_path_norevcomp.py
old mode 100644
new mode 100755
index 09dab9d..57cc01c
--- a/scripts/get_draft_path_norevcomp.py
+++ b/scripts/get_draft_path_norevcomp.py
@@ -2,515 +2,17 @@
 
 import sys
 import os
-import subprocess
-from parse_read import *
-import numpy as np
-import networkx as nx
-import itertools
 from pbcore.io import FastaIO
 
-filedir = sys.argv[1]
-filename = sys.argv[2]
-graphml_path = sys.argv[3]
 
-in_graph = nx.read_graphml(graphml_path)
-
-reads = sorted(list(set([int(x.split("_")[0].lstrip("B")) for x in in_graph.nodes()])))
-
-dbshow_reads = ' '.join([str(x+1) for x in reads])
-
-DBshow_cmd = "DBshow "+ filedir+'/'+ filename+' '+dbshow_reads
-stream = subprocess.Popen(DBshow_cmd.split(),
-                                  stdout=subprocess.PIPE,bufsize=1)
-reads_queried = parse_read(stream.stdout)
-read_dict = {}
-for read_id,read in itertools.izip(reads,reads_queried):
-    rdlen = len(read[1])
-#     print read
-    read_dict[read_id] = read
-
-complement = {'A':'T','C': 'G','T':'A', 'G':'C','a':'t','t':'a','c':'g','g':'c'}
-
-def reverse_complement(string):
-    return "".join(map(lambda x:complement[x],reversed(string)))
-
-def get_string(path):
-    # print path
-    ret_str = ''
-    for itm in path:
-        # print itm
-        read_id,rd_orientation = itm[0].split("_")
-        if rd_orientation == '1':
-            assert itm[1][0] >= itm[1][1]
-            str_st = itm[1][1]
-            str_end = itm[1][0]
-            read_str = read_dict[int(read_id.lstrip("B"))][1][str_st:str_end]
-        else:
-
-            assert itm[1][0] <= itm[1][1]
-            str_st = itm[1][0]
-            str_end = itm[1][1]
-            read_str = reverse_complement(read_dict[int(read_id.lstrip("B"))][1][str_st:str_end])
-#         print str_st,str_end
-#         print read_id
-#         print read_dict[int(read_id)][str_st:str_end]
-#         print read_str
-        # print 'read len',len(read_str)
-        ret_str += read_str
-    # print len(path), len(ret_str)
-    return ret_str
-
-
-
-
-vertices_of_interest = set([x for x in in_graph if in_graph.in_degree(x) != 1 or in_graph.out_degree(x) != 1])
-
-read_tuples = {}
-
-for vert in vertices_of_interest:
-
-    vert_id, vert_or = vert.split("_")
-    if vert_or == '1':
-        continue
-    vert_len = len(read_dict[int(vert_id)][1])
-#     print vert_len
-    read_starts = [(in_graph.edge[x][vert]['read_b_start']) for x in in_graph.predecessors(vert)]
-    read_ends = [(in_graph.edge[vert][x]['read_a_start']) for x in in_graph.successors(vert)]
-    if read_starts:
-        read_start = max(read_starts)
-    else:
-        read_start = 0
-    if read_ends:
-        read_end = min(read_ends)
-    else:
-        read_end = vert_len
-    read_tuples[vert] = (read_start,read_end)
-    # print read_starts, read_ends, vert
-
-
-for vert in vertices_of_interest:
-
-    vert_id, vert_or = vert.split("_")
-    if vert_or == '1':
-        read_tuples[vert] = read_tuples[vert_id+"_0"]
-
-
-start_vertices = [x for x in vertices_of_interest if in_graph.in_degree(x) == 0 or in_graph.out_degree(x) > 1]
-h = nx.DiGraph()
-
-read_tuples_raw = {}
-for vertex in vertices_of_interest:
-    successors = in_graph.successors(vertex)
-    if successors:
-        succ = successors[0]
-        d =  in_graph.get_edge_data(vertex,succ)
-        read_tuples_raw[vertex] = (d['read_a_start_raw'], d['read_a_end_raw'])
-    else:
-        predecessors = in_graph.predecessors(vertex)
-        if not len(predecessors) == 0:
-            pred = predecessors[0]
-            d =  in_graph.get_edge_data(pred,vertex)
-            read_tuples_raw[vertex] = (d['read_b_start_raw'], d['read_b_end_raw'])
-        else:
-            read_tuples_raw[vertex] = (0,0)
-
-
-for vertex in vertices_of_interest:
-    h.add_node(vertex)
-    if vertex.split("_")[1] == '0':
-        path_var = [(vertex,(read_tuples[vertex][0], read_tuples[vertex][1]))]
-    else:
-        path_var = [(vertex,(read_tuples[vertex][1], read_tuples[vertex][0]))]
-    #print path_var
-    segment = get_string(path_var)
-    h.node[vertex]['start_read'] = path_var[0][1][0]
-    h.node[vertex]['end_read'] = path_var[0][1][1]
-    h.node[vertex]['path'] = [vertex]
-    h.node[vertex]['segment'] = segment
-
-vertices_used = set([x for x in h.nodes()])
-contig_no = 1
-for start_vertex in vertices_of_interest:
-    first_out_vertices = in_graph.successors(start_vertex)
-    # print start_vertex, first_out_vertices
-    for vertex in first_out_vertices:
-        predecessor = start_vertex
-        start_vertex_id,start_vertex_or = start_vertex.split("_")
-        cur_vertex = vertex
-        if start_vertex_or == '0':
-            cur_path = [(start_vertex,(read_tuples[start_vertex][1],
-                                       in_graph.edge[start_vertex][cur_vertex]['read_a_start']))]
-        elif start_vertex_or == '1':
-            cur_path = [(start_vertex,(read_tuples[start_vertex][0],
-                                       in_graph.edge[start_vertex][cur_vertex]['read_a_start']))]
-
-        while cur_vertex not in vertices_of_interest:
-            successor = in_graph.successors(cur_vertex)[0]
-            start_point = in_graph.edge[predecessor][cur_vertex]['read_b_start']
-            end_point = in_graph.edge[cur_vertex][successor]['read_a_start']
-            cur_path.append((cur_vertex,(start_point,end_point)))
-            vertices_used.add(cur_vertex)
-            predecessor = cur_vertex
-            cur_vertex = successor
-
-        stop_vertex_id, stop_vertex_or = cur_vertex.split("_")
-        if stop_vertex_or == '0':
-            cur_path.append((cur_vertex,(in_graph.edge[predecessor][cur_vertex]['read_b_start'],
-                        read_tuples[cur_vertex][0])))
-        elif stop_vertex_or == '1':
-            cur_path.append((cur_vertex,(in_graph.edge[predecessor][cur_vertex]['read_b_start'],
-                        read_tuples[cur_vertex][1])))
-
-
-        node_name = str(contig_no)
-        h.add_node(node_name)
-        contig_no += 1
-#         print cur_path
-        node_path = [x[0] for x in cur_path]
-        h.node[node_name]['path'] = node_path
-        h.node[node_name]['start_read'] = cur_path[0][1][0]
-        h.node[node_name]['end_read'] = cur_path[-1][1][1]
-        h.node[node_name]['segment'] = get_string(cur_path)
-        h.add_edges_from([(start_vertex,node_name),(node_name,cur_vertex)])
-#         paths.append(cur_path)
-
-#print read_tuples
-
-while set(in_graph.nodes())-vertices_used:
-    vert = list(set(in_graph.nodes())-vertices_used)[0]
-    vert_id,vert_or = vert.split("_")
-    if vert_or == '0':
-        read_start = min( min([(in_graph.edge[x][vert]['read_b_start']) for x in in_graph.predecessors(vert)]),
-                         max([(in_graph.edge[vert][x]['read_a_start']) for x in in_graph.successors(vert)]))
-        read_end = max( min([(in_graph.edge[x][vert]['read_b_start']) for x in in_graph.predecessors(vert)]),
-                         max([(in_graph.edge[vert][x]['read_a_start']) for x in in_graph.successors(vert)]))
-        vertRC = vert_id+"_1"
-    else:
-        read_start = max( min([(in_graph.edge[x][vert]['read_b_start']) for x in in_graph.predecessors(vert)]),
-                         max([(in_graph.edge[vert][x]['read_a_start']) for x in in_graph.successors(vert)]))
-        read_end = min( min([(in_graph.edge[x][vert]['read_b_start']) for x in in_graph.predecessors(vert)]),
-                         max([(in_graph.edge[vert][x]['read_a_start']) for x in in_graph.successors(vert)]))
-        vertRC = vert_id+"_0"
-
-    successor_start = in_graph.successors(vert)[0]
-    d =  in_graph.get_edge_data(vert,successor_start)
-    read_tuples_raw[vert] = (d['read_a_start_raw'], d['read_a_end_raw'])
-
-    successor_start = in_graph.successors(vertRC)[0]
-    d =  in_graph.get_edge_data(vertRC,successor_start)
-    read_tuples_raw[vertRC] = (d['read_a_start_raw'], d['read_a_end_raw'])
-
-    h.add_node(vert)
-    node_path = [vert]
-    h.node[vert]['path'] = node_path
-    h.node[vert]['start_read'] = read_start
-    h.node[vert]['end_read'] = read_end
-    h.node[vert]['segment'] = get_string([(vert,(read_start, read_end))])
-    vertices_used.add(vert)
-
-    first_out_vertices = in_graph.successors(vert)
-    for vertex in first_out_vertices:
-        predecessor = vert
-        cur_vertex = vertex
-        cur_path = []
-        while cur_vertex != vert:
-            successor = in_graph.successors(cur_vertex)[0]
-            start_point = in_graph.edge[predecessor][cur_vertex]['read_b_start']
-            end_point = in_graph.edge[cur_vertex][successor]['read_a_start']
-            cur_path.append((cur_vertex,(start_point,end_point)))
-            vertices_used.add(cur_vertex)
-            predecessor = cur_vertex
-            cur_vertex = successor
-        node_name = str(contig_no)
-        h.add_node(node_name)
-        contig_no += 1
-#         print cur_path
-
-        node_path = [x[0] for x in cur_path]
-        h.node[node_name]['path'] = node_path
-        try:
-            h.node[node_name]['start_read'] = cur_path[0][1][0]
-            h.node[node_name]['end_read'] = cur_path[-1][1][1]
-        except:
-            print path_var
-            raise
-        h.node[node_name]['segment'] = get_string(cur_path)
-        h.add_edges_from([(vert,node_name),(node_name,vert)])
-
-    if vertRC not in vertices_used:
-        h.add_node(vertRC)
-        h.node[vertRC]['segment'] = get_string([(vertRC,(read_end, read_start))])
-        h.node[vertRC]['path'] = [vertRC]
-        h.node[vertRC]['start_read'] = read_end
-        h.node[vertRC]['end_read'] = read_start
-
-        vertices_used.add(vertRC)
-        first_out_vertices = in_graph.successors(vertRC)
-        for vertex in first_out_vertices:
-            predecessor = vertRC
-            cur_vertex = vertex
-            cur_path = []
-            while cur_vertex != vertRC:
-                successor = in_graph.successors(cur_vertex)[0]
-                start_point = in_graph.edge[predecessor][cur_vertex]['read_b_start']
-                end_point = in_graph.edge[cur_vertex][successor]['read_a_start']
-                cur_path.append((cur_vertex,(start_point,end_point)))
-                vertices_used.add(cur_vertex)
-                predecessor = cur_vertex
-                cur_vertex = successor
-            node_name = str(contig_no)
-            h.add_node(node_name)
-            contig_no += 1
-    #         print cur_path
-
-            node_path = [x[0] for x in cur_path]
-            h.node[node_name]['path'] = node_path
-            h.node[node_name]['start_read'] = cur_path[0][1][0]
-            h.node[node_name]['end_read'] = cur_path[-1][1][1]
-            h.node[node_name]['segment'] = get_string(cur_path)
-            # print len(cur_path)
-            h.add_edges_from([(vertRC,node_name),(node_name,vertRC)])
-
-
-
-outfile = filedir + '/' + filename + ".edges.list"
-outfile_norevcomp = filedir + '/' + filename + ".norevcomp.edges.list"
-
-
-vert_to_merge = [x for x in h.nodes() if len(h.successors(x)) == 1 and len(h.predecessors(h.successors(x)[0])) == 1 and 
- x != h.successors(x)[0] and
- len(nx.node_connected_component(h.to_undirected(), x)) > 2]
-
-while True:
-
-    vert_to_merge = [x for x in h.nodes() if len(h.successors(x)) == 1 and len(h.predecessors(h.successors(x)[0])) == 1 and
-    x != h.successors(x)[0] and
-    len(nx.node_connected_component(h.to_undirected(), x)) > 2]
-
-    if not vert_to_merge:
-        break
-    vert = vert_to_merge[0]
-    #print vert,
-    succ = h.successors(vert)[0]
-    preds = h.predecessors(vert)
-    h.node[succ]['segment'] =  h.node[vert]['segment'] + h.node[succ]['segment']
-    h.node[succ]['path'] = h.node[vert]['path'] + h.node[succ]['path'][1:]
-
-    for pred in preds:
-        #print pred, succ
-        h.add_edges_from([(pred,succ)])
-        h.remove_edge(pred,vert)
-
-    h.remove_edge(vert,succ)
-    h.remove_node(vert)
-
-path_to_vert = {}
-RCmap = {}
-
-for i, vert in enumerate(h.nodes()):
-    path =  h.node[vert]['path']
-    path_to_vert[':'.join(path)] = vert 
-
-for path in path_to_vert:
-    path_to_search = ':'.join(list(reversed([ x.split('_')[0]+'_'+str(1-int(x.split('_')[1])) for x in path.split(':')])))
-    RCmap[path_to_vert[path]] = path_to_vert[path_to_search]
-
-# print path_to_vert        
-
-
-# print RCmap
-# print [x for x in h.edges()]
-
-while True:
-    vert_to_merge = [x for x in h.nodes() if len(h.successors(x)) == 1 and len(h.predecessors(h.successors(x)[0])) == 1 and
-    x != h.successors(x)[0] and h.successors(h.successors(x)[0])[0]== x and len(h.successors(h.successors(x)[0])) == 1 and
-    len(nx.node_connected_component(h.to_undirected(), x)) == 2]
-
-    if not vert_to_merge:
-        break
-
-    vert = vert_to_merge[0]
-    succ = h.successors(vert)[0]
-
-    # print vert, succ
-
-    vertRC = RCmap[vert]
-    # print vert, vertRC
-
-    predRC = h.predecessors(vertRC)[0]
-
-    # print h.node[vert]['path']
-    # print h.node[succ]['path']
-
-    h.node[succ]['segment'] =  h.node[vert]['segment'] + h.node[succ]['segment']
-    h.node[predRC]['segment'] =  h.node[predRC]['segment'] + h.node[vertRC]['segment']
-
-    h.node[succ]['path'] = h.node[vert]['path'] + h.node[succ]['path']
-    h.node[predRC]['path'] = h.node[predRC]['path'] + h.node[vertRC]['path']
-
-    # print vert, succ, predRC, vertRC
-
-    h.add_edges_from([(succ,succ)])
-    h.add_edges_from([(predRC,predRC)])
-
-    h.remove_node(vert)
-    h.remove_node(vertRC)
-
-
-
-
-for  i, vert in enumerate(h.nodes()):
-    print i,len(h.node[vert]['path'])
-
-# with open(outfile, 'w') as f:
-#     for i,node in enumerate(h.nodes()):
-#         #print node
-#         #print h.node[node]
-#         path = h.node[node]['path']
-
-#         f.write('>Unitig%d\n'%(i))
-#         if len(path) == 1:
-#             #print path[0]
-#             f.write(' '.join([path[0].split('_')[0], path[0].split('_')[1], str(read_tuples_raw[path[0]][0]), str(read_tuples_raw[path[0]][1])]) + '\n')
-#         for j in range(len(path)-1):
-#             nodeA = path[j].lstrip("B")
-#             nodeB = path[j+1].lstrip("B")
-
-#             d =  in_graph.get_edge_data(path[j],path[j+1])
-#             try:
-#                 f.write('%s %s %s %s %d %d %d %d %d\n'%(nodeA.split('_')[0],nodeA.split('_')[1]  , nodeB.split('_')[0],
-#                     nodeB.split('_')[1], -d['read_a_start_raw'] + d['read_a_end_raw'] - d['read_b_start_raw'] + d['read_b_end_raw'],
-#                     d['read_a_start_raw'], d['read_a_end_raw'], d['read_b_start_raw'], d['read_b_end_raw']))
-#             except:
-#                 print "in error"
-#                 print nodeB
-#                 print node
-#                 print  h.node[node]['start_read']
-#                 print  h.node[node]['end_read']
-#                 print  h.node[node]['path']
-#                 print  len(h.node[node]['segment'])
-#                 print d
-#                 raise
-
-
-# one_sided_contigs = []
-
-observed_paths = []
-cnt = 0
-
-
-out_graphml_name = filedir + '/' + filename +'_draft.graphml'
-
-
-gfaname = filedir + '/' + filename+ '_draft_python.gfa'
-if len(sys.argv) > 3:
-    consensus_name = sys.argv[3]
-else:
-    consensus_name = ''
-
-consensus_contigs = []
-try:
-    with open(consensus_name) as f:
-        for line in f:
-            if line[0] != '>':
-                consensus_contigs.append(line.strip())
-except:
-    pass
-# for  i, vert in enumerate(h.nodes()):
-#    print i,len(h.node[vert]['path']), len(h.node[vert]['segment']), len(consensus_contigs[i])
-
-
-one_sided_contigs = []
-
-observed_paths = []
-
-vertices_to_keep = []
-
-for i, vert in enumerate(h.nodes()):
-    path =  [x.split('_')[0] for x in h.node[vert]['path']]
-    path_to_search = list(reversed(path))
-    if path_to_search not in observed_paths:
-        observed_paths.append(path)
-        one_sided_contigs.append(h.node[vert]['segment'])
-        vertices_to_keep.append(vert)
-
-
-
-# commented out the block below so that the non-reverse-complemented contigs are not produced here
-
-# out_nonrevcomp_name = filedir + '/' + filename +'_nonrevcomp.fasta'
-# writer = FastaIO.FastaWriter(out_nonrevcomp_name)
-# for i, ctg in enumerate(one_sided_contigs):
-#     print i, len(ctg)
-#     new_header = str(i)
-#     writer.writeRecord(new_header, ctg)
-
-
-
-
-with open(outfile, 'w') as f:
-    for i,node in enumerate(h.nodes()):
-        #print node
-        #print h.node[node]
-        path = h.node[node]['path']
-
-        if node in vertices_to_keep:
-
-            f.write('>Unitig%d\n'%(cnt))
-            print "Writing contig number"
-            print cnt
-            cnt += 1
-            if len(path) == 1:
-                #print path[0]
-                f.write(' '.join([path[0].split('_')[0], path[0].split('_')[1], str(read_tuples_raw[path[0]][0]), str(read_tuples_raw[path[0]][1])]) + '\n')
-            for j in range(len(path)-1):
-                nodeA = path[j].lstrip("B")
-                nodeB = path[j+1].lstrip("B")
-
-                d =  in_graph.get_edge_data(path[j],path[j+1])
-                try:
-                    f.write('%s %s %s %s %d %d %d %d %d\n'%(nodeA.split('_')[0],nodeA.split('_')[1]  , nodeB.split('_')[0],
-                        nodeB.split('_')[1], -d['read_a_start_raw'] + d['read_a_end_raw'] - d['read_b_start_raw'] + d['read_b_end_raw'],
-                        d['read_a_start_raw'], d['read_a_end_raw'], d['read_b_start_raw'], d['read_b_end_raw']))
-                except:
-                    print "in error"
-                    # print nodeB
-                    # print node
-                    # print  h.node[node]['start_read']
-                    # print  h.node[node]['end_read']
-                    # print  h.node[node]['path']
-                    # print  len(h.node[node]['segment'])
-                    print d
-                    print in_graph
-                    raise
-
-
-#last =  h.nodes()[-1]
-#print h.node[last]
-#path_last = h.node[last]['path']
-
-
-
-#for i in range(len(path_last)-1):
-#    read_a = path_last[i]
-#    read_b = path_last[i+1]
-#    print read_a, read_b, in_graph.edge[read_a][read_b]
-
-for i,node in enumerate(h.nodes()):
-     h.node[node]['path'] = ';'.join(h.node[node]['path'])
-nx.write_graphml(h,out_graphml_name)
-
-
-# with open(gfaname,'w') as f:
-#     f.write("H\tVN:Z:1.0\n")
-#     for i,vert in enumerate(h.nodes()):
-#         seg = h.node[vert]['segment']
-#         print len(seg)
-
-#         seg_line = "S\t"+vert+"\t"+seg + '\n'
-#         f.write(seg_line)
-#     for edge in h.edges():
-#         edge_line = "L\t"+edge[0]+"\t+\t"+edge[1]+"\t+\t0M\n"
-#         f.write(edge_line)
+def run(reader, writer):  # copy every second record (indices 0, 2, 4, ...) from reader to writer
+    for i,record in enumerate(reader):
+        if i%2 == 0:  # odd-indexed records are skipped
+            writer.writeRecord(record.header, record.sequence)  # header and sequence pass through unchanged
 
 
+if __name__ == '__main__':
+    iname, oname = sys.argv[1:3]  # input and output paths; unpacking raises ValueError if fewer than two arguments are given
+    reader = FastaIO.FastaReader(iname)
+    writer = FastaIO.FastaWriter(oname)
+    run(reader, writer)  # NOTE(review): neither handle is closed explicitly here
diff --git a/scripts/get_single_strand.py b/scripts/get_single_strand.py
new file mode 100644
index 0000000..8b41b7e
--- /dev/null
+++ b/scripts/get_single_strand.py
@@ -0,0 +1,16 @@
+#!/usr/bin/env python
+
+#usage python get_single_strand.py <in-fasta> <out-fasta>
+
+from pbcore.io import FastaIO
+import sys
+
+flpath = sys.argv[1]
+outpath = sys.argv[2]
+writer = FastaIO.FastaWriter(outpath)
+reader = FastaIO.FastaReader(flpath)
+j = 0  # records written so far; numbers the output headers Consensus0, Consensus1, ...
+for i,record in enumerate(reader):
+    if i%2 == 0:  # keep every other input record; test i, because j only advances on a write and would stall at 1
+        writer.writeRecord('Consensus'+str(j), record.sequence)
+        j+=1
\ No newline at end of file
diff --git a/scripts/merge_hinges.py b/scripts/merge_hinges.py
old mode 100644
new mode 100755
diff --git a/scripts/pipeline_consensus.py b/scripts/pipeline_consensus.py
old mode 100644
new mode 100755
diff --git a/scripts/pipeline_consensus_norevcomp.py b/scripts/pipeline_consensus_norevcomp.py
old mode 100644
new mode 100755
diff --git a/scripts/pipeline_nctc.py b/scripts/pipeline_nctc.py
old mode 100644
new mode 100755
diff --git a/scripts/pruning_and_clipping.py b/scripts/pruning_and_clipping.py
index 93bbc2d..25fc4c9 100755
--- a/scripts/pruning_and_clipping.py
+++ b/scripts/pruning_and_clipping.py
@@ -12,11 +12,14 @@ import ujson
 from colormap import rgb2hex
 import operator
 import matplotlib.colors
+import configparser
 # print G.number_of_edges(),G.number_of_nodes()
 
 
 # In[3]:
 
+
+
 def write_graph(G,flname):
     with open(flname,'w') as f:
         for edge in G.edges_iter():
@@ -609,47 +612,51 @@ def resolve_rep(g,rep_path,in_node,out_node):
     prefix = 'B'
 
     g.add_edge(in_node,prefix + rep_path[0],
-        read_a_start=g.edge[in_node][rep_path[0]]['read_a_start'],
-        read_a_end=g.edge[in_node][rep_path[0]]['read_a_end'],
-        read_b_start=g.edge[in_node][rep_path[0]]['read_b_start'],
-        read_b_end=g.edge[in_node][rep_path[0]]['read_b_end'],
-        read_a_start_raw=g.edge[in_node][rep_path[0]]['read_a_start_raw'],
-        read_a_end_raw=g.edge[in_node][rep_path[0]]['read_a_end_raw'],
-        read_b_start_raw=g.edge[in_node][rep_path[0]]['read_b_start_raw'],
-        read_b_end_raw=g.edge[in_node][rep_path[0]]['read_b_end_raw'])
+        length=g.edge[in_node][rep_path[0]]['length'],
+        read_a_match_start=g.edge[in_node][rep_path[0]]['read_a_match_start'],
+        read_a_match_end=g.edge[in_node][rep_path[0]]['read_a_match_end'],
+        read_b_match_start=g.edge[in_node][rep_path[0]]['read_b_match_start'],
+        read_b_match_end=g.edge[in_node][rep_path[0]]['read_b_match_end'],
+        read_a_match_start_raw=g.edge[in_node][rep_path[0]]['read_a_match_start_raw'],
+        read_a_match_end_raw=g.edge[in_node][rep_path[0]]['read_a_match_end_raw'],
+        read_b_match_start_raw=g.edge[in_node][rep_path[0]]['read_b_match_start_raw'],
+        read_b_match_end_raw=g.edge[in_node][rep_path[0]]['read_b_match_end_raw'])
     g.remove_edge(in_node,rep_path[0])
 
     g.add_edge(prefix+rep_path[-1],out_node,
-        read_a_start=g.edge[rep_path[-1]][out_node]['read_a_start'],
-        read_a_end=g.edge[rep_path[-1]][out_node]['read_a_end'],
-        read_b_start=g.edge[rep_path[-1]][out_node]['read_b_start'],
-        read_b_end=g.edge[rep_path[-1]][out_node]['read_b_end'],
-        read_a_start_raw=g.edge[rep_path[-1]][out_node]['read_a_start_raw'],
-        read_a_end_raw=g.edge[rep_path[-1]][out_node]['read_a_end_raw'],
-        read_b_start_raw=g.edge[rep_path[-1]][out_node]['read_b_start_raw'],
-        read_b_end_raw=g.edge[rep_path[-1]][out_node]['read_b_end_raw'])
+               length=g.edge[rep_path[-1]][out_node]['length'],
+        read_a_match_start=g.edge[rep_path[-1]][out_node]['read_a_match_start'],
+        read_a_match_end=g.edge[rep_path[-1]][out_node]['read_a_match_end'],
+        read_b_match_start=g.edge[rep_path[-1]][out_node]['read_b_match_start'],
+        read_b_match_end=g.edge[rep_path[-1]][out_node]['read_b_match_end'],
+        read_a_match_start_raw=g.edge[rep_path[-1]][out_node]['read_a_match_start_raw'],
+        read_a_match_end_raw=g.edge[rep_path[-1]][out_node]['read_a_match_end_raw'],
+        read_b_match_start_raw=g.edge[rep_path[-1]][out_node]['read_b_match_start_raw'],
+        read_b_match_end_raw=g.edge[rep_path[-1]][out_node]['read_b_match_end_raw'])
     g.remove_edge(rep_path[-1],out_node)
 
 
     g.add_edge(rev_node(prefix + rep_path[0]),rev_node(in_node),
-        read_a_start=g.edge[rev_node(rep_path[0])][rev_node(in_node)]['read_a_start'],
-        read_a_end=g.edge[rev_node(rep_path[0])][rev_node(in_node)]['read_a_end'],
-        read_b_start=g.edge[rev_node(rep_path[0])][rev_node(in_node)]['read_b_start'],
-        read_b_end=g.edge[rev_node(rep_path[0])][rev_node(in_node)]['read_b_end'],
-        read_a_start_raw=g.edge[rev_node(rep_path[0])][rev_node(in_node)]['read_a_start_raw'],
-        read_a_end_raw=g.edge[rev_node(rep_path[0])][rev_node(in_node)]['read_a_end_raw'],
-        read_b_start_raw=g.edge[rev_node(rep_path[0])][rev_node(in_node)]['read_b_start_raw'],
-        read_b_end_raw=g.edge[rev_node(rep_path[0])][rev_node(in_node)]['read_b_end_raw'])
+               length =g.edge[rev_node(rep_path[0])][rev_node(in_node)]['length'],
+        read_a_match_start=g.edge[rev_node(rep_path[0])][rev_node(in_node)]['read_a_match_start'],
+        read_a_match_end=g.edge[rev_node(rep_path[0])][rev_node(in_node)]['read_a_match_end'],
+        read_b_match_start=g.edge[rev_node(rep_path[0])][rev_node(in_node)]['read_b_match_start'],
+        read_b_match_end=g.edge[rev_node(rep_path[0])][rev_node(in_node)]['read_b_match_end'],
+        read_a_match_start_raw=g.edge[rev_node(rep_path[0])][rev_node(in_node)]['read_a_match_start_raw'],
+        read_a_match_end_raw=g.edge[rev_node(rep_path[0])][rev_node(in_node)]['read_a_match_end_raw'],
+        read_b_match_start_raw=g.edge[rev_node(rep_path[0])][rev_node(in_node)]['read_b_match_start_raw'],
+        read_b_match_end_raw=g.edge[rev_node(rep_path[0])][rev_node(in_node)]['read_b_match_end_raw'])
     g.remove_edge(rev_node(rep_path[0]),rev_node(in_node))
     g.add_edge(rev_node(out_node),rev_node(prefix+rep_path[-1]),
-        read_a_start=g.edge[rev_node(out_node)][rev_node(rep_path[-1])]['read_a_start'],
-        read_a_end=g.edge[rev_node(out_node)][rev_node(rep_path[-1])]['read_a_end'],
-        read_b_start=g.edge[rev_node(out_node)][rev_node(rep_path[-1])]['read_b_start'],
-        read_b_end=g.edge[rev_node(out_node)][rev_node(rep_path[-1])]['read_b_end'],
-        read_a_start_raw=g.edge[rev_node(out_node)][rev_node(rep_path[-1])]['read_a_start_raw'],
-        read_a_end_raw=g.edge[rev_node(out_node)][rev_node(rep_path[-1])]['read_a_end_raw'],
-        read_b_start_raw=g.edge[rev_node(out_node)][rev_node(rep_path[-1])]['read_b_start_raw'],
-        read_b_end_raw=g.edge[rev_node(out_node)][rev_node(rep_path[-1])]['read_b_end_raw'])
+               length=g.edge[rev_node(out_node)][rev_node(rep_path[-1])]['length'],
+        read_a_match_start=g.edge[rev_node(out_node)][rev_node(rep_path[-1])]['read_a_match_start'],
+        read_a_match_end=g.edge[rev_node(out_node)][rev_node(rep_path[-1])]['read_a_match_end'],
+        read_b_match_start=g.edge[rev_node(out_node)][rev_node(rep_path[-1])]['read_b_match_start'],
+        read_b_match_end=g.edge[rev_node(out_node)][rev_node(rep_path[-1])]['read_b_match_end'],
+        read_a_match_start_raw=g.edge[rev_node(out_node)][rev_node(rep_path[-1])]['read_a_match_start_raw'],
+        read_a_match_end_raw=g.edge[rev_node(out_node)][rev_node(rep_path[-1])]['read_a_match_end_raw'],
+        read_b_match_start_raw=g.edge[rev_node(out_node)][rev_node(rep_path[-1])]['read_b_match_start_raw'],
+        read_b_match_end_raw=g.edge[rev_node(out_node)][rev_node(rep_path[-1])]['read_b_match_end_raw'])
     g.remove_edge(rev_node(out_node),rev_node(rep_path[-1]))
 
 
@@ -657,23 +664,25 @@ def resolve_rep(g,rep_path,in_node,out_node):
 
     for i in range(0,len(rep_path)-1):
         g.add_edge(prefix+rep_path[i],prefix+rep_path[i+1],
-            read_a_start=g.edge[rep_path[i]][rep_path[i+1]]['read_a_start'],
-            read_a_end=g.edge[rep_path[i]][rep_path[i+1]]['read_a_end'],
-            read_b_start=g.edge[rep_path[i]][rep_path[i+1]]['read_b_start'],
-            read_b_end=g.edge[rep_path[i]][rep_path[i+1]]['read_b_end'],
-            read_a_start_raw=g.edge[rep_path[i]][rep_path[i+1]]['read_a_start_raw'],
-            read_a_end_raw=g.edge[rep_path[i]][rep_path[i+1]]['read_a_end_raw'],
-            read_b_start_raw=g.edge[rep_path[i]][rep_path[i+1]]['read_b_start_raw'],
-            read_b_end_raw=g.edge[rep_path[i]][rep_path[i+1]]['read_b_end_raw'])
+                   length=g.edge[rep_path[i]][rep_path[i+1]]['length'],
+            read_a_match_start=g.edge[rep_path[i]][rep_path[i+1]]['read_a_match_start'],
+            read_a_match_end=g.edge[rep_path[i]][rep_path[i+1]]['read_a_match_end'],
+            read_b_match_start=g.edge[rep_path[i]][rep_path[i+1]]['read_b_match_start'],
+            read_b_match_end=g.edge[rep_path[i]][rep_path[i+1]]['read_b_match_end'],
+            read_a_match_start_raw=g.edge[rep_path[i]][rep_path[i+1]]['read_a_match_start_raw'],
+            read_a_match_end_raw=g.edge[rep_path[i]][rep_path[i+1]]['read_a_match_end_raw'],
+            read_b_match_start_raw=g.edge[rep_path[i]][rep_path[i+1]]['read_b_match_start_raw'],
+            read_b_match_end_raw=g.edge[rep_path[i]][rep_path[i+1]]['read_b_match_end_raw'])
         g.add_edge(rev_node(prefix+rep_path[i+1]),rev_node(prefix+rep_path[i]),
-            read_a_start=g.edge[rev_node(rep_path[i+1])][rev_node(rep_path[i])]['read_a_start'],
-            read_a_end=g.edge[rev_node(rep_path[i+1])][rev_node(rep_path[i])]['read_a_end'],
-            read_b_start=g.edge[rev_node(rep_path[i+1])][rev_node(rep_path[i])]['read_b_start'],
-            read_b_end=g.edge[rev_node(rep_path[i+1])][rev_node(rep_path[i])]['read_b_end'],
-            read_a_start_raw=g.edge[rev_node(rep_path[i+1])][rev_node(rep_path[i])]['read_a_start_raw'],
-            read_a_end_raw=g.edge[rev_node(rep_path[i+1])][rev_node(rep_path[i])]['read_a_end_raw'],
-            read_b_start_raw=g.edge[rev_node(rep_path[i+1])][rev_node(rep_path[i])]['read_b_start_raw'],
-            read_b_end_raw=g.edge[rev_node(rep_path[i+1])][rev_node(rep_path[i])]['read_b_end_raw'])
+                   length =g.edge[rev_node(rep_path[i+1])][rev_node(rep_path[i])]['length'],
+            read_a_match_start=g.edge[rev_node(rep_path[i+1])][rev_node(rep_path[i])]['read_a_match_start'],
+            read_a_match_end=g.edge[rev_node(rep_path[i+1])][rev_node(rep_path[i])]['read_a_match_end'],
+            read_b_match_start=g.edge[rev_node(rep_path[i+1])][rev_node(rep_path[i])]['read_b_match_start'],
+            read_b_match_end=g.edge[rev_node(rep_path[i+1])][rev_node(rep_path[i])]['read_b_match_end'],
+            read_a_match_start_raw=g.edge[rev_node(rep_path[i+1])][rev_node(rep_path[i])]['read_a_match_start_raw'],
+            read_a_match_end_raw=g.edge[rev_node(rep_path[i+1])][rev_node(rep_path[i])]['read_a_match_end_raw'],
+            read_b_match_start_raw=g.edge[rev_node(rep_path[i+1])][rev_node(rep_path[i])]['read_b_match_start_raw'],
+            read_b_match_end_raw=g.edge[rev_node(rep_path[i+1])][rev_node(rep_path[i])]['read_b_match_end_raw'])
 
 
 
@@ -698,6 +707,7 @@ def loop_resolution(g,max_nodes,flank,print_debug = False):
             print '----'
             print st_node
 
+        loop_len = 0
 
         for first_node in g.successors(st_node):
 
@@ -715,12 +725,22 @@ def loop_resolution(g,max_nodes,flank,print_debug = False):
             if print_debug:
                 print 'going on loop'
 
+            loop_len = 0
+            prev_edge = g[st_node][next_node]
             node_cnt = 0
             while g.in_degree(next_node) == 1 and g.out_degree(next_node) == 1 and node_cnt < max_nodes:
                 node_cnt += 1
                 in_node = next_node
                 next_node = g.successors(next_node)[0]
+                loop_len += abs(g[in_node][next_node]['read_a_match_start'] - prev_edge['read_b_match_start'])
+                prev_edge = g[in_node][next_node]
 
+            if node_cnt >= max_nodes:
+                continue
+
+            if print_debug:
+                print "length in loop " + str(loop_len)
+            len_in_loop = loop_len
             first_node_of_repeat = next_node
 
             if g.in_degree(next_node) == 2:
@@ -754,18 +774,29 @@ def loop_resolution(g,max_nodes,flank,print_debug = False):
             if g.in_degree(next_node) == 2 and g.out_degree(next_node) == 1:
                 next_double_node = g.successors(next_node)[0]
                 rep.append(next_double_node)
+                prev_edge = g[next_node][next_double_node]
             else:
                 next_double_node = next_node
+                try:
+                    assert not (g.in_degree(next_double_node) == 1 and g.out_degree(next_double_node) == 1)
+                except:
+                    print str(g.in_degree(next_node))
+                    print str(g.out_degree(next_node))
+                    raise
 
             while g.in_degree(next_double_node) == 1 and g.out_degree(next_double_node) == 1 and node_cnt < max_nodes:
                 node_cnt += 1
+                loop_len += abs(g[next_double_node][g.successors(next_double_node)[0]]['read_a_match_start'] - prev_edge['read_b_match_start'])
                 next_double_node = g.successors(next_double_node)[0]
                 rep.append(next_double_node)
 
+            if print_debug:
+                print "length in repeat " + str(loop_len-len_in_loop)
 
-            if next_double_node == st_node:
+            if next_double_node == st_node and loop_len > MAX_PLASMID_LENGTH:
                 if print_debug:
                     print 'success!'
+                    print "length in loop " + str(loop_len)
                     print 'rep is:'
                     print rep
                     print 'in_node and other_successor:'
@@ -792,6 +823,56 @@ def loop_resolution(g,max_nodes,flank,print_debug = False):
 
 
 
+def y_pruning(G,flank):
+    """Return a copy of G with chimera-flagged branches cut at Y-shaped junctions.
+
+    A junction is a node with in-degree 1 and out-degree > 1 that is preceded by
+    an unbranched chain of at least `flank` nodes. At such a node every edge to a
+    successor whose 'CFLAG' attribute is True is removed, together with the
+    reverse-complement edge. G itself is left untouched.
+    """
+    H = G.copy()
+    y_nodes = set([x for x in H.nodes() if H.out_degree(x) > 1 and H.in_degree(x) == 1])
+    pruned_count = 0
+
+    for st_node in y_nodes:
+        pruned = 0
+
+        # Edges removed earlier in this loop may have taken away this node's only
+        # predecessor; indexing [0] on the empty list would raise IndexError.
+        preds = H.predecessors(st_node)
+        if len(preds) == 0:
+            continue
+        prev_node = preds[0]
+
+        # Walk back along the unbranched chain, at most `flank` steps.
+        node_cnt = 0
+        while H.in_degree(prev_node) == 1 and H.out_degree(prev_node) == 1:
+            node_cnt += 1
+            prev_node = H.predecessors(prev_node)[0]
+            if node_cnt >= flank:
+                break
+        if node_cnt < flank: # and prev_node != st_node:
+            continue
+
+        # if we got here, we probably have a Y, and not a collapsed repeat
+        for vert in H.successors(st_node):
+            if H.node[vert]['CFLAG'] == True:
+                try:
+                    H.remove_edge(st_node,vert)
+                    H.remove_edge(rev_node(vert),rev_node(st_node))
+                    pruned = 1
+                except:
+                    # best effort: presumably the edge is already gone - TODO confirm
+                    pass
+
+        if pruned == 1:
+            pruned_count += 1
+
+    # print "Number of pruned Y's: "+str(pruned_count)
+    return H
+
+
 # In[72]:
 
 
@@ -956,6 +1037,57 @@ def add_annotation(g,in_hinges,out_hinges):
 
 
 
+
+def _flag_reads_from_file(g,flag_file,attr):
+    """Set g.node[<read>_0][attr] and g.node[<read>_1][attr] to True for each read in flag_file.
+
+    flag_file holds one read name per line. Returns the number of listed reads
+    found in g. Raises AssertionError when only one of the two '_0'/'_1' nodes
+    of a listed read is present in the graph.
+    """
+    node_set = set(g.nodes())
+    num_flagged = 0
+    with open(flag_file,'r') as f:
+        for line in f:
+            node_name = line.strip()
+            fwd, rev = node_name+'_0', node_name+'_1'
+            # Explicit raise instead of `assert`, so the check survives `python -O`.
+            if (fwd in node_set) != (rev in node_set):
+                print node_name + ' is not symmetrically present in the graph input.'
+                raise AssertionError(node_name)
+            if fwd in node_set:
+                g.node[fwd][attr] = True
+                g.node[rev][attr] = True
+                num_flagged += 1
+    return num_flagged
+
+
+def add_chimera_flags(g,prefix):
+    """Annotate every node of g with a boolean 'CFLAG' read from <prefix>.cov.flag.
+
+    All nodes start with CFLAG=False; reads listed in the file get CFLAG=True on
+    both of their nodes. Self-alignment flags ('SFLAG') are disabled here because
+    slf_flags is None, so the self-aligned count printed at the end is always 0.
+    """
+    cov_flags = prefix + '.cov.flag'
+    slf_flags = None
+
+    for node in g.nodes():
+        g.node[node]['CFLAG'] = False
+        if slf_flags != None:
+            g.node[node]['SFLAG'] = False  # was outside the loop: only the last node got it
+
+    num_bad_cov_reads = 0
+    if cov_flags != None:
+        num_bad_cov_reads = _flag_reads_from_file(g,cov_flags,'CFLAG')
+    print str(num_bad_cov_reads) + ' bad coverage reads.'
+
+    num_bad_slf_reads = 0
+    if slf_flags != None:
+        num_bad_slf_reads = _flag_reads_from_file(g,slf_flags,'SFLAG')
+    print str(num_bad_slf_reads) + ' bad self aligned reads.'
+
+
+
 def connect_strands(g):
 
     for node in g.nodes():
@@ -981,10 +1113,10 @@ def create_bidirected(g):
             head_id, head_orientation = successor.split('_')
 
             h.add_edge(tail_id,head_id,tail_or = int(tail_orientation),head_or = int(head_orientation),
-                read_a_start=g.edge[u][successor]['read_a_start'],
-                read_a_end=g.edge[u][successor]['read_a_end'],
-                read_b_start=g.edge[u][successor]['read_b_start'],
-                read_b_end=g.edge[u][successor]['read_b_end'])
+                read_a_match_start=g.edge[u][successor]['read_a_match_start'],
+                read_a_match_end=g.edge[u][successor]['read_a_match_end'],
+                read_b_match_start=g.edge[u][successor]['read_b_match_start'],
+                read_b_match_end=g.edge[u][successor]['read_b_match_end'])
 
 
     st_nodes = [x for x in g if g.in_degree(x) != 1 or g.out_degree(x) > 1]
@@ -1035,10 +1167,10 @@ def create_bidirected2(g):
             h.add_edge(tail_id,head_id)
 
             # h.add_edge(tail_id,head_id,tail_or = int(tail_orientation),head_or = int(head_orientation),
-            #     read_a_start=g.edge[u][successor]['read_a_start'],
-            #     read_a_end=g.edge[u][successor]['read_a_end'],
-            #     read_b_start=g.edge[u][successor]['read_b_start'],
-            #     read_b_end=g.edge[u][successor]['read_b_end'])
+            #     read_a_match_start=g.edge[u][successor]['read_a_match_start'],
+            #     read_a_match_end=g.edge[u][successor]['read_a_match_end'],
+            #     read_b_match_start=g.edge[u][successor]['read_b_match_start'],
+            #     read_b_match_end=g.edge[u][successor]['read_b_match_end'])
 
 
     st_nodes = [x for x in g if g.in_degree(x) != 1 or g.out_degree(x) > 1]
@@ -1092,9 +1224,35 @@ hingesname = sys.argv[2]
 
 
 suffix = sys.argv[3]
+DEL_TELOMERE = False
+AGGRESSIVE_PRUNING = False
+# Optional 4th argument: ini file; its [layout] section may override the defaults.
+if len(sys.argv) >= 5:
+    ini_file_path = sys.argv[4]
+    config = configparser.ConfigParser()
+    config.read(ini_file_path)
+    try:
+        MAX_PLASMID_LENGTH = config.getint('layout', 'max_plasmid_length')
+        # print 'MAX_PLASMID_LENGTH in config '+str(MAX_PLASMID_LENGTH)
+    except (configparser.Error, ValueError):  # missing section/option or non-integer value
+        MAX_PLASMID_LENGTH = 500000
+        # print 'MAX_PLASMID_LENGTH '+str(MAX_PLASMID_LENGTH)
+    try:  # getboolean, not getbool: ConfigParser has no getbool(), so the flag could never be set
+        DEL_TELOMERE = config.getboolean('layout','del_telomere')
+    except (configparser.Error, ValueError):
+        DEL_TELOMERE = False
+    try:
+        AGGRESSIVE_PRUNING = config.getboolean('layout','aggressive_pruning')
+    except (configparser.Error, ValueError):
+        AGGRESSIVE_PRUNING = False
+
+else:
+    MAX_PLASMID_LENGTH = 500000
+
+
 
-if len(sys.argv)==5:
-    json_file = open(sys.argv[4])
+if len(sys.argv)>=6:
+    json_file = open(sys.argv[5])
 else:
     json_file = None
 # path = '../pb_data/ecoli_shortened/ecoli4/'
@@ -1102,6 +1260,8 @@ else:
 
 
 
+
+
 # In[116]:
 
 G = nx.DiGraph()
@@ -1120,43 +1280,43 @@ with open (flname) as f:
         # e1_match1 = abs(int(lines1[6].lstrip('['))-int(lines1[7].rstrip(']')))
         # e1_match2 = abs(int(lines1[8].lstrip('['))-int(lines1[9].rstrip(']')))
         e1_match_len = int(lines1[2])
-        ra_start = int(lines1[6].lstrip('['))
-        ra_end = int(lines1[7].rstrip(']'))
-        rb_start = int(lines1[8].lstrip('['))
-        rb_end = int(lines1[9].rstrip(']'))
+        ra_match_start = int(lines1[6].lstrip('['))
+        ra_match_end = int(lines1[7].rstrip(']'))
+        rb_match_start = int(lines1[8].lstrip('['))
+        rb_match_end = int(lines1[9].rstrip(']'))
 
-        ra_start_raw = int(lines1[-4].lstrip('['))
-        ra_end_raw = int(lines1[-3].rstrip(']'))
-        rb_start_raw = int(lines1[-2].lstrip('['))
-        rb_end_raw = int(lines1[-1].rstrip(']'))
+        ra_match_start_raw = int(lines1[-4].lstrip('['))
+        ra_match_end_raw = int(lines1[-3].rstrip(']'))
+        rb_match_start_raw = int(lines1[-2].lstrip('['))
+        rb_match_end_raw = int(lines1[-1].rstrip(']'))
 
 
         if e1 in G.edges():
             G.add_edge(lines1[0] + "_" + lines1[3], lines1[1] + "_" + lines1[4],
                 hinge_edge=int(lines1[5]),intersection=1,length=e1_match_len,z=0,
-                read_a_start=ra_start,read_a_end=ra_end,
-                read_b_start=rb_start,read_b_end=rb_end,
-                read_a_start_raw=ra_start_raw,read_a_end_raw=ra_end_raw,
-                read_b_start_raw=rb_start_raw,read_b_end_raw=rb_end_raw)
+                read_a_match_start=ra_match_start,read_a_match_end=ra_match_end,
+                read_b_match_start=rb_match_start,read_b_match_end=rb_match_end,
+                read_a_match_start_raw=ra_match_start_raw,read_a_match_end_raw=ra_match_end_raw,
+                read_b_match_start_raw=rb_match_start_raw,read_b_match_end_raw=rb_match_end_raw)
             G.add_edge(lines1[1] + "_" + str(1-int(lines1[4])), lines1[0] + "_" + str(1-int(lines1[3])),
                 hinge_edge=int(lines1[5]),intersection=1,length=e1_match_len,z=0,
-                read_a_start=rb_start,read_a_end=rb_end,
-                read_b_start=ra_start,read_b_end=ra_end,
-                read_a_start_raw=rb_start_raw,read_a_end_raw=rb_end_raw,
-                read_b_start_raw=ra_start_raw,read_b_end_raw=ra_end_raw)
+                read_a_match_start=rb_match_start,read_a_match_end=rb_match_end,
+                read_b_match_start=ra_match_start,read_b_match_end=ra_match_end,
+                read_a_match_start_raw=rb_match_start_raw,read_a_match_end_raw=rb_match_end_raw,
+                read_b_match_start_raw=ra_match_start_raw,read_b_match_end_raw=ra_match_end_raw)
         else:
             G.add_edge(lines1[0] + "_" + lines1[3], lines1[1] + "_" + lines1[4],
                 hinge_edge=int(lines1[5]),intersection=0,length=e1_match_len,z=0,
-                read_a_start=ra_start,read_a_end=ra_end,
-                read_b_start=rb_start,read_b_end=rb_end,
-                read_a_start_raw=ra_start_raw,read_a_end_raw=ra_end_raw,
-                read_b_start_raw=rb_start_raw,read_b_end_raw=rb_end_raw)
+                read_a_match_start=ra_match_start,read_a_match_end=ra_match_end,
+                read_b_match_start=rb_match_start,read_b_match_end=rb_match_end,
+                read_a_match_start_raw=ra_match_start_raw,read_a_match_end_raw=ra_match_end_raw,
+                read_b_match_start_raw=rb_match_start_raw,read_b_match_end_raw=rb_match_end_raw)
             G.add_edge(lines1[1] + "_" + str(1-int(lines1[4])), lines1[0] + "_" + str(1-int(lines1[3])),
                 hinge_edge=int(lines1[5]),intersection=0,length=e1_match_len,z=0,
-                read_a_start=rb_start,read_a_end=rb_end,
-                read_b_start=ra_start,read_b_end=ra_end,
-                read_a_start_raw=rb_start_raw,read_a_end_raw=rb_end_raw,
-                read_b_start_raw=ra_start_raw,read_b_end_raw=ra_end_raw)
+                read_a_match_start=rb_match_start,read_a_match_end=rb_match_end,
+                read_b_match_start=ra_match_start,read_b_match_end=ra_match_end,
+                read_a_match_start_raw=rb_match_start_raw,read_a_match_end_raw=rb_match_end_raw,
+                read_b_match_start_raw=ra_match_start_raw,read_b_match_end_raw=ra_match_end_raw)
 
 
 
@@ -1193,6 +1353,11 @@ with open (hingesname) as f:
 
 add_annotation(G,in_hinges,out_hinges)
 
+
+add_chimera_flags(G,prefix)
+
+
+
 # try:
 mark_skipped_edges(G,flname.split('.')[0] + '.edges.skipped')
 # except:
@@ -1201,6 +1366,9 @@ mark_skipped_edges(G,flname.split('.')[0] + '.edges.skipped')
 
 
 
+
+
+
 # json_file = open('../pb_data/ecoli_shortened/ecoli4/ecoli.mapping.1.json')
 
 
@@ -1223,10 +1391,12 @@ G1,G0 = z_clipping_sym(G0,6,set(),set())
 # G1=z_clipping_sym(G1,5,in_hinges,out_hinges)
 # G1=z_clipping_sym(G1,5,in_hinges,out_hinges)
 
-
-G1 = bubble_bursting_sym(G1,10)
-
-G1 = dead_end_clipping_sym(G1,5)
+if DEL_TELOMERE:
+    G1 = bubble_bursting_sym(G1,20)
+    G1 = dead_end_clipping_sym(G1,20)
+else:
+    G1 = bubble_bursting_sym(G1,10)
+    G1 = dead_end_clipping_sym(G1,5)
 
 nx.write_graphml(G0, prefix+suffix+'.'+'G0'+'.graphml')
 nx.write_graphml(G1, prefix+suffix+'.'+'G1'+'.graphml')
@@ -1237,16 +1407,15 @@ G2 = G1.copy()
 Gs = random_condensation_sym(G1,1000)
 
 
+
 loop_resolution(G2,500,50)
 
 G2s = random_condensation_sym(G2,1000)
 
+nx.write_graphml(G2, prefix+suffix+'.'+'G2'+'.graphml')
 
 
 
-
-nx.write_graphml(G2, prefix+suffix+'.'+'G2'+'.graphml')
-
 nx.write_graphml(Gs, prefix+suffix+'.'+'Gs'+'.graphml')
 
 nx.write_graphml(G2s, prefix+suffix+'.'+'G2s'+'.graphml')
@@ -1259,6 +1428,32 @@ G2c = connect_strands(G2s)
 
 nx.write_graphml(G2c, prefix+suffix+'.'+'G2c'+'.graphml')
 
+
+
+
+if AGGRESSIVE_PRUNING:
+    # Extra Y-junction pruning pass; runs only when AGGRESSIVE_PRUNING is set.
+    G3 = y_pruning(G2,10)
+
+    G3 = dead_end_clipping_sym(G3,10)
+
+    G3s = random_condensation_sym(G3,1000)
+
+    G3c = connect_strands(G3s)
+    # NOTE(review): G3 is saved under the 'G2' name, overwriting the G2 graphml written earlier - confirm intended.
+    nx.write_graphml(G3, prefix+suffix+'.'+'G2'+'.graphml')
+
+    nx.write_graphml(G3s, prefix+suffix+'.'+'G3s'+'.graphml')
+
+    nx.write_graphml(G3c, prefix+suffix+'.'+'G3c'+'.graphml')
+
+
+
+
+
+
+
+
 # G2b = create_bidirected2(G2)
 
 # nx.write_graphml(G2b, prefix+suffix+'.'+'G2b'+'.graphml')
diff --git a/scripts/pruning_and_clipping2.py b/scripts/pruning_and_clipping_nanopore.py
old mode 100755
new mode 100644
similarity index 76%
rename from scripts/pruning_and_clipping2.py
rename to scripts/pruning_and_clipping_nanopore.py
index a420d1e..d83095f
--- a/scripts/pruning_and_clipping2.py
+++ b/scripts/pruning_and_clipping_nanopore.py
@@ -1,4 +1,5 @@
 #!/usr/bin/env python
+
 # coding: utf-8
 
 # In[115]:
@@ -9,8 +10,8 @@ import sys
 import numpy as np
 import ujson
 from colormap import rgb2hex
-
-
+import operator
+import matplotlib.colors
 # print G.number_of_edges(),G.number_of_nodes()
 
 
@@ -200,12 +201,12 @@ def dead_end_clipping_sym(G,threshold,print_debug = False):
 
         cur_path = [st_node]
 
+        cur_node = st_node
         if print_debug:
             print '----0'
             print st_node
 
         if len(H.successors(st_node)) == 1:
-
             cur_node = H.successors(st_node)[0]
 
             if print_debug:
@@ -219,12 +220,16 @@ def dead_end_clipping_sym(G,threshold,print_debug = False):
 
                 cur_node = H.successors(cur_node)[0]
 
+                if len(cur_path) > threshold + 1:
+                    break
+
+
         if print_debug:
             print '----2'
             print cur_path
 
 
-        if len(cur_path) <= threshold:
+        if len(cur_path) <= threshold and (H.in_degree(cur_node) > 1 or H.out_degree(cur_node) == 0):
             for vertex in cur_path:
                 # try:
                 if print_debug:
@@ -604,31 +609,51 @@ def resolve_rep(g,rep_path,in_node,out_node):
     prefix = 'B'
 
     g.add_edge(in_node,prefix + rep_path[0],
-        read_a_start=g.edge[in_node][rep_path[0]]['read_a_start'],
-        read_a_end=g.edge[in_node][rep_path[0]]['read_a_end'],
-        read_b_start=g.edge[in_node][rep_path[0]]['read_b_start'],
-        read_b_end=g.edge[in_node][rep_path[0]]['read_b_end'])
+        length=g.edge[in_node][rep_path[0]]['length'],
+        read_a_match_start=g.edge[in_node][rep_path[0]]['read_a_match_start'],
+        read_a_match_end=g.edge[in_node][rep_path[0]]['read_a_match_end'],
+        read_b_match_start=g.edge[in_node][rep_path[0]]['read_b_match_start'],
+        read_b_match_end=g.edge[in_node][rep_path[0]]['read_b_match_end'],
+        read_a_match_start_raw=g.edge[in_node][rep_path[0]]['read_a_match_start_raw'],
+        read_a_match_end_raw=g.edge[in_node][rep_path[0]]['read_a_match_end_raw'],
+        read_b_match_start_raw=g.edge[in_node][rep_path[0]]['read_b_match_start_raw'],
+        read_b_match_end_raw=g.edge[in_node][rep_path[0]]['read_b_match_end_raw'])
     g.remove_edge(in_node,rep_path[0])
 
     g.add_edge(prefix+rep_path[-1],out_node,
-        read_a_start=g.edge[rep_path[-1]][out_node]['read_a_start'],
-        read_a_end=g.edge[rep_path[-1]][out_node]['read_a_end'],
-        read_b_start=g.edge[rep_path[-1]][out_node]['read_b_start'],
-        read_b_end=g.edge[rep_path[-1]][out_node]['read_b_end'])
+               length=g.edge[rep_path[-1]][out_node]['length'],
+        read_a_match_start=g.edge[rep_path[-1]][out_node]['read_a_match_start'],
+        read_a_match_end=g.edge[rep_path[-1]][out_node]['read_a_match_end'],
+        read_b_match_start=g.edge[rep_path[-1]][out_node]['read_b_match_start'],
+        read_b_match_end=g.edge[rep_path[-1]][out_node]['read_b_match_end'],
+        read_a_match_start_raw=g.edge[rep_path[-1]][out_node]['read_a_match_start_raw'],
+        read_a_match_end_raw=g.edge[rep_path[-1]][out_node]['read_a_match_end_raw'],
+        read_b_match_start_raw=g.edge[rep_path[-1]][out_node]['read_b_match_start_raw'],
+        read_b_match_end_raw=g.edge[rep_path[-1]][out_node]['read_b_match_end_raw'])
     g.remove_edge(rep_path[-1],out_node)
 
 
     g.add_edge(rev_node(prefix + rep_path[0]),rev_node(in_node),
-        read_a_start=g.edge[rev_node(rep_path[0])][rev_node(in_node)]['read_a_start'],
-        read_a_end=g.edge[rev_node(rep_path[0])][rev_node(in_node)]['read_a_end'],
-        read_b_start=g.edge[rev_node(rep_path[0])][rev_node(in_node)]['read_b_start'],
-        read_b_end=g.edge[rev_node(rep_path[0])][rev_node(in_node)]['read_b_end'])
+               length =g.edge[rev_node(rep_path[0])][rev_node(in_node)]['length'],
+        read_a_match_start=g.edge[rev_node(rep_path[0])][rev_node(in_node)]['read_a_match_start'],
+        read_a_match_end=g.edge[rev_node(rep_path[0])][rev_node(in_node)]['read_a_match_end'],
+        read_b_match_start=g.edge[rev_node(rep_path[0])][rev_node(in_node)]['read_b_match_start'],
+        read_b_match_end=g.edge[rev_node(rep_path[0])][rev_node(in_node)]['read_b_match_end'],
+        read_a_match_start_raw=g.edge[rev_node(rep_path[0])][rev_node(in_node)]['read_a_match_start_raw'],
+        read_a_match_end_raw=g.edge[rev_node(rep_path[0])][rev_node(in_node)]['read_a_match_end_raw'],
+        read_b_match_start_raw=g.edge[rev_node(rep_path[0])][rev_node(in_node)]['read_b_match_start_raw'],
+        read_b_match_end_raw=g.edge[rev_node(rep_path[0])][rev_node(in_node)]['read_b_match_end_raw'])
     g.remove_edge(rev_node(rep_path[0]),rev_node(in_node))
     g.add_edge(rev_node(out_node),rev_node(prefix+rep_path[-1]),
-        read_a_start=g.edge[rev_node(out_node)][rev_node(rep_path[-1])]['read_a_start'],
-        read_a_end=g.edge[rev_node(out_node)][rev_node(rep_path[-1])]['read_a_end'],
-        read_b_start=g.edge[rev_node(out_node)][rev_node(rep_path[-1])]['read_b_start'],
-        read_b_end=g.edge[rev_node(out_node)][rev_node(rep_path[-1])]['read_b_end'])
+               length=g.edge[rev_node(out_node)][rev_node(rep_path[-1])]['length'],
+        read_a_match_start=g.edge[rev_node(out_node)][rev_node(rep_path[-1])]['read_a_match_start'],
+        read_a_match_end=g.edge[rev_node(out_node)][rev_node(rep_path[-1])]['read_a_match_end'],
+        read_b_match_start=g.edge[rev_node(out_node)][rev_node(rep_path[-1])]['read_b_match_start'],
+        read_b_match_end=g.edge[rev_node(out_node)][rev_node(rep_path[-1])]['read_b_match_end'],
+        read_a_match_start_raw=g.edge[rev_node(out_node)][rev_node(rep_path[-1])]['read_a_match_start_raw'],
+        read_a_match_end_raw=g.edge[rev_node(out_node)][rev_node(rep_path[-1])]['read_a_match_end_raw'],
+        read_b_match_start_raw=g.edge[rev_node(out_node)][rev_node(rep_path[-1])]['read_b_match_start_raw'],
+        read_b_match_end_raw=g.edge[rev_node(out_node)][rev_node(rep_path[-1])]['read_b_match_end_raw'])
     g.remove_edge(rev_node(out_node),rev_node(rep_path[-1]))
 
 
@@ -636,15 +661,25 @@ def resolve_rep(g,rep_path,in_node,out_node):
 
     for i in range(0,len(rep_path)-1):
         g.add_edge(prefix+rep_path[i],prefix+rep_path[i+1],
-            read_a_start=g.edge[rep_path[i]][rep_path[i+1]]['read_a_start'],
-            read_a_end=g.edge[rep_path[i]][rep_path[i+1]]['read_a_end'],
-            read_b_start=g.edge[rep_path[i]][rep_path[i+1]]['read_b_start'],
-            read_b_end=g.edge[rep_path[i]][rep_path[i+1]]['read_b_end'])
+                   length=g.edge[rep_path[i]][rep_path[i+1]]['length'],
+            read_a_match_start=g.edge[rep_path[i]][rep_path[i+1]]['read_a_match_start'],
+            read_a_match_end=g.edge[rep_path[i]][rep_path[i+1]]['read_a_match_end'],
+            read_b_match_start=g.edge[rep_path[i]][rep_path[i+1]]['read_b_match_start'],
+            read_b_match_end=g.edge[rep_path[i]][rep_path[i+1]]['read_b_match_end'],
+            read_a_match_start_raw=g.edge[rep_path[i]][rep_path[i+1]]['read_a_match_start_raw'],
+            read_a_match_end_raw=g.edge[rep_path[i]][rep_path[i+1]]['read_a_match_end_raw'],
+            read_b_match_start_raw=g.edge[rep_path[i]][rep_path[i+1]]['read_b_match_start_raw'],
+            read_b_match_end_raw=g.edge[rep_path[i]][rep_path[i+1]]['read_b_match_end_raw'])
         g.add_edge(rev_node(prefix+rep_path[i+1]),rev_node(prefix+rep_path[i]),
-            read_a_start=g.edge[rev_node(rep_path[i+1])][rev_node(rep_path[i])]['read_a_start'],
-            read_a_end=g.edge[rev_node(rep_path[i+1])][rev_node(rep_path[i])]['read_a_end'],
-            read_b_start=g.edge[rev_node(rep_path[i+1])][rev_node(rep_path[i])]['read_b_start'],
-            read_b_end=g.edge[rev_node(rep_path[i+1])][rev_node(rep_path[i])]['read_b_end'])
+                   length =g.edge[rev_node(rep_path[i+1])][rev_node(rep_path[i])]['length'],
+            read_a_match_start=g.edge[rev_node(rep_path[i+1])][rev_node(rep_path[i])]['read_a_match_start'],
+            read_a_match_end=g.edge[rev_node(rep_path[i+1])][rev_node(rep_path[i])]['read_a_match_end'],
+            read_b_match_start=g.edge[rev_node(rep_path[i+1])][rev_node(rep_path[i])]['read_b_match_start'],
+            read_b_match_end=g.edge[rev_node(rep_path[i+1])][rev_node(rep_path[i])]['read_b_match_end'],
+            read_a_match_start_raw=g.edge[rev_node(rep_path[i+1])][rev_node(rep_path[i])]['read_a_match_start_raw'],
+            read_a_match_end_raw=g.edge[rev_node(rep_path[i+1])][rev_node(rep_path[i])]['read_a_match_end_raw'],
+            read_b_match_start_raw=g.edge[rev_node(rep_path[i+1])][rev_node(rep_path[i])]['read_b_match_start_raw'],
+            read_b_match_end_raw=g.edge[rev_node(rep_path[i+1])][rev_node(rep_path[i])]['read_b_match_end_raw'])
 
 
 
@@ -657,6 +692,8 @@ def loop_resolution(g,max_nodes,flank,print_debug = False):
         print '----'
         print starting_nodes
 
+    tandem = []
+
     for st_node in starting_nodes:
 
 
@@ -700,7 +737,7 @@ def loop_resolution(g,max_nodes,flank,print_debug = False):
                     prev_node = g.predecessors(prev_node)[0]
                     if node_cnt >= flank:
                         break
-                if node_cnt < flank and prev_node != st_node:
+                if node_cnt < flank: # and prev_node != st_node:
                     continue
 
 
@@ -712,7 +749,7 @@ def loop_resolution(g,max_nodes,flank,print_debug = False):
                 if node_cnt >= flank:
                     break
 
-            if node_cnt < flank and next_node != first_node_of_repeat:
+            if node_cnt < flank: # and next_node != first_node_of_repeat:
                 continue
 
             rep = [first_node_of_repeat]
@@ -742,8 +779,19 @@ def loop_resolution(g,max_nodes,flank,print_debug = False):
                 resolve_rep(g,rep,in_node,other_successor)
     #             print next_double_node
 
+                if node_cnt < 5:
+
+                    tandem.append(rep)
+
+
+
                 continue
 
+    if len(tandem) > 0:
+        with open('tandem.txt', 'w') as tandemout:
+            for rep in tandem:
+                tandemout.write(str(rep))
+
 
     return g
 
@@ -773,7 +821,7 @@ def add_groundtruth(g,json_file,in_hinges,out_hinges):
         # print node_base
 
         #print node
-        # g.node[node]['normpos'] = 0.0
+        g.node[node]['normpos'] = 0
         if mapping.has_key(node_base):
             g.node[node]['chr'] = mapping[node_base][0][2]+1
             g.node[node]['aln_start'] = min (mapping[node_base][0][0],mapping[node_base][0][1])
@@ -799,29 +847,56 @@ def add_groundtruth(g,json_file,in_hinges,out_hinges):
         else:
             chr_length_dict[g.node[node]['chr']] = max(g.node[node]['aln_end'], 1)
 
-    chr_set = set([g.node[x]['chr'] for x in g.nodes()])
-    red_bk = 102
-    green_bk = 102
-    blue_bk = 102
-    for chrom in chr_set:
+    chr_list = sorted(chr_length_dict.items(), key=operator.itemgetter(1), reverse=True)
+
+    max_chr_len1 = max([g.node[x]['aln_end'] for x in  g.nodes()])
+    max_chr_multiplier = 10**len(str(max_chr_len1))
+    print [x for x in chr_list]
+    chr_set =[x [0] for x in chr_list]
+    print chr_set
+    # red_bk = 102
+    # green_bk = 102
+    # blue_bk = 102
+    colour_list = ['red', 'lawngreen', 'deepskyblue', 'deeppink', 'darkorange', 'purple', 'gold', 'mediumblue',   'saddlebrown', 'darkgreen']
+    for colour in colour_list:
+        print  matplotlib.colors.colorConverter.to_rgb(colour)
+    for index, chrom in enumerate(chr_set):
         node_set = set([x for x in  g.nodes() if g.node[x]['chr'] == chrom])
+        print chrom
 
-        max_chr_len = float(max([g.node[x]['aln_end'] for x in  g.nodes() if g.node[x]['chr'] == chrom]))
 
-        red = random.randint(0,255)
-        green = random.randint(0,255)
-        blue = random.randint(0,255)
+        max_chr_len = max([g.node[x]['aln_end'] for x in  g.nodes() if g.node[x]['chr'] == chrom])
+        # max_chr_multiplier = 10**len(str(max_chr_len))
+
+
+        if index < 10:
+            rgb_tuple = matplotlib.colors.colorConverter.to_rgb(colour_list[index])
+            red = int(255*rgb_tuple[0])
+            green = int(255*rgb_tuple[1])
+            blue = int(255*rgb_tuple[2])
+        else:
+            red = random.randint(0,255)
+            # green = random.randint(0,255)
+            blue = random.randint(0,255)
+            brightness = 200
+            green  = max(0,min( 255,brightness - int((0.2126 *red +  0.0722 *blue)/0.7152 )))
+
+        red_bk = max(red-100,0)
+        blue_bk = max(blue-100,0)
+        green_bk = max(green-100,0)
+
+        print red,blue,green
         for node in node_set:
-            lamda = (g.node[node]['aln_end']/max_chr_len)**3
+            g.node[node]['normpos'] = g.node[node]['chr'] * max_chr_multiplier + (g.node[node]['aln_end']/float(max_chr_len))*max_chr_multiplier
+            lamda = (g.node[node]['aln_end']/max_chr_len)
             nd_red = (1-lamda)*red + lamda*red_bk
             nd_green = (1-lamda)*green + lamda*green_bk
             nd_blue = (1-lamda)*blue + lamda*blue_bk
+            g.node[node]['color'] = rgb2hex(nd_red, nd_green, nd_blue)
             g.node[node]['color_r'] = nd_red
             g.node[node]['color_g'] = nd_green
             g.node[node]['color_b'] = nd_blue
 
-
-
     # max_chr_len = len(str(max_chr))
 
     # div_num = float(10**(max_chr_len))
@@ -912,10 +987,10 @@ def create_bidirected(g):
             head_id, head_orientation = successor.split('_')
 
             h.add_edge(tail_id,head_id,tail_or = int(tail_orientation),head_or = int(head_orientation),
-                read_a_start=g.edge[u][successor]['read_a_start'],
-                read_a_end=g.edge[u][successor]['read_a_end'],
-                read_b_start=g.edge[u][successor]['read_b_start'],
-                read_b_end=g.edge[u][successor]['read_b_end'])
+                read_a_match_start=g.edge[u][successor]['read_a_match_start'],
+                read_a_match_end=g.edge[u][successor]['read_a_match_end'],
+                read_b_match_start=g.edge[u][successor]['read_b_match_start'],
+                read_b_match_end=g.edge[u][successor]['read_b_match_end'])
 
 
     st_nodes = [x for x in g if g.in_degree(x) != 1 or g.out_degree(x) > 1]
@@ -966,10 +1041,10 @@ def create_bidirected2(g):
             h.add_edge(tail_id,head_id)
 
             # h.add_edge(tail_id,head_id,tail_or = int(tail_orientation),head_or = int(head_orientation),
-            #     read_a_start=g.edge[u][successor]['read_a_start'],
-            #     read_a_end=g.edge[u][successor]['read_a_end'],
-            #     read_b_start=g.edge[u][successor]['read_b_start'],
-            #     read_b_end=g.edge[u][successor]['read_b_end'])
+            #     read_a_match_start=g.edge[u][successor]['read_a_match_start'],
+            #     read_a_match_end=g.edge[u][successor]['read_a_match_end'],
+            #     read_b_match_start=g.edge[u][successor]['read_b_match_start'],
+            #     read_b_match_end=g.edge[u][successor]['read_b_match_end'])
 
 
     st_nodes = [x for x in g if g.in_degree(x) != 1 or g.out_degree(x) > 1]
@@ -1051,43 +1126,43 @@ with open (flname) as f:
         # e1_match1 = abs(int(lines1[6].lstrip('['))-int(lines1[7].rstrip(']')))
         # e1_match2 = abs(int(lines1[8].lstrip('['))-int(lines1[9].rstrip(']')))
         e1_match_len = int(lines1[2])
-        ra_start = int(lines1[6].lstrip('['))
-        ra_end = int(lines1[7].rstrip(']'))
-        rb_start = int(lines1[8].lstrip('['))
-        rb_end = int(lines1[9].rstrip(']'))
+        ra_match_start = int(lines1[6].lstrip('['))
+        ra_match_end = int(lines1[7].rstrip(']'))
+        rb_match_start = int(lines1[8].lstrip('['))
+        rb_match_end = int(lines1[9].rstrip(']'))
 
-        ra_start_raw = int(lines1[-4].lstrip('['))
-        ra_end_raw = int(lines1[-3].rstrip(']'))
-        rb_start_raw = int(lines1[-2].lstrip('['))
-        rb_end_raw = int(lines1[-1].rstrip(']'))
+        ra_match_start_raw = int(lines1[-4].lstrip('['))
+        ra_match_end_raw = int(lines1[-3].rstrip(']'))
+        rb_match_start_raw = int(lines1[-2].lstrip('['))
+        rb_match_end_raw = int(lines1[-1].rstrip(']'))
 
 
         if e1 in G.edges():
             G.add_edge(lines1[0] + "_" + lines1[3], lines1[1] + "_" + lines1[4],
                 hinge_edge=int(lines1[5]),intersection=1,length=e1_match_len,z=0,
-                read_a_start=ra_start,read_a_end=ra_end,
-                read_b_start=rb_start,read_b_end=rb_end,
-                read_a_start_raw=ra_start_raw,read_a_end_raw=ra_end_raw,
-                read_b_start_raw=rb_start_raw,read_b_end_raw=rb_end_raw)
+                read_a_match_start=ra_match_start,read_a_match_end=ra_match_end,
+                read_b_match_start=rb_match_start,read_b_match_end=rb_match_end,
+                read_a_match_start_raw=ra_match_start_raw,read_a_match_end_raw=ra_match_end_raw,
+                read_b_match_start_raw=rb_match_start_raw,read_b_match_end_raw=rb_match_end_raw)
             G.add_edge(lines1[1] + "_" + str(1-int(lines1[4])), lines1[0] + "_" + str(1-int(lines1[3])),
                 hinge_edge=int(lines1[5]),intersection=1,length=e1_match_len,z=0,
-                read_a_start=ra_start,read_a_end=ra_end,
-                read_b_start=rb_start,read_b_end=rb_end,
-                read_a_start_raw=ra_start_raw,read_a_end_raw=ra_end_raw,
-                read_b_start_raw=rb_start_raw,read_b_end_raw=rb_end_raw)
+                read_a_match_start=rb_match_start,read_a_match_end=rb_match_end,
+                read_b_match_start=ra_match_start,read_b_match_end=ra_match_end,
+                read_a_match_start_raw=rb_match_start_raw,read_a_match_end_raw=rb_match_end_raw,
+                read_b_match_start_raw=ra_match_start_raw,read_b_match_end_raw=ra_match_end_raw)
         else:
             G.add_edge(lines1[0] + "_" + lines1[3], lines1[1] + "_" + lines1[4],
                 hinge_edge=int(lines1[5]),intersection=0,length=e1_match_len,z=0,
-                read_a_start=ra_start,read_a_end=ra_end,
-                read_b_start=rb_start,read_b_end=rb_end,
-                read_a_start_raw=ra_start_raw,read_a_end_raw=ra_end_raw,
-                read_b_start_raw=rb_start_raw,read_b_end_raw=rb_end_raw)
+                read_a_match_start=ra_match_start,read_a_match_end=ra_match_end,
+                read_b_match_start=rb_match_start,read_b_match_end=rb_match_end,
+                read_a_match_start_raw=ra_match_start_raw,read_a_match_end_raw=ra_match_end_raw,
+                read_b_match_start_raw=rb_match_start_raw,read_b_match_end_raw=rb_match_end_raw)
             G.add_edge(lines1[1] + "_" + str(1-int(lines1[4])), lines1[0] + "_" + str(1-int(lines1[3])),
                 hinge_edge=int(lines1[5]),intersection=0,length=e1_match_len,z=0,
-                read_a_start=ra_start,read_a_end=ra_end,
-                read_b_start=rb_start,read_b_end=rb_end,
-                read_a_start_raw=ra_start_raw,read_a_end_raw=ra_end_raw,
-                read_b_start_raw=rb_start_raw,read_b_end_raw=rb_end_raw)
+                read_a_match_start=rb_match_start,read_a_match_end=rb_match_end,
+                read_b_match_start=ra_match_start,read_b_match_end=ra_match_end,
+                read_a_match_start_raw=rb_match_start_raw,read_a_match_end_raw=rb_match_end_raw,
+                read_b_match_start_raw=ra_match_start_raw,read_b_match_end_raw=ra_match_end_raw)
 
 
 
@@ -1098,6 +1173,8 @@ with open (flname) as f:
         Ginfo[(lines1[1] + "_" + str(1-int(lines1[4])), lines1[0] + "_" + str(1-int(lines1[3])))] = towrite
 
 
+nx.write_graphml(G, prefix+suffix+'.'+'G00'+'.graphml')
+
 
 
 vertices=set()
@@ -1153,9 +1230,9 @@ G1,G0 = z_clipping_sym(G0,6,set(),set())
 # G1=z_clipping_sym(G1,5,in_hinges,out_hinges)
 
 
-G1 = bubble_bursting_sym(G1,10)
+G1 = bubble_bursting_sym(G1,20)
 
-G1 = dead_end_clipping_sym(G1,5)
+G1 = dead_end_clipping_sym(G1,20)
 
 nx.write_graphml(G0, prefix+suffix+'.'+'G0'+'.graphml')
 nx.write_graphml(G1, prefix+suffix+'.'+'G1'+'.graphml')
diff --git a/scripts/random_condensation.py b/scripts/random_condensation.py
old mode 100644
new mode 100755
diff --git a/scripts/repeat_annotate_reads.py b/scripts/repeat_annotate_reads.py
old mode 100644
new mode 100755
diff --git a/scripts/split_las.py b/scripts/split_las.py
new file mode 100755
index 0000000..a1f06b9
--- /dev/null
+++ b/scripts/split_las.py
@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+# Run LAsplit on a .las file, choosing the number of parts from the file size.
+import os
+import argparse
+
+ap = argparse.ArgumentParser(description="run LAsplit by splitting las into sizes of less than specified length")
+ap.add_argument("las", help="path to las file to be split. assumed to be sorted.")
+ap.add_argument("max_size", help="max size of any split file.", type=int, default=4, nargs='?')
+
+args = ap.parse_args()
+
+laspath = args.las
+max_las_size = args.max_size
+
+# File size is counted in units of 10**9 bytes; floor division keeps the part count an integer on Python 2 and 3.
+x = os.path.getsize(laspath)
+num_divisions = (x//10**9)//max_las_size + 1
+out_las_name = laspath.split('.las')[0]+'.# '
+# The command goes through the shell, which redirects the source .las file to LAsplit's stdin.
+LAsplit_cmd = 'LAsplit -v '+out_las_name+ str(num_divisions) +' < ' + laspath
+os.system(LAsplit_cmd)
\ No newline at end of file
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 271b6e2..182dc77 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -33,11 +33,28 @@ add_subdirectory(spdlog)
 
 #add_subdirectory(ogdf)
 
+find_program(PANDOC pandoc)
+# The man page is generated and installed only when a pandoc executable was found.
+if(PANDOC)
+  add_custom_command(
+    OUTPUT hinge.1
+    COMMAND ${PANDOC} --from markdown --to man -s ${CMAKE_CURRENT_SOURCE_DIR}/hinge.1.md -o hinge.1
+    DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/hinge.1.md
+    VERBATIM
+    )
+  # The ALL target makes the default build produce hinge.1 so the install rule below finds it.
+  add_custom_target(man ALL DEPENDS hinge.1)
+  install(FILES ${CMAKE_CURRENT_BINARY_DIR}/hinge.1 DESTINATION share/man/man1)
+endif()
+# Install the file named hinge from this directory as a program under bin.
+install(PROGRAMS hinge DESTINATION bin)
 
 ## Executables
 
 add_subdirectory(filter filter)
 
+add_subdirectory(maximal maximal)
+
 add_subdirectory(consensus consensus)
 
 add_subdirectory(layout layout)
diff --git a/src/consensus/CMakeLists.txt b/src/consensus/CMakeLists.txt
index 9ac757f..97542ee 100644
--- a/src/consensus/CMakeLists.txt
+++ b/src/consensus/CMakeLists.txt
@@ -6,5 +6,4 @@ target_link_libraries(draft_assembly LAInterface ini falcon spdlog)
 add_executable(consensus consensus.cpp)
 target_link_libraries(consensus LAInterface falcon ini)
 
-add_executable(io io_base)
-target_link_libraries(io LAInterface ini spdlog)
+install(TARGETS draft_assembly consensus DESTINATION ${libexec})
diff --git a/src/consensus/consensus.cpp b/src/consensus/consensus.cpp
index 86efdd7..578d855 100644
--- a/src/consensus/consensus.cpp
+++ b/src/consensus/consensus.cpp
@@ -24,21 +24,41 @@ extern "C" {
 #include "INIReader.h"
 
 
-#define LAST_READ_SYMBOL  '$'
+static char ToU[4] = { 'A', 'C', 'G', 'T' };
+int chop_end(std::pair<std::string, std::string> * alignment, int chop) { // trims `chop` columns from both ends of both strings in place; returns how many non-'-' characters of ->first were dropped from the front
+    int len = alignment->first.size();
+    if (len < chop*2 + 10) // too short to trim: leave the alignment untouched
+        return 0;
 
-bool compare_overlap(LAlignment * ovl1, LAlignment * ovl2) {
-    return ((ovl1->aepos - ovl1->abpos + ovl1->bepos - ovl1->bbpos) > (ovl2->aepos - ovl2->abpos + ovl2->bepos - ovl2->bbpos));
-}
+    int start = chop;
+    while (start < len - chop && alignment->first[start] == '-') start++; // skip gaps after the cut, but never run into the tail that is trimmed below
+    int offset = 0;
+    for (int i =0; i < start; i++)
+        if (alignment->first[i]!= '-')
+            offset++;
+    // start <= len - chop here, so the length handed to substr() cannot go negative and wrap to a huge size_t
+    alignment->first = alignment->first.substr(start, len-start-chop);
+    alignment->second = alignment->second.substr(start, len-start-chop);
 
+    return offset;
 
-static int ORDER(const void *l, const void *r) {
-    int x = *((int32 *) l);
-    int y = *((int32 *) r);
-    return (x - y);
 }
 
+char toLower(char c) {
+    // Maps 'A', 'C', 'G' and 'T' to lower case; every other character is returned unchanged.
+    char base = c;
+
+    switch (c) {
+        case 'A': base = 'a'; break;
+        case 'C': base = 'c'; break;
+        case 'G': base = 'g'; break;
+        case 'T': base = 't'; break;
+    }
+
+    return base;
+
+}
 
-static char ToU[4] = { 'A', 'C', 'G', 'T' };
 
 int main(int argc, char *argv[]) {
 
@@ -89,7 +109,7 @@ int main(int argc, char *argv[]) {
 
 	std::vector<std::vector<LAlignment *>> idx;
 
-    printf("%d\n", res.size());
+    printf("%lu\n", res.size());
 
 	for (int i = 0; i < n_contigs; i++)
 		idx.push_back(std::vector<LAlignment *>());
@@ -99,8 +119,8 @@ int main(int argc, char *argv[]) {
     }
 
 	for (int i = 0; i < n_contigs; i++) {
-        std::sort(idx[i].begin(), idx[i].end(), compare_overlap);
-        printf("%d %d\n", i, idx[i].size());
+        std::sort(idx[i].begin(), idx[i].end(), compare_overlap_aln);
+        printf("%d %lu\n", i, idx[i].size());
     }
 
     std::cout << "Getting read lengths" << std::endl;
@@ -148,12 +168,19 @@ int main(int argc, char *argv[]) {
 
             la.recoverAlignment(idx[i][j]);
             std::pair<std::string, std::string>  alignment = la.getAlignmentTags(idx[i][j]);
+            //std::cout<<"before:" << alignment.first.substr(0,200) << std::endl;
+
+            int offset = chop_end(&alignment,100);
+            std::cout << offset<<std::endl;
+
+            //std::cout<<"after:" << alignment.first.substr(0,200) << std::endl;
+
+            int pos_in_contig = idx[i][j]->abpos + offset;
 
-            int pos_in_contig = idx[i][j]->abpos;
 
             for (int m = 0; m < alignment.first.length(); m++) {
 
-                unsigned int base = -1;
+                int base = -1;
                 switch (alignment.second[m]) {
                     case 'A': base = 0; break;
                     case 'C': base = 1; break;
@@ -163,11 +190,15 @@ int main(int argc, char *argv[]) {
                 }
 
                 if (alignment.first[m] != '-') {
-                    contig_base_scores[pos_in_contig][base]++;
-                    cov_depth[pos_in_contig]++;
+
+                    if (base != -1) {
+                        contig_base_scores[pos_in_contig][base]++;
+                        cov_depth[pos_in_contig]++;
+                    }
+
                     pos_in_contig++;
                 }
-                else {
+                else if (base != -1) {
                     insertion_score[pos_in_contig]++;
                     insertion_base_scores[pos_in_contig][base]++;
                 }
@@ -182,15 +213,41 @@ int main(int argc, char *argv[]) {
 
         int consensus_length = 0;
 
+        int low_coverage_bases = 0;
+
+        long int sum_coverage = 0;
+
         out << ">Consensus" << i << std::endl;
 
+
         for (int j=0; j < idx[i][0]->alen ; j++) {
 
-            unsigned int max_base = 0;
+            sum_coverage += cov_depth[j];
+
+            if (cov_depth[j] < 3) {
+//                std::cout << "Low coverage." << std::endl;
+
+                low_coverage_bases++;
+                out << toLower(reads_vec[i]->bases[j]);
+                continue;
+            }
+
+            if (insertion_score[j] > cov_depth[j]/2) {
+                int max_insertion_base = 0;
+                for (int b=1; b<4; b++) {
+                    if (insertion_base_scores[j][b] > insertion_base_scores[j][max_insertion_base]) max_insertion_base = b;
+                }
+                out << ToU[max_insertion_base];
+                consensus_length++;
+                insertions++;
+            }
+
+            int max_base = 0;
 
             for (int b=1; b<5; b++) {
                 if (contig_base_scores[j][b] > contig_base_scores[j][max_base]) max_base = b;
             }
+
             if (max_base < 4) {
                 out << ToU[max_base];
                 good_bases++;
@@ -200,23 +257,16 @@ int main(int argc, char *argv[]) {
                 deletions++;
             }
 
-            if (insertion_score[j] > cov_depth[j]/2) {
-                unsigned int max_insertion_base = 0;
-                for (int b=1; b<4; b++) {
-                    if (insertion_base_scores[j][b] > insertion_base_scores[j][max_insertion_base]) max_insertion_base = b;
-                }
-                out << ToU[max_insertion_base];
-                consensus_length++;
-                insertions++;
-            }
 
         }
         out << std::endl;
 
 
+        printf("Average coverage: %f\n",(1.0*sum_coverage)/idx[i][0]->alen);
         printf("Good bases: %d/%d\n",good_bases,idx[i][0]->alen);
         printf("Insertions: %d/%d\n",insertions,idx[i][0]->alen);
         printf("Deletions: %d/%d\n",deletions,idx[i][0]->alen);
+        printf("Low coverage bases: %d/%d\n",low_coverage_bases,idx[i][0]->alen);
         printf("Consensus length: %d\n",consensus_length);
 
 
diff --git a/src/consensus/draft.cpp b/src/consensus/draft.cpp
index a4bc8e1..702a7a7 100644
--- a/src/consensus/draft.cpp
+++ b/src/consensus/draft.cpp
@@ -9,6 +9,7 @@
 #include <omp.h>
 #include <tuple>
 #include <iomanip>
+#include <glob.h>
 
 #include "spdlog/spdlog.h"
 #include "cmdline.h"
@@ -37,15 +38,6 @@ using namespace boost;
 
 typedef adjacency_list <vecS, vecS, undirectedS> Graph;
 typedef std::tuple<Node, Node, int> Edge_w;
-typedef std::pair<Node, Node> Edge_nw;
-
-
-static int ORDER(const void *l, const void *r) {
-    int x = *((int32 *) l);
-    int y = *((int32 *) r);
-    return (x - y);
-}
-
 
 
 std::vector<int> get_mapping(std::string aln_tag1, std::string aln_tag2) {
@@ -79,23 +71,6 @@ std::string reverse_complement(std::string seq) {
 }
 
 
-
-std::ostream& operator<<(std::ostream& out, const MatchType value){
-    static std::map<MatchType, std::string> strings;
-    if (strings.size() == 0){
-#define INSERT_ELEMENT(p) strings[p] = #p
-        INSERT_ELEMENT(FORWARD);
-        INSERT_ELEMENT(BACKWARD);
-        INSERT_ELEMENT(ACOVERB);
-        INSERT_ELEMENT(BCOVERA);
-        INSERT_ELEMENT(INTERNAL);
-        INSERT_ELEMENT(UNDEFINED);
-        INSERT_ELEMENT(NOT_ACTIVE);
-#undef INSERT_ELEMENT
-    }
-    return out << strings[value];
-}
-
 std::vector<std::string> &split(const std::string &s, char delim, std::vector<std::string> &elems) {
     std::stringstream ss(s);
     std::string item;
@@ -113,29 +88,617 @@ std::vector<std::string> split(const std::string &s, char delim) {
 }
 
 
+int draft_assembly_ctg(std::vector<Edge_w> & edgelist, LAInterface & la, std::vector<LAlignment *> & full_aln,
+                       std::unordered_map<int, std::vector<LOverlap *> > &idx3,
+                       std::unordered_map<int, std::unordered_map<int, std::vector<LOverlap *> > > & idx,
+                       std::vector<Read *> & reads, int TSPACE, int EDGE_SAFE, int MIN_COV2,
+                       int cut_start, int cut_end, bool one_read_contig, bool two_read_contig,
+                       std::string& contig) {
+    std::cout << "list size:" << edgelist.size() << std::endl;
+    if (edgelist.size() == 0) return -1; //error
 
-bool compare_overlap(LOverlap * ovl1, LOverlap * ovl2) {
-    return ((ovl1->read_A_match_end_ - ovl1->read_A_match_start_
-             + ovl1->read_B_match_end_ - ovl1->read_B_match_start_) >
-            (ovl2->read_A_match_end_ - ovl2->read_A_match_start_
-             + ovl2->read_B_match_end_ - ovl2->read_B_match_start_));
-}
+    std::string draft_assembly = "";
 
+    if (one_read_contig) {
+        if (std::get<0>(edgelist[0]).strand == 0) draft_assembly = reads[std::get<0>(edgelist[0]).id]->bases;
+        else draft_assembly = reverse_complement(reads[std::get<0>(edgelist[0]).id]->bases);
+        std::cout << cut_start << " " << cut_end << " " << reads[std::get<0>(edgelist[0]).id]->len << std::endl;
+        if ((cut_start <= draft_assembly.size()) and (cut_end <= draft_assembly.size()))
+            contig = draft_assembly.substr(cut_start, cut_end-cut_start);
+        return 1;
+    }
+
+
+
+    //std::vector<LAlignment *> full_alns;
+    std::vector<LAlignment *> selected;
+    std::unordered_map<int, std::vector<LAlignment *>> idx_aln;
+    //la.resetAlignment();
+    std::vector<int> range;
+
+    for (int i = 0; i < edgelist.size(); i++) {
+        range.push_back(std::get<0>(edgelist[i]).id);
+        idx_aln[std::get<0>(edgelist[i]).id] = std::vector<LAlignment *>();
+    }
+
+    std::sort(range.begin(), range.end());
+
+    //la.getAlignment(full_alns, range);
+
+    for (auto i:full_aln) {
+        idx_aln[i->read_A_id_].push_back(i);
+    }
+
+    for (int i = 0; i < edgelist.size(); i++) {
+        int aid = std::get<0>(edgelist[i]).id;
+        int bid = std::get<1>(edgelist[i]).id;
+        bool found = false;
+        for (int j = 0; j < idx_aln[std::get<0>(edgelist[i]).id].size(); j++) {
+            //printf("%d %d %d %d\n",bid, idx_aln[aid][j]->read_B_id_, idx_aln[aid][j]->aepos - idx_aln[aid][j]->abpos + idx_aln[aid][j]->bepos - idx_aln[aid][j]->bbpos, std::get<2>(edgelist[i]));
+            if ((idx_aln[aid][j]->read_B_id_ == bid) and \
+            (idx_aln[aid][j]->aepos - idx_aln[aid][j]->abpos + idx_aln[aid][j]->bepos - idx_aln[aid][j]->bbpos == std::get<2>(edgelist[i]))) {
+                selected.push_back(idx_aln[aid][j]);
+                found = true;
+                break;
+            }
+            if (found) continue;
+        }
+    }
+
+    std::cout << "selected:" << selected.size() << std::endl;
+
+
+
+    if (two_read_contig) {
+        if (std::get<0>(edgelist[0]).strand == 0) draft_assembly = reads[std::get<0>(edgelist[0]).id]->bases;
+        else draft_assembly = reverse_complement(reads[std::get<0>(edgelist[0]).id]->bases);
+
+        int aend = selected[0]->aepos;
+        int bstart = selected[0]->bbpos;
+
+        std::string readB;
+
+        if (std::get<1>(edgelist[0]).strand == 0) readB = reads[std::get<1>(edgelist[0]).id]->bases;
+        else readB = reverse_complement(reads[std::get<1>(edgelist[0]).id]->bases);
+
+
+        std::cout << "alen blen aend bstart" << reads[std::get<0>(edgelist[0]).id]->len << " " << reads[std::get<1>(edgelist[0]).id]->len << " " << aend << " " << bstart << std::endl;
+
+        draft_assembly = draft_assembly.substr(0, aend);
+        draft_assembly += readB.substr(bstart);
+
+        std::cout << cut_start << " " << cut_end << " " << reads[std::get<0>(edgelist[0]).id]->len << std::endl;
+        if ((cut_start <= draft_assembly.size()) and (cut_end <= draft_assembly.size()))
+            contig = draft_assembly.substr(cut_start, cut_end-cut_start);
+        return 2;
+    }
+
+    std::unordered_map<int, std::unordered_map<int, std::pair<std::string, std::string> > > aln_tags_map;
+    std::vector<std::pair<std::string, std::string> > aln_tags_list;
+    std::vector<std::pair<std::string, std::string> > aln_tags_list_true_strand;
+
+
+
+    for (int i = 0; i < selected.size(); i++) {
+        la.recoverAlignment(selected[i]);
+        //printf("%d %d %d %d %d\n", selected[i]->read_A_id_, selected[i]->read_B_id_,
+        //        selected[i]->alen, selected[i]->blen, selected[i]->tlen);
+        //printf("%d %d\n",selected[i]->tlen, selected[i]->trace_pts_len);
+        std::pair<std::string, std::string> res = la.getAlignmentTags(selected[i]);
+        aln_tags_map[selected[i]->read_A_id_][selected[i]->read_B_id_] = res;
+        aln_tags_list.push_back(res);
+    }
+
+
+    std::string sequence = "";
+
+    std::vector<LOverlap *> bedges;
+    std::vector<std::string> breads;
+
+    std::vector<std::vector<std::pair<int, int> > > pitfalls;
+
+
+    range.clear();
+    for (int i = 0; i < edgelist.size(); i++) {
+        range.push_back(std::get<0>(edgelist[i]).id);
+    }
+
+    std::vector<std::vector<int> *> coverages;
+
+    for (int i = 0; i < range.size(); i++) {
+        int aread = range[i];
+        if (idx3[aread].size() > 0) {
+            std::vector<int> *res = la.getCoverage(idx3[aread]);
+            std::vector<std::pair<int, int> > *res2 = la.lowCoverageRegions(*res, MIN_COV2);
+            //delete res;
+            coverages.push_back(res);
+            //printf("%d %d: (%d %d) ", i, aread, 0, idx3[aread][0]->alen);
+            //for (int j = 0; j < res2->size(); j++) {
+            //    printf("[%d %d] ", res2->at(j).first, res2->at(j).second);
+            //}
+            //printf("\n");
+            pitfalls.push_back(*res2);
+            delete res2;
+        }
+    }
+
+    /***
+     * Prepare the data
+     */
+
+    std::string overhang;
+    int len_overhang = 0;
+    for (int i = 0; i < edgelist.size(); i++) {
+
+        std::vector<LOverlap *> currentalns = idx[std::get<0>(edgelist[i]).id][std::get<1>(edgelist[i]).id];
+
+        LOverlap *currentaln = NULL;
+
+        for (int j = 0; j < currentalns.size(); j++) {
+            //std::cout << std::get<0>(edgelist[i]).id << " " << std::get<1>(edgelist[i]).id << " " << currentalns[j]->match_type_ << std::endl;
+            if (currentalns[j]->read_A_match_end_ - currentalns[j]->read_A_match_start_ + currentalns[j]->read_B_match_end_ - currentalns[j]->read_B_match_start_ ==
+                std::get<2>(edgelist[i]))
+                currentaln = currentalns[j];
+        }
+
+        if (currentaln == NULL) exit(1);
+        //currentaln->show();
+
+        std::string current_seq;
+        std::string next_seq;
+
+        std::string aln_tags1;
+        std::string aln_tags2;
+
+
+        if (std::get<0>(edgelist[i]).strand == 0)
+            current_seq = reads[std::get<0>(edgelist[i]).id]->bases;
+        else
+            current_seq = reverse_complement(reads[std::get<0>(edgelist[i]).id]->bases);
+
+        if (std::get<0>(edgelist[i]).strand == 0) {
+            aln_tags1 = aln_tags_list[i].first;
+            aln_tags2 = aln_tags_list[i].second;
+        } else {
+            aln_tags1 = reverse_complement(aln_tags_list[i].first);
+            aln_tags2 = reverse_complement(aln_tags_list[i].second);
+        }
+
+        aln_tags_list_true_strand.push_back(std::pair<std::string, std::string>(aln_tags1, aln_tags2));
+
+        if (std::get<1>(edgelist[i]).strand == 0)
+            next_seq = reads[std::get<1>(edgelist[i]).id]->bases;
+        else
+            next_seq = reverse_complement(reads[std::get<1>(edgelist[i]).id]->bases);
+
+        int abpos, aepos, alen, bbpos, bepos, blen, aes, aee, bes, bee;
+
+        alen = currentaln->alen;
+        blen = currentaln->blen;
+
+
+        if (std::get<0>(edgelist[i]).strand == 0) {
+            abpos = currentaln->read_A_match_start_;
+            aepos = currentaln->read_A_match_end_;
+
+            aes = currentaln->eff_read_A_read_start_;
+            aee = currentaln->eff_read_A_read_end_;
+
+        } else {
+            abpos = alen - currentaln->read_A_match_end_;
+            aepos = alen - currentaln->read_A_match_start_;
+
+            aes = alen - currentaln->eff_read_A_read_end_;
+            aee = alen - currentaln->eff_read_A_read_start_;
+        }
+
+        if (((std::get<1>(edgelist[i]).strand == 0))) {
+            bbpos = currentaln->read_B_match_start_;
+            bepos = currentaln->read_B_match_end_;
+
+            bes = currentaln->eff_read_B_read_start_;
+            bee = currentaln->eff_read_B_read_end_;
+
+        } else {
+            bbpos = blen - currentaln->read_B_match_end_;
+            bepos = blen - currentaln->read_B_match_start_;
+
+            bes = blen - currentaln->eff_read_B_read_end_;
+            bee = blen - currentaln->eff_read_B_read_start_;
+
+        }
+        aes = 0;
+        bes = 0;
+        aee = alen;
+        bee = blen;
+
+//            printf("%d %d [[%d %d] << [%d %d]] x [[%d %d] << [%d %d]]\n", std::get<0>(edgelist[i]).id, std::get<1>(edgelist[i]).id, abpos, aepos, aes, aee, bbpos, bepos, bes, bee);
+
+        LOverlap *new_ovl = new LOverlap();
+        new_ovl->read_A_match_start_ = abpos;
+        new_ovl->read_A_match_end_ = aepos;
+        new_ovl->read_B_match_start_ = bbpos;
+        new_ovl->read_B_match_end_ = bepos;
+        new_ovl->eff_read_A_read_end_ = aee;
+        new_ovl->eff_read_A_read_start_ = aes;
+        new_ovl->eff_read_B_read_end_ = bee;
+        new_ovl->eff_read_B_read_start_ = bes;
+        new_ovl->alen = currentaln->alen;
+        new_ovl->blen = currentaln->blen;
+        new_ovl->read_A_id_ = std::get<0>(edgelist[i]).id;
+        new_ovl->read_B_id_ = std::get<1>(edgelist[i]).id;
+
+
+        bedges.push_back(new_ovl);
+        breads.push_back(current_seq);
+        overhang = next_seq;
+        len_overhang = new_ovl->blen - new_ovl->read_B_match_end_ - (new_ovl->alen - new_ovl->read_A_match_end_);
+
+    }
+    //need to trim the end
+
+
+    if ((len_overhang > 0) and (len_overhang < overhang.size())) {
+        overhang = overhang.substr(overhang.size()-len_overhang);
+    } else overhang = "";
+
+    std::vector<std::vector<int> > mappings;
+    for (int i = 0; i < range.size(); i++) {
+        mappings.push_back(get_mapping(aln_tags_list_true_strand[i].first, aln_tags_list_true_strand[i].second));
+    }
+
+    std::cout << bedges.size() << " " << breads.size() << " " << selected.size() << " "
+    << aln_tags_list.size() << " " << pitfalls.size() << " " << aln_tags_list_true_strand.size()
+    << " " << mappings.size() << " " << coverages.size() << std::endl;
+
+    /*for (int i = 0; i < bedges.size() - 1; i++) {
+        printf("%d %d %d %d %d\n", bedges[i]->read_B_match_start_, bedges[i]->read_B_match_end_,
+                bedges[i+1]->read_A_match_start_, bedges[i+1]->read_A_match_end_,
+                bedges[i]->read_B_match_end_ - bedges[i+1]->read_A_match_start_);
+    }*/
+
+
+    int tspace = TSPACE; // set lane length to be 500
+    int nlane = 0;
 
-bool compare_overlap_weight(LOverlap * ovl1, LOverlap * ovl2) {
-    return (ovl1->weight > ovl2->weight);
-}
 
 
-bool compare_overlap_abpos(LOverlap * ovl1, LOverlap * ovl2) {
-    return ovl1->read_A_match_start_ < ovl2->read_A_match_start_;
-}
 
-bool compare_overlap_aepos(LOverlap * ovl1, LOverlap * ovl2) {
-    return ovl1->read_A_match_start_ > ovl2->read_A_match_start_;
+    std::vector<std::vector<std::pair<int, int>>> lanes;
+
+
+
+    int currentlane = 0;
+    int current_starting_read = 0;
+    int current_starting_space = 1;
+    int current_starting_offset = 0;
+    int n_bb_reads = range.size();
+    std::vector<std::vector<int>> trace_pts(n_bb_reads);
+    bool revert = false;
+
+
+    int rmax = -1;
+    /**
+     * Move forward and put "trace points"
+     */
+    while (current_starting_read < n_bb_reads - 1) {
+        int currentread = current_starting_read;
+        int additional_offset = 0;
+        while (bedges[current_starting_read]->read_A_match_start_ + current_starting_space * tspace +
+               current_starting_offset + additional_offset <
+               bedges[current_starting_read]->read_A_match_end_ - EDGE_SAFE) {
+            int waypoint = bedges[current_starting_read]->read_A_match_start_ + tspace * current_starting_space +
+                           current_starting_offset + additional_offset;
+            //if ((waypoint - bedges[current_starting_read]->read_A_match_start_) < EDGE_SAFE)
+            //    waypoint += EDGE_SAFE;
+
+            //int next_waypoint = mappings[currentread][waypoint - bedges[current_starting_read]->read_A_match_start_] + bedges[current_starting_read]->read_B_match_start_;
+            std::vector<std::pair<int, int> > lane;
+
+            while ((waypoint > bedges[currentread]->read_A_match_start_) and
+                   (waypoint < bedges[currentread]->read_A_match_end_)) {
+
+                //printf("%d %d\n", currentread, waypoint);
+                trace_pts[currentread].push_back(waypoint);
+
+
+                /*if (waypoint > bedges[currentread]->read_A_match_end_ - EDGE_SAFE) {
+                    printf("Reaching the end, neglect low coverage\n");
+                }
+
+                if ((coverages[currentread]->at(waypoint) < MIN_COV2) and (waypoint < bedges[currentread]->read_A_match_end_ - EDGE_SAFE)) {
+                    revert = true;
+                    printf("Low coverage, revert\n");
+                    break;
+                }*/
+
+
+                lane.push_back(std::pair<int, int>(currentread, waypoint));
+                if (currentread > rmax) rmax = currentread;
+                //int previous_wp = waypoint;
+                waypoint = mappings[currentread][waypoint - bedges[currentread]->read_A_match_start_] +
+                           bedges[currentread]->read_B_match_start_;
+                currentread++;
+                if (currentread >= n_bb_reads) break;
+            }
+            if (currentread < n_bb_reads) if (waypoint < bedges[currentread]->alen) {
+                lane.push_back(std::pair<int, int>(currentread, waypoint));
+                if (currentread > rmax) rmax = currentread;
+            }
+            /*if (revert) {
+                printf("revert\n");
+                revert = false;
+                while (currentread >= current_starting_read) {
+                    trace_pts[currentread].pop_back();
+                    currentread --;
+                    additional_offset += STEP;
+                }
+                currentread = current_starting_read;
+            }
+            else*/
+            {
+                if (currentread >= rmax)
+                    lanes.push_back(lane);
+                current_starting_space++;
+                currentread = current_starting_read;
+
+            }
+
+        }
+
+        current_starting_read++;
+        current_starting_space = 1;//get next space;
+        if (trace_pts[current_starting_read].size() == 0)
+            current_starting_offset = 0;
+        else
+            current_starting_offset =
+                    trace_pts[current_starting_read].back() - bedges[current_starting_read]->read_A_match_start_;
+    }
+
+
+    /**
+     * Show trace points on reads
+     */
+    for (int i = 0; i < n_bb_reads; i++) {
+        printf("Read %d:", i);
+        for (int j = 0; j < trace_pts[i].size(); j++) {
+            printf("%d ", trace_pts[i][j]);
+        }
+        printf("\n");
+    }
+
+    /**
+     * Show lanes
+     */
+
+    for (int i = 0; i < lanes.size(); i++) {
+
+        printf("Lane %d\n", i);
+        for (int j = 0; j < lanes[i].size(); j++) {
+            printf("[%d %d] ", lanes[i][j].first, lanes[i][j].second);
+        }
+        printf("\n");
+    }
+
+
+    printf("In total %lu lanes\n", lanes.size());
+    //if (lanes.size() < 2) {
+    //    draft_assembly = breads[0];
+    //    out_fa << ">DraftAssemblyContig" << num_contig << std::endl;
+    //    out_fa << draft_assembly << std::endl;
+    //    num_contig++;
+    //    continue;
+    //}
+
+    int first_start = lanes[0][0].second;
+    int last_end = lanes.back().back().second;
+
+    std::cout << "first " << first_start << " last " << last_end << std::endl;
+    std::cout << "len " << reads[std::get<0>(edgelist[0]).id]->len << " " << reads[std::get<1>(edgelist.back()).id]->len << std::endl;
+    assert(first_start <= reads[std::get<0>(edgelist[0]).id]->len);
+    assert(last_end <= reads[std::get<0>(edgelist.back()).id]->len);
+    std::string prefix = reads[std::get<0>(edgelist[0]).id]->bases.substr(0,first_start);
+    std::string suffix = reads[std::get<0>(edgelist.back()).id]->bases.substr(last_end);
+    printf("last read %d length %d, cut %d\n",std::get<1>(edgelist.back()).id, reads[std::get<1>(edgelist.back()).id]->len, cut_end);
+    cut_end = reads[std::get<1>(edgelist.back()).id]->len - cut_end;
+
+    /**
+     * Consecutive lanes form a column (ladder)
+     */
+    std::vector<std::vector<std::tuple<int, int, int> > > ladders;
+
+    for (int i = 0; i < lanes.size() - 1; i++) {
+        std::vector<std::pair<int, int> > lane1 = lanes[i];
+        std::vector<std::pair<int, int> > lane2 = lanes[i + 1];
+        std::vector<std::tuple<int, int, int> > ladder;
+        int pos = 0;
+        for (int j = 0; j < lane2.size(); j++) {
+            while ((lane1[pos].first != lane2[j].first) and (pos < lane1.size() - 1)) pos++;
+            if ((lane1[pos].first == lane2[j].first))
+                ladder.push_back(std::make_tuple(lane2[j].first, lane1[pos].second, lane2[j].second));
+        }
+        ladders.push_back(ladder);
+    }
+
+
+    /**
+     * show ladders
+     */
+    for (int i = 0; i < ladders.size(); i++) {
+//            printf("Ladder %d\n", i);
+//            for (int j = 0; j < ladders[i].size(); j++) {
+//                //printf("[%d %d-%d] ", std::get<0>(ladders[i][j]), std::get<1>(ladders[i][j]), std::get<2>(ladders[i][j]) );
+//                //printf("%s\n", breads[std::get<0>(ladders[i][j])].substr(std::get<1>(ladders[i][j]),std::get<2>(ladders[i][j])-std::get<1>(ladders[i][j])).c_str());
+//
+//            }
+
+        if (ladders[i].size() == 0) {
+            printf("low coverage!\n");
+            continue;
+        }
+
+        if (ladders[i].size() > 1) {
+
+
+            int mx = 0;
+            int maxcoverage = 0;
+            for (int j = 0; j < ladders[i].size(); j++) {
+                int mincoverage = 10000;
+                int read = std::get<0>(ladders[i][j]);
+                int start = std::get<1>(ladders[i][j]);
+                int end = std::get<2>(ladders[i][j]);
+                for (int pos = start; pos < end; pos++) {
+                    if (coverages[read]->at(pos) < mincoverage) mincoverage = coverages[read]->at(pos);
+                }
+                if (mincoverage > maxcoverage) {
+                    maxcoverage = mincoverage;
+                    mx = j;
+                }
+            }
+
+//                std::cout << "ladder " << i << " num reads " << ladders[i].size() << " possibly error here " <<
+//                maxcoverage << "\n!";
+
+
+            //if (ladders[i].size() == 2) {
+            //    draft_assembly += breads[std::get<0>(ladders[i][mx])].substr(std::get<1>(ladders[i][mx]),
+            //                                                                 std::get<2>(ladders[i][mx]) -
+            //                                                                 std::get<1>(ladders[i][mx]));
+            //    continue;
+            // }
+
+
+            std::string base = breads[std::get<0>(ladders[i][mx])].substr(std::get<1>(ladders[i][mx]),
+                                                                          std::get<2>(ladders[i][mx]) -
+                                                                          std::get<1>(ladders[i][mx]));;
+            int seq_count = ladders[i].size();
+//                printf("seq_count:%d, max %d\n", seq_count, mx);
+            align_tags_t **tags_list;
+            tags_list = (align_tags_t **) calloc(seq_count, sizeof(align_tags_t *));
+            consensus_data *consensus;
+
+            int alen = (std::get<2>(ladders[i][mx]) - std::get<1>(ladders[i][mx]));
+            for (int j = 0; j < ladders[i].size(); j++) {
+
+                int blen = (std::get<2>(ladders[i][j]) - std::get<1>(ladders[i][j]));
+                char *aseq = (char *) malloc(
+                        (20 + (std::get<2>(ladders[i][mx]) - std::get<1>(ladders[i][mx]))) * sizeof(char));
+                char *bseq = (char *) malloc(
+                        (20 + (std::get<2>(ladders[i][j]) - std::get<1>(ladders[i][j]))) * sizeof(char));
+                strcpy(aseq, breads[std::get<0>(ladders[i][mx])].substr(std::get<1>(ladders[i][mx]),
+                                                                        std::get<2>(ladders[i][mx]) -
+                                                                        std::get<1>(ladders[i][mx])).c_str());
+                strcpy(bseq, breads[std::get<0>(ladders[i][j])].substr(std::get<1>(ladders[i][j]),
+                                                                       std::get<2>(ladders[i][j]) -
+                                                                       std::get<1>(ladders[i][j])).c_str());
+
+
+                aln_range *arange = (aln_range *) calloc(1, sizeof(aln_range));
+                arange->s1 = 0;
+                arange->e1 = strlen(bseq);
+                arange->s2 = 0;
+                arange->e2 = strlen(aseq);
+                arange->score = 5;
+
+                //printf("blen %d alen%d\n",strlen(bseq), strlen(aseq));
+                //printf("before get tags\n");
+
+                alignment *alng = _align(bseq, blen, aseq, alen, 150, 1);
+
+                char *q_aln_str = (char *) malloc((5 + strlen(alng->q_aln_str)) * sizeof(char));
+                char *t_aln_str = (char *) malloc((5 + strlen(alng->t_aln_str)) * sizeof(char));
+
+
+                strcpy(q_aln_str + 1, alng->q_aln_str);
+                strcpy(t_aln_str + 1, alng->t_aln_str);
+                q_aln_str[0] = 'T';
+                t_aln_str[0] = 'T';
+
+
+                for (int pos = 0; pos < strlen(q_aln_str); pos++) q_aln_str[pos] = toupper(q_aln_str[pos]);
+                for (int pos = 0; pos < strlen(t_aln_str); pos++) t_aln_str[pos] = toupper(t_aln_str[pos]);
+
+                //printf("Q:%s\nT:%s\n", q_aln_str, t_aln_str);
+
+                tags_list[j] = get_align_tags(q_aln_str,
+                                              t_aln_str,
+                                              strlen(alng->q_aln_str) + 1,
+                                              arange, (unsigned int) j, 0);
+                //free(aseq);
+                //free(bseq);
+
+                /*for (int k = 0; k < tags_list[j]->len; k++) {
+                    printf("%d %d %ld %d %c %c\n",j, k, tags_list[j]->align_tags[k].t_pos,
+                           tags_list[j]->align_tags[k].delta,
+                            //tags_list[j]->align_tags[k].p_q_base,
+                           aseq[tags_list[j]->align_tags[k].t_pos],
+                           tags_list[j]->align_tags[k].q_base);
+                }*/
+                free(q_aln_str);
+                free(t_aln_str);
+                free(aseq);
+                free(bseq);
+                free_alignment(alng);
+
+            }
+
+            //printf("%d %d\n%s\n",seq_count, strlen(seq), seq);
+
+            consensus = get_cns_from_align_tags(tags_list, seq_count, alen + 1, 1);
+//                printf("Consensus len :%d\n",strlen(consensus->sequence));
+            draft_assembly += std::string(consensus->sequence);
+
+            free_consensus_data(consensus);
+            for (int j = 0; j < seq_count; j++)
+                free_align_tags(tags_list[j]);
+
+        } else {
+            draft_assembly += breads[std::get<0>(ladders[i][0])].substr(std::get<1>(ladders[i][0]),
+                                                                        std::get<2>(ladders[i][0]) -
+                                                                        std::get<1>(ladders[i][0]));
+        }
+
+//            printf("\n");
+    }
+
+
+
+    /*for (int i = 0; i < mapping.size(); i++)
+        printf("%d %d\n", i, mapping[i]);
+    printf("[%d %d], [%d %d]\n", bedges[0]->read_A_match_start_, bedges[0]->read_A_match_end_, bedges[0]->read_B_match_start_, bedges[0]->read_B_match_end_);*/
+
+    std::cout << sequence.size() << std::endl;
+    std::cout << draft_assembly.size() << std::endl;
+
+    //if (draft_assembly.size() > 0) {
+    //    out_fa << ">Draft_assembly" << num_contig << std::endl;
+    //    out_fa << draft_assembly << std::endl;
+    //}
+    //num_contig++;
+    contig = prefix + draft_assembly + suffix + overhang;
+
+	std::cout << "ctg size:" << contig.size() << "cut_start:" << cut_start << "cut_end:" << cut_end << std::endl;
+
+    if ((cut_start <= contig.size()) and (cut_end <= contig.size()))
+    contig = contig.substr(cut_start, contig.size() - cut_end - cut_start);
+    return 0;
 }
 
 
+
+inline std::vector<std::string> glob(const std::string& pat){ // Expand the pattern `pat` and return every matching path as a std::string (empty vector when nothing is matched).
+    using namespace std; // scoped to this function body only
+    glob_t glob_result;
+    glob(pat.c_str(),GLOB_TILDE,NULL,&glob_result); // 4-argument call cannot match this 1-parameter overload, so it resolves to the glob declared elsewhere (presumably <glob.h>). NOTE(review): return value is ignored; the loop below relies on gl_pathc being 0 after a failure - confirm for the target libc.
+    vector<string> ret;
+    for(unsigned int i=0;i<glob_result.gl_pathc;++i){ // gl_pathc = number of matched paths, gl_pathv = the matches
+        ret.push_back(string(glob_result.gl_pathv[i])); // copy each match so it outlives globfree()
+    }
+    globfree(&glob_result); // release the storage held by glob_result
+    return ret;
+}; // the ';' after the closing brace is an empty declaration and has no effect
+
+
 int main(int argc, char *argv[]) {
 
     cmdline::parser cmdp;
@@ -168,7 +731,7 @@ int main(int argc, char *argv[]) {
 
 
     std::string name_mask = out + ".mas";
-    std::string name_max = out + ".max";
+	std::string name_max = out + ".max";
     std::string name_homo = out + ".homologous.txt";
     std::string name_rep = out + ".repeat.txt";
     std::string name_hg = out + ".hinges.txt";
@@ -179,7 +742,6 @@ int main(int argc, char *argv[]) {
 
 
     std::ofstream deadend_out(name_deadend);
-    std::ofstream maximal_reads(name_max);
     std::ofstream garbage_out(name_garbage);
     std::ofstream contained_out(name_contained);
     std::ifstream homo(name_homo);
@@ -204,12 +766,6 @@ int main(int argc, char *argv[]) {
 
     console->info("draft consensus");
 
-    if (cmdp.exist("debug")) {
-        char *buff = (char *) malloc(sizeof(char) * 2000);
-        getwd(buff);
-        console->info("current user {}, current working directory {}", getlogin(), buff);
-        free(buff);
-    }
 
     console->info("name of db: {}, name of .las file {}", name_db, name_las);
     console->info("name of fasta: {}, name of .paf file {}", name_fasta, name_paf);
@@ -227,6 +783,15 @@ int main(int argc, char *argv[]) {
         la.openDB(name_db);
 
 
+    std::vector<std::string> name_las_list;
+    std::string name_las_str(name_las);
+
+    if (name_las_str.find('*') != -1)
+        name_las_list = glob(name_las_str);
+    else
+        name_las_list.push_back(name_las_str);
+
+
     if (strlen(name_las) > 0)
         la.openAlignmentFile(name_las);
 
@@ -239,8 +804,9 @@ int main(int argc, char *argv[]) {
     }
 
     int n_read;
-    if (strlen(name_db) > 0)
+    if (strlen(name_db) > 0) {
         n_read = la.getReadNumber();
+    }
 
     std::vector<Read *> reads; //Vector of pointers to all reads
 
@@ -250,11 +816,49 @@ int main(int argc, char *argv[]) {
 
     console->info("# Reads: {}", n_read); // output some statistics
 
+
+
+    if (strlen(name_db) > 0) {
+        la.getRead(reads, 0, n_read);
+    }
+
+	std::ifstream max_reads_file(name_max);
+
+    std::vector<bool> maximal_read;
+    maximal_read.resize(n_read, false);
+    std::string read_line;
+	int num_active_reads = 0;
+    while(std::getline(max_reads_file, read_line))
+    {
+        int read_number;
+        read_number = atoi(read_line.c_str());
+        maximal_read[read_number] = true;
+        num_active_reads++;
+    }
+    console->info("Total number of active reads: {}/{}", num_active_reads, n_read);
+
+    for (int i = 0; i < n_read; i++){
+        reads[i]->active = maximal_read[i];
+    }
+
+
+    std::vector<int> range;
+
+    for (int i = 0; i < n_read; i++) {
+        if (reads[i]->active) range.push_back(i);
+    }
+
+    std::sort(range.begin(), range.end());
+
     std::vector<LOverlap *> aln;//Vector of pointers to all alignments
+    std::vector<LAlignment *> full_aln;//Vector of pointers to all alignments
+
 
     if (strlen(name_las) > 0) {
         la.resetAlignment();
-        la.getOverlap(aln, 0, n_aln);
+        la.getOverlap(aln, range);
+        la.resetAlignment();
+		la.getAlignment(full_aln, range);
     }
 
     if (strlen(name_paf) > 0) {
@@ -269,10 +873,6 @@ int main(int argc, char *argv[]) {
     }
 
 
-    if (strlen(name_db) > 0) {
-        la.getRead(reads, 0, n_read);
-    }
-
     console->info("Input data finished");
 
     INIReader reader(name_config);
@@ -336,6 +936,9 @@ int main(int argc, char *argv[]) {
     std::vector<std::unordered_map<int, std::vector<LOverlap *> > > idx_ab;
 
 
+
+
+
     for (int i = 0; i < n_read; i++) {
         //An initialisation for loop
         //TODO Preallocate memory. Much more efficient.
@@ -401,589 +1004,99 @@ int main(int argc, char *argv[]) {
 
     int num_contig = 0;
     int num_one_read_contig = 0;
-    while (true) {
-        if (edges_file.eof()) break;
-        edgelist.clear();
-        std::string edge_line;
-        while (!edges_file.eof()) {
-            std::getline(edges_file, edge_line);
-            //std::cout << edge_line << std::endl;
-
-            std::vector<std::string> tokens = split(edge_line, ' ');
-
-            if (tokens.size() == 1) {
-                break;
-            }
-            //std::cout << tokens.size() << std::endl;
-
-            Node node0;
-            Node node1;
-            int w;
-            if (tokens.size() > 5 ) {
-                node0.id = std::stoi(tokens[0]);
-                node0.strand = std::stoi(tokens[1]);
-
-                node1.id = std::stoi(tokens[2]);
-                node1.strand = std::stoi(tokens[3]);;
-
-                w = std::stoi(tokens[4]);
-                edgelist.push_back(std::make_tuple(node0, node1, w));
-            }
-
-
-            if (tokens.size() == 4) {
-                out_fa << ">OneReadContig" << num_one_read_contig << std::endl;
-
-
-
-                int node_id = std::stoi(tokens[0]);
-                int node_strand = std::stoi(tokens[1]);
-                int from = std::stoi(tokens[2]);
-                int to = std::stoi(tokens[3]);
-
-
-                std::string current_seq;
-
-
-                if (node_strand == 0) current_seq = reads[node_id]->bases;
-                else current_seq = reverse_complement(reads[node_id]->bases);
-
-                out_fa << current_seq.substr(from, to-from) << std::endl;
-
-                num_one_read_contig++;
-            }
-        }
-
-        std::cout << "list size:" << edgelist.size() << std::endl;
-        if (edgelist.size() == 0) continue;
-
-
-        std::vector<LAlignment *> full_alns;
-        std::vector<LAlignment *> selected;
-        std::unordered_map<int, std::vector<LAlignment *>> idx_aln;
-        la.resetAlignment();
-        std::vector<int> range;
-
-        for (int i = 0; i < edgelist.size(); i++) {
-            range.push_back(std::get<0>(edgelist[i]).id);
-            idx_aln[std::get<0>(edgelist[i]).id] = std::vector<LAlignment *>();
-        }
-
-        std::sort(range.begin(), range.end());
-
-        la.getAlignment(full_alns, range);
-
-        for (auto i:full_alns) {
-            idx_aln[i->read_A_id_].push_back(i);
-        }
-
-        for (int i = 0; i < edgelist.size(); i++) {
-            int aid = std::get<0>(edgelist[i]).id;
-            int bid = std::get<1>(edgelist[i]).id;
-            bool found = false;
-            for (int j = 0; j < idx_aln[std::get<0>(edgelist[i]).id].size(); j++) {
-                //printf("%d %d %d %d\n",bid, idx_aln[aid][j]->bid, idx_aln[aid][j]->read_A_match_end_ - idx_aln[aid][j]->read_A_match_start_, std::get<2>(edgelist[i]));
-                if ((idx_aln[aid][j]->read_B_id_ == bid) and \
-            (idx_aln[aid][j]->aepos - idx_aln[aid][j]->abpos + idx_aln[aid][j]->bepos - idx_aln[aid][j]->bbpos == std::get<2>(edgelist[i]))) {
-                    selected.push_back(idx_aln[aid][j]);
-                    found = true;
-                    break;
-                }
-                if (found) continue;
-            }
-        }
-
-        std::cout << "selected:" << selected.size() << std::endl;
-
-        std::unordered_map<int, std::unordered_map<int, std::pair<std::string, std::string> > > aln_tags_map;
-        std::vector<std::pair<std::string, std::string> > aln_tags_list;
-        std::vector<std::pair<std::string, std::string> > aln_tags_list_true_strand;
-
-
-        for (int i = 0; i < selected.size(); i++) {
-            la.recoverAlignment(selected[i]);
-            //printf("%d %d\n",selected[i]->tlen, selected[i]->trace_pts_len);
-            std::pair<std::string, std::string> res = la.getAlignmentTags(selected[i]);
-            aln_tags_map[selected[i]->read_A_id_][selected[i]->read_B_id_] = res;
-            aln_tags_list.push_back(res);
-        }
-
-
-
-        std::string sequence = "";
-
-        std::vector<LOverlap *> bedges;
-        std::vector<std::string> breads;
-
-        std::vector<std::vector<std::pair<int, int> > > pitfalls;
-
-
-        range.clear();
-        for (int i = 0; i < edgelist.size(); i++) {
-            range.push_back(std::get<0>(edgelist[i]).id);
-        }
-
-        std::vector<std::vector<int> *> coverages;
-
-        for (int i = 0; i < range.size(); i++) {
-            int aread = range[i];
-            if (idx3[aread].size() > 0) {
-                std::vector<int> *res = la.getCoverage(idx3[aread]);
-                std::vector<std::pair<int, int> > *res2 = la.lowCoverageRegions(*res, MIN_COV2);
-                //delete res;
-                coverages.push_back(res);
-                //printf("%d %d: (%d %d) ", i, aread, 0, idx3[aread][0]->alen);
-                //for (int j = 0; j < res2->size(); j++) {
-                //    printf("[%d %d] ", res2->at(j).first, res2->at(j).second);
-                //}
-                //printf("\n");
-                pitfalls.push_back(*res2);
-                delete res2;
-            }
-        }
-
-
-        /***
-         * Prepare the data
-         */
-
-        for (int i = 0; i < edgelist.size(); i++) {
-
-            std::vector<LOverlap *> currentalns = idx[std::get<0>(edgelist[i]).id][std::get<1>(edgelist[i]).id];
-
-            LOverlap *currentaln = NULL;
-
-            for (int j = 0; j < currentalns.size(); j++) {
-                //std::cout << std::get<0>(edgelist[i]).id << " " << std::get<1>(edgelist[i]).id << " " << currentalns[j]->match_type_ << std::endl;
-                if (currentalns[j]->read_A_match_end_ - currentalns[j]->read_A_match_start_ + currentalns[j]->read_B_match_end_ - currentalns[j]->read_B_match_start_ ==
-                    std::get<2>(edgelist[i]))
-                    currentaln = currentalns[j];
-            }
-
-            if (currentaln == NULL) exit(1);
-            //currentaln->show();
-
-            std::string current_seq;
-            std::string next_seq;
-
-            std::string aln_tags1;
-            std::string aln_tags2;
-
-
-            if (std::get<0>(edgelist[i]).strand == 0)
-                current_seq = reads[std::get<0>(edgelist[i]).id]->bases;
-            else
-                current_seq = reverse_complement(reads[std::get<0>(edgelist[i]).id]->bases);
-
-            if (std::get<0>(edgelist[i]).strand == 0) {
-                aln_tags1 = aln_tags_list[i].first;
-                aln_tags2 = aln_tags_list[i].second;
-            } else {
-                aln_tags1 = reverse_complement(aln_tags_list[i].first);
-                aln_tags2 = reverse_complement(aln_tags_list[i].second);
-            }
-
-            aln_tags_list_true_strand.push_back(std::pair<std::string, std::string>(aln_tags1, aln_tags2));
-
-            if (std::get<1>(edgelist[i]).strand == 0)
-                next_seq = reads[std::get<1>(edgelist[i]).id]->bases;
-            else
-                next_seq = reverse_complement(reads[std::get<1>(edgelist[i]).id]->bases);
-
-            int abpos, aepos, alen, bbpos, bepos, blen, aes, aee, bes, bee;
-
-            alen = currentaln->alen;
-            blen = currentaln->blen;
-
-
-            if (std::get<0>(edgelist[i]).strand == 0) {
-                abpos = currentaln->read_A_match_start_;
-                aepos = currentaln->read_A_match_end_;
-
-                aes = currentaln->eff_read_A_start_;
-                aee = currentaln->eff_read_A_end_;
-
-            } else {
-                abpos = alen - currentaln->read_A_match_end_;
-                aepos = alen - currentaln->read_A_match_start_;
-
-                aes = alen - currentaln->eff_read_A_end_;
-                aee = alen - currentaln->eff_read_A_start_;
-            }
-
-            if (((std::get<1>(edgelist[i]).strand == 0))) {
-                bbpos = currentaln->read_B_match_start_;
-                bepos = currentaln->read_B_match_end_;
-
-                bes = currentaln->eff_read_B_start_;
-                bee = currentaln->eff_read_B_end_;
-
-            } else {
-                bbpos = blen - currentaln->read_B_match_end_;
-                bepos = blen - currentaln->read_B_match_start_;
-
-                bes = blen - currentaln->eff_read_B_end_;
-                bee = blen - currentaln->eff_read_B_start_;
-
-            }
-            aes = 0;
-            bes = 0;
-            aee = alen;
-            bee = blen;
-
-//            printf("%d %d [[%d %d] << [%d %d]] x [[%d %d] << [%d %d]]\n", std::get<0>(edgelist[i]).id, std::get<1>(edgelist[i]).id, abpos, aepos, aes, aee, bbpos, bepos, bes, bee);
-
-            LOverlap *new_ovl = new LOverlap();
-            new_ovl->read_A_match_start_ = abpos;
-            new_ovl->read_A_match_end_ = aepos;
-            new_ovl->read_B_match_start_ = bbpos;
-            new_ovl->read_B_match_end_ = bepos;
-            new_ovl->eff_read_A_end_ = aee;
-            new_ovl->eff_read_A_start_ = aes;
-            new_ovl->eff_read_B_end_ = bee;
-            new_ovl->eff_read_B_start_ = bes;
-            new_ovl->alen = currentaln->alen;
-            new_ovl->blen = currentaln->blen;
-            new_ovl->read_A_id_ = std::get<0>(edgelist[i]).id;
-            new_ovl->read_B_id_ = std::get<1>(edgelist[i]).id;
-
-
-            bedges.push_back(new_ovl);
-            breads.push_back(current_seq);
-
-
-        }
-        //need to trim the end
-
-
-
-        std::vector<std::vector<int> > mappings;
-        for (int i = 0; i < range.size(); i++) {
-            mappings.push_back(get_mapping(aln_tags_list_true_strand[i].first, aln_tags_list_true_strand[i].second));
-        }
-
-        std::cout << bedges.size() << " " << breads.size() << " " << selected.size() << " "
-        << aln_tags_list.size() << " " << pitfalls.size() << " " << aln_tags_list_true_strand.size()
-        << " " << mappings.size() << " " << coverages.size() << std::endl;
-
-        /*for (int i = 0; i < bedges.size() - 1; i++) {
-            printf("%d %d %d %d %d\n", bedges[i]->read_B_match_start_, bedges[i]->read_B_match_end_, bedges[i+1]->read_A_match_start_, bedges[i+1]->read_A_match_end_, bedges[i]->read_B_match_end_ - bedges[i+1]->read_A_match_start_);
-        }*/
-
-
-        int tspace = TSPACE; // set lane length to be 500
-        int nlane = 0;
-
-
-        //printf("%d %d\n", mappings[0][800], mappings[0][1000]); // debug output
-        //printf("%s\n%s\n", breads[0].substr(bedges[0]->read_A_match_start_ + 800, 50).c_str(),
-        //       breads[1].substr(bedges[0]->read_B_match_start_ + mappings[0][800], 50).c_str()); //debug output
-
-
-        std::vector<std::vector<std::pair<int, int>>> lanes;
-
-        std::string draft_assembly = "";
-
-
-        int currentlane = 0;
-        int current_starting_read = 0;
-        int current_starting_space = 1;
-        int current_starting_offset = 0;
-        int n_bb_reads = range.size();
-        std::vector<std::vector<int>> trace_pts(n_bb_reads);
-        bool revert = false;
-
-
-        int rmax = -1;
-        /**
-         * Move forward and put "trace points"
-         */
-        while (current_starting_read < n_bb_reads - 1) {
-            int currentread = current_starting_read;
-            int additional_offset = 0;
-            while (bedges[current_starting_read]->read_A_match_start_ + current_starting_space * tspace +
-                   current_starting_offset + additional_offset <
-                   bedges[current_starting_read]->read_A_match_end_ - EDGE_SAFE) {
-                int waypoint = bedges[current_starting_read]->read_A_match_start_ + tspace * current_starting_space +
-                               current_starting_offset + additional_offset;
-                //if ((waypoint - bedges[current_starting_read]->read_A_match_start_) < EDGE_SAFE)
-                //    waypoint += EDGE_SAFE;
-
-                //int next_waypoint = mappings[currentread][waypoint - bedges[current_starting_read]->read_A_match_start_] + bedges[current_starting_read]->read_B_match_start_;
-                std::vector<std::pair<int, int> > lane;
-
-                while ((waypoint > bedges[currentread]->read_A_match_start_) and
-                       (waypoint < bedges[currentread]->read_A_match_end_)) {
-
-                    printf("%d %d\n", currentread, waypoint);
-                    trace_pts[currentread].push_back(waypoint);
-
-
-                    /*if (waypoint > bedges[currentread]->read_A_match_end_ - EDGE_SAFE) {
-                        printf("Reaching the end, neglect low coverage\n");
-                    }
-
-                    if ((coverages[currentread]->at(waypoint) < MIN_COV2) and (waypoint < bedges[currentread]->read_A_match_end_ - EDGE_SAFE)) {
-                        revert = true;
-                        printf("Low coverage, revert\n");
-                        break;
-                    }*/
-
+    std::string current_name;
+    std::string edge_line;
+    std::string contig;
+    bool one_read_contig = false;
+    bool two_read_contig = false;
+    int cut_start = 0, cut_end = 0;
+
+    while (!edges_file.eof()) {
+        std::getline(edges_file, edge_line);
+        std::cout << edge_line << std::endl;
+        if (edge_line.size() == 0) continue;
+        if (edge_line[0] == '>') continue;
+        std::vector<std::string> tokens = split(edge_line, ' ');
+        if (tokens.size() < 6) std::cout << "Error! Wrong format." << std::endl;
+
+        Node node0;
+        Node node1;
+        node0.id = std::stoi(tokens[1]);
+        node1.id = std::stoi(tokens[3]);
+    }
 
-                    lane.push_back(std::pair<int, int>(currentread, waypoint));
-                    if (currentread > rmax) rmax = currentread;
-                    //int previous_wp = waypoint;
-                    waypoint = mappings[currentread][waypoint - bedges[currentread]->read_A_match_start_] +
-                               bedges[currentread]->read_B_match_start_;
-                    //printf("%s\n%s\n", breads[currentread].substr(previous_wp,50).c_str(), breads[currentread+1].substr(waypoint,50).c_str());
-                    currentread++;
-                    if (currentread >= n_bb_reads) break;
-                }
-                if (currentread < n_bb_reads) if (waypoint < bedges[currentread]->alen) {
-                    lane.push_back(std::pair<int, int>(currentread, waypoint));
-                    if (currentread > rmax) rmax = currentread;
-                }
-                /*if (revert) {
-                    printf("revert\n");
-                    revert = false;
-                    while (currentread >= current_starting_read) {
-                        trace_pts[currentread].pop_back();
-                        currentread --;
-                        additional_offset += STEP;
-                    }
-                    currentread = current_starting_read;
-                }
-                else*/
-                {
-                    if (currentread >= rmax)
-                        lanes.push_back(lane);
-                    current_starting_space++;
-                    currentread = current_starting_read;
+    edges_file.clear();
+    edges_file.seekg(0, std::ios::beg);
 
-                }
+    while (!edges_file.eof()) {
+        std::getline(edges_file, edge_line);
+        if (edge_line[0] == '>') {
+            std::cout << current_name << std::endl;
 
+            if (edgelist.size() > 0)
+            {
+                draft_assembly_ctg(edgelist, la, full_aln, idx3, idx, reads, TSPACE, EDGE_SAFE, MIN_COV2, cut_start, cut_end, one_read_contig, two_read_contig, contig);
+                out_fa << current_name << std::endl;
+                out_fa << contig << std::endl;
             }
-
-            current_starting_read++;
-            current_starting_space = 1;//get next space;
-            if (trace_pts[current_starting_read].size() == 0)
-                current_starting_offset = 0;
-            else
-                current_starting_offset =
-                        trace_pts[current_starting_read].back() - bedges[current_starting_read]->read_A_match_start_;
-        }
-
-
-        /**
-         * Show trace points on reads
-         */
-        for (int i = 0; i < n_bb_reads; i++) {
-            printf("Read %d:", i);
-            for (int j = 0; j < trace_pts[i].size(); j++) {
-                printf("%d ", trace_pts[i][j]);
-            }
-            printf("\n");
+            edgelist.clear();
+            current_name = edge_line;
+            one_read_contig = false;
+            two_read_contig = false;
+            cut_start = 0;
+            cut_end = 0;
+            continue;
         }
 
-        /**
-         * Show lanes
-         */
-
-        for (int i = 0; i < lanes.size(); i++) {
-
-            printf("Lane %d\n", i);
-            for (int j = 0; j < lanes[i].size(); j++) {
-                printf("[%d %d] ", lanes[i][j].first, lanes[i][j].second);
-            }
-            printf("\n");
-        }
+        if (edges_file.eof()) {
+            // process edges list
+            std::cout << current_name << std::endl;
 
+            draft_assembly_ctg(edgelist, la, full_aln, idx3, idx, reads, TSPACE, EDGE_SAFE, MIN_COV2, cut_start, cut_end, one_read_contig, two_read_contig, contig);
+            out_fa << current_name << std::endl;
+            out_fa << contig << std::endl;
 
-        printf("In total %d lanes\n", lanes.size());
-        if (lanes.size() == 0) {
-            draft_assembly = breads[0];
-            out_fa << ">DraftAssemblyContig" << num_contig << std::endl;
-            out_fa << draft_assembly << std::endl;
-            num_contig++;
+            edgelist.clear();
+            one_read_contig = false;
+            two_read_contig = false;
             continue;
         }
 
-
-
-        /**
-         * Consequtive lanes form a column (ladder)
-         */
-
-        std::vector<std::vector<std::tuple<int, int, int> > > ladders;
-
-        for (int i = 0; i < lanes.size() - 1; i++) {
-            std::vector<std::pair<int, int> > lane1 = lanes[i];
-            std::vector<std::pair<int, int> > lane2 = lanes[i + 1];
-            std::vector<std::tuple<int, int, int> > ladder;
-            int pos = 0;
-            for (int j = 0; j < lane2.size(); j++) {
-                while ((lane1[pos].first != lane2[j].first) and (pos < lane1.size() - 1)) pos++;
-                if ((lane1[pos].first == lane2[j].first))
-                    ladder.push_back(std::make_tuple(lane2[j].first, lane1[pos].second, lane2[j].second));
-            }
-            ladders.push_back(ladder);
+        std::vector<std::string> tokens = split(edge_line, ' ');
+        if (tokens.size() < 6) std::cout << "Error! Wrong format." << std::endl;
+        std::cout << edge_line << std::endl;
+
+        Node node0;
+        Node node1;
+        int w;
+        node0.id = std::stoi(tokens[1]);
+        node0.strand = std::stoi(tokens[2]);
+
+        node1.id = std::stoi(tokens[3]);
+        node1.strand = std::stoi(tokens[4]);;
+
+        if (tokens[0] == "O") {
+            w = 0;
+            one_read_contig = true;
+        } else if (tokens[0] == "D") {
+            w = std::stoi(tokens[5]);
+            two_read_contig = true;
         }
-
-
-        /**
-         * show ladders
-         */
-        for (int i = 0; i < ladders.size(); i++) {
-//            printf("Ladder %d\n", i);
-//            for (int j = 0; j < ladders[i].size(); j++) {
-//                //printf("[%d %d-%d] ", std::get<0>(ladders[i][j]), std::get<1>(ladders[i][j]), std::get<2>(ladders[i][j]) );
-//                //printf("%s\n", breads[std::get<0>(ladders[i][j])].substr(std::get<1>(ladders[i][j]),std::get<2>(ladders[i][j])-std::get<1>(ladders[i][j])).c_str());
-//
-//            }
-
-            if (ladders[i].size() == 0) {
-                printf("low coverage!\n");
-                continue;
-            }
-
-            if (ladders[i].size() > 1) {
-
-
-                int mx = 0;
-                int maxcoverage = 0;
-                for (int j = 0; j < ladders[i].size(); j++) {
-                    int mincoverage = 10000;
-                    int read = std::get<0>(ladders[i][j]);
-                    int start = std::get<1>(ladders[i][j]);
-                    int end = std::get<2>(ladders[i][j]);
-                    for (int pos = start; pos < end; pos++) {
-                        if (coverages[read]->at(pos) < mincoverage) mincoverage = coverages[read]->at(pos);
-                    }
-                    if (mincoverage > maxcoverage) {
-                        maxcoverage = mincoverage;
-                        mx = j;
-                    }
-                }
-
-//                std::cout << "ladder " << i << " num reads " << ladders[i].size() << " possibly error here " <<
-//                maxcoverage << "\n!";
-
-
-                //if (ladders[i].size() == 2) {
-                //    draft_assembly += breads[std::get<0>(ladders[i][mx])].substr(std::get<1>(ladders[i][mx]),
-                //                                                                 std::get<2>(ladders[i][mx]) -
-                //                                                                 std::get<1>(ladders[i][mx]));
-                //    continue;
-               // }
-
-
-                std::string base = breads[std::get<0>(ladders[i][mx])].substr(std::get<1>(ladders[i][mx]),
-                                                                              std::get<2>(ladders[i][mx]) -
-                                                                              std::get<1>(ladders[i][mx]));;
-                int seq_count = ladders[i].size();
-//                printf("seq_count:%d, max %d\n", seq_count, mx);
-                align_tags_t **tags_list;
-                tags_list = (align_tags_t **) calloc(seq_count, sizeof(align_tags_t *));
-                consensus_data *consensus;
-
-                int alen = (std::get<2>(ladders[i][mx]) - std::get<1>(ladders[i][mx]));
-                for (int j = 0; j < ladders[i].size(); j++) {
-
-                    int blen = (std::get<2>(ladders[i][j]) - std::get<1>(ladders[i][j]));
-                    char *aseq = (char *) malloc(
-                            (20 + (std::get<2>(ladders[i][mx]) - std::get<1>(ladders[i][mx]))) * sizeof(char));
-                    char *bseq = (char *) malloc(
-                            (20 + (std::get<2>(ladders[i][j]) - std::get<1>(ladders[i][j]))) * sizeof(char));
-                    strcpy(aseq, breads[std::get<0>(ladders[i][mx])].substr(std::get<1>(ladders[i][mx]),
-                                                                            std::get<2>(ladders[i][mx]) -
-                                                                            std::get<1>(ladders[i][mx])).c_str());
-                    strcpy(bseq, breads[std::get<0>(ladders[i][j])].substr(std::get<1>(ladders[i][j]),
-                                                                           std::get<2>(ladders[i][j]) -
-                                                                           std::get<1>(ladders[i][j])).c_str());
-
-
-                    aln_range *arange = (aln_range *) calloc(1, sizeof(aln_range));
-                    arange->s1 = 0;
-                    arange->e1 = strlen(bseq);
-                    arange->s2 = 0;
-                    arange->e2 = strlen(aseq);
-                    arange->score = 5;
-
-                    //printf("blen %d alen%d\n",strlen(bseq), strlen(aseq));
-                    //printf("before get tags\n");
-
-                    alignment *alng = _align(bseq, blen, aseq, alen, 150, 1);
-
-                    char *q_aln_str = (char *) malloc((5 + strlen(alng->q_aln_str)) * sizeof(char));
-                    char *t_aln_str = (char *) malloc((5 + strlen(alng->t_aln_str)) * sizeof(char));
-
-
-                    strcpy(q_aln_str + 1, alng->q_aln_str);
-                    strcpy(t_aln_str + 1, alng->t_aln_str);
-                    q_aln_str[0] = 'T';
-                    t_aln_str[0] = 'T';
-
-
-                    for (int pos = 0; pos < strlen(q_aln_str); pos++) q_aln_str[pos] = toupper(q_aln_str[pos]);
-                    for (int pos = 0; pos < strlen(t_aln_str); pos++) t_aln_str[pos] = toupper(t_aln_str[pos]);
-
-                    //printf("Q:%s\nT:%s\n", q_aln_str, t_aln_str);
-
-                    tags_list[j] = get_align_tags(q_aln_str,
-                                                  t_aln_str,
-                                                  strlen(alng->q_aln_str) + 1,
-                                                  arange, (unsigned int) j, 0);
-                    //free(aseq);
-                    //free(bseq);
-
-                    /*for (int k = 0; k < tags_list[j]->len; k++) {
-                        printf("%d %d %ld %d %c %c\n",j, k, tags_list[j]->align_tags[k].t_pos,
-                               tags_list[j]->align_tags[k].delta,
-                                //tags_list[j]->align_tags[k].p_q_base,
-                               aseq[tags_list[j]->align_tags[k].t_pos],
-                               tags_list[j]->align_tags[k].q_base);
-                    }*/
-                    free(q_aln_str);
-                    free(t_aln_str);
-                    free(aseq);
-                    free(bseq);
-                    free_alignment(alng);
-
-                }
-
-                //printf("%d %d\n%s\n",seq_count, strlen(seq), seq);
-
-                consensus = get_cns_from_align_tags(tags_list, seq_count, alen + 1, 1);
-//                printf("Consensus len :%d\n",strlen(consensus->sequence));
-                draft_assembly += std::string(consensus->sequence);
-
-                free_consensus_data(consensus);
-                for (int j = 0; j < seq_count; j++)
-                    free_align_tags(tags_list[j]);
-
-            } else {
-                draft_assembly += breads[std::get<0>(ladders[i][0])].substr(std::get<1>(ladders[i][0]),
-                                                                            std::get<2>(ladders[i][0]) -
-                                                                            std::get<1>(ladders[i][0]));
-            }
-
-//            printf("\n");
+        else w = std::stoi(tokens[5]);
+
+        edgelist.push_back(std::make_tuple(node0, node1, w));
+
+        if (tokens[0] == "O") {
+             cut_start = std::stoi(tokens[5]);
+             cut_end = std::stoi(tokens[6]);
+        } else if (tokens[0] == "S") {
+            cut_start = std::stoi(tokens[6]);
+        } else if (tokens[0] == "E") {
+            cut_end = std::stoi(tokens[6]);
+        } else if (tokens[0] == "D") {
+             cut_start = std::stoi(tokens[6]);
+             cut_end = std::stoi(tokens[7]);
         }
-
-
-
-        /*for (int i = 0; i < mapping.size(); i++)
-            printf("%d %d\n", i, mapping[i]);
-        printf("[%d %d], [%d %d]\n", bedges[0]->read_A_match_start_, bedges[0]->read_A_match_end_, bedges[0]->read_B_match_start_, bedges[0]->read_B_match_end_);*/
-
-        std::cout << sequence.size() << std::endl;
-        std::cout << draft_assembly.size() << std::endl;
-
-
-        out_fa << ">Draft_assembly" << num_contig << std::endl;
-        out_fa << draft_assembly << std::endl;
-        num_contig++;
-
     }
 
     if (strlen(name_db) > 0)
diff --git a/src/consensus/draft_chopper.cpp b/src/consensus/draft_chopper.cpp
index 1ce4a30..f27a6e6 100644
--- a/src/consensus/draft_chopper.cpp
+++ b/src/consensus/draft_chopper.cpp
@@ -605,30 +605,30 @@ int main(int argc, char *argv[]) {
                 abpos = currentaln->read_A_match_start_;
                 aepos = currentaln->read_A_match_end_;
 
-                aes = currentaln->eff_read_A_start_;
-                aee = currentaln->eff_read_A_end_;
+                aes = currentaln->eff_read_A_read_start_;
+                aee = currentaln->eff_read_A_read_end_;
 
             } else {
                 abpos = alen - currentaln->read_A_match_end_;
                 aepos = alen - currentaln->read_A_match_start_;
 
-                aes = alen - currentaln->eff_read_A_end_;
-                aee = alen - currentaln->eff_read_A_start_;
+                aes = alen - currentaln->eff_read_A_read_end_;
+                aee = alen - currentaln->eff_read_A_read_start_;
             }
 
             if (((std::get<1>(edgelist[i]).strand == 0))) {
                 bbpos = currentaln->read_B_match_start_;
                 bepos = currentaln->read_B_match_end_;
 
-                bes = currentaln->eff_read_B_start_;
-                bee = currentaln->eff_read_B_end_;
+                bes = currentaln->eff_read_B_read_start_;
+                bee = currentaln->eff_read_B_read_end_;
 
             } else {
                 bbpos = blen - currentaln->read_B_match_end_;
                 bepos = blen - currentaln->read_B_match_start_;
 
-                bes = blen - currentaln->eff_read_B_end_;
-                bee = blen - currentaln->eff_read_B_start_;
+                bes = blen - currentaln->eff_read_B_read_end_;
+                bee = blen - currentaln->eff_read_B_read_start_;
 
             }
             aes = 0;
@@ -643,10 +643,10 @@ int main(int argc, char *argv[]) {
             new_ovl->read_A_match_end_ = aepos;
             new_ovl->read_B_match_start_ = bbpos;
             new_ovl->read_B_match_end_ = bepos;
-            new_ovl->eff_read_A_end_ = aee;
-            new_ovl->eff_read_A_start_ = aes;
-            new_ovl->eff_read_B_end_ = bee;
-            new_ovl->eff_read_B_start_ = bes;
+            new_ovl->eff_read_A_read_end_ = aee;
+            new_ovl->eff_read_A_read_start_ = aes;
+            new_ovl->eff_read_B_read_end_ = bee;
+            new_ovl->eff_read_B_read_start_ = bes;
             new_ovl->alen = currentaln->alen;
             new_ovl->blen = currentaln->blen;
             new_ovl->read_A_id_ = std::get<0>(edgelist[i]).id;
diff --git a/src/filter/CMakeLists.txt b/src/filter/CMakeLists.txt
index 9829e6a..d98dff2 100644
--- a/src/filter/CMakeLists.txt
+++ b/src/filter/CMakeLists.txt
@@ -2,3 +2,5 @@ cmake_minimum_required(VERSION 3.2)
 
 add_executable(Reads_filter filter)
 target_link_libraries(Reads_filter LAInterface ini spdlog)
+
+install(TARGETS Reads_filter DESTINATION ${libexec})
diff --git a/src/filter/filter.cpp b/src/filter/filter.cpp
index 02bc4e8..eecc1e7 100644
--- a/src/filter/filter.cpp
+++ b/src/filter/filter.cpp
@@ -25,97 +25,41 @@
 #include "LAInterface.h"
 #include "cmdline.h"
 
-
-#define LAST_READ_SYMBOL  '$'
-
-
-typedef std::tuple<Node, Node, int> Edge_w; //Edge with weight
-typedef std::pair<Node, Node> Edge_nw; //Edge without weights
-
+std::string lastN(std::string input, int n)  // last n chars of input; whole string if shorter, "" if n <= 0
+    {
+    return n <= 0 ? std::string() : input.substr(input.size() > (size_t) n ? input.size() - n : 0);
+    }
 
 inline std::vector<std::string> glob(const std::string& pat){
     using namespace std;
     glob_t glob_result;
-    glob(pat.c_str(),GLOB_TILDE,NULL,&glob_result);
+    int i = 1;
+    std::string search_name;
+    search_name = pat + "."+std::to_string(i)+".las";
+    std::cout << search_name << endl;
+    glob(search_name.c_str(),GLOB_TILDE,NULL,&glob_result);
+//    std::cout << "Number of files " << glob_result.gl_pathc << std::endl;
+
     vector<string> ret;
-    for(unsigned int i=0;i<glob_result.gl_pathc;++i){
-        ret.push_back(string(glob_result.gl_pathv[i]));
-    }
-    globfree(&glob_result);
-    return ret;
-}
 
-static int ORDER(const void *l, const void *r) {
-    //Returns the difference between l and r. Why void pointer?
-    int x = *((int32 *) l);
-    int y = *((int32 *) r);
-    return (x - y);
-}
 
-std::ostream& operator<<(std::ostream& out, const MatchType value){
-    //What is this doing?
-    static std::map<MatchType, std::string> strings;
-    if (strings.size() == 0){
-#define INSERT_ELEMENT(p) strings[p] = #p
-        INSERT_ELEMENT(FORWARD);
-        INSERT_ELEMENT(BACKWARD);
-        INSERT_ELEMENT(MISMATCH_LEFT);
-        INSERT_ELEMENT(MISMATCH_RIGHT);
-        INSERT_ELEMENT(COVERED);
-        INSERT_ELEMENT(COVERING);
-        INSERT_ELEMENT(UNDEFINED);
-        INSERT_ELEMENT(MIDDLE);
-#undef INSERT_ELEMENT
+    while (glob_result.gl_pathc != 0){
+        ret.push_back(string(glob_result.gl_pathv[0]));
+        globfree(&glob_result); // release this part's match list before the next glob() overwrites it (was leaked)
+        i ++;
+        search_name = pat + "."+std::to_string(i)+".las";
+        glob(search_name.c_str(),GLOB_TILDE,NULL,&glob_result);
     }
 
-    return out << strings[value];
-}
-
-bool pairAscend(const std::pair<int, int>& firstElem,  const std::pair<int, int>& secondElem) {
-    return firstElem.first < secondElem.first;
-}
-
-bool pairDescend(const std::pair<int, int>& firstElem,  const std::pair<int, int>& secondElem) {
-    return firstElem.first > secondElem.first;
-}
-
+    std::cout << "-------------------------"<< std::endl;
+    std::cout << "Number of files " << i-1 << std::endl;
+    std::cout << "Input string " << pat.c_str() << std::endl;
+    std::cout << "-------------------------"<< std::endl;
 
-bool compare_overlap(LOverlap * ovl1, LOverlap * ovl2) {
-    //Returns True if the sum of the match lengths of the two reads in ovl1 > the sum of the  overlap lengths of the two reads in ovl2
-    //Returns False otherwise.
-    return ((ovl1->read_A_match_end_ - ovl1->read_A_match_start_ + ovl1->read_B_match_end_ - ovl1->read_B_match_start_)
-            > (ovl2->read_A_match_end_ - ovl2->read_A_match_start_ + ovl2->read_B_match_end_ - ovl2->read_B_match_start_));
-}
-
-bool compare_sum_overlaps(const std::vector<LOverlap * > * ovl1, const std::vector<LOverlap *> * ovl2) {
-    //Returns True if the sum of matches over both reads for overlaps in ovl1  > sum of matches over both reads for overlaps in ovl2
-    //Returns False otherwise
-    int sum1 = 0;
-    int sum2 = 0;
-    for (int i = 0; i < ovl1->size(); i++)
-        sum1 += (*ovl1)[i]->read_A_match_end_ - (*ovl1)[i]->read_A_match_start_ +
-                (*ovl1)[i]->read_B_match_end_ - (*ovl1)[i]->read_B_match_start_;
-    for (int i = 0; i < ovl2->size(); i++)
-        sum2 += (*ovl2)[i]->read_A_match_end_ - (*ovl2)[i]->read_A_match_start_ +
-                (*ovl2)[i]->read_B_match_end_ - (*ovl2)[i]->read_B_match_start_;
-    return sum1 > sum2;
-}
-
-bool compare_pos(LOverlap * ovl1, LOverlap * ovl2) {
-    //True if ovl1 starts earlier than ovl2 on read a.
-    return (ovl1->read_A_match_start_) > (ovl2->read_A_match_start_);
-}
-
-bool compare_overlap_abpos(LOverlap * ovl1, LOverlap * ovl2) {
-    //True if ovl2 starts earlier than ovl1 on read a.
-    //flips the two argumenst in compare_pos
-    return ovl1->read_A_match_start_ < ovl2->read_A_match_start_;
+    globfree(&glob_result);
+    return ret;
 }
 
-bool compare_overlap_aepos(LOverlap * ovl1, LOverlap * ovl2) {
-    //Same as compare_pos?
-    return ovl1->read_A_match_start_ > ovl2->read_A_match_start_;
-}
 
 std::vector<std::pair<int,int>> Merge(std::vector<LOverlap *> & intervals, int cutoff)
 //Returns sections of read a which are covered by overlaps. Each overlap is considered as
@@ -232,18 +176,33 @@ int main(int argc, char *argv[]) {
     cmdp.add<std::string>("prefix", 'x', "prefix of (intermediate) output", false, "out");
     cmdp.add<std::string>("restrictreads",'r',"restrict to reads in the file",false,"");
     cmdp.add<std::string>("log", 'g', "log folder name", false, "log");
+    cmdp.add("mlas", '\0', "multiple las files");
     cmdp.add("debug", '\0', "debug mode");
     cmdp.parse_check(argc, argv);
 
     LAInterface la;
     const char * name_db = cmdp.get<std::string>("db").c_str(); //.db file of reads to load
-    const char * name_las = cmdp.get<std::string>("las").c_str();//.las file of alignments
+    const char * name_las_base = cmdp.get<std::string>("las").c_str();//.las file of alignments
     const char * name_paf = cmdp.get<std::string>("paf").c_str();
     const char * name_fasta = cmdp.get<std::string>("fasta").c_str();
     const char * name_config = cmdp.get<std::string>("config").c_str();//name of the configuration file, in INI format
     std::string out = cmdp.get<std::string>("prefix");
     bool has_qv = true;
     const char * name_restrict = cmdp.get<std::string>("restrictreads").c_str();
+
+    std::string name_las_string;
+    if (cmdp.exist("mlas"))
+        name_las_string =  std::string(name_las_base);
+    else {
+        if (lastN(std::string(name_las_base), 4) == ".las")
+            name_las_string = std::string(name_las_base);
+        else
+            name_las_string = std::string(name_las_base) + ".las";
+    }
+
+
+    const char * name_las = name_las_string.c_str();
+
     /**
      * There are two sets of input, the first is db+las, which corresponds to daligner as an overlapper,
      * the other is fasta + paf, which corresponds to minimap as an overlapper.
@@ -263,12 +222,6 @@ int main(int argc, char *argv[]) {
 
     console->info("Reads filtering");
 
-    if (cmdp.exist("debug")) {
-        char *buff = (char *) malloc(sizeof(char) * 2000);
-        getwd(buff);
-        console->info("current user {}, current working directory {}", getlogin(), buff);
-        free(buff);
-    }
 
     console->info("name of db: {}, name of .las file {}", name_db, name_las);
     console->info("name of fasta: {}, name of .paf file {}", name_fasta, name_paf);
@@ -283,10 +236,14 @@ int main(int argc, char *argv[]) {
     if (strlen(name_db) > 0)
         la.openDB(name_db);
 
+
     std::vector<std::string> name_las_list;
     std::string name_las_str(name_las);
-    if (name_las_str.find('*') != -1)
+    console->info("Las files: {}", name_las_str);
+    if (cmdp.exist("mlas")) {
+        console->info("Calling glob.");
         name_las_list = glob(name_las_str);
+    }
     else
         name_las_list.push_back(name_las_str);
 
@@ -314,7 +271,8 @@ int main(int argc, char *argv[]) {
 
     if (strlen(name_db) > 0) {
         la.getRead(reads,0,n_read);
-        la.getQV(QV,0,n_read); // load QV track from .db file
+        if (la.getQV(QV,0,n_read) != 0) // load QV track from .db file
+            has_qv = false;
     }
 
 
@@ -402,8 +360,8 @@ int main(int argc, char *argv[]) {
     const int MIN_REPEAT_ANNOTATION_THRESHOLD = (int) reader.GetInteger("filter", "min_repeat_annotation_threshold", 10);
     const int MAX_REPEAT_ANNOTATION_THRESHOLD = (int) reader.GetInteger("filter", "max_repeat_annotation_threshold", 20);
     const int REPEAT_ANNOTATION_GAP_THRESHOLD = (int) reader.GetInteger("filter", "repeat_annotation_gap_threshold",300);
-    const int NO_HINGE_REGION = (int) reader.GetInteger("filter", "no_hinge_region",500);
     //How far two hinges of the same type can be
+    const int NO_HINGE_REGION = (int) reader.GetInteger("filter", "no_hinge_region",500);
     const int HINGE_MIN_SUPPORT = (int) reader.GetInteger("filter", "hinge_min_support", 7);
     //Minimum number of reads that have to start in a reso length interval to be considered in hinge calling
     const int HINGE_BIN_PILEUP_THRESHOLD = (int) reader.GetInteger("filter", "hinge_min_pileup", 7);
@@ -415,6 +373,7 @@ int main(int argc, char *argv[]) {
     const int HINGE_TOLERANCE_LENGTH = (int) reader.GetInteger("filter", "hinge_tolerance_length", 100);
     //Reads starting at +/- HINGE_TOLERANCE_LENGTH are considered reads starting at hinges
     HINGE_BIN_LENGTH=2*HINGE_TOLERANCE_LENGTH;
+    bool delete_telomere = (int) reader.GetInteger("layout", "del_telomere", 0);
 
     console->info("use_qv_mask set to {}",use_qv_mask);
     use_qv_mask = use_qv_mask and has_qv;
@@ -463,6 +422,9 @@ int main(int argc, char *argv[]) {
     std::ofstream filtered(out + ".filtered.fasta");
     std::ofstream hg(out + ".hinges.txt");
     std::ofstream mask(out + ".mas");
+    std::ofstream comask(out + ".cmas");
+    std::ofstream covflag(out + ".cov.flag");
+    std::ofstream selfflag(out + ".self.flag");
 
     for (int part = 0; part < name_las_list.size(); part++) {
 
@@ -484,7 +446,7 @@ int main(int argc, char *argv[]) {
 
         if (strlen(name_las) > 0) {
             la.resetAlignment();
-            la.getOverlap(aln, 0, n_aln);
+            la.getOverlap(aln, 0, n_read);
         }
 
         if (strlen(name_paf) > 0) {
@@ -501,14 +463,19 @@ int main(int argc, char *argv[]) {
         console->info("Input data finished, part {}/{}", part + 1, name_las_list.size());
 
 
+        console->info("length of alignments {}", aln.size());
+        //if (aln.size() == 0) continue;
 
         int r_begin = aln.front()->read_A_id_;
         int r_end = aln.back()->read_A_id_;
 
+        console->info("begin {} end {}", r_begin, r_end);
+
 
         std::vector<std::vector <LOverlap * > > idx_pileup; // this is the pileup
         std::vector<std::vector <LOverlap * > > idx_pileup_dedup; // this is the deduplicated pileup
         std::vector<std::unordered_map<int, std::vector<LOverlap *> > > idx_ab; //unordered_map from (aid, bid) to alignments in a vector
+        std::unordered_map<int, std::vector<std::pair<int, int> > > self_aln_list;
 
 
 
@@ -521,32 +488,43 @@ int main(int argc, char *argv[]) {
         }
 
         for (int i = 0; i < aln.size(); i++) {
+            if (aln[i]->read_A_id_ == aln[i]->read_B_id_) {
+                aln[i]->active = false;
+                if (self_aln_list.find(aln[i]->read_A_id_) == self_aln_list.end())
+                    self_aln_list[aln[i]->read_A_id_] = std::vector<std::pair<int, int>>();
+                self_aln_list[aln[i]->read_A_id_].push_back(std::pair<int, int>(aln[i]->read_A_match_start_, aln[i]->read_A_match_end_));
+                self_aln_list[aln[i]->read_A_id_].push_back(std::pair<int, int>(aln[i]->read_B_match_start_, aln[i]->read_B_match_end_));
+            }
             if (aln[i]->active) {
                 idx_pileup[aln[i]->read_A_id_].push_back(aln[i]);
             }
         }
 
+        std::set<int> self_match_reads;
+        for (auto it : self_aln_list) {
+            float cov = 0.0;
+            for (int i = 0; i < it.second.size(); i++)
+                cov += it.second[i].second - it.second[i].first;
+            cov /= float(reads[it.first]->len);
+            std::cout << "selfcov: " <<  it.first << " " << cov << " " << reads[it.first]->len << std::endl;
+            if ((cov > 4.5) and (reads[it.first]->len > 10000))
+                self_match_reads.insert(it.first);
+        }
 
 
 
-# pragma omp parallel for
         for (int i = 0; i < n_read; i++) {// sort overlaps of a reads
             std::sort(idx_pileup[i].begin(), idx_pileup[i].end(), compare_overlap);
         }
 
-# pragma omp parallel for
         for (int i = 0; i < aln.size(); i++) {
             idx_ab[aln[i]->read_A_id_][aln[i]->read_B_id_] = std::vector<LOverlap *>();
         }
 
-# pragma omp parallel for
         for (int i = 0; i < aln.size(); i++) {
             idx_ab[aln[i]->read_A_id_][aln[i]->read_B_id_].push_back(aln[i]);
         }
 
-
-
-# pragma omp parallel for
         for (int i = 0; i < n_read; i++) {
             for (std::unordered_map<int, std::vector<LOverlap *> >::iterator it = idx_ab[i].begin(); it!= idx_ab[i].end(); it++) {
                 std::sort(it->second.begin(), it->second.end(), compare_overlap);
@@ -666,14 +644,17 @@ int main(int argc, char *argv[]) {
                 cutoff_coverages[i][j].second -= MIN_COV;
                 if (cutoff_coverages[i][j].second < 0) cutoff_coverages[i][j].second = 0;
             }
-
+//            std::cout << "in here " << i << std::endl;
             //get the longest consecutive region that has decent coverage, decent coverage = estimated coverage / 3
             int start = 0;
             int end = start;
             int maxlen = 0, maxstart = 0, maxend = 0;
+            int start_coord = 0, end_coord = 0;
+            int max_start_coord = 0, max_end_coord = 0;
             for (int j = 0; j < cutoff_coverages[i].size(); j++) {
                 if (cutoff_coverages[i][j].second > 0) {
                     end = cutoff_coverages[i][j].first;
+                    end_coord = j;
                 } else {
                     if (end > start) {
                         //std::cout<<"read" << i << " "<<start+reso << "->" << end << std::endl;
@@ -681,26 +662,64 @@ int main(int argc, char *argv[]) {
                             maxlen = end - start - reso;
                             maxstart = start + reso;
                             maxend = end;
+                            max_start_coord = start_coord + 1;
+                            max_end_coord = end_coord;
                         }
                     }
                     start = cutoff_coverages[i][j].first;
+                    start_coord =j;
+                    end_coord = start_coord;
                     end = start;
                 }
             }
-            //std::cout << i << " " << maxstart << " " << maxend << std::endl;
-            //int s = std::max(maxstart, QV_mask[i].first);
-            //int l = std::min(maxend, QV_mask[i].second) - std::max(maxstart, QV_mask[i].first);
-            //if (l < 0) l = 0;
-            //filtered << ">read_" << i << std::endl;
-            //filtered << reads[i]->bases.substr(s,l) << std::endl;
+
+
+            int start_coverage = 0, end_coverage = 0;
+            if (max_end_coord - max_start_coord + 1 > 20){
+                for (int dummy_index = 0; dummy_index < 10; dummy_index ++){
+                    start_coverage += cutoff_coverages[i][max_start_coord + dummy_index].second + MIN_COV;
+                    end_coverage += cutoff_coverages[i][max_end_coord - dummy_index].second + MIN_COV;
+                }
+                start_coverage = start_coverage/10;
+                end_coverage = end_coverage/10;
+
+            }
+            else{
+                int limit = (max_end_coord - max_start_coord)/2;
+                for (int dummy_index = 0; dummy_index < limit; dummy_index ++){
+                    start_coverage += cutoff_coverages[i][max_start_coord + dummy_index].second + MIN_COV;
+                    end_coverage += cutoff_coverages[i][max_end_coord - dummy_index].second + MIN_COV;
+                }
+                if (limit == 0){
+                    start_coverage = 0;
+                    end_coverage = 0;
+                }
+                else {
+                    start_coverage = start_coverage / limit;
+                    end_coverage = end_coverage / limit;
+                }
+            }
+
+            if (delete_telomere) {
+                if ((start_coverage >= 10 * end_coverage) or (end_coverage >= 10 * start_coverage)) {
+                    covflag << i << std::endl;
+                }
+
+                if (self_match_reads.find(i) != self_match_reads.end()) {
+                    selfflag << i << std::endl;
+                }
+            }
 
             if (reads_to_keep.size()>0) {
-                if (reads_to_keep.find(i) == reads_to_keep.end()){
+                if (reads_to_keep.find(i) == reads_to_keep.end()) {
 //                std::cout<<"setting masks equal";
                     maxend=maxstart;
                     QV_mask[i].second=QV_mask[i].first;
                 }
             }
+
+            comask << i << " " << max_start_coord << " " << max_end_coord << std::endl;
+
             if ((use_qv_mask) and (use_coverage_mask)) {
                 maskvec[i] = (
                         std::pair<int, int>(std::max(maxstart, QV_mask[i].first), std::min(maxend, QV_mask[i].second)));
@@ -715,42 +734,10 @@ int main(int argc, char *argv[]) {
             }
         }
 
-        /*FILE* temp_out1;
-        FILE* temp_out2;
-        temp_out1=fopen("coverage.debug.txt","w");
-        temp_out2=fopen("coverage_gradient.debug.txt","w");
-
-        for (int i=0; i< n_read ; i++) {
-            fprintf(temp_out1,"%d \t", i);
-            for (int j=0; j < coverages[i].size(); j++){
-                fprintf(temp_out1,"%d:%d \t", coverages[i][j].first,coverages[i][j].second);
-            }
-            fprintf(temp_out1,"\n");
-        }
-
-        for (int i=0; i< n_read ; i++) {
-            fprintf(temp_out2,"%d \t", i);
-            for (int j=0; j < cgs[i].size(); j++){
-                fprintf(temp_out2,"%d:%d \t", cgs[i][j].first,cgs[i][j].second);
-            }
-            fprintf(temp_out2,"\n");
-        }
-        fclose(temp_out1);
-        fclose(temp_out2);*/
-
-        /*for (int i = 0; i < maskvec.size(); i++) {
-            printf("read %d %d %d\n", i, maskvec[i].first, maskvec[i].second);
-            printf("QV: read %d %d %d\n", i, QV_mask[i].first, QV_mask[i].second);
-        }*/
-
 
         //binarize coverage gradient;
 
 
-
-
-
-
         //detect repeats based on coverage gradient, mark it has rising (1) or falling (-1)
         for (int i = r_begin; i <= r_end; i++) {
             std::vector<std::pair<int, int> > anno;
@@ -789,34 +776,6 @@ int main(int argc, char *argv[]) {
 
 
 
-
-        //remove gaps
-//    for (int i = 0; i < n_read; i++) {
-//        for (std::vector<std::pair<int, int> >::iterator iter = repeat_annotation[i].begin(); iter < repeat_annotation[i].end(); ) {
-//            if (iter+1 < repeat_annotation[i].end()){
-//                if ((iter->second == -1) and ((iter+1)->second == 1) and
-//                        ((iter+1)->first - iter->first < REPEAT_ANNOTATION_GAP_THRESHOLD)){
-//                    iter = repeat_annotation[i].erase(iter);
-//                    iter = repeat_annotation[i].erase(iter); // fill gaps
-//                } else if ((iter->second == 1) and ((iter+1)->second == -1) and
-//                        ((iter+1)->first - iter->first < REPEAT_ANNOTATION_GAP_THRESHOLD)) {
-//                    iter = repeat_annotation[i].erase(iter);
-//                    iter = repeat_annotation[i].erase(iter);
-//                } else iter++;
-//            } else iter ++;
-//        }
-//    }
-
-
-        /*temp_out1=fopen("repeat_annotation.debug.txt","w");
-        for (int i = 0; i < n_read; i++) {
-            fprintf(temp_out1,"%d \t%d\t",i,repeat_annotation[i].size());
-            for (std::vector<std::pair<int, int> >::iterator iter = repeat_annotation[i].begin(); iter < repeat_annotation[i].end();iter++) {
-                fprintf(temp_out1,"%d:%d\t",iter->first,iter->second);
-            }
-            fprintf(temp_out1,"\n");
-        }
-        fclose(temp_out1);*/
         // need a better hinge detection
 
         // get hinges from repeat annotation information
diff --git a/src/hinge b/src/hinge
new file mode 100755
index 0000000..337d8cd
--- /dev/null
+++ b/src/hinge
@@ -0,0 +1,58 @@
+#!/bin/bash
+# hinge: dispatch "hinge <subcommand> [args...]" to the helper program for that subcommand.
+
+# Helpers are looked up in ../lib/hinge relative to this script; quoted so paths with spaces work.
+export PATH="$(dirname "${BASH_SOURCE[0]}")/../lib/hinge:$PATH"
+
+subcommand="$1"
+shift
+
+case "$subcommand" in
+    filter)
+        Reads_filter "$@"
+        ;;
+    layout)
+        hinging "$@"
+        ;;
+    maximal)
+        get_maximal_reads "$@"
+        ;;
+    clip)
+        pruning_and_clipping.py "$@"
+        ;;
+    clip-nanopore)
+        pruning_and_clipping_nanopore.py "$@"
+        ;;
+    draft-path)
+        get_draft_path.py "$@"
+        ;;
+    draft)
+        draft_assembly "$@"
+        ;;
+    correct-head|correct_head)
+        correct_head.py "$@"
+        ;;
+    consensus)
+        consensus "$@"
+        ;;
+    fasta2q)
+        fasta_to_fastq.py "$@"
+        ;;
+    gfa)
+        get_consensus_gfa.py "$@"
+        ;;
+    visualize|visualise)
+        Visualise_graph.py "$@"
+        ;;
+    condense)
+        condense_graph.py "$@"
+        ;;
+    split_las)
+        split_las.py "$@"
+        ;;
+    *)
+        # Unknown or missing subcommand: report on stderr and exit non-zero.
+        echo "See hinge(1) for usage information." >&2
+        exit 1
+        ;;
+esac
diff --git a/src/hinge.1.md b/src/hinge.1.md
new file mode 100644
index 0000000..9b5faa9
--- /dev/null
+++ b/src/hinge.1.md
@@ -0,0 +1,46 @@
+% HINGE(1)
+%
+% October 2016
+
+# NAME
+
+hinge - assembler for long-read sequencing data
+
+# SYNOPSIS
+
+**hinge** {**subcommand**} *options* *files*
+
+# OPTIONS
+
+Subcommands are described below.
+Run each subcommand without arguments for usage information.
+
+**filter**
+:    filter out short reads and long chimeric reads.
+
+**maximal**
+:    get maximal reads.
+
+**layout**
+:    generate a layout for assembly
+
+**clip**
+:    prune and clip output of the **layout** command
+
+**draft-path**
+:    get assembly graph as list of nodes
+
+**draft**
+:    construct draft assembly
+
+**correct-head**
+:    convert fasta file to daligner-specific format
+
+**consensus**
+:    construct consensus sequence
+
+**gfa**
+:    create a Graphical Fragment Assembly (GFA) file from **consensus** output
+
+**visualize**, **visualise**
+:    visualize the assembly graph
diff --git a/src/include/LAInterface.h b/src/include/LAInterface.h
index eb87b0c..51b6884 100644
--- a/src/include/LAInterface.h
+++ b/src/include/LAInterface.h
@@ -90,7 +90,7 @@ public:
     int eff_read_A_match_start_, eff_read_B_match_start_, eff_read_A_match_end_, eff_read_B_match_end_;
     int tps;
     int reverse_complement_match_; //reverse_complement_match_, reverse complement = 1, same direction = 0
-    int eff_read_A_start_, eff_read_A_end_, eff_read_B_start_, eff_read_B_end_;
+    int eff_read_A_read_start_, eff_read_A_read_end_, eff_read_B_read_start_, eff_read_B_read_end_;
     MatchType match_type_ = UNDEFINED;
     void addtype(int max_overhang); //classify overlaps
     void AddTypesAsymmetric(int max_overhang, int min_overhang);
@@ -103,6 +103,7 @@ public:
 	void TrimOverlapNaive();
     int eff_start_trace_point_index_, eff_end_trace_point_index_;
     int weight;
+    int length;
 };
 
 
@@ -139,7 +140,7 @@ public:
     int openAlignmentFile(std::string filename); // open .las Alignment file
 
     void showRead(int from, int to); // show reads in a range
-	
+
     void showRead2(int from, int to); // show reads in a range
 
     void showAlignment(int from, int to); // show alignment with 'A read' in a range
@@ -153,14 +154,16 @@ public:
     Read *getRead2(int number); //get one read
 
     void getRead(std::vector<Read *> &reads, int from, int to); // get reads within a range
-	
-	void getQV(std::vector<std::vector<int> > & QV, int from, int to);
+
+    int getQV(std::vector<std::vector<int> > & QV, int from, int to);
 
     void getRead2(std::vector<Read *> &reads, int from, int to); // get reads within a range
 
 
     void getAlignmentB(std::vector<int> &, int n); // get all b reads aligned with a read
 
+    void getOverlap(std::vector<LOverlap *> &, std::vector<int> &range); // get overlap(simplified version of alignment) with a read in a range
+
     void getOverlap(std::vector<LOverlap *> &, int from, int64 to); // get overlap(simplified version of alignment) with a read in a range
 
     void getOverlapw(std::vector<LOverlap *> &, int from, int to); // get overlap(simplified version of alignment) with a read in a range
@@ -231,4 +234,23 @@ public:
 
 
 
-#endif
\ No newline at end of file
+
+bool pairAscend(const std::pair<int, int>& firstElem,  const std::pair<int, int>& secondElem);
+
+bool pairDescend(const std::pair<int, int>& firstElem,  const std::pair<int, int>& secondElem);
+
+bool compare_overlap(LOverlap * ovl1, LOverlap * ovl2);
+
+bool compare_sum_overlaps(const std::vector<LOverlap * > * ovl1, const std::vector<LOverlap *> * ovl2);
+
+bool compare_pos(LOverlap * ovl1, LOverlap * ovl2);
+
+bool compare_overlap_abpos(LOverlap * ovl1, LOverlap * ovl2);
+
+bool compare_overlap_aepos(LOverlap * ovl1, LOverlap * ovl2);
+
+bool compare_overlap_weight(LOverlap * ovl1, LOverlap * ovl2);
+
+bool compare_overlap_aln(LAlignment * ovl1, LAlignment * ovl2);
+
+#endif
diff --git a/src/layout/CMakeLists.txt b/src/layout/CMakeLists.txt
index 912bb8e..9cf2225 100644
--- a/src/layout/CMakeLists.txt
+++ b/src/layout/CMakeLists.txt
@@ -7,3 +7,5 @@ INCLUDE_DIRECTORIES( ${Boost_INCLUDE_DIR} )
 
 add_executable(hinging hinging)
 target_link_libraries(hinging LAInterface ini spdlog ${Boost_LIBRARIES})
+
+install(TARGETS hinging DESTINATION ${libexec})
diff --git a/src/layout/hinging.cpp b/src/layout/hinging.cpp
index 73bbf83..a85d18b 100644
--- a/src/layout/hinging.cpp
+++ b/src/layout/hinging.cpp
@@ -9,6 +9,8 @@
 #include <omp.h>
 #include <tuple>
 #include <iomanip>
+#include <glob.h>
+
 
 #include "spdlog/spdlog.h"
 #include "cmdline.h"
@@ -35,123 +37,39 @@ using namespace boost;
 
 typedef adjacency_list <vecS, vecS, undirectedS> Graph;
 typedef std::tuple<Node, Node, int> Edge_w;
-typedef std::pair<Node, Node> Edge_nw;
-
-
-static int ORDER(const void *l, const void *r) {
-    int x = *((int32 *) l);
-    int y = *((int32 *) r);
-    return (x - y);
-}
-
-
-std::ostream& operator<<(std::ostream& out, const MatchType value){
-    static std::map<MatchType, std::string> strings;
-    if (strings.size() == 0){
-#define INSERT_ELEMENT(p) strings[p] = #p
-        INSERT_ELEMENT(FORWARD);
-        INSERT_ELEMENT(BACKWARD);
-        INSERT_ELEMENT(ACOVERB);
-        INSERT_ELEMENT(BCOVERA);
-        INSERT_ELEMENT(INTERNAL);
-        INSERT_ELEMENT(UNDEFINED);
-        INSERT_ELEMENT(NOT_ACTIVE);
-#undef INSERT_ELEMENT
-    }
-    return out << strings[value];
-}
-
-
-
-bool compare_overlap(LOverlap * ovl1, LOverlap * ovl2) {
-    return ((ovl1->read_A_match_end_ - ovl1->read_A_match_start_
-             + ovl1->read_B_match_end_ - ovl1->read_B_match_start_) >
-            (ovl2->read_A_match_end_ - ovl2->read_A_match_start_
-             + ovl2->read_B_match_end_ - ovl2->read_B_match_start_));
-}
-
-bool compare_overlap_effective(LOverlap * ovl1, LOverlap * ovl2) {
-    return ((ovl1->eff_read_A_match_end_ - ovl1->eff_read_A_match_start_
-             + ovl1->eff_read_B_match_end_ - ovl1->eff_read_B_match_start_) >
-            (ovl2->eff_read_A_match_end_ - ovl2->eff_read_A_match_start_
-             + ovl2->eff_read_B_match_end_ - ovl2->eff_read_B_match_start_));
-}
-
-bool compare_overlap_weight(LOverlap * ovl1, LOverlap * ovl2) {
-    return (ovl1->weight > ovl2->weight);
-}
-
-bool compare_sum_overlaps(const std::vector<LOverlap * > * ovl1, const std::vector<LOverlap *> * ovl2) {
-    int sum1 = 0;
-    int sum2 = 0;
-    for (int i = 0; i < ovl1->size(); i++)
-        sum1 += (*ovl1)[i]->read_A_match_end_ - (*ovl1)[i]->read_A_match_start_
-                + (*ovl1)[i]->read_B_match_end_ - (*ovl1)[i]->read_B_match_start_;
-    for (int i = 0; i < ovl2->size(); i++)
-        sum2 += (*ovl2)[i]->read_A_match_end_ - (*ovl2)[i]->read_A_match_start_
-                + (*ovl2)[i]->read_B_match_end_ - (*ovl2)[i]->read_B_match_start_;
-    return sum1 > sum2;
-}
-
-bool compare_pos(LOverlap * ovl1, LOverlap * ovl2) {
-    return (ovl1->read_A_match_start_) > (ovl2->read_A_match_start_);
-}
-
-bool compare_overlap_abpos(LOverlap * ovl1, LOverlap * ovl2) {
-    return ovl1->read_A_match_start_ < ovl2->read_A_match_start_;
-}
 
-bool compare_overlap_aepos(LOverlap * ovl1, LOverlap * ovl2) {
-    return ovl1->read_A_match_start_ > ovl2->read_A_match_start_;
+std::string lastN(std::string input, int n)  // last n chars of input; whole string if shorter, "" if n <= 0
+{
+    return n <= 0 ? std::string() : input.substr(input.size() > (size_t) n ? input.size() - n : 0);
 }
 
-std::vector<std::pair<int,int>> Merge(std::vector<LOverlap *> & intervals, int cutoff)
-{
-    //std::cout<<"Merge"<<std::endl;
-    std::vector<std::pair<int, int > > ret;
-    int n = intervals.size();
-    if (n == 0) return ret;
-
-    if(n == 1) {
-        ret.push_back(std::pair<int,int>(intervals[0]->read_A_match_start_, intervals[0]->read_A_match_end_));
-        return ret;
+inline std::vector<std::string> glob(const std::string& pat){
+    using namespace std;
+    glob_t glob_result;
+    int i = 1;
+    std::string search_name;
+    search_name = pat + "."+std::to_string(i)+".las";
+    std::cout << search_name << endl;
+    glob(search_name.c_str(),GLOB_TILDE,NULL,&glob_result);
+//    std::cout << "Number of files " << glob_result.gl_pathc << std::endl;
+
+    vector<string> ret;
+
+
+    while (glob_result.gl_pathc != 0){
+        ret.push_back(string(glob_result.gl_pathv[0]));
+        globfree(&glob_result); // release this part's match list before the next glob() overwrites it (was leaked)
+        i ++;
+        search_name = pat + "."+std::to_string(i)+".las";
+        glob(search_name.c_str(),GLOB_TILDE,NULL,&glob_result);
     }
 
-    sort(intervals.begin(),intervals.end(),compare_overlap_abpos); //sort according to left
+    std::cout << "-------------------------"<< std::endl;
+    std::cout << "Number of files " << i-1 << std::endl;
+    std::cout << "Input string " << pat.c_str() << std::endl;
+    std::cout << "-------------------------"<< std::endl;
 
-    int left= intervals[0]->read_A_match_start_ + cutoff, right = intervals[0]->read_A_match_end_ - cutoff;
-    //left, right means maximal possible interval now
-
-    for(int i = 1; i < n; i++)
-    {
-        if(intervals[i]->read_A_match_start_ + cutoff <= right)
-        {
-            right=std::max(right, intervals[i]->read_A_match_end_ - cutoff);
-        }
-        else
-        {
-            ret.push_back(std::pair<int, int>(left,right));
-            left = intervals[i]->read_A_match_start_ + cutoff;
-            right = intervals[i]->read_A_match_end_ - cutoff;
-        }
-    }
-    ret.push_back(std::pair<int, int>(left,right));
-    return ret;
-}
-
-Interval Effective_length(std::vector<LOverlap *> & intervals, int min_cov) {
-    Interval ret;
-    sort(intervals.begin(),intervals.end(),compare_overlap_abpos); //sort according to left
-
-    if (intervals.size() > min_cov) {
-        ret.first = intervals[min_cov]->read_A_match_start_;
-    } else
-        ret.first = 0;
-    sort(intervals.begin(),intervals.end(),compare_overlap_aepos); //sort according to left
-    if (intervals.size() > min_cov) {
-        ret.second = intervals[min_cov]->read_A_match_end_;
-    } else
-        ret.second = 0;
+    globfree(&glob_result);
     return ret;
 }
 
@@ -165,26 +83,26 @@ bool ProcessAlignment(LOverlap * match, Read * read_A, Read * read_B, int ALN_TH
     //class object
     //std::cout<<" In ProcessAlignment"<<std::endl;
     bool contained=false;
-    match->eff_read_A_start_ = read_A->effective_start;
-    match->eff_read_A_end_ = read_A->effective_end;
+    match->eff_read_A_read_start_ = read_A->effective_start;
+    match->eff_read_A_read_end_ = read_A->effective_end;
 
     // removed the following if, so that things agree with the convention for reverse complement matches
 
-    match->eff_read_B_start_ = read_B->effective_start;
-    match->eff_read_B_end_ = read_B->effective_end;
+    match->eff_read_B_read_start_ = read_B->effective_start;
+    match->eff_read_B_read_end_ = read_B->effective_end;
 
 //    if (match->reverse_complement_match_ == 0) {
-//        match->eff_read_B_start_ = read_B->effective_start;
-//        match->eff_read_B_end_ = read_B->effective_end;
+//        match->eff_read_B_read_start_ = read_B->effective_start;
+//        match->eff_read_B_read_end_ = read_B->effective_end;
 //    } else {
-//        match->eff_read_B_start_ = read_B->len - read_B->effective_end;
-//        match->eff_read_B_end_ = read_B->len - read_B->effective_start;
+//        match->eff_read_B_read_start_ = read_B->len - read_B->effective_end;
+//        match->eff_read_B_read_end_ = read_B->len - read_B->effective_start;
 //    }
 
     /*printf("bef %d %d %d [%d %d] [%d %d] [%d %d] [%d %d]\n", match->read_A_id_, match->read_B_id_,
      * match->reverse_complement_match_,
         match->read_A_match_start_, match->read_A_match_end_, match->read_B_match_start_, match->read_B_match_end_,
-           match->eff_read_A_start_, match->eff_read_A_end_, match->eff_read_B_start_, match->eff_read_B_end_
+           match->eff_read_A_read_start_, match->eff_read_A_read_end_, match->eff_read_B_read_start_, match->eff_read_B_read_end_
     );*/
 
     if (trim)
@@ -199,7 +117,7 @@ bool ProcessAlignment(LOverlap * match, Read * read_A, Read * read_B, int ALN_TH
      * match->reverse_complement_match_,
            match->eff_read_A_match_start_, match->eff_read_A_match_end_, match->eff_read_B_match_start_,
            match->eff_read_B_match_end_,
-           match->eff_read_A_start_, match->eff_read_A_end_, match->eff_read_B_start_, match->eff_read_B_end_
+           match->eff_read_A_read_start_, match->eff_read_A_read_end_, match->eff_read_B_read_start_, match->eff_read_B_read_end_
     );*/
     //std::cout<< contained<<std::endl;
     if (((match->eff_read_B_match_end_ - match->eff_read_B_match_start_) < ALN_THRESHOLD)
@@ -220,6 +138,9 @@ bool ProcessAlignment(LOverlap * match, Read * read_A, Read * read_B, int ALN_TH
             match->eff_read_A_match_end_ - match->eff_read_A_match_start_
             + match->eff_read_B_match_end_ - match->eff_read_B_match_start_;
 
+    match->length = match->read_A_match_end_ - match->read_A_match_start_
+            + match->read_B_match_end_ - match->read_B_match_start_;
+
     return contained;
 }
 
@@ -277,7 +198,7 @@ void PrintOverlapToFile(FILE * file_pointer, LOverlap * match) {
         fprintf(file_pointer, "%d %d %d %d %d %d [%d %d] [%d %d] [%d %d] [%d %d] [%d %d] [%d %d]\n",
                 match->read_A_id_,
                 match->read_B_id_,
-                match->weight,
+                match->length,
                 0,
                 direction,
                 hinged,
@@ -285,10 +206,10 @@ void PrintOverlapToFile(FILE * file_pointer, LOverlap * match) {
                 match->eff_read_A_match_end_,
                 match->eff_read_B_match_start_,
                 match->eff_read_B_match_end_,
-                match->eff_read_A_start_,
-                match->eff_read_A_end_,
-                match->eff_read_B_start_,
-                match->eff_read_B_end_,
+                match->eff_read_A_read_start_,
+                match->eff_read_A_read_end_,
+                match->eff_read_B_read_start_,
+                match->eff_read_B_read_end_,
 
                 match->read_A_match_start_,
                 match->read_A_match_end_,
@@ -302,7 +223,7 @@ void PrintOverlapToFile(FILE * file_pointer, LOverlap * match) {
         fprintf(file_pointer, "%d %d %d %d %d %d [%d %d] [%d %d] [%d %d] [%d %d] [%d %d] [%d %d]\n",
                 match->read_B_id_,
                 match->read_A_id_,
-                match->weight,
+                match->length,
                 direction,
                 0,
                 hinged,
@@ -310,10 +231,10 @@ void PrintOverlapToFile(FILE * file_pointer, LOverlap * match) {
                 match->eff_read_B_match_end_,
                 match->eff_read_A_match_start_,
                 match->eff_read_A_match_end_,
-                match->eff_read_B_start_,
-                match->eff_read_B_end_,
-                match->eff_read_A_start_,
-                match->eff_read_A_end_,
+                match->eff_read_B_read_start_,
+                match->eff_read_B_read_end_,
+                match->eff_read_A_read_start_,
+                match->eff_read_A_read_end_,
 
                 match->read_A_match_start_,
                 match->read_A_match_end_,
@@ -349,7 +270,7 @@ void PrintOverlapToFile2(FILE * file_pointer, LOverlap * match, int hinge_pos) {
         fprintf(file_pointer, "%d %d %d %d %d %d %d [%d %d] [%d %d] [%d %d] [%d %d]\n",
                 match->read_A_id_,
                 match->read_B_id_,
-                match->weight,
+                match->length,
                 0,
                 direction,
                 0,
@@ -358,16 +279,16 @@ void PrintOverlapToFile2(FILE * file_pointer, LOverlap * match, int hinge_pos) {
                 match->eff_read_A_match_end_,
                 match->eff_read_B_match_start_,
                 match->eff_read_B_match_end_,
-                match->eff_read_A_start_,
-                match->eff_read_A_end_,
-                match->eff_read_B_start_,
-                match->eff_read_B_end_);
+                match->eff_read_A_read_start_,
+                match->eff_read_A_read_end_,
+                match->eff_read_B_read_start_,
+                match->eff_read_B_read_end_);
     }
     else if (match->match_type_ == BACKWARD) {
         fprintf(file_pointer, "%d %d %d %d %d %d %d [%d %d] [%d %d] [%d %d] [%d %d]\n",
                 match->read_B_id_,
                 match->read_A_id_,
-                match->weight,
+                match->length,
                 direction,
                 0,
                 0,
@@ -376,17 +297,17 @@ void PrintOverlapToFile2(FILE * file_pointer, LOverlap * match, int hinge_pos) {
                 match->eff_read_B_match_end_,
                 match->eff_read_A_match_start_,
                 match->eff_read_A_match_end_,
-                match->eff_read_B_start_,
-                match->eff_read_B_end_,
-                match->eff_read_A_start_,
-                match->eff_read_A_end_);
+                match->eff_read_B_read_start_,
+                match->eff_read_B_read_end_,
+                match->eff_read_A_read_start_,
+                match->eff_read_A_read_end_);
     }
     else if (match->match_type_ == FORWARD_INTERNAL) {
 
         fprintf(file_pointer, "%d %d %d %d %d %d %d [%d %d] [%d %d] [%d %d] [%d %d]\n",
                 match->read_A_id_,
                 match->read_B_id_,
-                match->weight,
+                match->length,
                 0,
                 direction,
                 1, // hinged forward
@@ -395,16 +316,16 @@ void PrintOverlapToFile2(FILE * file_pointer, LOverlap * match, int hinge_pos) {
                 match->eff_read_A_match_end_,
                 match->eff_read_B_match_start_,
                 match->eff_read_B_match_end_,
-                match->eff_read_A_start_,
-                match->eff_read_A_end_,
-                match->eff_read_B_start_,
-                match->eff_read_B_end_);
+                match->eff_read_A_read_start_,
+                match->eff_read_A_read_end_,
+                match->eff_read_B_read_start_,
+                match->eff_read_B_read_end_);
     }
     else if (match->match_type_ == BACKWARD_INTERNAL) {
         fprintf(file_pointer, "%d %d %d %d %d %d %d [%d %d] [%d %d] [%d %d] [%d %d]\n",
                 match->read_B_id_,
                 match->read_A_id_,
-                match->weight,
+                match->length,
                 direction,
                 0,
                 -1, // hinged backward
@@ -413,14 +334,242 @@ void PrintOverlapToFile2(FILE * file_pointer, LOverlap * match, int hinge_pos) {
                 match->eff_read_B_match_end_,
                 match->eff_read_A_match_start_,
                 match->eff_read_A_match_end_,
-                match->eff_read_B_start_,
-                match->eff_read_B_end_,
-                match->eff_read_A_start_,
-                match->eff_read_A_end_);
+                match->eff_read_B_read_start_,
+                match->eff_read_B_read_end_,
+                match->eff_read_A_read_start_,
+                match->eff_read_A_read_end_);
     }
 }
 
 
+void GetAlignment ( LAInterface &la, std::vector<Read *> & reads, std::vector<std::unordered_map<int, std::vector<LOverlap *> > > & idx_ab, // idx_ab[a][b]: overlaps whose read A id is a and read B id is b
+                    std::vector<std::vector<LOverlap *>> & matches_forward, std::vector<std::vector<LOverlap *>>& matches_backward, // matches_*[a]: selected overlaps of read a, split by match type
+                    int n_read, const char *name_db, const char *name_las_base, bool mult_las,
+                    int ALN_THRESHOLD, int THETA, int THETA2, bool USE_TWO_MATCHES, int64 n_aln_full,
+                    const std::shared_ptr<spdlog::logger> console,
+                    std::string name_maximal_reads, bool KEEP_ONLY_MATCHES_BETWEEN_MAXIMAL_READS ){
+    // Reads overlaps through `la` from one or more .las files, indexes them in idx_ab, and fills matches_forward / matches_backward per read; also updates reads[i]->active.
+    std::ifstream max_reads_file(name_maximal_reads);
+    n_aln_full = 0;
+    int num_active_reads(0);
+    int64 n_aln_kept_full(0);
+    int64 n_rev_aln_full(0);
+    int64 n_rev_aln_kept_full(0);
+    std::string name_las_string;
+    console->info("Multiple las files: {}", mult_las);
+    // Resolve the .las name: with mult_las the base name is used as-is (it is handed to glob below); otherwise ".las" is appended unless already present.
+    if (mult_las)
+        name_las_string =  std::string(name_las_base);
+    else {
+        if (lastN(std::string(name_las_base), 4) == ".las")
+            name_las_string = std::string(name_las_base);
+        else
+            name_las_string = std::string(name_las_base) + ".las";
+    }
+    // NOTE(review): n_aln_full is a by-value parameter, so resetting/accumulating it only feeds this function's final log line; the caller's variable is not updated.
+    n_aln_full = 0;
+    const char * name_las = name_las_string.c_str();
+
+    std::vector<std::string> name_las_list;
+    std::string name_las_str(name_las);
+    console->info("Las files: {}", name_las_str);
+    // Build the list of .las files to process: the glob result, or the single resolved name.
+    if (mult_las) {
+        console->info("Calling glob.");
+        name_las_list = glob(name_las_str);
+    }
+    else
+        name_las_list.push_back(name_las_str);
+
+    console->info("number of las files: {}", name_las_list.size());
+    // Flag every read id found in the name_maximal_reads file (one atoi-parsed value per line) as maximal.
+    std::vector<bool> maximal_read;
+    maximal_read.resize(n_read, false);
+    std::string read_line;
+    while(std::getline(max_reads_file, read_line))
+    {
+        int read_number;
+        read_number = atoi(read_line.c_str());
+        maximal_read[read_number] = true; // NOTE(review): read_number is not range-checked against n_read
+        num_active_reads++;
+    }
+    console->info("Total number of active reads: {}/{}", num_active_reads, n_read);
+    // A read stays active only if it was already active and is flagged as maximal.
+    for (int i = 0; i < n_read; i++){
+        reads[i]->active = (reads[i]->active) and (maximal_read[i]);
+    }
+
+    // Process each .las file (part) in turn.
+    for (int part = 0; part < name_las_list.size(); part++) {
+
+        console->info("name of las: {}", name_las_list[part]);
+
+        if (strlen(name_las_list[part].c_str()) > 0)
+            la.openAlignmentFile(name_las_list[part]);
+
+        int64 n_aln = 0;
+        int64 n_aln_accept = 0;
+        int64 n_aln_rcomp_accept = 0;
+
+        if (strlen(name_las_list[part].c_str()) > 0) {
+            n_aln = la.getAlignmentNumber();
+            console->info("Load alignments from {}", name_las_list[part]);
+            console->info("# Alignments: {}", n_aln);
+        }
+
+        std::vector<LOverlap *> aln;//Vector of pointers to all alignments
+
+
+        if (strlen(name_las_list[part].c_str()) > 0) {
+            la.resetAlignment();
+            la.getOverlap(aln, 0, n_read);
+        }
+        // NOTE(review): aln stays empty when the file name is empty (and possibly when no overlaps are returned); front()/back() on an empty vector is undefined behavior.
+        int r_begin = aln.front()->read_A_id_;
+        int r_end = aln.back()->read_A_id_;
+        int num_active_reads_part (0);
+        // Count active reads with ids between the first and last overlap's read A id (used for logging only).
+        for (int i = r_begin; i <= r_end; i++) {
+            if (reads[i]->active)
+                num_active_reads_part++;
+        }
+        console->info("# reads: {}", r_end-r_begin+1);
+        console->info("# active reads: {}/{}",num_active_reads_part, r_end-r_begin+1);
+        console->info("Input data finished, part {}/{}", part + 1, name_las_list.size());
+
+        // Pass 1: deactivate self-overlaps, (re)create an empty idx_ab bucket for every kept (A, B) pair and count kept overlaps.
+        // NOTE(review): the keep condition is only true when KEEP_ONLY_MATCHES_BETWEEN_MAXIMAL_READS is true; with the flag false no overlap is indexed — confirm intent.
+        for (int i = 0; i < aln.size(); i++) {
+
+            if (aln[i]->read_A_id_ == aln[i]->read_B_id_) {
+                aln[i]->active = false;
+            }
+            if ((reads[aln[i]->read_A_id_]->active) and
+                    ((reads[aln[i]->read_B_id_]->active) and KEEP_ONLY_MATCHES_BETWEEN_MAXIMAL_READS)) {
+                idx_ab[aln[i]->read_A_id_][aln[i]->read_B_id_] = std::vector<LOverlap *>();
+                n_aln_accept++;
+                n_aln_rcomp_accept += aln[i]->reverse_complement_match_;
+            }
+        }
+        // Pass 2: append every kept overlap to its (A, B) bucket (same condition as in pass 1).
+        for (int i = 0; i < aln.size(); i++) {
+            if ((reads[aln[i]->read_A_id_]->active) and
+                ((reads[aln[i]->read_B_id_]->active) and KEEP_ONLY_MATCHES_BETWEEN_MAXIMAL_READS))
+                idx_ab[aln[i]->read_A_id_][aln[i]->read_B_id_].push_back(aln[i]);
+        }
+
+        // Totals over all overlaps of this part (kept or not), for the log line below.
+        int n_overlaps = 0;
+        int n_rev_overlaps = 0;
+        for (int i = 0; i < aln.size(); i++) {
+            n_overlaps++;
+            n_rev_overlaps += aln[i]->reverse_complement_match_;
+        }
+
+        console->info("kept {}/{} overlaps,  {}/{} rev_overlaps in part {}/{}",n_aln_accept,
+                      n_overlaps, n_aln_rcomp_accept,
+                      n_rev_overlaps,
+                      part + 1, name_las_list.size());
+
+        n_aln_full += n_aln;
+        n_aln_kept_full += n_aln_accept;
+        n_rev_aln_full += n_rev_overlaps;
+        n_rev_aln_kept_full += n_aln_rcomp_accept;
+
+        console->info("index finished");
+
+
+        // For each active read in [r_begin, r_end]: sort each per-read-B bucket with compare_overlap, run ProcessAlignment on its first (optionally second) overlap and file it by match type.
+
+        for (int i = r_begin; i <= r_end; i++) {
+            bool contained = false;
+            //std::cout<< "Testing opt " << i << std::endl;
+            if (reads[i]->active == false) {
+                continue;
+            }
+
+            int containing_read; // NOTE(review): assigned below but never read in this function
+
+            for (std::unordered_map<int, std::vector<LOverlap *> >::iterator it = idx_ab[i].begin();
+                 it != idx_ab[i].end(); it++) {
+                std::sort(it->second.begin(), it->second.end(), compare_overlap);//Sort overlaps by lengths
+                //std::cout<<"Giving input to ProcessAlignment "<<it->second.size() <<std::endl;
+
+                if (it->second.size() > 0) {
+                    //Figure out if read is contained
+                    LOverlap *ovl = it->second[0];
+                    bool contained_alignment;
+                    // The last argument is true only when a DB name was given (presumably ProcessAlignment's trim switch — confirm against its signature).
+                    if (strlen(name_db) > 0)
+                        contained_alignment = ProcessAlignment(ovl, reads[ovl->read_A_id_],
+                                                               reads[ovl->read_B_id_], ALN_THRESHOLD, THETA, THETA2,
+                                                               true);
+                    else
+                        contained_alignment = ProcessAlignment(ovl, reads[ovl->read_A_id_],
+                                                               reads[ovl->read_B_id_], ALN_THRESHOLD, THETA, THETA2,
+                                                               false);
+                    if (contained_alignment == true) {
+                        containing_read = ovl->read_B_id_;
+                    }
+                    // A containing alignment only counts towards `contained` if read B is itself active.
+                    if (reads[ovl->read_B_id_]->active == true)
+                        contained = contained or contained_alignment;
+
+                    //Filter matches that matter.
+                    //TODO Figure out a way to do this more efficiently
+                    if ((ovl->match_type_ == FORWARD) or (ovl->match_type_ == FORWARD_INTERNAL))
+                        matches_forward[i].push_back(it->second[0]);
+                    else if ((ovl->match_type_ == BACKWARD) or (ovl->match_type_ == BACKWARD_INTERNAL))
+                        matches_backward[i].push_back(it->second[0]);
+
+                }
+
+                // With USE_TWO_MATCHES, repeat the same processing for the second overlap in sorted order.
+                if ((it->second.size() > 1) and (USE_TWO_MATCHES)) {
+                    //Figure out if read is contained
+                    LOverlap *ovl = it->second[1];
+                    bool contained_alignment;
+
+                    if (strlen(name_db) > 0)
+                        contained_alignment = ProcessAlignment(ovl, reads[ovl->read_A_id_],
+                                                               reads[ovl->read_B_id_], ALN_THRESHOLD, THETA, THETA2,
+                                                               true);
+                    else
+                        contained_alignment = ProcessAlignment(ovl, reads[ovl->read_A_id_],
+                                                               reads[ovl->read_B_id_], ALN_THRESHOLD, THETA, THETA2,
+                                                               false);
+                    if (contained_alignment == true) {
+                        containing_read = ovl->read_B_id_;
+                    }
+
+                    if (reads[ovl->read_B_id_]->active == true)
+                        contained = contained or contained_alignment;
+
+                    //Filter matches that matter.
+                    //TODO Figure out a way to do this more efficiently
+                    if ((ovl->match_type_ == FORWARD) or (ovl->match_type_ == FORWARD_INTERNAL))
+                        matches_forward[i].push_back(it->second[1]);
+                    else if ((ovl->match_type_ == BACKWARD) or (ovl->match_type_ == BACKWARD_INTERNAL))
+                        matches_backward[i].push_back(it->second[1]);
+
+                }
+
+
+            }
+            if (contained) {
+                std::cout << "[contained] Should not happen" << std::endl;
+                reads[i]->active = false;
+            }
+        }
+
+    }
+    // Summary over all parts.
+    console->info("kept {}/{} overlaps,  {}/{} rev_overlaps in {} part(s)", n_aln_kept_full,
+                  n_aln_full, n_rev_aln_kept_full,
+                  n_rev_aln_full,
+                  name_las_list.size());
+}
+
 
 
 
@@ -440,6 +589,8 @@ int main(int argc, char *argv[]) {
     cmdp.add<std::string>("out", 'o', "final output file name", true, "");
     cmdp.add<std::string>("log", 'g', "log folder name", false, "log");
     cmdp.add("debug", '\0', "debug mode");
+    cmdp.add("mlas", '\0', "multiple las files");
+
 
 
 
@@ -472,9 +623,7 @@ int main(int argc, char *argv[]) {
 
 
     std::ofstream deadend_out(name_deadend);
-    std::ofstream maximal_reads(name_max);
     std::ofstream garbage_out(name_garbage);
-    std::ofstream contained_out(name_contained);
     std::ifstream homo(name_homo);
     std::vector<int> homo_reads;
 
@@ -497,19 +646,14 @@ int main(int argc, char *argv[]) {
 
     console->info("Hinging layout");
 
-
-    if (cmdp.exist("debug")) {
-        char *buff = (char *) malloc(sizeof(char) * 2000);
-        getwd(buff);
-        console->info("current user {}, current working directory {}", getlogin(), buff);
-        free(buff);
-    }
-
+    bool mult_las;
+    mult_las = cmdp.exist("mlas");
     console->info("name of db: {}, name of .las file {}", name_db, name_las);
     console->info("name of fasta: {}, name of .paf file {}", name_fasta, name_paf);
     console->info("filter files prefix: {}", out);
     console->info("output prefix: {}", out_name);
-
+    console->info("Multiple las files: {}", mult_las);
+    console->info("Multiple las files: {}", cmdp.exist("mlas"));
 
     std::ifstream ini_file(name_config);
     std::string str((std::istreambuf_iterator<char>(ini_file)),
@@ -521,16 +665,10 @@ int main(int argc, char *argv[]) {
         la.openDB(name_db);
 
 
-    if (strlen(name_las) > 0)
-        la.openAlignmentFile(name_las);
 
     int64 n_aln = 0;
 
-    if (strlen(name_las) > 0) {
-        n_aln = la.getAlignmentNumber();
-        console->info("Load alignments from {}", name_las);
-        console->info("# Alignments: {}", n_aln);
-    }
+
 
     int n_read;
     if (strlen(name_db) > 0)
@@ -544,23 +682,17 @@ int main(int argc, char *argv[]) {
 
     console->info("# Reads: {}", n_read); // output some statistics
 
-    std::vector<LOverlap *> aln;//Vector of pointers to all alignments
-
-    if (strlen(name_las) > 0) {
-        la.resetAlignment();
-        la.getOverlap(aln, 0, n_aln);
-    }
 
-    if (strlen(name_paf) > 0) {
-        n_aln = la.loadPAF(std::string(name_paf), aln);
-        console->info("Load alignments from {}", name_paf);
-        console->info("# Alignments: {}", n_aln);
-    }
+////    if (strlen(name_paf) > 0) {
+//        n_aln = la.loadPAF(std::string(name_paf), aln);
+//        console->info("Load alignments from {}", name_paf);
+//        console->info("# Alignments: {}", n_aln);
+//    }
 
-    if (n_aln == 0) {
-        console->error("No alignments!");
-        return 1;
-    }
+//    if (n_aln == 0) {
+//        console->error("No alignments!");
+//        return 1;
+//    }
 
 
     if (strlen(name_db) > 0) {
@@ -602,6 +734,8 @@ int main(int argc, char *argv[]) {
     int MIN_CONNECTED_COMPONENT_SIZE = (int) reader.GetInteger("layout", "min_connected_component_size", 8);
 
     bool USE_TWO_MATCHES = (int) reader.GetInteger("layout", "use_two_matches", 1);
+    bool KEEP_ONLY_MATCHES_BETWEEN_MAXIMAL_READS = (int) reader.GetInteger("layout",
+                                                    "keep_only_matches_between_maximal_reads", 1);
     bool delete_telomere = (int) reader.GetInteger("layout", "del_telomere", 0);
 
 
@@ -782,107 +916,12 @@ int main(int argc, char *argv[]) {
     int num_forward_overlaps(0), num_forward_internal_overlaps(0), num_reverse_overlaps(0),
             num_reverse_internal_overlaps(0), rev_complemented_matches(0);
 //# pragma omp parallel for
-    for (int i = 0; i < aln.size(); i++) {
-        idx_ab[aln[i]->read_A_id_][aln[i]->read_B_id_] = std::vector<LOverlap *>();
-    }
-
-    for (int i = 0; i < aln.size(); i++) {
-        idx_ab[aln[i]->read_A_id_][aln[i]->read_B_id_].push_back(aln[i]);
-    }
-
-    int n_overlaps = 0;
-    int n_rev_overlaps = 0;
-    for (int i = 0; i < aln.size(); i++) {
-        n_overlaps++;
-        n_rev_overlaps += aln[i]->reverse_complement_match_;
-    }
-
-    console->info("overlaps {} rev_overlaps {}", n_overlaps, n_rev_overlaps);
-
-    console->info("index finished");
-    console->info("Number reads {}", n_read);
-
-
-    for (int i = 0; i < n_read; i++) {
-        bool contained = false;
-        //std::cout<< "Testing opt " << i << std::endl;
-        if (reads[i]->active == false) {
-            continue;
-        }
-
-        int containing_read;
-
-        for (std::unordered_map<int, std::vector<LOverlap *> >::iterator it = idx_ab[i].begin();
-             it != idx_ab[i].end(); it++) {
-            std::sort(it->second.begin(), it->second.end(), compare_overlap);//Sort overlaps by lengths
-            //std::cout<<"Giving input to ProcessAlignment "<<it->second.size() <<std::endl;
-
-            if (it->second.size() > 0) {
-                //Figure out if read is contained
-                LOverlap *ovl = it->second[0];
-                bool contained_alignment;
-
-                if (strlen(name_db) > 0)
-                    contained_alignment = ProcessAlignment(ovl, reads[ovl->read_A_id_],
-                                                           reads[ovl->read_B_id_], ALN_THRESHOLD, THETA, THETA2, true);
-                else
-                    contained_alignment = ProcessAlignment(ovl, reads[ovl->read_A_id_],
-                                                           reads[ovl->read_B_id_], ALN_THRESHOLD, THETA, THETA2, false);
-                if (contained_alignment == true) {
-                    containing_read = ovl->read_B_id_;
-                }
-
-                if (reads[ovl->read_B_id_]->active == true)
-                    contained = contained or contained_alignment;
-
-                //Filter matches that matter.
-                //TODO Figure out a way to do this more efficiently
-                if ((ovl->match_type_ == FORWARD) or (ovl->match_type_ == FORWARD_INTERNAL))
-                    matches_forward[i].push_back(it->second[0]);
-                else if ((ovl->match_type_ == BACKWARD) or (ovl->match_type_ == BACKWARD_INTERNAL))
-                    matches_backward[i].push_back(it->second[0]);
 
-            }
-
-
-            if ((it->second.size() > 1) and (USE_TWO_MATCHES)) {
-                //Figure out if read is contained
-                LOverlap *ovl = it->second[1];
-                bool contained_alignment;
-
-                if (strlen(name_db) > 0)
-                    contained_alignment = ProcessAlignment(ovl, reads[ovl->read_A_id_],
-                                                           reads[ovl->read_B_id_], ALN_THRESHOLD, THETA, THETA2, true);
-                else
-                    contained_alignment = ProcessAlignment(ovl, reads[ovl->read_A_id_],
-                                                           reads[ovl->read_B_id_], ALN_THRESHOLD, THETA, THETA2, false);
-                if (contained_alignment == true) {
-                    containing_read = ovl->read_B_id_;
-                }
-
-                if (reads[ovl->read_B_id_]->active == true)
-                    contained = contained or contained_alignment;
-
-                //Filter matches that matter.
-                //TODO Figure out a way to do this more efficiently
-                if ((ovl->match_type_ == FORWARD) or (ovl->match_type_ == FORWARD_INTERNAL))
-                    matches_forward[i].push_back(it->second[1]);
-                else if ((ovl->match_type_ == BACKWARD) or (ovl->match_type_ == BACKWARD_INTERNAL))
-                    matches_backward[i].push_back(it->second[1]);
-
-            }
-
-
-
-
-        }
-        if (contained) {
-            reads[i]->active = false;
-            contained_out << i << "\t" << containing_read << std::endl;
-
-        }
-    }
 
+    GetAlignment ( la, reads,  idx_ab, matches_forward, matches_backward,
+            n_read,  name_db, name_las, mult_las, ALN_THRESHOLD,  THETA,  THETA2,
+                   USE_TWO_MATCHES, n_aln, console, name_max,
+                   KEEP_ONLY_MATCHES_BETWEEN_MAXIMAL_READS);
 
     for (int i = 0; i < n_read; i++) {//Isn't this just 0 or 1?
         num_overlaps += matches_forward[i].size() + matches_backward[i].size();
@@ -898,7 +937,6 @@ int main(int argc, char *argv[]) {
     for (int i = 0; i < n_read; i++) {
         if (reads[i]->active) {
             num_active_read++;
-            maximal_reads << i << std::endl;
         }
     }
     console->info("removed contained reads, active reads: {}", num_active_read);
@@ -962,7 +1000,6 @@ int main(int argc, char *argv[]) {
     << rev_complemented_bck_matches << " rev cmplment bck matches "
     << rev_complemented_bck_int_matches << " rev cmplement bck int matches " << std::endl;*/
 
-# pragma omp parallel for
     for (int i = 0; i < n_read; i++) {
         if (reads[i]->active) {
             std::sort(matches_forward[i].begin(), matches_forward[i].end(), compare_overlap_weight);
@@ -979,13 +1016,13 @@ int main(int argc, char *argv[]) {
                 if (reads[matches_forward[i][j]->read_B_id_]->active) {
                     fprintf(G_out, "%d %d %d %d %d [%d %d] [%d %d] [%d %d] [%d %d] \n",
                             matches_forward[i][j]->read_A_id_, matches_forward[i][j]->read_B_id_,
-                            matches_forward[i][j]->weight, matches_forward[i][j]->reverse_complement_match_,
+                            matches_forward[i][j]->length, matches_forward[i][j]->reverse_complement_match_,
                             matches_forward[i][j]->match_type_, matches_forward[i][j]->eff_read_A_match_start_,
                             matches_forward[i][j]->eff_read_A_match_end_,
                             matches_forward[i][j]->eff_read_B_match_start_,
                             matches_forward[i][j]->eff_read_B_match_end_,
-                            matches_forward[i][j]->eff_read_A_start_, matches_forward[i][j]->eff_read_A_end_,
-                            matches_forward[i][j]->eff_read_B_start_, matches_forward[i][j]->eff_read_B_end_);
+                            matches_forward[i][j]->eff_read_A_read_start_, matches_forward[i][j]->eff_read_A_read_end_,
+                            matches_forward[i][j]->eff_read_B_read_start_, matches_forward[i][j]->eff_read_B_read_end_);
                     break;
                 }
             }
@@ -1000,13 +1037,13 @@ int main(int argc, char *argv[]) {
                 if (reads[matches_backward[i][j]->read_B_id_]->active) {
                     fprintf(G_out, "%d %d %d %d %d [%d %d] [%d %d] [%d %d] [%d %d] \n",
                             matches_backward[i][j]->read_A_id_, matches_backward[i][j]->read_B_id_,
-                            matches_backward[i][j]->weight, matches_backward[i][j]->reverse_complement_match_,
+                            matches_backward[i][j]->length, matches_backward[i][j]->reverse_complement_match_,
                             matches_backward[i][j]->match_type_, matches_backward[i][j]->eff_read_A_match_start_,
                             matches_backward[i][j]->eff_read_A_match_end_,
                             matches_backward[i][j]->eff_read_B_match_start_,
                             matches_backward[i][j]->eff_read_B_match_end_,
-                            matches_backward[i][j]->eff_read_A_start_, matches_backward[i][j]->eff_read_A_end_,
-                            matches_backward[i][j]->eff_read_B_start_, matches_backward[i][j]->eff_read_B_end_);
+                            matches_backward[i][j]->eff_read_A_read_start_, matches_backward[i][j]->eff_read_A_read_end_,
+                            matches_backward[i][j]->eff_read_B_read_start_, matches_backward[i][j]->eff_read_B_read_end_);
                     break;
                 }
             }
@@ -1021,13 +1058,13 @@ int main(int argc, char *argv[]) {
                 if (reads[matches_forward[i][j]->read_B_id_]->active)
                     fprintf(out_backup, "%d %d %d %d %d [%d %d] [%d %d] [%d %d] [%d %d] \n",
                             matches_forward[i][j]->read_A_id_, matches_forward[i][j]->read_B_id_,
-                            matches_forward[i][j]->weight, matches_forward[i][j]->reverse_complement_match_,
+                            matches_forward[i][j]->length, matches_forward[i][j]->reverse_complement_match_,
                             matches_forward[i][j]->match_type_, matches_forward[i][j]->eff_read_A_match_start_,
                             matches_forward[i][j]->eff_read_A_match_end_,
                             matches_forward[i][j]->eff_read_B_match_start_,
                             matches_forward[i][j]->eff_read_B_match_end_,
-                            matches_forward[i][j]->eff_read_A_start_, matches_forward[i][j]->eff_read_A_end_,
-                            matches_forward[i][j]->eff_read_B_start_, matches_forward[i][j]->eff_read_B_end_);
+                            matches_forward[i][j]->eff_read_A_read_start_, matches_forward[i][j]->eff_read_A_read_end_,
+                            matches_forward[i][j]->eff_read_B_read_start_, matches_forward[i][j]->eff_read_B_read_end_);
             }
     }
     fclose(out_backup);
@@ -1038,13 +1075,13 @@ int main(int argc, char *argv[]) {
                 if (reads[matches_backward[i][j]->read_B_id_]->active)
                     fprintf(out_backup, "%d %d %d %d %d [%d %d] [%d %d] [%d %d] [%d %d] \n",
                             matches_backward[i][j]->read_A_id_, matches_backward[i][j]->read_B_id_,
-                            matches_backward[i][j]->weight, matches_backward[i][j]->reverse_complement_match_,
+                            matches_backward[i][j]->length, matches_backward[i][j]->reverse_complement_match_,
                             matches_backward[i][j]->match_type_, matches_backward[i][j]->eff_read_A_match_start_,
                             matches_backward[i][j]->eff_read_A_match_end_,
                             matches_backward[i][j]->eff_read_B_match_start_,
                             matches_backward[i][j]->eff_read_B_match_end_,
-                            matches_backward[i][j]->eff_read_A_start_, matches_backward[i][j]->eff_read_A_end_,
-                            matches_backward[i][j]->eff_read_B_start_, matches_backward[i][j]->eff_read_B_end_);
+                            matches_backward[i][j]->eff_read_A_read_start_, matches_backward[i][j]->eff_read_A_read_end_,
+                            matches_backward[i][j]->eff_read_B_read_start_, matches_backward[i][j]->eff_read_B_read_end_);
             }
     }
     fclose(out_backup);
@@ -1380,13 +1417,13 @@ int main(int argc, char *argv[]) {
 
                                                 fprintf(out_debug,"%d %d %d %d %d [%d %d] [%d %d] [%d %d] [%d %d] \n",
                                                         matches_forward[i][j]->read_A_id_, matches_forward[i][j]->read_B_id_,
-                                                        matches_forward[i][j]->weight, matches_forward[i][j]->reverse_complement_match_,
+                                                        matches_forward[i][j]->length, matches_forward[i][j]->reverse_complement_match_,
                                                         matches_forward[i][j]->match_type_, matches_forward[i][j]->eff_read_A_match_start_,
                                                         matches_forward[i][j]->eff_read_A_match_end_,
                                                         matches_forward[i][j]->eff_read_B_match_start_,
                                                         matches_forward[i][j]->eff_read_B_match_end_,
-                                                        matches_forward[i][j]->eff_read_A_start_, matches_forward[i][j]->eff_read_A_end_,
-                                                        matches_forward[i][j]->eff_read_B_start_, matches_forward[i][j]->eff_read_B_end_);
+                                                        matches_forward[i][j]->eff_read_A_read_start_, matches_forward[i][j]->eff_read_A_read_end_,
+                                                        matches_forward[i][j]->eff_read_B_read_start_, matches_forward[i][j]->eff_read_B_read_end_);
 
                                                 fprintf(out_debug, "%d %d %d %d\n", hinges_vec[i][k].pos,
                                                         hinges_vec[i][k].type,
@@ -1638,52 +1675,52 @@ int main(int argc, char *argv[]) {
                                 if (matches_forward[i][j]->reverse_complement_match_ == 0)
                                     fprintf(out_g1, "%d %d %d [%d %d] [%d %d] [%d %d] [%d %d]\n",
                                             matches_forward[i][j]->read_A_id_,
-                                            matches_forward[i][j]->read_B_id_, matches_forward[i][j]->weight,
+                                            matches_forward[i][j]->read_B_id_, matches_forward[i][j]->length,
                                             matches_forward[i][j]->eff_read_A_match_start_,
                                             matches_forward[i][j]->eff_read_A_match_end_,
                                             matches_forward[i][j]->eff_read_B_match_start_,
                                             matches_forward[i][j]->eff_read_B_match_end_,
-                                            matches_forward[i][j]->eff_read_A_start_,
-                                            matches_forward[i][j]->eff_read_A_end_,
-                                            matches_forward[i][j]->eff_read_B_start_,
-                                            matches_forward[i][j]->eff_read_B_end_);
+                                            matches_forward[i][j]->eff_read_A_read_start_,
+                                            matches_forward[i][j]->eff_read_A_read_end_,
+                                            matches_forward[i][j]->eff_read_B_read_start_,
+                                            matches_forward[i][j]->eff_read_B_read_end_);
                                 else
                                     fprintf(out_g1, "%d %d' %d [%d %d] [%d %d] [%d %d] [%d %d]\n",
                                             matches_forward[i][j]->read_A_id_,
-                                            matches_forward[i][j]->read_B_id_, matches_forward[i][j]->weight,
+                                            matches_forward[i][j]->read_B_id_, matches_forward[i][j]->length,
                                             matches_forward[i][j]->eff_read_A_match_start_,
                                             matches_forward[i][j]->eff_read_A_match_end_,
                                             matches_forward[i][j]->eff_read_B_match_start_,
                                             matches_forward[i][j]->eff_read_B_match_end_,
-                                            matches_forward[i][j]->eff_read_A_start_,
-                                            matches_forward[i][j]->eff_read_A_end_,
-                                            matches_forward[i][j]->eff_read_B_start_,
-                                            matches_forward[i][j]->eff_read_B_end_);
+                                            matches_forward[i][j]->eff_read_A_read_start_,
+                                            matches_forward[i][j]->eff_read_A_read_end_,
+                                            matches_forward[i][j]->eff_read_B_read_start_,
+                                            matches_forward[i][j]->eff_read_B_read_end_);
 
                                 if (matches_forward[i][j]->reverse_complement_match_ == 0)
                                     fprintf(out_g2, "%d' %d' %d [%d %d] [%d %d] [%d %d] [%d %d]\n",
                                             matches_forward[i][j]->read_B_id_,
-                                            matches_forward[i][j]->read_A_id_, matches_forward[i][j]->weight,
+                                            matches_forward[i][j]->read_A_id_, matches_forward[i][j]->length,
                                             matches_forward[i][j]->eff_read_A_match_start_,
                                             matches_forward[i][j]->eff_read_A_match_end_,
                                             matches_forward[i][j]->eff_read_B_match_start_,
                                             matches_forward[i][j]->eff_read_B_match_end_,
-                                            matches_forward[i][j]->eff_read_A_start_,
-                                            matches_forward[i][j]->eff_read_A_end_,
-                                            matches_forward[i][j]->eff_read_B_start_,
-                                            matches_forward[i][j]->eff_read_B_end_);
+                                            matches_forward[i][j]->eff_read_A_read_start_,
+                                            matches_forward[i][j]->eff_read_A_read_end_,
+                                            matches_forward[i][j]->eff_read_B_read_start_,
+                                            matches_forward[i][j]->eff_read_B_read_end_);
                                 else
                                     fprintf(out_g2, "%d %d' %d [%d %d] [%d %d] [%d %d] [%d %d]\n",
                                             matches_forward[i][j]->read_B_id_,
-                                            matches_forward[i][j]->read_A_id_, matches_forward[i][j]->weight,
+                                            matches_forward[i][j]->read_A_id_, matches_forward[i][j]->length,
                                             matches_forward[i][j]->eff_read_A_match_start_,
                                             matches_forward[i][j]->eff_read_A_match_end_,
                                             matches_forward[i][j]->eff_read_B_match_start_,
                                             matches_forward[i][j]->eff_read_B_match_end_,
-                                            matches_forward[i][j]->eff_read_A_start_,
-                                            matches_forward[i][j]->eff_read_A_end_,
-                                            matches_forward[i][j]->eff_read_B_start_,
-                                            matches_forward[i][j]->eff_read_B_end_);
+                                            matches_forward[i][j]->eff_read_A_read_start_,
+                                            matches_forward[i][j]->eff_read_A_read_end_,
+                                            matches_forward[i][j]->eff_read_B_read_start_,
+                                            matches_forward[i][j]->eff_read_B_read_end_);
 
                             }
                         }
@@ -1704,52 +1741,52 @@ int main(int argc, char *argv[]) {
                                 if (matches_backward[i][j]->reverse_complement_match_ == 0)
                                     fprintf(out_g1, "%d %d %d [%d %d] [%d %d] [%d %d] [%d %d]\n",
                                             matches_backward[i][j]->read_A_id_,
-                                            matches_backward[i][j]->read_B_id_, matches_backward[i][j]->weight,
+                                            matches_backward[i][j]->read_B_id_, matches_backward[i][j]->length,
                                             matches_backward[i][j]->eff_read_A_match_start_,
                                             matches_backward[i][j]->eff_read_A_match_end_,
                                             matches_backward[i][j]->eff_read_B_match_start_,
                                             matches_backward[i][j]->eff_read_B_match_end_,
-                                            matches_backward[i][j]->eff_read_A_start_,
-                                            matches_backward[i][j]->eff_read_A_end_,
-                                            matches_backward[i][j]->eff_read_B_start_,
-                                            matches_backward[i][j]->eff_read_B_end_);
+                                            matches_backward[i][j]->eff_read_A_read_start_,
+                                            matches_backward[i][j]->eff_read_A_read_end_,
+                                            matches_backward[i][j]->eff_read_B_read_start_,
+                                            matches_backward[i][j]->eff_read_B_read_end_);
                                 else
                                     fprintf(out_g1, "%d %d' %d [%d %d] [%d %d] [%d %d] [%d %d]\n",
                                             matches_backward[i][j]->read_A_id_,
-                                            matches_backward[i][j]->read_B_id_, matches_backward[i][j]->weight,
+                                            matches_backward[i][j]->read_B_id_, matches_backward[i][j]->length,
                                             matches_backward[i][j]->eff_read_A_match_start_,
                                             matches_backward[i][j]->eff_read_A_match_end_,
                                             matches_backward[i][j]->eff_read_B_match_start_,
                                             matches_backward[i][j]->eff_read_B_match_end_,
-                                            matches_backward[i][j]->eff_read_A_start_,
-                                            matches_backward[i][j]->eff_read_A_end_,
-                                            matches_backward[i][j]->eff_read_B_start_,
-                                            matches_backward[i][j]->eff_read_B_end_);
+                                            matches_backward[i][j]->eff_read_A_read_start_,
+                                            matches_backward[i][j]->eff_read_A_read_end_,
+                                            matches_backward[i][j]->eff_read_B_read_start_,
+                                            matches_backward[i][j]->eff_read_B_read_end_);
 
                                 if (matches_backward[i][j]->reverse_complement_match_ == 0)
                                     fprintf(out_g2, "%d' %d' %d [%d %d] [%d %d] [%d %d] [%d %d]\n",
                                             matches_backward[i][j]->read_B_id_,
-                                            matches_backward[i][j]->read_A_id_, matches_backward[i][j]->weight,
+                                            matches_backward[i][j]->read_A_id_, matches_backward[i][j]->length,
                                             matches_backward[i][j]->eff_read_A_match_start_,
                                             matches_backward[i][j]->eff_read_A_match_end_,
                                             matches_backward[i][j]->eff_read_B_match_start_,
                                             matches_backward[i][j]->eff_read_B_match_end_,
-                                            matches_backward[i][j]->eff_read_A_start_,
-                                            matches_backward[i][j]->eff_read_A_end_,
-                                            matches_backward[i][j]->eff_read_B_start_,
-                                            matches_backward[i][j]->eff_read_B_end_);
+                                            matches_backward[i][j]->eff_read_A_read_start_,
+                                            matches_backward[i][j]->eff_read_A_read_end_,
+                                            matches_backward[i][j]->eff_read_B_read_start_,
+                                            matches_backward[i][j]->eff_read_B_read_end_);
                                 else
                                     fprintf(out_g2, "%d %d' %d [%d %d] [%d %d] [%d %d] [%d %d]\n",
                                             matches_backward[i][j]->read_B_id_,
-                                            matches_backward[i][j]->read_A_id_, matches_backward[i][j]->weight,
+                                            matches_backward[i][j]->read_A_id_, matches_backward[i][j]->length,
                                             matches_backward[i][j]->eff_read_A_match_start_,
                                             matches_backward[i][j]->eff_read_A_match_end_,
                                             matches_backward[i][j]->eff_read_B_match_start_,
                                             matches_backward[i][j]->eff_read_B_match_end_,
-                                            matches_backward[i][j]->eff_read_A_start_,
-                                            matches_backward[i][j]->eff_read_A_end_,
-                                            matches_backward[i][j]->eff_read_B_start_,
-                                            matches_backward[i][j]->eff_read_B_end_);
+                                            matches_backward[i][j]->eff_read_A_read_start_,
+                                            matches_backward[i][j]->eff_read_A_read_end_,
+                                            matches_backward[i][j]->eff_read_B_read_start_,
+                                            matches_backward[i][j]->eff_read_B_read_end_);
                             }
                         }
                         backward++;
diff --git a/src/lib/LAInterface.cpp b/src/lib/LAInterface.cpp
index 57e1d1f..59025f1 100644
--- a/src/lib/LAInterface.cpp
+++ b/src/lib/LAInterface.cpp
@@ -61,7 +61,7 @@ int LAInterface::openDB2(std::string filename, std::string filename2) {
     strcpy(fn_1, fn);
     strcat(fn_1, ".db");
 
-    FILE * dstub = Fopen(fn_1, "r");
+    FILE * dstub = Fopen(fn_1, (char *)"r");
     if (dstub == NULL)
         exit(1);
 
@@ -69,8 +69,8 @@ int LAInterface::openDB2(std::string filename, std::string filename2) {
 
     printf("%d files\n", nfiles);
 
-    flist = (char **) Malloc(sizeof(char *) * nfiles, "Allocating file list");
-    findx = (int *) Malloc(sizeof(int *) * (nfiles + 1), "Allocating file index");
+    flist = (char **) Malloc(sizeof(char *) * nfiles, (char *)"Allocating file list");
+    findx = (int *) Malloc(sizeof(int *) * (nfiles + 1), (char *)"Allocating file index");
 
     if (flist == NULL || findx == NULL)
         exit(1);
@@ -82,7 +82,7 @@ int LAInterface::openDB2(std::string filename, std::string filename2) {
         char prolog[MAX_NAME], fname[MAX_NAME];
 
         if (fscanf(dstub, DB_FDATA, findx + i, fname, prolog) != 3) SYSTEM_ERROR
-        if ((flist[i] = Strdup(prolog, "Adding to file list")) == NULL)
+        if ((flist[i] = Strdup(prolog, (char *)"Adding to file list")) == NULL)
             exit(1);
     }
 
@@ -94,7 +94,7 @@ int LAInterface::openDB2(std::string filename, std::string filename2) {
     strcpy(fn_2, fn2);
     strcat(fn_2, ".db");
 
-    dstub = Fopen(fn_2, "r");
+    dstub = Fopen(fn_2, (char*)"r");
     if (dstub == NULL)
         exit(1);
 
@@ -102,8 +102,8 @@ int LAInterface::openDB2(std::string filename, std::string filename2) {
 
     printf("%d files\n", nfiles2);
 
-    flist2 = (char **) Malloc(sizeof(char *) * nfiles2, "Allocating file list");
-    findx2 = (int *) Malloc(sizeof(int *) * (nfiles2 + 1), "Allocating file index");
+    flist2 = (char **) Malloc(sizeof(char *) * nfiles2, (char *)"Allocating file list");
+    findx2 = (int *) Malloc(sizeof(int *) * (nfiles2 + 1), (char *)"Allocating file index");
 
     if (flist2 == NULL || findx2 == NULL)
         exit(1);
@@ -115,7 +115,7 @@ int LAInterface::openDB2(std::string filename, std::string filename2) {
         char prolog[MAX_NAME], fname[MAX_NAME];
 
         if (fscanf(dstub, DB_FDATA, findx2 + i, fname, prolog) != 3) SYSTEM_ERROR
-        if ((flist2[i] = Strdup(prolog, "Adding to file list")) == NULL)
+        if ((flist2[i] = Strdup(prolog, (char *)"Adding to file list")) == NULL)
             exit(1);
     }
 
@@ -151,7 +151,7 @@ int LAInterface::openDB(std::string filename) {
     strcpy(fn2, fn);
     strcat(fn2, ".db");
 
-    dstub = Fopen(fn2, "r");
+    dstub = Fopen(fn2, (char*)"r");
     if (dstub == NULL)
         exit(1);
 
@@ -159,8 +159,8 @@ int LAInterface::openDB(std::string filename) {
 
     //printf("%d files\n", nfiles);
 
-    flist = (char **) Malloc(sizeof(char *) * nfiles, "Allocating file list");
-    findx = (int *) Malloc(sizeof(int *) * (nfiles + 1), "Allocating file index");
+    flist = (char **) Malloc(sizeof(char *) * nfiles, (char *)"Allocating file list");
+    findx = (int *) Malloc(sizeof(int *) * (nfiles + 1), (char *)"Allocating file index");
 
     if (flist == NULL || findx == NULL)
         exit(1);
@@ -172,7 +172,7 @@ int LAInterface::openDB(std::string filename) {
         char prolog[MAX_NAME], fname[MAX_NAME];
 
         if (fscanf(dstub, DB_FDATA, findx + i, fname, prolog) != 3) SYSTEM_ERROR
-        if ((flist[i] = Strdup(prolog, "Adding to file list")) == NULL)
+        if ((flist[i] = Strdup(prolog, (char *)"Adding to file list")) == NULL)
             exit(1);
     }
 
@@ -597,7 +597,7 @@ int LAInterface::openAlignmentFile(std::string filename) {
     char *fn = new char[filename.size() + 1];
     strcpy(fn, filename.c_str());
 
-    input = Fopen(fn, "r");
+    input = Fopen(fn, (char*)"r");
     if (input == NULL)
         exit(1);
 
@@ -658,7 +658,7 @@ void LAInterface::showAlignment(int from, int to) {
     }
 
     tmax = 1000;
-    trace = (uint16 *) Malloc(sizeof(uint16) * tmax, "Allocating trace vector");
+    trace = (uint16 *) Malloc(sizeof(uint16) * tmax, (char *)"Allocating trace vector");
     if (trace == NULL)
         exit(1);
     in = 0;
@@ -719,7 +719,7 @@ void LAInterface::showAlignment(int from, int to) {
         Read_Overlap(input, ovl);
         if (ovl->path.tlen > tmax) {
             tmax = ((int) 1.2 * ovl->path.tlen) + 100;
-            trace = (uint16 *) Realloc(trace, sizeof(uint16) * tmax, "Allocating trace vector");
+            trace = (uint16 *) Realloc(trace, sizeof(uint16) * tmax, (char *)"Allocating trace vector");
             if (trace == NULL)
                 exit(1);
         }
@@ -945,7 +945,7 @@ void LAInterface::getAlignmentB(std::vector<int> &result, int from) {
     }
 
     tmax = 1000;
-    trace = (uint16 *) Malloc(sizeof(uint16) * tmax, "Allocating trace vector");
+    trace = (uint16 *) Malloc(sizeof(uint16) * tmax, (char *)"Allocating trace vector");
     if (trace == NULL)
         exit(1);
     in = 0;
@@ -1006,7 +1006,7 @@ void LAInterface::getAlignmentB(std::vector<int> &result, int from) {
         Read_Overlap(input, ovl);
         if (ovl->path.tlen > tmax) {
             tmax = ((int) 1.2 * ovl->path.tlen) + 100;
-            trace = (uint16 *) Realloc(trace, sizeof(uint16) * tmax, "Allocating trace vector");
+            trace = (uint16 *) Realloc(trace, sizeof(uint16) * tmax, (char *)"Allocating trace vector");
             if (trace == NULL)
                 exit(1);
         }
@@ -1401,6 +1401,119 @@ void LAInterface::resetAlignment() {
 
 
 }
+// Collects every overlap record whose A read is listed in `range`.
+//
+// result_vec : receives one LOverlap per selected record. Each is allocated here with
+//              new (its trace_pts with malloc); nothing in this function releases them.
+// range      : 0-based A-read indices; each becomes the 1-based interval [r+1, r+1].
+//              NOTE(review): the interval cursor below only moves forward, so this looks
+//              like it needs `range` and the records' A reads in ascending order - confirm.
+//
+// Reads novl records from `input` and calls exit(1) if a buffer cannot be allocated.
+void LAInterface::getOverlap(std::vector<LOverlap *> &result_vec, std::vector<int> &range) {
+    aln->path = &(ovl->path);
+
+    // Scratch buffer for one record's trace; grown on demand inside the loop.
+    int tmax = 1000;
+    uint16 *trace = (uint16 *) Malloc(sizeof(uint16) * tmax, (char *)"Allocating trace vector");
+    if (trace == NULL)
+        exit(1);
+
+    // Flattened [first, last] pairs of 1-based read numbers, closed by an INT32_MAX sentinel.
+    // Local storage, so nothing leaks; NOTE(review): `pts` is no longer assigned here - confirm no reader.
+    std::vector<int> bounds;
+    bounds.reserve(range.size() * 2 + 1);
+    for (size_t k = 0; k < range.size(); k++) {
+        bounds.push_back(range[k] + 1);
+        bounds.push_back(range[k] + 1);
+    }
+    bounds.push_back(INT32_MAX);
+
+    int in = 0;             // non-zero while the cursor sits inside a selected interval
+    int npt = bounds[0];    // boundary the cursor is currently compared against
+    int idx = 1;            // index of the boundary that follows npt
+
+    for (int j = 0; j < novl; j++) {
+        Read_Overlap(input, ovl);
+        if (ovl->path.tlen > tmax) {
+            // Grow with 20% headroom. The cast has to wrap the whole product:
+            // `(int) 1.2 * tlen` truncates the factor to 1 and adds no headroom.
+            tmax = (int) (1.2 * ovl->path.tlen) + 100;
+            trace = (uint16 *) Realloc(trace, sizeof(uint16) * tmax, (char *)"Allocating trace vector");
+            if (trace == NULL)
+                exit(1);
+        }
+        ovl->path.trace = (void *) trace;
+        Read_Trace(input, ovl, tbytes);
+
+        // Move the interval cursor up to this record's A read (1-based) and decide membership.
+        int ar = ovl->aread + 1;
+        if (in) {
+            while (ar > npt) {
+                npt = bounds[idx++];
+                if (ar < npt) {
+                    in = 0;
+                    break;
+                }
+                npt = bounds[idx++];
+            }
+        }
+        else {
+            while (ar >= npt) {
+                npt = bounds[idx++];
+                if (ar <= npt) {
+                    in = 1;
+                    break;
+                }
+                npt = bounds[idx++];
+            }
+        }
+        if (!in)
+            continue;
+
+        aln->alen = db1->reads[ovl->aread].rlen;
+        aln->blen = db2->reads[ovl->bread].rlen;
+        aln->flags = ovl->flags;
+        int64 tps = ovl->path.tlen / 2;
+        LOverlap *new_ovl = new LOverlap();
+        new_ovl->reverse_complement_match_ = COMP(ovl->flags) ? 1 : 0;
+
+        if (small)
+            Decompress_TraceTo16(ovl);
+
+        // Deep-copy the trace: the shared scratch buffer is overwritten by the next record.
+        new_ovl->trace_pts_len = ovl->path.tlen;
+        new_ovl->trace_pts = (uint16 *) malloc(ovl->path.tlen * sizeof(uint16));
+        if (ovl->path.tlen > 0) {
+            if (new_ovl->trace_pts == NULL)
+                exit(1);
+            memcpy(new_ovl->trace_pts, ovl->path.trace, ovl->path.tlen * sizeof(uint16));
+        }
+
+        new_ovl->read_A_id_ = ovl->aread;
+        new_ovl->read_B_id_ = ovl->bread;
+        new_ovl->read_A_match_start_ = ovl->path.abpos;
+        new_ovl->read_A_match_end_ = ovl->path.aepos;
+        new_ovl->alen = aln->alen;
+        new_ovl->blen = aln->blen;
+
+        // For a reverse-complement match the B interval is mirrored about blen, so start and end swap.
+        if (new_ovl->reverse_complement_match_ == 0) {
+            new_ovl->read_B_match_start_ = ovl->path.bbpos;
+            new_ovl->read_B_match_end_ = ovl->path.bepos;
+        }
+        else {
+            new_ovl->read_B_match_start_ = new_ovl->blen - ovl->path.bepos;
+            new_ovl->read_B_match_end_ = new_ovl->blen - ovl->path.bbpos;
+        }
+
+        new_ovl->diffs = ovl->path.diffs;
+        new_ovl->tlen = ovl->path.tlen;
+        new_ovl->tps = tps;
+        result_vec.push_back(new_ovl);
+    }
+    free(trace);
+}
 
 
 void LAInterface::getOverlap(std::vector<LOverlap *> &result_vec, int from, int64 to) {
@@ -1414,7 +1527,7 @@ void LAInterface::getOverlap(std::vector<LOverlap *> &result_vec, int from, int6
     aln->path = &(ovl->path);
 
     tmax = 1000;
-    trace = (uint16 *) Malloc(sizeof(uint16) * tmax, "Allocating trace vector");
+    trace = (uint16 *) Malloc(sizeof(uint16) * tmax, (char *)"Allocating trace vector");
     if (trace == NULL)
         exit(1);
     in = 0;
@@ -1424,6 +1537,8 @@ void LAInterface::getOverlap(std::vector<LOverlap *> &result_vec, int from, int6
     pts[1] = to + 0;
     pts[2] = INT32_MAX;
 
+    //printf("from to %d %d\n",pts[0], pts[1]);
+
     npt = pts[0];
     idx = 1;
 
@@ -1438,7 +1553,7 @@ void LAInterface::getOverlap(std::vector<LOverlap *> &result_vec, int from, int6
         Read_Overlap(input, ovl);
         if (ovl->path.tlen > tmax) {
             tmax = ((int) 1.2 * ovl->path.tlen) + 100;
-            trace = (uint16 *) Realloc(trace, sizeof(uint16) * tmax, "Allocating trace vector");
+            trace = (uint16 *) Realloc(trace, sizeof(uint16) * tmax, (char *)"Allocating trace vector");
             if (trace == NULL)
                 exit(1);
         }
@@ -1447,6 +1562,9 @@ void LAInterface::getOverlap(std::vector<LOverlap *> &result_vec, int from, int6
         //  Determine if it should be displayed
 
         ar = ovl->aread + 1;
+        //printf("ar %d\n", ar);
+
+
         if (in) {
             while (ar > npt) {
                 npt = pts[idx++];
@@ -1526,7 +1644,7 @@ void LAInterface::getOverlapw(std::vector<LOverlap *> &result_vec, int from, int
     aln->path = &(ovl->path);
 
     tmax = 1000;
-    trace = (uint16 *) Malloc(sizeof(uint16) * tmax, "Allocating trace vector");
+    trace = (uint16 *) Malloc(sizeof(uint16) * tmax, (char *)"Allocating trace vector");
     if (trace == NULL)
         exit(1);
     in = 0;
@@ -1550,7 +1668,7 @@ void LAInterface::getOverlapw(std::vector<LOverlap *> &result_vec, int from, int
         Read_Overlap(input, ovl);
         if (ovl->path.tlen > tmax) {
             tmax = ((int) 1.2 * ovl->path.tlen) + 100;
-            trace = (uint16 *) Realloc(trace, sizeof(uint16) * tmax, "Allocating trace vector");
+            trace = (uint16 *) Realloc(trace, sizeof(uint16) * tmax, (char *)"Allocating trace vector");
             if (trace == NULL)
                 exit(1);
         }
@@ -1671,7 +1789,7 @@ void LAInterface::getAlignment(std::vector<LAlignment *> &result_vec, int from,
 
 
     tmax = 1000;
-    trace = (uint16 *) Malloc(sizeof(uint16) * tmax, "Allocating trace vector");
+    trace = (uint16 *) Malloc(sizeof(uint16) * tmax, (char *)"Allocating trace vector");
     if (trace == NULL)
         exit(1);
     in = 0;
@@ -1730,7 +1848,7 @@ void LAInterface::getAlignment(std::vector<LAlignment *> &result_vec, int from,
         Read_Overlap(input, ovl);
         if (ovl->path.tlen > tmax) {
             tmax = ((int) 1.2 * ovl->path.tlen) + 100;
-            trace = (uint16 *) Realloc(trace, sizeof(uint16) * tmax, "Allocating trace vector");
+            trace = (uint16 *) Realloc(trace, sizeof(uint16) * tmax, (char *)"Allocating trace vector");
             if (trace == NULL)
                 exit(1);
         }
@@ -2068,7 +2186,7 @@ void LAInterface::getAlignment(std::vector<LAlignment *> &result_vec, std::vecto
 
 
     tmax = 1000;
-    trace = (uint16 *) Malloc(sizeof(uint16) * tmax, "Allocating trace vector");
+    trace = (uint16 *) Malloc(sizeof(uint16) * tmax, (char *)"Allocating trace vector");
     if (trace == NULL)
         exit(1);
     in = 0;
@@ -2133,7 +2251,7 @@ void LAInterface::getAlignment(std::vector<LAlignment *> &result_vec, std::vecto
         Read_Overlap(input, ovl);
         if (ovl->path.tlen > tmax) {
             tmax = ((int) 1.2 * ovl->path.tlen) + 100;
-            trace = (uint16 *) Realloc(trace, sizeof(uint16) * tmax, "Allocating trace vector");
+            trace = (uint16 *) Realloc(trace, sizeof(uint16) * tmax, (char *)"Allocating trace vector");
             if (trace == NULL)
                 exit(1);
         }
@@ -2490,7 +2608,7 @@ void LAInterface::showOverlap(int from, int to) {
 	      }
 
 	    tmax  = 1000;
-	    trace = (uint16 *) Malloc(sizeof(uint16)*tmax,"Allocating trace vector");
+	    trace = (uint16 *) Malloc(sizeof(uint16)*tmax, (char*)"Allocating trace vector");
 	    if (trace == NULL)
 	      exit (1);
 
@@ -2548,7 +2666,7 @@ void LAInterface::showOverlap(int from, int to) {
 	      { Read_Overlap(input,ovl);
 	        if (ovl->path.tlen > tmax)
 	          { tmax = ((int) 1.2*ovl->path.tlen) + 100;
-	            trace = (uint16 *) Realloc(trace,sizeof(uint16)*tmax,"Allocating trace vector");
+	            trace = (uint16 *) Realloc(trace,sizeof(uint16)*tmax, (char *)"Allocating trace vector");
 	            if (trace == NULL)
 	              exit (1);
 	          }
@@ -2752,7 +2870,7 @@ static int enlarge_vector(_Work_Data *work, int newmax)
     int   max;
 
     max = ((int) (newmax*1.2)) + 10000;
-    vec = Realloc(work->vector,max,"Enlarging DP vector");
+    vec = Realloc(work->vector,max, (char *)"Enlarging DP vector");
     if (vec == NULL)
         EXIT(1);
     work->vecmax = max;
@@ -3023,7 +3141,7 @@ static int enlarge_trace(_Work_Data *work, int newmax)
     int   max;
 
     max = ((int) (newmax*1.2)) + 10000;
-    vec = Realloc(work->trace,max,"Enlarging trace vector");
+    vec = Realloc(work->trace,max,(char *)"Enlarging trace vector");
     if (vec == NULL)
         EXIT(1);
     work->tramax = max;
@@ -3471,24 +3589,24 @@ int LAInterface::showAlignmentTags(LAlignment *alignment) {
     mtag = ':';
     dtag = ':';
 
-#define COLUMN(x,y) \
+#define COLUMN2(x,y) \
     {               \
         printf(" %c-%c ",ToU[x],ToU[y]); \
     }               \
 
 
     while (prefa > prefb)
-    { COLUMN(a[i],4)
+    { COLUMN2(a[i],4)
         i += 1;
         prefa -= 1;
     }
     while (prefb > prefa)
-    { COLUMN(4,b[j])
+    { COLUMN2(4,b[j])
         j += 1;
         prefb -= 1;
     }
     while (prefa > 0)
-    { COLUMN(a[i],b[j])
+    { COLUMN2(a[i],b[j])
         i += 1;
         j += 1;
         prefa -= 1;
@@ -3496,7 +3614,7 @@ int LAInterface::showAlignmentTags(LAlignment *alignment) {
 
     mtag = '[';
     if (prefb > 0)
-    COLUMN(5,5)
+    COLUMN2(5,5)
 
     mtag  = '|';
     dtag  = '*';
@@ -3510,7 +3628,7 @@ int LAInterface::showAlignmentTags(LAlignment *alignment) {
             { p = -p;
                 //printf("%d\n",trace[c]);
                 while (i != p)
-                { COLUMN(a[i],b[j])
+                { COLUMN2(a[i],b[j])
                     if (a[i] == b[j])
                         match += 1;
                     else
@@ -3518,13 +3636,13 @@ int LAInterface::showAlignmentTags(LAlignment *alignment) {
                     i += 1;
                     j += 1;
                 }
-                COLUMN(7,b[j])
+                COLUMN2(7,b[j])
                 j += 1;
                 diff += 1;
             }
             else
             { while (j != p)
-                { COLUMN(a[i],b[j])
+                { COLUMN2(a[i],b[j])
                     if (a[i] == b[j])
                         match += 1;
                     else
@@ -3532,13 +3650,13 @@ int LAInterface::showAlignmentTags(LAlignment *alignment) {
                     i += 1;
                     j += 1;
                 }
-                COLUMN(a[i],7)
+                COLUMN2(a[i],7)
                 i += 1;
                 diff += 1;
             }
         p = alignment->aepos;
         while (i <= p)
-        { COLUMN(a[i],b[j])
+        { COLUMN2(a[i],b[j])
             if (a[i] == b[j])
                 match += 1;
             else
@@ -3552,7 +3670,7 @@ int LAInterface::showAlignmentTags(LAlignment *alignment) {
 
         mtag = ']';
         if (a[i] != 4 && b[j] != 4 && border > 0)
-        COLUMN(6,6)
+        COLUMN2(6,6)
 
         mtag = ':';
         dtag = ':';
@@ -3561,16 +3679,16 @@ int LAInterface::showAlignmentTags(LAlignment *alignment) {
         while (c < border && (a[i] != 4 || b[j] != 4))
         { if (a[i] != 4)
             if (b[j] != 4)
-            { COLUMN(a[i],b[j])
+            { COLUMN2(a[i],b[j])
                 i += 1;
                 j += 1;
             }
             else
-            { COLUMN(a[i],4)
+            { COLUMN2(a[i],4)
                 i += 1;
             }
             else
-            { COLUMN(4,b[j])
+            { COLUMN2(4,b[j])
                 j += 1;
             }
             c += 1;
@@ -3674,7 +3792,7 @@ std::pair<std::string, std::string> LAInterface::getAlignmentTags(LAlignment *al
     aa.reserve((alignment->aepos - alignment->abpos) * 2);
     bb.reserve((alignment->bepos - alignment->bbpos) * 2);
 
-#define COLUMN(x,y) \
+#define COLUMN3(x,y) \
     {               \
         aa.append(1,ToU[x]); \
         bb.append(1,ToU[y]); \
@@ -3714,7 +3832,7 @@ std::pair<std::string, std::string> LAInterface::getAlignmentTags(LAlignment *al
             { p = -p;
                 //printf("%d\n",trace[c]);
                 while (i != p)
-                { COLUMN(a[i],b[j])
+                { COLUMN3(a[i],b[j])
                     if (a[i] == b[j])
                         match += 1;
                     else
@@ -3722,13 +3840,13 @@ std::pair<std::string, std::string> LAInterface::getAlignmentTags(LAlignment *al
                     i += 1;
                     j += 1;
                 }
-                COLUMN(7,b[j])
+                COLUMN3(7,b[j])
                 j += 1;
                 diff += 1;
             }
             else
             { while (j != p)
-                { COLUMN(a[i],b[j])
+                { COLUMN3(a[i],b[j])
                     if (a[i] == b[j])
                         match += 1;
                     else
@@ -3736,13 +3854,13 @@ std::pair<std::string, std::string> LAInterface::getAlignmentTags(LAlignment *al
                     i += 1;
                     j += 1;
                 }
-                COLUMN(a[i],7)
+                COLUMN3(a[i],7)
                 i += 1;
                 diff += 1;
             }
         p = alignment->aepos;
         while (i <= p)
-        { COLUMN(a[i],b[j])
+        { COLUMN3(a[i],b[j])
             if (a[i] == b[j])
                 match += 1;
             else
@@ -3838,48 +3956,6 @@ int LAInterface::printAlignment_exp(FILE *file, LAlignment *align, Work_Data *ew
 
     Abuf[width] = Bbuf[width] = Dbuf[width] = '\0';
     /* buffer/output next column */
-#define COLUMN(x,y)							\
-{ int u, v;								\
-  if (o >= width)							\
-    { fprintf(file,"\n");						\
-      fprintf(file,"%*s",indent,"");					\
-      if (coord > 0)							\
-        { if (sa <= aend)						\
-            fprintf(file," %*d",coord,sa);				\
-          else								\
-            fprintf(file," %*s",coord,"");				\
-          fprintf(file," %s\n",Abuf);					\
-          fprintf(file,"%*s %*s %s\n",indent,"",coord,"",Dbuf);		\
-          fprintf(file,"%*s",indent,"");				\
-          if (sb <= bend)						\
-            fprintf(file," %*d",coord,sb);				\
-          else								\
-            fprintf(file," %*s",coord,"");				\
-          fprintf(file," %s",Bbuf);					\
-        }								\
-      else								\
-        { fprintf(file," %s\n",Abuf);					\
-          fprintf(file,"%*s %s\n",indent,"",Dbuf);			\
-          fprintf(file,"%*s %s",indent,"",Bbuf);			\
-        }								\
-      fprintf(file," %5.1f%%\n",(100.*diff)/(diff+match));		\
-      o  = 0;								\
-      sa = i;								\
-      sb = j;								\
-      match = diff = 0;							\
-    }									\
-  u = (x);								\
-  v = (y);								\
-  if (u == 4 || v == 4)							\
-    Dbuf[o] = ' ';							\
-  else if (u == v)							\
-    Dbuf[o] = mtag;							\
-  else									\
-    Dbuf[o] = dtag;							\
-  Abuf[o] = N2A[u];							\
-  Bbuf[o] = N2A[v];							\
-  o += 1;								\
-}
 
     a = align->aseq - 1;
     b = align->bseq - 1;
@@ -4290,7 +4366,7 @@ static int qv_map[51] =
     'Y'
   };
 
-void LAInterface::getQV(std::vector<std::vector<int> > & QV, int from, int to) {
+int LAInterface::getQV(std::vector<std::vector<int> > & QV, int from, int to) {
 	int b,e;
     b = from;
     e = to;
@@ -4303,16 +4379,16 @@ void LAInterface::getQV(std::vector<std::vector<int> > & QV, int from, int to) {
     //if (DOIQV)
       { int status, kind;
         HITS_TRACK *track;
-        status = Check_Track(db1,"qual",&kind);
+        status = Check_Track(db1, (char *)"qual",&kind);
         if (status == -2)
           { fprintf(stderr,"%s: .qual-track does not exist for this db.\n",Prog_Name);
-            exit (1);
+             return (1);
           }
         if (status == -1)
           { fprintf(stderr,"%s: .qual-track not sync'd with db.\n",Prog_Name);
-            exit (1);
+            return (1);
           }
-        track = Load_Track(db1,"qual");
+        track = Load_Track(db1, (char *)"qual");
         qv_idx = (int64 *) track->anno;
         qv_val = (uint8 *) track->data;
       }
@@ -4414,7 +4490,7 @@ void LAInterface::getQV(std::vector<std::vector<int> > & QV, int from, int to) {
               }
           }*/
 	  }
-	return;
+	return 0;
 }
 
 
@@ -4519,7 +4595,7 @@ void LOverlap::trim_overlap() {
     }
 
 
-    //printf("[%6d %6d] [%6d %6d]\n", this->eff_read_A_start_, this->eff_read_A_end_, this->eff_read_B_start_, this->eff_read_B_end_);
+    //printf("[%6d %6d] [%6d %6d]\n", this->eff_read_A_read_start_, this->eff_read_A_read_end_, this->eff_read_B_read_start_, this->eff_read_B_read_end_);
 
     //printf("[%6d %6d] [%6d %6d]\n", this->eff_read_A_match_start_, this->eff_read_A_match_end_, this->eff_read_B_match_start_, this->eff_read_B_match_end_);
 
@@ -4541,8 +4617,8 @@ void LOverlap::trim_overlap() {
         //for trace point pairs, get the first one that is in untrimmed regions for both reads
 
         for (int i = 0; i < trace_points.size(); i++) {
-            if ( (trace_points[i].first >= this->eff_read_A_start_) and
-                (trace_points[i].second >= this->eff_read_B_start_) ) {
+            if ( (trace_points[i].first >= this->eff_read_A_read_start_) and
+                (trace_points[i].second >= this->eff_read_B_read_start_) ) {
                 this->eff_read_A_match_start_ = trace_points[i].first;
                 this->eff_read_B_match_start_ = trace_points[i].second;
                 this->eff_start_trace_point_index_ = i;
@@ -4552,8 +4628,8 @@ void LOverlap::trim_overlap() {
 
         //for trace point pairs, get the last one that is in untrimmed regions for both reads
         for (int i = (int) trace_points.size() - 1; i >= 0; i--) {
-            if ((trace_points[i].first <= this->eff_read_A_end_) and
-                (trace_points[i].second <= this->eff_read_B_end_)) {
+            if ((trace_points[i].first <= this->eff_read_A_read_end_) and
+                (trace_points[i].second <= this->eff_read_B_read_end_)) {
                 this->eff_read_A_match_end_ = trace_points[i].first;
                 this->eff_read_B_match_end_ = trace_points[i].second;
                 this->eff_end_trace_point_index_ = i;
@@ -4565,8 +4641,8 @@ void LOverlap::trim_overlap() {
     else {
 
         for (int i = 0; i < trace_points.size(); i++) {
-            if ( (trace_points[i].first >= this->eff_read_A_start_) and
-                 (trace_points[i].second <= this->eff_read_B_end_) ) {
+            if ( (trace_points[i].first >= this->eff_read_A_read_start_) and
+                 (trace_points[i].second <= this->eff_read_B_read_end_) ) {
                 this->eff_read_A_match_start_ = trace_points[i].first;
                 this->eff_read_B_match_end_ = trace_points[i].second;
                 this->eff_start_trace_point_index_ = i; // "start" with respect to A
@@ -4575,8 +4651,8 @@ void LOverlap::trim_overlap() {
         }
 
         for (int i = (int) trace_points.size() - 1; i >= 0; i--) {
-            if ((trace_points[i].first <= this->eff_read_A_end_) and
-                (trace_points[i].second >= this->eff_read_B_start_)) {
+            if ((trace_points[i].first <= this->eff_read_A_read_end_) and
+                (trace_points[i].second >= this->eff_read_B_read_start_)) {
                 this->eff_read_A_match_end_ = trace_points[i].first;
                 this->eff_read_B_match_start_ = trace_points[i].second;
                 this->eff_end_trace_point_index_ = i;
@@ -4593,10 +4669,10 @@ void LOverlap::trim_overlap() {
 
     /*printf("[%6d %6d] [%6d %6d]\n", this->eff_read_A_match_start_, this->eff_read_A_match_end_, this->eff_read_B_match_start_, this->eff_read_B_match_end_);
 
-    int overhang_read_A_left = this->eff_read_A_match_start_ - this->eff_read_A_start_;
-    int overhang_read_A_right = this->eff_read_A_end_ - this->eff_read_A_match_end_;
-    int overhang_read_B_left = this->eff_read_B_match_start_ - this->eff_read_B_start_;
-    int overhang_read_B_right = this->eff_read_B_end_ - this->eff_read_B_match_end_;
+    int overhang_read_A_left = this->eff_read_A_match_start_ - this->eff_read_A_read_start_;
+    int overhang_read_A_right = this->eff_read_A_read_end_ - this->eff_read_A_match_end_;
+    int overhang_read_B_left = this->eff_read_B_match_start_ - this->eff_read_B_read_start_;
+    int overhang_read_B_right = this->eff_read_B_read_end_ - this->eff_read_B_match_end_;
 
     printf("trim A_left %6d, A_right %6d, B_left %6d, B_right %6d\n",
            overhang_read_A_left, overhang_read_A_right,
@@ -4608,10 +4684,10 @@ void LOverlap::trim_overlap() {
 
 
 void LOverlap::TrimOverlapNaive(){
-    this->eff_read_B_match_start_ = std::max (this->read_B_match_start_,this->eff_read_B_start_);
-    this->eff_read_B_match_end_ = std::min (this->read_B_match_end_,this->eff_read_B_end_);
-    this->eff_read_A_match_start_ = std::max (this->read_A_match_start_,this->eff_read_A_start_);
-    this->eff_read_A_match_end_ = std::min (this->read_A_match_end_,this->eff_read_A_end_);;
+    this->eff_read_B_match_start_ = std::max (this->read_B_match_start_,this->eff_read_B_read_start_);
+    this->eff_read_B_match_end_ = std::min (this->read_B_match_end_,this->eff_read_B_read_end_);
+    this->eff_read_A_match_start_ = std::max (this->read_A_match_start_,this->eff_read_A_read_start_);
+    this->eff_read_A_match_end_ = std::min (this->read_A_match_end_,this->eff_read_A_read_end_);;
 }
 
 
@@ -4623,21 +4699,21 @@ void LOverlap::addtype(int max_overhang) {
         it is based on effective positions, rather than positions
      */
 
-    int overhang = std::min(this->eff_read_A_match_start_ - this->eff_read_A_start_, this->eff_read_B_match_start_ - this->eff_read_B_start_) + std::min(this->eff_read_A_end_ - this->eff_read_A_match_end_, this->eff_read_B_end_ - this->eff_read_B_match_end_);
+    int overhang = std::min(this->eff_read_A_match_start_ - this->eff_read_A_read_start_, this->eff_read_B_match_start_ - this->eff_read_B_read_start_) + std::min(this->eff_read_A_read_end_ - this->eff_read_A_match_end_, this->eff_read_B_read_end_ - this->eff_read_B_match_end_);
 
     //int tol = 0;
     if (overhang > max_overhang)
         this->match_type_ = INTERNAL;
-    else if ((this->eff_read_A_match_start_ - this->eff_read_A_start_ <= this->eff_read_B_match_start_ - this->eff_read_B_start_) and (this->eff_read_A_end_ - this->eff_read_A_match_end_ <= this->eff_read_B_end_ - this->eff_read_B_match_end_))
+    else if ((this->eff_read_A_match_start_ - this->eff_read_A_read_start_ <= this->eff_read_B_match_start_ - this->eff_read_B_read_start_) and (this->eff_read_A_read_end_ - this->eff_read_A_match_end_ <= this->eff_read_B_read_end_ - this->eff_read_B_match_end_))
         this->match_type_ = BCOVERA;
-    else if ((this->eff_read_A_match_start_ - this->eff_read_A_start_ >= this->eff_read_B_match_start_ - this->eff_read_B_start_) and (this->eff_read_A_end_ - this->eff_read_A_match_end_ >= this->eff_read_B_end_ - this->eff_read_B_match_end_))
+    else if ((this->eff_read_A_match_start_ - this->eff_read_A_read_start_ >= this->eff_read_B_match_start_ - this->eff_read_B_read_start_) and (this->eff_read_A_read_end_ - this->eff_read_A_match_end_ >= this->eff_read_B_read_end_ - this->eff_read_B_match_end_))
         this->match_type_ = ACOVERB;
-    else if (this->eff_read_A_match_start_ - this->eff_read_A_start_ > this->eff_read_B_match_start_ - this->eff_read_B_start_) {
-        if ((this->eff_read_B_end_ - this->eff_read_B_match_end_ > 0) and (this->eff_read_A_match_start_ - this->eff_read_A_start_ > 0))
+    else if (this->eff_read_A_match_start_ - this->eff_read_A_read_start_ > this->eff_read_B_match_start_ - this->eff_read_B_read_start_) {
+        if ((this->eff_read_B_read_end_ - this->eff_read_B_match_end_ > 0) and (this->eff_read_A_match_start_ - this->eff_read_A_read_start_ > 0))
             this->match_type_ = FORWARD;
     }
     else {
-        if ((this->eff_read_B_match_start_ - this->eff_read_B_start_ > 0) and (this->eff_read_A_end_ - this->eff_read_A_match_end_ > 0))
+        if ((this->eff_read_B_match_start_ - this->eff_read_B_read_start_ > 0) and (this->eff_read_A_read_end_ - this->eff_read_A_match_end_ > 0))
             this->match_type_ = BACKWARD;
     }
 }
@@ -4647,10 +4723,10 @@ void LOverlap::AddTypesAsymmetric(int max_overhang, int min_overhang) {
     //The function sets the class variable match_type_ according to the relative positions of the reads.
     //Possible things it can set to are:
     // BCOVERA, ACOVERB, INTERNAL, FORWARD, FORWARD_INTERNAL, BACKWARD, BACKWARD_INTERNAL
-    int overhang_read_A_left = this->eff_read_A_match_start_ - this->eff_read_A_start_;
-    int overhang_read_A_right = this->eff_read_A_end_ - this->eff_read_A_match_end_;
-    int overhang_read_B_left = this->eff_read_B_match_start_ - this->eff_read_B_start_;
-    int overhang_read_B_right = this->eff_read_B_end_ - this->eff_read_B_match_end_;
+    int overhang_read_A_left = this->eff_read_A_match_start_ - this->eff_read_A_read_start_;
+    int overhang_read_A_right = this->eff_read_A_read_end_ - this->eff_read_A_match_end_;
+    int overhang_read_B_left = this->eff_read_B_match_start_ - this->eff_read_B_read_start_;
+    int overhang_read_B_right = this->eff_read_B_read_end_ - this->eff_read_B_match_end_;
 
 
     //printf("     A_left %6d, A_right %6d, B_left %6d, B_right %6d\n",
@@ -4659,8 +4735,8 @@ void LOverlap::AddTypesAsymmetric(int max_overhang, int min_overhang) {
 
     if (this->reverse_complement_match_ == 1) {
         //Exchange overhang left and right of read B if match is reverse complement
-        overhang_read_B_left = this->eff_read_B_end_ - this->eff_read_B_match_end_;
-        overhang_read_B_right = this->eff_read_B_match_start_ - this->eff_read_B_start_;
+        overhang_read_B_left = this->eff_read_B_read_end_ - this->eff_read_B_match_end_;
+        overhang_read_B_right = this->eff_read_B_match_start_ - this->eff_read_B_read_start_;
     }
 
 
@@ -4709,15 +4785,15 @@ void LOverlap::AddTypesAsymmetric(int max_overhang, int min_overhang) {
     ofs <<  "===============================================\n"
     << "Read A id "<< std::setfill('0') << std::setw(5) <<this->read_A_id_
     << "\nRead B id "  << std::setfill('0') << std::setw(5) << this->read_B_id_
-    << "\nRead A eff start "<< std::setfill('0') << std::setw(5)  << this->eff_read_A_start_
-    << " Read A eff end "<< std::setfill('0') << std::setw(5)  << this->eff_read_A_end_
+    << "\nRead A eff start "<< std::setfill('0') << std::setw(5)  << this->eff_read_A_read_start_
+    << " Read A eff end "<< std::setfill('0') << std::setw(5)  << this->eff_read_A_read_end_
     << " Read A length " << std::setfill('0') << std::setw(5)  << this->alen
     << " Read A match start "<< std::setfill('0') << std::setw(5) <<  this->read_A_match_start_
     << " Read A eff match start " << std::setfill('0') << std::setw(5) <<  this->eff_read_A_match_start_
     << " Read A match end " << std::setfill('0') << std::setw(5)  << this->read_A_match_end_
     << " Read A eff match end " << std::setfill('0') << std::setw(5)  << this->eff_read_A_match_end_
-    << "\nRead B eff start "  << std::setfill('0') << std::setw(5) << this->eff_read_B_start_
-    << " Read B eff end " << std::setfill('0') << std::setw(5)  << this->eff_read_B_end_
+    << "\nRead B eff start "  << std::setfill('0') << std::setw(5) << this->eff_read_B_read_start_
+    << " Read B eff end " << std::setfill('0') << std::setw(5)  << this->eff_read_B_read_end_
     << " Read B length " << std::setfill('0') << std::setw(5)  << this->blen
     << " Read B match start "<< std::setfill('0') << std::setw(5) <<  this->read_B_match_start_
     << " Read B eff match start " << std::setfill('0') << std::setw(5) <<  this->eff_read_B_match_start_
@@ -4792,3 +4868,60 @@ int LAInterface::loadFASTA(std::string filename, std::vector<Read *> & reads) {
     gzclose(fp); // STEP 6: close the file handler
     return num;
 }
+
+
+
+
+// Comparator: true when firstElem.first is strictly smaller than secondElem.first (.second is ignored).
+bool pairAscend(const std::pair<int, int>& firstElem,  const std::pair<int, int>& secondElem)
+{ return secondElem.first > firstElem.first; }
+
+// Comparator: true when firstElem.first is strictly greater than secondElem.first (.second is ignored).
+bool pairDescend(const std::pair<int, int>& firstElem,  const std::pair<int, int>& secondElem)
+{ return secondElem.first < firstElem.first; }
+
+
+//True iff ovl1's combined match span (read A span + read B span) strictly exceeds ovl2's.
+bool compare_overlap(LOverlap * ovl1, LOverlap * ovl2) {
+    const auto span1 = ovl1->read_A_match_end_ - ovl1->read_A_match_start_ + ovl1->read_B_match_end_ - ovl1->read_B_match_start_;
+    const auto span2 = ovl2->read_A_match_end_ - ovl2->read_A_match_start_ + ovl2->read_B_match_end_ - ovl2->read_B_match_start_;
+    return span1 > span2;
+}
+
+//True iff the total match span (read A span + read B span, summed over every overlap)
+//of the list ovl1 is strictly greater than that of the list ovl2. False otherwise.
+bool compare_sum_overlaps(const std::vector<LOverlap * > * ovl1, const std::vector<LOverlap *> * ovl2) {
+    //Adds up the per-overlap spans of one list into an int accumulator.
+    auto total_span = [](const std::vector<LOverlap *> & overlaps) {
+        int total = 0;
+        for (const LOverlap * o : overlaps)
+            total += o->read_A_match_end_ - o->read_A_match_start_ +
+                     o->read_B_match_end_ - o->read_B_match_start_;
+        return total;
+    };
+    return total_span(*ovl1) > total_span(*ovl2);
+}
+
+bool compare_pos(LOverlap * ovl1, LOverlap * ovl2) {
+    //True if ovl1 starts LATER than ovl2 on read a (strictly larger read_A_match_start_).
+    return (ovl2->read_A_match_start_) < (ovl1->read_A_match_start_);
+}
+
+bool compare_overlap_abpos(LOverlap * ovl1, LOverlap * ovl2) {
+    //True if ovl1 starts earlier than ovl2 on read a (strictly smaller read_A_match_start_).
+    //This is compare_pos with the inequality reversed.
+    return ovl2->read_A_match_start_ > ovl1->read_A_match_start_;
+}
+
+bool compare_overlap_aepos(LOverlap * ovl1, LOverlap * ovl2) {
+    //NOTE(review): despite "aepos" this tests read_A_match_start_ (true if ovl1 starts later), exactly like compare_pos; confirm whether read_A_match_end_ was intended.
+    return ovl2->read_A_match_start_ < ovl1->read_A_match_start_;
+}
+
+//True when ovl1's weight is strictly greater than ovl2's.
+bool compare_overlap_weight(LOverlap * ovl1, LOverlap * ovl2)
+{ return ovl2->weight < ovl1->weight; }
+
+//True iff ovl1's combined aligned span ((aepos-abpos) + (bepos-bbpos)) strictly exceeds ovl2's.
+bool compare_overlap_aln(LAlignment * ovl1, LAlignment * ovl2)
+{ return (ovl2->aepos - ovl2->abpos + ovl2->bepos - ovl2->bbpos) < (ovl1->aepos - ovl1->abpos + ovl1->bepos - ovl1->bbpos); }
diff --git a/src/maximal/CMakeLists.txt b/src/maximal/CMakeLists.txt
new file mode 100644
index 0000000..8c74125
--- /dev/null
+++ b/src/maximal/CMakeLists.txt
@@ -0,0 +1,6 @@
+cmake_minimum_required(VERSION 3.2)
+
+add_executable(get_maximal_reads maximal)
+target_link_libraries(get_maximal_reads LAInterface ini spdlog)
+
+install(TARGETS get_maximal_reads DESTINATION ${libexec})
diff --git a/src/maximal/maximal.cpp b/src/maximal/maximal.cpp
new file mode 100644
index 0000000..7850fc5
--- /dev/null
+++ b/src/maximal/maximal.cpp
@@ -0,0 +1,860 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <ctype.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unordered_map>
+#include <algorithm>
+#include <fstream>
+#include <iostream>
+#include <set>
+#include <tuple>
+#include <random>
+#include <omp.h>
+#include <time.h>
+#include <glob.h>
+
+
+#include "INIReader.h"
+#include "spdlog/spdlog.h"
+#include "DB.h"
+#include "align.h"
+#include "LAInterface.h"
+#include "cmdline.h"
+
+std::string lastN(std::string input, int n)
+{   //Suffix of at most n characters ("" when n <= 0); clamped because the unclamped substr() threw std::out_of_range whenever n > input.size().
+    return input.substr(input.size() - (n > 0 ? std::min<std::string::size_type>(input.size(), n) : 0));
+}
+
+inline std::vector<std::string> glob(const std::string& pat){
+    // Collects the numbered files <pat>.1.las, <pat>.2.las, ... in index order and stops
+    // at the first index for which ::glob() fails or reports no match.
+    // Returns the first matching path of every index; prints a short summary to stdout.
+    std::vector<std::string> ret;
+
+    for (int i = 1; ; ++i) {
+        const std::string search_name = pat + "." + std::to_string(i) + ".las";
+        if (i == 1)
+            std::cout << search_name << std::endl; // only the first probe is echoed
+
+        // A fresh, zeroed glob_t per probe, released right after use: calling ::glob()
+        // again on a glob_t that was never globfree'd leaks the previous path vector.
+        glob_t glob_result = glob_t();
+        const int status = ::glob(search_name.c_str(), GLOB_TILDE, NULL, &glob_result);
+        const bool found = (status == 0) and (glob_result.gl_pathc != 0);
+        if (found)
+            ret.push_back(std::string(glob_result.gl_pathv[0]));
+        globfree(&glob_result);
+        if (!found)
+            break;
+    }
+
+    std::cout << "-------------------------"<< std::endl;
+    std::cout << "Number of files " << ret.size() << std::endl;
+    std::cout << "Input string " << pat.c_str() << std::endl;
+    std::cout << "-------------------------"<< std::endl;
+    return ret;
+}
+
+bool ProcessAlignment(LOverlap * match, Read * read_A, Read * read_B, int ALN_THRESHOLD,
+                      int THETA, int THETA2, bool trim){
+    //Prepares one overlap for classification: stores the reads' effective extents in
+    //the match, derives the effective match coordinates, then types the overlap.
+    //
+    //  match          overlap record, updated in place
+    //  read_A, read_B reads of the overlap; only effective_start/effective_end are read
+    //  ALN_THRESHOLD  minimum effective match span required on each of the two reads
+    //  THETA, THETA2  forwarded unchanged to LOverlap::AddTypesAsymmetric()
+    //  trim           true: call trim_overlap(); false: effective match = raw match
+    //
+    //Returns true only if the overlap stayed active and was typed BCOVERA.
+    //std::cout<<" In ProcessAlignment"<<std::endl;
+
+    //Effective extent of read A and of read B.
+    match->eff_read_A_read_start_ = read_A->effective_start;
+    match->eff_read_A_read_end_ = read_A->effective_end;
+    //Read B uses the same convention for forward and reverse complement matches; the
+    //flipped variant that used to be applied is left commented out below.
+    match->eff_read_B_read_start_ = read_B->effective_start;
+    match->eff_read_B_read_end_ = read_B->effective_end;
+//    if (match->reverse_complement_match_ == 0) {
+//        match->eff_read_B_read_start_ = read_B->effective_start;
+//        match->eff_read_B_read_end_ = read_B->effective_end;
+//    } else {
+//        match->eff_read_B_read_start_ = read_B->len - read_B->effective_end;
+//        match->eff_read_B_read_end_ = read_B->len - read_B->effective_start;
+//    }
+
+    /*printf("bef %d %d %d [%d %d] [%d %d] [%d %d] [%d %d]\n", match->read_A_id_, match->read_B_id_,
+     * match->reverse_complement_match_,
+        match->read_A_match_start_, match->read_A_match_end_, match->read_B_match_start_, match->read_B_match_end_,
+           match->eff_read_A_read_start_, match->eff_read_A_read_end_, match->eff_read_B_read_start_, match->eff_read_B_read_end_
+    );*/
+    //Effective match coordinates: copied from the raw match unless trimming is requested.
+    if (not trim) {
+        match->eff_read_A_match_start_ = match->read_A_match_start_;
+        match->eff_read_A_match_end_ = match->read_A_match_end_;
+        match->eff_read_B_match_start_ = match->read_B_match_start_;
+        match->eff_read_B_match_end_ = match->read_B_match_end_;
+    } else {
+        match->trim_overlap();
+    }
+    /*printf("aft %d %d %d [%d %d] [%d %d] [%d %d] [%d %d]\n", match->read_A_id_, match->read_B_id_,
+     * match->reverse_complement_match_,
+           match->eff_read_A_match_start_, match->eff_read_A_match_end_, match->eff_read_B_match_start_,
+           match->eff_read_B_match_end_,
+           match->eff_read_A_read_start_, match->eff_read_A_read_end_, match->eff_read_B_read_start_, match->eff_read_B_read_end_
+    );*/
+
+    //An overlap whose effective match is shorter than ALN_THRESHOLD on either read, or
+    //that is already inactive, is switched off; every other overlap gets typed.
+    const auto eff_span_A = match->eff_read_A_match_end_ - match->eff_read_A_match_start_;
+    const auto eff_span_B = match->eff_read_B_match_end_ - match->eff_read_B_match_start_;
+    bool contained = false;
+    if ((eff_span_B < ALN_THRESHOLD) or (eff_span_A < ALN_THRESHOLD) or (not match->active)) {
+        match->active = false;
+        match->match_type_ = NOT_ACTIVE;
+    } else {
+        match->AddTypesAsymmetric(THETA,THETA2);
+        contained = (match->match_type_ == BCOVERA);
+    }
+
+    //weight sums the effective match spans, length the raw ones (both re-read from match).
+    match->weight = match->eff_read_A_match_end_ - match->eff_read_A_match_start_
+                    + match->eff_read_B_match_end_ - match->eff_read_B_match_start_;
+    match->length = match->read_A_match_end_ - match->read_A_match_start_
+                    + match->read_B_match_end_ - match->read_B_match_start_;
+    return contained;
+}
+
+std::vector<std::pair<int,int>> Merge(std::vector<LOverlap *> & intervals, int cutoff)
+//Computes the stretches of read a covered by the given overlaps. Every overlap is
+//shrunk to <start_pos+cutoff,end_pos-cutoff>, and shrunk overlaps that touch or
+//intersect are fused into a single <left,right> pair.
+//Side effect: when there is more than one overlap, `intervals` is sorted in place
+//with compare_overlap_abpos.
+//NOTE(review): a lone overlap is returned with its raw bounds, i.e. without the
+//cutoff shrinking that the multi-overlap path applies - confirm this is intended.
+{
+    typedef std::pair<int, int> Span;
+    std::vector<Span> merged;
+    const int count = intervals.size();
+
+    if (count == 0)
+        return merged;
+    if (count == 1) {
+        merged.push_back(Span(intervals[0]->read_A_match_start_, intervals[0]->read_A_match_end_));
+        return merged;
+    }
+    //Order by start position on read a so that a single left-to-right sweep suffices.
+    std::sort(intervals.begin(), intervals.end(), compare_overlap_abpos);
+
+    //[span_begin, span_end] is the fused stretch currently being grown.
+    int span_begin = intervals[0]->read_A_match_start_ + cutoff;
+    int span_end = intervals[0]->read_A_match_end_ - cutoff;
+    for (int k = 1; k < count; ++k) {
+        const int next_begin = intervals[k]->read_A_match_start_ + cutoff;
+        const int next_end = intervals[k]->read_A_match_end_ - cutoff;
+        if (next_begin > span_end) {
+            //Gap: close the current stretch and open a new one.
+            merged.push_back(Span(span_begin, span_end));
+            span_begin = next_begin;
+            span_end = next_end;
+        } else {
+            span_end = std::max(span_end, next_end);
+        }
+    }
+    merged.push_back(Span(span_begin, span_end));
+    return merged;
+}
+
+//Interval = pair<int, int>. Defined in LAInterface.h
+Interval Effective_length(std::vector<LOverlap *> & intervals, int min_cov) {
+//Returns <start_pos, end_pos> picked from the overlaps on read a; <0, 0> unless there
+//are more than min_cov overlaps.
+//start_pos : read_A_match_start_ of element [min_cov] after sorting with
+//            compare_overlap_abpos.
+//end_pos   : read_A_match_end_ of element [min_cov] after sorting with
+//            compare_overlap_aepos.
+//NOTE(review): compare_overlap_aepos orders by read_A_match_start_, not by
+//read_A_match_end_; confirm whether ordering by end position was intended here.
+//Side effect: intervals is re-sorted in place and left in compare_overlap_aepos order.
+    const bool enough_overlaps = intervals.size() > static_cast<size_t>(min_cov);
+    Interval ret;
+
+    std::sort(intervals.begin(), intervals.end(), compare_overlap_abpos);
+    ret.first = enough_overlaps ? intervals[min_cov]->read_A_match_start_ : 0;
+
+    std::sort(intervals.begin(), intervals.end(), compare_overlap_aepos);
+    ret.second = enough_overlaps ? intervals[min_cov]->read_A_match_end_ : 0;
+
+    return ret;
+}
+
+bool bridge(LOverlap* ovl, int s, int e){
+    //True iff ovl starts more than `margin` positions before s and ends more than `margin` positions after e on read a.
+    const int margin = 500;
+    return ((s - margin > ovl->read_A_match_start_) and (e + margin < ovl->read_A_match_end_));
+}
+
+float number_of_bridging_reads(std::vector<LOverlap *> ovl_reads, int hinge_location, int hinge_type,int threshold){
+    //Counts how many times the far ends of the overlaps anchored near a hinge jump by
+    //more than 2*threshold, scanning those far ends from largest to smallest.
+    //  hinge_type  1: anchor = read_A_match_start_, far end = read_A_match_end_
+    //  hinge_type -1: anchor = read_A_match_end_,   far end = read_A_match_start_
+    //  any other value: nothing is collected and 0 is returned
+    //An overlap is anchored when its anchor lies strictly within threshold of
+    //hinge_location. Each far end is also echoed to stdout next to hinge_location.
+    std::vector<int> far_ends;
+    if ((hinge_type == 1) or (hinge_type == -1)) {
+        const bool anchored_at_start = (hinge_type == 1);
+        for (LOverlap * ovl : ovl_reads) {
+            const int anchor = anchored_at_start ? ovl->read_A_match_start_ : ovl->read_A_match_end_;
+            const int far_end = anchored_at_start ? ovl->read_A_match_end_ : ovl->read_A_match_start_;
+            if ((anchor > hinge_location-threshold) and (anchor < hinge_location+threshold))
+                far_ends.push_back(far_end);
+        }
+    }
+    std::sort(far_ends.begin(), far_ends.end(), std::greater<int>());
+    int bin_count = 0;
+    size_t bin_start = 0; //index of the far end that opened the current bin
+    for (size_t k = 0; k < far_ends.size(); ++k) {
+        std::cout << hinge_location <<"\t"<< far_ends[k]<< std::endl;
+        if (far_ends[bin_start] - far_ends[k] > 2 * threshold) {
+            ++bin_count;
+            bin_start = k;
+        }
+    }
+    return static_cast<float>(bin_count);
+}
+
+
+/**
+ * Command-line entry point: loads reads and their overlaps, deactivates reads that
+ * are shorter than the configured length threshold or contained in another active
+ * read, and writes the ids of the reads that stay active.
+ *
+ * Command-line options: --db / --las (daligner input), --fasta / --paf (minimap
+ * input), --config (INI parameter file), --prefix (output prefix, default "out"),
+ * --restrictreads (file with one read id per line), --log (log folder),
+ * --mlas (treat --las as a glob pattern matching several .las files).
+ *
+ * Files read:    <prefix>.mas, lines of "read_id effective_start effective_end".
+ * Files written: <prefix>.coverage.txt, <prefix>.contained.txt, <prefix>.max.
+ *                <prefix>.homologous.txt and <prefix>.filtered.fasta are created
+ *                but nothing is written to them in this function.
+ *
+ * @param argc number of command-line arguments
+ * @param argv command-line arguments
+ * @return 0 on success; 1 when an input file cannot be opened, the configuration
+ *         cannot be parsed, or no alignments are available.
+ */
+int main(int argc, char *argv[]) {
+
+    mkdir("log",S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
+
+    cmdline::parser cmdp;
+    cmdp.add<std::string>("db", 'b', "db file name", false, "");
+    cmdp.add<std::string>("las", 'l', "las file name", false, "");
+    cmdp.add<std::string>("paf", 'p', "paf file name", false, "");
+    cmdp.add<std::string>("config", 'c', "configuration file name", false, "");
+    cmdp.add<std::string>("fasta", 'f', "fasta file name", false, "");
+    cmdp.add<std::string>("prefix", 'x', "prefix of (intermediate) output", false, "out");
+    cmdp.add<std::string>("restrictreads",'r',"restrict to reads in the file",false,"");
+    cmdp.add<std::string>("log", 'g', "log folder name", false, "log");
+    cmdp.add("mlas", '\0', "multiple las files");
+    cmdp.add("debug", '\0', "debug mode");
+    cmdp.parse_check(argc, argv);
+
+    LAInterface la;
+    const char * name_db = cmdp.get<std::string>("db").c_str(); //.db file of reads to load
+    const char * name_las_base = cmdp.get<std::string>("las").c_str();//.las file of alignments
+    const char * name_paf = cmdp.get<std::string>("paf").c_str();
+    const char * name_fasta = cmdp.get<std::string>("fasta").c_str();
+    const char * name_config = cmdp.get<std::string>("config").c_str();//name of the configuration file, in INI format
+    std::string out = cmdp.get<std::string>("prefix");
+    bool has_qv = true;
+    const char * name_restrict = cmdp.get<std::string>("restrictreads").c_str();
+    std::string name_mask = out + ".mas";
+
+    // With --mlas the argument is used verbatim (it is expanded by glob() below);
+    // otherwise make sure the name carries the ".las" extension.
+    std::string name_las_string;
+    if (cmdp.exist("mlas"))
+        name_las_string =  std::string(name_las_base);
+    else {
+        if (lastN(std::string(name_las_base), 4) == ".las")
+            name_las_string = std::string(name_las_base);
+        else
+            name_las_string = std::string(name_las_base) + ".las";
+    }
+
+
+    const char * name_las = name_las_string.c_str();
+    /**
+     * There are two sets of input, the first is db+las, which corresponds to daligner as an overlapper,
+     * the other is fasta + paf, which corresponds to minimap as an overlapper.
+     */
+
+    namespace spd = spdlog;
+
+    //auto console = spd::stdout_logger_mt("console",true);
+
+    // Log both to stdout and to a daily file inside the log folder.
+    std::vector<spdlog::sink_ptr> sinks;
+    sinks.push_back(std::make_shared<spdlog::sinks::stdout_sink_st>());
+    sinks.push_back(std::make_shared<spdlog::sinks::daily_file_sink_st>(cmdp.get<std::string>("log") + "/log", "txt", 23, 59));
+    auto console = std::make_shared<spdlog::logger>("log", begin(sinks), end(sinks));
+    spdlog::register_logger(console);
+    //auto console = std::make_shared<spdlog::logger>("name", begin(sinks), end(sinks));
+
+
+    console->info("Getting maximal reads");
+
+
+    console->info("name of db: {}, name of .las file {}", name_db, name_las);
+    console->info("name of fasta: {}, name of .paf file {}", name_fasta, name_paf);
+
+
+    // Echo the raw configuration file into the log.
+    std::ifstream ini_file(name_config);
+    std::string str((std::istreambuf_iterator<char>(ini_file)),
+                    std::istreambuf_iterator<char>());
+
+    console->info("Parameters passed in \n{}", str);
+
+    if (strlen(name_db) > 0)
+        la.openDB(name_db);
+
+
+    std::vector<std::string> name_las_list;
+    std::string name_las_str(name_las);
+    console->info("Las files: {}", name_las_str);
+    if (cmdp.exist("mlas")) {
+        console->info("Calling glob.");
+        name_las_list = glob(name_las_str);
+    }
+    else
+        name_las_list.push_back(name_las_str);
+
+    // The list is indexed unconditionally below, so an empty glob result must stop here.
+    if (name_las_list.empty()) {
+        console->error("No las files found for {}", name_las_str);
+        return 1;
+    }
+
+
+    // Stays 0 when neither a db nor a fasta file is supplied.
+    int n_read = 0;
+    if (strlen(name_db) > 0)
+        n_read = la.getReadNumber();
+
+    std::vector<Read *> reads; //Vector of pointers to all reads
+
+    if (strlen(name_fasta) > 0) {
+        n_read = la.loadFASTA(name_fasta,reads);
+        has_qv = false;
+    }
+
+
+    console->info("# Reads: {}", n_read); // output some statistics
+
+
+    std::vector<std::vector<int>>  QV;
+
+    if (strlen(name_db) > 0) {
+        la.getRead(reads,0,n_read);
+        if (la.getQV(QV,0,n_read) != 0) // load QV track from .db file
+            has_qv = false;
+    }
+
+
+    //Binarize QV vector, 40 is the threshold
+    if (has_qv)
+        for (int i = 0; i < n_read; i++) {
+            for (int j = 0; j < QV[i].size(); j++) QV[i][j] = int(QV[i][j] < 40);
+        }
+
+    std::set<int> reads_to_keep, reads_to_keep_initial;
+    if (strlen(name_restrict) > 0){
+        FILE * restrict_reads = fopen(name_restrict, "r");
+        if (restrict_reads == NULL) {
+            console->error("Can't open {}", name_restrict);
+            return 1;
+        }
+        char * line = NULL; // buffer owned by getline(), released with free() below
+        size_t len = 0;
+        while (getline(&line, &len, restrict_reads) != -1){
+            std::stringstream ss;
+            ss << line;
+            int num;
+            // Skip blank or non-numeric lines instead of inserting a bogus id.
+            if (ss >> num)
+                reads_to_keep.insert(num);
+        }
+        free(line);
+        fclose(restrict_reads);
+        console->info("Reads to debug loaded from: {}", name_restrict);
+        console->info("Number of reads to debug loaded: {}", reads_to_keep.size());
+    }
+    else
+        console->info("No debug restrictions.");
+
+
+    if (strlen(name_las_list[0].c_str()) > 0)
+        la.openAlignmentFile(name_las_list[0]); // get tspace
+
+    std::vector<std::pair<int, int> > QV_mask(n_read);
+    // QV_mask is the mask based on QV for reads, for each read, it has one pair [start, end]
+
+    if (has_qv) {
+        for (int i = 0; i < n_read; i++) {
+            int s = 0, e = 0;
+            int max = 0, maxs = s, maxe = e;
+
+            for (int j = 0; j < QV[i].size(); j++) {
+                if ((QV[i][j] == 1) and (j<QV[i].size() - 1)) {
+                    e ++;
+                }
+                else {
+                    // The current run ends here (bad QV or last slot): keep it if it is the longest so far.
+                    if (e - s > max) {
+                        maxe = e ; maxs = s;
+                        max = e - s;
+                    }
+
+                    s = j+1;
+                    e = j+1;
+                }
+            }
+            // get the longest consecutive region that has good QV
+            //printf("maxs %d maxe %d size%d\n",maxs, maxe,QV[i].size());
+
+            QV_mask[i] = (std::pair<int, int>(maxs*la.tspace, maxe*la.tspace));
+            // tspace is the interval of trace points
+            // create mask by QV
+        }
+    }
+
+    INIReader reader(name_config);
+    if (reader.ParseError() < 0) {
+        console->warn("Can't load {}", name_config);
+        return 1;
+    }
+
+    int LENGTH_THRESHOLD = reader.GetInteger("filter", "length_threshold", -1);
+    double QUALITY_THRESHOLD = reader.GetReal("filter", "quality_threshold", 0.0);
+    int N_ITER = reader.GetInteger("filter", "n_iter", -1);
+    int ALN_THRESHOLD = reader.GetInteger("filter", "aln_threshold", -1);
+    int MIN_COV = reader.GetInteger("filter", "min_cov", -1);
+    int CUT_OFF = reader.GetInteger("filter", "cut_off", -1);
+    int THETA = reader.GetInteger("filter", "theta", -1);
+    int THETA2 = (int) reader.GetInteger("filter", "theta2", 0);
+    int N_PROC = reader.GetInteger("running", "n_proc", 4);
+    int EST_COV = reader.GetInteger("filter", "ec", 0); // load the estimated coverage (probably from other programs) from ini file, if it is zero, then estimate it
+    int reso = 40; // resolution of masks, repeat annotation, coverage, etc  = 40 basepairs
+    bool use_qv_mask = reader.GetBoolean("filter", "use_qv", true);
+    bool use_coverage_mask = reader.GetBoolean("filter", "coverage", true);
+    int COVERAGE_FRACTION = (int) reader.GetInteger("filter", "coverage_frac_repeat_annotation", 3);
+    const int MIN_REPEAT_ANNOTATION_THRESHOLD = (int) reader.GetInteger("filter", "min_repeat_annotation_threshold", 10);
+    const int MAX_REPEAT_ANNOTATION_THRESHOLD = (int) reader.GetInteger("filter", "max_repeat_annotation_threshold", 20);
+    const int REPEAT_ANNOTATION_GAP_THRESHOLD = (int) reader.GetInteger("filter", "repeat_annotation_gap_threshold",300);
+    //How far two hinges of the same type can be
+    const int NO_HINGE_REGION = (int) reader.GetInteger("filter", "no_hinge_region",500);
+    const int HINGE_MIN_SUPPORT = (int) reader.GetInteger("filter", "hinge_min_support", 7);
+    //Minimum number of reads that have to start in a reso length interval to be considered in hinge calling
+    const int HINGE_BIN_PILEUP_THRESHOLD = (int) reader.GetInteger("filter", "hinge_min_pileup", 7);
+    //Minimum number of reads to have in a pileup to consider a hinge bridged
+    const int HINGE_READ_UNBRIDGED_THRESHOLD = (int) reader.GetInteger("filter", "hinge_unbridged", 6);
+    //Number of reads that one has to see before a pileup to declare a potential hinge unbridged
+    int HINGE_BIN_LENGTH = (int) reader.GetInteger("filter", "hinge_bin", 100);
+    //Physical length of the bins considered
+    const int HINGE_TOLERANCE_LENGTH = (int) reader.GetInteger("filter", "hinge_tolerance_length", 100);
+    bool USE_TWO_MATCHES = (int) reader.GetInteger("layout", "use_two_matches", 1);
+
+    //Reads starting at +/- HINGE_TOLERANCE_LENGTH are considered reads starting at hinges
+    //NOTE: this overrides whatever "hinge_bin" was set to in the configuration file.
+    HINGE_BIN_LENGTH=2*HINGE_TOLERANCE_LENGTH;
+
+    // Logged twice on purpose: first the configured value, then the effective one
+    // (the QV mask can only be used when a QV track was actually loaded).
+    console->info("use_qv_mask set to {}",use_qv_mask);
+    use_qv_mask = use_qv_mask and has_qv;
+
+    console->info("use_qv_mask set to {}",use_qv_mask);
+
+    omp_set_num_threads(N_PROC);
+    console->info("number processes set to {}", N_PROC);
+
+    console->info("LENGTH_THRESHOLD = {}",LENGTH_THRESHOLD);
+    console->info("QUALITY_THRESHOLD = {}",QUALITY_THRESHOLD);
+    console->info("N_ITER = {}",N_ITER);
+    console->info("ALN_THRESHOLD = {}",ALN_THRESHOLD);
+    console->info("MIN_COV = {}",MIN_COV);
+    console->info("CUT_OFF = {}",CUT_OFF);
+    console->info("THETA = {}",THETA);
+    console->info("EST_COV = {}",EST_COV);
+    console->info("reso = {}",reso);
+    console->info("use_coverage_mask = {}",use_coverage_mask);
+    console->info("COVERAGE_FRACTION = {}",COVERAGE_FRACTION);
+    console->info("MIN_REPEAT_ANNOTATION_THRESHOLD = {}",MIN_REPEAT_ANNOTATION_THRESHOLD);
+    console->info("MAX_REPEAT_ANNOTATION_THRESHOLD = {}",MAX_REPEAT_ANNOTATION_THRESHOLD);
+    console->info("REPEAT_ANNOTATION_GAP_THRESHOLD = {}",REPEAT_ANNOTATION_GAP_THRESHOLD);
+    console->info("NO_HINGE_REGION = {}",NO_HINGE_REGION);
+    console->info("HINGE_MIN_SUPPORT = {}",HINGE_MIN_SUPPORT);
+    console->info("HINGE_BIN_PILEUP_THRESHOLD = {}",HINGE_BIN_PILEUP_THRESHOLD);
+    console->info("HINGE_READ_UNBRIDGED_THRESHOLD = {}",HINGE_READ_UNBRIDGED_THRESHOLD);
+    console->info("HINGE_BIN_LENGTH = {}",HINGE_BIN_LENGTH);
+    console->info("HINGE_TOLERANCE_LENGTH = {}",HINGE_TOLERANCE_LENGTH);
+
+
+    std::vector<LOverlap *> aln;//Vector of pointers to all alignments
+    std::vector< std::vector<std::pair<int, int> > > coverages(n_read);
+    std::vector< std::vector<std::pair<int, int> > > cutoff_coverages(n_read);
+    std::vector< std::vector<std::pair<int, int> > > cgs(n_read); //coverage gradient;
+    std::vector<std::pair<int, int>> maskvec;
+    std::vector<std::vector<std::pair<int, int> > > repeat_annotation;
+    std::unordered_map<int, std::vector<std::pair<int, int>> > hinges;
+
+
+    std::ofstream cov(out + ".coverage.txt");
+    std::ofstream homo(out + ".homologous.txt");
+    std::ofstream filtered(out + ".filtered.fasta");
+    std::ofstream contained_out(out + ".contained.txt");
+    std::ofstream maximal_reads(out + ".max");
+
+
+    // Load the per-read mask: each line is "read_id effective_start effective_end".
+    FILE *mask_file = fopen(name_mask.c_str(), "r");
+    if (mask_file == NULL) {
+        console->error("Can't open {}", name_mask);
+        return 1;
+    }
+    int read, rs, re;
+
+    // Require all three fields: comparing against EOF alone never terminates on a malformed line.
+    while (fscanf(mask_file, "%d %d %d", &read, &rs, &re) == 3) {
+        if (read < 0 or read >= static_cast<int>(reads.size()))
+            continue; // mask entry for a read that was not loaded
+        reads[read]->effective_start = rs;
+        reads[read]->effective_end = re;
+    }
+    fclose(mask_file);
+    console->info("read mask finished");
+
+    int num_active_read = 0;
+    for (int i = 0; i < n_read; i++) {
+        if (reads[i]->active) num_active_read++;
+    }
+    console->info("active reads at start: {}", num_active_read);
+
+
+    // Deactivate reads whose masked (effective) length is below the threshold.
+    num_active_read = 0;
+    for (int i = 0; i < n_read; i++) {
+        if (reads[i]->effective_end - reads[i]->effective_start < LENGTH_THRESHOLD) {
+            reads[i]->active = false;
+        }
+        else num_active_read++;
+    }
+    console->info("active reads after correcting for read lengths: {}", num_active_read);
+
+    console->info("number of las files: {}", name_las_list.size());
+
+    // Whether reads come from a .db file; forwarded to ProcessAlignment below.
+    const bool db_input = strlen(name_db) > 0;
+
+    for (int part = 0; part < name_las_list.size(); part++) {
+
+
+        console->info("name of las: {}", name_las_list[part]);
+
+
+        if (strlen(name_las_list[part].c_str()) > 0)
+            la.openAlignmentFile(name_las_list[part]);
+
+        int64 n_aln = 0;
+
+        if (strlen(name_las_list[part].c_str()) > 0) {
+            n_aln = la.getAlignmentNumber();
+            console->info("Load alignments from {}", name_las_list[part]);
+            console->info("# Alignments: {}", n_aln);
+        }
+
+
+        if (strlen(name_las_list[part].c_str()) > 0) {
+            la.resetAlignment();
+            la.getOverlap(aln, 0, n_read);
+        }
+
+        if (strlen(name_paf) > 0) {
+            n_aln = la.loadPAF(std::string(name_paf), aln);
+            console->info("Load alignments from {}", name_paf);
+            console->info("# Alignments: {}", n_aln);
+        }
+
+        // aln.front()/aln.back() are used right below, so the vector itself must be non-empty too.
+        if (n_aln == 0 or aln.empty()) {
+            console->error("No alignments!");
+            return 1;
+        }
+
+        console->info("Input data finished, part {}/{}", part + 1, name_las_list.size());
+
+
+        // Range of A-read ids covered by this part.
+        int r_begin = aln.front()->read_A_id_;
+        int r_end = aln.back()->read_A_id_;
+
+
+        std::vector<std::vector <LOverlap * > > idx_pileup; // this is the pileup
+        std::vector<std::vector <LOverlap * > > idx_pileup_dedup; // this is the deduplicated pileup
+        std::vector<std::unordered_map<int, std::vector<LOverlap *> > > idx_ab; //unordered_map from (aid, bid) to alignments in a vector
+
+
+        for (int i = 0; i< n_read; i++) {
+            idx_pileup.push_back(std::vector<LOverlap *>());
+            idx_pileup_dedup.push_back(std::vector<LOverlap *>());
+            idx_ab.push_back(std::unordered_map<int, std::vector<LOverlap *>> ());
+            repeat_annotation.push_back(std::vector<std::pair<int, int> >());
+            maskvec.push_back(std::pair<int, int>());
+        }
+
+        // Self-overlaps are deactivated; every other active overlap joins the pileup of its A read.
+        for (int i = 0; i < aln.size(); i++) {
+            if (aln[i]->read_A_id_ == aln[i]->read_B_id_) {
+                aln[i]->active = false;
+            }
+            if (aln[i]->active) {
+                idx_pileup[aln[i]->read_A_id_].push_back(aln[i]);
+            }
+        }
+
+
+        for (int i = 0; i < n_read; i++) {// sort overlaps of a reads
+            std::sort(idx_pileup[i].begin(), idx_pileup[i].end(), compare_overlap);
+        }
+
+        for (int i = 0; i < aln.size(); i++) {
+            idx_ab[aln[i]->read_A_id_][aln[i]->read_B_id_] = std::vector<LOverlap *>();
+        }
+
+        for (int i = 0; i < aln.size(); i++) {
+            idx_ab[aln[i]->read_A_id_][aln[i]->read_B_id_].push_back(aln[i]);
+        }
+
+
+        // Deduplicated pileup: keep only the first overlap (in compare_overlap order) per (A, B) pair.
+        for (int i = 0; i < n_read; i++) {
+            for (std::unordered_map<int, std::vector<LOverlap *> >::iterator it = idx_ab[i].begin(); it!= idx_ab[i].end(); it++) {
+                std::sort(it->second.begin(), it->second.end(), compare_overlap);
+                if (it->second.size() > 0)
+                    idx_pileup_dedup[i].push_back(it->second[0]);
+            }
+        }
+
+        console->info("profile coverage (with and without CUT_OFF)");
+
+        //std::vector< std::vector<std::pair<int, int> > > his;
+        for (int i = r_begin; i <= r_end; i ++) {
+            std::vector<std::pair<int, int> > coverage;
+
+            std::vector<std::pair<int, int> > cutoff_coverage;
+
+
+            //TODO : Implement set based gradient
+            std::vector<std::pair<int, int> > cg;
+            //profileCoverage: get the coverage based on pile-o-gram
+            la.profileCoverage(idx_pileup[i], cutoff_coverage, reso, CUT_OFF);
+            la.profileCoverage(idx_pileup[i], coverage, reso, 0);
+            cov << "read " << i <<" ";
+            for (int j = 0; j < coverage.size(); j++)
+                cov << coverage[j].first << ","  << coverage[j].second << " ";
+            cov << std::endl;
+
+            //Computes coverage gradients.
+            if (coverage.size() >= 2)
+                for (int j = 0; j < coverage.size() - 1; j++) {
+                    cg.push_back(std::pair<int,int>(coverage[j].first, coverage[j+1].second - coverage[j].second));
+                }
+            else cg.push_back(std::pair<int, int> (0,0));
+
+            coverages[i] = (coverage);
+            cutoff_coverages[i] = (cutoff_coverage);
+            cgs[i] = (cg);
+        }
+
+        console->info("profile coverage done part {}/{}", part + 1, name_las_list.size());
+
+
+        // The former random sampling of reads was dead code (its result was never
+        // read) and could loop forever when too few reads exceeded 5000 bases, so
+        // it is gone; only the seeding of rand() is kept.
+        srand(time(NULL));
+
+        int num_slot = 0;
+        long int total_cov = 0;
+
+        std::vector<int> read_coverage;
+        long int read_cov=0;
+        int read_slot =0;
+        //Finding the average coverage over all reads of at least 5000 bases
+        for (int i =r_begin; i <= r_end;  i++){
+            if (reads[i]->len < 5000)
+                continue;
+            read_cov=0;
+            read_slot=0;
+            for (int j = 0; j < coverages[i].size(); j++) {
+                //printf("%d\n", coverages[i][j].second);
+                read_cov+=coverages[i][j].second;
+                read_slot++;
+            }
+            total_cov += read_cov;
+            num_slot += read_slot;
+            int mean_read_cov=read_cov / std::max(1,read_slot);
+            read_coverage.push_back(mean_read_cov);
+        }
+
+
+        size_t median_id = read_coverage.size() / 2;
+        if (median_id > 0)
+            std::nth_element(read_coverage.begin(), read_coverage.begin()+median_id, read_coverage.end());
+
+        // Fall back to 0 when no read was long enough to contribute, instead of
+        // indexing an empty vector or dividing by zero.
+        int cov_est = read_coverage.empty() ? 0 : read_coverage[median_id];
+
+        int mean_cov_est = (num_slot > 0) ? static_cast<int>(total_cov / num_slot) : 0;
+
+
+        //get estimated coverage
+
+        if (EST_COV != 0) cov_est = EST_COV; //if the coverage is specified by ini file, it overrides the estimated one
+        console->info("Estimated mean coverage: {}", mean_cov_est);
+        console->info("Estimated median coverage: {}", cov_est);
+
+
+        // MIN_COV is raised to at least a third of the estimated coverage
+        if (MIN_COV < cov_est/3)
+            MIN_COV = cov_est/3;
+
+        // Extend the restricted read set with the B reads overlapping each selected read.
+        if (reads_to_keep.size()>0) {
+            reads_to_keep_initial = reads_to_keep;
+            for (std::set<int>::iterator iter = reads_to_keep_initial.begin();
+                 iter != reads_to_keep_initial.end(); ++iter) {
+                int i = *iter;
+                if (i < 0 or i >= n_read)
+                    continue; // id from the restriction file that is not a loaded read
+                for (std::unordered_map<int, std::vector<LOverlap *> >::iterator it = idx_ab[i].begin();
+                     it != idx_ab[i].end(); it++) {
+                    if (it->second.size() > 0) {
+                        LOverlap *ovl = it->second[0];
+                        reads_to_keep.insert(ovl->read_B_id_);
+                    }
+                }
+            }
+            console->info("After accounting for neighbours of reads selected, have {} reads", reads_to_keep.size());
+        }
+
+        std::unordered_map<int, std::vector<LOverlap *> > matches_forward, matches_backward;
+
+        for (int i = r_begin; i <= r_end; i ++) {
+            //An initialisation for loop
+            //TODO Preallocate memory. Much more efficient.
+            //idx2.push_back(std::vector<LOverlap *>());
+            matches_forward[i] = std::vector<LOverlap *>();
+            matches_backward[i] = std::vector<LOverlap *>();
+        }
+
+
+        for (int i = r_begin; i <= r_end; i ++) {
+            bool contained = false;
+            //std::cout<< "Testing opt " << i << std::endl;
+            if (reads[i]->active == false) {
+                continue;
+            }
+
+            // B read of the most recent alignment reported as containing read i (-1: none yet).
+            int containing_read = -1;
+
+            for (std::unordered_map<int, std::vector<LOverlap *> >::iterator it = idx_ab[i].begin();
+                 it != idx_ab[i].end(); it++) {
+                std::sort(it->second.begin(), it->second.end(), compare_overlap);//Sort overlaps by lengths
+                //std::cout<<"Giving input to ProcessAlignment "<<it->second.size() <<std::endl;
+
+                // Examine the first overlap with this B read, plus the second one when
+                // use_two_matches is enabled and a second overlap exists.
+                const size_t n_considered = std::min<size_t>(it->second.size(), USE_TWO_MATCHES ? 2 : 1);
+                for (size_t k = 0; k < n_considered; k++) {
+                    //Figure out if read is contained
+                    LOverlap *ovl = it->second[k];
+                    const bool contained_alignment = ProcessAlignment(ovl, reads[ovl->read_A_id_],
+                                                                      reads[ovl->read_B_id_], ALN_THRESHOLD, THETA, THETA2, db_input);
+                    if (contained_alignment == true) {
+                        containing_read = ovl->read_B_id_;
+                    }
+
+                    // Only an active B read can make read i count as contained.
+                    if (reads[ovl->read_B_id_]->active == true)
+                        contained = contained or contained_alignment;
+
+                    //Filter matches that matter.
+                    //TODO Figure out a way to do this more efficiently
+                    if ((ovl->match_type_ == FORWARD) or (ovl->match_type_ == FORWARD_INTERNAL))
+                        matches_forward[i].push_back(ovl);
+                    else if ((ovl->match_type_ == BACKWARD) or (ovl->match_type_ == BACKWARD_INTERNAL))
+                        matches_backward[i].push_back(ovl);
+                }
+            }
+            if (contained) {
+                reads[i]->active = false;
+                contained_out << i << "\t" << containing_read << std::endl;
+
+            }
+        }
+        int num_overlaps = 0;
+        int num_forward_overlaps(0), num_forward_internal_overlaps(0), num_reverse_overlaps(0),
+                num_reverse_internal_overlaps(0), rev_complemented_matches(0);
+        for (int i = 0; i < n_read; i++) {//Isn't this just 0 or 1?
+            num_overlaps += matches_forward[i].size() + matches_backward[i].size();
+            for (int j = 0; j < matches_forward[i].size(); j++)
+                rev_complemented_matches += matches_forward[i][j]->reverse_complement_match_;
+            for (int j = 0; j < matches_backward[i].size(); j++)
+                rev_complemented_matches += matches_backward[i][j]->reverse_complement_match_;
+        }
+        console->info("{} overlaps", num_overlaps);
+        console->info("{} rev overlaps", rev_complemented_matches);
+
+        // Every read of this part that is still active is written to <prefix>.max.
+        num_active_read = 0;
+        for (int i = r_begin; i <= r_end; i ++) {
+            if (reads[i]->active) {
+                num_active_read++;
+                maximal_reads << i << std::endl;
+            }
+        }
+        console->info("removed contained reads, active reads: {}", num_active_read);
+
+        num_active_read = 0;
+        for (int i = r_begin; i <= r_end; i ++) {
+            if (reads[i]->active) num_active_read++;
+        }
+        console->info("active reads: {}", num_active_read);
+        console->info("total reads: {}", r_end-r_begin+1);
+
+
+        // Release this part's alignments before loading the next file.
+        for (int i = 0; i < aln.size(); i++) {
+            free(aln[i]);
+        }
+        aln.clear();
+    }
+
+
+    if (strlen(name_db)>0)
+        la.closeDB(); //close database
+    return 0;
+
+}
diff --git a/utils/build.sh b/utils/build.sh
index 0f1ea9c..3deb02b 100755
--- a/utils/build.sh
+++ b/utils/build.sh
@@ -16,7 +16,8 @@ make -j 8
 cd $pwd
 mkdir build
 cd $pwd/build
-cmake .. -DCMAKE_C_COMPILER=gcc-4.8 -DCMAKE_CXX_COMPILER=g++-4.8
+cmake .. -DCMAKE_INSTALL_PREFIX=../inst -DCMAKE_C_COMPILER=gcc-4.8 -DCMAKE_CXX_COMPILER=g++-4.8
 make -j 8
+make install
 
 exit $?
diff --git a/utils/setup.sh b/utils/setup.sh
index b65630e..934c2be 100755
--- a/utils/setup.sh
+++ b/utils/setup.sh
@@ -2,6 +2,7 @@
 PPWD=$PWD
 #echo $PWD
 #echo $DIR
-export PATH="$PATH:$PPWD/thirdparty/DALIGNER:$PPWD/thirdparty/DAZZ_DB:$PPWD/thirdparty/DEXTRACTOR/:$PPWD/thirdparty/DASCRUBBER"
-export PATH="$PATH:$PPWD/scripts"
-export PATH="$PATH:$PPWD/build/bin/consensus:$PPWD/build/bin/filter:$PPWD/build/bin/layout"
\ No newline at end of file
+export PATH="$PATH:$PPWD/thirdparty:$PPWD/thirdparty/DALIGNER:$PPWD/thirdparty/DAZZ_DB:$PPWD/thirdparty/DEXTRACTOR/:$PPWD/thirdparty/DASCRUBBER"
+export PATH="$PATH:$PPWD/thirdparty/racon/bin:$PPWD/thirdparty/racon/tools/minimap"
+export PATH="$PATH:$PPWD/inst/bin"
+export MANPATH="$MANPATH:$PPWD/inst/share/man"

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/hinge.git



More information about the debian-med-commit mailing list