[med-svn] [r-cran-rncl] 06/10: New upstream version 0.8.2

Andreas Tille tille at debian.org
Wed Nov 29 19:40:28 UTC 2017


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository r-cran-rncl.

commit ea566f8c8664ba4c653eb3a45b0176441746e383
Author: Andreas Tille <tille at debian.org>
Date:   Wed Nov 29 20:34:52 2017 +0100

    New upstream version 0.8.2
---
 DESCRIPTION                                     |   33 +
 LICENSE                                         |    2 +
 MD5                                             |   87 +
 NAMESPACE                                       |   11 +
 NEWS.md                                         |   75 +
 R/RcppExports.R                                 |   15 +
 R/collapse_singles.R                            |   38 +
 R/rncl-package.R                                |   22 +
 R/rncl.R                                        |  319 ++
 README.md                                       |   43 +
 cleanup                                         |   65 +
 debian/README.test                              |    9 -
 debian/changelog                                |   23 -
 debian/compat                                   |    1 -
 debian/control                                  |   27 -
 debian/copyright                                |   36 -
 debian/docs                                     |    3 -
 debian/rules                                    |   12 -
 debian/source/format                            |    1 -
 debian/tests/control                            |    3 -
 debian/tests/run-unit-test                      |   12 -
 debian/watch                                    |    3 -
 inst/newick_bad/Gudrun.nex                      |  149 +
 inst/newick_bad/bad_newick.tre                  |    1 +
 inst/newick_good/Gudrun.tre                     |    1 +
 inst/newick_good/missing_edge_lengths.tre       |    1 +
 inst/newick_good/simpleTree.tre                 |    1 +
 inst/newick_good/singleton_tree.tre             |    1 +
 inst/newick_good/singleton_with_edge_length.tre |    1 +
 inst/newick_good/test_sing.tre                  |    1 +
 inst/newick_good/tree1.tre                      |    1 +
 inst/newick_good/tree2.tre                      |    1 +
 inst/nexusfiles/MultiLineTrees.nex              |   89 +
 inst/nexusfiles/badnex.nex                      |  101 +
 inst/nexusfiles/co1.nex                         |   13 +
 inst/nexusfiles/multiLines.rds                  |  Bin 0 -> 1547 bytes
 inst/nexusfiles/newick.tre                      |    1 +
 inst/nexusfiles/test_empty.nex                  |   14 +
 inst/nexusfiles/test_subset_alltaxa.nex         |   23 +
 inst/nexusfiles/test_subset_taxa.nex            |   27 +
 inst/nexusfiles/test_underscores.nex            |   42 +
 inst/nexusfiles/treeWithDiscreteData.nex        |  354 ++
 inst/nexusfiles/treeWithUnderscoreLabels.nex    |  354 ++
 man/read_nexus_phylo.Rd                         |   50 +
 man/rncl.Rd                                     |  169 +
 src/GetNCL.cpp                                  |  424 ++
 src/Makevars                                    |    2 +
 src/Makevars.win                                |    6 +
 src/RcppExports.cpp                             |   44 +
 src/collapse_singles.cpp                        |  128 +
 src/ncl/ncl.h                                   |   97 +
 src/ncl/nxsallocatematrix.h                     |  253 ++
 src/ncl/nxsassumptionsblock.h                   |  391 ++
 src/ncl/nxsblock.h                              |  363 ++
 src/ncl/nxscdiscretematrix.h                    |  113 +
 src/ncl/nxscharactersblock.h                    | 1972 ++++++++
 src/ncl/nxscxxdiscretematrix.h                  |  264 ++
 src/ncl/nxsdatablock.h                          |   69 +
 src/ncl/nxsdefs.h                               |  108 +
 src/ncl/nxsdiscretedatum.h                      |   58 +
 src/ncl/nxsdiscretematrix.h                     |   90 +
 src/ncl/nxsdistancedatum.h                      |   42 +
 src/ncl/nxsdistancesblock.h                     |  197 +
 src/ncl/nxsemptyblock.h                         |   77 +
 src/ncl/nxsexception.h                          |   97 +
 src/ncl/nxsindent.h                             |   56 +
 src/ncl/nxsmultiformat.h                        |  260 ++
 src/ncl/nxspublicblocks.h                       |  731 +++
 src/ncl/nxsreader.h                             |  612 +++
 src/ncl/nxssetreader.h                          |  104 +
 src/ncl/nxsstring.h                             |  742 +++
 src/ncl/nxstaxaassociationblock.h               |  139 +
 src/ncl/nxstaxablock.h                          |  484 ++
 src/ncl/nxstoken.h                              |  876 ++++
 src/ncl/nxstreesblock.h                         | 1000 ++++
 src/ncl/nxsunalignedblock.h                     |  332 ++
 src/ncl/nxsutilcopy.h                           |  264 ++
 src/nxsassumptionsblock.cpp                     | 2807 ++++++++++++
 src/nxsblock.cpp                                |  644 +++
 src/nxscharactersblock.cpp                      | 5518 +++++++++++++++++++++++
 src/nxscxxdiscretematrix.cpp                    |  511 +++
 src/nxsdatablock.cpp                            |   63 +
 src/nxsdistancedatum.cpp                        |   36 +
 src/nxsdistancesblock.cpp                       |  714 +++
 src/nxsemptyblock.cpp                           |  158 +
 src/nxsexception.cpp                            |  106 +
 src/nxsmultiformat.cpp                          | 1606 +++++++
 src/nxspublicblocks.cpp                         |  811 ++++
 src/nxsreader.cpp                               | 1396 ++++++
 src/nxssetreader.cpp                            |  536 +++
 src/nxsstring.cpp                               | 1041 +++++
 src/nxstaxaassociationblock.cpp                 |  237 +
 src/nxstaxablock.cpp                            |  633 +++
 src/nxstoken.cpp                                | 1106 +++++
 src/nxstreesblock.cpp                           | 2146 +++++++++
 src/nxsunalignedblock.cpp                       |  915 ++++
 tests/test-all.R                                |    3 +
 tests/testthat/test.badnex.R                    |   14 +
 tests/testthat/test.rncl.R                      |  307 ++
 99 files changed, 33808 insertions(+), 130 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
new file mode 100644
index 0000000..201aea6
--- /dev/null
+++ b/DESCRIPTION
@@ -0,0 +1,33 @@
+Package: rncl
+Title: An Interface to the Nexus Class Library
+Version: 0.8.2
+Authors@R: c(person("Francois", "Michonneau", role=c("aut", "cre"),
+    email="francois.michonneau at gmail.com"),
+    person("Ben", "Bolker", role=c("aut")),
+    person("Mark", "Holder", role=c("aut")),
+    person("Paul", "Lewis", role=c("aut")),
+    person("Brian", "O'Meara", role=c("aut")))
+Maintainer: Francois Michonneau <francois.michonneau at gmail.com>
+Description: An interface to the Nexus Class Library which allows parsing
+    of NEXUS, Newick and other phylogenetic tree file formats. It provides
+    elements of the file that can be used to build phylogenetic objects
+    such as ape's 'phylo' or phylobase's 'phylo4(d)'. This functionality
+    is demonstrated with 'read_newick_phylo()' and 'read_nexus_phylo()'.
+Imports: Rcpp (>= 0.11.0), progress (>= 1.1.2), stats
+Suggests: testthat, ape
+LinkingTo: Rcpp, progress
+Depends: R (>= 3.1.1)
+License: BSD_2_clause + file LICENSE
+URL: https://github.com/fmichonneau/rncl
+BugReports: https://github.com/fmichonneau/rncl/issues
+LazyData: true
+RoxygenNote: 5.0.1.9000
+NeedsCompilation: yes
+Packaged: 2016-12-16 09:34:39 UTC; francois
+Author: Francois Michonneau [aut, cre],
+  Ben Bolker [aut],
+  Mark Holder [aut],
+  Paul Lewis [aut],
+  Brian O'Meara [aut]
+Repository: CRAN
+Date/Publication: 2016-12-16 11:20:37
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..98aa99f
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,2 @@
+YEAR: 2016
+COPYRIGHT HOLDER: Francois Michonneau
\ No newline at end of file
diff --git a/MD5 b/MD5
new file mode 100644
index 0000000..e19c0cf
--- /dev/null
+++ b/MD5
@@ -0,0 +1,87 @@
+c8d7b3523cbbcab22f284c6906f85303 *DESCRIPTION
+93179b4adf46867a798cfc929974d9de *LICENSE
+b4bdb7b6209055696300da6cc8b84b86 *NAMESPACE
+f591e7f2653ee6c7da88cf98509744d6 *NEWS.md
+5d0d8fcc5ad761ea05685692dc53441d *R/RcppExports.R
+ec768c58042278741b95cc4f1f90c134 *R/collapse_singles.R
+9cd88a5b879d5937495f45ce7cf8f4ff *R/rncl-package.R
+8d9e0760f5a219c8acedf10770e6b844 *R/rncl.R
+7c20839bfb3c861877630718cb91577a *README.md
+c88f0e72152ce9ff4e5c224d891a9544 *cleanup
+90175fc56ee204e4ddd87a05c14ce9cb *inst/newick_bad/Gudrun.nex
+1e10833709a3cb968edea67f98e16a33 *inst/newick_bad/bad_newick.tre
+5509435f4eb233c88f42eba7a83b7432 *inst/newick_good/Gudrun.tre
+14c2582eb8c94a8c6944294cad169285 *inst/newick_good/missing_edge_lengths.tre
+338d16f4e1a7cecb3a51a15d43b76d13 *inst/newick_good/simpleTree.tre
+75a2c6c9a33a401766479ce17db8088f *inst/newick_good/singleton_tree.tre
+b3be916f4d742b7a07d688ba88e9ef93 *inst/newick_good/singleton_with_edge_length.tre
+52ad70df750c63629facc1cc908bbc35 *inst/newick_good/test_sing.tre
+6b51e76cd13bf3a3c3498eb3978289ac *inst/newick_good/tree1.tre
+e24e0e19c7186a52d8299eb571fe534f *inst/newick_good/tree2.tre
+c05860e96ba5feab12b1269f43a43f1b *inst/nexusfiles/MultiLineTrees.nex
+fb2ae9b4cc08ab2d0959508a55c83633 *inst/nexusfiles/badnex.nex
+f8225a526530eabfaa8ea117e2a82aae *inst/nexusfiles/co1.nex
+a6fdad8b8df8bdca481fa30b57a1b118 *inst/nexusfiles/multiLines.rds
+4d5d4d71cf83b54eed9811470482e8d2 *inst/nexusfiles/newick.tre
+bc8aba7197b1ced0ab893cf182948370 *inst/nexusfiles/test_empty.nex
+5776170098330a838a9fbdaf2b445605 *inst/nexusfiles/test_subset_alltaxa.nex
+fa8bb823db33b1a99263c3187b1df83a *inst/nexusfiles/test_subset_taxa.nex
+3b5d79f6910b8a89151a6e2bb4065c5b *inst/nexusfiles/test_underscores.nex
+963e6a5568b7fae9291232b8abfd496c *inst/nexusfiles/treeWithDiscreteData.nex
+4961bb5af89e1cfd63944af5d8ee8408 *inst/nexusfiles/treeWithUnderscoreLabels.nex
+a4cfc9a028c4a50aea381932ae8668a0 *man/read_nexus_phylo.Rd
+0f325a7c85a6e4269fa2e4547b52ba87 *man/rncl.Rd
+2b7ecc9e2e87555c7ce2e2bead8d10d7 *src/GetNCL.cpp
+ed0d19722e28a7d316256f2c247be33e *src/Makevars
+1e0cc7f2ea27756865f63d6758ca90a8 *src/Makevars.win
+14909e88ff526fc1506fb9f239604065 *src/RcppExports.cpp
+48c487bb7ee4ca80c67cd14a940546d9 *src/collapse_singles.cpp
+87210b76102c11d0a2890f10494bf75c *src/ncl/ncl.h
+693c38f98d7c4292b1c02b5903e67ea6 *src/ncl/nxsallocatematrix.h
+9cd773a98f2878a26432953457be6cae *src/ncl/nxsassumptionsblock.h
+cafb0b77a61c18bd5e3f2dd217b29e8f *src/ncl/nxsblock.h
+994ff5c8246edc19238b40a1cf699098 *src/ncl/nxscdiscretematrix.h
+23afe1f8a6bbcd794e097f89df75fa2f *src/ncl/nxscharactersblock.h
+c0ab932a2f7f4f4ddecbee38a3055362 *src/ncl/nxscxxdiscretematrix.h
+2e24d36bd7f07a17e3efa6f305c91e52 *src/ncl/nxsdatablock.h
+19ccdb35c77ee583753e2605310acc2e *src/ncl/nxsdefs.h
+c9e55d1032b96ed8560abd86ed428c35 *src/ncl/nxsdiscretedatum.h
+56c32fd796763fd62c4916839921e344 *src/ncl/nxsdiscretematrix.h
+26e3921bf4a40066b12aa108d68a9e58 *src/ncl/nxsdistancedatum.h
+2a48381d52c22f16cc019d9dfdff6930 *src/ncl/nxsdistancesblock.h
+56d24853775abec4144713be735a06cc *src/ncl/nxsemptyblock.h
+e402d049dc9cccfb6f84c17b01b94e60 *src/ncl/nxsexception.h
+5e50cff40a6e1f729a599be4361dccb4 *src/ncl/nxsindent.h
+eb76426dff57be41085931b98b59481b *src/ncl/nxsmultiformat.h
+fca227a74785bd6aab93514268da93d8 *src/ncl/nxspublicblocks.h
+5e9599513e347ae6908d4f452d4c1ef4 *src/ncl/nxsreader.h
+97525aa3a9a6e3e1b1fcd85b9b8a0e9a *src/ncl/nxssetreader.h
+019995016f630d873851b924f8c565f3 *src/ncl/nxsstring.h
+fd8d445cfbd99169f5f3c85d0d9a4b58 *src/ncl/nxstaxaassociationblock.h
+0f5cbf5709f1f2e26e3c6c1d5fd43544 *src/ncl/nxstaxablock.h
+7e5a97a9094a97149b42dd4b1e07a8f0 *src/ncl/nxstoken.h
+2125f0c7682c75a72195435f621fff02 *src/ncl/nxstreesblock.h
+866f8bb860c59fa296972f29daa0ae0a *src/ncl/nxsunalignedblock.h
+4ddc662cda9776eedd67c97264d6cc79 *src/ncl/nxsutilcopy.h
+b6d51ea966d1f4cdaf5625a26a3cc6aa *src/nxsassumptionsblock.cpp
+26c8ee9e7bc3fb42b2a3e7e6e33f0e53 *src/nxsblock.cpp
+38a31640e1a60dce3797a333392b4bc1 *src/nxscharactersblock.cpp
+325a8687c4b81f940bd57477716fca97 *src/nxscxxdiscretematrix.cpp
+5a1c18976ba6e0363d9a2cca46a38985 *src/nxsdatablock.cpp
+86281e5739d006cd61fafc09b74c3047 *src/nxsdistancedatum.cpp
+b6678476d102320b01682682bc0a60bf *src/nxsdistancesblock.cpp
+0b450e2dbd68b29cf071273f58092980 *src/nxsemptyblock.cpp
+9544e90a66e7898e087de149afb82119 *src/nxsexception.cpp
+d936913e3590b8c2f231ec8f6a3760c8 *src/nxsmultiformat.cpp
+098926613198fa0dbae83fd951b9c869 *src/nxspublicblocks.cpp
+e049b4a3c5412ac0b0d25624bedb8d8b *src/nxsreader.cpp
+39e0dadc5306b7ab5179c3dfa5e81e81 *src/nxssetreader.cpp
+1244b1ec75c2663b152a74921ad6969a *src/nxsstring.cpp
+d249dd91842ed712244c7d82c9e3960d *src/nxstaxaassociationblock.cpp
+e0185442ae32e8cc6c44cf6d3a22ef68 *src/nxstaxablock.cpp
+def2787ca05c353460733e4f0784cae4 *src/nxstoken.cpp
+600c49f95cfb634385724b03e4b3ae67 *src/nxstreesblock.cpp
+ab4852075e7f4d6a098acf1d8ac99ece *src/nxsunalignedblock.cpp
+bcc662bde8a058b55f552ed6a29509d8 *tests/test-all.R
+01c699dd3359f9899c5fa55b83360779 *tests/testthat/test.badnex.R
+f8297020efed223393a0c76b61e4b61b *tests/testthat/test.rncl.R
diff --git a/NAMESPACE b/NAMESPACE
new file mode 100644
index 0000000..a63e528
--- /dev/null
+++ b/NAMESPACE
@@ -0,0 +1,11 @@
+# Generated by roxygen2: do not edit by hand
+
+export(make_phylo)
+export(read_newick_phylo)
+export(read_nexus_phylo)
+export(rncl)
+importFrom(Rcpp,evalCpp)
+importFrom(Rcpp,loadRcppModules)
+importFrom(progress,progress_bar)
+importFrom(stats,na.omit)
+useDynLib(rncl)
diff --git a/NEWS.md b/NEWS.md
new file mode 100644
index 0000000..8586052
--- /dev/null
+++ b/NEWS.md
@@ -0,0 +1,75 @@
+
+## rncl 0.8.2
+
+### New features
+
+* The re-numbering of the edge matrix to deal with singletons is now performed in
+  C++, leading to improved speed (about 3x for a 1700 tip tree) (#11).
+
+* A progress bar is displayed when removing singletons, which is useful when
+  the tree is very large.
+
+### Major changes
+
+* the components of the edge matrix are stored as integer (they were stored as
+  double). (#17, reported by @KlausVigo)
+
+## rncl 0.6.0
+
+### New features
+
+* `rncl` now allows the parsing of tree files where the trees contain a subset
+  of the taxa listed in the TAXA block.
+
+### Major changes
+
+* Missing edge lengths are now represented by -999 instead of -1 in the object
+  returned by the `rncl` function.
+
+## rncl 0.4.0
+
+### New features
+
+* `rncl` now allows the parsing of tree files that contain some missing edge
+  lengths, using the `missing_edge_length` argument in the `read_newick_phylo`
+  and `read_nexus_phylo`. By default, if a tree has at least one missing edge
+  length, all edge lengths are dropped. Alternatively, the user can provide a
+  numeric value that will be used to replace all missing edge lengths. (#33 from
+  `rotl`)
+
+* If `read_newick_phylo` and `read_nexus_phylo` return a list of trees, the
+  elements of the list are named according to the names found in the tree file.
+
+### Major changes
+
+* Parsing tree files is now quiet, the default output of NCL is
+  silenced. Because of the implementation of this output, it's difficult to give
+  control to the user over this, but it's probably best to keep it quiet rather
+  than having unneeded messages pollute the screen.
+
+* The documentation of the function `rncl` is improved.
+
+* The function `make_phylo` is now deprecated and will be removed in the next
+  version. Use `read_newick_phylo` or `read_nexus_phylo` instead.
+
+### Minor changes
+
+* The option spacesAsUnderscore now also applies to the slot `taxaNames` and not
+  only to the elements of the slot `taxonLabelVector`.
+
+* If the file parsed contains trees that only include a subset of the taxa
+  listed in the NEXUS taxa block, the function fails more explicitly.
+
+### Bug fixes
+
+* The slot `treeNames` had duplicated values for each tree name.
+* Labels could have been assigned to the incorrect tips in some NEXUS files
+
+## rncl 0.2.2
+
+* change roles in authors to have a single creator (`'cre'`)
+* fix typo in documentation
+
+## rncl 0.2.0
+
+* initial release on CRAN
diff --git a/R/RcppExports.R b/R/RcppExports.R
new file mode 100644
index 0000000..735cbda
--- /dev/null
+++ b/R/RcppExports.R
@@ -0,0 +1,15 @@
+# Generated by using Rcpp::compileAttributes() -> do not edit by hand
+# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
+
+n_singletons <- function(ances) { # generated glue: hands `ances` to the compiled routine below
+    .Call('rncl_n_singletons', PACKAGE = 'rncl', ances) # value of the native call is returned as-is
+}
+
+collapse_single_cpp <- function(ances, desc, elen, nnode) { # generated glue around the compiled routine below
+    .Call('rncl_collapse_single_cpp', PACKAGE = 'rncl', ances, desc, elen, nnode) # arguments forwarded as given
+}
+
+RNCL <- function(params, paramsVecR) { # generated glue around the compiled routine below
+    .Call('rncl_RNCL', PACKAGE = 'rncl', params, paramsVecR) # both arguments forwarded as given
+}
+
diff --git a/R/collapse_singles.R b/R/collapse_singles.R
new file mode 100644
index 0000000..c71c3fa
--- /dev/null
+++ b/R/collapse_singles.R
@@ -0,0 +1,38 @@
+##' @importFrom progress progress_bar
+##' @importFrom Rcpp loadRcppModules
+##' @importFrom stats na.omit
+collapse_singles <- function(tree) {
+    ## Removes singleton nodes from the phylo-like list `tree` (uses its edge,
+    ## edge.length, Nnode, tip.label and node.label elements) and returns the
+    ## updated list; the edge re-numbering is done by collapse_single_cpp().
+    if (is.null(tree$edge.length)) {
+        elen <- numeric(0)   # no edge lengths: pass an empty vector
+    } else {
+        elen <- tree$edge.length
+    }
+
+    res <- collapse_single_cpp(
+        ances = tree$edge[, 1],
+        desc = tree$edge[, 2],
+        elen = elen,
+        nnode = tree$Nnode
+    )
+
+    tree$edge <- matrix(c(res$ances, res$desc), ncol = 2)
+    ## NOTE(review): '> 1' leaves a one-edge result untouched; confirm '> 0' is not intended
+    if (length(res$edge.length) > 1) {
+        tree$edge.length <- res$edge.length
+    }
+    tree$Nnode <- res$Nnode
+
+    if (!is.null(tree$node.label)) {
+        ## NOTE(review): assumes position_singletons are 0-based node ids (tips first) -- confirm in C++
+        idx_nd_lbl <- res$position_singletons + 1  - length(tree$tip.label)
+        if (length(idx_nd_lbl) > 0) { # x[-integer(0)] would drop every label
+            warning("Dropping singleton nodes with labels: ",
+                    paste(stats::na.omit(tree$node.label[idx_nd_lbl]), collapse = ", "))
+            tree$node.label <- tree$node.label[- idx_nd_lbl]
+        }
+    }
+    tree
+}
diff --git a/R/rncl-package.R b/R/rncl-package.R
new file mode 100644
index 0000000..f323a74
--- /dev/null
+++ b/R/rncl-package.R
@@ -0,0 +1,22 @@
+##' rncl: An R interface to the NEXUS Class Library
+##'
+##' rncl provides an interface to the NEXUS Class Library (NCL), a C++
+##' library intended to parse valid NEXUS files as well as other
+##' common formats used in phylogenetic analysis. Currently, rncl
+##' focuses on parsing trees and supports both NEXUS and Newick
+##' formatted files. Because NCL is used by several phylogenetic
+##' software (e.g., MrBayes, Garli), rncl can parse files generated by
+##' these programs. However, other popular programs (including BEAST)
+##' use an extension of the NEXUS file format, and if trees can be
+##' imported, associated annotations (e.g., confidence intervals on
+##' the time since divergence) cannot.
+##'
+##' NCL can also parse data associated with species included in NEXUS
+##' files. If you are interested in importing such data, see the
+##' phylobase package.
+##'
+##' @name rncl
+##' @docType package
+##' @useDynLib rncl
+##' @importFrom Rcpp evalCpp
+NULL
diff --git a/R/rncl.R b/R/rncl.R
new file mode 100644
index 0000000..7221882
--- /dev/null
+++ b/R/rncl.R
@@ -0,0 +1,319 @@
+##' Returns a list of the elements contained in a NEXUS file used to
+##' build phylogenetic objects in R
+##'
+##' NEXUS is a common file format used in phylogenetics to represent
+##' phylogenetic trees, and other types of phylogenetic data. This
+##' function uses NCL (the NEXUS Class Library) to parse NEXUS, Newick
+##' or other common phylogenetic file formats, and returns the
+##' relevant elements as a list. \code{phylo} (from the ape package)
+##' or \code{phylo4} (from the phylobase package) can be constructed
+##' from the elements contained in this list.
+##'
+##' @title Get the elements from a NEXUS (or Newick) file
+##' @param file path to a NEXUS or Newick file
+##' @param file.format a character string indicating the type of file
+##' to be parsed.
+##' @param spacesAsUnderscores In the NEXUS file format white spaces
+##' are not allowed and are represented by underscores. Therefore, NCL
+##' converts underscores found in taxon labels in the NEXUS file into
+##' white spaces (e.g. \code{species_1} will become \code{"species
+##' 1"}). If you want to preserve the underscores, set as \code{TRUE}
+##' (default). This option affects taxon labels, character labels and
+##' state labels.
+##' @param char.all If \code{TRUE} (default), returns all characters,
+##' even those excluded in the NEXUS file (only when NEXUS file
+##' contains DATA block).
+##' @param polymorphic.convert If TRUE (default), converts polymorphic
+##' characters to missing data (only when NEXUS file contains DATA
+##' block).
+##' @param levels.uniform If TRUE (default), uses the same levels for
+##' all characters (only when NEXUS file contains DATA block).
+##' @param ... additional parameters (currently not in use).
+##' @references Maddison DR, Swofford DL, Maddison WP (1997). "NEXUS:
+##' An extensible file format for systematic information". Systematic
+##' Biology 46(4) : 590-621.
+##' doi:\href{http://dx.doi.org/10.1093/sysbio/46.4.590}{10.1093/sysbio/46.4.590}
+##'
+##' Lewis, P. O. 2003. NCL: a C++ class library for interpreting data
+##' files in NEXUS format. Bioinformatics 19 (17) : 2330-2331.
+##' @author Francois Michonneau
+##' @seealso For examples on how to use the elements of the list
+##' returned by this function to build tree objects, inspect the
+##' source code of this package, in particular how
+##' \code{read_newick_phylo} and \code{read_nexus_phylo} work. For a
+##' more complex example that also uses the data contained in NEXUS
+##' files, inspect the source code of the \code{readNCL} function in
+##' the phylobase package.
+##' @return A list that contains the elements extracted from a NEXUS
+##' or a Newick file.
+##'
+##' \itemize{
+##'
+##'   \item {\code{taxaNames}} {A vector of the taxa names listed in
+##' the TAXA block of the NEXUS file or inferred from the tree strings
+##' (if block missing or Newick file).}
+##'
+##'   \item {\code{treeNames}} {A vector listing the names of the trees}
+##'
+##'   \item {\code{taxonLabelVector}} {A list containing as many
+##' elements as there are trees in the file. Each element is a
+##' character vector that lists the taxon names encountered in the
+##' tree string *in the order they appear*, and therefore may not
+##' match the order they are listed in the translation table.}
+##'
+##'   \item {\code{parentVector}} { A list containing as many elements
+##' as there are trees in the file. Each element is a numeric vector
+##' listing the parent node for the node given by its position in the
+##' vector. If the beginning of the vector is 5 5 6, the parent node
+##' of node 1 is 5, the parent of node 2 is 5 and the parent of node 3
+##' is 6. The implicit root of the tree is identified with 0 (node
+##' without a parent).}
+##'
+##'   \item{\code{branchLengthVector}} { A list containing as many
+##' elements as there are trees in the file. Each element is a numeric
+##' vector listing the edge/branch lengths for the edges in the same
+##' order as nodes are listed in the corresponding \code{parentVector}
+##' element. Values of -999 indicate that the value is missing for this
+##' particular edge. The implicit root has a length of 0.}
+##'
+##'   \item{\code{nodeLabelsVector}} { A list containing as many
+##' elements as there are trees in the file. Each element is a
+##' character vector listing the node labels in the same order as
+##' nodes are listed in the corresponding \code{parentVector}
+##' element.}
+##'
+##'   \item{\code{trees}} { A character vector listing the tree
+##' strings where tip labels have been replaced by their indices in
+##' the \code{taxaNames} vector. They do not correspond to the numbers
+##' listed in the translation table that might be associated with the
+##' tree.}
+##'
+##'   \item{\code{dataTypes}} { A character vector indicating the type
+##' of data associated with the tree (e.g., \dQuote{standard}). }
+##'
+##'   \item{\code{nbCharacters}} { A numeric vector indicating how
+##' many characters/traits are available. }
+##'
+##'   \item{\code{charLabels}} { A character vector listing the names
+##' of the characters/traits that are available. }
+##'
+##'   \item {\code{nbStates}} { A numeric vector listing the number of
+##' possible states for each character/trait.}
+##'
+##'   \item {\code{stateLabels}} { A character vector listing in
+##' order, all possible states for each character/trait.}
+##'
+##'   \item {\code{dataChr}} { A character vector with as many
+##' elements as there are characters/traits in the dataset. Each
+##' element is string that can be parsed by R to create a factor
+##' vector representing the data found in the file.}
+##'
+##'   \item {\code{isRooted}} { A list with as many elements as there
+##' are trees in the file. Each element is a logical indicating
+##' whether the tree is rooted. NCL definition of a rooted tree
+##' differs from the one APE uses in some cases. }
+##'
+##'   \item {\code{hasPolytomies}} { A list with as many elements as
+##' there are trees in the file. Each element is a logical indicating
+##' whether the tree contains polytomies.}
+##'
+##'   \item {\code{hasSingletons}} { A list with as many elements as
+##' there are trees in the file. Each element is a logical indicating
+##' whether the tree contains singleton nodes, in other words nodes
+##' with a single descendant (also known as knuckles).}
+##'
+##' }
+##'
+##'
+##' @export
+rncl <- function(file, file.format = c("nexus", "newick"),
+                 spacesAsUnderscores = TRUE, char.all=TRUE,
+                 polymorphic.convert=TRUE, levels.uniform=TRUE, ...) {
+
+    ## `...` is accepted but not used in this function
+    ## Fail early when the path (after path.expand()) is not an existing file
+    file <- path.expand(file)
+    if (!file.exists(file)) {
+        stop(file, " doesn't exist.")
+    }
+
+    ## "newick" is handed to RNCL under the format name "relaxedphyliptree"
+    file.format <- match.arg(file.format)
+    if (file.format == "newick") {
+        file.format <- "relaxedphyliptree"
+    }
+
+    nclInput <- list(fileName=file, fileFormat=file.format)
+
+    ## Logical flags, in the order the GetNCL R (and C++) arguments expect:
+    ##   char.all (charall), polymorphic.convert (polyconvert),
+    ##   levels.uniform (levelsUnif), (returnTrees), (returnData)
+    flags <- c(char.all, polymorphic.convert, levels.uniform, TRUE, TRUE)
+
+    ncl <- RNCL(nclInput, flags)
+
+    ## An "ErrorMsg" element in the result is turned into an R error
+    if (exists("ErrorMsg", where=ncl)) {
+        stop(ncl$ErrorMsg)
+    }
+
+    if (spacesAsUnderscores) {
+        ## Replace each whitespace character in the labels with an underscore
+        underscored <- function(x) gsub("\\s", "_", x)
+
+        ncl$taxonLabelVector <- lapply(ncl$taxonLabelVector, underscored)
+        ncl$taxaNames <- underscored(ncl$taxaNames)
+        ncl$charLabels <- underscored(ncl$charLabels)
+        ncl$stateLabels <- underscored(ncl$stateLabels)
+    }
+
+    ncl
+}
+
+## Returns the (parent, node) edge matrix built from the parentVector, whose i^th
+## element is the parent of node i; the root (parent 0) is returned as attribute "root"
+get_edge_matrix <- function(parentVector) {
+    edgeMat <- cbind(parentVector, seq_along(parentVector))
+    isRoot <- edgeMat[, 1] %in% 0
+    rootNd <- edgeMat[isRoot, 2]
+    ## logical index + drop = FALSE: stays a 2-column matrix with 0 or 1 edges left
+    structure(edgeMat[!isRoot, , drop = FALSE], root = rootNd)
+}
+
+## Edge lengths of every non-root node; the -999 placeholder for "missing" becomes NA
+get_edge_length <- function(branchLengthVector, parentVector) {
+    nonRoot <- which(parentVector != 0)
+    lens <- branchLengthVector[nonRoot]
+    replace(lens, lens == -999, NA)
+}
+
+## TRUE when at least one element of nodeLabelsVector is a non-empty string
+has_node_labels <- function(nodeLabelsVector) {
+    sum(nzchar(nodeLabelsVector)) > 0L
+}
+
+
+## Assembles, for each tree in `ncl`, the elements a phylo object is made of
+## (edge, tip.label, Nnode, plus edge.length / node.label when available);
+## the lists are left unclassed so that singletons can still be handled
+build_raw_phylo <- function(ncl, missing_edge_length) {
+    nTrees <- length(ncl$trees)
+    if (!(nTrees > 0)) {
+        return(NULL)
+    }
+
+    listTrees <- vector("list", nTrees)
+    names(listTrees) <- ncl$treeNames
+
+    for (i in seq_len(nTrees)) {
+        parents <- ncl$parentVector[[i]]
+        ## edge matrix without its "root" attribute and dimnames
+        edgeMat <- get_edge_matrix(parents)
+        rootNd <- attr(edgeMat, "root")
+        attr(edgeMat, "root") <- NULL
+        attr(edgeMat, "dimnames") <- NULL
+
+        edgeLgth <- get_edge_length(ncl$branchLengthVector[[i]], parents)
+        tipLbl <- ncl$taxonLabelVector[[i]]
+        tr <- list(edge=edgeMat, tip.label=tipLbl,
+                   Nnode=length(parents) - length(tipLbl))
+
+        ## edge lengths are kept only if at least one of them is known
+        naLgth <- is.na(edgeLgth)
+        if (!all(naLgth)) {
+            if (!any(naLgth)) {
+                tr <- c(tr, list(edge.length=edgeLgth))
+            } else {
+                if (length(missing_edge_length) != 1L) {
+                    stop("A single numerical value should be provided for the missing edge length.")
+                }
+                if (!is.na(missing_edge_length)) {
+                    if (mode(missing_edge_length) != "numeric") {
+                        stop("A single numerical value should be provided for the missing edge lengths.")
+                    }
+                    warning("missing edge lengths replaced by ", sQuote(missing_edge_length), ".")
+                    edgeLgth[naLgth] <- missing_edge_length
+                    tr <- c(tr,  list(edge.length = edgeLgth))
+                } else {
+                    warning("missing edge lengths are not allowed in phylo class. All removed.")
+                }
+            }
+        }
+
+        nodeLbl <- ncl$nodeLabelsVector[[i]]
+        if (has_node_labels(nodeLbl)) {
+            nodeLbl[rootNd] <- nodeLbl[1]
+            firstInternal <- min(tr$edge[, 1])
+            tr <- c(tr, list(node.label=nodeLbl[firstInternal:length(nodeLbl)]))
+        }
+        listTrees[[i]] <- tr
+    }
+    listTrees
+}
+
+## Finishes the raw trees: collapses singletons, then classes them phylo/multiPhylo
+build_phylo <- function(ncl, simplify=FALSE, missing_edge_length) {
+    trees <- build_raw_phylo(ncl, missing_edge_length)
+    if (is.null(trees)) {
+        return(NULL)
+    }
+    ## a node that appears exactly once in the ancestor column has a single descendant
+    as_phylo <- function(tr) {
+        if (any(tabulate(tr$edge[, 1]) == 1L)) {
+            tr <- collapse_singles(tr)
+        }
+        structure(tr, class = "phylo")
+    }
+    trees <- lapply(trees, as_phylo)
+    if (length(trees) == 1 || simplify) {
+        return(trees[[1]])
+    }
+    structure(trees, class = "multiPhylo")
+}
+
+##' Create phylo objects from NEXUS or Newick files
+##'
+##' These functions read NEXUS or Newick files and return an object of
+##' class phylo/multiPhylo.
+##' @title Read phylogenetic trees from files
+##' @param file Path of NEXUS or Newick file
+##' @param simplify If the file includes more than one tree, returns
+##'     only the first tree; otherwise, returns a multiPhylo object
+##' @param missing_edge_length If the tree contains missing edge
+##'     lengths, the value to be attributed to these edge lengths. By
+##'     default (\code{missing_edge_length = NA}), if at least one edge
+##'     length is missing, they are all removed. Otherwise, the value
+##'     must be a single numeric value. In any case, a warning will
+##'     be generated if the tree contains missing edge lengths.
+##' @param ... additional parameters to be passed to the rncl function
+##' @return A phylo or a multiPhylo object
+##' @author Francois Michonneau
+##' @seealso rncl-package
+##' @rdname read_nexus_phylo
+##' @note \code{make_phylo} is deprecated, use
+##' \code{read_nexus_phylo} or \code{read_newick_phylo} instead.
+##' @export
+read_nexus_phylo <- function(file, simplify=FALSE, missing_edge_length = NA, ...) {
+    internal_make_phylo(file.format = "nexus", file = file, simplify = simplify,
+                        missing_edge_length = missing_edge_length, ...)
+}
+
+##' @rdname read_nexus_phylo
+##' @export
+read_newick_phylo <- function(file, simplify=FALSE, missing_edge_length = NA, ...) {
+    internal_make_phylo(file.format = "newick", file = file, simplify = simplify,
+                        missing_edge_length = missing_edge_length, ...)
+}
+
+internal_make_phylo <- function(file, simplify=FALSE, missing_edge_length = NA, ...) {
+    parsed <- rncl(file=file, ...)  # extra arguments (e.g. file.format) go to rncl()
+    build_phylo(parsed, missing_edge_length = missing_edge_length, simplify = simplify)
+}
+
+##' @rdname read_nexus_phylo
+##' @export
+make_phylo <- function(file, simplify=FALSE, missing_edge_length = NA, ...) {
+    hint <- paste0("Use ", sQuote("read_nexus_phylo"), " or ", sQuote("read_newick_phylo"), " instead")
+    .Deprecated(msg = hint)  # deprecation warning is raised before any parsing starts
+    internal_make_phylo(missing_edge_length = missing_edge_length, simplify = simplify, file = file, ...)
+}
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..9d18862
--- /dev/null
+++ b/README.md
@@ -0,0 +1,43 @@
+[![Build Status](https://travis-ci.org/fmichonneau/rncl.svg)](https://travis-ci.org/fmichonneau/rncl)
+[![Build status](https://ci.appveyor.com/api/projects/status/bfcjqt83esp0nnak)](https://ci.appveyor.com/project/fmichonneau/rncl)
+[![Coverage Status](https://coveralls.io/repos/fmichonneau/rncl/badge.svg)](https://coveralls.io/r/fmichonneau/rncl)
+![](http://cranlogs.r-pkg.org/badges/rncl)
+[![Research software impact](http://depsy.org/api/package/cran/rncl/badge.svg)](http://depsy.org/package/r/rncl)
+
+# An R interface to the NEXUS Class Library
+
+This R package provides an interface to the C++ library
+[NCL](http://phylo.bio.ku.edu/ncldocs/v2.1/funcdocs/index.html). It can
+efficiently parse common file formats used to store phylogenetic trees, especially
+NEXUS and Newick files.
+
+This package is primarily intended to be used by package developers as it
+extracts the elements needed to build R objects that represent the content of
+the file. For instance, [phylobase](https://github.com/fmichonneau/phylobase)
+uses `rncl` to extract trees and/or data stored in NEXUS and Newick files to
+create objects of class `phylo4` or `phylo4d`.
+
+The package however provides two functions for users: `read_nexus_phylo()` and
+`read_newick_phylo()`. They read NEXUS and Newick files respectively, and return
+(a valid) `phylo` or `multiPhylo` object from the package
+[ape](https://cran.r-project.org/package=ape). These functions differ from those
+found in ape (respectively `read.nexus` and `read.tree`) as `rncl` functions can
+read trees with singletons, and missing branch lengths. However, `rncl` adheres
+to the NEXUS standards: it only accepts tip labels without white spaces, and tip
+labels cannot be duplicated in the same tree.
+
+
+# Development versions for Windows
+
+Because this package contains some C++ code, it can be tricky to build if you
+are using Windows. Unless you need a feature only available on GitHub, install
+`rncl` from CRAN.
+
+Otherwise, you can obtain a binary version from
+[here](https://ci.appveyor.com/project/fmichonneau/rncl/build/artifacts) (unless
+the AppVeyor badge on top is gray, in which case you can download an older
+version or come back in a few minutes, or red meaning the current version is
+broken and you need to get an older version). Once in appveyor, look for the
+file named `rncl_X.Y.Z.zip` where `X.Y.Z` represents the version number (e.g.,
+`rncl_0.4.0.zip`). Then you can install this compiled version of the package
+directly from R.
diff --git a/cleanup b/cleanup
new file mode 100755
index 0000000..53b0423
--- /dev/null
+++ b/cleanup
@@ -0,0 +1,65 @@
+# Remove generated build artefacts. Commands run from the package root
+# until the cd into src/ncl below. Later paths are relative to src/ncl.
+rm -f confdefs.h config.log config.status
+rm -rf a.out.dSYM
+rm -rf src/bin
+rm -f src/*.o src/*.so
+rm -f src/RcppSrc/*.o src/RcppSrc/*.a inst/Rcpp-version.txt
+rm -f inst/doc/*.cpp inst/doc/*.hpp inst/doc/*.R
+rm -f inst/doc/*.Rd inst/doc/*.aux inst/doc/*.log inst/doc/*.tex
+rm -f vignettes/*.aux vignettes/*.log vignettes/*.tex vignettes/*.toc vignettes/*.out
+rm -f inst/Rcpp-license.txt
+rm -rf inst/doc/auto
+rm -rf autom4te.cache
+rm -rf src/include src/lib src/ncl/autom4te.cache
+rm -f tests/.RData
+rm -f tests/Rplots.pdf
+# Everything below operates inside src/ncl. Stop if it is missing so the
+# relative rm and find commands cannot run against the wrong directory.
+cd src/ncl || exit 0
+# Run make clean first, while the subdirectory Makefiles it may use still exist.
+test -f Makefile && make clean
+rm -f stamp-h1
+rm -f libtool
+rm -f configure.lineno
+rm -f Makefile
+rm -rf ncl/.deps autom4te.cache
+rm -f nclv2.1.pc config.log config.status config.h
+find . -name \*~ -exec rm {} \;
+find . -name \*.flc -exec rm {} \;
+# Generated example Makefiles, binaries and dependency directories.
+# The .deps entries are directories, so they need rm -rf rather than rm -f.
+rm -f example/Makefile
+rm -f example/gapcode/NEXUSgapcode
+rm -rf example/gapcode/.deps
+rm -f example/gapcode/Makefile
+rm -f example/normalizer/NEXUSinspector
+rm -f example/normalizer/NCLconverter
+rm -f example/normalizer/NEXUSnormalizer
+rm -f example/normalizer/NEXUSvalidator
+rm -rf example/normalizer/.deps
+rm -f example/normalizer/NEXUSunion
+rm -f example/normalizer/NEX_us2ml
+rm -f example/normalizer/Makefile
+rm -f example/patristic/patristicmat
+rm -rf example/patristic/.deps
+rm -f example/patristic/Makefile
+rm -f example/splitsinfile/NEXUStosplits
+rm -rf example/splitsinfile/.deps
+rm -f example/splitsinfile/Makefile
+rm -f example/basicfactory/basicfactory
+rm -rf example/basicfactory/.deps
+rm -f example/basicfactory/Makefile
+rm -rf example/ncltest/.deps
+rm -f example/ncltest/ncltest
+rm -f example/ncltest/Makefile
+rm -rf example/basiccmdline/.deps
+rm -f example/basiccmdline/basiccmdline
+rm -f example/basiccmdline/Makefile
+rm -rf example/nclsimplest/.deps
+rm -f example/nclsimplest/Makefile
+rm -f example/nclsimplest/nclsimplest
+rm -f example/translate/NEXUStranslate
+rm -rf example/translate/.deps
+rm -f example/translate/Makefile
+rm -f ncl/Makefile
diff --git a/debian/README.test b/debian/README.test
deleted file mode 100644
index 4fe93f7..0000000
--- a/debian/README.test
+++ /dev/null
@@ -1,9 +0,0 @@
-Notes on how this package can be tested.
-────────────────────────────────────────
-
-This package can be tested by running the provided test:
-
-cd tests
-LC_ALL=C R --no-save < test-all.R
-
-in order to confirm its integrity.
diff --git a/debian/changelog b/debian/changelog
deleted file mode 100644
index 1f3c581..0000000
--- a/debian/changelog
+++ /dev/null
@@ -1,23 +0,0 @@
-r-cran-rncl (0.8.2-1) unstable; urgency=medium
-
-  * New upstream version
-    Closes: #848553
-  * debhelper 10
-  * d/watch: version=4
-  * Convert to dh-r
-  * Canonical homepage for CRAN
-  * New Build-Depends: r-cran-progress
-
- -- Andreas Tille <tille at debian.org>  Mon, 19 Dec 2016 00:03:00 +0100
-
-r-cran-rncl (0.6.0-2) unstable; urgency=medium
-
-  * Add missing Dependency: r-cran-rcpp
-
- -- Andreas Tille <tille at debian.org>  Thu, 28 Apr 2016 00:08:06 +0200
-
-r-cran-rncl (0.6.0-1) unstable; urgency=low
-
-  * Initial release (Closes: #818976)
-
- -- Andreas Tille <tille at debian.org>  Tue, 22 Mar 2016 14:29:52 +0100
diff --git a/debian/compat b/debian/compat
deleted file mode 100644
index f599e28..0000000
--- a/debian/compat
+++ /dev/null
@@ -1 +0,0 @@
-10
diff --git a/debian/control b/debian/control
deleted file mode 100644
index 5931ec6..0000000
--- a/debian/control
+++ /dev/null
@@ -1,27 +0,0 @@
-Source: r-cran-rncl
-Maintainer: Debian Med Packaging Team <debian-med-packaging at lists.alioth.debian.org>
-Uploaders: Andreas Tille <tille at debian.org>
-Section: gnu-r
-Priority: optional
-Build-Depends: debhelper (>= 10),
-               dh-r,
-               r-base-dev,
-               r-cran-rcpp,
-               r-cran-progress
-Standards-Version: 3.9.8
-Vcs-Browser: https://anonscm.debian.org/viewvc/debian-med/trunk/packages/R/r-cran-rncl/trunk/
-Vcs-Svn: svn://anonscm.debian.org/debian-med/trunk/packages/R/r-cran-rncl/trunk/
-Homepage: https://cran.r-project.org/package=rncl
-
-Package: r-cran-rncl
-Architecture: any
-Depends: ${misc:Depends},
-         ${shlibs:Depends},
-         ${R:Depends}
-Recommends: ${R:Recommends}
-Suggests: ${R:Suggests}
-Description: GNU R interface to the Nexus Class Library
- This R package provides an interface to the Nexus Class Library which
- allows parsing of NEXUS, Newick and other phylogenetic tree file
- formats. It provides elements of the file that can be used to build
- phylogenetic objects such as ape's 'phylo' or phylobase's 'phylo4(d)'.
diff --git a/debian/copyright b/debian/copyright
deleted file mode 100644
index 0cabc41..0000000
--- a/debian/copyright
+++ /dev/null
@@ -1,36 +0,0 @@
-Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
-Upstream-Contact: Francois Michonneau <francois.michonneau at gmail.com>
-Upstream-Name: rncl
-Source: https://cran.r-project.org/package=rncl
-
-Files: *
-Copyright: 2013-2016 Francois Michonneau, Ben Bolker, Mark Holder, Paul Lewis, Brian O'Meara
-License: BSD-2-clause
-
-Files: debian/*
-Copyright: 2016 Andreas Tille <tille at debian.org>
-License: BSD-2-clause
-
-License: BSD-2-clause
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
- .
- 1. Redistributions of source code must retain the above copyright
-    notice, this list of conditions and the following disclaimer.
- .
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
- .
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
- IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
- TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/debian/docs b/debian/docs
deleted file mode 100644
index 960011c..0000000
--- a/debian/docs
+++ /dev/null
@@ -1,3 +0,0 @@
-tests
-debian/README.test
-debian/tests/run-unit-test
diff --git a/debian/rules b/debian/rules
deleted file mode 100755
index 1205192..0000000
--- a/debian/rules
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/usr/bin/make -f
-
-%:
-	dh $@ --buildsystem R
-
-override_dh_install:
-	dh_install
-	find debian -name LICENSE -delete
-
-override_dh_fixperms:
-	dh_fixperms
-	find debian -name "*.nex" -exec chmod -x \{\} \;
diff --git a/debian/source/format b/debian/source/format
deleted file mode 100644
index 163aaf8..0000000
--- a/debian/source/format
+++ /dev/null
@@ -1 +0,0 @@
-3.0 (quilt)
diff --git a/debian/tests/control b/debian/tests/control
deleted file mode 100644
index b044b0c..0000000
--- a/debian/tests/control
+++ /dev/null
@@ -1,3 +0,0 @@
-Tests: run-unit-test
-Depends: @, r-cran-testthat
-Restrictions: allow-stderr
diff --git a/debian/tests/run-unit-test b/debian/tests/run-unit-test
deleted file mode 100644
index c5256ac..0000000
--- a/debian/tests/run-unit-test
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/bin/sh -e
-
-oname=rncl
-pkg=r-cran-`echo $oname | tr [A-Z] [a-z]`
-
-if [ "$ADTTMP" = "" ] ; then
-  ADTTMP=`mktemp -d /tmp/${pkg}-test.XXXXXX`
-fi
-cd $ADTTMP
-cp -a /usr/share/doc/${pkg}/tests/* $ADTTMP
-LC_ALL=C R --no-save < test-all.R
-rm -fr $ADTTMP/*
diff --git a/debian/watch b/debian/watch
deleted file mode 100644
index 4beeae7..0000000
--- a/debian/watch
+++ /dev/null
@@ -1,3 +0,0 @@
-version=4
-http://cran.r-project.org/src/contrib/rncl_([-0-9\.]*).tar.gz
-
diff --git a/inst/newick_bad/Gudrun.nex b/inst/newick_bad/Gudrun.nex
new file mode 100644
index 0000000..f7576d3
--- /dev/null
+++ b/inst/newick_bad/Gudrun.nex
@@ -0,0 +1,149 @@
+#NEXUS
+[R-package APE, Wed Mar 19 07:11:50 2014]
+
+BEGIN TAXA;
+	DIMENSIONS NTAX = 68;
+	TAXLABELS
+		Laccaria_bicolor
+		Gloeophyllum_trabeum
+		Dacryopinax_sp._DJM-731_SS1
+		Trichosporon_asahii
+		Rhodosporidium_toruloides
+		Tetrapisispora_phaffii
+		Pyrenophora_tritici-repentis
+		Pyrenophora_teres
+		Setosphaeria_turcica
+		Bipolaris_zeicola
+		Bipolaris_victoriae
+		Bipolaris_sorokiniana
+		Bipolaris_oryzae
+		Bipolaris_maydis
+		Leptosphaeria_maculans
+		Parastagonospora_nodorum
+		Baudoinia_compniacensis
+		Pseudocercospora_fijiensis
+		Mycosphaerella_pini
+		Sphaerulina_musiva
+		Neofusicoccum_parvum
+		Macrophomina_phaseolina
+		Cyphellophora_europaea
+		Exophiala_dermatitidis
+		Coniosporium_apollinis
+		Cladophialophora_carrionii
+		Byssochlamys_spectabilis
+		Talaromyces_islandicus
+		Talaromyces_stipitatus
+		Talaromyces_marneffei
+		Penicillium_chrysogenum
+		Penicillium_simplicissimum
+		Penicillium_digitatum
+		Aspergillus_kawachii
+		Aspergillus_niger
+		Aspergillus_nidulans
+		Aspergillus_oryzae
+		Coccidioides_immitis
+		Coccidioides_posadasii
+		Eutypa_lata
+		Pestalotiopsis_fici
+		Colletotrichum_gloeosporioides
+		Colletotrichum_higginsianum
+		Colletotrichum_graminicola
+		Colletotrichum_orbiculare
+		Fusarium_verticillioides
+		Fusarium_fujikuroi
+		Fusarium_graminearum
+		Nectria_haematococca
+		Fusarium_oxysporum
+		Ophiocordyceps_sinensis
+		Cordyceps_militaris
+		Beauveria_bassiana
+		Sporothrix_schenckii
+		Ophiostoma_piceae
+		Grosmannia_clavigera
+		Gaeumannomyces_graminis
+		Podospora_anserina
+		Thielavia_terrestris
+		Chaetomium_thermophilum
+		Myceliophthora_thermophila
+		Sclerotinia_borealis
+		Sclerotinia_sclerotiorum
+		Botryotinia_fuckeliana
+		Marssonina_brunnea
+		Pyronema_omphalodes
+		Dactylellina_haptotyla
+		Arthrobotrys_oligospora
+	;
+END;
+BEGIN TREES;
+	TRANSLATE
+		1	Laccaria_bicolor,
+		2	Gloeophyllum_trabeum,
+		3	Dacryopinax_sp._DJM-731_SS1,
+		4	Trichosporon_asahii,
+		5	Rhodosporidium_toruloides,
+		6	Tetrapisispora_phaffii,
+		7	Pyrenophora_tritici-repentis,
+		8	Pyrenophora_teres,
+		9	Setosphaeria_turcica,
+		10	Bipolaris_zeicola,
+		11	Bipolaris_victoriae,
+		12	Bipolaris_sorokiniana,
+		13	Bipolaris_oryzae,
+		14	Bipolaris_maydis,
+		15	Leptosphaeria_maculans,
+		16	Parastagonospora_nodorum,
+		17	Baudoinia_compniacensis,
+		18	Pseudocercospora_fijiensis,
+		19	Mycosphaerella_pini,
+		20	Sphaerulina_musiva,
+		21	Neofusicoccum_parvum,
+		22	Macrophomina_phaseolina,
+		23	Cyphellophora_europaea,
+		24	Exophiala_dermatitidis,
+		25	Coniosporium_apollinis,
+		26	Cladophialophora_carrionii,
+		27	Byssochlamys_spectabilis,
+		28	Talaromyces_islandicus,
+		29	Talaromyces_stipitatus,
+		30	Talaromyces_marneffei,
+		31	Penicillium_chrysogenum,
+		32	Penicillium_simplicissimum,
+		33	Penicillium_digitatum,
+		34	Aspergillus_kawachii,
+		35	Aspergillus_niger,
+		36	Aspergillus_nidulans,
+		37	Aspergillus_oryzae,
+		38	Coccidioides_immitis,
+		39	Coccidioides_posadasii,
+		40	Eutypa_lata,
+		41	Pestalotiopsis_fici,
+		42	Colletotrichum_gloeosporioides,
+		43	Colletotrichum_higginsianum,
+		44	Colletotrichum_graminicola,
+		45	Colletotrichum_orbiculare,
+		46	Fusarium_verticillioides,
+		47	Fusarium_fujikuroi,
+		48	Fusarium_graminearum,
+		49	Nectria_haematococca,
+		50	Fusarium_oxysporum,
+		51	Ophiocordyceps_sinensis,
+		52	Cordyceps_militaris,
+		53	Beauveria_bassiana,
+		54	Sporothrix_schenckii,
+		55	Ophiostoma_piceae,
+		56	Grosmannia_clavigera,
+		57	Gaeumannomyces_graminis,
+		58	Podospora_anserina,
+		59	Thielavia_terrestris,
+		60	Chaetomium_thermophilum,
+		61	Myceliophthora_thermophila,
+		62	Sclerotinia_borealis,
+		63	Sclerotinia_sclerotiorum,
+		64	Botryotinia_fuckeliana,
+		65	Marssonina_brunnea,
+		66	Pyronema_omphalodes,
+		67	Dactylellina_haptotyla,
+		68	Arthrobotrys_oligospora
+	;
+	TREE * UNTITLED = [&R] ((((1,2),3,4),5),(6,((((((7,8),9,(10,11,12,13,14)),15,16),(17,(18,19,20)),(21,22)),((23,(24,25,26)),((27,(28,29,30),((31,32,33),(34,35,36,37))),(38,39))),(((40,41),((42,43,44,45),(((46,47),48,49,50),51,(52,53))),((54,55,56),57,(58,(59,60,61)))),(((62,63),64),65))),66,(67,68))));
+END;
diff --git a/inst/newick_bad/bad_newick.tre b/inst/newick_bad/bad_newick.tre
new file mode 100644
index 0000000..2d4a54e
--- /dev/null
+++ b/inst/newick_bad/bad_newick.tre
@@ -0,0 +1 @@
+((((Tinamiformes_292467:1.0E-22,((Apteryx_241840:1.0E-22)Apterygiformes_816668:0.03818,((Dromaius_283193:1.0E-22)Dromaiidae_283194:0.01843,(Casuarius_589156:1.0E-22)Casuariidae_589161:0.014445)Casuariiformes_589166:0.030882):0.00232,Tinamidae_292469:0.091284,(((Crypturellus_870604:0.061151,Tinamus_402450:0.042862):0.01736,(Eudromia_292460:0.084637,Nothoprocta_292463:0.080725):0.00491,(Crypturellus_870604:0.061151,Tinamus_402450:0.042862):0.01736,(Eudromia_292460:0.084637,Nothoprocta_2924 [...]
diff --git a/inst/newick_good/Gudrun.tre b/inst/newick_good/Gudrun.tre
new file mode 100644
index 0000000..2c2509e
--- /dev/null
+++ b/inst/newick_good/Gudrun.tre
@@ -0,0 +1 @@
+((((Laccaria_bicolor,Gloeophyllum_trabeum),Dacryopinax_sp._DJM_731_SS1,Trichosporon_asahii),Rhodosporidium_toruloides),(Tetrapisispora_phaffii,((((((Pyrenophora_tritici_repentis,Pyrenophora_teres),Setosphaeria_turcica,(Bipolaris_zeicola,Bipolaris_victoriae,Bipolaris_sorokiniana,Bipolaris_oryzae,Bipolaris_maydis)),Leptosphaeria_maculans,Parastagonospora_nodorum),(Baudoinia_compniacensis,(Pseudocercospora_fijiensis,Mycosphaerella_pini,Sphaerulina_musiva)),(Neofusicoccum_parvum,Macrophomina [...]
diff --git a/inst/newick_good/missing_edge_lengths.tre b/inst/newick_good/missing_edge_lengths.tre
new file mode 100644
index 0000000..1ee4167
--- /dev/null
+++ b/inst/newick_good/missing_edge_lengths.tre
@@ -0,0 +1 @@
+((('Mccoskerichthys sandae':0.083251,'Neoclinus blanchardi':0.064809),('Stathmonotus stahli':0.067711,('Stathmonotus culebrae':0.060052,'Stathmonotus lugubris':0.092242))),((('Coralliozetus angelicus':0.04122,'Coralliozetus micropes':0.061453),(('Coralliozetus boehlkei':0.02144,'Coralliozetus rosenblatti':0.0169),('Coralliozetus cardonae':0.094764,'Coralliozetus springeri':0.063853))),((('Cirriemblemaria lucasana':0.084154,'Protemblemaria bicirris':0.051726),('Emblemariopsis randalli':0. [...]
\ No newline at end of file
diff --git a/inst/newick_good/simpleTree.tre b/inst/newick_good/simpleTree.tre
new file mode 100644
index 0000000..b946a59
--- /dev/null
+++ b/inst/newick_good/simpleTree.tre
@@ -0,0 +1 @@
+((((A_1:0.1,B__2:0.1)cats:0.1,(C:0.1,D:0.1)dogs:0.1)mammals:0.1):0.1)fur:0.1;
\ No newline at end of file
diff --git a/inst/newick_good/singleton_tree.tre b/inst/newick_good/singleton_tree.tre
new file mode 100644
index 0000000..2a4358a
--- /dev/null
+++ b/inst/newick_good/singleton_tree.tre
@@ -0,0 +1 @@
+(((((A)cats,B)dogs,(C,D)mammals)tetrapods)animals,E)life;
diff --git a/inst/newick_good/singleton_with_edge_length.tre b/inst/newick_good/singleton_with_edge_length.tre
new file mode 100644
index 0000000..6f0bb3f
--- /dev/null
+++ b/inst/newick_good/singleton_with_edge_length.tre
@@ -0,0 +1 @@
+(((((A:0.1)cats:0.2,B:0.3)dogs:0.4,(C:0.5,D:0.6)mammals:0.7)tetrapods:0.8)animals:0.9,E:1.0)life;
diff --git a/inst/newick_good/test_sing.tre b/inst/newick_good/test_sing.tre
new file mode 100644
index 0000000..0ced7cf
--- /dev/null
+++ b/inst/newick_good/test_sing.tre
@@ -0,0 +1 @@
+(((a:1,b:1):1,(c:1,d:1):1):2):3;
\ No newline at end of file
diff --git a/inst/newick_good/tree1.tre b/inst/newick_good/tree1.tre
new file mode 100644
index 0000000..5884883
--- /dev/null
+++ b/inst/newick_good/tree1.tre
@@ -0,0 +1 @@
+(A:0.1,B:0.2,(C:0.3,D:0.4)E:0.5)F;
\ No newline at end of file
diff --git a/inst/newick_good/tree2.tre b/inst/newick_good/tree2.tre
new file mode 100644
index 0000000..016564d
--- /dev/null
+++ b/inst/newick_good/tree2.tre
@@ -0,0 +1 @@
+(A,B,C,D)E;
\ No newline at end of file
diff --git a/inst/nexusfiles/MultiLineTrees.nex b/inst/nexusfiles/MultiLineTrees.nex
new file mode 100644
index 0000000..874a201
--- /dev/null
+++ b/inst/nexusfiles/MultiLineTrees.nex
@@ -0,0 +1,89 @@
+#NEXUS 
+
+Begin trees;
+	Translate
+		1 Acorus,
+		2 Protarum,
+		3 Biarum,
+		4 Helicodiceros,
+		5 Eminium,
+		6 Dracunculus,
+		7 Pinellia,
+		8 Peltandra,
+		9 Steudnera,
+		10 Remusatia,
+		11 Colocasia,
+		12 Arum,
+		13 Callopsis,
+		14 Spathicarpa,
+		15 Dieffenbachia,
+		16 Dracontium,
+		17 Anaphyllopsis,
+		18 Gonatopus,
+		19 Epipremnum,
+		20 Scindapsus,
+		21 Anadendrum,
+		22 Stenospermation,
+		23 Monstera,
+		24 Rhodospatha,
+		25 Holochlamys,
+		26 Heteropsis,
+		27 Amydrium,
+		28 Rhaphidophora,
+		29 Spathiphyllum,
+		30 Pothos,
+		31 Anthurium,
+		32 Cercestis,
+		33 Aglaonema1,
+		34 Montrichardia,
+		35 Philodendron,
+		36 Anubias,
+		37 Nephthytis,
+		38 Rhektophyllum,
+		39 Anchomanes,
+		40 Typhonodorum,
+		41 Typhonium,
+		42 Spirodela,
+		43 Landoltia,
+		44 Asterostigma,
+		45 Zantedeschia,
+		46 Calla,
+		47 Schismatoglottis,
+		48 Zamioculcas,
+		49 Culcasia,
+		50 Cyrtosperma,
+		51 Aglaonema,
+		52 Scaphispatha,
+		53 Chlorospatha,
+		54 Arophyton,
+		55 Jasarum,
+		56 Caladium,
+		57 Xanthosoma,
+		58 Hapaline,
+		59 Ambrosina,
+		60 Alocasia,
+		61 Pistia,
+		62 Homalomena,
+		63 Amorphophallus,
+		64 Alloschemone,
+		65 Arisaema,
+		66 Symplocarpus,
+		67 Orontium,
+		68 Lysichiton,
+		69 Gymnostachys
+		;
+tree PAUP_1 = [&U] (1:70,((((((((((((((((((((2:4,(((3:0,((4:1,12:0):0,(5:2,6:0):1):0):2,7:7):1,
+  ((9:1,10:0):1,11:0):1):0):2,61:13):0,(60:3,65:4):1):1,41:3):5,59:20):1,8:4):0,40:8):4,
+  ((((52:5,(53:2,54:6):6):2,58:7):1,((55:4,57:4):0,56:2):2):3,63:6):1):11,((32:2,37:1):3,
+  (33:1,51:0):2):3):1,(35:0,62:2):9):1,34:9):3,(13:5,45:13):4):0,(36:5,(38:0,39:0):4):1):2,
+  ((14:7,15:10):6,44:13):10):4,(46:17,47:6):3):3,(((16:1,17:1):0,50:1):13,(18:2,48:5):2):3):1,49:8):6,
+  ((((19:4,20:1):1,(((21:4,(23:2,(25:5,29:3):3):0):1,28:2):0,27:2):0):1,(((22:3,64:11):0,26:3):1,
+  24:2):1):7,(30:6,31:31):4):11):11,(42:11,43:14):20):38,(69:17,((66:3,68:3):1,67:0):12):1):36);
+tree PAUP_2 = [&U] (1:70,((((((((((((((((((((2:4,(((3:0,((4:1,12:0):0,(5:2,6:0):1):0):2,7:7):1,
+  ((9:1,10:0):1,11:0):1):0):2,61:13):0,(60:3,65:4):1):1,41:3):5,59:20):1,8:4):0,40:8):4,
+  ((((52:5,(53:2,54:6):6):2,58:7):1,((55:4,57:4):0,56:2):2):3,63:6):1):11,((32:2,37:1):3,
+  (33:1,51:0):2):3):1,(35:0,62:2):9):1,34:9):3,(13:5,45:13):4):0,(36:5,(38:0,39:0):4):1):2,
+  ((14:7,15:10):6,44:13):10):4,(46:17,47:6):3):3,(((16:1,17:1):0,50:1):13,(18:2,48:5):2):3):1,49:8):6,
+  ((((19:4,20:1):1,(((21:4,(23:2,(25:5,29:3):3):0):1,28:2):0,27:2):0):1,(((22:3,64:11):0,26:3):1,
+  24:2):1):7,(30:6,31:31):4):11):11,(42:11,43:14):20):38,(69:17,((66:3,68:3):1,67:0):12):1):36);
+End;
diff --git a/inst/nexusfiles/badnex.nex b/inst/nexusfiles/badnex.nex
new file mode 100644
index 0000000..c3b2b04
--- /dev/null
+++ b/inst/nexusfiles/badnex.nex
@@ -0,0 +1,101 @@
+#NEXUS
+[written Sat Oct 23 12:11:18 PDT 2010 by Mesquite  version 2.6 (build 486)
+at Macintosh-101.local/10.0.1.6]
+
+BEGIN TAXA;
+   TITLE Taxa;
+   DIMENSIONS NTAX=32;
+   TAXLABELS
+       S1FS3_S3FS1_&_S2LS4 Desulfodehalobacter_spongiphilus_strain_
+Desulfobacterium_indolicum_strain_DSM_33
+Desulfobacter_postgatei_strain_DSM_2034_ Geobacter_sulfurreducens_#U13928
+S2HS1_&_S3HS1 Geobacter_chapelleii_strain_172_#NR_0259 F2HS1b
+Shewanella_putrefaciens_strain_Hac334_#D Shewanella_aquimarina_#AY485225
+Pseudomonas_meridiana_strain_CMS_38T_AJ5 F2FS1_&_F3FS2 F2HS1_&_F2HS3
+Pseudomonas_stutzeri_strain_LS401_#U2641 F2FS2_&_F3HS2
+Marinobacter_hydrocarbonoclasticus_strai S3FS4_S3FS5_&_S3FS6
+Marinobacter_guineae_strain_LMG_24048_#A Marinobacter_lipolyticus_strain_SM
+S1HS1 Desulfopila_aestuarii_#AB110542 S3FS2
+S1HS2_S1HS3_S1HS4_S1HS5_S3HS4_&_F2HA2a F3FS1
+Desulfovibrio_putealis_#AY574979 Desulfovibrio_desulfuricans_subsp._desul
+S2LS1 Desulfovibrio_bizertensis_strain_MB3_#DQ S1FS1
+Desulfovibrio_marinisediminis_#AB353727 S2LS3_S3LS1_S3FS3_&_F2HA2b
+Acidobacterium_capsulatum_#D26171
+   ;
+
+END;
+
+
+BEGIN TREES;
+   Title Imported_trees;
+   LINK Taxa = Taxa;
+   TRANSLATE
+       1 S1FS3_S3FS1_&_S2LS4,
+       2 D_s_strain_,
+       3 D_i_strain_DSM_3,
+       4 D_p_strain_DSM_2_,
+       5 G_s_#,
+       6 S2HS1_&_S3HS1,
+       7 G_c_strain_1_#NR_0,
+       8 F2HS1b,
+       9 S_p_strain_H_#D,
+       10 S_a_#,
+       11 P_m_strain_C_3_A,
+       12 F2FS1_&_F3FS2,
+       13 F2HS1_&_F2HS3,
+       14 P_s_strain_L_#,
+       15 F2FS2_&_F3HS2,
+       16 M_h_s,
+       17 S3FS4_S3FS5_&_S3FS6,
+       18 M_g_s_L_2_#A,
+       19 M_l_s_S,
+       20 S1HS1,
+       21 D_a_#A,
+       22 S3FS2,
+       23 S1HS2_S1HS3_S1HS4_S1HS5_S3HS4_&_F2HA2a,
+       24 F3FS1,
+       25 D_p_#,
+       26 D_d_s._d,
+       27 S2LS1,
+       28 D_b_s_M_#D,
+       29 S1FS1,
+       30 D_m_#A,
+       31 S2LS3_S3LS1_S3FS3_&_F2HA2b,
+       32 A_c_#;
+   TREE Imported_tree_0 =
+(((1:0.014294865658239897,2:0.016346131753724284)100:0.04531876862101268,(3:0.07787046050007705,(4:0.1575116382471088,((((5:0.04822215084893891,(6:0.0271216303442505,7:0.02007948214228989)100:0.03528659400373911)99:0.05981681725589428,(((8:1.12919178858362E-6,9:1.12919178858362E-6)100:0.057530109591399765,10:0.037691124354788864)100:0.08304244998885398,((11:0.030702696010829537,((12:8.544451267088965E-4,13:1.12919178858362E-6)100:0.020184245020285173,(14:1.12919178858362E-6,15:0.00341810 [...]
+ 
+ 0!
+.025419418511636843,((24:0.008361788925720447,25:8.737200748323158E-4)100:0.12954900469798472,(26:0.12890159096837994,((27:1.12919178858362E-6,28:0.004140667057671182)100:0.07459471196756032,(29:1.12919178858362E-6,(30:1.12919178858362E-6,31:0.0033956963116614446)89:0.002536358334238893)100:0.14144374970927193)77:0.030026874274203138)83:0.03329468873634148)98:0.09177164340960056)45:0.034517167840112765)51:0.04130953485123759)27:0.02140636921130952)100:0.21688189318012785,32:0.21688189318 [...]
+
+END;
+
+
+Begin MESQUITE;
+       MESQUITESCRIPTVERSION 2;
+       TITLE AUTO;
+       tell ProjectCoordinator;
+       getEmployee #mesquite.minimal.ManageTaxa.ManageTaxa;
+       tell It;
+           setID 0 4448951287512167596;
+       endTell;
+       getWindow;
+       tell It;
+           suppress;
+           setResourcesState true false 100;
+           setPopoutState 400;
+           setExplanationSize 0;
+           setAnnotationSize 0;
+           setFontIncAnnot 0;
+           setFontIncExp 0;
+           setSize 700 464;
+           setLocation 8 8;
+           setFont SanSerif;
+           setFontSize 10;
+           getToolPalette;
+           tell It;
+           endTell;
+           desuppress;
+       endTell;
+       endTell;
+end;
diff --git a/inst/nexusfiles/co1.nex b/inst/nexusfiles/co1.nex
new file mode 100755
index 0000000..c066e8e
--- /dev/null
+++ b/inst/nexusfiles/co1.nex
@@ -0,0 +1,13 @@
+#NEXUS
+
+[ID: 0916634271]
+begin trees;
+   [Note: This tree contains information on the topology, 
+          branch lengths (if present), and the probability
+          of the partition indicated by the branch.]
+   tree con_50_majrule = (Cow:0.143336,Seal:0.225087,((((((Carp:0.171296,Loach:0.222039)1.00:0.194575,Frog:0.237101)0.76:0.073060,Chicken:0.546258)1.00:0.204809,Human:0.533183)0.99:0.124549,(Mouse:0.134574,Rat:0.113163)1.00:0.154442)0.88:0.055934,Whale:0.145592)0.93:0.047441);
+
+   [Note: This tree contains information only on the topology
+          and branch lengths (mean of the posterior probability density).]
+   tree con_50_majrule = (Cow:0.143336,Seal:0.225087,((((((Carp:0.171296,Loach:0.222039):0.194575,Frog:0.237101):0.073060,Chicken:0.546258):0.204809,Human:0.533183):0.124549,(Mouse:0.134574,Rat:0.113163):0.154442):0.055934,Whale:0.145592):0.047441);
+end;
diff --git a/inst/nexusfiles/multiLines.rds b/inst/nexusfiles/multiLines.rds
new file mode 100644
index 0000000..e104648
Binary files /dev/null and b/inst/nexusfiles/multiLines.rds differ
diff --git a/inst/nexusfiles/newick.tre b/inst/nexusfiles/newick.tre
new file mode 100644
index 0000000..e36dd9b
--- /dev/null
+++ b/inst/nexusfiles/newick.tre
@@ -0,0 +1 @@
+(a:1,(b:2,c:3)xx:4)yy;
\ No newline at end of file
diff --git a/inst/nexusfiles/test_empty.nex b/inst/nexusfiles/test_empty.nex
new file mode 100644
index 0000000..7a5a221
--- /dev/null
+++ b/inst/nexusfiles/test_empty.nex
@@ -0,0 +1,14 @@
+#NEXUS
+
+BEGIN TAXA;
+  DIMENSIONS NTAX=6;
+	TAXLABELS
+        cnidaria
+        porifera
+        ctenophora
+        protostomia
+        deuterostomia
+        xeno
+    ;
+END;
+
diff --git a/inst/nexusfiles/test_subset_alltaxa.nex b/inst/nexusfiles/test_subset_alltaxa.nex
new file mode 100644
index 0000000..7c09bf4
--- /dev/null
+++ b/inst/nexusfiles/test_subset_alltaxa.nex
@@ -0,0 +1,23 @@
+#NEXUS
+
+BEGIN TAXA;
+  DIMENSIONS NTAX=6;
+	TAXLABELS
+        cnidaria
+        porifera
+        ctenophora
+        protostomia
+        deuterostomia
+        xeno
+    ;
+END;
+
+BEGIN TREES;
+    TRANSLATE
+        1 deuterostomia,
+        2 protostomia,
+        3 porifera,
+        4 ctenophora,
+        5 cnidaria;
+    TREE hyp1 = (3,((4,5),(1,2)));
+END;
diff --git a/inst/nexusfiles/test_subset_taxa.nex b/inst/nexusfiles/test_subset_taxa.nex
new file mode 100644
index 0000000..1cdbdc0
--- /dev/null
+++ b/inst/nexusfiles/test_subset_taxa.nex
@@ -0,0 +1,27 @@
+#NEXUS
+
+BEGIN TAXA;
+  DIMENSIONS NTAX=6;
+	TAXLABELS
+        cnidaria
+        porifera
+        ctenophora
+        protostomia
+        deuterostomia
+        xeno
+    ;
+END;
+
+BEGIN TREES;
+    TRANSLATE
+        1 deuterostomia,
+        2 protostomia,
+        3 porifera,
+        4 ctenophora,
+        5 cnidaria,
+        6 xeno;
+    TREE hyp1 = (3,((4,5),(1,2)));
+    TREE hyp2 = (3:6,((4:2,5:1):4,(1:10,2:9):7):3);
+    TREE hyp3 = (3,(4,(6,(1,2))));
+    TREE hyp4 = (1,(2,(3,(4,(5,6)))));
+END;
diff --git a/inst/nexusfiles/test_underscores.nex b/inst/nexusfiles/test_underscores.nex
new file mode 100644
index 0000000..989c28f
--- /dev/null
+++ b/inst/nexusfiles/test_underscores.nex
@@ -0,0 +1,42 @@
+#NEXUS
+
+BEGIN TAXA;
+  DIMENSIONS NTAX=5;
+	TAXLABELS
+        cnidaria_1
+        porifera_2
+        ctenophora
+        protostomia
+        deuterostomia
+    ;
+END;
+
+BEGIN TREES;
+    TRANSLATE
+        1 deuterostomia,
+        2 protostomia,
+        3 porifera_2,
+        4 ctenophora,
+        5 cnidaria_1;
+    TREE hyp1 = (3,((4,5),(1,2)));
+END;
+
+
+BEGIN CHARACTERS;
+	TITLE  Test_underscores;
+	DIMENSIONS  NCHAR=2;
+	FORMAT DATATYPE = STANDARD GAP = - MISSING = ? SYMBOLS = "  0 1";
+	CHARSTATELABELS 
+		1 character_1 / tri di , 2 character_2 / marine not_only ; 
+	MATRIX
+	deuterostomia   11
+
+	protostomia     11
+
+	ctenophora     10
+
+	cnidaria_1       00
+
+	porifera_2    00
+;
+END;
diff --git a/inst/nexusfiles/treeWithDiscreteData.nex b/inst/nexusfiles/treeWithDiscreteData.nex
new file mode 100644
index 0000000..9cbbdfe
--- /dev/null
+++ b/inst/nexusfiles/treeWithDiscreteData.nex
@@ -0,0 +1,354 @@
+#NEXUS
+[written Tue May 29 18:24:39 PDT 2007 by Mesquite  version 1.06 (build g97) at cnidaria-1347.ucdavis.edu/169.237.66.185]
+
+BEGIN TAXA;
+	DIMENSIONS NTAX=18;
+	TAXLABELS
+		Myrmecocystuscfnavajo Myrmecocystuscreightoni Myrmecocystusdepilis Myrmecocystuskathjuli Myrmecocystuskennedyi Myrmecocystusmendax Myrmecocystusmexicanus Myrmecocystusmimicus Myrmecocystusnavajo Myrmecocystusnequazcatl Myrmecocystusplacodops Myrmecocystusromainei Myrmecocystussemirufus Myrmecocystussnellingi Myrmecocystustenuinodis Myrmecocystustestaceus Myrmecocystuswheeleri Myrmecocystusyuma 
+	;
+
+END;
+
+
+BEGIN CHARACTERS;
+	TITLE  Foraging;
+	DIMENSIONS  NCHAR=2;
+	FORMAT DATATYPE = STANDARD GAP = - MISSING = ? SYMBOLS = "  0 1 2";
+	CHARSTATELABELS 
+		1 time /  diurnal crepuscular nocturnal, 2 subgenus /  Endiodioctes Eremnocystus Myrmecocystus ; 
+	MATRIX
+	Myrmecocystuscfnavajo   22
+
+	Myrmecocystuscreightoni  11
+
+	Myrmecocystusdepilis     00
+
+	Myrmecocystuskathjuli    00
+
+	Myrmecocystuskennedyi    00
+
+	Myrmecocystusmendax      00
+
+	Myrmecocystusmexicanus   22
+
+	Myrmecocystusmimicus     00
+
+	Myrmecocystusnavajo      22
+
+	Myrmecocystusnequazcatl  00
+
+	Myrmecocystusplacodops   00
+
+	Myrmecocystusromainei    00
+
+	Myrmecocystussemirufus   00
+
+	Myrmecocystussnellingi   11
+
+	Myrmecocystustenuinodis  11
+
+	Myrmecocystustestaceus   12
+
+	Myrmecocystuswheeleri    00
+
+	Myrmecocystusyuma        11
+
+
+;
+
+END;
+
+BEGIN TREES;
+	TRANSLATE
+		1 Myrmecocystuscfnavajo,
+		2 Myrmecocystuscreightoni,
+		3 Myrmecocystusdepilis,
+		4 Myrmecocystuskathjuli,
+		5 Myrmecocystuskennedyi,
+		6 Myrmecocystusmendax,
+		7 Myrmecocystusmexicanus,
+		8 Myrmecocystusmimicus,
+		9 Myrmecocystusnavajo,
+		10 Myrmecocystusnequazcatl,
+		11 Myrmecocystusplacodops,
+		12 Myrmecocystusromainei,
+		13 Myrmecocystussemirufus,
+		14 Myrmecocystussnellingi,
+		15 Myrmecocystustenuinodis,
+		16 Myrmecocystustestaceus,
+		17 Myrmecocystuswheeleri,
+		18 Myrmecocystusyuma;
+	TREE bestML = (((((((((13:1.724765,11:1.724765):2.926053,6:4.650818):0.689044,(4:1.08387,17:1.08387):4.255993):0.198842,((8:2.708942,3:2.708942):2.027251,((12:2.193845,10:2.193845):2.257581,18:4.451425):0.284767):0.802512):0.506099,5:6.044804):4.524387,2:10.569191):0.836689,(14:2.770378,15:2.770378):8.635503):0.89482,16:12.300701):1.699299,(7:5.724923,(1:2.869547,9:2.869547):2.855375):8.275077);
+
+END;
+
+
+BEGIN ASSUMPTIONS;
+	TYPESET * UNTITLED  (CHARACTERS = Foraging)  =  unord:  1 -  2;
+END;
+
+BEGIN MESQUITECHARMODELS;
+	ProbModelSet * UNTITLED  (CHARACTERS = 'Matrix in file "treepluscharV01.nex"')  =  Browniandefault:  1 -  32;
+ProbModelSet * UNTITLED  (CHARACTERS = Foraging)  =  'Mk1 (est.)':  1 -  2;
+END;
+
+Begin MESQUITE;
+		MESQUITESCRIPTVERSION 2;
+		TITLE AUTO;
+		tell ProjectCoordinator;
+		getEmployee #mesquite.minimal.ManageTaxa.ManageTaxa;
+		tell It;
+			setID 0 9015005506118934442;
+		endTell;
+		getEmployee #mesquite.charMatrices.ManageCharacters.ManageCharacters;
+		tell It;
+			setID 0 2565950173085067248;
+			checksum 0 389122022;
+			setID 1 1161953040649633474;
+			checksum 1 3582198254;
+		endTell;
+		getEmployee  #mesquite.charMatrices.BasicDataWindowCoord.BasicDataWindowCoord;
+		tell It;
+			showDataWindow #2565950173085067248 #mesquite.charMatrices.BasicDataWindowMaker.BasicDataWindowMaker;
+			tell It;
+				getWindow;
+				tell It;
+					setSize 420 280;
+					setLocation 400 156;
+					setFont SanSerif;
+					setFontSize 10;
+					onInfoBar;
+					setExplanationSize 30;
+					setAnnotationSize 20;
+					setFontIncAnnot 0;
+					setFontIncExp 0;
+					getToolPalette;
+					tell It;
+					endTell;
+					setTool mesquite.charMatrices.BasicDataWindowMaker.BasicDataWindow.arrow;
+					colorCells  #mesquite.charMatrices.NoColor.NoColor;
+					setBackground White;
+					toggleShowNames on;
+					toggleTight off;
+					toggleShowChanges on;
+					toggleSeparateLines off;
+					toggleShowStates on;
+					toggleAutoWithCharNames on;
+					toggleShowDefaultCharNames off;
+					toggleConstrainCW on;
+					toggleBirdsEye off;
+					toggleColorsPanel off;
+					birdsEyeWidth 2;
+					toggleLinkedScrolling on;
+					toggleScrollLinkedTables off;
+				endTell;
+				showWindow;
+				getWindow;
+				tell It;
+					forceAutosize;
+				endTell;
+				getEmployee #mesquite.charMatrices.AnnotPanel.AnnotPanel;
+				tell It;
+					togglePanel off;
+				endTell;
+				getEmployee #mesquite.charMatrices.ColorCells.ColorCells;
+				tell It;
+					setColor Red;
+					removeColor off;
+				endTell;
+			endTell;
+			showDataWindow #1161953040649633474 #mesquite.charMatrices.BasicDataWindowMaker.BasicDataWindowMaker;
+			tell It;
+				getWindow;
+				tell It;
+					getTable;
+					tell It;
+						rowNamesWidth 232;
+					endTell;
+					setSize 798 748;
+					setLocation 348 22;
+					setFont SanSerif;
+					setFontSize 10;
+					onInfoBar;
+					setExplanationSize 30;
+					setAnnotationSize 20;
+					setFontIncAnnot 0;
+					setFontIncExp 0;
+					getToolPalette;
+					tell It;
+						setTool mesquite.charMatrices.BasicDataWindowMaker.BasicDataWindow.ibeam;
+					endTell;
+					setTool mesquite.charMatrices.BasicDataWindowMaker.BasicDataWindow.ibeam;
+					colorCells  #mesquite.charMatrices.NoColor.NoColor;
+					setBackground White;
+					toggleShowNames on;
+					toggleTight off;
+					toggleShowChanges on;
+					toggleSeparateLines off;
+					toggleShowStates on;
+					toggleAutoWithCharNames on;
+					toggleShowDefaultCharNames off;
+					toggleConstrainCW on;
+					toggleBirdsEye off;
+					toggleColorsPanel off;
+					birdsEyeWidth 2;
+					toggleLinkedScrolling on;
+					toggleScrollLinkedTables off;
+				endTell;
+				showWindow;
+				getWindow;
+				tell It;
+					forceAutosize;
+				endTell;
+				getEmployee #mesquite.categ.StateNamesEditor.StateNamesEditor;
+				tell It;
+					makeWindow;
+					tell It;
+						setSize 314 400;
+						setLocation 60 10;
+						setFont SanSerif;
+						setFontSize 10;
+						onInfoBar;
+						setExplanationSize 30;
+						setAnnotationSize 20;
+						setFontIncAnnot 0;
+						setFontIncExp 0;
+						getToolPalette;
+						tell It;
+							setTool mesquite.categ.StateNamesEditor.StateNamesWindow.ibeam;
+						endTell;
+						rowsAreCharacters on;
+						toggleConstrainChar on;
+						toggleConstrainCharNum 3;
+						togglePanel off;
+					endTell;
+					showWindow;
+				endTell;
+				getEmployee #mesquite.categ.StateNamesStrip.StateNamesStrip;
+				tell It;
+					showStrip off;
+				endTell;
+				getEmployee #mesquite.charMatrices.AnnotPanel.AnnotPanel;
+				tell It;
+					togglePanel off;
+				endTell;
+				getEmployee #mesquite.charMatrices.ColorCells.ColorCells;
+				tell It;
+					setColor Red;
+					removeColor off;
+				endTell;
+				getEmployee #mesquite.charMatrices.QuickKeySelector.QuickKeySelector;
+				tell It;
+					autotabOff;
+				endTell;
+			endTell;
+		endTell;
+		getEmployee  #mesquite.trees.BasicTreeWindowCoord.BasicTreeWindowCoord;
+		tell It;
+			makeTreeWindow #9015005506118934442  #mesquite.trees.BasicTreeWindowMaker.BasicTreeWindowMaker;
+			tell It;
+				setTreeSource  #mesquite.trees.StoredTrees.StoredTrees;
+				tell It;
+					setTreeBlock 1;
+					toggleUseWeights off;
+				endTell;
+				setAssignedID 630.1180487973731.4514395117633566598;
+				getTreeWindow;
+				tell It;
+					setSize 520 400;
+					setLocation 60 10;
+					setFont SanSerif;
+					setFontSize 10;
+					onInfoBar;
+					setExplanationSize 30;
+					setAnnotationSize 20;
+					setFontIncAnnot 0;
+					setFontIncExp 0;
+					getToolPalette;
+					tell It;
+					endTell;
+					setActive;
+					getTreeDrawCoordinator #mesquite.trees.BasicTreeDrawCoordinator.BasicTreeDrawCoordinator;
+				tell It;
+					suppress;
+					setTreeDrawer  #mesquite.trees.DiagonalDrawTree.DiagonalDrawTree;
+					tell It;
+						setEdgeWidth 12;
+						orientUp;
+						getEmployee #mesquite.trees.NodeLocsStandard.NodeLocsStandard;
+						tell It;
+							stretchToggle off;
+							branchLengthsToggle off;
+							toggleScale on;
+							toggleCenter off;
+							toggleEven off;
+							namesAngle ?;
+						endTell;
+					endTell;
+					setBackground White;
+					setBranchColor Black;
+					showNodeNumbers off;
+					labelBranchLengths off;
+					desuppress;
+					getEmployee #mesquite.trees.BasicDrawTaxonNames.BasicDrawTaxonNames;
+					tell It;
+						setColor Black;
+						toggleColorPartition on;
+						toggleShadePartition off;
+						toggleNodeLabels on;
+						toggleShowNames on;
+					endTell;
+				endTell;
+					setTreeNumber 1;
+					useSuggestedSize on;
+					toggleTextOnTree off;
+					newAssistant  #mesquite.ancstates.TraceCharacterHistory.TraceCharacterHistory;
+				tell It;
+					suspend ;
+					setDisplayMode  #mesquite.ancstates.ShadeStatesOnTree.ShadeStatesOnTree;
+					tell It;
+						toggleLabels off;
+					endTell;
+					setHistorySource  #mesquite.ancstates.RecAncestralStates.RecAncestralStates;
+					tell It;
+						getCharacterSource  #mesquite.charMatrices.CharSrcCoordObed.CharSrcCoordObed;
+						tell It;
+							setCharacterSource #mesquite.charMatrices.StoredCharacters.StoredCharacters;
+							tell It;
+								setDataSet #1161953040649633474;
+							endTell;
+						endTell;
+						setMethod  #mesquite.parsimony.ParsAncestralStates.ParsAncestralStates;
+						tell It;
+							setModelSource  #mesquite.parsimony.CurrentParsModels.CurrentParsModels;
+						endTell;
+					endTell;
+					setCharacter 1;
+					toggleShowLegend on;
+					toggleGray off;
+					toggleWeights on;
+					setInitialOffsetX -162;
+					setInitialOffsetY -177;
+					setLegendWidth 142;
+					setLegendHeight 177;
+					resume ;
+				endTell;
+				endTell;
+				showWindow;
+				getEmployee #mesquite.ornamental.BranchNotes.BranchNotes;
+				tell It;
+					setAlwaysOn off;
+				endTell;
+				getEmployee #mesquite.trees.ColorBranches.ColorBranches;
+				tell It;
+					setColor Red;
+					removeColor off;
+				endTell;
+			endTell;
+		endTell;
+		endTell;
+end;
+
+begin brownie;
+taxset all=1-18;
+end;
diff --git a/inst/nexusfiles/treeWithUnderscoreLabels.nex b/inst/nexusfiles/treeWithUnderscoreLabels.nex
new file mode 100644
index 0000000..e84d6a4
--- /dev/null
+++ b/inst/nexusfiles/treeWithUnderscoreLabels.nex
@@ -0,0 +1,354 @@
+#NEXUS
+[written Tue May 29 18:24:39 PDT 2007 by Mesquite  version 1.06 (build g97) at cnidaria-1347.ucdavis.edu/169.237.66.185]
+
+BEGIN TAXA;
+	DIMENSIONS NTAX=18;
+	TAXLABELS
+		Myrmecocystuscfnavajo Myrmecocystus_creightoni Myrmecocystusdepilis Myrmecocystuskathjuli Myrmecocystuskennedyi Myrmecocystusmendax Myrmecocystusmexicanus Myrmecocystusmimicus Myrmecocystusnavajo Myrmecocystusnequazcatl Myrmecocystusplacodops Myrmecocystusromainei Myrmecocystussemirufus Myrmecocystussnellingi Myrmecocystustenuinodis Myrmecocystustestaceus Myrmecocystuswheeleri Myrmecocystusyuma 
+	;
+
+END;
+
+
+BEGIN CHARACTERS;
+	TITLE  Foraging;
+	DIMENSIONS  NCHAR=2;
+	FORMAT DATATYPE = STANDARD GAP = - MISSING = ? SYMBOLS = "  0 1 2";
+	CHARSTATELABELS 
+		1 time_period /  diurnal crepuscular nocturnal, 2 subgenus /  Endiodioctes Eremnocystus Myrmecocystus ; 
+	MATRIX
+	Myrmecocystuscfnavajo   22
+
+	Myrmecocystus_creightoni  11
+
+	Myrmecocystusdepilis     00
+
+	Myrmecocystuskathjuli    00
+
+	Myrmecocystuskennedyi    00
+
+	Myrmecocystusmendax      00
+
+	Myrmecocystusmexicanus   22
+
+	Myrmecocystusmimicus     00
+
+	Myrmecocystusnavajo      22
+
+	Myrmecocystusnequazcatl  00
+
+	Myrmecocystusplacodops   00
+
+	Myrmecocystusromainei    00
+
+	Myrmecocystussemirufus   00
+
+	Myrmecocystussnellingi   11
+
+	Myrmecocystustenuinodis  11
+
+	Myrmecocystustestaceus   12
+
+	Myrmecocystuswheeleri    00
+
+	Myrmecocystusyuma        11
+
+
+;
+
+END;
+
+BEGIN TREES;
+	TRANSLATE
+		1 Myrmecocystuscfnavajo,
+		2 Myrmecocystus_creightoni,
+		3 Myrmecocystusdepilis,
+		4 Myrmecocystuskathjuli,
+		5 Myrmecocystuskennedyi,
+		6 Myrmecocystusmendax,
+		7 Myrmecocystusmexicanus,
+		8 Myrmecocystusmimicus,
+		9 Myrmecocystusnavajo,
+		10 Myrmecocystusnequazcatl,
+		11 Myrmecocystusplacodops,
+		12 Myrmecocystusromainei,
+		13 Myrmecocystussemirufus,
+		14 Myrmecocystussnellingi,
+		15 Myrmecocystustenuinodis,
+		16 Myrmecocystustestaceus,
+		17 Myrmecocystuswheeleri,
+		18 Myrmecocystusyuma;
+	TREE bestML = (((((((((13:1.724765,11:1.724765):2.926053,6:4.650818):0.689044,(4:1.08387,17:1.08387):4.255993):0.198842,((8:2.708942,3:2.708942):2.027251,((12:2.193845,10:2.193845):2.257581,18:4.451425):0.284767):0.802512):0.506099,5:6.044804):4.524387,2:10.569191):0.836689,(14:2.770378,15:2.770378):8.635503):0.89482,16:12.300701):1.699299,(7:5.724923,(1:2.869547,9:2.869547):2.855375):8.275077);
+
+END;
+
+
+BEGIN ASSUMPTIONS;
+	TYPESET * UNTITLED  (CHARACTERS = Foraging)  =  unord:  1 -  2;
+END;
+
+BEGIN MESQUITECHARMODELS;
+	ProbModelSet * UNTITLED  (CHARACTERS = 'Matrix in file "treepluscharV01.nex"')  =  Browniandefault:  1 -  32;
+ProbModelSet * UNTITLED  (CHARACTERS = Foraging)  =  'Mk1 (est.)':  1 -  2;
+END;
+
+Begin MESQUITE;
+		MESQUITESCRIPTVERSION 2;
+		TITLE AUTO;
+		tell ProjectCoordinator;
+		getEmployee #mesquite.minimal.ManageTaxa.ManageTaxa;
+		tell It;
+			setID 0 9015005506118934442;
+		endTell;
+		getEmployee #mesquite.charMatrices.ManageCharacters.ManageCharacters;
+		tell It;
+			setID 0 2565950173085067248;
+			checksum 0 389122022;
+			setID 1 1161953040649633474;
+			checksum 1 3582198254;
+		endTell;
+		getEmployee  #mesquite.charMatrices.BasicDataWindowCoord.BasicDataWindowCoord;
+		tell It;
+			showDataWindow #2565950173085067248 #mesquite.charMatrices.BasicDataWindowMaker.BasicDataWindowMaker;
+			tell It;
+				getWindow;
+				tell It;
+					setSize 420 280;
+					setLocation 400 156;
+					setFont SanSerif;
+					setFontSize 10;
+					onInfoBar;
+					setExplanationSize 30;
+					setAnnotationSize 20;
+					setFontIncAnnot 0;
+					setFontIncExp 0;
+					getToolPalette;
+					tell It;
+					endTell;
+					setTool mesquite.charMatrices.BasicDataWindowMaker.BasicDataWindow.arrow;
+					colorCells  #mesquite.charMatrices.NoColor.NoColor;
+					setBackground White;
+					toggleShowNames on;
+					toggleTight off;
+					toggleShowChanges on;
+					toggleSeparateLines off;
+					toggleShowStates on;
+					toggleAutoWithCharNames on;
+					toggleShowDefaultCharNames off;
+					toggleConstrainCW on;
+					toggleBirdsEye off;
+					toggleColorsPanel off;
+					birdsEyeWidth 2;
+					toggleLinkedScrolling on;
+					toggleScrollLinkedTables off;
+				endTell;
+				showWindow;
+				getWindow;
+				tell It;
+					forceAutosize;
+				endTell;
+				getEmployee #mesquite.charMatrices.AnnotPanel.AnnotPanel;
+				tell It;
+					togglePanel off;
+				endTell;
+				getEmployee #mesquite.charMatrices.ColorCells.ColorCells;
+				tell It;
+					setColor Red;
+					removeColor off;
+				endTell;
+			endTell;
+			showDataWindow #1161953040649633474 #mesquite.charMatrices.BasicDataWindowMaker.BasicDataWindowMaker;
+			tell It;
+				getWindow;
+				tell It;
+					getTable;
+					tell It;
+						rowNamesWidth 232;
+					endTell;
+					setSize 798 748;
+					setLocation 348 22;
+					setFont SanSerif;
+					setFontSize 10;
+					onInfoBar;
+					setExplanationSize 30;
+					setAnnotationSize 20;
+					setFontIncAnnot 0;
+					setFontIncExp 0;
+					getToolPalette;
+					tell It;
+						setTool mesquite.charMatrices.BasicDataWindowMaker.BasicDataWindow.ibeam;
+					endTell;
+					setTool mesquite.charMatrices.BasicDataWindowMaker.BasicDataWindow.ibeam;
+					colorCells  #mesquite.charMatrices.NoColor.NoColor;
+					setBackground White;
+					toggleShowNames on;
+					toggleTight off;
+					toggleShowChanges on;
+					toggleSeparateLines off;
+					toggleShowStates on;
+					toggleAutoWithCharNames on;
+					toggleShowDefaultCharNames off;
+					toggleConstrainCW on;
+					toggleBirdsEye off;
+					toggleColorsPanel off;
+					birdsEyeWidth 2;
+					toggleLinkedScrolling on;
+					toggleScrollLinkedTables off;
+				endTell;
+				showWindow;
+				getWindow;
+				tell It;
+					forceAutosize;
+				endTell;
+				getEmployee #mesquite.categ.StateNamesEditor.StateNamesEditor;
+				tell It;
+					makeWindow;
+					tell It;
+						setSize 314 400;
+						setLocation 60 10;
+						setFont SanSerif;
+						setFontSize 10;
+						onInfoBar;
+						setExplanationSize 30;
+						setAnnotationSize 20;
+						setFontIncAnnot 0;
+						setFontIncExp 0;
+						getToolPalette;
+						tell It;
+							setTool mesquite.categ.StateNamesEditor.StateNamesWindow.ibeam;
+						endTell;
+						rowsAreCharacters on;
+						toggleConstrainChar on;
+						toggleConstrainCharNum 3;
+						togglePanel off;
+					endTell;
+					showWindow;
+				endTell;
+				getEmployee #mesquite.categ.StateNamesStrip.StateNamesStrip;
+				tell It;
+					showStrip off;
+				endTell;
+				getEmployee #mesquite.charMatrices.AnnotPanel.AnnotPanel;
+				tell It;
+					togglePanel off;
+				endTell;
+				getEmployee #mesquite.charMatrices.ColorCells.ColorCells;
+				tell It;
+					setColor Red;
+					removeColor off;
+				endTell;
+				getEmployee #mesquite.charMatrices.QuickKeySelector.QuickKeySelector;
+				tell It;
+					autotabOff;
+				endTell;
+			endTell;
+		endTell;
+		getEmployee  #mesquite.trees.BasicTreeWindowCoord.BasicTreeWindowCoord;
+		tell It;
+			makeTreeWindow #9015005506118934442  #mesquite.trees.BasicTreeWindowMaker.BasicTreeWindowMaker;
+			tell It;
+				setTreeSource  #mesquite.trees.StoredTrees.StoredTrees;
+				tell It;
+					setTreeBlock 1;
+					toggleUseWeights off;
+				endTell;
+				setAssignedID 630.1180487973731.4514395117633566598;
+				getTreeWindow;
+				tell It;
+					setSize 520 400;
+					setLocation 60 10;
+					setFont SanSerif;
+					setFontSize 10;
+					onInfoBar;
+					setExplanationSize 30;
+					setAnnotationSize 20;
+					setFontIncAnnot 0;
+					setFontIncExp 0;
+					getToolPalette;
+					tell It;
+					endTell;
+					setActive;
+					getTreeDrawCoordinator #mesquite.trees.BasicTreeDrawCoordinator.BasicTreeDrawCoordinator;
+				tell It;
+					suppress;
+					setTreeDrawer  #mesquite.trees.DiagonalDrawTree.DiagonalDrawTree;
+					tell It;
+						setEdgeWidth 12;
+						orientUp;
+						getEmployee #mesquite.trees.NodeLocsStandard.NodeLocsStandard;
+						tell It;
+							stretchToggle off;
+							branchLengthsToggle off;
+							toggleScale on;
+							toggleCenter off;
+							toggleEven off;
+							namesAngle ?;
+						endTell;
+					endTell;
+					setBackground White;
+					setBranchColor Black;
+					showNodeNumbers off;
+					labelBranchLengths off;
+					desuppress;
+					getEmployee #mesquite.trees.BasicDrawTaxonNames.BasicDrawTaxonNames;
+					tell It;
+						setColor Black;
+						toggleColorPartition on;
+						toggleShadePartition off;
+						toggleNodeLabels on;
+						toggleShowNames on;
+					endTell;
+				endTell;
+					setTreeNumber 1;
+					useSuggestedSize on;
+					toggleTextOnTree off;
+					newAssistant  #mesquite.ancstates.TraceCharacterHistory.TraceCharacterHistory;
+				tell It;
+					suspend ;
+					setDisplayMode  #mesquite.ancstates.ShadeStatesOnTree.ShadeStatesOnTree;
+					tell It;
+						toggleLabels off;
+					endTell;
+					setHistorySource  #mesquite.ancstates.RecAncestralStates.RecAncestralStates;
+					tell It;
+						getCharacterSource  #mesquite.charMatrices.CharSrcCoordObed.CharSrcCoordObed;
+						tell It;
+							setCharacterSource #mesquite.charMatrices.StoredCharacters.StoredCharacters;
+							tell It;
+								setDataSet #1161953040649633474;
+							endTell;
+						endTell;
+						setMethod  #mesquite.parsimony.ParsAncestralStates.ParsAncestralStates;
+						tell It;
+							setModelSource  #mesquite.parsimony.CurrentParsModels.CurrentParsModels;
+						endTell;
+					endTell;
+					setCharacter 1;
+					toggleShowLegend on;
+					toggleGray off;
+					toggleWeights on;
+					setInitialOffsetX -162;
+					setInitialOffsetY -177;
+					setLegendWidth 142;
+					setLegendHeight 177;
+					resume ;
+				endTell;
+				endTell;
+				showWindow;
+				getEmployee #mesquite.ornamental.BranchNotes.BranchNotes;
+				tell It;
+					setAlwaysOn off;
+				endTell;
+				getEmployee #mesquite.trees.ColorBranches.ColorBranches;
+				tell It;
+					setColor Red;
+					removeColor off;
+				endTell;
+			endTell;
+		endTell;
+		endTell;
+end;
+
+begin brownie;
+taxset all=1-18;
+end;
diff --git a/man/read_nexus_phylo.Rd b/man/read_nexus_phylo.Rd
new file mode 100644
index 0000000..7d69073
--- /dev/null
+++ b/man/read_nexus_phylo.Rd
@@ -0,0 +1,50 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/rncl.R
+\name{read_nexus_phylo}
+\alias{make_phylo}
+\alias{read_newick_phylo}
+\alias{read_nexus_phylo}
+\title{Read phylogenetic trees from files}
+\usage{
+read_nexus_phylo(file, simplify = FALSE, missing_edge_length = NA, ...)
+
+read_newick_phylo(file, simplify = FALSE, missing_edge_length = NA, ...)
+
+make_phylo(file, simplify = FALSE, missing_edge_length = NA, ...)
+}
+\arguments{
+\item{file}{Path of NEXUS or Newick file}
+
+\item{simplify}{If \code{TRUE} and the file includes more than one tree,
+returns only the first tree; otherwise, returns a multiPhylo object}
+
+\item{missing_edge_length}{If the tree contains missing edge
+lengths, the value to be attributed to these edge lengths. By
+default (\code{missing_edge_length = NA}), if at least one edge
+length is missing, all edge lengths are removed. Otherwise, the value
+must be a single numeric value. In any case, a warning will
+be generated if the tree contains missing edge lengths.}
+
+\item{...}{additional parameters to be passed to the rncl function}
+}
+\value{
+A phylo or a multiPhylo object
+}
+\description{
+Create phylo objects from NEXUS or Newick files
+}
+\details{
+These functions read NEXUS or Newick files and return an object of
+class phylo/multiPhylo.
+}
+\note{
+\code{make_phylo} will soon be deprecated, use
+\code{read_nexus_phylo} or \code{read_newick_phylo} instead.
+}
+\author{
+Francois Michonneau
+}
+\seealso{
+rncl-package
+}
+
diff --git a/man/rncl.Rd b/man/rncl.Rd
new file mode 100644
index 0000000..1d12c98
--- /dev/null
+++ b/man/rncl.Rd
@@ -0,0 +1,169 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/rncl-package.R, R/rncl.R
+\docType{package}
+\name{rncl}
+\alias{rncl}
+\alias{rncl-package}
+\title{rncl: An R interface to the NEXUS Class Library}
+\usage{
+rncl(file, file.format = c("nexus", "newick"), spacesAsUnderscores = TRUE,
+  char.all = TRUE, polymorphic.convert = TRUE, levels.uniform = TRUE, ...)
+}
+\arguments{
+\item{file}{path to a NEXUS or Newick file}
+
+\item{file.format}{a character string indicating the type of file
+to be parsed.}
+
+\item{spacesAsUnderscores}{In the NEXUS file format white spaces
+are not allowed and are represented by underscores. Therefore, NCL
+converts underscores found in taxon labels in the NEXUS file into
+white spaces (e.g. \code{species_1} will become \code{"species
+1"}). If you want to preserve the underscores, set as \code{TRUE}
+(default). This option affects taxon labels, character labels and
+state labels.}
+
+\item{char.all}{If \code{TRUE} (default), returns all characters,
+even those excluded in the NEXUS file (only when NEXUS file
+contains DATA block).}
+
+\item{polymorphic.convert}{If TRUE (default), converts polymorphic
+characters to missing data (only when NEXUS file contains DATA
+block).}
+
+\item{levels.uniform}{If TRUE (default), uses the same levels for
+all characters (only when NEXUS file contains DATA block).}
+
+\item{...}{additional parameters (currently not in use).}
+}
+\value{
+A list that contains the elements extracted from a NEXUS
+or a Newick file.
+
+\itemize{
+
+  \item {\code{taxaNames}} {A vector of the taxa names listed in
+the TAXA block of the NEXUS file or inferred from the tree strings
+(if block missing or Newick file).}
+
+  \item {\code{treeNames}} {A vector listing the names of the trees}
+
+  \item {\code{taxonLabelVector}} {A list containing as many
+elements as there are trees in the file. Each element is a
+character vector that lists the taxon names encountered in the
+tree string *in the order they appear*, and therefore may not
+match the order they are listed in the translation table.}
+
+  \item {\code{parentVector}} { A list containing as many elements
+as there are trees in the file. Each element is a numeric vector
+listing the parent node for the node given by its position in the
+vector. If the beginning of the vector is 5 5 6, the parent node
+of node 1 is 5, the parent of node 2 is 5 and the parent of node 3
+is 6. The implicit root of the tree is identified with 0 (node
+without a parent).}
+
+  \item{\code{branchLengthVector}} { A list containing as many
+elements as there are trees in the file. Each element is a numeric
+vector listing the edge/branch lengths for the edges in the same
+order as nodes are listed in the corresponding \code{parentVector}
+element. Values of -999 indicate that the value is missing for this
+particular edge. The implicit root has a length of 0.}
+
+  \item{\code{nodeLabelsVector}} { A list containing as many
+elements as there are trees in the file. Each element is a
+character vector listing the node labels in the same order as
+the nodes are listed in the corresponding \code{parentVector}
+element.}
+
+  \item{\code{trees}} { A character vector listing the tree
+strings where tip labels have been replaced by their indices in
+the \code{taxaNames} vector. They do not correspond to the numbers
+listed in the translation table that might be associated with the
+tree.}
+
+  \item{\code{dataTypes}} { A character vector indicating the type
+of data associated with the tree (e.g., \dQuote{standard}). }
+
+  \item{\code{nbCharacters}} { A numeric vector indicating how
+many characters/traits are available. }
+
+  \item{\code{charLabels}} { A character vector listing the names
+of the characters/traits that are available. }
+
+  \item {\code{nbStates}} { A numeric vector listing the number of
+possible states for each character/trait.}
+
+  \item {\code{stateLabels}} { A character vector listing in
+order, all possible states for each character/trait.}
+
+  \item {\code{dataChr}} { A character vector with as many
+elements as there are characters/traits in the dataset. Each
+element is a string that can be parsed by R to create a factor
+vector representing the data found in the file.}
+
+  \item {\code{isRooted}} { A list with as many elements as there
+are trees in the file. Each element is a logical indicating
+whether the tree is rooted. NCL definition of a rooted tree
+differs from the one APE uses in some cases. }
+
+  \item {\code{hasPolytomies}} { A list with as many elements as
+there are trees in the file. Each element is a logical indicating
+whether the tree contains polytomies.}
+
+  \item {\code{hasSingletons}} { A list with as many elements as
+there are trees in the file. Each element is a logical indicating
+whether the tree contains singleton nodes, in other words nodes
+with a single descendant (also known as knuckles).}
+
+}
+}
+\description{
+rncl provides an interface to the NEXUS Class Library (NCL), a C++
+library intended to parse valid NEXUS files as well as other
+common formats used in phylogenetic analysis. Currently, rncl
+focuses on parsing trees and supports both NEXUS and Newick
+formatted files. Because NCL is used by several phylogenetic
+programs (e.g., MrBayes, Garli), rncl can parse files generated by
+these programs. However, other popular programs (including BEAST)
+use an extension of the NEXUS file format, and although trees can be
+imported, associated annotations (e.g., confidence intervals on
+the time since divergence) cannot.
+
+Returns a list of the elements contained in a NEXUS file used to
+build phylogenetic objects in R
+}
+\details{
+NCL can also parse data associated with species included in NEXUS
+files. If you are interested in importing such data, see the
+phylobase package.
+
+NEXUS is a common file format used in phylogenetics to represent
+phylogenetic trees, and other types of phylogenetic data. This
+function uses NCL (the NEXUS Class Library) to parse NEXUS, Newick
+or other common phylogenetic file formats, and returns the
+relevant elements as a list. \code{phylo} (from the ape package)
+or \code{phylo4} (from the phylobase package) can be constructed
+from the elements contained in this list.
+}
+\author{
+Francois Michonneau
+}
+\references{
+Maddison DR, Swofford DL, Maddison WP (1997). "NEXUS:
+An extensible file format for systematic information". Systematic
+Biology 46(4) : 590-621.
+doi:\href{http://dx.doi.org/10.1093/sysbio/46.4.590}{10.1093/sysbio/46.4.590}
+
+Lewis, P. O. 2003. NCL: a C++ class library for interpreting data
+files in NEXUS format. Bioinformatics 19 (17) : 2330-2331.
+}
+\seealso{
+For examples on how to use the elements of the list
+returned by this function to build tree objects, inspect the
+source code of this package, in particular how
+\code{read_newick_phylo} and \code{read_nexus_phylo} work. For a
+more complex example that also uses the data contained in NEXUS
+files, inspect the source code of the \code{readNCL} function in
+the phylobase package.
+}
+
diff --git a/src/GetNCL.cpp b/src/GetNCL.cpp
new file mode 100644
index 0000000..75cee34
--- /dev/null
+++ b/src/GetNCL.cpp
@@ -0,0 +1,424 @@
+// -*- mode: C++; -*-
+
+#include <Rcpp.h>
+#include "ncl/nxsmultiformat.h"
+
+//#define NEW_TREE_RETURN_TYPE
+
+// Appends the values of continuous character eachChar for taxa 0..nTax-1 to charString (comma-separated, NA when missing).
+NxsString contData(NxsCharactersBlock& charBlock, NxsString& charString, const int& eachChar, const int& nTax) {
+    for (int taxon=0; taxon < nTax; ++taxon) {
+	double state=charBlock.GetSimpleContinuousValue(taxon,eachChar);
+	if (state==DBL_MAX) { // DBL_MAX is handled as the missing-value marker
+	    charString+="NA";
+	}
+	else {
+	    char buffer[512]; // "%.10f" of a very large double needs more than 300 characters; 100 could overflow
+	    snprintf(buffer, sizeof(buffer), "%.10f", state);
+	    charString+=buffer;
+	}
+	// values are separated by commas, with no trailing comma after the last taxon
+	if (taxon+1 < nTax) {
+	    charString+=',';
+	}
+    }
+    return charString; // returned by value; the charString argument has also been extended in place
+}
+
+
+NxsString stdData(NxsCharactersBlock& charBlock, NxsString& charString, const int& eachChar,
+		  const int& nTax, bool polyconvert) {
+    // Per taxon, comma-separated: NA for a missing state (or a polymorphic one when
+    // polyconvert is set), "{a,b}" for a polymorphic state otherwise, "a" for a
+    // single state.
+    for (int taxon=0; taxon<nTax; ++taxon) {
+	const int firstState = charBlock.GetInternalRepresentation(taxon, eachChar, 0);
+	const bool isMissing = charBlock.IsMissingState(taxon, eachChar);
+	const bool isPoly = !isMissing && charBlock.GetNumStates(taxon, eachChar) > 1;
+	if (isMissing || (isPoly && polyconvert)) {
+	    charString += "NA";
+	}
+	else if (isPoly) {
+	    charString += '"';
+	    charString += '{';
+	    unsigned int k = 0;
+	    while (k < charBlock.GetNumStates(taxon, eachChar)) {
+		charString += charBlock.GetInternalRepresentation(taxon, eachChar, k);
+		++k;
+		if (k < charBlock.GetNumStates(taxon, eachChar)) {
+		    charString += ',';
+		}
+	    }
+	    charString += '}';
+	    charString += '"';
+	}
+	else {
+	    charString += '"';
+	    charString += firstState;
+	    charString += '"';
+	}
+	if (taxon+1 < nTax) {
+	    charString += ',';
+	}
+    }
+    return charString;
+}
+
+// Parses the file named in params with NCL; returns taxa, tree and character data as a named list, or list(ErrorMsg=) on failure.
+//[[Rcpp::export]]
+Rcpp::List RNCL (SEXP params, SEXP paramsVecR) {
+    // params supplies "fileName" and "fileFormat"; paramsVecR holds the five logical flags unpacked below.
+    Rcpp::List list(params);
+    Rcpp::LogicalVector paramsVec(paramsVecR);
+
+    bool charall = paramsVec[0];
+    bool polyconvert = paramsVec[1];
+    bool levelsUnif = paramsVec[2];
+    bool returnTrees = paramsVec[3];
+    bool returnData = paramsVec[4];
+
+    int nCharToReturn = 0;
+
+    std::vector<std::string> dataTypes;      //vector of datatypes for each character block
+    std::vector<int> nbCharacters;           //number of characters for each character block
+    std::vector<std::string> dataChr;        //characters
+    std::vector<std::string> charLabels;     //labels for the characters
+    std::vector<std::string> stateLabels;    //labels for the states
+    std::vector<int> nbStates;               //number of states for each character (for Standard datatype)
+    Rcpp::List lTaxaLabelVector = Rcpp::List::create();
+    Rcpp::List lParentVector = Rcpp::List::create();
+    Rcpp::List lBranchLengthVector = Rcpp::List::create();
+    Rcpp::List lNodeLabelVector = Rcpp::List::create();
+    Rcpp::List lIsRooted = Rcpp::List::create();
+    Rcpp::List lHasPolytomies = Rcpp::List::create();
+    Rcpp::List lHasSingletons = Rcpp::List::create();
+    std::vector<std::string> trees;          //vector of Newick strings holding the names
+    std::vector<std::string> treeNames;      //vector of tree names
+    std::vector<std::string> taxaNames;      //vector of taxa names
+    std::string errorMsg;                    //error message
+
+#   if defined(FILENAME_AS_NEXUS)
+    std::string filename = "'" + list["fileName"] + "'";
+#   else
+    std::string filename = list["fileName"];
+#   endif
+
+    MultiFormatReader nexusReader(-1, NxsReader::IGNORE_WARNINGS);
+
+    /* make NCL less strict */
+    NxsTreesBlock * treesB = nexusReader.GetTreesBlockTemplate();
+    treesB->SetAllowImplicitNames(true);
+    nexusReader.cullIdenticalTaxaBlocks(true);
+    /* End of making NCL less strict */
+
+    MultiFormatReader::DataFormatType fileFormat =  MultiFormatReader::NEXUS_FORMAT;
+    std::string fileFormatString = list["fileFormat"];
+    if (!fileFormatString.empty())
+        {
+
+        fileFormat = MultiFormatReader::formatNameToCode(fileFormatString);
+        if (fileFormat == MultiFormatReader::UNSUPPORTED_FORMAT)
+            {
+            std::string m = "Unsupported format \"";
+            m.append(fileFormatString);
+            m.append("\"");
+            Rcpp::List res = Rcpp::List::create(Rcpp::Named("ErrorMsg") = m);
+	        return res;
+            }
+        }
+
+/*  fileFormatString should be one of these:
+    "nexus",
+    "dnafasta",
+    "aafasta",
+    "rnafasta",
+    "dnaphylip",
+    "rnaphylip",
+    "aaphylip",
+    "discretephylip",
+    "dnaphylipinterleaved",
+    "rnaphylipinterleaved",
+    "aaphylipinterleaved",
+    "discretephylipinterleaved",
+    "dnarelaxedphylip",
+    "rnarelaxedphylip",
+    "aarelaxedphylip",
+    "discreterelaxedphylip",
+    "dnarelaxedphylipinterleaved",
+    "rnarelaxedphylipinterleaved",
+    "aarelaxedphylipinterleaved",
+    "discreterelaxedphylipinterleaved",
+    "dnaaln",
+    "rnaaln",
+    "aaaln",
+    "phyliptree",
+    "relaxedphyliptree",
+    "nexml",
+    "dnafin",
+    "aafin",
+    "rnafin"
+    }; */
+    try {
+	nexusReader.ReadFilepath(const_cast < char* > (filename.c_str()), fileFormat);
+    }
+    catch (NxsException &x) {
+	errorMsg = x.msg;
+	Rcpp::List res = Rcpp::List::create(Rcpp::Named("ErrorMsg") = errorMsg);
+	return res;
+    }
+    catch (...) {
+	errorMsg = "Unknown error, check the formatting of your file first.";
+	Rcpp::List res = Rcpp::List::create(Rcpp::Named("ErrorMsg") = errorMsg);
+	return res;
+    }
+    // Walk every TAXA block and collect the trees and character matrices attached to it.
+    const unsigned nTaxaBlocks = nexusReader.GetNumTaxaBlocks();
+    for (unsigned t = 0; t < nTaxaBlocks; ++t) {
+	/* Get blocks */
+	const NxsTaxaBlock * taxaBlock = nexusReader.GetTaxaBlock(t);
+	const unsigned nTreesBlocks = nexusReader.GetNumTreesBlocks(taxaBlock);
+	const unsigned nCharBlocks = nexusReader.GetNumCharactersBlocks(taxaBlock);
+
+	int nTax = taxaBlock->GetNumTaxonLabels();
+
+	/* Get taxa names */
+	for (int j=0; j < nTax; ++j) {
+	    taxaNames.push_back (taxaBlock->GetTaxonLabel(j));
+	}
+
+	/* Get trees */
+	if (returnTrees) {
+	    if (nTreesBlocks == 0) { // NOTE(review): this also skips the "Get data" section below for this taxa block - confirm intended
+		continue;
+	    }
+	    for (unsigned i = 0; i < nTreesBlocks; ++i) {
+		NxsTreesBlock* treeBlock = nexusReader.GetTreesBlock(taxaBlock, i);
+		const unsigned nTrees = treeBlock->GetNumTrees();
+		if (nTrees > 0) {
+		    // lTaxaLabelVector.reserve(nTrees);
+		    // lParentVector.reserve(nTrees);
+		    // lBranchLengthVector.reserve(nTrees);
+
+		    for (unsigned k = 0; k < nTrees; k++) {
+
+			std::vector<std::string> taxonLabelVector; //Labels of the tips, in traversal order.
+			std::vector<int> parentVector;        //Index of the parent. 0 means no parent.
+			std::vector<double> branchLengthVector;
+                        std::vector<std::string> nodeLabelVector;
+
+			taxonLabelVector.reserve(nTax);
+			parentVector.reserve(2*nTax);
+			branchLengthVector.reserve(2*nTax);
+                        nodeLabelVector.reserve(2*nTax);
+
+			taxonLabelVector.clear();
+			parentVector.clear();
+			branchLengthVector.clear();
+                        nodeLabelVector.clear();
+
+			const NxsFullTreeDescription & ftd = treeBlock->GetFullTreeDescription(k);
+
+			NxsSimpleTree simpleTree(ftd, -999, -999.0); // presumably the default int/double edge lengths - TODO confirm against NCL
+			std::vector<const NxsSimpleNode *> ndVector =  simpleTree.GetPreorderTraversal();
+
+                        /// first loop over nodes to figure out number of tips
+                        /// This is needed as we can't rely on the length of the number of taxa in the
+                        /// event some trees of the TREE block have only a subset of the taxa
+                        int ntips = 0;
+                        for (std::vector<const NxsSimpleNode *>::const_iterator ndIt = ndVector.begin();
+			     ndIt != ndVector.end(); ++ndIt) {
+
+                            NxsSimpleNode * nd = (NxsSimpleNode *) *ndIt;
+                            NxsSimpleEdge edge = nd->GetEdgeToParent();
+
+			    NxsSimpleNode * par = 0L;
+			    par = (NxsSimpleNode *) edge.GetParent();
+
+                            if (nd->IsTip() && par != 0L) {
+                                ntips++;
+                            }
+                        }
+
+                        /// Second loop to build the parentVector and associated edge lengths and edge labels
+                        /// vectors
+                        unsigned internalNdIndex = ntips; // internal node counter
+                        unsigned internalTipId = 0;       // tip counter
+
+			for (std::vector<const NxsSimpleNode *>::const_iterator ndIt = ndVector.begin();
+			     ndIt != ndVector.end(); ++ndIt)
+			{
+			    NxsSimpleNode * nd = (NxsSimpleNode *) *ndIt;
+			    unsigned nodeIndex;
+
+                            if (nd->IsTip())
+			    {
+                                // Tips are numbered in traversal order rather than by GetTaxonIndex, because
+                                // a tree may contain only a subset of the TAXA block. The label is looked up
+                                // in this tree's own taxa block: taxaNames accumulates labels of all blocks.
+                                nodeIndex = internalTipId++;
+                                taxonLabelVector.push_back(taxaBlock->GetTaxonLabel(nd->GetTaxonIndex()));
+			    }
+                            else
+                            {
+                                nodeIndex = internalNdIndex++;
+                                nd->SetTaxonIndex(nodeIndex);
+                                nodeLabelVector.push_back(nd->GetName());
+                            }
+
+                            NxsSimpleEdge edge = nd->GetEdgeToParent();
+
+			    NxsSimpleNode * par = 0L;
+			    par = (NxsSimpleNode *) edge.GetParent();
+
+			    if (parentVector.size() < nodeIndex + 1)
+			    {
+				parentVector.resize(nodeIndex + 1);
+			    }
+			    if (branchLengthVector.size() < nodeIndex + 1)
+			    {
+				branchLengthVector.resize(nodeIndex + 1);
+			    }
+                            if (nodeLabelVector.size() < nodeIndex + 1)
+                            {
+                                nodeLabelVector.resize(nodeIndex + 1);
+                            }
+
+			    if (par != 0L)
+			    {
+				parentVector[nodeIndex] = 1 + par->GetTaxonIndex();
+				branchLengthVector[nodeIndex] = edge.GetDblEdgeLen();
+			    }
+			    else
+			    {
+                                parentVector[nodeIndex] = 0;
+                                branchLengthVector[nodeIndex] = -999.0; // no parent: placeholder edge length
+			    }
+			}
+
+			NxsString trNm = treeBlock->GetTreeName(k);
+			treeNames.push_back (trNm);
+			NxsString ts = treeBlock->GetTreeDescription(k);
+			trees.push_back (ts);
+                        bool isRooted = ftd.IsRooted();
+                        bool hasPolys = ftd.HasPolytomies();
+                        bool hasSingletons = ftd.HasDegreeTwoNodes();
+
+			lTaxaLabelVector.push_back (taxonLabelVector);
+			lParentVector.push_back (parentVector);
+			lBranchLengthVector.push_back (branchLengthVector);
+                        lIsRooted.push_back (isRooted);
+                        lHasPolytomies.push_back (hasPolys);
+                        lHasSingletons.push_back (hasSingletons);
+                        lNodeLabelVector.push_back (nodeLabelVector);
+		    }
+		}
+		else {
+		    continue;
+		}
+	    }
+	}
+
+	/* Get data */
+	if (returnData) {
+	    for (unsigned k = 0; k < nCharBlocks; ++k) {
+		NxsCharactersBlock * charBlock = nexusReader.GetCharactersBlock(taxaBlock, k);
+		// NOTE(review): the nCharBlocks == 0 test below can never be true inside this loop.
+		if (nCharBlocks == 0) {
+		    continue;
+		}
+		else {
+		    NxsString dtType = charBlock->GetNameOfDatatype(charBlock->GetDataType());
+		    dataTypes.push_back(dtType);
+
+		    if (charall) {
+			nCharToReturn=charBlock->GetNCharTotal();
+		    }
+		    else {
+			nCharToReturn=charBlock->GetNumIncludedChars();
+		    }
+		    nbCharacters.push_back (nCharToReturn);
+		    for (int eachChar=0; eachChar < nCharToReturn; ++eachChar) { //We only pass the non-eliminated chars
+			NxsString charLabel=charBlock->GetCharLabel(eachChar);
+			if (charLabel.length()>1) {
+			    charLabels.push_back (charLabel);
+			}
+			else {
+			    charLabels.push_back ("standard_char"); //FIXME: needs to fixed for sequence data
+			}
+
+			NxsString tmpCharString;
+			if (std::string("Continuous") == dtType) {
+			    tmpCharString = contData(*charBlock, tmpCharString, eachChar, nTax);
+			    nbStates.push_back (0);
+			}
+			else {
+			    if (std::string("Standard") == dtType) {
+				tmpCharString = stdData(*charBlock, tmpCharString, eachChar, nTax,
+							polyconvert);
+				unsigned int nCharStates = charBlock->GetNumObsStates(eachChar, false);
+				nbStates.push_back (nCharStates);
+				for (unsigned int l=0; l < nCharStates; ++l) {
+				    NxsString label = charBlock->GetStateLabel(eachChar, l);
+				    stateLabels.push_back (label);
+				}
+			    }
+			    else {
+				if (std::string("DNA") == dtType) {
+				    for (int taxon=0; taxon < nTax; ++taxon) {
+					for (int eachChar=0; eachChar < nCharToReturn; ++eachChar) { // NOTE(review): shadows the outer eachChar; the whole matrix is rebuilt for every character
+					    unsigned int nCharStates = charBlock->GetNumStates(taxon, eachChar);
+					    if (charBlock->IsGapState(taxon, eachChar)) {
+						tmpCharString += "-";
+					    }
+					    else {
+						if (charBlock->IsMissingState(taxon, eachChar)) {
+						    tmpCharString += "?";
+						}
+						else {
+						    if (nCharStates == 1) {
+							tmpCharString += charBlock->GetState(taxon, eachChar, 0);
+
+						    }
+						    else {
+							tmpCharString += "?"; //FIXME
+						    }
+						}
+					    }
+					}
+				    }
+				}
+				else { // other type of data not yet supported
+				    tmpCharString = "";
+				    nbStates.push_back (0);
+				    stateLabels.push_back (std::string(""));
+				}
+			    }
+			}
+			std::string charString = "c(" + tmpCharString + ");";
+			dataChr.push_back (charString);
+		    }
+		}
+	    }
+	}
+    }
+
+    /* Prepare list to return */
+    Rcpp::List res = Rcpp::List::create(Rcpp::Named("taxaNames") = taxaNames,
+					Rcpp::Named("treeNames") = treeNames,
+					Rcpp::Named("taxonLabelVector") = lTaxaLabelVector,
+					Rcpp::Named("parentVector") = lParentVector,
+					Rcpp::Named("branchLengthVector") = lBranchLengthVector,
+                                        Rcpp::Named("nodeLabelsVector") = lNodeLabelVector,
+					Rcpp::Named("trees") = trees,
+					Rcpp::Named("dataTypes") = dataTypes,
+					Rcpp::Named("nbCharacters") = nbCharacters,
+					Rcpp::Named("charLabels") = charLabels,
+					Rcpp::Named("nbStates") = nbStates,
+					Rcpp::Named("stateLabels") = stateLabels,
+					Rcpp::Named("dataChr") = dataChr,
+                                        Rcpp::Named("isRooted") = lIsRooted,
+                                        Rcpp::Named("hasPolytomies") = lHasPolytomies,
+                                        Rcpp::Named("hasSingletons") = lHasSingletons);
+
+    return res;
+}
diff --git a/src/Makevars b/src/Makevars
new file mode 100644
index 0000000..25844fb
--- /dev/null
+++ b/src/Makevars
@@ -0,0 +1,2 @@
+PKG_CPPFLAGS=-I. -DHAVE_INTTYPES_H -DASSERTS_TO_EXCEPTIONS
+## PKG_LIBS = `$(R_HOME)/bin/Rscript -e "Rcpp:::LdFlags()"`
diff --git a/src/Makevars.win b/src/Makevars.win
new file mode 100644
index 0000000..aa58e22
--- /dev/null
+++ b/src/Makevars.win
@@ -0,0 +1,6 @@
+## PKG_LIBS = -s $(shell Rscript -e 'Rcpp:::LdFlags()') -L"$(RHOME)/bin" -lR --no-export-all-symbols --add-stdcall-alias
+PKG_CXXFLAGS =  -I. -DHAVE_INTTYPES_H -DASSERTS_TO_EXCEPTIONS
+## PKG_LIBS = $(shell "${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe" -e "Rcpp:::LdFlags()")
+
+
+
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
new file mode 100644
index 0000000..fbade96
--- /dev/null
+++ b/src/RcppExports.cpp
@@ -0,0 +1,44 @@
+// Generated by using Rcpp::compileAttributes() -> do not edit by hand
+// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
+
+#include <Rcpp.h>
+
+using namespace Rcpp;
+
+// n_singletons
+int n_singletons(Rcpp::IntegerVector ances);
+RcppExport SEXP rncl_n_singletons(SEXP ancesSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type ances(ancesSEXP);
+    rcpp_result_gen = Rcpp::wrap(n_singletons(ances));
+    return rcpp_result_gen;
+END_RCPP
+}
+// collapse_single_cpp
+Rcpp::List collapse_single_cpp(Rcpp::IntegerVector ances, Rcpp::IntegerVector desc, Rcpp::NumericVector elen, Rcpp::NumericVector nnode);
+RcppExport SEXP rncl_collapse_single_cpp(SEXP ancesSEXP, SEXP descSEXP, SEXP elenSEXP, SEXP nnodeSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type ances(ancesSEXP);
+    Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type desc(descSEXP);
+    Rcpp::traits::input_parameter< Rcpp::NumericVector >::type elen(elenSEXP);
+    Rcpp::traits::input_parameter< Rcpp::NumericVector >::type nnode(nnodeSEXP);
+    rcpp_result_gen = Rcpp::wrap(collapse_single_cpp(ances, desc, elen, nnode));
+    return rcpp_result_gen;
+END_RCPP
+}
+// RNCL
+Rcpp::List RNCL(SEXP params, SEXP paramsVecR);
+RcppExport SEXP rncl_RNCL(SEXP paramsSEXP, SEXP paramsVecRSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< SEXP >::type params(paramsSEXP);
+    Rcpp::traits::input_parameter< SEXP >::type paramsVecR(paramsVecRSEXP);
+    rcpp_result_gen = Rcpp::wrap(RNCL(params, paramsVecR));
+    return rcpp_result_gen;
+END_RCPP
+}
diff --git a/src/collapse_singles.cpp b/src/collapse_singles.cpp
new file mode 100644
index 0000000..70fb3e1
--- /dev/null
+++ b/src/collapse_singles.cpp
@@ -0,0 +1,128 @@
+#include <Rcpp.h>
+#include <iostream>
+#include <set>
+#include <algorithm>
+#include <RProgress.h>
+
+std::vector<int> tabulate_tips (Rcpp::IntegerVector ances) {
+// Counts how often each node id 1..max(ances) occurs in ances; ids <= 0 (the root marker) are not counted.
+    int n = 0; if (ances.size() > 0) n = Rcpp::max(ances); // the max of an empty vector is not a usable size
+    std::vector<int> ans(n > 0 ? n : 0); // a non-positive max (all zeros, or presumably NA input) yields an empty table
+    for (int i=0; i < ances.size(); i++) {
+        int j = ances[i];
+        if (j > 0 && j <= n) { // upper bound keeps the write inside ans even when n was clamped
+            ans[j - 1]++;
+        }
+    }
+    return ans;
+}
+
+bool is_one(int i) { return 1 == i; } // predicate: true only for the value 1
+
+//[[Rcpp::export]]
+int n_singletons (Rcpp::IntegerVector ances) {
+    // number of node ids that tabulate_tips() reports exactly once in ances
+    const std::vector<int> counts = tabulate_tips(ances);
+    return std::count_if(counts.begin(), counts.end(), is_one);
+}
+
+
+Rcpp::IntegerVector which_integer(Rcpp::IntegerVector x, Rcpp::IntegerVector yInt) {
+  // 0-based positions of the elements of x that equal the first element of yInt
+  Rcpp::IntegerVector positions = Rcpp::seq(0, x.size()-1);
+  const int target = yInt[0];
+  return positions[x == target];
+}
+
+Rcpp::IntegerVector match_and_substract(Rcpp::IntegerVector x, Rcpp::IntegerVector yInt) {
+    // Lowers by one every element of x that is greater than yInt[0]; x is written to directly and returned.
+    const int threshold = yInt[0];
+    for (Rcpp::IntegerVector::iterator it = x.begin(); it != x.end(); ++it) {
+	if (*it > threshold)
+	    *it = *it - 1;
+    }
+    return x;
+}
+
+std::vector<int> match_and_substract (std::vector<int> x, int y) {
+    // x is taken by value: each element greater than y is lowered by one in the copy
+    for (std::vector<int>::iterator it = x.begin(); it != x.end(); ++it) {
+	if (*it > y)
+	    *it -= 1;
+    }
+    return x;
+}
+
+
+//[[Rcpp::export]]
+Rcpp::List collapse_single_cpp(
+    Rcpp::IntegerVector ances,
+    Rcpp::IntegerVector desc,
+    Rcpp::NumericVector elen,
+    Rcpp::NumericVector nnode) {
+    // Removes, one at a time, every node id that occurs exactly once in ances from the edge table (ances, desc, elen).
+    int n_singles = n_singletons(ances);
+    // Tabulate the ids in ances and locate those counted once (0-based position = node id - 1).
+    std::vector<int> tab_node = tabulate_tips(ances);
+    Rcpp::IntegerVector tab_node_rcpp(tab_node.size());
+    tab_node_rcpp = tab_node;
+    Rcpp::IntegerVector position_singleton = which_integer(tab_node_rcpp, Rcpp::IntegerVector::create(1));
+    Rcpp::IntegerVector position_singleton_orig = position_singleton;
+
+    RProgress::RProgress pb("Progress [:bar] :current/:total (:percent) :eta", (double) n_singles, 60);
+    pb.tick(0);
+
+    while (position_singleton.size() > 0) {
+	// Each pass handles the first remaining singleton:
+	//   - i is its 0-based position, so its node id is i + 1;
+	//   - prev_node: positions in desc that hold this id;
+	//   - next_node: positions in ances that hold this id;
+	//   - desc at prev_node takes over desc at next_node, the row found
+	//     in ances is erased from desc and ances, and every value
+	//     greater than i is lowered by one.
+	// The tabulation is then recomputed for the next pass.
+	int i = position_singleton[0];
+	Rcpp::IntegerVector iV(1);
+	iV = i;
+	//Rcpp::Rcout << "i is " << i << " and iV is " << iV << std::endl;
+	Rcpp::IntegerVector prev_node = which_integer(desc, iV + 1);
+	Rcpp::IntegerVector next_node = which_integer(ances, iV + 1);
+	//Rcpp::Rcout << "prev_node is " << prev_node << " and next_node is " << next_node << std::endl;
+	//Rcpp::Rcout << "before desc:";
+	//for (unsigned k = 0; k < desc.size(); ++k)
+	//    Rcpp::Rcout << " " << desc[k];
+	//Rcpp::Rcout << std::endl;
+	desc[prev_node] = desc[next_node];
+	//Rcpp::Rcout << "after desc:";
+	//for (unsigned k = 0; k < desc.size(); ++k)
+	//    Rcpp::Rcout << " " << desc[k];
+	//Rcpp::Rcout << std::endl;
+	Rcpp::IntegerVector to_rm = which_integer(ances, iV + 1);
+	//Rcpp::Rcout << "to_rm is " << to_rm << std::endl;
+	desc.erase(to_rm[0]);
+	ances.erase(to_rm[0]);
+	desc = match_and_substract(desc, iV);
+	ances = match_and_substract(ances, iV);
+	nnode = nnode - 1;
+
+	if (elen.size() > 0) {
+	    elen[prev_node] = elen[prev_node] + elen[next_node]; // prev_node/next_node were computed before the erase calls above; elen is still unshortened here
+	    elen.erase(next_node[0]);
+	}
+
+	tab_node = tabulate_tips(ances);
+	tab_node_rcpp(tab_node.size()); // NOTE(review): looks like an element access via operator(), not a resize; result unused - confirm
+	tab_node_rcpp = tab_node;
+	position_singleton = which_integer(tab_node_rcpp, Rcpp::IntegerVector::create(1));
+
+	pb.tick();
+    }
+    // position_singletons reports the positions found before the loop ran.
+    Rcpp::List res = Rcpp::List::create(
+	Rcpp::Named("ances") = ances,
+	Rcpp::Named("desc") = desc,
+	Rcpp::Named("Nnode") = nnode,
+	Rcpp::Named("edge.length") = elen,
+	Rcpp::Named("position_singletons") = position_singleton_orig);
+
+    return res;
+}
diff --git a/src/ncl/ncl.h b/src/ncl/ncl.h
new file mode 100644
index 0000000..25a6f1c
--- /dev/null
+++ b/src/ncl/ncl.h
@@ -0,0 +1,97 @@
+//	Copyright (C) 1999-2003 Paul O. Lewis
+//
+//	This file is part of NCL (Nexus Class Library) version 2.0.
+//
+//	NCL is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	NCL is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with NCL; if not, write to the Free Software Foundation, Inc.,
+//	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+
+#ifndef NCL_NCL_H
+#define NCL_NCL_H
+
+#if !defined(__DECCXX)
+#	include <cctype>
+#	include <cmath>
+#	include <cstdarg>
+#	include <cstdio>
+#	include <cstdarg>
+#	include <cstdlib>
+#	include <ctime>
+#	include <climits>
+#	include <cfloat>
+#else
+#	include <ctype.h>
+#	include <stdarg.h>
+#	include <math.h>
+#	include <stdarg.h>
+#	include <stdio.h>
+#	include <stdlib.h>
+#	include <time.h>
+#	include <float.h>
+#endif
+
+#include <algorithm>
+#include <fstream>
+#include <iomanip>
+#include <iostream>
+#include <list>
+#include <map>
+#include <set>
+#include <stdexcept>
+#include <string>
+#if defined(__GNUC__)
+#	if __GNUC__ < 3
+#		include <strstream>
+#	else
+#		include <sstream>
+#	endif
+#endif
+#include <vector>
+
+# if ! defined (NCL_AVOID_USING_STD)
+	using namespace std;
+#endif
+
+#if defined( __BORLANDC__ )
+#	include <dos.h>
+#endif
+
+#if defined(__MWERKS__)
+#	define HAVE_PRAGMA_UNUSED
+		// mwerks (and may be other compilers) want return values even if the function throws an exception
+		//
+#	define DEMANDS_UNREACHABLE_RETURN
+
+#endif
+
+#include "ncl/nxsdefs.h"
+#include "ncl/nxsstring.h"
+#include "ncl/nxsexception.h"
+#include "ncl/nxstoken.h"
+#include "ncl/nxsblock.h"
+#include "ncl/nxsreader.h"
+#include "ncl/nxssetreader.h"
+#include "ncl/nxstaxablock.h"
+#include "ncl/nxstreesblock.h"
+#include "ncl/nxsdistancedatum.h"
+#include "ncl/nxsdistancesblock.h"
+#include "ncl/nxsdiscretedatum.h"
+#include "ncl/nxscharactersblock.h"
+#include "ncl/nxsassumptionsblock.h"
+#include "ncl/nxsdatablock.h"
+#include "ncl/nxsunalignedblock.h"
+#include "ncl/nxspublicblocks.h"
+#include "ncl/nxsmultiformat.h"
+
+#endif
diff --git a/src/ncl/nxsallocatematrix.h b/src/ncl/nxsallocatematrix.h
new file mode 100644
index 0000000..ee07bb9
--- /dev/null
+++ b/src/ncl/nxsallocatematrix.h
@@ -0,0 +1,253 @@
+//	Copyright (C) 2008 Mark Holder
+//
+//	This file is part of NCL (Nexus Class Library) version 2.1
+//
+//	NCL is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	NCL is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with NCL; if not, write to the Free Software Foundation, Inc.,
+//	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+// This code is based on code developed by Mark Holder for the CIPRES project
+
+#if !defined (NXS_ALLOCATE_MATRIX_H)
+# define NXS_ALLOCATE_MATRIX_H
+
+#include "ncl/nxsdefs.h"
+
+template<typename T>
+T *** NewThreeDArray(unsigned f , unsigned s , unsigned t);
+template<typename T>
+T ** NewTwoDArray(unsigned f , unsigned s);
+template<typename T>
+void DeleteThreeDArray(T ***& ptr);
+template<typename T>
+void DeleteTwoDArray(T **& ptr);
+
+/*!
+ Allocates a three dimensional array of T as one contiguous block of memory
+ the dimensions are f two dimensional arrays that are s by t.
+
+	The pointer should be freed by a call to DeleteThreeDArray
+
+ the array is set up so that
+ for(i = 0 ; i < f ; i++)
+	for (j = 0 ; j < s ; j++)
+		for (k = 0 ; k < t; k++)
+			array[i][j][k];
+
+ would be the same order of access as:
+
+	T *ptr = **array;
+	for (i = 0 ; i < f*s*t ; i++)
+		*ptr++;
+
+*/
+template<typename T> T *** NewThreeDArray(unsigned f , unsigned s , unsigned t)
+	{
+	NCL_ASSERT(f > 0 && s > 0 && t> 0);
+	const unsigned twoDStride = s*t;
+	T ***ptr;
+	ptr = new T **[f]; // one entry per s-by-t slab
+	ptr[0] = new T *[f * s]; // all row pointers in a single allocation
+	ptr[0][0] = new T[f * s * t]; // all elements in a single allocation
+	for (unsigned sIt = 1 ; sIt < s ; sIt++)
+		ptr[0][sIt] = ptr[0][sIt-1] + t ;
+	for (unsigned fIt = 1 ; fIt < f ; fIt ++)
+		{
+		ptr[fIt] = ptr[fIt -1] +  s ;
+		ptr[fIt][0] = ptr[fIt -1][0] + twoDStride;
+		for (unsigned sIt = 1 ; sIt < s ; sIt++)
+			ptr[fIt][sIt] = ptr[fIt][sIt-1] + t ;
+		}
+	return ptr;
+	}
+
+/*!
+ Delete a Three Dimensional Array that has been allocated using NewThreeDArray and sets the pointer to NULL
+*/
+template<typename T> void DeleteThreeDArray	(T *** & ptr)
+	{
+	if (ptr)
+		{
+		if (*ptr)
+			{
+			delete [] **ptr; // the block reached through ptr[0][0]
+			delete [] * ptr; // the block reached through ptr[0]
+			}
+		delete [] ptr;
+		}
+	ptr = NULL; // done unconditionally, so a NULL argument stays NULL
+	}
+
+/*!
+ 	Allocates a two dimensional array of T as one contiguous block of memory
+ 	the dimensions are f by s.
+
+	The pointer should be freed by a call to DeleteTwoDArray
+
+ 	The array is set up so that:
+
+	for(i = 0 ; i < f ; i++)
+		for (j = 0 ; j < s ; j++)
+			array[i][j];
+
+	would be the same order of access as:
+
+  	T *ptr = *array;
+	for (i = 0 ; i < f*s ; i++)
+		*ptr++;
+*/
+template<typename T> T **NewTwoDArray(unsigned f , unsigned s)
+	{
+	NCL_ASSERT(f > 0 && s > 0);
+	T **ptr;
+	ptr = new T *[f]; // one pointer per row
+	*ptr = new T [f * s]; // all elements in a single allocation
+	for (unsigned fIt = 1 ; fIt < f ; fIt ++)
+		ptr[fIt] = ptr[fIt -1] +  s ;
+	return ptr;
+	}
+
+/*!
+ Delete a 2 Dimensional Array allocated using NewTwoDArray and set the ptr to NULL
+*/
+template<typename T> inline void DeleteTwoDArray	(T ** & ptr)
+	{
+	if (ptr)
+		{
+		delete [] * ptr; // the block reached through ptr[0]
+		delete [] ptr; // the array of row pointers
+		ptr = NULL;
+		}
+	}
+
+// Holds a 3-D array made by NewThreeDArray and frees it in the destructor unless Surrender() was called. NOTE(review): no copy constructor/assignment is declared; a copy would free the same ptr again.
+template<typename T>
+class ScopedThreeDMatrix
+	{
+	public:
+		T *** ptr;
+
+		/*! returns an alias to the memory, but does not "surrender" the
+			ownership of the pointer to the caller
+		*/
+		T *** GetAlias() const
+			{
+			return ptr;
+			}
+		/*! Creates a new matrix.  See NewThreeDArray() for argument  explanation */
+		ScopedThreeDMatrix(unsigned f = 0, unsigned s = 0, unsigned t = 0)
+			:ptr(NULL)
+			{
+			Initialize(f, s, t);
+			}
+		/*! Frees the old matrix, and creates a new matrix.  See NewThreeDArray() for argument explanation  */
+		void Initialize(unsigned f = 0, unsigned s = 0, unsigned t = 0)
+			{
+			Free();
+			if (f > 0 && s > 0 && t > 0) // any zero dimension leaves ptr NULL
+				ptr = NewThreeDArray<T>(f, s, t);
+			}
+		/*! returns an alias to the memory, and "forgets" about the memory.
+			The caller is responsible for assuring that DeleteThreeDArray is
+			called on the pointer.
+		*/
+		T ***Surrender()
+			{
+			T ***temp = ptr;
+			ptr = NULL;
+			return temp;
+			}
+		~ScopedThreeDMatrix()
+			{
+			Free();
+			}
+		/*! Releases the memory. */
+		void Free()
+			{
+			if (ptr != NULL)
+				{
+				DeleteThreeDArray<T>(ptr);
+				ptr = 0L;
+				}
+			}
+	};
+
+
+/*!
+	Simple memory-management class for a 2-D array that is allocated using NewTwoDArray
+
+	Memory is deleted when the instance goes out of scope, unless Surrender is called.
+	NOTE(review): no copy constructor/assignment is declared; a copied instance would free the same memory again.
+*/
+template<typename T>
+class ScopedTwoDMatrix
+	{
+
+	public:
+		T ** ptr;
+
+		/*! returns an alias to the memory, but does not "surrender" the
+			ownership of the pointer to the caller
+		*/
+		T ** GetAlias() const
+			{
+			return ptr;
+			}
+		/*! Creates a new matrix.  See NewTwoDArray() for argument explanation  */
+		ScopedTwoDMatrix(unsigned f = 0, unsigned s = 0)
+			:ptr(NULL)
+			{
+			Initialize(f, s);
+			}
+		/*! Frees the old matrix, and creates a new matrix.  See NewTwoDArray() for argument explanation  */
+		void Initialize(unsigned f, unsigned s)
+			{
+			Free();
+			if (f > 0 && s > 0) // any zero dimension leaves ptr NULL
+				ptr = NewTwoDArray<T>(f, s);
+			}
+		/*! returns an alias to the memory, and "forgets" about the memory.
+			The caller is responsible for assuring that DeleteTwoDArray is
+			called on the pointer.
+		*/
+		T **Surrender()
+			{
+			T** temp = ptr;
+			ptr = NULL;
+			return temp;
+			}
+
+		~ScopedTwoDMatrix()
+			{
+			Free();
+			}
+
+		/*! Releases the memory. */
+		void Free()
+			{
+			if (ptr != NULL)
+				{
+				DeleteTwoDArray<T>(ptr);
+				ptr = 0L;
+				}
+			}
+
+	};
+
+typedef ScopedTwoDMatrix<double> ScopedDblTwoDMatrix;
+typedef ScopedTwoDMatrix<unsigned> ScopedUIntTwoDMatrix;
+
+typedef ScopedThreeDMatrix<double> ScopedDblThreeDMatrix;
+typedef ScopedThreeDMatrix<unsigned> ScopedUIntThreeDMatrix;
+
+#endif
diff --git a/src/ncl/nxsassumptionsblock.h b/src/ncl/nxsassumptionsblock.h
new file mode 100644
index 0000000..2836145
--- /dev/null
+++ b/src/ncl/nxsassumptionsblock.h
@@ -0,0 +1,391 @@
+//	Copyright (C) 1999-2003 Paul O. Lewis
+//
+//	This file is part of NCL (Nexus Class Library) version 2.0.
+//
+//	NCL is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	NCL is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with NCL; if not, write to the Free Software Foundation, Inc.,
+//	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+
+#ifndef NCL_ASSUMPTIONSBLOCK_H
+#define NCL_ASSUMPTIONSBLOCK_H
+
+#include <vector>
+
+#include "ncl/nxsdefs.h"
+#include "ncl/nxsblock.h"
+#include "ncl/nxstreesblock.h"
+#include "ncl/nxscharactersblock.h"
+#include "ncl/nxstaxablock.h"
+
+class NxsCharactersBlockAPI;
+class NxsTaxaBlockAPI;
+
+class NxsAssumptionsBlockAPI /* abstract interface (pure virtual members below) implemented by NxsAssumptionsBlock */
+  : public NxsBlock
+	{
+	public:
+  		virtual void	SetCallback(NxsCharactersBlockAPI *p) = 0;
+
+		/* Setters/getters for the characters, taxa and trees blocks linked to this block.
+		   Each setter receives the block pointer together with a NxsBlockLinkStatus value. */
+		virtual void SetCharBlockPtr(NxsCharactersBlockAPI * c, NxsBlockLinkStatus s) = 0;
+		virtual void SetTaxaBlockPtr(NxsTaxaBlockAPI *, NxsBlockLinkStatus s) = 0;
+		virtual void SetTreesBlockPtr(NxsTreesBlockAPI *, NxsBlockLinkStatus s) = 0;
+		/* `status` is optional (defaults to NULL); presumably it receives the link status - confirm in the implementation. */
+		virtual NxsCharactersBlockAPI * GetCharBlockPtr(int *status=NULL) = 0; /*v2.1to2.2 13 */
+		virtual NxsTaxaBlockAPI * GetTaxaBlockPtr(int *status=NULL) = 0; /*v2.1to2.2 13 */
+		virtual NxsTreesBlockAPI * GetTreesBlockPtr(int *status=NULL) = 0; /*v2.1to2.2 13 */
+
+		/* i14 */ /*v2.1to2.2 14 */
+		/* i15 */ /*v2.1to2.2 15 */
+		/* i16 */ /*v2.1to2.2 16 */
+		/* Add a named partition (or code set / codon-position set, which are also passed as NxsPartition). */
+		virtual void AddCharPartition(const std::string & name, const NxsPartition &) = 0;
+		virtual void AddTaxPartition(const std::string & name, const NxsPartition &) = 0;
+		virtual void AddTreePartition(const std::string & name, const NxsPartition &) = 0;
+		virtual void AddCodeSet(const std::string & name, const NxsPartition &, bool asterisked) = 0;
+		virtual void AddCodonPosSet(const std::string & name, const NxsPartition &, bool asterisked) = 0;
+		/* Mark a block link as used (NxsAssumptionsBlock ORs NxsBlock::BLOCK_LINK_USED into the matching link status). */
+		virtual void FlagCharBlockAsUsed() = 0;
+		virtual void FlagTaxaBlockAsUsed() = 0;
+		virtual void FlagTreesBlockAsUsed() = 0;
+		/* Read one set definition from `token`; the set name is passed by value. Public here, protected in NxsAssumptionsBlock. */
+  		virtual void ReadCharsetDef(NxsString charset_name, NxsToken &token, bool asterisked) = 0;
+  		virtual void ReadExsetDef(NxsString charset_name, NxsToken &token, bool asterisked) = 0;
+		virtual void ReadTaxsetDef(NxsString set_name, NxsToken &token, bool asterisked) = 0;
+		virtual void ReadTreesetDef(NxsString set_name, NxsToken &token, bool asterisked) = 0;
+
+		virtual NxsTransformationManager & GetNxsTransformationManagerRef() = 0;
+		virtual const NxsTransformationManager & GetNxsTransformationManagerConstRef() const = 0;
+		virtual NxsGeneticCodesManager & GetNxsGeneticCodesManagerRef() = 0;
+		virtual void SetGapsAsNewstate(bool v) = 0;
+
+		/*!  delegates call to the NxsTransformationManager returned by GetNxsTransformationManagerConstRef() */
+		virtual std::vector<double> GetDefaultDoubleWeights() const
+			{
+		    return GetNxsTransformationManagerConstRef().GetDefaultDoubleWeights();
+			}
+
+		/*!  delegates call to the NxsTransformationManager returned by GetNxsTransformationManagerConstRef() */
+		virtual std::vector<int> GetDefaultIntWeights() const {
+		    return GetNxsTransformationManagerConstRef().GetDefaultIntWeights();
+		}
+
+ 	};
+
+/*!
+	This class handles reading and storage for the NxsReader block ASSUMPTIONS. It overrides the member functions Read
+	and Reset, which are abstract virtual functions in the base class NxsBlock. Adding a new data member? Don't forget
+	to:
+~
+	o Describe it in the class declaration using a C-style comment.
+	o Initialize it (unless it is self-initializing) in the constructor and re-initialize it in the Reset function.
+	o Describe the initial state in the constructor documentation.
+	o Delete memory allocated to it in both the destructor and Reset function.
+	o Report it in some way in the Report function.
+~
+*/
+class NxsAssumptionsBlock /* instantiable implementation of NxsAssumptionsBlockAPI (see Clone() below) */
+  : public NxsAssumptionsBlockAPI
+	{
+	enum NameOfAssumpBlockAsRead /* type of the `readAs` member; returned by GetIDOfBlockTypeIDFromParse() */
+		{
+		UNREAD_OR_GENERATED_BLOCK,
+		ASSUMPTIONS_BLOCK_READ,
+		SETS_BLOCK_READ,
+		CODONS_BLOCK_READ
+		};
+
+
+	public:
+							NxsAssumptionsBlock(NxsTaxaBlockAPI *t);
+		virtual				~NxsAssumptionsBlock();
+
+		virtual bool		CanReadBlockType(const NxsToken & token);
+
+		void				ReplaceTaxaBlockPtr(NxsTaxaBlockAPI *tb);
+		void				SetCallback(NxsCharactersBlockAPI *p);
+
+		int					GetNumCharSets() const;
+		/* i17 */ /*v2.1to2.2 17 */
+		void				GetCharSetNames(NxsStringVector &names) const; /*v2.1to2.2 3 */
+		const NxsUnsignedSet *GetCharSet(NxsString nm) const; /*v2.1to2.2 4 */
+
+		int					GetNumCharPartitions(); /*v2.1to2.2 6 */
+		void				GetCharPartitionNames(std::vector<std::string> &names); /*v2.1to2.2 6 */
+		const NxsPartition		*GetCharPartition(std::string nm) const;
+
+		int					GetNumTaxSets(); /*v2.1to2.2 6 */
+		void				GetTaxSetNames(NxsStringVector &names); /*v2.1to2.2 3 */ /*v2.1to2.2 6 */
+		NxsUnsignedSet &	GetTaxSet(NxsString nm); /*v2.1to2.2 6 */ /*v2.1to2.2 8 */ /*v2.1to2.2 4 */
+
+		int					GetNumExSets();/*v2.1to2.2 6 */
+		void				GetExSetNames(NxsStringVector &names); /*v2.1to2.2 3 */ /*v2.1to2.2 6 */
+		NxsUnsignedSet &	GetExSet(NxsString nm); /*v2.1to2.2 6 */ /*v2.1to2.2 8 */ /*v2.1to2.2 4 */
+		NxsString			GetDefExSetName(); /*v2.1to2.2 6 */ /*v2.1to2.2 4 */
+		void				ApplyExset(NxsString nm); /*v2.1to2.2 4 */
+
+		virtual void		Read(NxsToken& token);
+		virtual void		Report(std::ostream& out) NCL_COULD_BE_CONST ; /*v2.1to2.2 1 */
+		virtual void		Reset();
+		virtual void 		WriteAsNexus(std::ostream &out) const;
+
+		/* only used if the linkAPI is enabled */
+		virtual void		HandleLinkCommand(NxsToken & );
+		virtual void		WriteLinkCommand(std::ostream &out) const;
+
+		virtual VecBlockPtr		GetImpliedBlocks() /* forwards to GetCreatedTaxaBlocks() */
+			{
+			return GetCreatedTaxaBlocks();
+			}
+		/* Link-status accessors: each status is an int holding NxsBlockLinkStatus bits. */
+		int					GetCharLinkStatus() {return charLinkStatus;}
+		int					GetTaxaLinkStatus() {return taxaLinkStatus;}
+		int					GetTreesLinkStatus() {return treesLinkStatus;}
+		/* OR the NxsBlock::BLOCK_LINK_USED bit into the corresponding link status. */
+		void				FlagCharBlockAsUsed() {charLinkStatus |= NxsBlock::BLOCK_LINK_USED;}
+		void				FlagTaxaBlockAsUsed() {taxaLinkStatus |= NxsBlock::BLOCK_LINK_USED;}
+		void				FlagTreesBlockAsUsed() {treesLinkStatus |= NxsBlock::BLOCK_LINK_USED;}
+
+		void				SetCharLinkStatus(NxsBlockLinkStatus s);
+		void				SetTaxaLinkStatus(NxsBlockLinkStatus s);
+		void				SetTreesLinkStatus(NxsBlockLinkStatus s);
+
+		void				SetCharBlockPtr(NxsCharactersBlockAPI * c, NxsBlockLinkStatus s);
+		void				SetTaxaBlockPtr(NxsTaxaBlockAPI *, NxsBlockLinkStatus s);
+		void				SetTreesBlockPtr(NxsTreesBlockAPI *, NxsBlockLinkStatus s);
+		NxsCharactersBlockAPI * GetCharBlockPtr(int *status=NULL); /*v2.1to2.2 13 */
+		NxsTaxaBlockAPI *	GetTaxaBlockPtr(int *status=NULL); /*v2.1to2.2 13 */
+		NxsTreesBlockAPI *	GetTreesBlockPtr(int *status=NULL); /*v2.1to2.2 13 */
+		/* The three accessors below return references to the managers stored in this block (transfMgr, codesMgr). */
+		const NxsTransformationManager & GetNxsTransformationManagerConstRef() const
+			{
+			return transfMgr;
+			}
+		NxsTransformationManager & GetNxsTransformationManagerRef()
+			{
+			return transfMgr;
+			}
+		NxsGeneticCodesManager & GetNxsGeneticCodesManagerRef()
+			{
+			return codesMgr;
+			}
+		virtual void AddCharPartition(const std::string & name, const NxsPartition &);
+		virtual void AddTaxPartition(const std::string & name, const NxsPartition &);
+		virtual void AddTreePartition(const std::string & name, const NxsPartition &);
+		virtual void AddCodeSet(const std::string & name, const NxsPartition &, bool asterisked);
+		virtual void AddCodonPosSet(const std::string & name, const NxsPartition &, bool asterisked);
+
+		/*---------------------------------------------------------------------------------------
+		| Results in aliasing of the taxa, trees, and characters blocks!
+		| Copies the NxsBlock base fields first, then every field handled by CopyAssumptionsContents().
+		*/
+		NxsAssumptionsBlock &operator=(const NxsAssumptionsBlock &other)
+			{
+			CopyBaseBlockContents(static_cast<const NxsBlock &>(other));
+			CopyAssumptionsContents(other);
+			return *this;
+			}
+
+		/*---------------------------------------------------------------------------------------
+		| Results in aliasing of the taxa, trees, and characters blocks!
+		|
+		| this->passedRefOfOwnedBlock is set to true (not copied from `other`) to avoid double
+		|	deletion: `other` retains ownership of these blocks.
+		*/
+		virtual void CopyAssumptionsContents(const NxsAssumptionsBlock &other)
+			{
+			taxa = other.taxa;
+			charBlockPtr = other.charBlockPtr;
+			treesBlockPtr = other.treesBlockPtr;
+			charsets = other.charsets;
+			taxsets = other.taxsets;
+			treesets = other.treesets;
+			exsets = other.exsets;
+			charPartitions = other.charPartitions;
+			taxPartitions = other.taxPartitions;
+			treePartitions = other.treePartitions;
+			def_exset = other.def_exset;
+			charLinkStatus = other.charLinkStatus;
+			taxaLinkStatus = other.taxaLinkStatus;
+			treesLinkStatus = other.treesLinkStatus;
+			passedRefOfOwnedBlock = true; /* deliberately not copied: the copy never owns the aliased blocks */
+			readAs = other.readAs;
+			transfMgr = other.transfMgr;
+			codesMgr = other.codesMgr;
+			createdSubBlocks = other.createdSubBlocks; /* copies the pointers only, not the pointed-to blocks */
+			polyTCountValue = other.polyTCountValue;
+			gapsAsNewstate = other.gapsAsNewstate;
+            blockwideCharsLinkEstablished = other.blockwideCharsLinkEstablished;
+            blockwideTaxaLinkEstablished = other.blockwideTaxaLinkEstablished;
+            blockwideTreesLinkEstablished = other.blockwideTreesLinkEstablished;
+
+			codonPosSets = other.codonPosSets;
+			def_codonPosSet = other.def_codonPosSet;
+			codeSets = other.codeSets;
+			def_codeSet = other.def_codeSet;
+			}
+		/* Allocates a new block constructed from `taxa`, assigns *this to it and returns the raw pointer. */
+		virtual NxsAssumptionsBlock * Clone() const
+			{
+			NxsAssumptionsBlock * a = new NxsAssumptionsBlock(taxa);
+			*a = *this;
+			return a;
+			}
+		virtual void SetGapsAsNewstate(bool v) /* stores `v` in gapsAsNewstate */
+			{
+			gapsAsNewstate = v;
+			}
+
+
+	protected:
+		typedef std::vector<NxsAssumptionsBlockAPI *> VecAssumpBlockPtr;
+
+		virtual void 		ReadCharsetDef(NxsString charset_name, NxsToken &token, bool asterisked);
+		virtual void 		ReadExsetDef(NxsString charset_name, NxsToken &token, bool asterisked);
+		virtual void 		ReadTreesetDef(NxsString set_name, NxsToken &token, bool asterisked);
+		virtual void 		ReadTaxsetDef(NxsString set_name, NxsToken &token, bool asterisked);
+
+
+		VecBlockPtr 		GetCreatedTaxaBlocks();
+		virtual unsigned	TaxonLabelToNumber(NxsString s) const; /*v2.1to2.2 4 */
+		/* Handle* members: presumably one per command of the block; each takes the token stream being read. */
+		void				HandleCharPartition(NxsToken& token);
+		void				HandleCharSet(NxsToken& token);
+		void				HandleCodeSet(NxsToken& token);
+		void				HandleCodonPosSet(NxsToken& token);
+		void				HandleExSet(NxsToken& token);
+		void				HandleOptions(NxsToken & token);
+		void				HandleTaxPartition(NxsToken& token);
+		void				HandleTaxSet(NxsToken& token);
+		void				HandleTreePartition(NxsToken& token);
+		void				HandleTreeSet(NxsToken& token);
+		void				HandleTypeSet(NxsToken& token);
+		void				HandleUserType(NxsToken& token);
+		void				HandleWeightSet(NxsToken& token);
+		/* Inline Write* helpers: each forwards one stored collection to NxsWriteSetCommand / NxsWritePartitionCommand. */
+		void				WriteCharSet(std::ostream &out) const
+			{
+			NxsWriteSetCommand("CHARSET", charsets, out);
+			}
+		void				WriteCharPartition(std::ostream &out) const
+			{
+			NxsWritePartitionCommand("CharPartition", charPartitions, out);
+			}
+		void				WriteExSet(std::ostream &out) const /* also passes the default exset name */
+			{
+			NxsWriteSetCommand("EXSET", exsets, out, def_exset.c_str());
+			}
+		void				WriteOptions(std::ostream &out) const;
+		void				WriteTaxPartition(std::ostream &out) const
+			{
+			NxsWritePartitionCommand("TaxPartition", taxPartitions, out);
+			}
+		void				WriteTaxSet(std::ostream &out) const
+			{
+			NxsWriteSetCommand("TAXSET", taxsets, out);
+			}
+		void				WriteTreePartition(std::ostream &out) const
+			{
+			NxsWritePartitionCommand("TreePartition", treePartitions, out);
+			}
+		void				WriteTreeSet(std::ostream &out) const
+			{
+			NxsWriteSetCommand("TREESET", treesets, out);
+			}
+		void WriteCodeSet(std::ostream &out) const /* also passes the default code set name */
+			{
+			NxsWritePartitionCommand("CodeSet", codeSets, out, def_codeSet.c_str());
+			}
+		void WriteCodonPosSet(std::ostream &out) const /* also passes the default codonPosSet name */
+			{
+			NxsWritePartitionCommand("CodonPosSet", codonPosSets, out, def_codonPosSet.c_str());
+			}
+		NameOfAssumpBlockAsRead	GetIDOfBlockTypeIDFromParse() const /* returns readAs */
+			{
+			return readAs;
+			}
+	private:
+		NxsAssumptionsBlockAPI  *GetAssumptionsBlockForCharTitle(const char *title, NxsToken &token, const char *cmd);
+		NxsAssumptionsBlockAPI  *GetAssumptionsBlockForTaxaTitle(const char *title, NxsToken &token, const char *cmd);
+		NxsAssumptionsBlockAPI  *GetAssumptionsBlockForTreesTitle(const char *title, NxsToken &token, const char *cmd);
+
+		NxsAssumptionsBlockAPI  *GetAssumptionsBlockForCharBlock(NxsCharactersBlockAPI *, NxsBlockLinkStatus, NxsToken &token);
+		NxsAssumptionsBlockAPI  *GetAssumptionsBlockForTaxaBlock(NxsTaxaBlockAPI *, NxsBlockLinkStatus, NxsToken &token);
+		NxsAssumptionsBlockAPI  *GetAssumptionsBlockForTreesBlock(NxsTreesBlockAPI *, NxsBlockLinkStatus, NxsToken &token);
+
+		NxsAssumptionsBlockAPI  *CreateNewAssumptionsBlock(NxsToken &token);
+		NxsAssumptionsBlockAPI *DealWithPossibleParensInCharDependentCmd(NxsToken &token, const char *cmd, const std::vector<std::string> *unsupported = NULL, bool * isVect = NULL);
+		bool					HasAssumptionsBlockCommands() const;
+		bool					HasSetsBlockCommands() const;
+		bool					HasCodonsBlockCommands() const;
+
+
+		NxsTaxaBlockAPI			*taxa;				/* pointer to the NxsTaxaBlockAPI object */
+		NxsCharactersBlockAPI	*charBlockPtr;		/* pointer to the NxsCharactersBlockAPI-derived object to be notified in the event of exset changes */
+		NxsTreesBlockAPI		*treesBlockPtr;		/* pointer to the NxsTreesBlockAPI-derived object; NOTE(review): original wording ("notified in the event of exset changes") looks copied from charBlockPtr - confirm */
+
+		/* Named index sets written by WriteCharSet / WriteTaxSet / WriteTreeSet / WriteExSet respectively. */
+
+		NxsUnsignedSetMap	charsets;
+		NxsUnsignedSetMap	taxsets;
+		NxsUnsignedSetMap	treesets;
+		NxsUnsignedSetMap	exsets;
+
+		NxsPartitionsByName charPartitions;
+		NxsPartitionsByName taxPartitions;
+		NxsPartitionsByName treePartitions;
+
+		NxsString			def_exset;			/* the default exset */
+
+		NxsPartitionsByName codonPosSets;
+		NxsString			def_codonPosSet;	/* the default codonPosSet */
+		NxsPartitionsByName codeSets;
+		NxsString			def_codeSet;		/* the default codeSet */
+
+		int					charLinkStatus;		/* NxsBlockLinkStatus bits; see FlagCharBlockAsUsed() */
+		int					taxaLinkStatus;		/* NxsBlockLinkStatus bits; see FlagTaxaBlockAsUsed() */
+		int					treesLinkStatus;	/* NxsBlockLinkStatus bits; see FlagTreesBlockAsUsed() */
+		bool				passedRefOfOwnedBlock;	/* forced to true by CopyAssumptionsContents() */
+		NameOfAssumpBlockAsRead	readAs;			/* returned by GetIDOfBlockTypeIDFromParse() */
+		NxsTransformationManager transfMgr;		/* exposed by GetNxsTransformationManagerRef()/ConstRef() */
+		NxsGeneticCodesManager	codesMgr;		/* exposed by GetNxsGeneticCodesManagerRef() */
+
+		std::vector<NxsAssumptionsBlockAPI *> createdSubBlocks;
+		enum PolyTCountValue
+			{
+			POLY_T_COUNT_UNKNOWN,
+			POLY_T_COUNT_MIN,
+			POLY_T_COUNT_MAX
+			};
+		PolyTCountValue		polyTCountValue;
+		bool				gapsAsNewstate;		/* written by SetGapsAsNewstate() */
+		bool blockwideCharsLinkEstablished;
+		bool blockwideTaxaLinkEstablished;
+		bool blockwideTreesLinkEstablished;
+
+		friend class NxsAssumptionsBlockFactory;
+		friend class PublicNexusReader;
+	};
+
+class NxsAssumptionsBlockFactory /* NxsBlockFactory specialization that hands out NxsAssumptionsBlock pointers */
+	:public NxsBlockFactory
+	{
+	public:
+		virtual NxsAssumptionsBlock * GetBlockReaderForID(const std::string & blockTypeName, NxsReader *reader, NxsToken *token); /* covariant return: the base class declares NxsBlock * */
+	};
+
+typedef NxsAssumptionsBlock AssumptionsBlock;	// for backward compatibility
+
+#endif
+
diff --git a/src/ncl/nxsblock.h b/src/ncl/nxsblock.h
new file mode 100644
index 0000000..3927a89
--- /dev/null
+++ b/src/ncl/nxsblock.h
@@ -0,0 +1,363 @@
+//	Copyright (C) 1999-2003 Paul O. Lewis
+//
+//	This file is part of NCL (Nexus Class Library) version 2.0.
+//
+//	NCL is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	NCL is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with NCL; if not, write to the Free Software Foundation, Inc.,
+//	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+#ifndef NCL_NXSBLOCK_H
+#define NCL_NXSBLOCK_H
+
+#include <vector>
+#include "ncl/nxsdefs.h"
+#include "ncl/nxsexception.h"
+#include "ncl/nxstoken.h"
+
+class NxsReader;
+class NxsBlock;
+class NxsTaxaBlockAPI;
+
+typedef std::vector<NxsBlock *> VecBlockPtr; /* sequence of pointers to mutable blocks */
+typedef std::vector<const NxsBlock *> VecConstBlockPtr; /* sequence of pointers to const blocks */
+typedef std::pair<const NxsBlock *, std::string> BlockUniqueID; /* (block address, instance name); built by NxsBlock::GetInstanceIdentifier() */
+typedef std::map<BlockUniqueID, NxsBlock *> NxsBlockMapper; /* BlockUniqueID -> block; type of the `memo` argument of NxsBlock::CloneBlock() */
+
+
+/*! This is the base class for the block interfaces that correspond to blocks
+that hold ordered lists (TAXA, CHARACTERS, TREES).
+
+	This interface is used internally during parsing, and is usually NOT
+	called directly by client code.
+*/
+class NxsLabelToIndicesMapper /* abstract: most members are pure virtual */
+	{
+	public:
+		virtual ~NxsLabelToIndicesMapper(){}
+		virtual unsigned GetMaxIndex() const = 0;
+		virtual unsigned GetNumLabelsCurrentlyStored() const {return GetMaxIndex();} /* default: same value as GetMaxIndex() */
+		/* Adds the 0-based indices corresponding to a label to the set.
+
+		 \returns the number of indices that correspond to the label (and the number
+		 of items that would be added to *inds if inds points to an empty set).
+		*/
+		virtual unsigned GetIndicesForLabel(const std::string &label, /* label, set name or string with the 1-based numeric representation of the object */
+											NxsUnsignedSet *inds /* The set of indices to add the taxa indices to (can be 0L). */
+											) const = 0;
+		/* Confusingly named function.
+			This function looks for the index set that is named `label` in the NxsLabelToIndicesMapper.
+			It adds the indices from this set into `toFill` (if toFill is not NULL).
+			\returns the size of the set which was named label (the number of indices that were inserted).
+		*/
+		virtual unsigned GetIndexSet(const std::string &label, NxsUnsignedSet * toFill) const = 0;
+		/* Adds set `inds` to the collection of sets and gives it the name `label`
+			\returns true if the set replaced an existing set (in this case a warning will be issued - which can generate an NxsException, if the client code wants warnings to generate exceptions).
+		*/
+		virtual bool AddNewIndexSet(const std::string &label, const NxsUnsignedSet & inds) = 0;
+		/* Adds partition `inds` to the collection of partitions and gives it the name `label`
+			\returns true if the partition replaced an existing partition (in this case a warning will be issued - which can generate an NxsException, if the client code wants warnings to generate exceptions).
+		*/
+		virtual bool AddNewPartition(const std::string &label, const NxsPartition & inds) = 0;
+
+		/* Adds a new label to the collection of valid labels
+			AppendNewLabel is only overridden in Taxa and State LabelToIndexMappers, all other
+			NxsLabelToIndicesMapper instances \throw NxsUnimplementedException (this default implementation)
+		*/
+		virtual unsigned AppendNewLabel(std::string &/*label*/)
+			{
+			throw NxsUnimplementedException("AppendNewLabel called on fixed label interface");
+			}
+		static bool allowNumberAsIndexPlusOne;
+	protected:
+		static unsigned GetIndicesFromSets(const std::string &label, NxsUnsignedSet *inds, const NxsUnsignedSetMap & itemSets);
+		static unsigned GetIndicesFromSetOrAsNumber(const std::string &label, NxsUnsignedSet *inds, const NxsUnsignedSetMap & itemSets, const unsigned maxInd, const char * itemType);
+	};
+
+class NxsSetVectorItemValidator;
+
+std::string GetBlockIDTitleString(NxsBlock &);
+/*!
+	This is the base class from which all block classes are derived. A NxsBlock-derived class encapsulates a Nexus block
+	(e.g. DATA block, TREES block, etc.). The abstract virtual function Read must be overridden for each derived class
+	to provide the ability to read everything following the block name (which is read by the NxsReader object) to the
+	end or endblock statement. Derived classes must provide their own data storage and access functions. The abstract
+	virtual function Report must be overridden to provide some feedback to user on contents of block. The abstract
+	virtual function Reset must be overridden to empty the block of all its contents, restoring it to its
+	just-constructed state.
+*/
+class NxsBlock
+	{
+	friend class NxsReader;
+		/* i20 */ /*v2.1to2.2 20 */
+	public:
+		enum NxsBlockLinkStatus /* bit flags; values are combined with | (see BLOCK_LINK_USED) */
+			{
+			BLOCK_LINK_UNINITIALIZED = 	       0x00,
+			BLOCK_LINK_UNKNOWN_STATUS =        0x01, /*backwards compatibility, this is the status of old block links*/
+			BLOCK_LINK_TO_ONLY_CHOICE =        0x02,
+			BLOCK_LINK_TO_MOST_RECENT =        0x04,
+			BLOCK_LINK_TO_IMPLIED_BLOCK =      0x08,
+			BLOCK_LINK_FROM_LINK_CMD =         0x10,
+			BLOCK_LINK_EQUIVALENT_TO_IMPLIED = 0x20,
+			BLOCK_LINK_UNUSED_MASK =           0x3F, /* union of the six status bits above (0x01 through 0x20) */
+			BLOCK_LINK_USED =                  0x40  /* separate bit, outside BLOCK_LINK_UNUSED_MASK */
+			};
+		enum NxsCommandResult /* return type of HandleBasicBlockCommands() */
+			{
+			STOP_PARSING_BLOCK,
+			HANDLED_COMMAND,
+			UNKNOWN_COMMAND
+			};
+							NxsBlock();
+		virtual				~NxsBlock();
+
+		virtual void SetNexus(NxsReader *nxsptr);
+		NxsReader *GetNexus() const;
+		virtual bool CanReadBlockType(const NxsToken & token) /* true when `token` equals the stored block-type name */
+			{
+			return token.Equals(NCL_BLOCKTYPE_ATTR_NAME);
+			}
+
+		NxsString			GetID() const;
+		bool				IsEmpty() NCL_COULD_BE_CONST ; /*v2.1to2.2 1 */
+
+		void				Enable();
+		void				Disable();
+		bool				IsEnabled() NCL_COULD_BE_CONST ; /*v2.1to2.2 1 */
+		bool				IsUserSupplied() NCL_COULD_BE_CONST ; /*v2.1to2.2 1 */
+
+		virtual unsigned	CharLabelToNumber(NxsString s) NCL_COULD_BE_CONST ; /*v2.1to2.2 1 */
+		virtual unsigned	TaxonLabelToNumber(NxsString s) const;
+
+		virtual void		SkippingCommand(NxsString commandName);
+
+		virtual void		HandleBlockIDCommand(NxsToken &token);
+		virtual void		HandleEndblock(NxsToken &token);
+		virtual void		HandleLinkCommand(NxsToken &token);
+		virtual void		HandleTitleCommand(NxsToken &token);
+
+		virtual void		Report(std::ostream &out) NCL_COULD_BE_CONST ; /*v2.1to2.2 1 */
+		virtual void		Reset();
+
+		mutable NxsString	errormsg;			/* workspace for creating error messages (mutable, so const members can write it) */
+
+
+		virtual VecBlockPtr GetImpliedBlocks();
+		virtual VecConstBlockPtr	GetImpliedBlocksConst() const;
+		BlockUniqueID		GetInstanceIdentifier() const /* pairs this block's address with its title */
+			{
+			return BlockUniqueID(this, GetInstanceName());
+			}
+		/* i21 */ /*v2.1to2.2 21 */
+		const std::string  &GetInstanceName() const /* reference to `title` (no copy) */
+			{
+			return title;
+			}
+		virtual NxsBlock			*CloneBlock(NxsBlockMapper &memo) const;
+		bool				ImplementsLinkAPI() const;
+		void				SetImplementsLinkAPI(bool v);
+
+		virtual void				WriteAsNexus(std::ostream &out) const;
+		virtual void 				WriteBlockIDCommand(std::ostream &out) const;
+		virtual void 				WriteLinkCommand(std::ostream &out) const;
+		virtual void 				WriteTitleCommand(std::ostream &out) const;
+		std::string GetTitle() const /* copy of `title` */
+			{
+			return title;
+			}
+		void SetTitle(const std::string &t, bool autogeneratedTitle) /* stores both the title and the autoTitle flag */
+			{
+			title = t;
+			/* i19 */ /*v2.1to2.2 19 */
+			autoTitle = autogeneratedTitle;
+			}
+		bool IsAutoGeneratedTitle() const
+			{
+			return autoTitle;
+			}
+		void StoreSkippedCommands(bool v)
+			{
+			storeSkippedCommands = v;
+			}
+		void ClearStoredSkippedCommands()
+			{
+			skippedCommands.clear();
+			}
+
+		/*----------------------------------------------------------------------------------------
+		| Copies all NxsBlock fields - except the `nexusReader` and `next` pointers.
+		|	Aliasing of Block pointers results in very dangerous implication of copying for
+		|	many subclasses of NxsBlock.
+		| Copying of blocks should be restricted to empty blocks without linkages (e.g.
+		|	the CloneFactory mechanism requires some form of copy, but should typically be used with
+		|	empty blocks).
+		*/
+
+		virtual void CopyBaseBlockContents(const NxsBlock &other)
+			{
+			errormsg = other.errormsg;
+			isEmpty = other.isEmpty;
+			isEnabled = other.isEnabled;
+			isUserSupplied = other.isUserSupplied;
+			NCL_BLOCKTYPE_ATTR_NAME = other.NCL_BLOCKTYPE_ATTR_NAME;
+			title = other.title;
+			/* i19 */ /*v2.1to2.2 19 */
+			blockIDString = other.blockIDString;
+			linkAPI = other.linkAPI;
+			storeSkippedCommands = other.storeSkippedCommands;
+			skippedCommands = other.skippedCommands;
+			autoTitle = other.autoTitle;
+			}
+		/* Returns a new heap-allocated NxsBlock with the base contents copied and NULL nexusReader/next pointers. */
+		virtual NxsBlock * Clone() const
+			{
+			NxsBlock * b = new NxsBlock();
+			b->CopyBaseBlockContents(*this);
+			b->nexusReader = NULL;
+			b->next = NULL;
+			return b;
+			}
+
+		unsigned			ReadVectorPartitionDef(NxsPartition &np, NxsLabelToIndicesMapper &ltm, const std::string & partName, const char * ptype, const char * cmd, NxsToken & token, bool warnAsterisked, bool demandAllInds, NxsSetVectorItemValidator & v);
+		void 				ReadPartitionDef(NxsPartition &np, NxsLabelToIndicesMapper &ltm, const std::string & partName, const char * ptype, const char * cmd, NxsToken & token, bool warnAsterisked, bool demandAllInds, bool storeAsPartition);
+		virtual bool		TolerateEOFInBlock() const /* base implementation: never */
+			{
+			return false;
+			}
+		void 				WarnDangerousContent(const std::string &s, const NxsToken &t);
+		void 				WarnDangerousContent(const std::string &s, const ProcessedNxsToken &t);
+
+		void				WriteBasicBlockCommands(std::ostream & out) const;
+		virtual void		WriteSkippedCommands(std::ostream & out) const;
+		// used internally to deal with multiple blocks spawning the same TAXA block (base implementation returns false)
+		virtual bool 		SwapEquivalentTaxaBlock(NxsTaxaBlockAPI * )
+		{
+			return false;
+		}
+		/*! This is the argument from the BLOCKID command.  It should be unique, but
+			that is dependent on the file being valid (NCL does not verify uniqueness).
+
+			This is not the ID used to identify block type. \ref BlockTypeIDDiscussion
+		*/
+		std::string GetBlockIDCommandString() const { return blockIDString; }
+	protected:
+		void				SkipCommand(NxsToken & token);
+
+		NxsCommandResult	HandleBasicBlockCommands(NxsToken & token);
+		void				DemandEndSemicolon(NxsToken &token, const char *contextString) const;
+		void				DemandEquals(NxsToken &token, const char *contextString) const;
+		void				DemandEquals(ProcessedNxsCommand::const_iterator &tokIt, const ProcessedNxsCommand::const_iterator & endIt, const char *contextString) const ;
+		void				DemandIsAtEquals(NxsToken &token, const char *contextString) const;
+		unsigned 			DemandPositiveInt(NxsToken &token, const char *contextString) const;
+		void				GenerateNxsException(NxsToken &token, const char *message = NULL) const;
+		void				GenerateUnexpectedTokenNxsException(NxsToken &token, const char *expected = NULL) const;
+		bool				isEmpty;			/* original note: "true if this object is currently storing data" - NOTE(review): the name suggests the opposite; confirm */
+		bool				isEnabled;			/* true if this block is currently enabled */
+		bool				isUserSupplied;		/* true if this object has been read from a file; false otherwise */
+		NxsReader			*nexusReader;		/* pointer to the Nexus file reader object */
+		NxsBlock			*next;				/* DEPRECATED field pointer to next block in list */
+		NxsString			NCL_BLOCKTYPE_ATTR_NAME;					/* holds name of block (e.g., "DATA", "TREES", etc.) \ref BlockTypeIDDiscussion */
+		std::string			title;				/* holds the title of the block; empty by default */
+		std::string 		blockIDString; 		/* argument of the BLOCKID command (see GetBlockIDCommandString). Mesquite generates these. I don't know what they are for */
+		bool				linkAPI;
+		bool				autoTitle;			/* true if the title was generated internally*/
+		bool				storeSkippedCommands;
+		std::list<ProcessedNxsCommand> skippedCommands; /* commands accumulated by SkipCommand or by some other means */
+
+		virtual void		Read(NxsToken &token);
+		/* i22 */ /*v2.1to2.2 22 */
+		private:
+			NxsBlock &operator=(const NxsBlock &other); /*intentionally not defined because of aliasing issues */
+
+	};
+
+/*! This abstract class defines the interface for a factory that can generate NxsBlocks.
+
+	When the NxsReader::Execute() method encounters a block that it needs to handle, it will first check the registered "singleton"
+		blocks (a block that has been added to it using NxsReader::Add(), those NxsBlock instances are recycled).
+		If no singleton block says that it can read that block of NEXUS (see NxsBlock::CanReadBlockType()), then the NxsReader
+		will walk through its list of factories calling NxsBlockFactory::GetBlockReaderForID() for each until it gets a
+		non-NULL pointer.
+
+	If there is an exception during the parsing of that block, BlockError will be called for the factory instance that generated the block.
+
+	If the block returns "false" from NxsBlock::IsEnabled() method, then BlockSkipped will be called by the NxsReader using
+		 the factory instance that generated the block.
+
+	Blocks generated by factories but used successfully in a parse have to be deleted by the client code (See \ref memoryManagement discussion).
+
+*/
+class NxsBlockFactory /* abstract: GetBlockReaderForID is pure virtual */
+	{
+	public:
+		virtual ~NxsBlockFactory()
+			{
+			}
+		/*! \returns a NxsBlock instance with NxsBlock::Read method that is capable of parsing a NEXUS block of type `NCL_BLOCKTYPE_ATTR_NAME`
+		*/
+		virtual NxsBlock  *	GetBlockReaderForID(const std::string & NCL_BLOCKTYPE_ATTR_NAME, /*!< The type of block that needs to be read see \ref BlockTypeIDDiscussion */
+												NxsReader *reader,  /*!< a pointer to the NxsReader controlling the parse. Can be NULL. Usually not needed for an implementation of this method */
+												NxsToken *token  /*!< a pointer to the NxsToken that is generating token. Can be NULL. Usually not needed for an implementation of this method */
+												) = 0;
+
+		/*! base-class implementation deletes the block (the NxsReader will not retain a reference to the block, so failing to delete can
+			lead to memory leaks if you do not have some fancy memory management scheme).
+
+			If there is an exception during the parsing of that block, BlockError will be called for the factory instance that generated the block.
+		*/
+		virtual void		BlockError(NxsBlock *b)
+			{
+			delete b;
+			}
+		/*! base-class implementation deletes the block (the NxsReader will not retain a reference to the block, so failing to delete can
+			lead to memory leaks if you do not have some fancy memory management scheme).
+
+		If the block returns "false" from NxsBlock::IsEnabled() method, then BlockSkipped will be called by the NxsReader using
+			 the factory instance that generated the block.
+		*/
+		virtual void BlockSkipped(NxsBlock *b)
+			{
+			delete b;
+			}
+	};
+
+/*!
+	Stores `nxsptr` in the nexusReader data member of this NxsBlock; whatever
+	value the member held before is overwritten.
+*/
+inline void NxsBlock::SetNexus(NxsReader *nxsptr)
+	{
+	this->nexusReader = nxsptr;
+	}
+/*!
+	Returns the current value of the nexusReader data member of this NxsBlock.
+*/
+inline NxsReader * NxsBlock::GetNexus() const
+	{
+	return this->nexusReader;
+	}
+
+
+/*!
+	Reads the next token, then hands the check for an equals sign to DemandIsAtEquals.
+
+	On failure errormsg is set and a NxsException is raised.
+	`contextString` is used in error messages:
+		"Expecting '=' ${contextString} but found..."
+*/
+inline void NxsBlock::DemandEquals(NxsToken &token, const char *contextString) const
+	{
+	token.GetNextToken();
+	this->DemandIsAtEquals(token, contextString);
+	}
+#endif
diff --git a/src/ncl/nxscdiscretematrix.h b/src/ncl/nxscdiscretematrix.h
new file mode 100644
index 0000000..0bfa147
--- /dev/null
+++ b/src/ncl/nxscdiscretematrix.h
@@ -0,0 +1,113 @@
+//	Copyright (C) 2008 Mark Holder
+//
+//	This file is part of NCL (Nexus Class Library) version 2.1
+//
+//	NCL is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	NCL is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with NCL; if not, write to the Free Software Foundation, Inc.,
+//	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+// This code is based on code developed by Mark Holder for the CIPRES project
+
+#if !defined(NXS_C_DISCRETE_MATRIX_H)
+#define NXS_C_DISCRETE_MATRIX_H
+
+
+#if defined (HAVE_CONFIG_H)
+#	include <config.h>
+#endif
+
+#if defined(_MSC_VER)
+#	undef	HAVE_COMPILE_TIME_DISPATCH
+#else
+#	define HAVE_COMPILE_TIME_DISPATCH
+#endif
+
+/* For typedefs like uint8_t */
+#if HAVE_INTTYPES_H
+#	include <inttypes.h>
+#elif HAVE_STDINT_H
+#	include <stdint.h>
+#elif defined(_MSC_VER) && _MSC_VER >= 1200
+#	include <basetsd.h>
+	typedef   INT8 int8_t;
+	typedef  UINT8 uint8_t;
+	typedef  INT64 int64_t;
+	typedef UINT64 uint64_t;
+#elif defined(_MSC_VER)
+	typedef signed char int8_t;
+	typedef unsigned char uint8_t;
+	typedef long long int64_t;
+	typedef unsigned long long uint64_t;
+#elif defined(_WIN32)
+#	include <stdint.h>
+#endif
+
+	/* For size_t */
+#if defined(HAVE_STDDEF_H)
+#	include <stddef.h>
+#endif
+
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+
+typedef int8_t NxsCDiscreteState_t; /** type used to enumerate possible states.
+								-1 is used for gaps, other negative flags may be added later.
+								This size limits the maximum number of states allowed. */
+typedef int8_t NxsCDiscreteStateSet; /** type used to refer to unique combinations of states (the "fundamental" states and ambiguity codes)
+								-1 is used for gaps.  To handle all possible data sets, this must be large enough to hold
+								2^(nStates + 1) values if the datatype allows gaps.  Thus using int8_t limits us to 8 states */
+
+/*
+The following enum is a cropping of the NxsCharactersBlock::DataTypesEnum
+which includes all of the datatypes (and only those) that can be expressed
+in a NxsCDiscreteMatrix. Each of the enum facets will have the same
+value as in  NxsCharactersBlock::DataTypesEnum.
+
+This enum is also handy because it is accessible via C.
+*/
+typedef enum {
+			  NxsAltGeneric_Datatype = 1,
+			  NxsAltDNA_Datatype = 2,
+			  NxsAltRNA_Datatype = 3,
+			  NxsAltNuc_Datatype = 4,
+			  NxsAltAA_Datatype = 5,
+			  NxsAltCodon_Datatype = 6 /* NOTE(review): NxsCharactersBlock::DataTypesEnum lists continuous = 6 and codon = 7 -- confirm which value is intended here */
+			  } NxsAltDatatypes;
+static const int LowestNxsCDatatype = 1;	/* smallest NxsAltDatatypes value; static gives internal linkage in C too, so several C files may include this header */
+static const int HighestNxsCDatatype = 6;	/* largest NxsAltDatatypes value; keep in step with the enum above */
+
+typedef struct NxsCDiscreteMatrixStruct
+	{
+	NxsCDiscreteState_t 	  *stateList; 		/** Flattened array of arrays of observed states.  If more than one state was observed, then the first element is the number of states observed.
+											  Exceptions: -1 is for gaps, nStates is for missing. */
+	unsigned * stateListPos;  	/** Maps a state set code (the elements of the matrix) to the index in stateList where the states are listed */
+	NxsCDiscreteStateSet ** matrix;			/** taxa x characters matrix of indices of state sets */
+	const char * symbolsList;	/** array of the characters used to stand for each state ("ACGT?NRY" for example) //@temp paup depends on all symbols being unique (even ambiguity codes)*/
+	unsigned nStates;	/* presumably the number of fundamental states; this value is also the stateList code for missing data */
+	unsigned nChar;	/* presumably the number of characters (columns of matrix) -- confirm */
+	unsigned nTax;	/* presumably the number of taxa (rows of matrix) -- confirm */
+	unsigned nObservedStateSets; /* the length of stateListPos */
+	NxsAltDatatypes datatype;	/* the NxsAltDatatypes facet recorded for this matrix */
+	} NxsCDiscreteMatrix;
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* NXS_C_DISCRETE_MATRIX_H */
+
diff --git a/src/ncl/nxscharactersblock.h b/src/ncl/nxscharactersblock.h
new file mode 100644
index 0000000..4ef4ffe
--- /dev/null
+++ b/src/ncl/nxscharactersblock.h
@@ -0,0 +1,1972 @@
+//	Copyright (C) 1999-2003 Paul O. Lewis
+//
+//	This file is part of NCL (Nexus Class Library) version 2.0.
+//
+//	NCL is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	NCL is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with NCL; if not, write to the Free Software Foundation, Inc.,
+//	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+
+#ifndef NCL_NXSCHARACTERSBLOCK_H
+#define NCL_NXSCHARACTERSBLOCK_H
+
+#include <sstream>
+#include <cfloat>
+#include <climits>
+
+#include "ncl/nxsdefs.h"
+#include "ncl/nxsdiscretedatum.h"
+#include "ncl/nxstaxablock.h"
+
+
+class NxsTaxaBlockAPI;
+class NxsAssumptionsBlockAPI;
+class NxsDiscreteDatatypeMapper;
+
+void NxsWriteSetCommand(const char *cmd, const NxsUnsignedSetMap & usetmap, std::ostream &out, const char * nameOfDef = NULL);
+void NxsWritePartitionCommand(const char *cmd, const NxsPartitionsByName &partitions, std::ostream & out, const char * nameOfDef = NULL);
+
+/*! Internal representation of a stepmatrix with cells composed of doubles.
+	The ordering of columns and rows is identical, and is accessible via NxsRealStepMatrix::GetSymbols
+*/
+class NxsRealStepMatrix
+	{
+	public:
+		typedef std::vector<double> DblVec;
+		typedef std::vector<DblVec> DblMatrix;
+
+		/* Stores copies of the row/column symbol order and of the cost matrix. */
+		NxsRealStepMatrix(const std::vector<std::string> &symbolsOrder, const DblMatrix & mat)
+			:symbols(symbolsOrder), matrix(mat)
+			{}
+
+		/* \returns the cost matrix supplied at construction */
+		const DblMatrix & GetMatrix() const
+			{
+			return this->matrix;
+			}
+		/* \returns the symbols that label both the rows and the columns */
+		const std::vector<std::string> & GetSymbols() const
+			{
+			return this->symbols;
+			}
+	private:
+		std::vector<std::string> symbols;
+		DblMatrix matrix;
+	};
+
+/*! Internal representation of a stepmatrix with cells composed of ints.
+	The ordering of columns and rows is identical, and is accessible via NxsIntStepMatrix::GetSymbols
+*/
+class NxsIntStepMatrix
+	{
+	public:
+		typedef std::vector<int> IntVec;
+		typedef std::vector<IntVec> IntMatrix;
+		/* Stores copies of the row/column symbol order and of the cost matrix. */
+		NxsIntStepMatrix(const std::vector<std::string> &symbolsOrder, const IntMatrix & mat)
+			:symbols(symbolsOrder), matrix(mat)
+			{}
+		/* \returns the cost matrix supplied at construction */
+		const IntMatrix & GetMatrix() const
+			{
+			return this->matrix;
+			}
+		/* \returns the symbols that label both the rows and the columns */
+		const std::vector<std::string> & GetSymbols() const
+			{
+			return this->symbols;
+			}
+	private:
+		std::vector<std::string> symbols;
+		IntMatrix matrix;
+	};
+
+/* Work in progress...
+*/
+class NxsGeneticCodesManager
+	{
+	public:
+		NxsGeneticCodesManager();
+		void Reset() {} /* currently a no-op */
+		bool IsEmpty() const /* currently always reports true */
+			{
+			return true;
+			}
+		void WriteGeneticCode(std::ostream &	) const /* currently writes nothing to the stream */
+			{}
+		bool IsValidCodeName(const std::string &cn) const;
+	protected:
+		std::set<std::string> standardCodeNames; /* presumably the built-in code names -- confirm against the constructor */
+		std::set<std::string> userDefinedCodeNames;
+
+	};
+
+
+/*! NEXUS "types" (as in UserType and TypeSet commands) are assumptions about the costs of transformations of state
+	(in parsimony).
+
+ */
+class NxsTransformationManager
+	{
+	public:
+		typedef std::pair<int, std::set<unsigned> > IntWeightToIndexSet; /* an int weight paired with a set of indices */
+		typedef std::list<IntWeightToIndexSet> ListOfIntWeights;
+
+		typedef std::pair<double, std::set<unsigned> > DblWeightToIndexSet; /* a double weight paired with a set of indices */
+		typedef std::list<DblWeightToIndexSet> ListOfDblWeights;
+
+		typedef std::pair<std::string, std::set<unsigned> > TypeNameToIndexSet; /* a type name paired with a set of indices */
+		typedef std::list<TypeNameToIndexSet> ListOfTypeNamesToSets;
+
+
+		NxsTransformationManager()
+			{
+			Reset();
+			}
+
+		/*! \returns the weight for a character index from a weight set */
+		static int GetWeightForIndex(const ListOfIntWeights & wtset, /*!< the weight set */
+									 unsigned index); /*!< character index which should be in the range [0, nchar) */
+
+		/*! \returns the weight for a character index from a weight set */
+		static double GetWeightForIndex(const ListOfDblWeights & wtset, /*!< the weight set */
+										unsigned index); /*!< character index which should be in the range [0, nchar) */
+
+
+		/*! \returns an integer step matrix for an ordered type with `nStates` states */
+		static const NxsIntStepMatrix::IntMatrix GetOrderedType(unsigned nStates);
+		/*! \returns an integer step matrix for an unordered type with `nStates` states (a matrix of 1's off the diagonal and 0's on the diagonal).*/
+		static const NxsIntStepMatrix::IntMatrix GetUnorderedType(unsigned nStates);
+
+		/*! \returns a set with all of the registered type names (the names will be all caps, not necessarily in the case used in the NEXUS file)*/
+		const std::set<std::string> & GetTypeNames() const;
+		/*! \returns a set with all of the registered type names that were defined with UserType */
+		const std::set<std::string> & GetUserTypeNames() const;
+		/*! \returns a set with all of the builtin type names*/
+		const std::set<std::string> & GetStandardTypeNames() const;
+		/*! \returns the name of the current default type (starts as unordered but can be overridden by a TypeSet command) */
+		const std::string GetDefaultTypeName() const;
+		/*! \returns a set with all of the WtSet names */
+		std::set<std::string> GetWeightSetNames() const;
+		/*! \returns true if the name corresponds to a WtSet that has double entries
+			If true, access the set via GetDoubleWeights
+			If false, retrieve the set by GetIntWeights
+		*/
+		bool IsDoubleWeightSet(const std::string &) const;
+
+		/*! \returns the default ("active") weights as doubles. If the list is empty then the default weights are available from GetDefaultIntWeights or have not been set (implying equal weights) */
+		std::vector<double> GetDefaultDoubleWeights() const
+			{
+			return GetDoubleWeights(def_wtset);
+			}
+
+		/*! \returns the default ("active") weights as ints. If the list is empty then the default weights are available from GetDefaultDoubleWeights or have not been set (implying equal weights) */
+		std::vector<int> GetDefaultIntWeights() const
+			{
+			return GetIntWeights(def_wtset);
+			}
+
+		/*! \returns the double weights associated with the WtSet with name `wtsetname`.
+
+		If the list is empty then either the weights are available from GetIntWeights or
+		wtsetname is not the name of a WtSet
+		*/
+		std::vector<double> GetDoubleWeights(const std::string &wtsetname) const;
+		/*! \returns the int weights associated with the WtSet with name `wtsetname`.
+
+		If the list is empty then either the weights are available from GetDoubleWeights or
+		wtsetname is not the name of a WtSet
+		*/
+		std::vector<int> GetIntWeights(const std::string &) const;
+
+		/*! \returns a set with all of the TypeSet names */
+		std::set<std::string> GetTypeSetNames() const;
+
+		/*! \returns the name of the active ("default") WtSet */
+		const std::string & GetDefaultWeightSetName() const;
+		/*! \returns the name of the active ("default") TypeSet */
+		const std::string & GetDefaultTypeSetName() const;
+
+		bool IsEmpty() const;
+
+		bool IsValidTypeName(const std::string & ) const;
+		bool IsStandardType(const std::string & ) const;
+		bool IsIntType(const std::string & ) const;
+
+		const NxsIntStepMatrix & GetIntType(const std::string & name) const;
+
+		const NxsRealStepMatrix & GetRealType(const std::string & name) const;
+
+
+		void SetDefaultTypeName(const std::string &);
+		bool AddIntType(const std::string &, const NxsIntStepMatrix &);
+		bool AddRealType(const std::string &, const NxsRealStepMatrix &);
+
+		bool AddIntWeightSet(const std::string &, const ListOfIntWeights &, bool isDefault);
+		bool AddRealWeightSet(const std::string &, const ListOfDblWeights &, bool isDefault);
+
+		bool AddTypeSet(const std::string &, const NxsPartition &, bool isDefault);
+
+		void Reset();
+
+		void WriteUserType(std::ostream &out) const;
+		void WriteWtSet(std::ostream &out) const;
+		void WriteTypeSet(std::ostream &out) const /* delegates to NxsWritePartitionCommand, passing the default TypeSet name */
+			{
+			NxsWritePartitionCommand("TypeSet", typeSets, out, def_typeset.c_str());
+			}
+
+	private:
+		std::set<std::string> standardTypeNames; /* exposed by GetStandardTypeNames */
+		std::set<std::string> userTypeNames; /* exposed by GetUserTypeNames */
+		std::set<std::string> allTypeNames; /* exposed by GetTypeNames */
+		std::map<std::string, NxsRealStepMatrix> dblUserTypes;
+		std::map<std::string, NxsIntStepMatrix> intUserTypes;
+		std::set<std::string> allWtSetNames;
+		std::map<std::string, ListOfDblWeights> dblWtSets;
+		std::map<std::string, ListOfIntWeights> intWtSets;
+		NxsPartitionsByName typeSets;
+		std::string def_wtset; /* exposed by GetDefaultWeightSetName */
+		std::string def_typeset; /* exposed by GetDefaultTypeSetName */
+		std::string def_type; /* exposed by GetDefaultTypeName */
+	};
+
+inline const std::string NxsTransformationManager::GetDefaultTypeName() const
+	{
+	return this->def_type; // returned by value, so the caller receives a copy
+	}
+inline const std::string & NxsTransformationManager::GetDefaultWeightSetName() const
+	{
+	return this->def_wtset; // reference to the member, not a copy
+	}
+inline const std::string & NxsTransformationManager::GetDefaultTypeSetName() const
+	{
+	return this->def_typeset; // reference to the member, not a copy
+	}
+inline const std::set<std::string> & NxsTransformationManager::GetTypeNames() const
+	{
+	return this->allTypeNames; // reference to the member set
+	}
+inline const std::set<std::string> & NxsTransformationManager::GetUserTypeNames() const
+	{
+	return this->userTypeNames; // reference to the member set
+	}
+inline const std::set<std::string> & NxsTransformationManager::GetStandardTypeNames() const
+	{
+	return this->standardTypeNames; // reference to the member set
+	}
+inline bool NxsTransformationManager::IsDoubleWeightSet(const std::string &s) const
+	{
+	// The answer is true exactly when GetDoubleWeights yields a non-empty vector for `s`.
+	return !GetDoubleWeights(s).empty();
+	}
+
+/*! Intended to specify the interface of a NxsCharactersBlock, but actually does
+	not list all of the relevant functions. See NxsCharactersBlock documentation.
+*/
+class NxsCharactersBlockAPI
+  : public NxsBlock, public NxsLabelToIndicesMapper
+	{
+	public:
+		virtual unsigned	ApplyExset(NxsUnsignedSet &exset) = 0; /* excludes the character indices in exset (see NxsCharactersBlock::ApplyExset) */
+		virtual bool AddNewExSet(const std::string &label, const NxsUnsignedSet & inds) = 0;
+		virtual bool IsRespectCase() const = 0;
+		virtual unsigned	GetNCharTotal() const = 0; /* number of characters stored in the block */
+		virtual NxsTransformationManager & GetNxsTransformationManagerRef() = 0;
+		virtual const NxsTransformationManager & GetNxsTransformationManagerRef() const = 0;
+		virtual std::vector<const NxsDiscreteDatatypeMapper *> GetAllDatatypeMappers() const = 0; /* every mapper used by any character in the matrix */
+		virtual bool AddNewCodonPosPartition(const std::string &label, const NxsPartition & inds, bool isDefault) = 0;
+		virtual std::string GetDefaultCodonPosPartitionName() const = 0;
+		virtual NxsPartition GetCodonPosPartition(const std::string &label) const = 0;
+		enum GapModeEnum /* selects how gaps are treated by post-read queries */
+		{
+			GAP_MODE_MISSING = 0,
+			GAP_MODE_NEWSTATE = 1 /* in this mode a gap counts as a state when observed states are tallied */
+		};
+		virtual GapModeEnum GetGapModeSetting() const = 0;
+		virtual void SetGapModeSetting(GapModeEnum m) = 0;
+
+	};
+
+#if defined(NCL_SMALL_STATE_CELL)
+	typedef signed char NxsDiscreteStateCell;
+#else
+	typedef int NxsDiscreteStateCell;
+#endif
+typedef std::vector<NxsDiscreteStateCell> NxsDiscreteStateRow;
+typedef std::vector<NxsDiscreteStateRow> NxsDiscreteStateMatrix;
+
+
+/*!
+	NXS_INVALID_STATE_CODE is used as a flag for uninitialized or unrecognized values
+	NXS_GAP_STATE_CODE may not be found in all datatypes, but is always -2 when
+		it is present.
+	NXS_MISSING_CODE is always -1. It must be distinguished from the ambiguous set of all states because ? does not
+		mean that a new state could necessarily be present. This arises in PAUP-style symbols extensions to the
+		built-in datatypes. If you say FORMAT DATATYPE=DNA SYMBOLS="01" ; then the valid symbols become "ACGT01"
+		See AugmentedSymbolsToMixed.
+*/
+enum {
+	NXS_INVALID_STATE_CODE = -3, /* this must be kept negative */
+	NXS_GAP_STATE_CODE = -2, /* this must be kept negative */
+	NXS_MISSING_CODE = -1 /* this must be kept negative */
+	};
+
+class NxsCodonTriplet {
+	public:
+		unsigned char firstPos; // presumably a code for the base at codon position 1 -- confirm encoding in the constructor
+		unsigned char secondPos;
+		unsigned char thirdPos;
+
+		NxsCodonTriplet(const char *triplet);
+		////////////////////////////////////////////////////////////////////////
+		// Describes the substitution that turns this codon into `other`:
+		// returns a (from-base, to-base) pair, or (-1,-1) when the codons differ
+		// at more than one position. Identical codons yield (0,0).
+		//
+		typedef std::pair<int, int> MutDescription;
+		MutDescription getSingleMut(const NxsCodonTriplet & other) const;
+
+	};
+
+enum NxsGeneticCodesEnum {
+	NXS_GCODE_NO_CODE = -1,
+	NXS_GCODE_STANDARD = 0,
+	NXS_GCODE_VERT_MITO = 1,
+	NXS_GCODE_YEAST_MITO = 2,
+	NXS_GCODE_MOLD_MITO = 3,
+	NXS_GCODE_INVERT_MITO = 4,
+	NXS_GCODE_CILIATE = 5,
+	NXS_GCODE_ECHINO_MITO = 8,
+	NXS_GCODE_EUPLOTID = 9,
+	NXS_GCODE_PLANT_PLASTID = 10,
+	NXS_GCODE_ALT_YEAST = 11,
+	NXS_GCODE_ASCIDIAN_MITO = 12,
+	NXS_GCODE_ALT_FLATWORM_MITO = 13,
+	NXS_GCODE_BLEPHARISMA_MACRO = 14,
+	NXS_GCODE_CHLOROPHYCEAN_MITO = 15,
+	NXS_GCODE_TREMATODE_MITO = 20,
+	NXS_GCODE_SCENEDESMUS_MITO = 21,
+	NXS_GCODE_THRAUSTOCHYTRIUM_MITO = 22,
+	NXS_GCODE_CODE_ENUM_SIZE = 23
+	};
+NxsGeneticCodesEnum geneticCodeNameToEnum(std::string);
+std::string geneticCodeEnumToName(NxsGeneticCodesEnum);
+std::string getGeneticCodeAAOrder(NxsGeneticCodesEnum codeIndex);
+std::vector<std::string> getGeneticCodeNames();
+
+/* structure used to store information about how the codon indices of a compressed
+(no stop codons permitted) character matrix correspond to the:
+	* 64 codons in alphabetical order,
+	* the amino acids
+	* the codon strings ("AAA", "AAC"...)
+*/
+
+class CodonRecodingStruct
+{
+	public:
+		std::vector<int> compressedCodonIndToAllCodonsInd; /* presumably maps each compressed codon index to its index among the 64 alphabetically ordered codons -- confirm */
+		std::vector<int> aaInd; /* The index 0 to 20 of the amino acid for each codon - the order of the aas is "ACDEFGHIKLMNPQRSTVWY*" */
+		std::vector<std::string> codonStrings; /* The nucleotide abbreviations for each codon "AAA", "AAC"... */
+};
+
+class NxsDiscreteDatatypeMapper;
+/*! This class handles reading and storage for the NEXUS block CHARACTERS. It overrides the member functions Read and
+	Reset, which are abstract virtual functions in the base class NxsBlock.
+
+
+
+	\note{"ActiveChar" is equivalent to "IncludedChar". }
+
+
+	Important change starting in version 2.1:
+		The ELIMINATE command is now dealt with as if it were an automatic exclude statement.
+		Previous versions of NCL were more in keeping with the NEXUS specification, in that NCL did not store
+		eliminated characters. This resulted
+		in a confusing situation in which the Characters block maintained an original index for a character and a
+		current index.	Some public functions of NxsCharactersBlock took arguments that needed the original character
+		index, while the vast majority of methods interpreted a character index as the current index of a character.
+		ELIMINATE commands are *very* rare in modern NEXUS files (Mesquite does not even recognize the command), thus
+		the increased complexity of the API that was caused by not storing ELIMINATED characters was deemed
+		counterproductive.
+		In NCL 2.1 (and later), the characters block stores every character, and the user of NCL can query the
+		NxsCharactersBlock about whether a character has been excluded or not (you can also ask for the set of
+		eliminated characters). Optimizations for avoiding excluded characters are no longer the responsibility
+		of NxsCharactersBlock.
+
+	Thus, a normal loop through all characters in the data matrix should look something
+	like this:
+>
+	for(unsigned j = 0; j < nchar; j++)
+		{
+		if (IsExcluded(j))
+			continue;
+		.
+		.
+		.
+		}
+
+	Below is a table showing the correspondence between the elements of a CHARACTERS block in a NEXUS file and the
+	variables and member functions of the NxsCharactersBlock class that can be used to access each piece of information
+	stored. Items in parenthesis should be viewed as "see also" items.
+>
+	NEXUS		  Command		 Data			Member
+	Command		  Attribute		 Member			Functions
+	---------------------------------------------------------------------
+	DIMENSIONS	  NEWTAXA		 newtaxa
+
+
+				  NCHAR			 nChar			GetNChar
+
+	FORMAT		  DATATYPE		 datatype		GetDataType
+
+				  RESPECTCASE	 respectingCase IsRespectCase
+
+				  MISSING		 missing		GetMissingSymbol
+
+				  GAP			 gap			GetGapSymbol
+
+				  SYMBOLS		 symbols		GetSymbols
+
+				  EQUATE		 userEquates	GetEquateKey
+												GetEquateValue
+												GetNumEquates
+
+				  MATCHCHAR		 matchchar		GetMatchcharSymbol
+
+				  (NO)LABELS	 labels			IsLabels
+
+				  TRANSPOSE		 transposing	IsTranspose
+
+				  INTERLEAVE	 interleaving	IsInterleave
+
+				  ITEMS							GetItems
+
+				  STATESFORMAT					GetStatesPresent
+
+				  (NO)TOKENS	 tokens			IsTokens
+
+	ELIMINATE					 eliminated		GetNumEliminated
+												IsExcluded
+	MATRIX						 matrix			GetState
+												GetInternalRepresentation
+												GetNumStates
+												GetNumMatrixRows
+												GetNumMatrixCols
+												IsPolymorphic
+>
+*/
+class NxsCharactersBlock
+  : public NxsCharactersBlockAPI, public NxsTaxaBlockSurrogate
+	{
+	friend class NxsAssumptionsBlock;
+
+
+	public:
+		typedef std::map<std::string, std::vector<double> > ContinuousCharCell;
+		typedef std::vector<ContinuousCharCell> ContinuousCharRow;
+		typedef std::vector<ContinuousCharRow> ContinuousCharMatrix;
+		typedef std::vector<std::string> VecString;
+		typedef std::map<unsigned, std::string> IndexToLabelMap;
+		typedef std::map<std::string, unsigned> LabelToIndexMap;
+		typedef std::pair<NxsDiscreteDatatypeMapper, NxsUnsignedSet> DatatypeMapperAndIndexSet;
+		typedef std::vector<DatatypeMapperAndIndexSet> VecDatatypeMapperAndIndexSet;
+
+
+		enum DataTypesEnum /*! values used to represent different basic types of data stored in a CHARACTERS block, and used with the data member `datatype' */
+			{
+			standard = 1, /*! indicates `matrix' holds characters with arbitrarily-assigned, discrete states, such as discrete morphological data */
+			dna = 2, /*! indicates `matrix' holds DNA sequences (states A, C, G, T) */
+			rna = 3, /*! indicates `matrix' holds RNA sequences (states A, C, G, U) */
+			nucleotide = 4, /*! indicates `matrix' holds nucleotide sequences */
+			protein = 5, /*! indicates `matrix' holds amino acid sequences */
+			continuous = 6, /*! indicates `matrix' holds continuous data */
+			codon = 7, /*! AAA=>0, AAC=>1, AAG=>2, AAU=>3, ACA=>4... UUU=>63 */
+			mixed = 8 /*! indicates that there are multiple datatype mappers that must be used to decode the columns of the matrix (one mapper per column, but not one mapper per matrix). A MrBayes NEXUS feature*/
+			};
+		enum StatesFormatEnum
+			{
+			STATES_PRESENT = 1,
+			STATE_COUNT,
+			STATE_FREQUENCY,
+			INDIVIDUALS
+			};
+
+		/*! In v2.1 of the API, the NxsTaxaBlockAPI and NxsAssumptionsBlockAPI pointers
+			are usually NULL. These block assignments are made during the parse. */
+		NxsCharactersBlock(NxsTaxaBlockAPI *tb, /*!< the taxa block object to consult for taxon labels (can be 0L)*/
+						   NxsAssumptionsBlockAPI *ab);	/*!< the assumptions block object to consult for exclusion sets (can be 0L) */
+		virtual ~NxsCharactersBlock() {}
+
+	// Commonly used functions
+		//Configuration
+		/*! Controls whether or not a characters block reader will support MrBayes' datatype=MIXED extension to NEXUS
+			The default is false.
+			\ref mixedDatatypes
+		*/
+		void SetSupportMixedDatatype(bool v)
+			{
+			this->supportMixedDatatype = v; // record the caller's choice
+			}
+		/*! \returns true if a sequence type will be converted to standard (default block would return false).
+				the "setter" function is NxsCharactersBlock::SetConvertAugmentedToMixed()
+
+			\ref mixedDatatypes
+		*/
+		bool AugmentedSymbolsToMixed();
+		/*! Instructs the NxsCharactersBlock to convert sequence data character blocks that have
+			"augmented" symbols lists into a mixture datatype.
+
+			By default or after SetConvertAugmentedToMixed(false), the NxsCharacterBlock will change
+			the datatype to standard (to indicate that the datatype is no longer simply a sequence type).
+
+			Note that GetOriginalDataType() will still store the name of the type that occurred in the file.
+
+			This is only applicable if SetAllowAugmentingOfSequenceSymbols(true) has been called.
+			\ref mixedDatatypes
+		*/
+		void SetConvertAugmentedToMixed(bool v)
+			{
+			this->convertAugmentedToMixed = v; // record the caller's choice
+			}
+		/*! Instructs the NxsCharactersBlock to accept extra symbols even if the datatype is
+			declared to be a sequence type. Character blocks that have such
+			"augmented" symbols lists are converted to the standard datatype (see SetConvertAugmentedToMixed()).
+		*/
+		void SetAllowAugmentingOfSequenceSymbols(bool v)
+			{
+			this->allowAugmentingOfSequenceSymbols = v; // record the caller's choice
+			}
+
+		/*! \returns the setting of allowAugmentingOfSequenceSymbols.
+			The default is false.
+			\ref mixedDatatypes
+		*/
+		bool GetAllowAugmentingOfSequenceSymbols() const
+			{
+			return this->allowAugmentingOfSequenceSymbols; // value last stored by the matching setter
+			}
+		/*! \returns a data structure that allows you to identify the set of character
+			indices (each element in [0, nchar) range). If the type is not mixed, then
+			the map may be empty.
+		*/
+		std::map<DataTypesEnum, NxsUnsignedSet> GetDatatypeMapForMixedType() const
+			{
+			return this->mixedTypeMapping; // returned by value, so the caller receives a copy
+			}
+
+
+		/*! \returns a facet of the DataTypesEnum that indicates the general type
+				of data. Because symbols can be augmented non-default polymorphism codes
+				can be introduced, this is not a complete description of the datatype
+				encoding.
+
+			If you want to test if the internal representation datatype of a NxsCharactersBlock
+				has been modified use some form of this idiom:
+
+			mapper = charBlock.GetDatatypeMapperForChar(0);
+			const bool hasGaps = charBlock.GetGapSymbol() != '\0';
+			NxsDiscreteDatatypeMapper defaultMapper(NxsCharactersBlock::dna, hasGaps);
+			if (mapper->IsSemanticallyEquivalent(&defaultMapper))
+				{
+				// the datatype has not been changed in a substantive way.
+				}
+			else
+				{
+				// new state codes have been introduced, so routines that do not
+				//	interrogate the mapper to check the mapper about the status of all of the
+				//	states may fail.
+				}
+
+
+
+		*/
+		DataTypesEnum GetDataType() const;
+		/*! \returns true if taxon `ind' has some stored character state. Assumes `ind' is in the range [0..ntax).
+		*/
+		bool TaxonIndHasData(const unsigned ind) const;
+		/*! \returns the number of characters stored in the block.*/
+		unsigned GetNCharTotal() const ;
+		/*! Returns datatype listed in the CHARACTERS block.
+			The original datatype can differ from the current datatype if the datatype
+				was listed as a sequence type, but the symbols list of a built in type was augmented
+				(thus converting it to standard).
+			This will only happen if SetAllowAugmentingOfSequenceSymbols(true) has been called on the block.
+			see \ref mixedDatatypes
+		*/
+		DataTypesEnum GetOriginalDataType() const;
+		/*! Returns the number of characters that have not been excluded (via exset or eliminate command, for
+			example).
+			Synonymous with GetNumIncludedChars and (GetNChar - GetNumEliminated)
+		*/
+		unsigned GetNumActiveChar() NCL_COULD_BE_CONST ; /*v2.1to2.2 1 */
+		/*! \returns a reference to the set of indices that are currently excluded. */
+		const NxsUnsignedSet & GetExcludedIndexSet() const;
+		/*! \returns true if character `j' is active. If character `j' has been excluded, returns false.
+			Assumes `j' is in the range [0..`nchar')
+		*/
+		bool IsActiveChar(unsigned j) const;
+		/*!  excludes all of the  indices in exset.
+
+			indices should be in the range [0, nchar)
+		*/
+		unsigned ApplyExset(NxsUnsignedSet &exset);
+		/*!  includes all of the  indices in exset.
+
+			indices should be in the range [0, nchar)
+		*/
+		unsigned ApplyIncludeset(NxsUnsignedSet &inset);
+		/*!  excludes character `i`
+
+			i should be in the range [0, nchar)
+		*/
+		void ExcludeCharacter(unsigned i);
+		/*!  includes character `i`
+
+			i should be in the range [0, nchar)
+		*/
+		void IncludeCharacter(unsigned i);
+
+		/*! \returns the label for character `i`, or a string with a single space if there is no label*/
+		NxsString GetCharLabel(unsigned i) const; /*v2.1to2.2 4 */
+		/*! \returns true if charlabels were stored for the matrix*/
+		bool HasCharLabels() const;
+		/*! \returns the current gapMode setting
+		During a parse this is controlled by the OPTIONS command in the ASSUMPTIONS block).
+			\note{The gapmode setting basically just holds the value for the client code's convenience.
+			It only affects post-read operations, such as NxsCharactersBlock::GetObsStates.
+			It does NOT change the internal encoding of the data (just triggers some filtering) of the data}
+		*/
+		GapModeEnum GetGapModeSetting() const
+		{
+			return gapMode; // value last stored by SetGapModeSetting
+		}
+
+		//v2.0 API that queries based on symbols see \ref NxsCharacterBlockQueries
+		const char *GetSymbols() NCL_COULD_BE_CONST ; /*v2.1to2.2 1 */
+		unsigned GetNumStates(unsigned i, unsigned j) NCL_COULD_BE_CONST ; /*v2.1to2.2 1 */
+		char GetState(unsigned i, unsigned j, unsigned k = 0) const;
+		/*! \returns a reference to the Transformation Manager that stores information such as character transformation type (ORDERED, UNORDERED...)*/
+		const NxsTransformationManager & GetNxsTransformationManagerRef() const
+			{
+			return this->transfMgr; // read-only view of the block's own manager
+			}
+		/*! Returns true iff taxon `taxInd` has a gap for character `charInd` (both indices 0-based)*/
+		bool IsGapState(unsigned taxInd, unsigned charInd) NCL_COULD_BE_CONST ; /*v2.1to2.2 1 */
+		/*! Returns true iff taxon `i` is missing data for character `j` (both indices 0-based) */
+		bool IsMissingState(unsigned i, unsigned j) NCL_COULD_BE_CONST ; /*v2.1to2.2 1 */
+		/*! Returns true iff taxon `i` is polymorphic for character `j` (both indices 0-based) */
+		bool IsPolymorphic(unsigned i, unsigned j) NCL_COULD_BE_CONST ; /*v2.1to2.2 1 */
+
+	//v2.1 API for extracting character info
+	// for discrete characters
+		/*! \returns a const reference to the row of state codes for the taxon.
+
+			This enables faster access to the data. See \ref newerCharQueries
+
+			The row should not be modified by the caller.
+
+			taxonIndex should be in the range [0, ntax)
+		*/
+		const NxsDiscreteStateRow & GetDiscreteMatrixRow(unsigned taxonIndex) const;
+		/*! \returns a pointer to the NxsDiscreteDatatypeMapper that "knows" how the
+			internal state code labellings correspond to symbols.
+
+			If the datatype is not mixed, then the same instance will apply regardless of
+			character index.
+
+			charIndex should be in the range [0, nchar)
+		*/
+		const NxsDiscreteDatatypeMapper * GetDatatypeMapperForChar(unsigned charIndex) const;
+		/*! \returns a vector of all of the NxsDiscreteDatatypeMapper used by any char in the matrix.
+
+			If the datatype is not mixed, then the vector should have length 1.
+
+			See GetDatatypeMapForMixedType().
+		*/
+		std::vector<const NxsDiscreteDatatypeMapper *> GetAllDatatypeMappers() const;
+
+	// For continuous characters
+		/*! \returns the vector of values for the appropriate item `key` in the indicated cell of the matrix.
+
+			For continuous data matrices, each cell of the matrix can store multiple "items"
+			such as the average, range, max...
+			This function retrieves the collection of data for the taxon `taxIndex`
+			and the character `charIndex` and then returns the values for the item designated by `key`
+
+			GetItems() returns the list of all possible items
+		*/
+		std::vector<double> GetContinuousValues(unsigned taxIndex, unsigned charIndex, const std::string key) NCL_COULD_BE_CONST; /*v2.1to2.2 1 */
+		/*! \returns vector of items stored for each cell. (this is mainly relevant for continuous data).
+			For discrete data, only "STATES" is supported.
+		*/
+		std::vector<std::string> GetItems() const;
+		/*! \returns a facet of the StatesFormatEnum to indicate what the states mean for a cell.
+
+			For discrete data, only STATES_PRESENT is accepted by NCL.
+			For continuous matrices either STATES_PRESENT or INDIVIDUALS will be accepted.
+		*/
+		StatesFormatEnum GetStatesFormat() const;
+
+
+
+	// Functions that are used by many NCL-clients, but are often not needed
+		void FindConstantCharacters(NxsUnsignedSet &c) const;
+		void FindGappedCharacters(NxsUnsignedSet &c) const;
+		virtual const std::string & GetBlockName() const;
+		/*! Stores `m` as the gapMode setting of this block.
+
+			While a file is being parsed the setting is driven by the OPTIONS command of the ASSUMPTIONS block.
+			\note{The setting is held for the convenience of client code. It only influences post-read
+			queries such as NxsCharactersBlock::GetObsStates (by triggering some filtering);
+			it does NOT change the internal encoding of the data.}
+		*/
+		void SetGapModeSetting(GapModeEnum m)
+			{
+			gapMode = m;
+			}
+		static const char * GetNameOfDatatype(DataTypesEnum);
+		NxsDiscreteStateCell GetInternalRepresentation(unsigned i, unsigned j, unsigned k = 0) NCL_COULD_BE_CONST; /*v2.1to2.2 1 */
+		/*! \returns the maximum observed number of states for any character.
+			\note{this function is slow}
+
+			If `onlyActiveChars` is true then calculation will skip characters that have been excluded (eg. by an exset).
+
+		*/
+		virtual unsigned GetMaxObsNumStates(bool countMissingStates=true, bool onlyActiveChars=false) NCL_COULD_BE_CONST ; /*v2.1to2.2 1 */
+		/*! \returns how many states are observed in column `columnIndex`, i.e. the size of the
+			set produced by GetObsStates(columnIndex, countMissingStates).
+
+			When `countMissingStates` is true, missing data is treated as an observation of each state;
+			when it is false, missing codes do not contribute to the count.
+			A partially ambiguous cell contributes every state that it contains.
+
+			columnIndex should be in [0, nchar)
+			\warning{When countMissingStates is true a gap is (apparently -- the original wording of this
+			note was garbled, confirm against GetMaximalStateSetOfColumn) counted as a state; when
+			countMissingStates is false a gap will count as a state if the gapmode is GAP_MODE_NEWSTATE}
+		*/
+		virtual unsigned GetNumObsStates(unsigned columnIndex, bool countMissingStates=true) NCL_COULD_BE_CONST { /*v2.1to2.2 1 */
+			const std::set<NxsDiscreteStateCell> observed = GetObsStates(columnIndex, countMissingStates);
+			return static_cast<unsigned>(observed.size());
+		}
+		/*! \returns the set of "fundamental" states seen in column `columnIndex` (possibly including
+			the gap "state").
+
+			The work is delegated: GetMaximalStateSetOfColumn() is used when `countMissingStates` is true
+			(missing data is treated as an observation of each state), and GetNamedStateSetOfColumn()
+			is used when it is false (missing codes do not contribute).
+			A partially ambiguous cell contributes every state that it contains.
+
+			columnIndex should be in [0, nchar)
+			\warning{When countMissingStates is true a gap is (apparently -- the original wording of this
+			note was garbled, confirm against GetMaximalStateSetOfColumn) counted as a state; when
+			countMissingStates is false a gap will count as a state if the gapmode is GAP_MODE_NEWSTATE}
+		*/
+		std::set<NxsDiscreteStateCell> GetObsStates(unsigned columnIndex, bool countMissingStates=true) const {
+			return (countMissingStates
+					? GetMaximalStateSetOfColumn(columnIndex)
+					: GetNamedStateSetOfColumn(columnIndex));
+		}
+
+		double GetSimpleContinuousValue(unsigned i, unsigned j) NCL_COULD_BE_CONST ; /*v2.1to2.2 1 */
+
+		/*! \returns the label of state `charStateIndex` of character `charIndex` (const overload).
+
+			The lookup is forwarded to GetStateLabelImpl(). If no label was specified, the result is
+			documented to be a string containing a single blank (i.e., " ").
+
+			Both charIndex and charStateIndex should be 0-based
+		*/
+		NxsString GetStateLabel(unsigned charIndex, unsigned charStateIndex) const /*v2.1to2.2 4 */
+		{
+			return this->GetStateLabelImpl(charIndex, charStateIndex);
+		}
+		/*! If a datatype is standard, then it may have been originally specified as "restriction".
+
+			The restriction site datatype was added by MrBayes, It is supported by NCL, but you have
+			to call this function to see if the matrix was specified as restriction site data (we did not
+			add a facet to the DataTypesEnum to avoid the need to extend other code).
+		*/
+		bool WasRestrictionDataype() const;
+
+
+
+	// Rarely-needed functions (usually used mainly internally by NCL)
+		static std::map<char, NxsString> GetDefaultEquates(DataTypesEnum);
+		static std::string GetDefaultSymbolsForType(DataTypesEnum dt);
+		const NxsDiscreteDatatypeMapper & GetDatatypeMapperForCharRef(unsigned charIndex) const;
+		/*! \return a human-readable name for this block's `datatype` field, obtained from the
+			static NxsCharactersBlock::GetNameOfDatatype() */
+		const char * GetDatatypeName() const
+		{
+			return GetNameOfDatatype(this->datatype);
+		}
+		char GetGapSymbol() const;
+		void SetGapSymbol(char);
+		char GetMatchcharSymbol() NCL_COULD_BE_CONST ; /*v2.1to2.2 1 */
+		char GetMissingSymbol() const;
+		unsigned GetMaxIndex() const;
+		/*! \returns a const reference to the block's own discreteMatrix member (no copy is made). */
+		const NxsDiscreteStateMatrix & GetRawDiscreteMatrixRef() const
+		{
+			return this->discreteMatrix;
+		}
+
+
+	// Low-level functions (difficult to use, or with potentially surprising behavior). These are used internally by NCL
+		virtual unsigned CharLabelToNumber(NxsString s) NCL_COULD_BE_CONST ; /*v2.1to2.2 d */
+		virtual unsigned CharLabelToNumber(const std::string & s) const;
+		/*! Looks `label` up among this block's charSets by forwarding to
+			NxsLabelToIndicesMapper::GetIndicesFromSets(), passing `toFill` through unchanged.
+			\returns whatever that helper returns.
+		*/
+		unsigned GetIndexSet(const std::string &label, NxsUnsignedSet * toFill) const
+		{
+			const unsigned result = NxsLabelToIndicesMapper::GetIndicesFromSets(label, toFill, charSets);
+			return result;
+		}
+		unsigned GetIndicesForLabel(const std::string &label, NxsUnsignedSet *inds) const;
+		unsigned GetNCharTotal() ; // non-const version for backward compat.
+		unsigned GetNumIncludedChars() const ; // synonymous with GetNumActiveChar
+		unsigned GetNumEliminated() NCL_COULD_BE_CONST ; /*v2.1to2.2 1 */ //backward-compat.
+		unsigned GetNChar() const;
+		unsigned GetNumChar() const;
+		// Poorly named synonym of GetNumObsStates: returns the size of the set produced by
+		// GetObsStates(columnIndex, countMissingStates). Kept for backward compatibility \deprecated
+		virtual unsigned GetObsNumStates(unsigned columnIndex, bool countMissingStates=true) NCL_COULD_BE_CONST { /*v2.1to2.2 1 */
+			const std::set<NxsDiscreteStateCell> observed = GetObsStates(columnIndex, countMissingStates);
+			return static_cast<unsigned>(observed.size());
+		}
+		/*! \returns the label of state `charStateIndex` of character `charIndex`.
+
+			Non-const overload kept for backward compatibility; like the const overload it simply
+			forwards to GetStateLabelImpl(). If no label was specified, the result is documented to be
+			a string containing a single blank (i.e., " ").
+		*/
+		NxsString GetStateLabel(unsigned charIndex, unsigned charStateIndex) /*v2.1to2.2 4 */ //			non-const version for backward compat
+		{
+			return this->GetStateLabelImpl(charIndex, charStateIndex);
+		}
+		/* It is probably better to ask for the taxa block (via GetTaxaBlockPtr() method) and then have
+			the full TaxaBlockAPI to query from rather than ask about the taxa via the characters block.*/
+		NxsString GetTaxonLabel(unsigned i) const; /*v2.1to2.2 4 */
+
+		// Non-const accessor: hands out a mutable reference to the block's transfMgr member.
+		NxsTransformationManager & GetNxsTransformationManagerRef()
+		{
+			return this->transfMgr;
+		}
+		bool IsActiveChar(unsigned j) ; // non-const version for backward compat.
+		bool IsEliminated(unsigned charIndex) NCL_COULD_BE_CONST ; /*v2.1to2.2 1 */ //same as (!IsActive(charIndex))
+		bool IsExcluded(unsigned j) const; //same as IsEliminated(j)
+		bool IsExcluded(unsigned j) ;  //same as IsEliminated(j) non-const for backward compat.
+		bool IsMixedType() const;
+
+		virtual unsigned TaxonLabelToNumber(NxsString s) const; /*v2.1to2.2 4 */
+		virtual bool AddNewCodonPosPartition(const std::string &label, const NxsPartition & inds, bool isDefault);
+		bool AddNewIndexSet(const std::string &label, const NxsUnsignedSet & inds);
+		bool AddNewExSet(const std::string &label, const NxsUnsignedSet & inds);
+		bool AddNewPartition(const std::string &label, const NxsPartition & inds);
+		void Consume(NxsCharactersBlock &other);
+		/*! Behaves like GetMaximalStateSetOfColumn except that missing data columns do not increase
+			size of the returned state set.
+			This function is sensitive to the gapmode setting. A gap will count as a state if the gapmode is
+			GAP_MODE_NEWSTATE.
+
+			columnIndex should be in [0, nchar)
+		*/
+		std::set<NxsDiscreteStateCell> GetNamedStateSetOfColumn(const unsigned colIndex) const;
+		/*! Returns the set of "fundamental" states seen in a column (possibly including the gap "state").
+
+			Missing data, gaps, and partially ambiguous cells result in all states in the cell being counted as observed.
+
+			columnIndex should be in [0, nchar)
+
+		*/
+		std::set<NxsDiscreteStateCell> GetMaximalStateSetOfColumn(const unsigned colIndex) const;
+
+
+
+
+	// Output/writing functions
+		virtual void Report(std::ostream &out) NCL_COULD_BE_CONST ; /*v2.1to2.2 1 */
+		void ShowStateLabels(std::ostream &out, unsigned i, unsigned c, unsigned first_taxon) const;
+		void WriteAsNexus(std::ostream &out) const;
+		virtual void DebugShowMatrix(std::ostream &out, bool use_matchchar, const char *marginText = NULL) const;
+		virtual void WriteLinkCommand(std::ostream &out) const;
+		void WriteStatesForTaxonAsNexus(std::ostream &out, unsigned taxNum, unsigned begChar, unsigned endChar) const;
+		void WriteCharLabelsCommand(std::ostream &out) const;
+		void WriteCharStateLabelsCommand(std::ostream &out) const;
+		void WriteEliminateCommand(std::ostream &out) const;
+		void WriteFormatCommand(std::ostream &out) const;
+		void WriteMatrixCommand(std::ostream &out) const;
+
+
+
+	// parsing related functions (used internally by NCL, rarely needed by client code)
+		bool IsInterleave() NCL_COULD_BE_CONST ; /*v2.1to2.2 1 */
+		bool IsLabels() NCL_COULD_BE_CONST ; /*v2.1to2.2 1 */
+		bool IsRespectCase() const;
+		bool IsTokens() NCL_COULD_BE_CONST ; /*v2.1to2.2 1 */
+		bool IsTranspose() NCL_COULD_BE_CONST ; /*v2.1to2.2 1 */
+		/* Returns the number of taxa that should be stored. Used during the parse.
+			\warning{After parsing, use TaxonIndHasData() for each taxon rather than this function.}
+			\todo{After the parse this, GetNTaxWithData() *should* agree with GetNTaxTotal(). We need a
+			test for this needed property of }
+		*/
+		unsigned GetNTaxWithData() const ; //
+		virtual VecBlockPtr GetImpliedBlocks();
+		unsigned GetNumEquates() NCL_COULD_BE_CONST ; /*v2.1to2.2 1 */
+		unsigned GetNumUserEquates() const;
+		unsigned GetNumMatrixCols() NCL_COULD_BE_CONST ; /*v2.1to2.2 1 */
+		/* Returns the number of taxa (should agree with GetNTaxTotal()) */
+		unsigned GetNumMatrixRows() NCL_COULD_BE_CONST ; /*v2.1to2.2 1 */
+		virtual void Reset();
+		void SetNexus(NxsReader *nxsptr);
+		const ContinuousCharRow & GetContinuousMatrixRow(unsigned taxNum) const;
+
+
+		/*only used if the linkAPI is enabled*/
+		virtual void HandleLinkCommand(NxsToken & token);
+
+
+
+		/*---------------------------------------------------------------------------------------
+		| Copy assignment. Resets this block and then copies, in order, the base-block contents,
+		| the taxa-block-surrogate contents and the characters contents of `other`.
+		|
+		| Results in aliasing of the taxa, assumptionsBlock blocks!
+		|
+		| Self-assignment is a no-op: without the guard, Reset() would be applied to the very
+		| object that is about to be used as the source of the copy.
+		*/
+		NxsCharactersBlock & operator=(const NxsCharactersBlock &other)
+			{
+			if (this != &other)
+				{
+				Reset();
+				CopyBaseBlockContents(static_cast<const NxsBlock &>(other));
+				CopyTaxaBlockSurrogateContents(other);
+				CopyCharactersContents(other);
+				}
+			return *this;
+			}
+
+		virtual void CopyCharactersContents(const NxsCharactersBlock &other);
+		/*! \returns a newly allocated NxsCharactersBlock, constructed with this block's `taxa` and
+			`assumptionsBlock` and then filled through operator=. The object is created with `new`
+			and returned as a raw pointer.
+		*/
+		virtual NxsCharactersBlock * Clone() const
+		{
+			NxsCharactersBlock * const duplicate = new NxsCharactersBlock(taxa, assumptionsBlock);
+			*duplicate = *this;
+			return duplicate;
+		}
+
+
+		/*! Stores `interleaveLen` in the writeInterleaveLen member. */
+		void SetWriteInterleaveLen(int interleaveLen)
+		{
+			this->writeInterleaveLen = interleaveLen;
+		}
+
+		std::string GetMatrixRowAsStr(const unsigned rowIndex) const;
+		NxsDiscreteStateCell	GetStateIndex(unsigned i, unsigned j, unsigned k) const;
+
+
+		/** converts a CodonPosPartition into the list<int> argument needed for the NewCodonsCharactersBlock call */
+		static void CodonPosPartitionToPosList(const NxsPartition &codonPos, std::list<int> * charIndices);
+
+		/* Allocates a new characters block with all of the active characters in `charBlock`
+			but with a 64-state codon datatype. The order of codons is:
+			 0   1   2   3   4   5  ... 63
+			AAA AAC AAG AAT ACA ACC ... TTT
+			The caller is responsible for deleting the new NxsCharactersBlock object.
+			If charIndices is provided, it lists the bases in the RF (presumably: reading frame) by position;
+			an int can be < 0 to indicate that the position was not sampled.
+		*/
+		static NxsCharactersBlock * NewCodonsCharactersBlock(
+			const NxsCharactersBlock * charBlock,
+			bool mapPartialAmbigToUnknown,
+			bool gapsToUnknown,
+			bool honorCharActive, /* if true then inactive characters are treated as missing */
+			const std::list<int> * charIndices = NULL, /* specifies the indices of the positions in the gene. -1 can be used to indicate that the codon position was not included in the original matrix */
+			NxsCharactersBlock ** spareNucs = NULL /* If non-null, then on exit the NxsCharactersBlock * pointer will refer to a new character block with all of the positions that were not translated (all of the non-coding nucleotide positions) */
+			);
+		static NxsCharactersBlock * NewProteinCharactersBlock(
+			const NxsCharactersBlock * codonBlock,
+			bool mapPartialAmbigToUnknown,
+			bool gapToUnknown,
+			NxsGeneticCodesEnum codeIndex);
+		static NxsCharactersBlock * NewProteinCharactersBlock(
+			const NxsCharactersBlock * codonBlock,
+			bool mapPartialAmbigToUnknown,
+			bool gapToUnknown,
+			const std::vector<NxsDiscreteStateCell> & aaIndices); /** the index of the amino acid symbols for the codon (where the order of codons is alphabetical: AAA, AAC, AAG, AAT, ACA, ...TTT **/
+
+		/*! \returns a copy of the defCodonPosPartitionName member. */
+		virtual std::string GetDefaultCodonPosPartitionName() const
+			{
+			return this->defCodonPosPartitionName;
+			}
+		/*! \returns a copy of the partition stored under `label` in codonPosPartitions, or a
+			default-constructed NxsPartition when no entry with that label exists.
+		*/
+		virtual NxsPartition GetCodonPosPartition(const std::string &label) const
+			{
+			const NxsPartitionsByName::const_iterator found = codonPosPartitions.find(label);
+			if (found != codonPosPartitions.end())
+				return found->second;
+			return NxsPartition();
+			}
+
+		unsigned NumAmbigInTaxon(const unsigned taxInd, const NxsUnsignedSet * charIndices, const bool countOnlyCompletelyMissing, const bool treatGapsAsMissing) const;
+		bool FirstTaxonStatesAreSubsetOfSecond(const unsigned firstTaxonInd, const unsigned secondTaxonInd, const NxsUnsignedSet * charIndices, const bool treatAmbigAsMissing, const bool treatGapAsMissing) const;
+		//Returns the number of characters that differ, and the number of positions for which both taxa were non-missing
+
+		std::pair<unsigned, unsigned> GetPairwiseDist(const unsigned firstTaxonInd, const unsigned secondTaxonInd, const NxsUnsignedSet * charIndices, const bool treatAmbigAsMissing, const bool treatGapAsMissing) const;
+		CodonRecodingStruct RemoveStopCodons(NxsGeneticCodesEnum);
+		/*! Forwards `tb` to SurrogateSwapEquivalentTaxaBlock() and returns its result. */
+		bool SwapEquivalentTaxaBlock(NxsTaxaBlockAPI * tb)
+			{
+			const bool swapped = SurrogateSwapEquivalentTaxaBlock(tb);
+			return swapped;
+			}
+
+		/*! Writes a range of character states as NEXUS to out.
+
+		*/
+		void WriteStatesForMatrixRow(std::ostream &out, /*!< ostream that will be written to.*/
+									unsigned taxon, /*!< index of the row (taxon) to be written.  Should be in [0,ntax). */
+									unsigned first_taxon, /*!< UINT_MAX to avoid using the matchchar in output. Otherwise the [0,ntax) index of the taxon that is printed first. */
+									unsigned begChar, /*!< first character index to write. Should be in [0, nchar). */
+									unsigned endChar) const; /*!< end of character range. This index is one greater than the last index to be printed. Should be in the range (begChar, nchar] */
+
+
+	protected:
+		// Overwrites the nChar field with `nc`. Not a way to remove characters: it is only meant
+		// for use while new char blocks are being created from existing blocks.
+		void SetNChar(unsigned nc)
+		{
+			nChar = nc;
+		}
+		// Overwrites the nTaxWithData field with `nt`. Like SetNChar, this is not meant for removing
+		// data: it is only used while new char blocks are being created from existing blocks.
+		void SetNTax(unsigned nt)
+		{
+			nTaxWithData = nt;
+		}
+
+		NxsString GetStateLabelImpl(unsigned i, unsigned j) const; /*v2.1to2.2 4 */
+
+		NxsDiscreteDatatypeMapper * GetMutableDatatypeMapperForChar(unsigned charIndex);
+		bool IsInSymbols(char ch) NCL_COULD_BE_CONST ; /*v2.1to2.2 1 */
+		void ShowStates(std::ostream &out, unsigned i, unsigned j) NCL_COULD_BE_CONST ; /*v2.1to2.2 1 */
+
+		void HandleCharlabels(NxsToken &token);
+		void HandleCharstatelabels(NxsToken &token);
+		void HandleDimensions(NxsToken &token, NxsString newtaxaLabel, NxsString ntaxLabel, NxsString ncharLabel);
+		void HandleEliminate(NxsToken &token);
+		virtual void HandleFormat(NxsToken &token);
+		virtual void HandleMatrix(NxsToken &token);
+		bool HandleNextContinuousState(NxsToken &token, unsigned taxNum, unsigned charNum, ContinuousCharRow & row, const NxsString & nameStr);
+		bool HandleNextDiscreteState(NxsToken &token, unsigned taxNum, unsigned charNum, NxsDiscreteStateRow & row, NxsDiscreteDatatypeMapper &, const NxsDiscreteStateRow * firstTaxonRow, const NxsString & nameStr);
+		bool HandleNextTokenState(NxsToken &token, unsigned taxNum, unsigned charNum, NxsDiscreteStateRow & row, NxsDiscreteDatatypeMapper &, const NxsDiscreteStateRow * firstTaxonRow, const NxsString & nameStr);
+		void HandleStatelabels(NxsToken &token);
+		virtual void HandleStdMatrix(NxsToken &token);
+		virtual NxsDiscreteStateCell HandleTokenState(NxsToken &token, unsigned taxNum, unsigned charNum, NxsDiscreteDatatypeMapper &mapper, const NxsDiscreteStateRow * firstTaxonRow, const NxsString & nameStr);
+		virtual void HandleTransposedMatrix(NxsToken &token);
+		virtual void Read(NxsToken &token);
+		void ResetSymbols();
+
+		void WriteStates(NxsDiscreteDatum &d, char *s, unsigned slen) NCL_COULD_BE_CONST ; /*v2.1to2.2 1 */
+
+
+		NxsAssumptionsBlockAPI	*assumptionsBlock;	/* pointer to the ASSUMPTIONS block in which exsets, taxsets and charsets are stored */
+
+		unsigned nChar; /* number of columns in matrix	*/
+		unsigned nTaxWithData; /* number of non empty rows in the matrix*/
+
+		char matchchar; /* match symbol to use in matrix */
+		bool respectingCase; /* if true, RESPECTCASE keyword specified in FORMAT command */
+		bool transposing; /* indicates matrix will be in transposed format */
+		bool interleaving; /* indicates matrix will be in interleaved format */
+		mutable bool tokens; /* if false, data matrix entries must be single symbols; if true, multicharacter entries are allowed */
+		bool labels; /* indicates whether or not labels will appear on left side of matrix */
+
+		char missing; /* missing data symbol */
+		char gap; /* gap symbol for use with molecular data */
+		GapModeEnum gapMode; /* manipulated by the assumptions block. This setting basically just holds the value. It only affects post-read operations, such as GetNamedStateSetOfColumn and it does NOT change the internal encoding of the data (just triggers some filtering) */
+		std::string symbols; /* list of valid character state symbols */
+		std::map<char, NxsString> userEquates; /* list of associations defined by EQUATE attribute of FORMAT command */
+		std::map<char, NxsString> defaultEquates;
+		VecDatatypeMapperAndIndexSet datatypeMapperVec;
+		NxsDiscreteStateMatrix	discreteMatrix; /* storage for discrete data */
+		ContinuousCharMatrix	continuousMatrix;	/* */
+
+		NxsUnsignedSet eliminated; /* array of (0-offset) character numbers that have been eliminated (will remain empty if no ELIMINATE command encountered) */
+		NxsUnsignedSet excluded; /* set of (0-offset) indices of characters that have been excluded.*/
+
+		LabelToIndexMap ucCharLabelToIndex;
+		IndexToLabelMap indToCharLabel;
+		NxsStringVectorMap charStates; /* storage for character state labels (if provided) */
+		NxsStringVector globalStateLabels; /* state labels that apply to all characters (if not pre-empted by the charStates field) */
+		VecString items;
+
+		NxsUnsignedSetMap charSets;
+		NxsUnsignedSetMap exSets;
+		NxsPartitionsByName charPartitions;
+		NxsTransformationManager transfMgr;
+		bool datatypeReadFromFormat;
+		NxsPartitionsByName codonPosPartitions;
+		std::string defCodonPosPartitionName;
+		std::map<DataTypesEnum, NxsUnsignedSet> mixedTypeMapping;
+	private:
+		DataTypesEnum datatype; /* flag variable (see datatypes enum) */
+		DataTypesEnum originalDatatype; /* flag variable (see datatypes enum) */
+		StatesFormatEnum statesFormat;
+		bool restrictionDataype;
+		bool supportMixedDatatype;	/* (false by default) flag for whether or not MrBayes-style Mixed blocks should be supported */
+		bool convertAugmentedToMixed; /* false by default (see AugmentedSymbolsToMixed) */
+		bool allowAugmentingOfSequenceSymbols;
+		int writeInterleaveLen;
+
+		void CreateDatatypeMapperObjects(const NxsPartition & , const std::vector<DataTypesEnum> &);
+		friend class PublicNexusReader;
+		friend class MultiFormatReader;
+	};
+
+typedef NxsCharactersBlock CharactersBlock;
+
+
+/*! Block factory (derived from NxsBlockFactory) whose GetBlockReaderForID() is declared to
+	return a NxsCharactersBlock pointer. The implementation is not part of this header.
+*/
+class NxsCharactersBlockFactory
+	:public NxsBlockFactory
+	{
+	public:
+		virtual NxsCharactersBlock	*	GetBlockReaderForID(const std::string & NCL_BLOCKTYPE_ATTR_NAME, NxsReader *reader, NxsToken *token);
+	};
+
+/*! Plain record bundling a set of state indices with the NEXUS symbol associated with it and a
+	flag saying whether the set is marked as polymorphic. All three fields are public.
+*/
+class NxsDiscreteStateSetInfo
+{
+	public:
+		/*! Copies `stateSet` into `states`; `symbol` defaults to '\0' and `polymorphic` to false. */
+		NxsDiscreteStateSetInfo(const std::set<NxsDiscreteStateCell> & stateSet, bool polymorphic=false, char symbol='\0')
+			: states(stateSet), nexusSymbol(symbol), isPolymorphic(polymorphic)
+		{
+		}
+
+		std::set<NxsDiscreteStateCell> states; /* the state indices covered by this entry */
+		char nexusSymbol; /* symbol for the set ('\0' unless one is supplied) */
+		bool isPolymorphic; /* polymorphism flag supplied at construction */
+};
+
+/*! This class stores the information needed to map the internal storage for a cell of a matrix (a "state code") to
+	the set of states that it corresponds to.
+*/
+class NxsDiscreteDatatypeMapper
+	{
+	public:
+
+
+
+
+		/* The std::string overload below forwards s.c_str() (and the remaining arguments unchanged)
+			to this const char * overload. */
+		static void GenerateNxsExceptionMatrixReading(const char *, unsigned taxInd, unsigned charInd, NxsToken *, const NxsString &nameStr);
+		static void GenerateNxsExceptionMatrixReading(const std::string &s, unsigned taxInd, unsigned charInd, NxsToken * token, const NxsString &nameStr)
+			{
+			GenerateNxsExceptionMatrixReading(s.c_str(), taxInd, charInd, token, nameStr);
+			}
+
+
+		NxsDiscreteDatatypeMapper();
+		NxsDiscreteDatatypeMapper(NxsCharactersBlock::DataTypesEnum datatypeE, bool hasGaps);
+		NxsDiscreteDatatypeMapper(NxsCharactersBlock::DataTypesEnum datatype, const std::string & symbols,
+								char missingChar, char gapChar, char matchChar,
+								bool respectCase, const std::map<char, NxsString> & extraEquates);
+
+		/*! \returns the number of state codes (including partially ambiguous) */
+		unsigned GetNumStateCodes() const
+		    {
+		    return (unsigned)stateSetsVec.size();
+		    }
+
+		/*! \returns the datatype field of this mapper. */
+		NxsCharactersBlock::DataTypesEnum GetDatatype() const
+			{
+			return datatype;
+			}
+		unsigned GetNumStates() const;
+		unsigned GetNumStatesIncludingGap() const;
+		/*! \returns a copy of the symbols string. */
+		std::string GetSymbols() const
+		    {
+		    return symbols;
+		    }
+		/*! \returns the symbols string with gapChar appended, or just the symbols when gapChar is '\0'. */
+		std::string GetSymbolsWithGapChar() const
+		    {
+		    if (gapChar == '\0')
+		    	return GetSymbols();
+		    std::string s;
+		    s = symbols;
+		    s.append(1, gapChar);
+		    return s;
+		    }
+
+		const std::set<NxsDiscreteStateCell> & GetStateSetForCode(NxsDiscreteStateCell stateCode) const;
+		bool IsSemanticallyEquivalent(const NxsDiscreteDatatypeMapper &other) const;
+		bool IsPolymorphic(NxsDiscreteStateCell stateCode) const;
+		NxsDiscreteStateCell PositionInSymbols(const char currChar) const;
+		/*! Returns a state code for a NEXUS symbol.
+
+			will return NXS_INVALID_STATE_CODE if the char is unknown
+		*/
+		NxsDiscreteStateCell GetStateCodeStored(char currChar) const
+			{
+			  // NOTE(review): the index is the value of a plain char, which is negative for some
+			  // characters where char is signed; this relies on cLookup pointing far enough into
+			  // its backing storage -- confirm where cLookup is set up.
+			  return cLookup[static_cast<int>(currChar)];
+			}
+		/*! \returns the highest legal internal state code (this is not the number of
+			state codes because the missing data and gap codes are negative).
+
+			Note that this is the highest state code that the mapper understands, it does
+				not imply that the NxsCharacterBlock that "owns" the mapper has a matrix
+				with a state code that is as large as the number returned.
+
+		*/
+		NxsDiscreteStateCell GetHighestStateCode() const
+			{
+			return ((NxsDiscreteStateCell) stateSetsVec.size()) + sclOffset - 1;
+			}
+
+
+
+
+
+		/* Copy constructor: initializes datatype in the initializer list and then delegates
+			everything else to operator=. */
+		NxsDiscreteDatatypeMapper(const NxsDiscreteDatatypeMapper& other)
+			:datatype(other.datatype)
+			{
+			*this = other;
+			}
+		NxsDiscreteDatatypeMapper & operator=(const NxsDiscreteDatatypeMapper&);
+
+		/*! \returns the gapChar field. */
+		char GetGapSymbol() const
+		    {
+		    return gapChar;
+		    }
+		// warning: unsafe to call after reading -- does not recode data!
+		void SetGapSymbol(char c)
+		    {
+		    gapChar = c;
+		    }
+		/*! \returns the missing field (the missing-data symbol). */
+		char GetMissingSymbol() const
+		    {
+		    return missing;
+		    }
+		/*! \returns a copy of the extraEquates map. */
+		std::map<char, NxsString> GetExtraEquates() const
+			{
+			return extraEquates;
+			}
+		unsigned GetNumStatesInStateCode(NxsDiscreteStateCell stateCode) const;
+		NxsDiscreteStateCell GetOneStateForCode(NxsDiscreteStateCell stateCode, unsigned stateIndex) const;
+		NxsDiscreteStateRow GetStateVectorForCode(NxsDiscreteStateCell stateCode) const;
+		std::vector<std::vector<int> > GetPythonicStateVectors() const;
+		/*! \returns NXS_GAP_STATE_CODE when currChar equals gapChar; otherwise the result of
+			PositionInSymbols(currChar). */
+		NxsDiscreteStateCell PositionInSymbolsOrGaps(const char currChar) const
+			{
+			if (currChar == gapChar)
+				return NXS_GAP_STATE_CODE;
+			return PositionInSymbols(currChar);
+			}
+		std::string StateCodeToNexusString(NxsDiscreteStateCell, bool demandSymbols = true) const;
+		NxsDiscreteStateCell StateCodeForNexusChar(const char currChar, NxsToken * token,
+								  unsigned taxInd, unsigned charInd,
+								  const NxsDiscreteStateRow * firstTaxonRow, const NxsString &nameStr) const;
+		void WriteStartOfFormatCommand(std::ostream & out) const;
+		void WriteStateCodeRowAsNexus(std::ostream & out, const std::vector<NxsDiscreteStateCell> &row) const;
+		void WriteStateCodeRowAsNexus(std::ostream & out, std::vector<NxsDiscreteStateCell>::const_iterator & begIt, const std::vector<NxsDiscreteStateCell>::const_iterator & endIt) const;
+		void WriteStateCodeAsNexusString(std::ostream & out, NxsDiscreteStateCell scode, bool demandSymbols = true) const;
+		bool WasRestrictionDataype() const;
+		/* Stores v in the restrictionDataype flag (the value reported by WasRestrictionDataype). */
+		void SetWasRestrictionDataype(bool v) {restrictionDataype = v;}
+		NxsDiscreteStateCell EncodeNexusStateString(const std::string &stateAsNexus, NxsToken & token,
+								   const unsigned taxInd, const unsigned charInd,
+								   const NxsDiscreteStateRow * firstTaxonRow, const NxsString &nameStr);
+		NxsDiscreteStateCell StateCodeForStateSet(const std::set<NxsDiscreteStateCell> &, const bool isPolymorphic,
+								 const bool addToLookup, const char symbol);
+
+		void DebugPrint(std::ostream &) const;
+
+ 		/*! \returns the userDefinedEquatesBeforeConversion flag. */
+ 		bool GetUserDefinedEquatesBeforeConversion() const
+ 			{
+ 			return userDefinedEquatesBeforeConversion;
+			}
+
+		/*! \returns the respectCase flag. */
+		bool IsRespectCase() const
+			{
+			return respectCase;
+			}
+
+		/*! \returns a reference to the cached stateIntersectionMatrix entry for the two state codes.
+
+			The matrix is built on first use (whenever it is found empty). Both codes are shifted by
+			NXS_GAP_STATE_CODE to obtain the row and column indices, and both lookups go through the
+			bounds-checked std::vector::at().
+		*/
+		const std::set<NxsDiscreteStateCell> & GetStateIntersection(NxsDiscreteStateCell stateCode, NxsDiscreteStateCell otherStateCode) const
+			{
+			if (stateIntersectionMatrix.empty())
+				BuildStateIntersectionMatrix();
+			const NxsDiscreteStateCell sc = stateCode - NXS_GAP_STATE_CODE;
+			const NxsDiscreteStateCell osc = otherStateCode - NXS_GAP_STATE_CODE;
+			return stateIntersectionMatrix.at(sc).at(osc);
+			}
+
+		/*! \returns the cached subset-matrix entry for (stateCode, otherStateCode): read from
+			isStateSubsetMatrixGapsMissing when treatGapAsMissing is true, otherwise from isStateSubsetMatrix.
+
+			Indices are the codes shifted by NXS_GAP_STATE_CODE; lookups use the bounds-checked at().
+			NOTE(review): the lazy build is triggered only by isStateSubsetMatrix being empty, so
+			BuildStateSubsetMatrix() presumably fills both matrices -- confirm.
+		*/
+		bool FirstIsSubset(NxsDiscreteStateCell stateCode, NxsDiscreteStateCell otherStateCode, bool treatGapAsMissing) const
+			{
+			if (isStateSubsetMatrix.empty())
+				BuildStateSubsetMatrix();
+			const NxsDiscreteStateCell sc = stateCode - NXS_GAP_STATE_CODE;
+			const NxsDiscreteStateCell osc = otherStateCode - NXS_GAP_STATE_CODE;
+			if (treatGapAsMissing)
+				return isStateSubsetMatrixGapsMissing.at(sc).at(osc);
+			return isStateSubsetMatrix.at(sc).at(osc);
+			}
+
+		NxsGeneticCodesEnum geneticCode; /* only used for compressed codon codings */
+
+		/*! can be used to "See" the mapping while debugging */
+		void DebugWriteMapperFields(std::ostream & out) const;
+	private:
+		NxsDiscreteStateCell AddStateSet(const std::set<NxsDiscreteStateCell> & states, char nexusSymbol, bool symRespectCase, bool isPolymorphic);
+		NxsDiscreteStateCell StateCodeForNexusMultiStateSet(const char nexusSymbol, const std::string & stateAsNexus, NxsToken * token,
+								  unsigned taxInd, unsigned charInd,
+								  const NxsDiscreteStateRow * firstTaxonRow, const NxsString &nameStr);
+		NxsDiscreteStateCell StateCodeForNexusPossibleMultiStateSet(const char nexusSymbol, const std::string & stateAsNexus, NxsToken & token,
+								  unsigned taxInd, unsigned charInd,
+								  const NxsDiscreteStateRow * firstTaxonRow, const NxsString &nameStr);
+
+		void RefreshMappings(NxsToken *token);
+		void ValidateStateIndex(NxsDiscreteStateCell state) const;
+		void ValidateStateCode(NxsDiscreteStateCell state) const;
+		void BuildStateSubsetMatrix() const;
+		void BuildStateIntersectionMatrix() const;
+		void DeleteStateIndices(const std::set<NxsDiscreteStateCell> & deletedInds);
+
+		NxsDiscreteStateCell * cLookup; /* Nexus char to state code lookup -- alias to member of charToStateCodeLookup*/
+		NxsDiscreteStateSetInfo * stateCodeLookupPtr; /* state code to NxsDiscreteStateSetInfo object table -- alias to stateSets */
+		std::string symbols;
+		std::string lcsymbols; /* lowercase symbols (in the same order as symbols) */
+		unsigned nStates;
+		char matchChar;
+		char gapChar;
+		char missing;
+		bool respectCase;
+		std::map<char, NxsString> extraEquates;
+		NxsCharactersBlock::DataTypesEnum datatype; /* flag variable (see datatypes enum) */
+		/* NOTE(review): the comments on the next two members look swapped (per the comments on cLookup and
+			stateCodeLookupPtr, stateSetsVec would back stateCodeLookupPtr and charToStateCodeLookup would back cLookup) -- confirm. */
+		std::vector<NxsDiscreteStateSetInfo> stateSetsVec; /* memory management for cLookup*/
+		std::vector<NxsDiscreteStateCell> charToStateCodeLookup; /* stateCodeLookup */
+		int sclOffset; /* offset of stateCodeLookup in stateSets */
+		bool restrictionDataype;
+		bool userDefinedEquatesBeforeConversion;
+
+		typedef std::vector< std::set<NxsDiscreteStateCell> > StateIntersectionRow;
+		typedef std::vector< StateIntersectionRow > StateIntersectionMatrix;
+		typedef std::vector< bool > IsStateSubsetRow;
+		typedef std::vector< IsStateSubsetRow > IsStateSubsetMatrix;
+		/* Caches filled lazily from const member functions, hence mutable. */
+		mutable StateIntersectionMatrix stateIntersectionMatrix;
+		mutable IsStateSubsetMatrix isStateSubsetMatrix;
+		mutable IsStateSubsetMatrix isStateSubsetMatrixGapsMissing;
+
+		friend class NxsCharactersBlock;
+		friend class MultiFormatReader;
+	};
+
+/*!
+	Returns nStates, plus one when a gap character is set (gapChar is not '\0').
+*/
+inline unsigned NxsDiscreteDatatypeMapper::GetNumStatesIncludingGap() const
+	{
+	if (gapChar == '\0')
+		return nStates;
+	return nStates + 1;
+	}
+
+/*!
+	Returns the nStates field.
+*/
+inline unsigned NxsDiscreteDatatypeMapper::GetNumStates() const
+{
+	return this->nStates;
+}
+
+/*!
+	Returns, as a vector, the state indices that make up state code `c`. The elements are copied
+	from the set returned by GetStateSetForCode(c), in the set's iteration order.
+	According to the original documentation a NxsNCLAPIException is generated if `c` is not a valid state code.
+	Not as efficient as GetStateSetForCode (the states are copied).
+*/
+inline std::vector<NxsDiscreteStateCell> NxsDiscreteDatatypeMapper::GetStateVectorForCode(NxsDiscreteStateCell c) const
+{
+	const std::set<NxsDiscreteStateCell> & members = GetStateSetForCode(c);
+	std::vector<NxsDiscreteStateCell> row;
+	row.reserve(members.size());
+	row.insert(row.end(), members.begin(), members.end());
+	return row;
+}
+
+/*!
+	Returns a reference to the set of state indices stored for state code `c`.
+	The lookup table pointer is asserted to be non-null and `c` is passed through
+	ValidateStateCode() before the table is indexed (per the original documentation, a
+	NxsNCLAPIException is generated if `c` is not a valid state code).
+*/
+inline const std::set<NxsDiscreteStateCell>	& NxsDiscreteDatatypeMapper::GetStateSetForCode(NxsDiscreteStateCell c) const
+{
+	NCL_ASSERT(stateCodeLookupPtr);
+	ValidateStateCode(c);
+	const NxsDiscreteStateSetInfo & entry = *(stateCodeLookupPtr + c);
+	return entry.states;
+}
+
+/*!
+	Returns the `stateIndex`-th element (0-based, in set iteration order) of the state set for `stateCode`.
+	Thus if stateCode = 6 and this corresponds to {AG} then:
+		GetOneStateForCode(6, 0) would return 0 (assuming that A is state 0), and
+		GetOneStateForCode(6, 1) would return 2 (assuming that G is state 2 in the symbols list)
+
+	If `stateIndex` is not smaller than the size of the set, NCL_ASSERT(false) is hit and a
+	NxsException is thrown.
+*/
+inline NxsDiscreteStateCell NxsDiscreteDatatypeMapper::GetOneStateForCode(NxsDiscreteStateCell stateCode, unsigned stateIndex) const
+{
+	const std::set<NxsDiscreteStateCell> & members = GetStateSetForCode(stateCode);
+	std::set<NxsDiscreteStateCell>::const_iterator cursor = members.begin();
+	// count down from stateIndex while walking the set; the element reached at zero is the answer
+	for (unsigned remaining = stateIndex; cursor != members.end(); ++cursor, --remaining)
+	{
+		if (remaining == 0)
+			return *cursor;
+	}
+	NCL_ASSERT(false);
+	throw NxsException("State index out of range in NxsDiscreteDatatypeMapper::GetOneStateForCode");
+}
+
+/*!
+	Returns the NEXUS representation of the state code `scode`, which may be a multiple character string such as {DNY}.
+	The text is produced by WriteStateCodeAsNexusString() writing into a string stream.
+	Per the original documentation: a NxsNCLAPIException is generated if `scode` is not a valid state code, and
+	if insufficient symbols exist then `demandSymbols` controls the behavior (if true an NxsNCLAPIException
+	is raised, otherwise an empty string is returned).
+
+	Note that
+		WriteStateCodeAsNexusString(out, c);
+	Is more efficient than
+		out << StateCodeToNexusString(c);
+*/
+inline std::string NxsDiscreteDatatypeMapper::StateCodeToNexusString(NxsDiscreteStateCell scode, bool demandSymbols) const
+{
+	std::ostringstream buffer;
+	WriteStateCodeAsNexusString(buffer, scode, demandSymbols);
+	const std::string rendered = buffer.str();
+	return rendered;
+}
+
+
+/*!
+	Converts `stateAsNexus`, the NEXUS text of one matrix cell, into a state code. An empty string is reported through
+	GenerateNxsExceptionMatrixReading, a single character is handled by StateCodeForNexusChar, and any longer string
+	is passed to StateCodeForNexusMultiStateSet. The token and indices are forwarded to those helpers.
+*/
+inline NxsDiscreteStateCell NxsDiscreteDatatypeMapper::EncodeNexusStateString(
+  const std::string &stateAsNexus,
+  NxsToken & token, /* the token used to read from `in' */
+  const unsigned taxInd, /* the taxon index, in range [0..`ntax') */
+  const unsigned charInd, /* the character index, in range [0..`nChar') */
+  const NxsDiscreteStateRow * firstTaxonRow, const NxsString &nameStr)
+	{
+	const unsigned tlen = (unsigned) stateAsNexus.length();
+	if (tlen == 0)
+		GenerateNxsExceptionMatrixReading("Unexpected empty token encountered", taxInd, charInd, &token, nameStr);
+	if (tlen == 1)
+		return StateCodeForNexusChar(stateAsNexus[0], &token, taxInd, charInd, firstTaxonRow, nameStr);
+	return StateCodeForNexusMultiStateSet('\0', stateAsNexus, &token, taxInd, charInd, firstTaxonRow, nameStr);
+	}
+
+/*! MrBayes introduced the datatype=restriction syntax for 01 symbols.
+	NCL reads this type as standard, but sets a flag. If the datatype is reported as Standard, then you can call
+	WasRestrictionDataype to see if the datatype was declared "RESTRICTION"
+*/
+inline bool NxsDiscreteDatatypeMapper::WasRestrictionDataype() const
+	{
+	return restrictionDataype;
+	}
+/*! MrBayes introduced the datatype=restriction syntax for 01 symbols.
+	NCL reads this type as standard, but sets a flag. If the datatype is reported as Standard, then you can call
+	WasRestrictionDataype to see if the datatype was declared "RESTRICTION"
+*/
+inline bool NxsCharactersBlock::WasRestrictionDataype() const
+	{
+	return restrictionDataype;
+	}
+
+inline void NxsCharactersBlock::SetNexus(NxsReader *nxsptr)
+	{
+	NxsBlock::SetNexus(nxsptr);
+	NxsTaxaBlockSurrogate::SetNexusReader(nxsptr);
+	}
+
+inline bool NxsCharactersBlock::IsMixedType() const
+	{
+	return (datatypeMapperVec.size() > 1);
+	}
+
+/*! Returns the list of items that will be in each cell. This is always "STATES" for discrete datatypes, but can be
+	a vector of any string for continuous types
+*/
+inline std::vector<std::string> NxsCharactersBlock::GetItems() const
+	{
+	return items;
+	}
+
+/*!
+	Accessor for getting the list of continuous values associated with an "ITEM." Usually, these vectors will have
+	length of 1, but the "STATES" item may have a list of all observed values.
+
+	Values of DBL_MAX indicate missing data.
+	An empty vector indicates that the key was not used in this cell.
+*/
+inline std::vector<double> NxsCharactersBlock::GetContinuousValues(
+	unsigned i, /* the taxon in range [0..`ntax') */
+	unsigned j, /* the character in range [0..`nChar') */
+	const std::string key) NCL_COULD_BE_CONST /* The name of the ITEM in the FORMAT command. Must be ALL CAPS.*/ /*v2.1to2.2 1 */
+	{
+	const ContinuousCharCell & cell = continuousMatrix.at(i).at(j);
+	ContinuousCharCell::const_iterator cIt = cell.find(key);
+	if (cIt == cell.end())
+		return std::vector<double>();
+	return cIt->second;
+	}
+
+/*! Short cut for returning the AVERAGE item, which is the default of a continuous cell type. Note this does not
+	compute the average, this is just a shortcut for dealing with simple continuous matrices that do not
+	use the ITEMS subcommand of FORMAT.
+
+	Values of DBL_MAX indicate missing data.
+*/
+inline double NxsCharactersBlock::GetSimpleContinuousValue(
+	unsigned i, /* the taxon in range [0..`ntax') */
+	unsigned j) NCL_COULD_BE_CONST /* the character in range [0..`nChar') */ /*v2.1to2.2 1 */
+	{
+	const std::vector<double> av = GetContinuousValues(i, j, std::string("AVERAGE"));
+	if (av.empty())
+		return DBL_MAX;
+	return av.at(0);
+	}
+
+
+/*!
+	Returns label for character `i' (starting at zero), if a label has been specified. If no label was specified, returns string
+	containing a single blank (i.e., " ").
+*/
+inline NxsString NxsCharactersBlock::GetCharLabel( /*v2.1to2.2 4 */
+  unsigned i) const	/* the character in range [0..`nChar') */
+	{
+	std::map<unsigned, std::string>::const_iterator labelIt = indToCharLabel.find(i);
+	if (labelIt != indToCharLabel.end())
+		return NxsString(labelIt->second.c_str()); /*v2.1to2.2 4 */
+	return NxsString(" "); /*v2.1to2.2 4 */
+	}
+
+/*!
+	Returns true if at least one character has charlabels
+*/
+inline bool NxsCharactersBlock::HasCharLabels() const
+	{
+	return !indToCharLabel.empty();
+	}
+/*!
+	Returns the gap symbol currently in effect. If no gap symbol specified, returns '\0'.
+*/
+inline char NxsCharactersBlock::GetGapSymbol() const
+	{
+	return gap;
+	}
+
+/*!
+	Warning: this function is unsafe -- it only affects the writing of the matrix as NEXUS and it does not correctly
+	recode the matrix.
+*/
+inline void NxsCharactersBlock::SetGapSymbol(char g)
+	{
+	gap = g;
+	if (datatypeMapperVec.size() == 1)
+		datatypeMapperVec[0].first.SetGapSymbol(g);
+	}
+
+
+/*! Returns value of `datatype' from the datatype mapper.
+	If you have told NCL to read an augmented symbols list (SetAllowAugmentingOfSequenceSymbols)
+		then it is possible that the datatype returned will be standard even if the GetOriginalDataType()
+		returns a molecular sequence datatype. This means that the symbols list was augmented.
+
+*/
+inline NxsCharactersBlock::DataTypesEnum NxsCharactersBlock::GetDataType() const
+	{
+	if (datatypeMapperVec.size() == 1)
+		return datatypeMapperVec[0].first.GetDatatype(); /* exactly one mapper: ask it */
+	if (datatypeMapperVec.empty())
+		return datatype; /* no mapper: report the block-level field */
+	return mixed; /* more than one mapper */
+	}
+
+inline NxsCharactersBlock::DataTypesEnum NxsCharactersBlock::GetOriginalDataType() const
+	{
+	return originalDatatype;
+	}
+
+/*!
+	Returns the `matchchar' symbol currently in effect. If no `matchchar' symbol specified, returns '\0'.
+*/
+inline char NxsCharactersBlock::GetMatchcharSymbol() NCL_COULD_BE_CONST /*v2.1to2.2 1 */
+	{
+	return matchchar;
+	}
+
+/*!
+	This function is no longer the most efficient way to access parsed data (see notes on NxsCharacterBlock and
+	GetMatrix() and GetMatrixDecoder() methods.
+
+	Returns internal representation of the state for taxon `i', character `j'. In the normal situation, `k' is 0 meaning
+	there is only one state with no uncertainty or polymorphism. If there are multiple states, specify a number in the
+	range [0..n) where n is the number of states returned by the GetNumStates function. Use the IsPolymorphic
+	function to determine whether the multiple states correspond to uncertainty in state assignment or polymorphism in
+	the taxon. The value returned from this function is one of the following:
+~
+	o -3 means gap state (see note below)
+	o -2 means missing state (see note below)
+	o an integer 0 or greater is internal representation of a state
+~
+	Note: gap and missing states are actually represented internally in a different way; for a description of the actual
+	internal representation of states, see the documentation for NxsDiscreteDatum.
+
+*/
+inline NxsDiscreteStateCell NxsCharactersBlock::GetInternalRepresentation(
+  unsigned i,	/* the taxon in range [0..`ntax') */
+  unsigned j,	/* the character in range [0..`nchar') */
+  unsigned k) NCL_COULD_BE_CONST /* the 0-offset index of state to return */ /*v2.1to2.2 1 */
+	{
+	/* The gap test comes first, so a cell flagged as a gap is reported as -3 before the missing test is consulted. */
+	if (IsGapState(i, j))
+		return -3;
+	if (IsMissingState(i, j))
+		return -2;
+	return GetStateIndex(i, j, k);
+	}
+
+/*!
+	Returns the missing data symbol currently in effect. If no missing data symbol specified, returns '\0'.
+*/
+inline char NxsCharactersBlock::GetMissingSymbol() const
+	{
+	return missing;
+	}
+
+/*!
+	Name change to reinforce the change in meaning -- in NCL after 2.1 this behaves just like
+	GetNCharTotal(). It returns the number of characters in the matrix (regardless of whether they have been excluded).
+	The old GetNChar() function is now called GetNumIncludedChars();
+*/
+inline unsigned NxsCharactersBlock::GetNumChar() const
+	{
+	return nChar;
+	}
+
+/*!
+	Note the change in meaning -- in NCL after 2.1 this behaves just like
+	GetNCharTotal(). It returns the number of characters in the matrix (regardless of whether they have been excluded).
+	The old GetNChar() function is now called GetNumIncludedChars();
+*/
+inline unsigned NxsCharactersBlock::GetNChar() const
+	{
+	return nChar;
+	}
+
+/*!
+	Returns the number of characters which are not excluded (or eliminated) this number is <= GetNumChar()
+*/
+inline unsigned NxsCharactersBlock::GetNumIncludedChars() const
+	{
+	return (unsigned)nChar - (unsigned)excluded.size();
+	}
+
+
+inline unsigned NxsCharactersBlock::GetNCharTotal()
+	{
+	return nChar;
+	}
+
+inline unsigned NxsCharactersBlock::GetNCharTotal() const
+	{
+	return nChar;
+	}
+
+inline unsigned NxsCharactersBlock::GetNTaxWithData() const
+	{
+	return nTaxWithData;
+	}
+
+/*!
+	Returns the number of characters eliminated with the ELIMINATE command.
+*/
+inline unsigned NxsCharactersBlock::GetNumEliminated() NCL_COULD_BE_CONST /*v2.1to2.2 1 */
+	{
+	return (unsigned)eliminated.size();
+	}
+
+/*!
+	Returns the number of user-defined equate associations (default equates are not counted).
+*/
+inline unsigned NxsCharactersBlock::GetNumUserEquates() const
+	{
+	return (unsigned)(userEquates.size());
+	}
+
+/*!
+	Returns the total number of stored equate associations (user-defined plus default).
+*/
+inline unsigned NxsCharactersBlock::GetNumEquates() NCL_COULD_BE_CONST /*v2.1to2.2 1 */
+	{
+	return (unsigned)(userEquates.size() + defaultEquates.size());
+	}
+
+/*!
+	Returns the number of actual columns in `matrix'. This number is equal to `nchar', but can be smaller than
+	`ncharTotal' since the user could have eliminated some of the characters.
+*/
+inline unsigned NxsCharactersBlock::GetNumMatrixCols() NCL_COULD_BE_CONST /*v2.1to2.2 1 */
+	{
+	return nChar;
+	}
+
+/*!
+	Returns the number of actual rows in `matrix'. This number is equal to `ntax', but can be smaller than `ntaxTotal'
+	since the user did not have to provide data for all taxa specified in the TAXA block.
+*/
+inline unsigned NxsCharactersBlock::GetNumMatrixRows() NCL_COULD_BE_CONST /*v2.1to2.2 1 */
+	{
+	return GetNTaxTotal();
+	}
+
+inline NxsDiscreteStateCell NxsCharactersBlock::GetStateIndex(
+  unsigned taxInd,	/* the taxon in range [0..`ntax') */
+  unsigned charInd, /* the character in range [0..`nchar') */
+  unsigned k) const
+	{
+	const NxsDiscreteDatatypeMapper * currMapper =	GetDatatypeMapperForChar(charInd);
+	NCL_ASSERT(currMapper);
+	const NxsDiscreteStateRow & row = GetDiscreteMatrixRow(taxInd);
+	NCL_ASSERT(row.size() > charInd);
+	return currMapper->GetOneStateForCode(row[charInd], k);
+	}
+/*! Returns symbol from symbols list representing the state for taxon `i' and character `j'.
+
+	The normal situation in which there is only one state with no uncertainty or polymorphism is
+	represented by `k' = 0.
+	If there are multiple states, specify a number in the range [0..n) where n is the number of states
+	returned by the GetNumStates function.
+	Use the IsPolymorphic function to determine whether the multiple states correspond to uncertainty in state
+	assignment or polymorphism in the taxon. Assumes `symbols' is non-NULL.
+
+	\warning{In NEXUS it is possible (via the TOKENS mode) to introduce a dataype that does not have unique single
+		char symbols for each state. This function is not guaranteed to succeed in such cases.
+		The NxsDiscreteDatatypeMapper method of accessing characters is more robust (and faster).
+		see \ref NxsCharacterBlockQueries
+		}
+*/
+inline char NxsCharactersBlock::GetState(
+  unsigned i,	/* the taxon in range [0..`ntax') */
+  unsigned j,	/* the character in range [0..`nchar') */
+  unsigned k) const	/* the 0-offset index of the state to return */
+	{
+	NCL_ASSERT(!symbols.empty());
+	const NxsDiscreteStateCell p = GetStateIndex(i, j, k);
+	if (p < 0)
+		{
+		NCL_ASSERT(p == NXS_GAP_STATE_CODE);
+		return gap;
+		}
+	NCL_ASSERT(p < (int)symbols.length());
+	return symbols[(int)p];
+	}
+
+/*! \returns The symbols list as a C string obtained from symbols.c_str(); it is an empty string (not NULL) when no symbols are stored.
+*/
+inline const char *NxsCharactersBlock::GetSymbols() NCL_COULD_BE_CONST /*v2.1to2.2 1 */
+	{
+	return symbols.c_str();
+	}
+
+/*!
+	Returns label for taxon number `i' (`i' ranges from 0 to `ntax' - 1).
+*/
+inline NxsString NxsCharactersBlock::GetTaxonLabel( /*v2.1to2.2 4 */
+  unsigned i) const	/* the taxon's position in the taxa block */
+	{
+	NxsString s = taxa->GetTaxonLabel(i); /*v2.1to2.2 4 */
+	return s;
+	}
+
+inline bool NxsCharactersBlock::TaxonIndHasData(
+  unsigned taxInd) const /* the taxon in question; an index beyond the stored rows yields false */
+	{
+	if (datatype == continuous) /* continuous data are looked up in continuousMatrix, everything else in discreteMatrix */
+		return (taxInd < continuousMatrix.size() && !continuousMatrix[taxInd].empty());
+	return (taxInd < discreteMatrix.size() && !discreteMatrix[taxInd].empty());
+	}
+
+
+inline const NxsUnsignedSet & NxsCharactersBlock::GetExcludedIndexSet() const
+	{
+	return excluded;
+	}
+
+inline bool NxsCharactersBlock::IsActiveChar(
+  unsigned j) const	/* the character in question, in the range [0..`nchar') */
+	{
+	return (j < nChar && excluded.count(j) == 0);
+	}
+
+
+/*!
+	Returns true if character `j' has been excluded. If character `j' is active, returns false. Assumes `j' is in the
+	range [0..`nchar').
+*/
+inline bool NxsCharactersBlock::IsExcluded(
+  unsigned j) const	/* the character in question, in the range [0..`nchar') */
+	{
+	return !IsActiveChar(j);
+	}
+
+inline bool NxsCharactersBlock::IsActiveChar(
+  unsigned j) /* the character in question, in the range [0..`nchar') */
+	{
+	return (j < nChar && excluded.count(j) == 0);
+	}
+
+
+/*!
+	Returns true if character `j' has been excluded. If character `j' is active, returns false. Assumes `j' is in the
+	range [0..`nchar').
+*/
+inline bool NxsCharactersBlock::IsExcluded(
+  unsigned j) /* the character in question, in the range [0..`nchar') */
+	{
+	return !IsActiveChar(j);
+	}
+
+
+/*!
+	Returns true if INTERLEAVE was specified in the FORMAT command, false otherwise.
+*/
+inline bool NxsCharactersBlock::IsInterleave() NCL_COULD_BE_CONST /*v2.1to2.2 1 */
+	{
+	return interleaving;
+	}
+
+/*!
+	Returns true if LABELS was specified in the FORMAT command, false otherwise.
+*/
+inline bool NxsCharactersBlock::IsLabels() NCL_COULD_BE_CONST /*v2.1to2.2 1 */
+	{
+	return labels;
+	}
+
+/*!
+	Returns true if RESPECTCASE was specified in the FORMAT command, false otherwise.
+*/
+inline bool NxsCharactersBlock::IsRespectCase() const
+	{
+	return respectingCase;
+	}
+
+/*!
+	Returns true if TOKENS was specified in the FORMAT command, false otherwise.
+*/
+inline bool NxsCharactersBlock::IsTokens() NCL_COULD_BE_CONST /*v2.1to2.2 1 */
+	{
+	return tokens;
+	}
+
+/*!
+	Returns true if TRANSPOSE was specified in the FORMAT command, false otherwise.
+*/
+inline bool NxsCharactersBlock::IsTranspose() NCL_COULD_BE_CONST /*v2.1to2.2 1 */
+	{
+	return transposing;
+	}
+
+/*! Converts a taxon label to a 1-based number: one more than the index that the NxsTaxaBlockAPI object's FindTaxon()
+	reports for `s'. This method overrides the virtual function of the same name in the NxsBlock base class. If
+	`s' is not a valid taxon label (FindTaxon signals this with NxsX_NoSuchTaxon), returns the value 0.
+*/
+inline unsigned NxsCharactersBlock::TaxonLabelToNumber(
+  NxsString s) const	/* the taxon label to convert */ /*v2.1to2.2 4 */
+	{
+	try
+		{
+		return 1 + taxa->FindTaxon(s);
+		}
+	catch (const NxsTaxaBlock::NxsX_NoSuchTaxon &)
+		{
+		/* caught by const reference (no copy, no slicing); swallowed on purpose so the caller sees 0 */
+		}
+	return 0;
+	}
+
+
+inline VecBlockPtr NxsCharactersBlock::GetImpliedBlocks()
+	{
+	return GetCreatedTaxaBlocks();
+	}
+inline const std::string & NxsCharactersBlock::GetBlockName() const
+	{
+	return NCL_BLOCKTYPE_ATTR_NAME;
+	}
+inline void NxsCharactersBlock::HandleLinkCommand(NxsToken & token)
+	{
+	HandleLinkTaxaCommand(token);
+	}
+inline void NxsCharactersBlock::WriteLinkCommand(std::ostream &out) const
+	{
+	WriteLinkTaxaCommand(out);
+	}
+
+
+inline NxsCharactersBlock::StatesFormatEnum NxsCharactersBlock::GetStatesFormat() const
+	{
+	return statesFormat;
+	}
+
+inline	unsigned NxsCharactersBlock::CharLabelToNumber(NxsString s) NCL_COULD_BE_CONST /*v2.1to2.2 d */
+	{ /*v2.1to2.2 d */
+	const NxsCharactersBlock *b = (const NxsCharactersBlock *)(this); /*v2.1to2.2 d */
+	return b->CharLabelToNumber(s); /*v2.1to2.2 d */
+	} /*v2.1to2.2 d */
+
+/*!
+	Returns true if character with `charIndex' (0-based index) was eliminated, false otherwise.
+*/
+inline bool NxsCharactersBlock::IsEliminated(
+  unsigned charIndex) NCL_COULD_BE_CONST /* the character in question */ /*v2.1to2.2 1 */
+	{
+	return eliminated.count(charIndex) > 0 ;
+	}
+
+/*!
+	Returns an alias to the NxsDiscreteDatatypeMapper for character index.
+	NULL will be returned if the NxsCharactersBlock is not fully initialized or
+		if the block stores continuous characters.
+	The pointer is only guaranteed to be valid until the NxsCharactersBlock is modified.
+		(so do not store for long term usage).
+*/
+inline const NxsDiscreteDatatypeMapper * NxsCharactersBlock::GetDatatypeMapperForChar(unsigned charIndex) const
+	{
+	NxsCharactersBlock *mt = const_cast<NxsCharactersBlock *>(this); /* const is dropped only to reuse the non-const lookup */
+	return mt->GetMutableDatatypeMapperForChar(charIndex); /* the result is handed back as pointer-to-const */
+	}
+
+inline const NxsDiscreteDatatypeMapper & NxsCharactersBlock::GetDatatypeMapperForCharRef(unsigned charIndex) const
+	{
+	const NxsDiscreteDatatypeMapper * dm = this->GetDatatypeMapperForChar(charIndex);
+	NCL_ASSERT(dm);
+	return *dm;
+	}
+
+inline const NxsDiscreteStateRow & NxsCharactersBlock::GetDiscreteMatrixRow(unsigned int taxIndex) const
+	{
+	return discreteMatrix.at(taxIndex);
+	}
+
+inline const NxsCharactersBlock::ContinuousCharRow & NxsCharactersBlock::GetContinuousMatrixRow(unsigned taxIndex) const
+	{
+	return continuousMatrix.at(taxIndex);
+	}
+
+/*!
+	Returns an alias to the NxsDiscreteDatatypeMapper for character index.
+	NULL will be returned if the NxsCharactersBlock is not fully initialized or
+		if the block stores continuous characters.
+	The pointer is only guaranteed to be valid until the NxsCharactersBlock is modified.
+		(so do not store for long term usage).
+*/
+inline NxsDiscreteDatatypeMapper * NxsCharactersBlock::GetMutableDatatypeMapperForChar(unsigned int charIndex)
+	{
+	if (datatypeMapperVec.size() == 1)
+		return &(datatypeMapperVec[0].first); /* a lone mapper is returned without consulting its index set */
+	VecDatatypeMapperAndIndexSet::iterator pairIt = datatypeMapperVec.begin();
+	/* advance to the first (mapper, index set) pair whose set holds `charIndex` */
+	while (pairIt != datatypeMapperVec.end() && pairIt->second.count(charIndex) == 0)
+		++pairIt;
+	if (pairIt == datatypeMapperVec.end())
+		return NULL;
+	return &(pairIt->first);
+	}
+
+inline std::vector<const NxsDiscreteDatatypeMapper *> NxsCharactersBlock::GetAllDatatypeMappers() const
+	{
+	std::vector<const NxsDiscreteDatatypeMapper *> v;
+	for (VecDatatypeMapperAndIndexSet::const_iterator dmvIt = datatypeMapperVec.begin(); dmvIt != datatypeMapperVec.end(); ++dmvIt)
+		v.push_back(&(dmvIt->first));
+	return v;
+	}
+
+inline void NxsDiscreteDatatypeMapper::WriteStateCodeRowAsNexus(std::ostream & out, const NxsDiscreteStateRow &row) const
+	{//@mth optimize
+	std::vector<NxsDiscreteStateCell>::const_iterator b = row.begin();
+	const std::vector<NxsDiscreteStateCell>::const_iterator e = row.end();
+	WriteStateCodeRowAsNexus(out, b, e);
+	}
+
+inline void NxsDiscreteDatatypeMapper::WriteStateCodeRowAsNexus(std::ostream & out, NxsDiscreteStateRow::const_iterator & begIt, const NxsDiscreteStateRow::const_iterator & endIt) const
+	{//@mth optimize
+	for (; begIt != endIt; ++begIt)
+		WriteStateCodeAsNexusString(out, *begIt, true);
+	}
+
+
+#endif
diff --git a/src/ncl/nxscxxdiscretematrix.h b/src/ncl/nxscxxdiscretematrix.h
new file mode 100644
index 0000000..7192f76
--- /dev/null
+++ b/src/ncl/nxscxxdiscretematrix.h
@@ -0,0 +1,264 @@
+//	Copyright (C) 2008 Mark Holder
+//
+//	This file is part of NCL (Nexus Class Library) version 2.1
+//
+//	NCL is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	NCL is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with NCL; if not, write to the Free Software Foundation, Inc.,
+//	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+
+
+#if !defined(NXS_CXX_DISCRETE_MATRIX_H)
+#define NXS_CXX_DISCRETE_MATRIX_H
+
+#include <string>
+#include <vector>
+#include "ncl/nxsdefs.h"
+#include "ncl/nxsallocatematrix.h"
+#include "ncl/nxscharactersblock.h"
+#include "ncl/nxscdiscretematrix.h"
+
+class NxsCharacterPattern;
+	/**
+	 * A C++ class that wraps a NxsCDiscreteMatrix in order to handle the memory
+	 management more cleanly. This is intended to be an alternate, low-level way
+	 to get character data out of a NxsCharactersBlock
+	 */
+class NxsCXXDiscreteMatrix
+	{
+	public:
+		NxsCXXDiscreteMatrix()
+			{
+			Initialize(0L, false);
+			}
+		NxsCXXDiscreteMatrix(const NxsCDiscreteMatrix & );
+		NxsCXXDiscreteMatrix(const NxsCharactersBlock & cb, bool convertGapsToMissing, const NxsUnsignedSet * toInclude = 0L, bool standardizeCoding = true);
+
+		void Initialize(const NxsCharactersBlock * cb, bool convertGapsToMissing, const NxsUnsignedSet * toInclude = 0L, bool standardizeCoding = true);
+
+		const NxsCDiscreteMatrix & getConstNativeC() const
+			{
+			return nativeCMatrix;
+			}
+
+		NxsCDiscreteMatrix & getNativeC()
+			{
+			return nativeCMatrix;
+			}
+
+		unsigned	getNChar() const
+			{
+			return nativeCMatrix.nChar;
+			}
+
+		unsigned	getNTax() const
+			{
+			return nativeCMatrix.nTax;
+			}
+
+		unsigned	getNStates() const
+			{
+			return nativeCMatrix.nStates;
+			}
+
+		const char *	getSymbolsList() const   //POL added 15-Nov-2005
+			{
+			return nativeCMatrix.symbolsList;
+			}
+
+		const std::vector<int8_t> &getStateList() const
+			{
+			return stateListAlias;
+			}
+
+		const std::vector<unsigned> &getStateListPos() const
+			{
+			return stateListPosAlias;
+			}
+
+		const NxsCDiscreteStateSet *getRow(unsigned i) const
+			{
+			NCL_ASSERT(i < nativeCMatrix.nTax);
+			return nativeCMatrix.matrix[i];
+			}
+
+		const std::vector<int8_t> getRowAsVector(unsigned i) const
+			{
+			NCL_ASSERT(i < nativeCMatrix.nTax);
+			/* Copy the whole row in one step: the range constructor sizes the
+			   vector once (no incremental push_back growth) and converts each
+			   cell to int8_t exactly as an element-wise copy would. */
+			const NxsCDiscreteStateSet * const rowBegin = nativeCMatrix.matrix[i];
+			const NxsCDiscreteStateSet * const rowEnd = rowBegin + nativeCMatrix.nChar;
+			return std::vector<int8_t>(rowBegin, rowEnd);
+			}
+
+		const NxsCDiscreteStateSet * const * getMatrix() const
+			{
+			return nativeCMatrix.matrix;
+			}
+
+		int getDatatype() const /* plain int: a top-level const on a by-value scalar return is ignored by the language */
+			{
+			return (int)nativeCMatrix.datatype;
+			}
+
+		bool hasWeights() const
+			{
+			return hasIntWeights() || hasDblWeights();
+			}
+
+		bool hasIntWeights() const
+			{
+			return !(intWts.empty());
+			}
+
+		bool hasDblWeights() const
+			{
+			return !(dblWts.empty());
+			}
+
+		std::vector<int> & getIntWeights()
+			{
+			return intWts;
+			}
+
+		std::vector<double> & getDblWeights()
+			{
+			return dblWts;
+			}
+
+		const std::vector<int> & getIntWeightsConst() const
+			{
+			return intWts;
+			}
+
+		const std::vector<double> & getDblWeightsConst() const
+			{
+			return dblWts;
+			}
+
+		const std::set<unsigned> & getExcludedCharIndices() const
+			{
+			return activeExSet;
+			}
+
+		std::vector<unsigned> getExcludedCharIndicesAsVector() const
+			{
+			return std::vector<unsigned>(activeExSet.begin(), activeExSet.end());
+			}
+
+	private:
+		typedef ScopedTwoDMatrix<NxsCDiscreteStateSet> ScopedStateSetTwoDMatrix;
+
+		NxsCDiscreteMatrix			nativeCMatrix; 		/** taxa x characters matrix in a C struct*/
+		std::string					symbolsStringAlias;	/** memory management alias to symbols field of nativeCMatrix */
+		ScopedStateSetTwoDMatrix	matrixAlias;		/** memory management alias to matrix field of nativeCMatrix */
+		std::vector<NxsCDiscreteState_t>	stateListAlias;		/** memory management alias to ambigList field of nativeCMatrix */
+		std::vector<unsigned>		stateListPosAlias;		/** memory management alias to symbolsList field of nativeCMatrix. NOTE(review): presumably the state-list-position field is meant -- confirm */
+		std::vector<int>			intWts;				/** integer character weights; empty when none are stored (see hasIntWeights) */
+		std::vector<double>			dblWts;				/** floating-point character weights; empty when none are stored (see hasDblWeights) */
+		std::set<unsigned>			activeExSet;		/** excluded character indices, as returned by getExcludedCharIndices */
+		NxsCXXDiscreteMatrix(const NxsCXXDiscreteMatrix &); /** don't define, not copyable*/
+		NxsCXXDiscreteMatrix & operator=(const NxsCXXDiscreteMatrix &); /** don't define, not copyable*/
+	};
+
+
+
+
+class NxsCharacterPattern
+    {
+    public: 
+        // Ordering and equality look only at stateCodes; count, patternIndex and sumOfPatternWeights are ignored.
+        bool operator < (const NxsCharacterPattern & other) const {
+            return this->stateCodes < other.stateCodes;
+        }
+        bool operator == (const NxsCharacterPattern & other) const {
+            return this->stateCodes == other.stateCodes;
+        }
+        // Returns true if none of the state codes are the missing or gap codes (negative values).
+        //  Note that this does not test whether all of the state codes correspond to completely specified
+        //  cells that are only compatible with one state!
+        bool StateCodesAreNonNegative() const {
+            for (std::vector<NxsCDiscreteState_t>::const_iterator scIt = stateCodes.begin();
+                                                                  scIt != stateCodes.end(); 
+                                                                  ++scIt)
+                {
+                if (*scIt < 0)
+                    return false;
+                }
+            return true;
+        }
+        std::vector<NxsCDiscreteState_t> stateCodes;
+        mutable unsigned count; // no constructor is declared: count, patternIndex and sumOfPatternWeights have no initializer here
+        mutable unsigned patternIndex; // used as scratch space, not always valid!!!
+        mutable double sumOfPatternWeights; // stored as double.  Use NxsCXXDiscreteMatrix::hasIntWeights of the original matrix to see if these weights should be interpreted as ints
+    };
+    
+
+/*----------------------------------------------------------------------------------------------------------------------
+| Fills `patternSet` with the compressed patterns found in `mat`
+|
+| Data structure for mapping between indices in these patterns can be obtained by the client providing
+|   the `compressedIndexPattern` argument.
+|
+| Characters or taxa can be omitted by providing `taxaToInclude` or `charactersToInclude` arguments.
+|   If these arguments are 0L (or not provided) then all data will be included. Note that skipping taxa
+|   will cause the taxon indexing within a pattern to disagree with the overall taxon numbering because there will
+|   be "frameshifts" for all of the skipped taxa.  The included taxa will be present in the expected order, but it is
+|   the caller code's responsibility to keep track of which taxa are included in the pattern.
+*/
+unsigned NxsCompressDiscreteMatrix(
+  const NxsCXXDiscreteMatrix & mat,			/**< is the data source */
+  std::set<NxsCharacterPattern> & patternSet, /* set that will hold the compressed columns */
+  std::vector<const NxsCharacterPattern *> * compressedIndexPattern = 0L, /** if not 0L, this will be filled with pointers to patterns. NOTE(review): the old comment referred to `compressedTransposedMatrix`, which this overload does not take; presumably the entries point into `patternSet` -- confirm against the implementation */
+  const NxsUnsignedSet * taxaToInclude = 0L,	/**< if not 0L, this should be  the indices of the taxa in `mat` to include (if 0L all taxa will be included). Excluding taxa will result in shorter patterns (the skipped taxa will not be filled with empty codes, instead the taxon indexing will be frameshifted -- the client code must keep track of these frameshifts). */
+  const NxsUnsignedSet * charactersToInclude = 0L);	/**< if not 0L, this should be  the indices of the characters in `mat` to include (if 0L all characters will be included) */
+    
+/*----------------------------------------------------------------------------------------------------------------------
+| Fills `compressedTransposedMatrix` with the compressed patterns found in `mat`
+|
+| Data structure for mapping between indices in these representations can be obtained by the client providing
+|   the `originalIndexToCompressed` and/or `compressedIndexToOriginal` arguments.
+|
+| Characters or taxa can be omitted by providing `taxaToInclude` or `charactersToInclude` arguments.
+|   If these arguments are 0L (or not provided) then all data will be included. Note that skipping taxa
+|   will cause the taxon indexing within a pattern to disagree with the overall taxon numbering because there will
+|   be "frameshifts" for all of the skipped taxa.  The included taxa will be present in the expected order, but it is
+|   the caller code's responsibility to keep track of which taxa are included in the pattern.
+*/
+unsigned NxsCompressDiscreteMatrix(
+  const NxsCXXDiscreteMatrix & mat,			/**< is the data source */
+  std::vector<NxsCharacterPattern> & compressedTransposedMatrix, /* matrix that will hold the compressed columns */
+  std::vector<int> * originalIndexToCompressed, /** if not 0L, this will be filled to provide a map from an index in `mat` to the corresponding index in `compressedTransposedMatrix` (-1 in the vector indicates that the character was not included) */
+  std::vector<std::set<unsigned> > * compressedIndexToOriginal, /** if not 0L, this will be filled to provide a map from an index in `compressedTransposedMatrix` to the original character count */
+  const NxsUnsignedSet * taxaToInclude = 0L,	/**< if not 0L, this should be  the indices of the taxa in `mat` to include (if 0L all taxa will be included). Excluding taxa will result in shorter patterns (the skipped taxa will not be filled with empty codes, instead the taxon indexing will be frameshifted -- the client code must keep track of these frameshifts). */
+  const NxsUnsignedSet * charactersToInclude = 0L);	/**< if not 0L, this should be  the indices of the characters in `mat` to include (if 0L all characters will be included) */
+	
+
+void NxsConsumePatternSetToPatternVector(
+  std::set<NxsCharacterPattern> & patternSet, /* INPUT set of compressed patterns to transfer; taken by non-const reference and, going by the function name, presumably emptied by the call -- TODO confirm */
+  std::vector<NxsCharacterPattern> & compressedTransposedMatrix, /* OUTPUT matrix that will hold the compressed columns */
+  const std::vector<const NxsCharacterPattern *> * compressedIndexPattern = 0L, /** INPUT This mapping must be provided if either  `originalIndexToCompressed` or `compressedIndexToOriginal` is requested */
+  std::vector<int> * originalIndexToCompressed = 0L, /** OUTPUT if not 0L, this will be filled to provide a map from an index in `mat` to the corresponding index in `compressedTransposedMatrix` (-1 in the vector indicates that the character was not included) */
+  std::vector<std::set<unsigned> > * compressedIndexToOriginal = 0L); /** OUTPUT  if not 0L, this will be filled to provide a map from an index in `compressedTransposedMatrix` to the original character count */
+
+void NxsTransposeCompressedMatrix(
+  const std::vector<NxsCharacterPattern> & compressedTransposedMatrix, 
+  ScopedTwoDMatrix<NxsCDiscreteStateSet> & destination,
+  std::vector<unsigned> * patternCounts = 0L,
+  std::vector<double> * patternWeights = 0L);
+  
+ 
+
+#endif  // NXS_CXX_DISCRETE_MATRIX_H
diff --git a/src/ncl/nxsdatablock.h b/src/ncl/nxsdatablock.h
new file mode 100644
index 0000000..b78d04f
--- /dev/null
+++ b/src/ncl/nxsdatablock.h
@@ -0,0 +1,69 @@
+//	Copyright (C) 1999-2003 Paul O. Lewis
+//
+//	This file is part of NCL (Nexus Class Library) version 2.0.
+//
+//	NCL is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	NCL is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with NCL; if not, write to the Free Software Foundation, Inc.,
+//	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+
+#ifndef NCL_NXSDATABLOCK_H
+#define NCL_NXSDATABLOCK_H
+
+#include "ncl/nxscharactersblock.h"
+/*!
+	This class handles reading and storage for the NEXUS block DATA. It is derived from the NxsCharactersBlock class,
+	and differs from NxsCharactersBlock only in name and the fact that `newtaxa' is initially true rather than false.
+*/
+class NxsDataBlock
+  : public NxsCharactersBlock
+	{
+	public:
+		NxsDataBlock(NxsTaxaBlockAPI *tb, NxsAssumptionsBlockAPI *ab);
+
+		/*---------------------------------------------------------------------------------------
+		| Copy assignment. Results in aliasing of the taxa, assumptionsBlock blocks! Self-assignment is a no-op.
+		*/
+		NxsDataBlock & operator=(const NxsDataBlock &other)
+			{
+			if (this == &other) return *this;	/* Reset() runs before the copy, so `x = x' would otherwise copy from an already-reset object */
+			Reset();
+			CopyBaseBlockContents(static_cast<const NxsBlock &>(other));
+			CopyTaxaBlockSurrogateContents(other);
+			CopyCharactersContents(other);
+			return *this;
+			}
+
+		virtual NxsDataBlock * Clone() const	/* returns a new heap-allocated block built from the same taxa/assumptionsBlock pointers, then assigned from *this */
+			{
+			NxsDataBlock * a = new NxsDataBlock(taxa, assumptionsBlock);
+			*a = *this;
+			return a;
+			}
+
+		void TransferTo(NxsCharactersBlock &charactersblock);
+		void Reset();
+	private:
+		friend class MultiFormatReader;
+	};
+
+typedef NxsDataBlock DataBlock;
+
+class NxsDataBlockFactory	/* block factory whose GetBlockReaderForID returns a NxsDataBlock pointer */
+	:public NxsBlockFactory
+	{
+	public:
+		virtual NxsDataBlock  *	GetBlockReaderForID(const std::string & NCL_BLOCKTYPE_ATTR_NAME, NxsReader *reader, NxsToken *token);	/* the first parameter is named through the NCL_BLOCKTYPE_ATTR_NAME macro (`id' or `blockTypeName', see nxsdefs.h) */
+	};
+
+#endif
diff --git a/src/ncl/nxsdefs.h b/src/ncl/nxsdefs.h
new file mode 100644
index 0000000..eb73b54
--- /dev/null
+++ b/src/ncl/nxsdefs.h
@@ -0,0 +1,108 @@
+//	Copyright (C) 1999-2003 Paul O. Lewis
+//
+//	This file is part of NCL (Nexus Class Library) version 2.0.
+//
+//	NCL is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	NCL is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with NCL; if not, write to the Free Software Foundation, Inc.,
+//	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+#ifndef NCL_NXSDEFS_H
+#define NCL_NXSDEFS_H
+
+#include <iostream>
+#include <vector>
+#include <map>
+#include <set>
+#include <list>
+#include <utility>
+
+#define NCL_MAJOR_VERSION 2
+#define NCL_MINOR_VERSION 1
+#define NCL_NAME_AND_VERSION  "NCL version 2.1.17"
+#define NCL_COPYRIGHT         "Copyright (c) 1999-2011 by Paul O. Lewis and Mark T. Holder"
+#define NCL_HOMEPAGEURL       "http://sourceforge.net/projects/ncl"
+
+#if defined(RESERVE_ID_KEYWORD)
+#	define NCL_BLOCKTYPE_ATTR_NAME blockTypeName
+#else
+#	define NCL_BLOCKTYPE_ATTR_NAME id
+#endif
+// NCL_COULD_BE_CONST is a mechanism for declaring some old (v < 2.1) functions
+// 	to be const without breaking old client code.
+// If you would like your code to be more const-correct, then define NCL_CONST_FUNCS
+//	when you compile NCL and your code.  This will cause several functions that
+//	should have been declared as const to be declared that way in your code.
+// By default NCL_CONST_FUNCS will not be defined and these functions will not
+//	be defined as const member functions.
+#if defined(NCL_CONST_FUNCS) && NCL_CONST_FUNCS
+#	define NCL_COULD_BE_CONST const
+	int onlyDefinedInCouldBeConst();
+#else
+#	define NCL_COULD_BE_CONST
+#endif
+
+#if defined(IGNORE_NXS_ASSERT) || defined(NDEBUG)
+#	define NCL_ASSERT(expr)	/* assertions disabled: expands to nothing, so `expr' is never evaluated */
+#else
+	void ncl_assertion_failed(char const * expr, char const * function, char const * file, long line);	/* only declared here; called with the stringized expression, __FUNCTION__, __FILE__ and __LINE__ */
+#	define NCL_ASSERT(expr)  ((!(expr)) ? ncl_assertion_failed((const char *)#expr, (const char *)__FUNCTION__, __FILE__, __LINE__) : (void)0)	/* a single void expression rather than a bare `if', so `if (c) NCL_ASSERT(x); else ...' keeps its own else */
+#endif
+
+// Maximum number of states that can be stored; the only limitation is that this
+// number be less than the maximum size of an int (not likely to be a problem).
+// A good number for this is 76, which is 96 (the number of distinct symbols
+// able to be input from a standard keyboard) less 20 (the number of
+// symbols disallowed by the NEXUS standard for use as state symbols)
+//
+#define NCL_MAX_STATES         76
+
+typedef std::streampos	file_pos;
+
+#define	SUPPORT_OLD_NCL_NAMES
+
+class NxsString;
+
+typedef std::vector<bool> NxsBoolVector;
+typedef std::vector<char> NxsCharVector;
+typedef std::vector<int> NxsIntVector;
+typedef std::vector<unsigned> NxsUnsignedVector;
+typedef std::vector<NxsString> NxsStringVector;
+typedef std::vector<NxsStringVector> NxsAllelesVector;
+
+typedef std::set<unsigned> NxsUnsignedSet;
+
+typedef std::map< unsigned, NxsStringVector> NxsStringVectorMap;
+typedef std::map< NxsString, NxsString> NxsStringMap;
+typedef std::map< NxsString, NxsUnsignedSet> NxsUnsignedSetMap;
+
+typedef std::pair<std::string, NxsUnsignedSet> NxsPartitionGroup;
+typedef std::list<NxsPartitionGroup> NxsPartition;
+typedef std::map<std::string, NxsPartition> NxsPartitionsByName;
+
+// The following typedefs are simply for maintaining compatibility with existing code.
+// The names on the right are deprecated and should not be used.
+//
+typedef NxsBoolVector BoolVect;
+typedef NxsUnsignedSet IntSet;
+typedef NxsUnsignedSetMap IntSetMap;
+typedef NxsAllelesVector AllelesVect;
+typedef NxsStringVector LabelList;
+typedef NxsStringVector StrVec;
+typedef NxsStringVector vecStr;
+typedef NxsStringVectorMap LabelListBag;
+typedef NxsStringMap AssocList;
+
+class ProcessedNxsToken;
+typedef std::vector<ProcessedNxsToken> ProcessedNxsCommand;
+
+#endif
diff --git a/src/ncl/nxsdiscretedatum.h b/src/ncl/nxsdiscretedatum.h
new file mode 100644
index 0000000..26e39e0
--- /dev/null
+++ b/src/ncl/nxsdiscretedatum.h
@@ -0,0 +1,58 @@
+//	Copyright (C) 1999-2003 Paul O. Lewis
+//
+//	This file is part of NCL (Nexus Class Library) version 2.0.
+//
+//	NCL is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	NCL is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with NCL; if not, write to the Free Software Foundation, Inc.,
+//	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+
+#ifndef NCL_NXSDISCRETEDATUM_H
+#define NCL_NXSDISCRETEDATUM_H
+
+/*!
+	Reference to a cell in a DiscreteMatrix.  This class has been deprecated and is retained in NCL >= 2.1 for backward
+	compatibility only.
+	It no longer stores the data for a cell, but can refer to the cell in a matrix in the context in which the
+	matrix is at hand. The only time that NxsDiscreteDatum appears in the public NCL interface is as an argument to
+	NxsCharactersBlock::WriteStates().  The new implementation of NxsDiscreteDatum should continue to work in this
+	context because the NxsCharactersBlock holds the matrix.
+*/
+class NxsDiscreteDatum
+	{
+	friend class NxsDiscreteMatrix;
+	friend class NxsUnalignedBlock;
+
+	public:
+		NxsDiscreteDatum(): taxInd(0), charInd(0){}	/* default: refers to row 0, column 0 */
+		NxsDiscreteDatum(unsigned row, unsigned col): taxInd(row), charInd(col){}	/* refers to the cell at (row, col) */
+		void				CopyFrom(const NxsDiscreteDatum & other);	/* copies both indices from `other' (defined inline after the class) */
+
+		unsigned taxInd; /* row of the matrix */
+		unsigned charInd; /* col of the matrix */
+
+	};
+
+typedef NxsDiscreteDatum DiscreteDatum;
+
+/*!
+	Copies the row index (`taxInd') and the column index (`charInd') of `other' into this object.
+*/
+inline void NxsDiscreteDatum::CopyFrom(
+  const NxsDiscreteDatum & other)	/* the datum whose indices are copied */
+	{
+	this->charInd = other.charInd;
+	this->taxInd = other.taxInd;
+	}
+
+#endif
diff --git a/src/ncl/nxsdiscretematrix.h b/src/ncl/nxsdiscretematrix.h
new file mode 100644
index 0000000..e011123
--- /dev/null
+++ b/src/ncl/nxsdiscretematrix.h
@@ -0,0 +1,90 @@
+//	Copyright (C) 1999-2003 Paul O. Lewis
+//
+//	This file is part of NCL (Nexus Class Library) version 2.0.
+//
+//	NCL is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	NCL is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with NCL; if not, write to the Free Software Foundation, Inc., 
+//	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+
+#ifndef NCL_NXSDISCRETEMATRIX_H
+#define NCL_NXSDISCRETEMATRIX_H
+
+#include <climits>
+
+/*----------------------------------------------------------------------------------------------------------------------
+|	Class providing storage for the discrete data types (dna, rna, nucleotide, standard, and protein) inside a DATA or
+|	CHARACTERS block. This class is also used to store the data for an ALLELES block. Maintains a matrix in which each
+|	cell is an object of the class NxsDiscreteDatum. NxsDiscreteDatum stores the state for a particular combination of
+|	taxon and character as an integer. Ordinarily, there will be a single state recorded for each taxon-character
+|	combination, but exceptions exist if there is polymorphism for a taxon-character combination, or if there is
+|	uncertainty about the state (e.g., in dna data, the data file might have contained an R or Y entry). Please consult
+|	the documentation for the NxsDiscreteDatum class for the details about how states are stored. For data stored in an
+|	ALLELES block, rows of the matrix correspond to individuals and columns to loci. Each NxsDiscreteDatum must
+|	therefore store information about both genes at a single locus for a single individual in the case of diploid data.
+|	To do this, two macros HIWORD and LOWORD are used to divide up the unsigned value into two words. A maximum of 255
+|	distinct allelic forms can be accommodated by this scheme, assuming at minimum a 32-bit architecture. Because it is
+|	not known in advance how many rows are going to be necessary, the NxsDiscreteMatrix class provides the AddRows
+|	method, which expands the number of rows allocated for the matrix while preserving data already stored. NOTE(review): nxsdiscretedatum.h describes NxsDiscreteDatum as a deprecated (row, col) reference that no longer stores cell data, so the storage details above may be stale -- confirm before relying on them.
+*/
+class NxsDiscreteMatrix
+	{
+	friend class NxsCharactersBlock;
+	friend class NxsAllelesBlock;
+
+	public:
+
+							NxsDiscreteMatrix(unsigned rows, unsigned cols);
+		virtual				~NxsDiscreteMatrix();
+
+		void				AddRows(unsigned nAddRows);
+		void				AddState(unsigned i, unsigned j, unsigned value);
+		void				CopyStatesFromFirstTaxon(unsigned i, unsigned j);
+		void				DebugSaveMatrix(ostream &out, unsigned colwidth = 12);
+		unsigned			DuplicateRow(unsigned row, unsigned count, unsigned startCol = 0, unsigned endCol = UINT_MAX);	/* UINT_MAX comes from <climits> */
+		void				Flush();
+		unsigned			GetState(unsigned i, unsigned j, unsigned k = 0);
+		unsigned			GetNumStates(unsigned i, unsigned j);
+		unsigned			GetObsNumStates(unsigned j);
+		bool				IsGap(unsigned i, unsigned j);
+		bool				IsMissing(unsigned i, unsigned j);
+		bool				IsPolymorphic(unsigned i, unsigned j);
+		void				Reset(unsigned rows, unsigned cols);
+		void				SetGap(unsigned i, unsigned j);
+		void				SetMissing(unsigned i, unsigned j);
+		void				SetPolymorphic(unsigned i, unsigned j, unsigned value = 1);
+		void				SetState(unsigned i, unsigned j, unsigned value);
+
+	private:
+
+		unsigned			nrows;	/* number of rows (taxa) in the data matrix */
+		unsigned			ncols;	/* number of columns (characters) in the data matrix */
+		NxsDiscreteDatum	**data;	/* storage for the data */
+
+		/* The private overloads below take the datum itself instead of the (i, j) indices used by the public versions. */
+		void				AddState(NxsDiscreteDatum &d, unsigned value);
+		bool				IsGap(NxsDiscreteDatum &d);
+		bool				IsMissing(NxsDiscreteDatum &d);
+		bool				IsPolymorphic(NxsDiscreteDatum &d);
+		NxsDiscreteDatum	&GetDiscreteDatum(unsigned i, unsigned j);	/* returns a reference to a datum, not a copy */
+		unsigned			GetNumStates(NxsDiscreteDatum &d);
+		unsigned			GetState(NxsDiscreteDatum &d, unsigned k = 0);
+		void				SetGap(NxsDiscreteDatum &d);
+		void				SetMissing(NxsDiscreteDatum &d);
+		void				SetPolymorphic(NxsDiscreteDatum &d, unsigned value);
+		void				SetState(NxsDiscreteDatum &d, unsigned value);
+	};
+
+typedef NxsDiscreteMatrix DiscreteMatrix;
+
+
+#endif
diff --git a/src/ncl/nxsdistancedatum.h b/src/ncl/nxsdistancedatum.h
new file mode 100644
index 0000000..bdde053
--- /dev/null
+++ b/src/ncl/nxsdistancedatum.h
@@ -0,0 +1,42 @@
+//	Copyright (C) 1999-2003 Paul O. Lewis
+//
+//	This file is part of NCL (Nexus Class Library) version 2.0.
+//
+//	NCL is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	NCL is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with NCL; if not, write to the Free Software Foundation, Inc.,
+//	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+
+#ifndef NCL_NXSDISTANCEDATUM_H
+#define NCL_NXSDISTANCEDATUM_H
+
+/*!
+	This class stores a single pairwise distance value together with a flag marking the value as missing. Both data
+	members are public and no friend class is declared here; a default-constructed datum holds 0.0 and is flagged missing.
+*/
+class NxsDistanceDatum
+	{
+	public:
+
+		NxsDistanceDatum()	/* starts out as "no distance recorded": value 0.0, missing true */
+		:value(0.0),
+		missing(true)
+		{}
+
+		double		value;		/* the pairwise distance value stored */
+		bool		missing;	/* true if there is missing data for this pair */
+	};
+
+typedef NxsDistanceDatum DistanceDatum;
+
+#endif
diff --git a/src/ncl/nxsdistancesblock.h b/src/ncl/nxsdistancesblock.h
new file mode 100644
index 0000000..39dd4d5
--- /dev/null
+++ b/src/ncl/nxsdistancesblock.h
@@ -0,0 +1,197 @@
+//	Copyright (C) 1999-2003 Paul O. Lewis
+//
+//	This file is part of NCL (Nexus Class Library) version 2.0.
+//
+//	NCL is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	NCL is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with NCL; if not, write to the Free Software Foundation, Inc.,
+//	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+
+#ifndef NCL_NXSDISTANCESBLOCK_H
+#define NCL_NXSDISTANCESBLOCK_H
+
+#include "ncl/nxsdefs.h"
+#include "ncl/nxstaxablock.h"
+#include "ncl/nxsdistancedatum.h"
+
+/*!
+	This class handles reading and storage for the NEXUS block DISTANCES. It overrides the member functions Read and
+	Reset, which are abstract virtual functions in the base class NxsBlock. Below is a table showing the correspondence
+	between the elements of a DISTANCES block and the variables and member functions that can be used to access each
+	piece of information stored.
+>
+	NEXUS command   Command attribute  Data Members        Member Functions
+	------------------------------------------------------------------------
+	DIMENSIONS      NEWTAXA            newtaxa
+
+	                NTAX               ntax                GetNtax
+
+	                NCHAR              nchar               GetNchar
+
+	FORMAT          TRIANGLE           triangle            GetTriangle
+	                                                       IsUpperTriangular
+	                                                       IsLowerTriangular
+	                                                       IsRectangular
+
+	                [NO]DIAGONAL       diagonal            IsDiagonal
+
+	                [NO]LABELS         labels              IsLabels
+
+	                MISSING            missing             GetMissingSymbol
+
+	                INTERLEAVE         interleave          IsInterleave
+
+	                TAXLABELS          (stored in the      (access through
+					                   NxsTaxaBlockAPI        data member taxa)
+									   object)
+
+	MATRIX                             matrix              GetDistance
+	                                                       IsMissing
+	                                                       SetMissing
+	                                                       SetDistance
+	------------------------------------------------------------------------
+>
+*/
+class NxsDistancesBlock
+  : public NxsBlock, public NxsTaxaBlockSurrogate
+	{
+	public:
+							NxsDistancesBlock(NxsTaxaBlockAPI *t);
+		virtual				~NxsDistancesBlock();
+
+		double				GetDistance(unsigned i, unsigned j) const;
+		char				GetMissingSymbol() NCL_COULD_BE_CONST ; /*v2.1to2.2 1 */
+		unsigned			GetNchar() NCL_COULD_BE_CONST ; /*v2.1to2.2 1 */
+		unsigned			GetTriangle() NCL_COULD_BE_CONST ; /*v2.1to2.2 1 */
+		bool				IsRectangular() NCL_COULD_BE_CONST ; /*v2.1to2.2 1 */
+		bool				IsBoth() NCL_COULD_BE_CONST ; /*v2.1to2.2 1 */
+		bool				IsDiagonal() NCL_COULD_BE_CONST ; /*v2.1to2.2 1 */
+		bool				IsInterleave() NCL_COULD_BE_CONST ; /*v2.1to2.2 1 */
+		bool				IsLabels() NCL_COULD_BE_CONST ; /*v2.1to2.2 1 */
+		bool				IsLowerTriangular() NCL_COULD_BE_CONST ;  /*v2.1to2.2 1 */
+		bool				IsMissing(unsigned i, unsigned j) const;
+		bool				IsUpperTriangular() NCL_COULD_BE_CONST ;  /*v2.1to2.2 1 */
+		virtual void		Report(std::ostream &out) NCL_COULD_BE_CONST ;  /*v2.1to2.2 1 */
+		virtual void		Reset();
+		void				SetDistance(unsigned i, unsigned j, double d);
+		void				SetMissing(unsigned i, unsigned j);
+		void				SetNchar(unsigned i);
+		void				SetNexus(NxsReader *nxsptr)	/* forwards the reader pointer to both base classes */
+			{
+			NxsBlock::SetNexus(nxsptr);
+			NxsTaxaBlockSurrogate::SetNexusReader(nxsptr);
+			}
+			/*! \ref BlockTypeIDDiscussion */
+        virtual const std::string & GetBlockName() const	/* returns the member named by the NCL_BLOCKTYPE_ATTR_NAME macro */
+            {
+            return NCL_BLOCKTYPE_ATTR_NAME;
+            }
+
+		enum NxsDistancesBlockEnum		/* used by data member triangle to determine which triangle(s) of the distance matrix is/are occupied */
+			{
+			upper			= 1,		/* matrix is upper-triangular */
+			lower			= 2,		/* matrix is lower-triangular */
+			both			= 3			/* matrix is rectangular */
+			};
+
+		virtual VecBlockPtr		GetImpliedBlocks()
+			{
+			return GetCreatedTaxaBlocks();
+			}
+
+		/* only used if the linkAPI is enabled */
+		virtual void		HandleLinkCommand(NxsToken & token)
+			{
+			HandleLinkTaxaCommand(token);
+			}
+		virtual void		WriteLinkCommand(std::ostream &out) const
+			{
+			WriteLinkTaxaCommand(out);
+			}
+		void				WriteAsNexus(std::ostream &out) const;
+
+		NxsDistancesBlock &operator=(const NxsDistancesBlock &other)	/* copy assignment; self-assignment is a no-op */
+			{
+			if (this == &other) return *this;	/* Reset() runs before the copy, so `x = x' would otherwise copy from an already-reset object */
+			Reset();
+			CopyBaseBlockContents(static_cast<const NxsBlock &>(other));
+			CopyTaxaBlockSurrogateContents(other);
+			CopyDistancesContents(other);
+			return *this;
+			}
+
+		void CopyDistancesContents(const NxsDistancesBlock &other);
+		NxsDistancesBlock * Clone() const	/* returns a new heap-allocated block built from the same taxa pointer, then assigned from *this */
+			{
+			NxsDistancesBlock *d = new NxsDistancesBlock(taxa);
+			*d = *this;
+			return d;
+			}
+		bool 		SwapEquivalentTaxaBlock(NxsTaxaBlockAPI * tb)
+		{
+			return SurrogateSwapEquivalentTaxaBlock(tb);
+		}
+
+	protected:
+		void				WriteFormatCommand(std::ostream &out) const;
+		void				WriteMatrixCommand(std::ostream &out) const;
+
+		void				HandleDimensionsCommand(NxsToken &token);
+		void				HandleFormatCommand(NxsToken &token);
+		void				HandleMatrixCommand(NxsToken &token);
+		bool				HandleNextPass(NxsToken &token, unsigned &offset, std::vector<unsigned> & fileMatrixCmdOrderToTaxInd, std::set<unsigned> & taxIndsRead);
+		virtual void		Read(NxsToken &token);
+
+	private:
+		NxsDistanceDatum & GetCell(unsigned i, unsigned j)	/* bounds-checked: std::vector::at throws std::out_of_range for a bad index */
+			{
+			return matrix.at(i).at(j);
+			}
+		const NxsDistanceDatum & GetCell(unsigned i, unsigned j) const	/* const overload, same bounds checking */
+			{
+			return matrix.at(i).at(j);
+			}
+		typedef std::vector<NxsDistanceDatum> NxsDistanceDatumRow;
+		typedef std::vector<NxsDistanceDatumRow> NxsDistanceDatumMatrix;
+
+		unsigned			expectedNtax;		/* number of taxa (determines dimensions of the matrix) */
+		unsigned			nchar;		/* the number of characters used in generating the pairwise distances */
+
+		bool				diagonal;	/* true if diagonal elements provided when reading in DISTANCES block */
+		bool				interleave;	/* true if interleave format used when reading in DISTANCES block */
+		bool				labels;		/* true if taxon labels were provided when reading in DISTANCES block */
+
+		int					triangle;	/* indicates whether matrix is upper triangular, lower triangular, or rectangular, taking on one of the elements of the NxsDistancesBlockEnum enumeration */
+
+		char				missing;	/* the symbol used to represent missing data (e.g. '?') */
+
+		NxsDistanceDatumMatrix	matrix;	/* the structure used for storing the pairwise distance matrix */
+		friend class PublicNexusReader;
+	};
+
+typedef NxsDistancesBlock	DistancesBlock;
+
+class NxsDistancesBlockFactory	/* block factory whose GetBlockReaderForID returns a NxsDistancesBlock pointer */
+	:public NxsBlockFactory
+	{
+	public:
+		virtual NxsDistancesBlock  *	GetBlockReaderForID(const std::string & NCL_BLOCKTYPE_ATTR_NAME, NxsReader *reader, NxsToken *token);	/* the first parameter is named through the NCL_BLOCKTYPE_ATTR_NAME macro (`id' or `blockTypeName', see nxsdefs.h) */
+	};
+
+inline bool NxsDistancesBlock::IsBoth() NCL_COULD_BE_CONST /*v2.1to2.2 1 */
+	{
+	return IsRectangular();	/* IsBoth() is just another name for IsRectangular() */
+	}
+
+#endif
+
diff --git a/src/ncl/nxsemptyblock.h b/src/ncl/nxsemptyblock.h
new file mode 100644
index 0000000..7b55fec
--- /dev/null
+++ b/src/ncl/nxsemptyblock.h
@@ -0,0 +1,77 @@
+//	Copyright (C) 1999-2002 Paul O. Lewis
+//
+//	This file is part of NCL (Nexus Class Library).
+//
+//	NCL is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	NCL is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with NCL; if not, write to the Free Software Foundation, Inc., 
+//	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+#ifndef NCL_NXSEMPTYBLOCK_H
+#define NCL_NXSEMPTYBLOCK_H
+
+/*----------------------------------------------------------------------------------------------------------------------
+|	This is a template that can be used to create a class representing a NEXUS block. Here are the steps to follow if
+|	you wish to create a new block specifically for use with your particular application. Suppose your application is
+|	called Phylome and you want to create a private block called a PHYLOME block that can appear in NEXUS data files
+|	and contains commands for your program.
+|~
+|	o Copy the files nxsemptyblock.h and nxsemptyblock.cpp and rename them (e.g. nxsphylomeblock.h and 
+|	  nxsphylomeblock.cpp)
+|	o In nxsphylomeblock.h and nxsphylomeblock.cpp, replace all instances of EMPTY (case-sensitive, whole word search)
+|	  with PHYLOME
+|	o In nxsphylomeblock.h, replace both instances of NCL_NXSEMPTYBLOCK_H at the top of the file with
+|	  NCL_NXSPHYLOMEBLOCK_H
+|	o In nxsphylomeblock.h and nxsphylomeblock.cpp, replace all instances of NxsEmptyBlock (case-sensitive, whole word
+|	  search) with NxsPhylomeBlock
+|	o Modify the Read function in nxsphylomeblock.cpp to interpret what comes after the BEGIN PHYLOME command in the
+|	  NEXUS data file
+|	o Modify the CharLabelToNumber and TaxonLabelToNumber if you need to read in sets of characters or taxa, 
+|	  respectively. These functions provide a way for NxsSetReader objects to translate character or taxon labels to
+|	  the corresponding numbers. If you do not need these capabilities, then it is safe to just delete these functions
+|	  from nxsphylomeblock.h and nxsphylomeblock.cpp because they are no different than the base class versions
+|	o Modify the SkippingCommand function if you want to notify users when commands within the PHYLOME block are not 
+|	  recognized and are being skipped
+|	o In nxsphylomeblock.h, replace this comment with something meaningful for your class. Start off with something
+|	  like "This class handles reading and storage for the NEXUS block PHYLOME. It overrides the member functions 
+|	  Read and Reset, which are abstract virtual functions in the base class NxsBlock"
+|~
+|	Adding a new data member? Don't forget to:
+|~
+|	o Describe it in the class declaration using a C-style comment. 
+|	o Initialize it (unless it is self-initializing) in the constructor and reinitialize it in the Reset function.
+|	o Describe the initial state in the constructor documentation. 
+|	o Delete memory allocated to it in both the destructor and Reset function. 
+|	o Report it in some way in the Report function. 
+|~
+*/
+class NxsEmptyBlock
+  : public NxsBlock
+	{
+	public:
+
+						NxsEmptyBlock();
+		virtual			~NxsEmptyBlock();
+
+		virtual void	Report(ostream &out);	/* per the checklist above, every data member should be reported here in some way */
+
+	protected:
+
+		void			SkippingCommand(NxsString commandName);	/* hook for notifying users that an unrecognized command is being skipped */
+		unsigned		TaxonLabelToNumber(NxsString s);	/* lets NxsSetReader objects translate a taxon label to its number */
+		unsigned		CharLabelToNumber(NxsString s);	/* lets NxsSetReader objects translate a character label to its number */
+		void			HandleEndblock(NxsToken &token);
+		virtual void	Read(NxsToken &token);	/* to be modified to interpret what follows the BEGIN command of the block */
+		virtual void	Reset();	/* per the checklist above, reinitializes data members and frees memory allocated to them */
+	};
+
+#endif
diff --git a/src/ncl/nxsexception.h b/src/ncl/nxsexception.h
new file mode 100644
index 0000000..ce10c9d
--- /dev/null
+++ b/src/ncl/nxsexception.h
@@ -0,0 +1,97 @@
+//	Copyright (C) 1999-2003 Paul O. Lewis
+//
+//	This file is part of NCL (Nexus Class Library) version 2.0.
+//
+//	NCL is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	NCL is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with NCL; if not, write to the Free Software Foundation, Inc.,
+//	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+
+#ifndef NCL_NXSEXCEPTION_H
+#define NCL_NXSEXCEPTION_H
+
+#include "ncl/nxsstring.h"
+
+class NxsToken;
+class ProcessedNxsToken;
+class NxsTokenPosInfo;
+/*!
+	Exception class that conveys a message specific to the problem encountered, plus the file position, line and
+	column at which it occurred. Derives from std::exception, so it can be caught generically through what().
+*/
+class NxsException: public std::exception
+	{
+	public:
+		mutable NxsString	msg;	/* NxsString to hold message; `mutable' so const members may modify it (presumably nxs_what() -- TODO confirm) */
+		file_pos	pos;	/* current file position */
+		long		line;	/* current line in file */
+		long		col;	/* column of current line */
+		virtual ~NxsException() throw()
+			{
+			}
+
+		NxsException(const std::string & s, file_pos fp = 0, long fl = 0L, long fc = 0L);	/* message plus optional explicit position (all default to 0) */
+		NxsException(const std::string &s, const NxsToken &t);	/* the three overloads below take a token-like object, presumably as the source of the position -- TODO confirm */
+		NxsException(const std::string &s, const ProcessedNxsToken &t);
+		NxsException(const std::string &s, const NxsTokenPosInfo &t);
+		const char * what () const throw ()	/* never returns an empty string: falls back to a fixed text when msg is empty */
+			{
+			return msg.empty() ? "Unknown Nexus Exception" : msg.c_str();
+			}
+		const char * nxs_what () const;
+		void addPositionInfo(const NxsToken & t);
+		void addPositionInfo(const ProcessedNxsToken & t);
+		void addPositionInfo(const NxsTokenPosInfo & t);
+		void addPositionInfo(file_pos fp, long fl, long fc);
+	};
+
+typedef NxsException XNexus;
+
+/*!
+	Thrown when a programming error (a violation of one of the APIs used in NCL) is revealed.
+*/
+class NxsNCLAPIException: public NxsException
+	{
+	public:
+		NxsNCLAPIException(NxsString s) :NxsException(s, 0, -1L,-1L){}	/* no token available: file position 0, line and column set to -1 */
+		NxsNCLAPIException(NxsString s, NxsToken &t) :NxsException(s, t){}	/* forwards the message and token to the NxsException(string, token) constructor */
+	};
+
+/*!
+	Thrown when an unimplemented method is called.
+*/
+class NxsUnimplementedException: public NxsNCLAPIException
+	{
+	public:
+		NxsUnimplementedException(NxsString s):NxsNCLAPIException(s){}	/* message only; forwards to the one-argument base constructor */
+		NxsUnimplementedException(NxsString s, NxsToken &t):NxsNCLAPIException(s,t){}	/* message plus token; forwards both to the base constructor */
+	};
+
+
+class DuplicatedLabelNxsException: public NxsException	/* NxsException subtype; judging by the name, signals a duplicated label -- confirm at the throw sites */
+	{
+	public:
+		DuplicatedLabelNxsException(const std::string & s):NxsException(s){}	/* message only; position arguments take the NxsException defaults */
+	};
+
+
+/*------------------------------------------------------------------------------
+ This exception will be thrown if NCL signal handling is activated (static
+	methods in NxsReader control this) and a SIGINT is detected during a
+	parse.
+*/
+class NxsSignalCanceledParseException: public NxsException
+	{
+	public:
+		NxsSignalCanceledParseException(const std::string & s);	/* only declared here; the definition lives outside this header */
+	};
+#endif
diff --git a/src/ncl/nxsindent.h b/src/ncl/nxsindent.h
new file mode 100644
index 0000000..bd5fb64
--- /dev/null
+++ b/src/ncl/nxsindent.h
@@ -0,0 +1,56 @@
+//	Copyright (C) 1999-2003 Paul O. Lewis and Mark T. Holder
+//
+//	This file is part of NCL (Nexus Class Library) version 2.0.
+//
+//	NCL is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	NCL is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with NCL; if not, write to the Free Software Foundation, Inc., 
+//	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+
+#ifndef NCL_NXSINDENT_H
+#define NCL_NXSINDENT_H
+
+/*----------------------------------------------------------------------------------------------------------------------
+|	Manipulator for use in indenting text `leftMarg' characters.
+*/
+class Indent
+	{
+	public:
+					Indent(unsigned i);	/* stores `i' in leftMarg (defined inline after the class) */
+
+		unsigned	leftMarg;	/* the amount by which to indent */
+	};
+	
+/*----------------------------------------------------------------------------------------------------------------------
+|	Constructor: records the requested indentation `i' in `leftMarg'.
+*/
+inline Indent::Indent(unsigned i)	/* i: the amount (in characters) by which to indent */
+	: leftMarg(i)
+	{
+	/* nothing else to do: the initializer list has already stored `i' */
+	}
+
+/*----------------------------------------------------------------------------------------------------------------------
+|	Output operator for the Indent manipulator. As written it returns `o' unchanged: nothing is inserted into the stream.
+*/
+inline ostream &operator <<(
+  ostream &o,		/* the ostream object */
+  const Indent &)	/* the Indent object sent to `o'; deliberately unnamed because it is never read */
+	{
+	/* An unnamed parameter needs no unused-parameter suppression. The former `#pragma unused(i)'
+	   (compiled only under HAVE_PRAGMA_UNUSED) named a parameter `i' that does not exist in this
+	   signature, so it has been removed. */
+	return o;
+	}
+
+#endif
diff --git a/src/ncl/nxsmultiformat.h b/src/ncl/nxsmultiformat.h
new file mode 100644
index 0000000..79b50d2
--- /dev/null
+++ b/src/ncl/nxsmultiformat.h
@@ -0,0 +1,260 @@
+//	Copyright (C) 1999-2003 Paul O. Lewis and Mark T. Holder
+//
+//	This file is part of NCL (Nexus Class Library) version 2.0.
+//
+//	NCL is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	NCL is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with NCL; if not, write to the Free Software Foundation, Inc.,
+//	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+
+#ifndef NCL_NXSMULTIFORMAT_H
+#define NCL_NXSMULTIFORMAT_H
+#include <iostream>
+
+#include "ncl/nxsdefs.h"
+#include "ncl/nxspublicblocks.h"
+class FileToCharBuffer;
+/*!
+	A special class of PublicNexusReader, that can parse
+		\li PHYLIP,
+		\li relaxed PHYLIP,
+		\li FASTA, and
+		\li ALN
+	formatted files in addition to NEXUS.  Non-NEXUS files are parsed and the
+	information from these files is added to the appropriate NxsBlock object.
+	So the parser essentially creates the normal NCL interface even if the
+	input is not NEXUS.
+*/
+class MultiFormatReader: public PublicNexusReader
+{
+	public:
+		/*! enumeration of all of the formats supported by MultiFormatReader
+
+			This enumeration type is used in calls to ReadStream and ReadFilepath
+			so that the reader knows what type of data to expect.
+		*/
+		enum DataFormatType
+			{
+				NEXUS_FORMAT,
+				FASTA_DNA_FORMAT,
+				FASTA_AA_FORMAT,
+				FASTA_RNA_FORMAT,
+				PHYLIP_DNA_FORMAT,
+				PHYLIP_RNA_FORMAT,
+				PHYLIP_AA_FORMAT,
+				PHYLIP_DISC_FORMAT,
+				INTERLEAVED_PHYLIP_DNA_FORMAT,
+				INTERLEAVED_PHYLIP_RNA_FORMAT,
+				INTERLEAVED_PHYLIP_AA_FORMAT,
+				INTERLEAVED_PHYLIP_DISC_FORMAT,
+				RELAXED_PHYLIP_DNA_FORMAT,
+				RELAXED_PHYLIP_RNA_FORMAT,
+				RELAXED_PHYLIP_AA_FORMAT,
+				RELAXED_PHYLIP_DISC_FORMAT,
+				INTERLEAVED_RELAXED_PHYLIP_DNA_FORMAT,
+				INTERLEAVED_RELAXED_PHYLIP_RNA_FORMAT,
+				INTERLEAVED_RELAXED_PHYLIP_AA_FORMAT,
+				INTERLEAVED_RELAXED_PHYLIP_DISC_FORMAT,
+				ALN_DNA_FORMAT,
+				ALN_RNA_FORMAT,
+				ALN_AA_FORMAT,
+				PHYLIP_TREE_FORMAT,
+				RELAXED_PHYLIP_TREE_FORMAT,
+				NEXML_FORMAT,
+				FIN_DNA_FORMAT, // FIN_*: presumably the .fin files handled by readFinFile -- TODO confirm
+				FIN_AA_FORMAT,
+				FIN_RNA_FORMAT,
+				UNSUPPORTED_FORMAT // keep this last
+			};
+
+
+        void SetCoerceUnderscoresToSpaces(bool v) // stores v in the coerceUnderscoresToSpaces flag
+            {
+            this->coerceUnderscoresToSpaces = v;
+            }
+
+        bool GetCoerceUnderscoresToSpaces() const // \returns the current value of the flag
+            {
+            return this->coerceUnderscoresToSpaces;
+            }
+		
+		/*! \returns a vector with the "official" format names that can be used with formatNameToCode
+
+		Currently this list is:  {"nexus", "dnafasta", "aafasta", "rnafasta", "dnaphylip", "rnaphylip", "aaphylip", "discretephylip", "dnaphylipinterleaved", "rnaphylipinterleaved", "aaphylipinterleaved", "discretephylipinterleaved", "dnarelaxedphylip", "rnarelaxedphylip", "aarelaxedphylip", "discreterelaxedphylip", "dnarelaxedphylipinterleaved", "rnarelaxedphylipinterleaved", "aarelaxedphylipinterleaved", "discreterelaxedphylipinterleaved", "dnaaln", "rnaaln", "aaaln", "phyliptree", "relaxedp [...]
+
+		*/
+		static std::vector<std::string> getFormatNames();
+		/*! Converts a string such as "nexus" to the corresponding facet of the DataFormatType enum.
+
+			Format names are not case sensitive
+		*/
+		static DataFormatType formatNameToCode(const std::string &);
+
+
+		/*!	Creates a new MultiFormatReader
+			\arg blocksToRead -1 indicates that every block type should be read.
+				alternatively, the caller can OR-together bits of the PublicNexusReader::NexusBlocksToRead enum
+				to indicate which blocks should be processed.
+			\arg mode should be a facet of the NxsReader::WarningHandlingMode enum
+				that indicates where warning messages should be directed.
+		*/
+		MultiFormatReader(const int blocksToRead = -1, NxsReader::WarningHandlingMode mode=NxsReader::WARNINGS_TO_STDERR)
+			:PublicNexusReader(blocksToRead, mode),
+			coerceUnderscoresToSpaces(false)
+			{}
+		virtual ~MultiFormatReader(){}
+		/*! Read the specified format
+			\arg inp the input stream
+			\arg formatName the "official" format name (list of legal choices is available from getFormatNames())
+		*/
+		void ReadStream(std::istream & inp, const char * formatName);
+		/*! Read the specified format
+			\arg inp the input stream
+			\arg format a facet of DataFormatType indicating the file format
+		*/
+		void ReadStream(std::istream & inp, DataFormatType format, const char * filepath=0L); // filepath is optional and defaults to a null pointer; how it is used is defined out of line
+
+		/*! Read a file of the specified format
+			\arg filepath the file path to open and read
+			\arg formatName the "official" format name (list of legal choices is available from getFormatNames())
+		*/
+		void ReadFilepath(const char * filepath, const char * formatName);
+		/*! Read a file of the specified format
+			\arg filepath the file path to open and read
+			\arg format a facet of DataFormatType indicating the file format
+		*/
+		void ReadFilepath(const char * filepath, DataFormatType format);
+
+		/*! A convenience function for reading FASTA files
+			\arg inf the input stream to read
+			\arg dt a facet of  NxsCharactersBlock::DataTypesEnum that indicates the expected datatype
+		*/
+		void readFastaFile(std::istream & inf, NxsCharactersBlock::DataTypesEnum dt);
+
+	private: // format-specific helpers; all of them are only declared here and defined out of line
+		void addTaxaNames(const std::list<std::string> & taxaName, NxsTaxaBlockAPI * taxa);
+		void moveDataToDataBlock(const std::list<std::string> & taxaNames, std::list<NxsDiscreteStateRow> & matList, const unsigned nchar, NxsDataBlock * dataB);
+		void moveDataToMatrix(std::list<NxsDiscreteStateRow> & matList,  NxsDiscreteStateMatrix &mat);
+		void moveDataToUnalignedBlock(const std::list<std::string> & taxaNames, std::list<NxsDiscreteStateRow> & matList, NxsUnalignedBlock * uB);
+		bool readFastaSequences(FileToCharBuffer & ftcb, const NxsDiscreteDatatypeMapper &dm, std::list<std::string> & taxaNames, std::list<NxsDiscreteStateRow> & matList, size_t & longest);
+		bool readFinSequences(FileToCharBuffer & ftcb, NxsDiscreteDatatypeMapper &dm, std::list<std::string> & taxaNames, std::list<NxsDiscreteStateRow> & matList, size_t & longest); // NOTE(review): unlike readFastaSequences, dm is taken by non-const reference here
+		void readPhylipFile(std::istream & inf, NxsCharactersBlock::DataTypesEnum dt, bool relaxedNames, bool interleaved);
+		void readPhylipTreeFile(std::istream & inf, bool relaxedNames);
+		void readAlnFile(std::istream & inf, NxsCharactersBlock::DataTypesEnum dt);
+		bool readAlnData(FileToCharBuffer & ftcb, const NxsDiscreteDatatypeMapper &dm, std::list<std::string> & taxaNames, std::list<NxsDiscreteStateRow> & matList);
+
+		unsigned readPhylipHeader(std::istream & inf, unsigned & ntax, unsigned & nchar); // ntax and nchar are output parameters (non-const references)
+		void readPhylipData(FileToCharBuffer & ftcb, const NxsDiscreteDatatypeMapper &dm, std::list<std::string> & taxaNames, std::list<NxsDiscreteStateRow> & matList, const unsigned n_taxa, const unsigned n_char, bool relaxedNames);
+		void readInterleavedPhylipData(FileToCharBuffer & ftcb, const NxsDiscreteDatatypeMapper &dm, std::list<std::string> & taxaNames, std::list<NxsDiscreteStateRow> & matList, const unsigned n_taxa, const unsigned n_char, bool relaxedNames);
+		std::string readPhylipName(FileToCharBuffer & ftcb, unsigned i, bool relaxedNames);
+
+		/*! A convenience function for reading .fin files
+			\arg inf the input stream to read
+			\arg dt a facet of  NxsCharactersBlock::DataTypesEnum that indicates the expected datatype
+		*/
+		void readFinFile(std::istream & inf, NxsCharactersBlock::DataTypesEnum dt);
+		
+		bool coerceUnderscoresToSpaces; // initialized to false by the constructor; see Set/GetCoerceUnderscoresToSpaces
+
+};
+
+/*! \enum MultiFormatReader::DataFormatType
+An enumeration of all of the formats supported by MultiFormatReader
+
+This enumeration type is used in calls to ReadStream and ReadFilepath
+so that the reader knows what type of data to expect.
+*/
+/*! \var MultiFormatReader::NEXUS_FORMAT
+ read any NCL supported NEXUS block
+*/
+/*! \var MultiFormatReader::FASTA_DNA_FORMAT
+ DNA sequence data in FASTA format
+*/
+/*! \var MultiFormatReader::FASTA_AA_FORMAT
+ amino acid sequence data in FASTA format
+*/
+/*! \var MultiFormatReader::FASTA_RNA_FORMAT
+ RNA sequence data in FASTA format
+*/
+/*! \var MultiFormatReader::PHYLIP_DNA_FORMAT
+ DNA sequence data in non-interleaved PHYLIP format
+*/
+/*! \var MultiFormatReader::PHYLIP_RNA_FORMAT
+ RNA sequence data in non-interleaved PHYLIP format
+*/
+/*! \var MultiFormatReader::PHYLIP_AA_FORMAT
+ amino acid sequence data in non-interleaved PHYLIP format
+*/
+/*! \var MultiFormatReader::PHYLIP_DISC_FORMAT
+ Discrete data (like the NEXUS "standard" format) in non-interleaved PHYLIP format
+*/
+/*! \var MultiFormatReader::INTERLEAVED_PHYLIP_DNA_FORMAT
+ DNA sequence data in interleaved PHYLIP format
+*/
+/*! \var MultiFormatReader::INTERLEAVED_PHYLIP_RNA_FORMAT
+ RNA sequence data in interleaved PHYLIP format
+*/
+/*! \var MultiFormatReader::INTERLEAVED_PHYLIP_AA_FORMAT
+ amino acid sequence data in interleaved PHYLIP format
+*/
+/*! \var MultiFormatReader::INTERLEAVED_PHYLIP_DISC_FORMAT
+ Discrete data (like the NEXUS "standard" format) in interleaved PHYLIP format
+*/
+/*! \var MultiFormatReader::RELAXED_PHYLIP_DNA_FORMAT
+ DNA sequence data in non-interleaved relaxed PHYLIP format
+*/
+/*! \var MultiFormatReader::RELAXED_PHYLIP_RNA_FORMAT
+ RNA sequence data in non-interleaved relaxed PHYLIP format
+*/
+/*! \var MultiFormatReader::RELAXED_PHYLIP_AA_FORMAT
+ amino acid sequence data in non-interleaved relaxed PHYLIP format
+*/
+/*! \var MultiFormatReader::RELAXED_PHYLIP_DISC_FORMAT
+ Discrete data (like the NEXUS "standard" format) in non-interleaved relaxed PHYLIP format
+*/
+/*! \var MultiFormatReader::INTERLEAVED_RELAXED_PHYLIP_DNA_FORMAT
+ DNA sequence data in interleaved relaxed PHYLIP format
+*/
+/*! \var MultiFormatReader::INTERLEAVED_RELAXED_PHYLIP_RNA_FORMAT
+ RNA sequence data in interleaved relaxed PHYLIP format
+*/
+/*! \var MultiFormatReader::INTERLEAVED_RELAXED_PHYLIP_AA_FORMAT
+ Amino acid sequence data in interleaved relaxed PHYLIP format
+*/
+/*! \var MultiFormatReader::INTERLEAVED_RELAXED_PHYLIP_DISC_FORMAT
+ Discrete data (like the NEXUS "standard" format) in interleaved relaxed PHYLIP format
+*/
+/*! \var MultiFormatReader::ALN_DNA_FORMAT
+ DNA sequence data in ALN format
+*/
+/*! \var MultiFormatReader::ALN_RNA_FORMAT
+ RNA sequence data in ALN format
+*/
+/*! \var MultiFormatReader::ALN_AA_FORMAT
+ Amino acid sequence data in ALN format
+*/
+/*! \var MultiFormatReader::PHYLIP_TREE_FORMAT
+ Trees in NEWICK (PHYLIP) format
+*/
+/*! \var MultiFormatReader::RELAXED_PHYLIP_TREE_FORMAT
+ Trees in NEWICK format with relaxed PHYLIP names
+*/
+/*! \var MultiFormatReader::NEXML_FORMAT
+ NEXML formatted file; currently unsupported, but support is planned
+*/
+/*! \var MultiFormatReader::UNSUPPORTED_FORMAT
+For NCL internal use only (to mark the end of the DataFormatType enum).
+*/
+
+#endif
+
diff --git a/src/ncl/nxspublicblocks.h b/src/ncl/nxspublicblocks.h
new file mode 100644
index 0000000..9970456
--- /dev/null
+++ b/src/ncl/nxspublicblocks.h
@@ -0,0 +1,731 @@
+//	Copyright (C) 1999-2003 Paul O. Lewis
+//
+//	This file is part of NCL (Nexus Class Library) version 2.0.
+//
+//	NCL is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	NCL is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with NCL; if not, write to the Free Software Foundation, Inc.,
+//	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+#ifndef NCL_NXSPUBLICBLOCKS_H
+#define NCL_NXSPUBLICBLOCKS_H
+
+#include <vector>
+#include "ncl/nxsdefs.h"
+#include "ncl/nxsblock.h"
+#include "ncl/nxsassumptionsblock.h"
+#include "ncl/nxscharactersblock.h"
+#include "ncl/nxsdatablock.h"
+#include "ncl/nxsdistancesblock.h"
+#include "ncl/nxstaxablock.h"
+#include "ncl/nxstaxaassociationblock.h"
+#include "ncl/nxstreesblock.h"
+#include "ncl/nxsunalignedblock.h"
+#include "ncl/nxsreader.h"
+
+class NxsStoreTokensBlockReader	/* NxsBlock that stores the commands of a block instead of interpreting them */
+	: public NxsBlock
+	{
+	public:
+		/*---------------------------------------------------------------------------------------
+		| If the blockname is empty, then any block will be read by the instance (see CanReadBlockType)
+		*/
+		NxsStoreTokensBlockReader(std::string blockName, bool storeTokenInfo)
+			:storeAllTokenInfo(storeTokenInfo),
+			tolerateEOFInBlock(false)
+			{
+			NCL_BLOCKTYPE_ATTR_NAME = NxsString(blockName.c_str());
+			}
+		void Read(NxsToken &token);
+		void Reset();
+		void Report(std::ostream &out) NCL_COULD_BE_CONST  /*v2.1to2.2 1 */
+			{
+			ReportConst(out);
+			}
+		void WriteAsNexus(std::ostream &out) const;
+		/*---------------------------------------------------------------------------------------
+		| Results in aliasing of the taxa, assumptionsBlock blocks! Self-assignment is a no-op.
+		*/
+		NxsStoreTokensBlockReader & operator=(const NxsStoreTokensBlockReader &other)
+			{
+			if (this == &other) /* Reset() below runs before the copy, so `x = x' must not reach it */
+				return *this;
+			Reset();
+			CopyBaseBlockContents(static_cast<const NxsBlock &>(other));
+			commandsRead = other.commandsRead;
+			justTokens = other.justTokens;
+			storeAllTokenInfo = other.storeAllTokenInfo;
+			tolerateEOFInBlock = other.tolerateEOFInBlock;
+			return *this;
+			}
+		NxsStoreTokensBlockReader * Clone() const /* \returns a heap-allocated copy; the caller owns it */
+			{
+			NxsStoreTokensBlockReader * b = new NxsStoreTokensBlockReader(NCL_BLOCKTYPE_ATTR_NAME, storeAllTokenInfo);
+			*b = *this;
+			return b;
+			}
+		/*! \ref BlockTypeIDDiscussion */
+		virtual bool CanReadBlockType(const NxsToken & token)
+			{
+			if (NCL_BLOCKTYPE_ATTR_NAME.length() == 0)
+				{
+				/* no name yet: adopt the (upper-cased) name of the block being offered and accept it */
+				NCL_BLOCKTYPE_ATTR_NAME.assign(token.GetTokenReference().c_str());
+				NCL_BLOCKTYPE_ATTR_NAME.ToUpper();
+				return true;
+				}
+			return token.Equals(NCL_BLOCKTYPE_ATTR_NAME);
+			}
+		virtual bool TolerateEOFInBlock() const
+			{
+			return tolerateEOFInBlock; /* false unless changed via SetTolerateEOFInBlock */
+			}
+		void SetTolerateEOFInBlock(bool v)
+			{
+			tolerateEOFInBlock = v;
+			}
+		const std::list<ProcessedNxsCommand> & GetCommands() const /* \returns a reference to commandsRead */
+			{
+			return commandsRead;
+			}
+	protected:
+		void ReadCommand(NxsToken &token);
+		void ReportConst(std::ostream &out) const;
+
+		typedef std::vector<std::string> VecString;
+		typedef std::list<VecString> ListVecString;
+
+		std::list<ProcessedNxsCommand> commandsRead;
+		ListVecString justTokens;
+		bool storeAllTokenInfo; /* set from the constructor's storeTokenInfo argument */
+		bool tolerateEOFInBlock; /* starts out false */
+	};
+/*!
+ 	A factory class that delegates calls to the other "default" public block parsers that NCL provides.
+
+	Provided as a convenience class to make it possible to read all supported blocks with the addition of one factory
+		to the NxsReader.
+
+
+*/
+class NxsDefaultPublicBlockFactory
+	: public NxsBlockFactory
+	{
+	public:
+		/**----------------------------------------------------------------------------------------------------------------------
+		|	Constructor takes two booleans.
+		|	If readUnknownBlocks is "true" then a NxsStoreTokensBlockReader will be spawned for every unknown block.
+		|	storeTokenInfo is passed to the NxsStoreTokensBlockReader constructor (true for storage of full token info - such as
+		|		file position).
+		*/
+		NxsDefaultPublicBlockFactory(bool readUnknownBlocks, bool storeTokenInfo)
+			:tokenizeUnknownBlocks(readUnknownBlocks),
+			storeTokenInfoArg(storeTokenInfo)
+			{}
+		virtual NxsBlock  *	GetBlockReaderForID(const std::string & NCL_BLOCKTYPE_ATTR_NAME, NxsReader *reader, NxsToken *token); // declared here only; defined out of line
+
+	protected:
+		NxsAssumptionsBlockFactory assumpBlockFact; // this and the members below: one factory member per public block type
+		NxsCharactersBlockFactory charBlockFact;
+		NxsDataBlockFactory dataBlockFact;
+		NxsDistancesBlockFactory distancesBlockFact;
+		NxsTaxaBlockFactory taxaBlockFact;
+		NxsTaxaAssociationBlockFactory taxaAssociationBlockFact;
+		NxsTreesBlockFactory treesBlockFact;
+		NxsUnalignedBlockFactory unalignedBlockFact;
+
+		bool tokenizeUnknownBlocks; // the constructor's readUnknownBlocks argument
+		bool storeTokenInfoArg; // the constructor's storeTokenInfo argument
+	};
+
+
+/*!
+ 	A factory class that takes exemplars that will be cloned to read each block.
+
+	To use this factory you MUST override NxsBlock::Clone() in each class that you would like to use to parse blocks
+*/
+class NxsCloneBlockFactory
+	: public NxsBlockFactory
+	{
+	public:
+		NxsCloneBlockFactory()
+			:defPrototype(NULL)
+			{}
+		/*! \returns a new NxsBlock instance (or NULL) to read the NEXUS content
+		in a block of name `NCL_BLOCKTYPE_ATTR_NAME`. The lookup is case-insensitive (names are upper-cased).
+		NULL is returned when no prototype matches and no default is set, or when the prototype registered for the name is NULL.
+			This function is called by the NxsReader during the parse if no
+			NxsBlock instances for this block ID type were added to the reader.
+		*/
+		virtual NxsBlock  *	GetBlockReaderForID(const std::string & NCL_BLOCKTYPE_ATTR_NAME, /*!< The block ID \ref BlockTypeIDDiscussion */
+								NxsReader *, /*!< pointer to the NxsReader that is conducting the parse */
+								NxsToken *) /*!< pointer to the current NxsToken object that wraps the istream (this function should not advance the token) */
+			{
+			std::string b(NCL_BLOCKTYPE_ATTR_NAME.c_str());
+			NxsString::to_upper(b);
+			std::map<std::string , const NxsBlock *>::const_iterator pIt = prototypes.find(b);
+			if (pIt == prototypes.end())
+				return (defPrototype ? defPrototype->Clone() : NULL);
+			return (pIt->second ? pIt->second->Clone() : NULL); /* AddPrototype(NULL, name) can store a NULL exemplar */
+			}
+
+		/*! Registers a block instance to be used whenever an unknown block (any
+		block with an ID that does not correspond with any of the registered blocks)
+		is encountered in a file. \returns true if a non-NULL default was already set (it is replaced).
+		*/
+		bool AddDefaultPrototype(const NxsBlock * exemplar)
+			{
+			bool replaced = defPrototype != NULL;
+			defPrototype = exemplar;
+			return replaced;
+			}
+		/*! Registers the block instance passed in as a template to clone a block reader
+			whenever a block with the name `blockName` is encountered. \returns true if an entry for that name already existed.
+		*/
+		bool AddPrototype(const NxsBlock * exemplar, /*!< The block to be cloned */
+						  const char * blockName = NULL) /*!< The block ID \ref BlockTypeIDDiscussion */
+			{
+			std::string b;
+			if (blockName)
+				b.assign(blockName);
+			else
+				{
+				if (exemplar == NULL) /* neither a name nor a block to take the name from */
+					return false;
+				NxsString bId  = exemplar->GetID();
+				b.assign(bId.c_str());
+				}
+			NxsString::to_upper(b);
+			bool replaced = prototypes.find(b) != prototypes.end();
+			prototypes[b] = exemplar;
+			return replaced;
+			}
+
+	protected:
+		std::map<std::string , const NxsBlock *> prototypes; /* upper-cased block name -> exemplar; pointers are stored, not copies */
+		const NxsBlock * defPrototype; /* exemplar for unregistered block names; NULL when none is set */
+	};
+
+
+
+typedef std::pair<std::string, std::string> NxsNameToNameTrans; /*!< a pair of names; vectors of these are passed to the NxsConversionOutputRecord name-translation writers */
+
+/*! hacky (home-spun) writing of an XML attribute value: takes the value `v` and the destination stream `out` (implementation is not in this header) */
+void writeAttributeValue(std::ostream & out, const std::string & v);
+
+/*! This class is used internally to keep track of operations that may be needed
+to make taxon labels from different sources avoid clashing with each other.
+*/
+class NxsConversionOutputRecord
+	{
+	public:
+
+		NxsConversionOutputRecord() // defaults: no renaming, but translation files are written, numbered and announced
+			:addNumbersToDisambiguateNames(false),
+			writeNameTranslationFile(true),
+			translationFilename("NameTranslationFile"),
+			numberTranslationFiles(true),
+			verboseWritingOfNameTranslationFile(true)
+			{}
+
+		void writeNameTranslation(std::vector<NxsNameToNameTrans>, const NxsTaxaBlockAPI * ); // NOTE(review): the vector is taken by value, unlike the static writers below
+
+		static std::string getUniqueFilenameWithLowestIndex(const char * prefix);
+		static void writeTaxonNameTranslationFilepath(const char * fn, const std::vector<NxsNameToNameTrans> & nameTrans, const NxsTaxaBlockAPI *, bool verbose=false);
+		static void writeTaxonNameTranslationStream(std::ostream & fn, const std::vector<NxsNameToNameTrans> & nameTrans, const NxsTaxaBlockAPI *);
+
+		//The following set of members were added to deal with name clashes that
+		//	are legal (but a very bad idea) in phylip.  If names are auto-translated
+		//	to unique names (by the addition of numbers), then it is very useful to print out a file
+		//	listing the translations.
+		bool addNumbersToDisambiguateNames; // if true, then taxon names may be altered on reading to  make them unique
+		bool writeNameTranslationFile; // if true, and taxon names are modified, then a translation file will be written
+		std::string translationFilename; // if writeNameTranslationFile is used, then this will be the file name or prefix
+		bool numberTranslationFiles; // if true, then translationFilename will serve as a prefix and the real filename may contain a number to make it unique.
+		bool verboseWritingOfNameTranslationFile; // if true, then writing a translationFilename will trigger a message to std::cerr
+		std::map<const NxsTaxaBlockAPI *, std::string> taxaBlocksToConversionFiles; // keyed by taxa block pointer; presumably the translation file written for that block -- TODO confirm
+	};
+
+
+/*!
+A NxsReader that uses clone factories to read public blocks.
+
+The blocks created by reading a file MUST BE DELETED by the caller (either by a
+	call to DeleteBlocksFromFactories() or by requesting each pointer to a block
+	and then deleting the blocks).
+
+Blocks are created by cloning a template block. If you would like to alter
+	the default behavior of a block, you can request a reference to the
+	"template" NxsBlock of the appropriate type, modify it, and then parse the file.
+
+You may give the reader "context" programmatically by adding "Read" blocks (which
+	will mimic the behavior of those blocks having appeared in the file itself).
+
+Commands in Non-public blocks are dealt with by creating a NxsStoreTokensBlockReader
+	to store the commands.
+
+After parsing, the client can request the number of TAXA blocks read, and the
+	number of CHARACTERS, TREES, ... blocks that refer to a particular taxa
+	block.
+
+
+
+NOT COPYABLE
+*/
+class PublicNexusReader: public ExceptionRaisingNxsReader
+	{
+	public:
+
+		static BlockReaderList parseFileOrThrow(const char *filepath,
+												NxsReader::WarningHandlingMode mode = NxsReader::WARNINGS_TO_STDERR,
+												bool parsePrivateBlocks=true,
+												bool storeTokenInfo=true);
+
+		/*! Enumeration of bits used that can be "ORed" together to create an argument for
+			PublicNexusReader instance that will only read certain NEXUS blocks
+		*/
+		enum NexusBlocksToRead
+		{
+			NEXUS_TAXA_BLOCK_BIT = 0x01, /// Flags TAXA blocks as a type to be read
+			NEXUS_TREES_BLOCK_BIT = 0x02, /// Flags TREES blocks as a type to be read
+			NEXUS_CHARACTERS_BLOCK_BIT = 0x04, /// Flags CHARACTERS and DATA blocks as types to be read
+			NEXUS_ASSUMPTIONS_BLOCK_BIT = 0x08, /// Flags ASSUMPTIONS blocks as a type to be read
+			NEXUS_SETS_BLOCK_BIT = 0x10, /// Flags SETS blocks as a type to be read
+			NEXUS_UNALIGNED_BLOCK_BIT = 0x20, /// Flags UNALIGNED blocks as a type to be read
+			NEXUS_DISTANCES_BLOCK_BIT = 0x40, /// Flags DISTANCES blocks as a type to be read
+			NEXUS_TAXAASSOCIATION_BLOCK_BIT = 0x80, /// Flags TAXAASSOCIATION blocks to be read
+			NEXUS_UNKNOWN_BLOCK_BIT = 0x100 /// to be used internally
+		};
+
+		/*!	Creates a new PublicNexusReader
+			\arg blocksToRead -1 indicates that every block type should be read.
+				alternatively, the caller can OR-together bits of the NexusBlocksToRead enum
+				to indicate which blocks should be processed.
+			\arg mode should be a facet of the NxsReader::WarningHandlingMode enum
+				that indicates where warning messages should be directed.
+		*/
+		PublicNexusReader(const int blocksToRead = -1, NxsReader::WarningHandlingMode mode=NxsReader::WARNINGS_TO_STDERR);
+		virtual ~PublicNexusReader();
+
+		virtual void	Execute(NxsToken& token, bool notifyStartStop = true);
+		std::string GetErrorMessage()
+			{
+			return errorMsg;
+			}
+
+		/*! \arg tl a vector of taxon names.
+		 	\returns a new taxa block for these taxa. This taxa block will also
+		 	be stored in the reader so that future files can refer to these taxa.
+
+		 	This function is useful if you want to programmatically create
+		 	a NEXUS TAXA block and have the reader treat it in the same way as
+		 	if the NxsTaxaBlock were read from a file
+		*/
+		NxsTaxaBlock * RegisterTaxa(const std::vector<std::string> & tl);
+
+		/*!	\returns a pointer to the template for the NxsAssumptionsBlock.
+			This object will be cloned whenever an ASSUMPTIONS or SETS block is encountered,
+			so modifying the default behavior of this instance will change the behavior
+			of every NxsAssumptionsBlock created by the reader.
+
+			Do NOT DELETE the template! (the client has to delete all spawned blocks, but
+			the PublicNexusReader deletes its own templates)
+		*/
+		NxsAssumptionsBlock * GetAssumptionsBlockTemplate() {return assumptionsBlockTemplate;}
+		/*!	\returns a pointer to the template for the NxsCharactersBlock.
+			This object will be cloned whenever a CHARACTERS block is encountered,
+			so modifying the default behavior of this instance will change the behavior
+			of every NxsCharactersBlock created by the reader.
+
+			Do NOT DELETE the template! (the client has to delete all spawned blocks, but
+			the PublicNexusReader deletes its own templates)
+		*/
+		NxsCharactersBlock * GetCharactersBlockTemplate() {return charactersBlockTemplate;}
+		/*!	\returns a pointer to the template for the NxsDataBlock.
+			This object will be cloned whenever a DATA block is encountered,
+			so modifying the default behavior of this instance will change the behavior
+			of every NxsDataBlock created by the reader.
+
+			Do NOT DELETE the template! (the client has to delete all spawned blocks, but
+			the PublicNexusReader deletes its own templates)
+		*/
+		NxsDataBlock * GetDataBlockTemplate() {return dataBlockTemplate;}
+		/*!	\returns a pointer to the template for the NxsDistancesBlock.
+			This object will be cloned whenever a DISTANCES block is encountered,
+			so modifying the default behavior of this instance will change the behavior
+			of every NxsDistancesBlock created by the reader.
+
+			Do NOT DELETE the template! (the client has to delete all spawned blocks, but
+			the PublicNexusReader deletes its own templates)
+		*/
+		NxsDistancesBlock * GetDistancesBlockTemplate() {return distancesBlockTemplate;}
+		/*!	\returns a pointer to the template for the NxsTaxaBlock.
+			This object will be cloned whenever a TAXA block is encountered,
+			so modifying the default behavior of this instance will change the behavior
+			of every NxsTaxaBlock created by the reader.
+
+			Do NOT DELETE the template! (the client has to delete all spawned blocks, but
+			the PublicNexusReader deletes its own templates)
+		*/
+		NxsTaxaBlock * GetTaxaBlockTemplate() {return taxaBlockTemplate;}
+		/*!	\returns a pointer to the template for the NxsTaxaAssociationBlock.
+			This object will be cloned whenever a TAXAASSOCIATION block is encountered,
+			so modifying the default behavior of this instance will change the behavior
+			of every NxsTaxaAssociationBlock created by the reader.
+
+			Do NOT DELETE the template! (the client has to delete all spawned blocks, but
+			the PublicNexusReader deletes its own templates)
+		*/
+		NxsTaxaAssociationBlock * GetTaxaAssociationBlockTemplate() {return taxaAssociationBlockTemplate;}
+		/*!	\returns a pointer to the template for the NxsTreesBlock.
+			This object will be cloned whenever a TREES block is encountered,
+			so modifying the default behavior of this instance will change the behavior
+			of every NxsTreesBlock created by the reader.
+
+			Do NOT DELETE the template! (the client has to delete all spawned blocks, but
+			the PublicNexusReader deletes its own templates)
+		*/
+		NxsTreesBlock * GetTreesBlockTemplate() {return treesBlockTemplate;}
+		/*!	\returns a pointer to the template for the NxsUnalignedBlock.
+			This object will be cloned whenever an UNALIGNED block is encountered,
+			so modifying the default behavior of this instance will change the behavior
+			of every NxsUnalignedBlock created by the reader.
+
+			Do NOT DELETE the template! (the client has to delete all spawned blocks, but
+			the PublicNexusReader deletes its own templates)
+		*/
+		NxsUnalignedBlock * GetUnalignedBlockTemplate() {return unalignedBlockTemplate;}
+		/*!	\returns a pointer to the template for the NxsStoreTokensBlockReader.
+			This object will be cloned whenever an unknown block is encountered,
+			so modifying the default behavior of this instance will change the behavior
+			of every NxsStoreTokensBlockReader created by the reader.
+
+			Do NOT DELETE the template! (the client has to delete all spawned blocks, but
+			the PublicNexusReader deletes its own templates)
+		*/
+		NxsStoreTokensBlockReader * GetUnknownBlockTemplate() const {return storerBlockTemplate;}
+
+		/*! \returns the number of NxsAssumptionsBlock objects created during the parse which
+				refer to the taxa block `taxa`
+			If `taxa` is 0L, then the total number of Assumptions blocks will be returned
+		*/
+		unsigned GetNumAssumptionsBlocks(const NxsTaxaBlock *taxa) const;
+		/*! \returns the number of NxsAssumptionsBlock objects created during the parse which
+				refer to the characters block `chars`
+			If `chars` is 0L, then the total number of Assumptions blocks will be returned
+		*/
+		unsigned GetNumAssumptionsBlocks(const NxsCharactersBlock *chars) const;
+		/*! \returns the number of NxsAssumptionsBlock objects created during the parse which
+				refer to the trees block `trees`
+			If `trees` is 0L, then the total number of Assumptions blocks will be returned
+		*/
+		unsigned GetNumAssumptionsBlocks(const NxsTreesBlock *trees) const;
+		/*! \returns a pointer to the NxsAssumptionsBlock with index
+			Indexing starts at 0 and refers to the index in a list of NxsAssumptionsBlock
+			objects that refer to the NxsTaxaBlock `taxa`.  Thus, the index does not
+			necessarily represent the position among ALL of the NxsAssumptionsBlock objects
+
+			0L will be returned if the index is out of range. Indices should be <
+				the number returned by GetNumAssumptionsBlocks(taxa).
+			If `taxa` is 0L, then the total block indexing scheme will refer to the
+				total number of Assumptions blocks read.
+		*/
+		NxsAssumptionsBlock * GetAssumptionsBlock(const NxsTaxaBlock *taxa, unsigned index) const;
+		NxsAssumptionsBlock * GetAssumptionsBlock(const NxsCharactersBlock *taxa, unsigned index) const;
+		NxsAssumptionsBlock * GetAssumptionsBlock(const NxsTreesBlock *taxa, unsigned index) const;
+
+		/*! \returns the number of NxsStoreTokensBlockReader objects created during the parse.
+		*/
+		unsigned GetNumUnknownBlocks() const;
+		/*! \returns a pointer to the NxsStoreTokensBlockReader with index (indexing starts at 0).
+				0L will be returned if the index is out of range. index should be < the
+				number returned by GetNumUnknownBlocks();
+		*/
+		NxsStoreTokensBlockReader * GetUnknownBlock(unsigned index) const;
+
+		/*! \returns the number of NxsCharactersBlock objects created during the parse which
+				refer to the taxa in `taxa`
+			If `taxa` is 0L, then the total number of Characters blocks will be returned
+		*/
+		unsigned GetNumCharactersBlocks(const NxsTaxaBlock *taxa) const;
+		/*! \returns a pointer to the NxsCharactersBlock with index
+			Indexing starts at 0 and refers to the index in a list of NxsCharactersBlock
+			objects that refer to the NxsTaxaBlock `taxa`.  Thus, the index does not
+			necessarily represent the position among ALL of the NxsCharactersBlock objects
+
+			0L will be returned if the index is out of range. Indices should be <
+				the number returned by GetNumCharactersBlocks(taxa).
+			If `taxa` is 0L, then the total block indexing scheme will refer to the
+				total number of Characters blocks read.
+		*/
+		NxsCharactersBlock * GetCharactersBlock(const NxsTaxaBlock *taxa, unsigned index) const;
+
+		/*! \returns the number of NxsDistancesBlock objects created during the parse which
+				refer to the taxa in `taxa`
+			If `taxa` is 0L, then the total number of Distances blocks will be returned
+		*/
+		unsigned GetNumDistancesBlocks(const NxsTaxaBlock *taxa) const;
+		/*! \returns a pointer to the NxsDistancesBlock with index
+			Indexing starts at 0 and refers to the index in a list of NxsDistancesBlock
+			objects that refer to the NxsTaxaBlock `taxa`.  Thus, the index does not
+			necessarily represent the position among ALL of the NxsDistancesBlock objects
+
+			0L will be returned if the index is out of range. Indices should be <
+				the number returned by GetNumDistancesBlocks(taxa).
+			If `taxa` is 0L, then the total block indexing scheme will refer to the
+				total number of Distances blocks read.
+		*/
+		NxsDistancesBlock * GetDistancesBlock(const NxsTaxaBlock *taxa, unsigned index) const;
+
+		/*! \returns the number of NxsTaxaBlock objects created during the parse (some
+		 	of these objects may be "implied" by another block, but the client
+		 	code can treat them as if they occurred in the file explicitly).
+		*/
+		unsigned GetNumTaxaBlocks() const;
+		/*! \returns a pointer to the NxsTaxaBlock with index (indexing starts at 0).
+				0L will be returned if the index is out of range.
+		*/
+		NxsTaxaBlock * GetTaxaBlock(unsigned index) const;
+
+		/*! \returns the number of NxsTaxaAssociationBlock objects created during the parse which
+				refer to the taxa as their first or second taxa block
+			If `taxa` is 0L, then the total number of TaxaAssociation blocks will be returned
+		*/
+		unsigned GetNumTaxaAssociationBlocks(const NxsTaxaBlock *taxa) const;
+		/*! \returns a pointer to the NxsTaxaAssociationBlock with index
+			Indexing starts at 0 and refers to the index in a list of NxsTaxaAssociationBlock
+			objects that refer to the NxsTaxaBlock `taxa`.  Thus, the index does not
+			necessarily represent the position among ALL of the NxsTaxaAssociationBlock objects
+
+			0L will be returned if the index is out of range. Indices should be <
+				the number returned by GetNumTaxaAssociationBlocks(taxa).
+			If `taxa` is 0L, then the total block indexing scheme will refer to the
+				total number of TaxaAssociation blocks read.
+		*/
+		NxsTaxaAssociationBlock * GetTaxaAssociationBlock(const NxsTaxaBlock *taxa, unsigned index) const;
+
+		/*! \returns the number of NxsTreesBlock objects created during the parse which
+				refer to the taxa in `taxa`
+			If `taxa` is 0L, then the total number of Trees blocks will be returned
+		*/
+		unsigned GetNumTreesBlocks(const NxsTaxaBlock *taxa) const;
+		/*! \returns a pointer to the NxsTreesBlock with index
+			Indexing starts at 0 and refers to the index in a list of NxsTreesBlock
+			objects that refer to the NxsTaxaBlock `taxa`.  Thus, the index does not
+			necessarily represent the position among ALL of the NxsTreesBlock objects
+
+			0L will be returned if the index is out of range. Indices should be <
+				the number returned by GetNumTreesBlocks(taxa).
+			If `taxa` is 0L, then the total block indexing scheme will refer to the
+				total number of Trees blocks read.
+		*/
+		NxsTreesBlock * GetTreesBlock(const NxsTaxaBlock *taxa, unsigned index) const;
+
+		/*! \returns the number of NxsUnalignedBlock objects created during the parse which
+				refer to the taxa in `taxa`
+			If `taxa` is 0L, then the total number of Unaligned blocks will be returned
+		*/
+		unsigned GetNumUnalignedBlocks(const NxsTaxaBlock *taxa) const;
+		/*! \returns a pointer to the NxsUnalignedBlock with index
+			Indexing starts at 0 and refers to the index in a list of NxsUnalignedBlock
+			objects that refer to the NxsTaxaBlock `taxa`.  Thus, the index does not
+			necessarily represent the position among ALL of the NxsUnalignedBlock objects
+
+			0L will be returned if the index is out of range. Indices should be <
+				the number returned by GetNumUnalignedBlocks(taxa).
+			If `taxa` is 0L, then the total block indexing scheme will refer to the
+				total number of Unaligned blocks read.
+		*/
+		NxsUnalignedBlock * GetUnalignedBlock(const NxsTaxaBlock *taxa, unsigned index) const;
+
+
+		/*! Deletes all of the blocks that were spawned during the parse.
+			\warning Do not call this function if you still retain references
+			  to the spawned objects.
+		*/
+		virtual void DeleteBlocksFromFactories()
+			{
+			this->NxsReader::DeleteBlocksFromFactories(); // explicitly qualified base-class call (no virtual dispatch)
+			this->ClearUsedBlockList(); // then forget the record of the blocks from the last parse
+			}
+		/*! Mainly used internally.
+			See \ref ClearContent(), which is probably the function that you want.
+			Removes the record of the order of blocks encountered during the
+				last parse.
+
+		*/
+		virtual void ClearUsedBlockList();
+		/*! Removes all references to blocks spawned during the parse, but does
+			NOT delete them.
+			Call this function if you want to reuse the parser and you want to
+
+			delete all of the spawned blocks yourself (after calling
+			ClearUsedBlockList() the DeleteBlocksFromFactories() function
+			will not delete blocks)
+		*/
+		virtual void ClearContent()
+			{
+			this->unalignedBlockVec.clear(); // each vector stores raw pointers, so clear() deletes no block
+			this->treesBlockVec.clear();
+			this->taxaAssociationBlockVec.clear();
+			this->taxaBlockVec.clear();
+			this->storerBlockVec.clear();
+			this->distancesBlockVec.clear();
+			this->dataBlockVec.clear();
+			this->charactersBlockVec.clear();
+			this->assumptionsBlockVec.clear();
+			this->ExceptionRaisingNxsReader::ClearContent(); // base class clears its own state last
+			}
+
+		/*! Adds (or "registers") a NxsAssumptionsBlock with the reader. Can be
+			useful if you:
+				-# obtain references to blocks you want to keep,
+				-# call ClearContent()
+				-# add back the block instances that provide necessary context for additional parses.
+		*/
+		void AddReadAssumptionsBlock(NxsAssumptionsBlock * block)
+			{
+			this->assumptionsBlockVec.push_back(block); // remember it in the type-specific list
+			this->AddReadBlock("ASSUMPTIONS", block); // and register it with the reader under its block-type name
+			}
+		/*! Adds (or "registers") a NxsCharactersBlock with the reader. Can be
+			useful if you:
+				-# obtain references to blocks you want to keep,
+				-# call ClearContent()
+				-# add back the block instances that provide necessary context for additional parses.
+		*/
+		void AddReadCharactersBlock(NxsCharactersBlock * block)
+			{
+			this->charactersBlockVec.push_back(block); // remember it in the type-specific list
+			this->AddReadBlock("CHARACTERS", block); // and register it with the reader under its block-type name
+			}
+		/*! Adds (or "registers") a NxsDataBlock with the reader. Can be
+			useful if you:
+				-# obtain references to blocks you want to keep,
+				-# call ClearContent()
+				-# add back the block instances that provide necessary context for additional parses.
+		*/
+		void AddReadDataBlock(NxsDataBlock * block)
+			{
+			this->dataBlockVec.push_back(block); // remember it in the type-specific list
+			this->AddReadBlock("CHARACTERS", block); // NOTE(review): registered as "CHARACTERS", not "DATA" -- presumably deliberate; confirm
+			}
+		/*! Adds (or "registers") a NxsDistancesBlock with the reader. Can be
+			useful if you:
+				-# obtain references to blocks you want to keep,
+				-# call ClearContent()
+				-# add back the block instances that provide necessary context for additional parses.
+		*/
+		void AddReadDistancesBlock(NxsDistancesBlock * block)
+			{
+			this->distancesBlockVec.push_back(block); // remember it in the type-specific list
+			this->AddReadBlock("DISTANCES", block); // and register it with the reader under its block-type name
+			}
+		/*! Adds (or "registers") a NxsTaxaBlock with the reader. Can be
+			useful if you:
+				-# obtain references to blocks you want to keep,
+				-# call ClearContent()
+				-# add back the block instances that provide necessary context for additional parses.
+		*/
+		void AddReadTaxaBlock(NxsTaxaBlock * block)
+			{
+			this->taxaBlockVec.push_back(block); // remember it in the type-specific list
+			this->AddReadBlock("TAXA", block); // and register it with the reader under its block-type name
+			}
+		/*! Adds (or "registers") a NxsTaxaAssociationBlock with the reader. Can be
+			useful if you:
+				-# obtain references to blocks you want to keep,
+				-# call ClearContent()
+				-# add back the block instances that provide necessary context for additional parses.
+		*/
+		void AddReadTaxaAssociationBlock(NxsTaxaAssociationBlock * block)
+			{
+			this->taxaAssociationBlockVec.push_back(block); // remember it in the type-specific list
+			this->AddReadBlock("TAXAASSOCIATION", block); // and register it with the reader under its block-type name
+			}
+		/*! Adds (or "registers") a NxsTreesBlock with the reader. Can be
+			useful if you:
+				-# obtain references to blocks you want to keep,
+				-# call ClearContent()
+				-# add back the block instances that provide necessary context for additional parses.
+		*/
+		void AddReadTreesBlock(NxsTreesBlock * block)
+			{
+			this->treesBlockVec.push_back(block); // remember it in the type-specific list
+			this->AddReadBlock("TREES", block); // and register it with the reader under its block-type name
+			}
+		/*! Adds (or "registers") a NxsUnalignedBlock with the reader. Can be
+			useful if you:
+				-# obtain references to blocks you want to keep,
+				-# call ClearContent()
+				-# add back the block instances that provide necessary context for additional parses.
+		*/
+		void AddReadUnalignedBlock(NxsUnalignedBlock * block)
+			{
+			unalignedBlockVec.push_back(block); // remember it in the type-specific list
+			AddReadBlock("UNALIGNED", block); // register under the block's own type name (was "UNKNOWN"), matching the sibling AddRead...Block methods
+			}
+		/*! Adds (or "registers") a NxsStoreTokensBlockReader with the reader. Can be
+			useful if you:
+				-# obtain references to blocks you want to keep,
+				-# call ClearContent()
+				-# add back the block instances that provide necessary context for additional parses.
+		*/
+		void AddReadUnknownBlock(NxsStoreTokensBlockReader * block)
+			{
+			this->storerBlockVec.push_back(block); // remember it in the list of stored (unrecognized) blocks
+			this->AddReadBlock(block->GetID(), block); // no fixed type name here: the block supplies its own ID
+			}
+
+		/*! this public field is used in some hacks that relate to printing out
+			translation records during parsing (when names have to change in order
+			for the input file to be valid NEXUS, but we want the parser to be
+			loose but to log its changes).
+		*/
+		NxsConversionOutputRecord conversionOutputRecord;
+
+	protected:
+		void PostExecuteHook();
+		virtual void    AddFactory(NxsBlockFactory *);
+		int bitsForBlocksToRead;
+		NxsCloneBlockFactory cloneFactory;
+
+		NxsAssumptionsBlock * assumptionsBlockTemplate;
+		NxsCharactersBlock * charactersBlockTemplate;
+		NxsDataBlock * dataBlockTemplate;
+		NxsDistancesBlock * distancesBlockTemplate;
+		NxsStoreTokensBlockReader * storerBlockTemplate;
+		NxsTaxaBlock * taxaBlockTemplate;
+		NxsTaxaAssociationBlock * taxaAssociationBlockTemplate;
+		NxsTreesBlock * treesBlockTemplate;
+		NxsUnalignedBlock * unalignedBlockTemplate;
+
+		std::vector<NxsAssumptionsBlock *> assumptionsBlockVec;
+		std::vector<NxsCharactersBlock *> charactersBlockVec;
+		std::vector<NxsDataBlock *> dataBlockVec;
+		std::vector<NxsDistancesBlock *> distancesBlockVec;
+		std::vector<NxsStoreTokensBlockReader *> storerBlockVec;
+		std::vector<NxsTaxaBlock *> taxaBlockVec;
+		std::vector<NxsTaxaAssociationBlock *> taxaAssociationBlockVec;
+		std::vector<NxsTreesBlock *> treesBlockVec;
+		std::vector<NxsUnalignedBlock *> unalignedBlockVec;
+
+		std::string errorMsg;
+	private:
+		PublicNexusReader(const PublicNexusReader &); // do not define. Not copyable
+		PublicNexusReader & operator=(const PublicNexusReader &); // do not define. Not copyable
+
+	};
+
+
+#endif
+
+
diff --git a/src/ncl/nxsreader.h b/src/ncl/nxsreader.h
new file mode 100644
index 0000000..1f60eb1
--- /dev/null
+++ b/src/ncl/nxsreader.h
@@ -0,0 +1,612 @@
+//	Copyright (C) 1999-2003 Paul O. Lewis
+//
+//	This file is part of NCL (Nexus Class Library) version 2.0.
+//
+//	NCL is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	NCL is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with NCL; if not, write to the Free Software Foundation, Inc.,
+//	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+
+#ifndef NCL_NXSREADER_H
+#define NCL_NXSREADER_H
+
+#include "ncl/nxsdefs.h"
+#include "ncl/nxsstring.h"
+#include "ncl/nxsexception.h"
+#include "ncl/nxstoken.h"
+
+class NxsBlock;
+class NxsBlockFactory;
+class NxsTaxaBlockFactory;
+class NxsAssumptionsBlockAPI;
+class NxsCharactersBlockAPI;
+class NxsTaxaBlockAPI;
+class NxsTreesBlockAPI;
+
+typedef std::list<NxsBlock *> BlockReaderList;
+typedef std::map<std::string, BlockReaderList> BlockTypeToBlockList;
+
+
+/*!
+	This is the class that orchestrates the reading of a NEXUS data file, and so is the central class to NCL.
+
+	NxsReader does not call delete on any of the blocks that are added to it via the Add method.
+
+
+	In the "classic" (v2.0) NCL API:
+		-# An NxsReader is created.
+		-# pointers to instances of NxsBlocks that are expected to be needed should be added to `blockList' using the
+			NxsReader::Add() member function.
+		-# NxsReader::Execute() is then called, which reads the data file until encountering a block name, at which
+			point the correct block is looked up in `blockList' and that object's NxsBlock::Read() method is called.
+		-# NxsReader::PostBlockReadingHook(NxsBlock) is called after a block is successfully read.  This allows one to gather
+			the parsed data from the NxsBlock.  If another block of the same type is encountered, then NxsBlock::Reset()
+			will be called and the same NxsBlock instance will be used to read the next block.
+		.
+	Versions of NCL after 2.0.04 also support a "factory API" that augments the former behavior:
+		-# An NxsReader is created.
+		-# In addition to NxsBlocks added using the Add method, you can register instances of a NxsBlockFactory using
+			the NxsReader::AddFactory() method.
+		-# In the NxsReader::Execute() method, if an appropriate block is not found in the `blockList` then the
+			factories are asked to create a block for the current block name.  The first non-NULL block pointer
+			returned is used.
+		-#  PostBlockReadingHook is still called, but blocks created by a factory will not be "recycled" later in the
+			NxsReader::Execute(), so it is not necessary to pull all of the data out of them.
+		-# If a block created by a factory is skipped or has an error, then the factory will be notified using
+			NxsBlockFactory::BlockError(NxsBlock *) or NxsBlockFactory::BlockSkipped(NxsBlock *).  In the event of
+			skipping or an error, NxsReader will never refer to that instance of the factory-created block again.
+			Hence the base class behavior of BlockError() and BlockSkipped() is to delete the instance.
+		-#	Every time a NxsBlock successfully reads a NEXUS block, the NxsBlock is added to a container of "used" blocks.
+			When a block is Reset() it is removed from this container.  NxsReader::GetUsedBlocks() can be called at any
+			point to get a copy of this container (which maps a block type name to a list of NxsBlock*). This container
+			thus stores the state of the parsed NEXUS file.  If no blocks were recycled (if all of the blocks came from
+			factories rather than blocks added using the NxsReader::Add() method), then the result of GetUsedBlocks() will
+			contain a binary representation of every block parsed.
+		.
+
+	Important: The use of the factories that are supplied with NCL can trigger casts of pointers. This can be unsafe if
+		you create NxsBlocks that do not have the expected inheritance.  For example, if you create a class to
+		read Taxa blocks, but do NOT derive this class from NxsTaxaBlockAPI then the casts will be unsafe.  If you
+		do this, and you wish to use the factory API then you must write your own factories.
+
+	See the NCL_TOP/examples/normalizer programs for an example of the factory API (using the MultiFormatReader).  In those
+		examples the PublicNexusReader::PublicNexusReader() constructor is the function that installs the templates for
+		a clone factory.
+
+	\section NexusErrors Errors in NEXUS files
+	When an illegal construct is found, a NxsException is raised. This exception is caught within NxsReader::Execute
+		and NxsReader::NexusError is called. This allows subclasses of NxsReader to handle exceptional circumstances by
+		overriding one function. Whenever you are using a NxsReader instance that is NOT a subclass of ExceptionRaisingNxsReader
+		this is still the behavior (thus any code that was written to the v2.0 API will still have this behavior).
+
+	ExceptionRaisingNxsReader implements NexusError by raising another NxsException.  This results in the exception
+		being propagated to the caller.  The newer NxsReader classes (including PublicNexusReader and MultiFormatReader)
+		are derived from ExceptionRaisingNxsReader. So their Execute methods will also raise NxsExceptions. Deriving a subclass
+		of these classes and overriding NexusError would prevent this behavior.
+
+	The advantage of ExceptionRaisingNxsReader is that one is no longer required to subclass NxsReader to handle errors.
+
+	\section signalsection Signal Handling in NCL:
+		Traditionally, the user of an application can send a SIGINT to cause it to stop. NCL has very limited support
+	for handling signals, and this support is turned off by default.
+
+		If you want NCL to raise an NxsSignalCanceledParseException if a signal is encountered during a parse then call:
+			NxsReader::setNCLCatchesSignals(true);
+	before calling Execute on your NxsReader instance. Note that only the slowly-parsed blocks (TREES and CHARACTERS) and
+	the NxsReader currently check to see if a signal has been caught.  So the NxsSignalCanceledParseException will often
+	have a generic message indicating that the signal was caught during the parse.
+
+		The NCL signal handler is only installed during NxsReader::Execute calls!
+
+		Note that if you want your program to exit on SIGINT, you can leave the signal handling turned off. If you do turn
+			NCL's signal handling on, then after you do your app's clean up you'll have to exit with something like this:
+				signal(SIGINT, SIG_DFL);
+				kill(getpid(), SIGINT);
+			\see http://www.cons.org/cracauer/sigint.html
+
+*/
+class NxsReader
+	{
+		static void installNCLSignalHandler();
+		static void uninstallNCLSignalHandler();
+	public:
+		/** Enum of arguments for ExceptionRaisingNxsReader ctor */
+		enum WarningHandlingMode
+			{
+			IGNORE_WARNINGS,  /**< warnings that are not error-generating are ignored silently */
+			WARNINGS_TO_STDERR, /**< warnings that are not error-generating are written to the standard error stream */
+			WARNINGS_TO_STDOUT, /**< warnings that are not error-generating are written to the standard output stream */
+			WARNINGS_ARE_ERRORS /**< warnings that are not error-generating by some other mechanism are still converted to NxsException objects */
+			};
+		/** Enum of the different levels of warnings.  See NxsReader::SetWarningOutputLevel */
+		enum NxsWarnLevel
+			{ //TODO: we need another warning level for status messages.
+			UNCOMMON_SYNTAX_WARNING = 0,  /**< Legal but uncommon syntax that could indicate a typo */
+			SKIPPING_CONTENT_WARNING = 1, /**< Content is being skipped by NCL */
+			OVERWRITING_CONTENT_WARNING = 2, /**< New content is replacing old information (e.g. CharSets with the same name as a previously defined CharSet) */
+			DEPRECATED_WARNING = 3, /**< Use of a deprecated feature */
+			AMBIGUOUS_CONTENT_WARNING = 4, /**< Commands that could have multiple plausible interpretations */
+			ILLEGAL_CONTENT_WARNING = 5, /**< Content that violates NEXUS rules, but is still parseable (e.g. CharPartitions that only have some of the characters) */
+			PROBABLY_INCORRECT_CONTENT_WARNING = 6, /**< Severe warning that is generated when the file contains characters that should almost certainly be removed */
+			FATAL_WARNING = 7, /**< A higher warning level than any of the warnings generated by NCL.  Primarily used in constructs such as (FATAL_WARNING - 1) to mean only the most severe warnings. */
+			SUPPRESS_WARNINGS_LEVEL = 8 /**< If the NxsReader's warning level is set to this, then warnings will be suppressed */
+			};
+
+		/*! If true then NCL will call a handler function if signals are encountered during NxsReader::Execute
+			(signal handling is off by default).
+			\sa The section on signal handling \ref signalsection
+		*/
+		static void setNCLCatchesSignals(bool);
+		/*! \returns true if NCL will call a handler function if signals are encountered during NxsReader::Execute
+					(signal handling is off by default).
+			\sa The section on signal handling \ref signalsection
+		*/
+		static bool getNCLCatchesSignals();
+		/*! Usually used internally when signal catching is enabled. If the number of SIGINTs has changed since the last
+			call, then NCL has detected a signal.
+			\sa The section on signal handling \ref signalsection
+		*/
+		static unsigned getNumSignalIntsCaught();
+		/*! Used internally.  If NCL is handling SIGINTs then this will be incremented with every SIGINT.
+			\sa The section on signal handling \ref signalsection
+		*/
+		static void setNumSignalsIntsCaught(unsigned);
+
+
+						NxsReader();
+		virtual			~NxsReader();
+
+
+		// functions to add and remove Block reader objects or factories for block readers.
+		virtual void    Add(NxsBlock *newBlock);
+		void			Detach(NxsBlock *newBlock);
+		virtual void    AddFactory(NxsBlockFactory *);
+		void			RemoveFactory(NxsBlockFactory *);
+
+		// trigger for NEXUS parsing.
+		virtual void	Execute(NxsToken& token, bool notifyStartStop = true);
+
+		// shortcuts for calling execute...
+		void			ReadFilepath(const char *filename);
+		void			ReadFilestream(std::istream & inf);
+		void			ReadStringAsNexusContent(const std::string & s);
+
+		virtual void	DebugReportBlock(NxsBlock &nexusBlock);
+
+		const char		*NCLNameAndVersion();
+		const char		*NCLCopyrightNotice();
+		const char		*NCLHomePageURL();
+
+		// hooks for subclasses to specialize the behavior
+		virtual void	ExecuteStarting();
+		virtual void	ExecuteStopping();
+		virtual bool	EnteringBlock(NxsString blockName);
+		virtual void	ExitingBlock(NxsString blockName);
+		virtual void	OutputComment(const NxsString &comment);
+		virtual void	SkippingDisabledBlock(NxsString blockName);
+		virtual void	SkippingBlock(NxsString blockName);
+
+		/*! This hook is called by the block readers when they encounter content
+			that is interpretable but is questionable (or even illegal according
+			to the NEXUS standard).
+
+			The default NexusWarn behavior is to generate a NxsException for any
+			warnLevel >= PROBABLY_INCORRECT_CONTENT_WARNING
+		 	and to ignore all other warnings.
+		*/
+		virtual void	NexusWarn(const std::string &s, NxsWarnLevel warnLevel, file_pos pos, long line, long col)
+			{
+			// Anything milder than PROBABLY_INCORRECT_CONTENT_WARNING is dropped without any output.
+			if (warnLevel < PROBABLY_INCORRECT_CONTENT_WARNING)
+				return;
+			NxsString errText(s.c_str());
+			throw NxsException(errText, pos, line, col);
+			}
+		/*! Used internally as a more convenient way of calling NexusWarn */
+		void	NexusWarnToken(const std::string &m, NxsWarnLevel warnLevel, const ProcessedNxsToken &token)
+			{
+			this->NexusWarn(m, warnLevel, token.GetFilePosition(), token.GetLineNumber(), token.GetColumnNumber()); // location is taken from the processed token
+			}
+		/*! Used internally as a more convenient way of calling NexusWarn */
+		void	NexusWarnToken(const std::string &m, NxsWarnLevel warnLevel, const NxsToken &token)
+			{
+			this->NexusWarn(m, warnLevel, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn()); // location is taken from the tokenizer's current position
+			}
+
+		/*! Called when an error is detected */
+		virtual void	NexusError(NxsString msg, file_pos pos, long line, long col);
+
+
+
+		virtual void			ClearUsedBlockList();
+		NxsBlock 			   *CreateBlockFromFactories(const std::string & currBlockName, NxsToken &token, NxsBlockFactory **sourceOfBlock = NULL);
+		BlockReaderList 		GetUsedBlocksInOrder();
+		BlockReaderList 		GetBlocksFromLastExecuteInOrder();
+		BlockTypeToBlockList 	GetUsedBlocks();
+		std::set<NxsBlock*> 	GetSetOfAllUsedBlocks();
+
+		NxsTaxaBlockAPI 		*GetLastStoredTaxaBlock();
+		NxsCharactersBlockAPI 	*GetLastStoredCharactersBlock();
+		NxsTreesBlockAPI 		*GetLastStoredTreesBlock();
+
+
+		NxsTaxaBlockAPI 		*GetTaxaBlockByTitle(const char *title, unsigned *nMatches);
+		NxsCharactersBlockAPI 	*GetCharBlockByTitle(const char *title, unsigned *nMatches);
+		NxsTreesBlockAPI 		*GetTreesBlockByTitle(const char *title, unsigned *nMatches);
+
+		NxsTaxaBlockFactory 	*GetTaxaBlockFactory();
+		//NxsCharactersBlockFactory 	*GetCharBlockFactory();
+		//NxsTreesBlockFactory 	*GetTreesBlockFactory();
+
+		void			SetTaxaBlockFactory(NxsTaxaBlockFactory *);
+		//void			SetCharBlockFactory(NxsCharactersBlockFactory *);
+		//void			SetTreesBlockFactory(NxsTreesBlockFactory *);
+
+		virtual void    DeleteBlocksFromFactories();
+		unsigned		RemoveBlockFromUsedBlockList(NxsBlock *);
+
+		/*! Throws away references to all blocks that have been read. If the block
+			 was registered with the reader, then "Reset" is called on the block.
+				If the block came from a factory then the reference to the block is
+					removed from the reader (resulting in a memory leak if the client
+					code does not delete the block).
+			 This can be called if the client would like to store the information
+				from the NEXUS file, and get rid of the blocks to save memory (but
+				still maintain things like factories that were registered with the
+				NxsReader and tweaks to the default settings).
+		*/
+		virtual void	ClearContent();
+
+		/*!
+			 This function is useful after ClearContent() has been called.
+			 Instances of blocks that have been read can be registered with the reader
+				so that future NEXUS files can be parsed with the context of those blocks.
+			 For example, if you want to reregister a NxsTaxaBlock before reading
+				 a trees file, then you could call:
+				reader->AddReadBlock("TAXA", taxaB);
+			 NOTE: if you are using a PublicNexusReader, or MultiFormatReader
+				you should almost certainly call the type-specific forms such as
+				reader->AddReadTaxaBlock(taxaB);
+		*/
+		virtual void AddReadBlock(const NxsString &blockID, NxsBlock *block)
+			{
+			BlockReadHook(blockID, block, NULL); // no token is available here, so the hook's default (NULL) is passed explicitly
+			}
+
+		/*! Call cullIdenticalTaxaBlocks(true) before reading a file if you want
+				the reader to discard a TaxaBlock that is identical to a previous
+				taxa block.  Use of this assumes that the reader of taxa blocks is
+				a NxsTaxaBlockAPI instance.
+			\warning: this function should only be called if you have registered
+				a NxsTaxaBlockFactory.  The culling of blocks WILL CALL DELETE
+				on them.
+		*/
+		void cullIdenticalTaxaBlocks(bool v=true)
+			{
+			destroyRepeatedTaxaBlocks = v; // only records the flag; nothing is culled by this call itself
+			}
+		std::vector<std::string> GetAllTitlesForBlock(const NxsBlock *b) const;
+
+
+		/*! Passing true to this method is a hacky way to enable all status messages while still filtering
+			NexusWarn messages.
+		*/
+		void SetAlwaysReportStatusMessages(bool v) {
+			alwaysReportStatusMessages = v; // only records the flag for later use
+		}
+		/*! The reader's currentWarningLevel is set here.
+			By default this field is set to UNCOMMON_SYNTAX_WARNING.
+			By setting it to any other facet of NxsWarnLevel you can reduce the
+				number of warnings sent to std::cerr.
+			The warning level is checked in ExceptionRaisingNxsReader::NexusWarn
+				and NxsReader::statusMessage.
+			If the level of the message is greater or equal to the readers level
+				then the message will be sent to std::cerr
+			By calling SetWarningOutputLevel(SUPPRESS_WARNINGS_LEVEL) you can
+				make the reader ignore warnings.
+			\note if alwaysReportStatusMessages then NxsReader::statusMessage
+				will report all messages
+		*/
+		void SetWarningOutputLevel(NxsWarnLevel lev) {
+			this->currentWarningLevel = lev; // stored as-is; no range check is performed
+		}
+		NxsWarnLevel GetWarningOutputLevel() const {
+			return this->currentWarningLevel; // the reader's current warning level
+		}
+
+		/*! Messages about the status of processing a file (such as "Executing...")
+			are sent here. If the reader's currentWarningLevel is set to the lowest
+			level (UNCOMMON_SYNTAX_WARNING) then these messages will show up in stderr.
+		*/
+		virtual void statusMessage(const std::string & m) const;
+
+		/*! \deprecated This function is almost never needed.
+			\returns true if no blocks have been registered as readers (does not indicate
+				whether content has been read, nor does it indicate if any
+				factories have been added).
+		*/
+		bool			BlockListEmpty();
+		unsigned		PositionInBlockList(NxsBlock *b);
+
+		void			Reassign(NxsBlock *oldb, NxsBlock *newb);
+
+		/*! The block `b` will be given a priority level of `priorityLevel`
+
+			By default all blocks have a priority level of 0. Lowering a block's
+				priority level below 0, means that a Get....ByTitle() function
+				will not return the block if it finds a matching block that has
+				a higher priority
+
+			If you are re-using blocks (if you are not using the v2.1 API) be
+				aware that if a block has a low (negative) priority, but is then
+				used to read content then its priority will be bumped back up to 0.
+
+			The block priority affects calls to Get...ByTitle() functions and
+				Find...ByTitle() functions.
+
+			This function is mainly used to "demote" NxsTaxaBlock instances so that they
+				will not clash with a TAXA block found in the file. \ref TaxaBlockClashes
+		*/
+		void AssignBlockPriority(NxsBlock *b, int priorityLevel);
+		/*! Returns the block priority for a block (or 0 if the block instance is unknown) */
+		int	GetBlockPriority(NxsBlock *b) const;
+		/*! Lowers the priority of all read blocks to `priorityLevel` */
+		void DemoteBlocks(int priorityLevel=-1);
+	protected:
+		/*! A convenience function to allow one to quickly get a list of the block readers
+			that were generated (or used) in reading a filepath */
+		static 			BlockReaderList parseFileWithReader(NxsReader & reader, const char *filepath, bool parsePrivateBlocks=true, bool storeTokenInfo=true);
+		static bool nclCatchesSignals; // default False;
+		typedef void (*SignalHandlerFuncPtr) (int);
+		static SignalHandlerFuncPtr prevSignalCatcher; // the signal handler that was installed before NCL's signal handler
+		static unsigned numSigIntsCaught;
+		static bool prevSignalStored ;
+
+		void			CoreExecutionTasks(NxsToken& token, bool notifyStartStop = true);
+
+
+		void			AddBlockToUsedBlockList(const std::string &, NxsBlock *, NxsToken *);
+		bool 			BlockIsASingeltonReader(NxsBlock *) const ;
+		void 			BlockReadHook(const NxsString &currBlockName, NxsBlock *currBlock, NxsToken *token = NULL );
+		bool 			ExecuteBlock(NxsToken &token, const NxsString &currBlockName, NxsBlock *currBlock, NxsBlockFactory * sourceOfBlock);
+		NxsBlock	   *FindBlockOfTypeByTitle(const std::string &btype, const char *title, unsigned *nMatches);
+		NxsBlock	   *FindBlockByTitle(const BlockReaderList & chosenBlockList, const char *title, unsigned *nMatches);
+		BlockReaderList FindAllBlocksByTitle(const BlockReaderList & chosenBlockList, const char *title);
+		NxsBlock	   *GetLastStoredBlockByID(const std::string &key);
+		NxsTaxaBlockAPI *GetOriginalTaxaBlock(const NxsTaxaBlockAPI *) const;
+		bool			IsRepeatedTaxaBlock(const NxsTaxaBlockAPI *) const;
+		void 			NewBlockTitleCheckHook(const std::string &blockname, NxsBlock *p, NxsToken *token);
+		bool 			ReadUntilEndblock(NxsToken &token, const std::string & currBlockName);
+		void			RegisterAltTitle(const NxsBlock * b, std::string t);
+		std::set<NxsBlock *> RemoveBlocksFromFactoriesFromUsedBlockLists();
+		virtual void 	PostBlockReadingHook(NxsBlock &);
+
+
+		NxsBlock		*blockList;	/* pointer to first block in list of blocks */
+		NxsBlock		*currBlock;	/* pointer to current block in list of blocks */
+		typedef std::list<NxsBlockFactory *> BlockFactoryList;
+		NxsTaxaBlockFactory *taxaBlockFactory;
+		BlockFactoryList factories; /* list of pointers to factories capable of creating NxsBlock objects*/
+		bool destroyRepeatedTaxaBlocks;
+		NxsWarnLevel currentWarningLevel;
+		bool alwaysReportStatusMessages;
+
+	private:
+
+		BlockReaderList FindAllBlocksByTitleNoPrioritization(const BlockReaderList & chosenBlockList, const char *title);
+
+		BlockReaderList blocksInOrder;
+		std::map<NxsBlock *, int> blockPriorities;
+
+		BlockReaderList lastExecuteBlocksInOrder;
+		BlockTypeToBlockList blockTypeToBlockList;
+		typedef std::pair<unsigned, std::list<std::string> > NxsBlockTitleHistory;
+		typedef std::map<std::string, NxsBlockTitleHistory > NxsBlockTitleHistoryMap;
+		NxsBlockTitleHistoryMap blockTitleHistoryMap;
+		std::map<const NxsBlock *, std::list<std::string> > blockTitleAliases; // to deal with culling blocks and then using the titles of culled copies
+
+
+	};
+
+typedef NxsBlock NexusBlock;
+typedef NxsReader Nexus;
+
+/*! A subclass of NxsReader that is used in much of NCL v2.1.
+
+	The NexusError function raises a NxsException so that all errors are treated
+		as parse-ending conditions that the caller of Execute must handle.
+	See \ref NexusErrors
+*/
+class ExceptionRaisingNxsReader : public NxsReader
+	{
+	public:
+		/*! The `mode` argument should be a facet of NxsReader::WarningHandlingMode.
+			This argument determines what happens to warnings which are NOT converted
+			to exceptions -- by default only content that is probably incorrect will
+			be converted to a NxsException -- see ::SetWarningToErrorThreshold().
+
+
+		*/
+		ExceptionRaisingNxsReader(NxsReader::WarningHandlingMode mode=NxsReader::WARNINGS_TO_STDERR)
+			:warnMode(mode),
+			warningToErrorThreshold(PROBABLY_INCORRECT_CONTENT_WARNING)
+			{}
+		/*! Raise a NxsException. */
+		void NexusError(NxsString msg, file_pos pos, long line, long col)
+			{
+			throw NxsException(msg, pos, line, col);
+			}
+		virtual void NexusWarn(const std::string & msg, NxsWarnLevel level, file_pos pos, long line, long col);
+
+		void SkippingBlock(NxsString blockName);
+		void SkippingDisabledBlock(NxsString blockName);
+		/*! Sets the threshold for converting a warning into an error.   This
+			is useful for making the parser stricter.
+
+			Argument should be a facet of NxsReader::NxsWarnLevel.
+			Overrides the default setting of PROBABLY_INCORRECT_CONTENT_WARNING
+		*/
+		void SetWarningToErrorThreshold(int t)
+			{
+			warningToErrorThreshold = t;
+			}
+		virtual void ClearContent()
+			{
+			NxsReader::ClearContent();
+			}
+	private:
+		NxsReader::WarningHandlingMode warnMode;
+		int warningToErrorThreshold;
+	};
+
+/*! A subclass of NxsReader that preserves the same output style as version 2.0, but
+		allows for more flexibility in the streams that are used for output.
+
+	Messages from SkippingBlock, SkippingDisabledBlock, and Warnings are sent to the outstream
+	Warnings and Errors are written to the errstream
+
+*/
+class DefaultErrorReportNxsReader : public NxsReader
+	{
+	public:
+		static BlockReaderList parseFile(const char *filepath, std::ostream * stdOutstream, std::ostream * errOutstream, bool parsePrivateBlocks=true, bool storeTokenInfo=true);
+		/*! creates an instance that will write messages to the specified streams */
+		DefaultErrorReportNxsReader(std::ostream * stdOutstream, /*!< outputstream */
+									std::ostream * errOutstream) /*!< error stream */
+			:NxsReader(),
+			stdOut(stdOutstream),
+			errOut(errOutstream)
+			{
+			}
+
+		virtual ~DefaultErrorReportNxsReader() {}
+
+		/*! \returns true. silent */
+		virtual bool EnteringBlock(NxsString )
+			{
+			return true;
+			}
+
+		/*! writes a message to output  */
+		void SkippingBlock(NxsString blockName)
+			{
+			if (stdOut != 0L)
+				{
+				*stdOut << "[!Skipping unknown block (" << blockName << ")...]\n";
+				stdOut->flush();
+				}
+			}
+
+		/*! writes a message to output  */
+		void SkippingDisabledBlock(NxsString blockName)
+			{
+			if (stdOut != 0L)
+				{
+				*stdOut << "[!Skipping disabled block (" << blockName << ")...]\n";
+				stdOut->flush();
+				}
+			}
+
+		/*! Ignores the warning if warnLevel < currentWarningLevel. Otherwise, if warnLevel is
+			>= PROBABLY_INCORRECT_CONTENT_WARNING a NxsException is raised; any other warning is
+			written to the error stream or, if there is no error stream, to the output stream.
+		*/
+		void NexusWarn(const std::string & msg, /*!< description of the warning */
+						NxsWarnLevel warnLevel, /*!< severity of the warning*/
+						file_pos pos, long line, long col)
+			{
+			if (warnLevel < currentWarningLevel)
+				return;
+			if (warnLevel >= PROBABLY_INCORRECT_CONTENT_WARNING)
+				{
+				NxsString e(msg.c_str());
+				throw NxsException(e, pos, line, col);
+				}
+			if (errOut != 0)
+				{
+				*errOut << "\nWarning:  ";
+				if (line > 0 || pos > 0)
+					*errOut << "at line " << line << ", column " << col << " (file position " << pos << "):\n";
+				*errOut  << msg << std::endl;
+				}
+			else if (stdOut != 0L)
+				{
+				*stdOut << "\nWarning:  ";
+				if (line > 0 || pos > 0)
+					*stdOut << "at line " << line << ", column " << col << " (file position " << pos << "):\n";
+				*stdOut  << msg << std::endl;
+				}
+			}
+
+		/*! Raises a NxsException.
+		*/
+		void NexusError(NxsString msg, file_pos pos, long line, long col)
+			{
+			NexusWarn(msg, NxsReader::FATAL_WARNING, pos, line, col);
+			throw NxsException(msg, pos, line, col);
+			}
+
+		std::ostream * stdOut;
+		std::ostream * errOut;
+	};
+
+/*!
+	Returns a map from all block ids that have been read to all instances that the NxsReader knows have been read and
+		have NOT been cleared.
+	NOTE:  If the factory interface to NCL is not being used this may not be a complete list of all of the blocks that
+		have been read!!!
+*/
+inline BlockTypeToBlockList NxsReader::GetUsedBlocks()
+	{
+	return blockTypeToBlockList;
+	}
+
+/*! Returns true if GetOriginalTaxaBlock() returns a non-NULL block for `testB` (i.e. `testB` is treated as a repeated taxa block) */
+inline bool NxsReader::IsRepeatedTaxaBlock(const NxsTaxaBlockAPI * testB) const
+	{
+	return (GetOriginalTaxaBlock(testB) != NULL);
+	}
+
+
+/*! Returns a list of  all blocks that the NxsReader knows have been read and have NOT been cleared.
+
+	If a block has been "implied" by another block then the implied block will appear before the
+	explicit block.
+*/
+inline BlockReaderList NxsReader::GetUsedBlocksInOrder()
+	{
+	return blocksInOrder;
+	}
+
+/*! Similar to NxsReader::GetUsedBlocksInOrder, except this list of blocks is cleared at the beginning
+	of each  NxsReader::Execute  or (NxsReader::Read...) call. So the returned list will
+	only reflect blocks from the last execution operation.
+
+	If a block has been "implied" by another block then the implied block will appear before the
+	explicit block.
+*/
+inline BlockReaderList NxsReader::GetBlocksFromLastExecuteInOrder()
+	{
+	return lastExecuteBlocksInOrder;
+	}
+
+/*! Convenience function to get the factory for NxsTaxaBlocks */
+inline NxsTaxaBlockFactory * NxsReader::GetTaxaBlockFactory()
+	{
+	return this->taxaBlockFactory;
+	}
+
+
+
+#endif
+
diff --git a/src/ncl/nxssetreader.h b/src/ncl/nxssetreader.h
new file mode 100644
index 0000000..14d20cb
--- /dev/null
+++ b/src/ncl/nxssetreader.h
@@ -0,0 +1,104 @@
+//	Copyright (C) 1999-2003 Paul O. Lewis
+//
+//	This file is part of NCL (Nexus Class Library) version 2.0.
+//
+//	NCL is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	NCL is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with NCL; if not, write to the Free Software Foundation, Inc.,
+//	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+
+#ifndef NCL_NXSSETREADER_H
+#define NCL_NXSSETREADER_H
+#include <sstream>
+#include "ncl/nxstoken.h"
+#include "ncl/nxsblock.h"
+/*!
+	A class for reading NEXUS set objects and storing them in a set of int values. The NxsUnsignedSet `nxsset' will be
+	cleared, and `nxsset' will be built up as the set is read, with each element in the list storing a
+	member of the set (ranges are stored as individual elements). This class handles set descriptions of the following
+	form:
+>
+	4-7 15 20-.\3;
+>
+	The above set includes every number from 4 to 7 (inclusive), 15 and every third number from 20 to max, where `max'
+	would ordinarily be set to either the last character (if `settype' is `NxsSetReaderEnum::charset') or the last
+	taxon (if `settype' is `NxsSetReaderEnum::taxset'). If `max' equaled 30, the example above would be stored as
+	follows (remember that internally the numbers are stored with offset 0, even though in the NEXUS data file the
+	numbers always start at 1).
+>
+	3, 4, 5, 6, 14, 19, 22, 25, 28
+>
+	The following example of how NxsSetReader is used comes from the NxsCharactersBlock::HandleEliminate function:
+>
+	NxsSetReader(token, ncharTotal, eliminated, *this, NxsSetReader::charset).Run();
+>
+	This reads in a set of eliminated characters from a NEXUS data file, storing the resulting set in the data member
+	`eliminated'. In this case `max' is set to `ncharTotal' (the total number of characters), and the block reference
+	is set to the NxsCharactersBlock object, which is used for looking up labels.
+*/
+class NxsSetReader
+	{
+		static unsigned InterpretTokenAsIndices(NxsToken &t,
+								 const NxsLabelToIndicesMapper &,
+								 const char * setType,
+								 const char * cmd,
+								 NxsUnsignedSet * destination);
+		static void AddRangeToSet(unsigned first, unsigned last, unsigned stride, NxsUnsignedSet * destination, const NxsUnsignedSet * taboo, NxsToken &t);
+	public:
+		static void ReadSetDefinition(NxsToken &t,
+								 const NxsLabelToIndicesMapper &,
+								 const char * setType,
+								 const char * cmd,
+								 NxsUnsignedSet * destination,
+								 const NxsUnsignedSet * taboo = NULL);
+		static void	WriteSetAsNexusValue(const NxsUnsignedSet	&, std::ostream & out);
+		static std::string	GetSetAsNexusString(const NxsUnsignedSet &s)	/* returns the text that WriteSetAsNexusValue writes for `s' */
+			{
+			std::stringstream os;
+			NxsSetReader::WriteSetAsNexusValue(s, os);
+			//os << ' ';
+			return os.str();
+			}
+		static std::vector<unsigned> GetSetAsVector(const NxsUnsignedSet &s);
+		enum NxsSetReaderEnum	/* For use with the variable `settype' */
+			{
+			generic = 1,		/* means expect a generic set (say, characters weights) */
+			charset,			/* means expect a character set */
+			taxset				/* means expect a taxon set */
+			};
+
+						NxsSetReader(NxsToken &t, unsigned maxValue, NxsUnsignedSet &iset, NxsBlock &nxsblk, unsigned type);
+
+		bool			Run();
+		void			WriteAsNexusValue(std::ostream & out) const	/* writes the set being read (`nxsset') to `out' */
+			{
+			WriteSetAsNexusValue(nxsset, out);
+			}
+	protected:
+
+		bool			AddRange(unsigned first, unsigned last, unsigned modulus = 0);
+
+	private:
+
+		unsigned		GetTokenValue();
+
+		NxsBlock		&block;		/* reference to the block object used for looking up labels */
+		NxsToken		&token;		/* reference to the token being used to parse the NEXUS data file */
+		NxsUnsignedSet	&nxsset;	/* reference to the NxsUnsignedSet set being read */
+		unsigned		max;		/* maximum number of elements in the set */
+		unsigned		settype;	/* the type of set being read (see the NxsSetReaderEnum enumeration) */
+	};
+
+typedef NxsSetReader SetReader;
+
+#endif
diff --git a/src/ncl/nxsstring.h b/src/ncl/nxsstring.h
new file mode 100644
index 0000000..7509ac0
--- /dev/null
+++ b/src/ncl/nxsstring.h
@@ -0,0 +1,742 @@
+//	Copyright (C) 1999-2003 Paul O. Lewis and Mark T. Holder
+//
+//	This file is part of NCL (Nexus Class Library) version 2.0.
+//
+//	NCL is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	NCL is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with NCL; if not, write to the Free Software Foundation, Inc.,
+//	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+
+#ifndef NCL_NXSSTRING_H
+#define NCL_NXSSTRING_H
+
+
+#include <cstring>
+#include <cstdio>
+#include <functional>
+#include <ostream>
+#include <string>
+#include "ncl/nxsdefs.h"
+
+
+
+
+
+// Define HAVE_NCL_NXSSTRING_ENDL if your code needs it
+#if ! defined (HAVE_NCL_NXSSTRING_ENDL)
+#   define HIDE_NCL_NXSSTRING_ENDL
+#else
+#   warning "use of endl with NxsString instances has been deprecated"
+#endif
+class IndexSet;
+
+/*!
+	A string class for use with the Nexus Class Library. NxsString inherits most of its functionality from the standard
+	template library class string, adding certain abilities needed for use in NCL, such as the ability to discern
+	whether a short string represents an abbreviation for the string currently stored. Another important addition is
+	the member function PrintF, which accepts a format string and an arbitrary number of arguments, allowing a string
+	to be built in a manner similar to the standard C function printf. Many operators are also provided for appending
+	numbers to the ends of strings, an ability which is very useful for producing default labels (e.g. taxon1, taxon2,
+	etc.).
+*/
+class NxsString
+  : public std::string
+	{
+	public:
+		enum NxsQuotingRequirements
+			{
+			kNoQuotesNeededForNexus = 0, /// this enum value is kept equivalent to false
+			kSingleQuotesNeededForNexus, /// punctuation or non-space whitespace characters present
+			kUnderscoresSufficeForNexus  /// No nexus token-breakers
+			};
+
+		class NxsX_NotANumber {};	/* exception thrown if attempt to convert string to a number fails */
+		static bool case_insensitive_equals(const char *o, const char * t);
+		static bool to_long(const char *c, long *n);
+		static bool to_double(const char *c, double *n);
+		static std::string strip_leading_whitespace(const std::string & s);
+		static std::string strip_trailing_whitespace(const std::string & s);
+		static std::string strip_surrounding_whitespace(const std::string & s);
+		static std::string strip_whitespace(const std::string & s);
+		static void split(const std::string &s, std::list<std::string> *);
+		static std::string get_upper(const std::string &s)
+			{
+			std::string t(s);
+			to_upper(t);
+			return t;
+			}
+		static std::string & to_lower(std::string & s);
+		static std::string & to_upper(std::string & s);
+		static NxsQuotingRequirements determine_quoting_requirements(const std::string &);
+		static void blanks_to_underscores(std::string &s);
+		static void add_nxs_quotes(std::string &s);
+		static int index_in_vector(const std::string &t, const std::vector<std::string> &v);
+		static int index_in_array(const std::string &t, const char * * v, const unsigned n);
+
+		enum CmpEnum				/* enum that is used to specify string comparison modes */
+			{
+			respect_case,
+			no_respect_case,
+			abbrev
+			};
+
+							NxsString();
+							NxsString(const char *s);
+							NxsString(const NxsString &s);
+
+		static std::string	GetEscaped(const std::string &s);
+		static std::string	GetEscapedInt(const int &v);
+		static std::string	GetEscapedDouble(const double &v);
+		//	Accessors
+		//
+		bool				Abbreviates(const NxsString &s, NxsString::CmpEnum mode = NxsString::no_respect_case) const;
+		unsigned			ConvertToUnsigned() const;
+		int					ConvertToInt() const;
+		long				ConvertToLong() const;
+		double				ConvertToDouble() const;
+		bool				Equals(const NxsString &s, NxsString::CmpEnum mode = respect_case) const;
+		bool				EqualsCaseInsensitive(const NxsString &s) const;
+		NxsString			GetQuoted() const;
+		bool				IsADouble() const;
+		bool				IsALong() const;
+		bool				IsCapAbbreviation(const NxsString &s) const;
+		bool				IsInVector(const NxsStringVector &s, NxsString::CmpEnum mode = respect_case) const;
+		bool				IsStdAbbreviation(const NxsString &s, bool respectCase) const;
+		static bool			IsNexusPunctuation(const char c);
+		static bool         IsNewickPunctuation(const char c);
+		bool				QuotesNeeded() const;
+		NxsString 			UpperCasePrefix() const;
+		friend std::ostream &operator<<(std::ostream &out, const NxsString &s);
+
+		//	Modifiers
+		//
+		//NxsString		   &operator=(const NxsString &s);
+		NxsString			&operator=(char);
+		NxsString			&operator=(const char *s);
+		NxsString			&operator+=(const char *s);
+		NxsString			&operator+=(const std::string &s);
+		NxsString			&operator+=(const char c);
+		NxsString			&operator+=(const int i);
+		NxsString			&operator+=(unsigned i);
+		NxsString			&operator+=(unsigned long i);
+		NxsString			&operator+=(const long l);
+		NxsString			&operator+=(const double d);
+		NxsString			&operator+=(const IndexSet &d);
+		NxsString			&operator<<(int i);
+		NxsString			&operator<<(unsigned i);
+		NxsString			&operator<<(long l);
+		NxsString			&operator<<(unsigned long l);
+		NxsString			&operator<<(double d);
+		NxsString			&operator<<(const char *c);
+		NxsString			&operator<<(char c);
+		NxsString			&operator<<(const std::string &s);
+		NxsString			&operator<<(const IndexSet &s);
+#       if ! defined(HIDE_NCL_NXSSTRING_ENDL)
+            NxsString			&operator<<(NxsString &(*funcPtr)(NxsString	&));
+#       endif
+
+		// Functions that should be in base class string but aren't
+		void				clear();
+
+		int					PrintF(const char *formatStr, ...);
+
+		unsigned char		*p_str(unsigned char *) const;
+
+		NxsString			&AddQuotes();
+		NxsString 			&AddTail(char c, unsigned n);
+		NxsString			&NumberThenWord(unsigned i, NxsString s);
+		NxsString 			&ShortenTo(unsigned n);
+		NxsString			&AppendDouble(unsigned minFieldFormat, unsigned precFormat, double x);
+		NxsString 			&Capitalize();
+
+		NxsString 			&RightJustifyString(const NxsString &s, unsigned w, bool clear_first = false);
+		NxsString 			&RightJustifyLong(long x, unsigned w, bool clear_first = false);
+		NxsString 			&RightJustifyDbl(double x, unsigned w, unsigned p, bool clear_first = false);
+
+		NxsString 			&ToLower()
+			{
+			to_lower(*this);
+			return *this;
+			}
+		NxsString 			&ToUpper()
+			{
+			to_upper(*this);
+			return *this;
+			}
+
+
+		NxsString 			&BlanksToUnderscores();
+		NxsString 			&UnderscoresToBlanks();
+
+		//	Debugging
+		//
+		static NxsString 	ToHex(long p, unsigned nFours);
+	};
+
+#if defined (NXS_SUPPORT_OLD_NAMES)
+	typedef NxsString nxsstring;
+#endif
+
+
+/*!
+	Replaces the stored string with a copy of itself surrounded by single quotes (single quotes inside the string are
+	converted to the '' pair of characters that signify a single quote). Returns a reference to itself.
+*/
+inline NxsString &NxsString::AddQuotes()
+	{
+	add_nxs_quotes(*this);
+	return *this;
+	}
+
+inline std::string	NxsString::GetEscapedDouble(const double &v)
+	{
+	NxsString s;
+	s << v;
+	return NxsString::GetEscaped(s);
+	}
+
+inline std::string	NxsString::GetEscapedInt(const int &v)
+	{
+	NxsString s;
+	s << v;
+	return NxsString::GetEscaped(s);
+	}
+
+inline std::string	NxsString::GetEscaped(const std::string &s)
+	{
+	NxsQuotingRequirements r = determine_quoting_requirements(s);
+	if (r == kNoQuotesNeededForNexus)
+		return s;
+	std::string x(s.c_str());
+	if (r == kUnderscoresSufficeForNexus)
+		blanks_to_underscores(x);
+	else
+		add_nxs_quotes(x);
+	return x;
+	}
+
+
+/*!
+	Function object (Unary Predicate functor) that stores one string. The ()(const NxsString &) operator then returns the
+	result of a case-insensitive compare. Useful for STL find algorithms. Could be made faster than sequential case
+	insensitive comparisons, because the string stored in the object is just capitalized once.
+*/
+class NStrCaseInsensitiveEquals
+	{
+	public :
+
+					NStrCaseInsensitiveEquals(const NxsString &s);
+		bool		operator()(const NxsString &s);
+
+	protected :
+
+		NxsString	compStr;
+	};
+
+/*!
+	Function object (Unary Predicate functor) that stores one string. The ()(const NxsString &) operator then returns the
+	result of a case-sensitive compare. Useful for STL find algorithms.
+*/
+class NStrCaseSensitiveEquals
+	{
+	public :
+
+					NStrCaseSensitiveEquals(const NxsString &s);
+		bool		operator()(const NxsString &s) const;
+
+	protected :
+
+		NxsString	compStr;
+	};
+
+/*!
+	Binary function class that performs case-Insensitive string compares.
+*/
+struct NxsStringEqual
+  : public std::binary_function<NxsString, NxsString, bool>
+	{
+	bool operator()(const NxsString &x, const NxsString &y) const;
+	};
+
+// ############################# start NStrCaseInsensitiveEquals functions ##########################
+
+/*!
+	Creates a function object for case-insensitive comparisons of `s' to a container of strings.
+*/
+inline NStrCaseInsensitiveEquals::NStrCaseInsensitiveEquals(
+  const NxsString &s)	/* the string to be compared */
+	{
+	compStr = s;
+	compStr.Capitalize();
+	}
+
+/*!
+	Returns the result of a case-insensitive compare of `s' and the string stored when the NStrCaseInsensitiveEquals object
+	was created. Could be made more efficient (currently capitalizes the entire argument even though the first character may
+	be wrong).
+*/
+inline bool NStrCaseInsensitiveEquals::operator()(
+  const NxsString &s)	/* the string to be compared */
+	{
+	if (s.length() == compStr.length())
+		{
+		NxsString capS(s);
+		capS.Capitalize();
+		return capS == compStr;
+		}
+	return false;
+	}
+
+// ############################# start NStrCaseSensitiveEquals functions ##########################
+
+/*!
+	Creates a function object for case-sensitive comparisons of `s' to a container of strings.
+*/
+inline NStrCaseSensitiveEquals::NStrCaseSensitiveEquals(
+  const NxsString &s)	/* the string that all other strings will be compared to when the (const NxsString &) operator is called */
+	{
+	compStr = s;
+	}
+
+/*!
+	Returns the result of a case-sensitive compare of `s' and the string stored when the NStrCaseSensitiveEquals was
+	created.
+*/
+inline bool NStrCaseSensitiveEquals::operator()(
+  const NxsString &s)	/* the string to be compared */
+  const
+	{
+	return (compStr == s);
+	}
+
+// ############################# start NxsStringEqual functions ##########################
+
+/*!
+	Returns true if the strings `x' and `y' are identical (NOT case sensitive)
+*/
+inline bool NxsStringEqual::operator()(
+  const NxsString &x,	/* first string */
+  const NxsString &y)	/* second string to be compared with `x' */
+  const
+	{
+	return x.EqualsCaseInsensitive(y);
+	}
+
+// ############################# start NxsString functions ##########################
+
+/*!
+	The default constructor.
+*/
+inline NxsString::NxsString()
+	{
+	}
+
+/*!
+	Returns a single-quoted version of the NxsString. The calling object is not altered. Written for ease of use. Simply
+	copies the stored string, then returns the copy after calling its AddQuotes function.
+*/
+inline NxsString NxsString::GetQuoted()
+  const
+	{
+	NxsString s(*this);
+	s.AddQuotes();
+	return s;
+	}
+
+/*!
+	Most containers in the standard template library can be completely erased using the clear function, but none is
+	provided for the class string and hence is provided here.
+*/
+inline void NxsString::clear()
+	{
+	erase();
+	}
+
+/*!
+	Returns true if the Equals comparison between this string and any element of the vector `s' is true.
+*/
+inline bool NxsString::IsInVector(
+  const NxsStringVector &s, /* the vector of NxsString objects to be searched */
+  NxsString::CmpEnum mode)	/* the argument passed to the Equals function, which is called for every element in the vector `s' */
+  const
+	{
+	for (NxsStringVector::const_iterator sIt = s.begin(); sIt != s.end(); sIt++)
+		{
+		if (Equals(*sIt, mode))
+			return true;
+		}
+	return false;
+	}
+
+/*!
+	A constructor taking a C-string argument.
+*/
+inline NxsString::NxsString(
+  const char *s)	/* the C-string that forms the basis for the new NxsString object */
+	{
+	assign(s);
+	}
+
+/*!
+	A copy constructor taking a NxsString reference argument.
+*/
+inline NxsString::NxsString(
+  const NxsString &s)	/* reference to a NxsString to be used to create this copy */
+  :std::string()
+	{
+	assign(s);
+	}
+
+/*!
+	Sets the stored string equal to the supplied C-string `s'.
+*/
+inline NxsString &NxsString::operator=(
+  const char *s)	/* the string for comparison */
+	{
+	assign(s);
+	return *this;
+	}
+
+//inline NxsString& NxsString::operator=(
+//  const NxsString &s)
+//	{
+//	assign(s);
+//	return *this;
+//	}
+
+/*!
+	Appends the supplied C-string `s' to the stored string.
+*/
+inline NxsString &NxsString::operator+=(
+  const char *s)	/* the C-string to be appended */
+	{
+	append(std::string(s));
+	return *this;
+	}
+
+/*!
+	Appends the characters in the supplied NxsString reference `s' to the stored string.
+*/
+inline NxsString &NxsString::operator+=(
+  const std::string &s)	/* the string to append */
+	{
+	append(s);
+	return *this;
+	}
+
+/*!
+	Appends the character `c' to the stored string.
+*/
+inline NxsString &NxsString::operator+=(
+  const char c)	/* the character to append */
+	{
+	char s[2];
+	s[0] = c;
+	s[1] = '\0';
+	append(std::string(s));
+	return *this;
+	}
+
+/*!
+	Sets the stored std::string to the supplied character 'c'.
+*/
+inline NxsString &NxsString::operator=(
+  char c)	/* the character to which the stored std::string should be set */
+	{
+	clear();
+	return (*this += c);
+	}
+
+/*!
+	Uses the standard C sprintf function to append the character representation of the supplied integer `i' to the stored
+	string (format code %d). For example, if the stored string is "taxon" and `i' is 9, the result is "taxon9".
+*/
+inline NxsString &NxsString::operator+=(
+  const int i)	/* the int to append */
+	{
+	char tmp[81];
+	std::sprintf(tmp, "%d", i);
+	append(tmp);
+	return *this;
+	}
+
+/*!
+	Capitalizes all lower case letters in the stored string by calling ToUpper.
+*/
+inline NxsString &NxsString::Capitalize()
+	{
+	ToUpper();
+	return *this;
+	}
+
+/*!
+	Returns true if the stored string is an abbreviation (or complete copy) of the supplied string `s'.
+*/
+inline bool NxsString::Abbreviates(
+  const NxsString	&s,		/* the full comparison string */
+  NxsString::CmpEnum	mode)	/* if equal to abbrev, a non-case-sensitive comparison will be made, otherwise comparison will respect case */
+  const
+	{
+	if (mode == NxsString::abbrev)
+		return IsCapAbbreviation(s);
+	else
+		return IsStdAbbreviation(s, mode == respect_case);
+	}
+
+/*!
+	Uses standard C function std::sprintf to append the unsigned integer `i' to the stored string (format code %u).
+*/
+inline NxsString& NxsString::operator+=(
+  unsigned i)	/* the integer to be appended */
+	{
+	char tmp[81];
+	std::sprintf(tmp, "%u", i);
+	append(tmp);
+	return *this;
+	}
+
+/*!
+	Uses standard C function std::sprintf to append the long integer `l' to the stored string (format code %ld).
+*/
+inline NxsString& NxsString::operator+=(
+  const long l)	/* the long integer to be appended */
+	{
+	char tmp[81];
+	std::sprintf(tmp, "%ld", l);
+	append(tmp);
+	return *this;
+	}
+
+/*!
+	Uses standard C function std::sprintf to append the unsigned long integer `l' to the stored string (format code %lu).
+*/
+inline NxsString& NxsString::operator+=(
+  const unsigned long l)	/* the unsigned long integer to be appended */
+	{
+	char tmp[81];
+	std::sprintf(tmp, "%lu", l);
+	append(tmp);
+	return *this;
+	}
+
+/*!
+	Uses the mode argument to call (and return the result of) the correct string comparison function.
+*/
+inline bool NxsString::Equals(
+  const NxsString &s,		/* the string to which *this is compared */
+  NxsString::CmpEnum mode)	/* should be one of these three: respect_case, no_respect_case or abbrev */
+  const
+	{
+	switch (mode) {
+		case NxsString::respect_case :
+			return (strcmp(this->c_str(), s.c_str()) == 0);
+		case NxsString::no_respect_case :
+			return this->EqualsCaseInsensitive(s);
+		case NxsString::abbrev :
+			return this->IsCapAbbreviation(s);
+		default :
+			NCL_ASSERT(0);// incorrect setting for mode
+		}
+	return false;
+	}
+
+# if ! defined(HIDE_NCL_NXSSTRING_ENDL)
+/*!
+	Allows functions that take and return references to NxsString strings to be placed in a series of << operators.
+	See the NxsString endl function. */
+inline NxsString &NxsString::operator<<(
+  NxsString &(*funcPtr)(NxsString &))	/* pointer to a function returning a reference to a NxsString */
+	{
+	return funcPtr(*this);
+	}
+#endif
+
+/*!
+	Returns true if `c' is any Nexus punctuation character:
+>
+	()[]{}/\,;:=*'"`-+<>
+>
+*/
+inline bool NxsString::IsNexusPunctuation(
+  const char c)	/* the character in question */
+	{
+	return (strchr("()[]{}/\\,;:=*\'\"`-+<>", c) != NULL);
+	}
+
+
+/*! Returns true if `c' is any Newick punctuation character:
+>
+	()[]':;,
+>
+List of punctuation taken from the "unquoted labels may not contain" section 
+of http://evolution.genetics.washington.edu/phylip/newick_doc.html
+
+Thanks to Andrew Lenards for pointing out the need for this when dealing with
+non-NEXUS trees.
+ parentheses, square brackets,
+        single_quotes, colons, semicolons, or commas
+*/
+inline bool NxsString::IsNewickPunctuation(
+  const char c)	/* the character in question */
+	{
+	const bool v = (strchr("()[]':;,", c) != NULL);
+	return v;
+	}
+
+
+/*!
+	Replaces the stored string (returning a reference to this object) with the integer `i' followed by a space and
+	then the string `s'. If `i' is not 1, then an 's' character is appended to make `s' plural. For example, if `i' were 0,
+	1, or 2, and `s' is "character", then the returned string would be "0 characters", "1 character" or "2 characters",
+	respectively. Obviously this only works if adding an 's' to the supplied string makes it plural.
+*/
+inline NxsString &NxsString::NumberThenWord(
+  unsigned i,			/* the number */
+  const NxsString s)	/* the string needing to be pluralized */
+  	{
+	(*this).erase();
+  	*this << i << ' ' << s;
+  	if (i != 1)
+  		*this << 's';
+  	return *this;
+  	}
+
+/*!
+	Another way to call the += operator (written to make it possible to use a NxsString like an std::ostream)
+*/
+inline NxsString &NxsString::operator<<(
+  int i)	/* the integer to append */
+  	{
+  	return (*this += i);
+  	}
+
+/*!
+	Another way to call the unsigned += operator (written to make it possible to use a NxsString like an std::ostream)
+*/
+inline NxsString &NxsString::operator<<(
+  unsigned i)	/* the unsigned integer to append */
+	{
+	return (*this += i);	/* no (int) cast: values above INT_MAX must not be appended as negative numbers */
+	}
+
+/*!
+	Another way to call the += operator (written to make it possible to use a NxsString like an std::ostream)
+*/
+inline NxsString &NxsString::operator<<(
+  long l)	/* the long integer to append */
+	{
+	return (*this += l);
+	}
+
+/*!
+	Another way to call the += operator (written to make it possible to use a NxsString like an std::ostream)
+*/
+inline NxsString &NxsString::operator<<(
+  unsigned long l)	/* the unsigned long integer to append */
+	{
+	return (*this += l);
+	}
+
+/*!
+	Another way to call the += operator (written to make it possible to use a NxsString like an std::ostream)
+*/
+inline NxsString &NxsString::operator<<(
+  double d)	/* the double floating point value to append */
+	{
+	return (*this += d);
+	}
+
+/*!
+	Another way to call the += operator (written to make it possible to use a NxsString like an std::ostream)
+*/
+inline NxsString &NxsString::operator<<(
+  const char *c)	/* the C-string to append */
+	{
+	return (*this += c);
+	}
+
+/*!
+	Another way to call the += operator (written to make it possible to use a NxsString like an std::ostream)
+*/
+inline NxsString &NxsString::operator<<(
+  char c)	/* the char to append */
+	{
+	return (*this += c);
+	}
+
+/*!
+	Another way to call the += operator (written to make it possible to use a NxsString like an std::ostream)
+*/
+inline NxsString &NxsString::operator<<(
+  const std::string &s)	/* the NxsString to append */
+	{
+	return (*this += s);
+	}
+
+
+
+/*!
+	Returns string as a Pascal string (array of unsigned characters with the length in the first byte). At most 255 characters are copied, the largest length one byte can hold.
+*/
+inline unsigned char *NxsString::p_str(
+  unsigned char *buffer)	/* buffer to receive current string in Pascal form (i.e. length in first byte); needs room for up to 256 bytes */
+  const
+	{
+	buffer[0] = (unsigned char)(length() > 255 ? 255 : length());	/* clamp so the length byte cannot wrap around */
+	memmove(buffer + 1, c_str(), buffer[0]);
+	return buffer;
+	}
+
+
+
+// ############################# start of standalone functions ##########################
+
+# if ! defined(HIDE_NCL_NXSSTRING_ENDL)
+/* "Define HIDE_NCL_NXSSTRING_ENDL to hide the dangerous definition of endl from NCLs nxsstring.h" */
+/*!
+	Appends a newline character to the string `s' and then returns a reference to `s'. Used with << operator to allow
+	strings to be written to like std::ostreams.
+*/
+inline NxsString &endl(
+  NxsString &s)	/* the string to which the newline character is to be appended */
+	{
+	return (s += '\n');
+	}
+#endif
+
+/*!
+	Appends a newline character to the string `s' and then returns a reference to `s'. Used with << operator to allow
+	strings to be written to like std::ostreams.
+*/
+inline NxsString &nxsendl(
+  NxsString &s)	/* the string to which the newline character is to be appended */
+	{
+	return (s += '\n');
+	}
+
+/*!
+	Writes the string `s' to the std::ostream `out'.
+*/
+inline std::ostream &operator<<(
+  std::ostream &out,			/* the stream to which the string `s' is to be written */
+  const NxsString &s)	/* the string to write */
+	{
+	out << s.c_str();
+	return out;
+	}
+
+NxsStringVector 	BreakPipeSeparatedList(const NxsString &strList);
+NxsStringVector 	GetVecOfPossibleAbbrevMatches(const NxsString &testStr,const NxsStringVector &possMatches);
+bool 				SetToShortestAbbreviation(NxsStringVector &strVec, bool allowTooShort = false);
+
+#endif
diff --git a/src/ncl/nxstaxaassociationblock.h b/src/ncl/nxstaxaassociationblock.h
new file mode 100644
index 0000000..e7bd99e
--- /dev/null
+++ b/src/ncl/nxstaxaassociationblock.h
@@ -0,0 +1,139 @@
+//	Copyright (C) 1999-2003 Paul O. Lewis
+//
+//	This file is part of NCL (Nexus Class Library) version 2.0.
+//
+//	NCL is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	NCL is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with NCL; if not, write to the Free Software Foundation, Inc.,
+//	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+#ifndef NCL_NXSTAXAASSOCIATIONBLOCK_H
+#define NCL_NXSTAXAASSOCIATIONBLOCK_H
+
+#include "ncl/nxsdefs.h"
+#include "ncl/nxstaxablock.h"
+
+/*! Abstract interface (all members are pure virtual) for a block that relates the taxa of two taxa blocks.
+*/
+class NxsTaxaAssociationBlockAPI
+  : public NxsBlock
+  {
+  public:
+		virtual NxsTaxaBlockAPI * GetFirstTaxaBlock() const = 0;	/* \returns the first of the two taxa blocks */
+		virtual NxsTaxaBlockAPI * GetSecondTaxaBlock() const = 0;	/* \returns the second of the two taxa blocks */
+		virtual std::set<unsigned> GetAssociatesForTaxonInFirstBlock(unsigned) const = 0;	/* \returns (by value) the indices associated with a taxon index of the first block */
+		virtual std::set<unsigned> GetAssociatesForTaxonInSecondBlock(unsigned) const = 0;	/* \returns (by value) the indices associated with a taxon index of the second block */
+  };
+
+/*! The default implementation of NxsTaxaAssociationBlockAPI. Associations between the taxa of a first and a
+	second taxa block are stored as sets of unsigned indices in two maps, one per direction.
+
+*/
+class NxsTaxaAssociationBlock
+  : public NxsTaxaAssociationBlockAPI
+	{
+        typedef std::map<unsigned, std::set<unsigned> > AssociationMap;
+	public:
+							NxsTaxaAssociationBlock();
+		virtual				~NxsTaxaAssociationBlock() {}
+		/*! Associates `firstIndex` with every element of `secIndices`; both direction maps are updated. */
+		virtual void AddAssociation(unsigned firstIndex, const std::set<unsigned> & secIndices) {
+		    std::set<unsigned> & former = this->firstToSecond[firstIndex];
+		    for (std::set<unsigned>::const_iterator sIt = secIndices.begin(); sIt != secIndices.end(); ++sIt) {
+		        const unsigned & secIndex = *sIt;
+		        former.insert(secIndex);
+		        this->secondToFirst[secIndex].insert(firstIndex);
+		    }
+		}
+		
+		/*! Installs `f` as the first taxa block. Reset() is called; the second block pointer is saved and restored around it. */
+		virtual void SetFirstTaxaBlock(NxsTaxaBlockAPI *f) {
+		    NxsTaxaBlockAPI * s = this->secondTaxaBlock;
+		    this->Reset();
+		    this->secondTaxaBlock = s;
+		    this->firstTaxaBlock = f;
+		}
+		virtual NxsTaxaBlockAPI * GetFirstTaxaBlock() const {
+		    return this->firstTaxaBlock;
+		}
+		virtual void SetSecondTaxaBlock(NxsTaxaBlockAPI *s) {	/* mirror image of SetFirstTaxaBlock */
+		    NxsTaxaBlockAPI * f = this->firstTaxaBlock;
+		    this->Reset();
+		    this->secondTaxaBlock = s;
+		    this->firstTaxaBlock = f;
+		}
+		virtual NxsTaxaBlockAPI * GetSecondTaxaBlock() const {
+		    return this->secondTaxaBlock;
+		}
+		virtual std::set<unsigned> GetAssociatesForTaxonInFirstBlock(unsigned i) const {
+		    AssociationMap::const_iterator m = this->firstToSecond.find(i);
+		    if (m == this->firstToSecond.end()) {
+		        return std::set<unsigned>();	/* no association recorded for `i`: empty set */
+		    }
+		    return m->second;
+		}
+		virtual std::set<unsigned> GetAssociatesForTaxonInSecondBlock(unsigned i) const  {
+		    AssociationMap::const_iterator m = this->secondToFirst.find(i);
+		    if (m == this->secondToFirst.end()) {
+		        return std::set<unsigned>();	/* no association recorded for `i`: empty set */
+		    }
+		    return m->second;
+		}
+
+		virtual void		Report(std::ostream &out) NCL_COULD_BE_CONST ; /*v2.1to2.2 1 */
+		virtual void 		Reset();
+		void				WriteAsNexus(std::ostream &out) const;
+
+		/*! Copy assignment: resets this block, then copies the base-block and association contents of `other`. */
+		NxsTaxaAssociationBlock &operator=(const NxsTaxaAssociationBlock &other)
+			{
+			if (this == &other) return *this;	/* self-assignment: Reset() below would otherwise run before our own data is copied */
+			Reset();
+			CopyBaseBlockContents(static_cast<const NxsBlock &>(other));
+			CopyTaxaAssociationContents(other);
+			return *this;
+			}
+
+		void CopyTaxaAssociationContents(const NxsTaxaAssociationBlock &other)
+			{
+			firstToSecond = other.firstToSecond;
+			secondToFirst = other.secondToFirst;
+			firstTaxaBlock = other.firstTaxaBlock;	/* pointers are copied, so both blocks refer to the same taxa blocks */
+			secondTaxaBlock = other.secondTaxaBlock;
+			}
+		NxsTaxaAssociationBlock * Clone() const	/* \returns a new heap-allocated copy made with operator= */
+			{
+			NxsTaxaAssociationBlock *taxa = new NxsTaxaAssociationBlock();
+			*taxa = *this;
+			return taxa;
+			}
+	protected:
+        AssociationMap firstToSecond;	/* index in first block -> associated indices in second block */
+        AssociationMap secondToFirst;	/* index in second block -> associated indices in first block */
+        NxsTaxaBlockAPI * firstTaxaBlock;
+        NxsTaxaBlockAPI * secondTaxaBlock;
+
+		virtual void 	Read(NxsToken &token);
+        void HandleTaxaCommand(NxsToken &token);
+        void HandleAssociatesCommand(NxsToken &token);
+        NxsTaxaBlockAPI * ProcessTaxaBlockName(const NxsString & value,  NxsToken &token) const;
+
+};
+
+class NxsTaxaAssociationBlockFactory	/* block factory whose GetBlockReaderForID returns a NxsTaxaAssociationBlock pointer */
+	:public NxsBlockFactory
+	{
+	public:
+		virtual NxsTaxaAssociationBlock  *	GetBlockReaderForID(const std::string & NCL_BLOCKTYPE_ATTR_NAME, NxsReader *reader, NxsToken *token);	/* declared only; no definition in this header */
+	};
+
+#endif
+
diff --git a/src/ncl/nxstaxablock.h b/src/ncl/nxstaxablock.h
new file mode 100644
index 0000000..8fd2498
--- /dev/null
+++ b/src/ncl/nxstaxablock.h
@@ -0,0 +1,484 @@
+//	Copyright (C) 1999-2003 Paul O. Lewis
+//
+//	This file is part of NCL (Nexus Class Library) version 2.0.
+//
+//	NCL is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	NCL is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with NCL; if not, write to the Free Software Foundation, Inc.,
+//	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+#ifndef NCL_NXSTAXABLOCK_H
+#define NCL_NXSTAXABLOCK_H
+
+#include "ncl/nxsdefs.h"
+#include "ncl/nxsblock.h"
+
+/*! This abstract class describes the interface that every block that wants to serve
+	as a reader of NEXUS TAXA blocks should fulfill.
+
+	A parsed taxa block in NEXUS is essentially a list of unique taxon labels.
+
+	When compared the label comparison is not case-sensitive.
+
+	The taxon can be referred to in NEXUS files by its number (numbering starting at 1), or its label.
+*/
+class NxsTaxaBlockAPI
+  : public NxsBlock, public NxsLabelToIndicesMapper
+  {
+  public:
+		/*! Adds taxon label 's' to end of list of taxon labels and increments dimNTax by 1.
+
+			\returns the (0-based) index of taxon label just added.
+		*/
+		virtual unsigned			AddTaxonLabel(const std::string & s) = 0;
+		/*! Changes the label for taxon `i` to `s`
+			The index `i` should be 0-based.
+			\throws NxsNCLAPIException if `i` is out of range.
+			\throws DuplicatedLabelNxsException if the label is already in the block
+			\throws NxsException if the label is not a legal taxon name (e.g. it is a punctuation character).
+		*/
+		virtual void  				ChangeTaxonLabel(unsigned i, NxsString s) = 0;  /*v2.1to2.2 4 */
+		/*! \returns the 0-based index of taxon named 's' in taxonLabels list.
+
+			\throws NxsX_NoSuchTaxon exception if taxon named 's' cannot be found.
+
+			\warning {This function does NOT implement the interpret-label-as-a-number functionality}
+		*/
+		virtual unsigned			FindTaxon(const NxsString & label) const  = 0;  /*v2.1to2.2 4 */
+		/*! \returns true if the label `label` is already a taxon label (not case sensitive)
+		*/
+		virtual bool  				IsAlreadyDefined(const std::string & label) = 0;
+		/*! \returns the length of the longest label.
+
+			\note {The label length does not include any extra characters (such as quotes) that may be needed to write the file
+			out to NEXUS format}
+		*/
+		virtual unsigned			GetMaxTaxonLabelLength() = 0;
+		/*! \returns the number of taxon labels (should be the same as GetNTax after a valid parse).
+		*/
+		virtual unsigned			GetNumTaxonLabels() const = 0;
+		/*! \returns the number of taxa
+		*/
+		virtual unsigned			GetNTax() const = 0;
+		/*! \returns the number of taxa
+			The presence of both this function and GetNTax is a historical artifact.  They have the same behavior.
+		*/
+		virtual unsigned			GetNTaxTotal() const = 0;
+		/*! \returns the label for taxon with (0-based) index of `i`
+
+			\throws NxsNCLAPIException if `i` is out of range.
+			\warning{Can return a 1-based number if the taxon lacks a name (in NEXUS parsing "1" refers to the first taxon).}
+		*/
+		virtual NxsString 			GetTaxonLabel(unsigned i) const = 0;  /*v2.1to2.2 4 */
+
+		/*! \returns a vector of all of the taxon labels (the only member of this interface that is not pure virtual) */
+		virtual std::vector<std::string> GetAllLabels() const;
+
+		/*! \returns a 1-based number of the taxon with label of `label` (not case-sensitive).
+			This is a low-level function not intended for widespread use (it is a faster way to
+			query the label list because it does not throw exceptions or do the numeric interpretation
+			of labels).
+
+			\warning{does NOT apply the numeric interpretation of the label.}
+
+			\warning{ 1-based numbering}
+		*/
+		virtual unsigned			TaxLabelToNumber(const std::string &label) const = 0;
+
+		/*! hook called during NxsTaxaBlock::Read() when the TaxLabels command is encountered */
+		virtual void 				HandleTaxLabels(NxsToken &token) = 0;
+		/*! writes the taxon labels as NEXUS to `out` */
+		virtual void 				WriteTaxLabelsCommand(std::ostream &out) const = 0;
+		/*! Sets the number of taxa to be included in the block
+
+			\warning{This can cause the cropping of labels.}
+		*/
+		virtual void 				SetNtax(unsigned n) = 0;
+
+		/*! \returns the number of taxa that have not been inactivated by calling InactivateTaxa
+		*/
+		virtual unsigned		 	GetNumActiveTaxa() const = 0;
+		/*! \returns true if the taxon (denoted by a 0-based index) is in range and not inactivated
+
+			\note{Works, but is not currently used by the library; added for planned delete/restore functionality}
+		*/
+		virtual bool		 		IsActiveTaxon(unsigned i) const = 0;
+		/*! flags a set of taxa as inactive. Takes a set of 0-based indices.
+
+			\note{Works, but is not currently used by the library; added for planned delete/restore functionality}
+		*/
+		virtual unsigned		 	InactivateTaxa(const std::set<unsigned> &s) = 0;
+		/*! flags a set of taxa as active. Takes a set of 0-based indices.
+
+			\note{Works, but is not currently used by the library; added for planned delete/restore functionality}
+		*/
+		virtual unsigned		 	ActivateTaxa(const std::set<unsigned> &) = 0;
+		/*! flags a taxon as inactive. Takes a 0-based index.
+
+			\note{Works, but is not currently used by the library; added for planned delete/restore functionality}
+		*/
+		virtual unsigned		 	InactivateTaxon(unsigned ) = 0;
+		/*! flags a taxon as active. Takes a 0-based index.
+
+			\note{Works, but is not currently used by the library; added for planned delete/restore functionality}
+		*/
+		virtual unsigned		 	ActivateTaxon(unsigned ) = 0;
+
+  };
+
+/*! The default implementation of the NxsTaxaBlockAPI that is used to parse TAXA blocks into a list of
+	unique (case-insensitive) labels.
+
+*/
+class NxsTaxaBlock
+  : public NxsTaxaBlockAPI
+	{
+	friend class NxsDataBlock;
+	friend class NxsAllelesBlock;
+	friend class NxsCharactersBlock;
+	friend class NxsDistancesBlock;
+
+	public:
+							NxsTaxaBlock();
+		virtual				~NxsTaxaBlock();
+
+		virtual unsigned	AddTaxonLabel(const std::string & s);
+		void  				ChangeTaxonLabel(unsigned i, NxsString s); /*v2.1to2.2 4 */
+		unsigned			TaxLabelToNumber(const std::string &label) const;
+		unsigned			FindTaxon(const NxsString & label) const;  /*v2.1to2.2 4 */
+		bool  				IsAlreadyDefined(const std::string &label);
+		unsigned GetIndexSet(const std::string &label, NxsUnsignedSet * toFill) const	/* looks `label` up among the stored taxSets */
+			{
+			return NxsLabelToIndicesMapper::GetIndicesFromSets(label, toFill, taxSets);
+			}
+		unsigned			GetMaxTaxonLabelLength();
+		unsigned			GetNTax() const;
+		unsigned			GetNTaxTotal() const;
+		unsigned			GetNumTaxonLabels() const;
+		NxsString 			GetTaxonLabel(unsigned i) const;  /*v2.1to2.2 4 */
+		void 				HandleTaxLabels(NxsToken &token);
+		bool 				NeedsQuotes(unsigned i);
+		virtual void		Report(std::ostream &out) NCL_COULD_BE_CONST ; /*v2.1to2.2 1 */
+		virtual void 		Reset();
+		void 				SetNtax(unsigned n);
+		void				WriteAsNexus(std::ostream &out) const;
+
+		virtual unsigned		 	GetNumActiveTaxa() const;
+		virtual bool		 		IsActiveTaxon(unsigned i) const;
+		virtual unsigned		 	InactivateTaxa(const std::set<unsigned> &);
+		virtual unsigned		 	ActivateTaxa(const std::set<unsigned> &);
+		virtual unsigned		 	InactivateTaxon(unsigned );
+		virtual unsigned		 	ActivateTaxon(unsigned );
+
+		class NxsX_NoSuchTaxon {};	/* thrown if FindTaxon cannot locate a supplied taxon label in the taxLabels vector */
+
+		void 				WriteTaxLabelsCommand(std::ostream &out) const;
+
+		unsigned GetMaxIndex() const;
+		unsigned GetNumLabelsCurrentlyStored() const;
+		unsigned GetIndicesForLabel(const std::string &label, NxsUnsignedSet *inds) const;
+		bool AddNewIndexSet(const std::string &label, const NxsUnsignedSet & inds);
+		bool AddNewPartition(const std::string &label, const NxsPartition & inds);
+
+		/*----------------------------------------------------------------------
+		| AppendNewLabel should not be called in most client code.  It is only
+		| 	added because some blocks create their own taxa block on-the-fly.
+		*/
+		virtual unsigned AppendNewLabel(std::string &label)
+			{
+			while (dimNTax <= taxLabels.size())	/* make sure dimNTax exceeds the current label count before adding */
+				dimNTax++;
+			return AddTaxonLabel(label);
+			}
+
+		NxsTaxaBlock &operator=(const NxsTaxaBlock &other)	/* copy assignment: Reset(), then copy base-block and taxa contents */
+			{
+			if (this == &other) return *this;	/* self-assignment: Reset() below would otherwise run before our own data is copied */
+			Reset();
+			CopyBaseBlockContents(static_cast<const NxsBlock &>(other));
+			CopyTaxaContents(other);
+			return *this;
+			}
+
+		void CopyTaxaContents(const NxsTaxaBlock &other)	/* member-wise copy of every taxa-related field from `other` */
+			{
+			taxLabels = other.taxLabels;
+			labelToIndex = other.labelToIndex;
+			dimNTax = other.dimNTax;
+			taxSets = other.taxSets;	/* was `taxSets = taxSets`, a self-assignment that never copied the sets */
+			taxPartitions = other.taxPartitions;
+			inactiveTaxa = other.inactiveTaxa;
+			}
+		NxsTaxaBlock * Clone() const	/* \returns a new heap-allocated copy made with operator= */
+			{
+			NxsTaxaBlock *taxa = new NxsTaxaBlock();
+			*taxa = *this;
+			return taxa;
+			}
+	protected:
+		NxsStringVector	taxLabels;	/* storage for list of taxon labels */
+		std::map<std::string, unsigned> labelToIndex;	/* label -> 0-based index (CapitalizedTaxLabelToNumber adds 1 to the stored value) */
+		unsigned		dimNTax;	/* value reported by GetNTax() and GetNTaxTotal() */
+		NxsUnsignedSetMap taxSets;
+		NxsPartitionsByName taxPartitions;
+		std::set<unsigned> inactiveTaxa;	/* indices flagged by InactivateTaxon and cleared by ActivateTaxon */
+
+		virtual void 	Read(NxsToken &token);
+		void CheckCapitalizedTaxonLabel(const std::string &s) const;
+		unsigned CapitalizedTaxLabelToNumber(const std::string & s) const;
+		void 			RemoveTaxonLabel(unsigned taxInd);
+	};
+
+
+/*! This class is the base class for blocks that can (in a pinch) serve as
+a TAXA block reader(NxsCharactersBlock, NxsTreesBlock, NxsUnalignedBlock, and NxsDistancesBlock)
+
+	Client code rarely needs to call the special functionality of this class.
+
+	In broad terms, this class delegates calls to the taxa block if there is an
+		external taxa block.
+	If no external taxa block has been associated with the NxsTaxaBlockSurrogate, then
+		will create one (which will later be returned to the NxsReader as an
+		implied block)
+*/
+class NxsTaxaBlockSurrogate
+	{
+	public:
+		void SetCreateImpliedBlock(bool v)	/* sets the createImpliedBlock flag (documented on the member below) */
+        	{
+        	createImpliedBlock = v;
+        	}
+
+		/*! \returns an integer that will be a facet of NxsBlockLinkStatus which reflects how the taxa block was determined.*/
+		int GetTaxaLinkStatus() const
+			{
+			return taxaLinkStatus;
+			}
+
+		void SetTaxaLinkStatus(NxsBlock::NxsBlockLinkStatus s);
+		NxsTaxaBlockAPI * GetTaxaBlockPtr(int *status) const;
+		NxsTaxaBlockAPI * GetTaxaBlockPtr() {return GetTaxaBlockPtr(0L);}	/* convenience overload: forwards a null `status` pointer */
+
+		virtual const std::string & GetBlockName() const = 0;
+
+		virtual unsigned			GetNTax() const;	/* this and the seven members below are defined inline after the class; each delegates to `taxa` */
+		virtual unsigned			GetNTaxTotal() const;
+		virtual unsigned			GetNumActiveTaxa() const;
+		virtual bool				IsActiveTaxon(unsigned i) const;
+		virtual unsigned			InactivateTaxa(const std::set<unsigned> &);
+		virtual unsigned			ActivateTaxa(const std::set<unsigned> &);
+		virtual unsigned			InactivateTaxon(unsigned );
+		virtual unsigned			ActivateTaxon(unsigned );
+
+		NxsTaxaBlockSurrogate &operator=(const NxsTaxaBlockSurrogate &other)	/* NOTE(review): no self-assignment guard; ResetSurrogate() runs before the copy -- confirm `x = x` is safe */
+			{
+			ResetSurrogate();
+			CopyTaxaBlockSurrogateContents(other);
+			return *this;
+			}
+
+		/*
+		|  Aliases the same taxa block as `other`, but `other` retains ownership!!
+		*/
+		virtual void CopyTaxaBlockSurrogateContents(const NxsTaxaBlockSurrogate &other)
+			{
+			ResetSurrogate();
+			taxa = other.taxa;
+			taxaLinkStatus = other.taxaLinkStatus;
+			newtaxa = other.newtaxa;
+			ownsTaxaBlock = false;	/* always false after a copy, whatever `other.ownsTaxaBlock` is */
+			passedRefOfOwnedBlock = other.passedRefOfOwnedBlock;
+			createImpliedBlock = other.createImpliedBlock;
+			nxsReader = other.nxsReader;
+			}
+
+	protected:
+		NxsTaxaBlockSurrogate(NxsTaxaBlockAPI *tb, NxsReader * reader)	/* `tb` may be NULL; all boolean flags start out false */
+			:taxa(tb),
+			newtaxa(false),
+			ownsTaxaBlock(false),
+			passedRefOfOwnedBlock(false),
+			createImpliedBlock(false),
+			nxsReader(reader)
+			{
+			taxaLinkStatus = (tb == NULL ? NxsBlock::BLOCK_LINK_UNINITIALIZED : NxsBlock::BLOCK_LINK_TO_ONLY_CHOICE);	/* link status depends only on whether a block was supplied */
+			}
+		virtual ~NxsTaxaBlockSurrogate()
+			{
+			ResetSurrogate();
+			}
+
+		void SetTaxaBlockPtr(NxsTaxaBlockAPI *c, NxsBlock::NxsBlockLinkStatus s);
+
+		VecBlockPtr GetCreatedTaxaBlocks();
+		void FlagTaxaBlockAsUsed()
+			{
+			taxaLinkStatus |= NxsBlock::BLOCK_LINK_USED;	/* ORs the USED bit into the status; other bits are kept */
+			}
+
+		void AssureTaxaBlock(bool allocBlock, NxsToken &, const char *cmd);
+		void ResetSurrogate();
+		void SetNexusReader(NxsReader *nxsptr)
+			{
+			nxsReader = nxsptr;
+			}
+		virtual void 			HandleTaxLabels(NxsToken & token);
+		virtual void			HandleLinkTaxaCommand(NxsToken & );
+		virtual void			WriteLinkTaxaCommand(std::ostream &out) const;
+		bool					SurrogateSwapEquivalentTaxaBlock(NxsTaxaBlockAPI * tb);
+
+		NxsTaxaBlockAPI			*taxa;				/* pointer to the TAXA block in which taxon labels are stored */
+		int						taxaLinkStatus;	/* bit field built from NxsBlock::NxsBlockLinkStatus values */
+		bool					newtaxa;
+		bool					ownsTaxaBlock;	/* presumably: ResetSurrogate() is responsible for `taxa` when true -- TODO confirm in the implementation */
+		bool					passedRefOfOwnedBlock;
+		bool					createImpliedBlock; /*if true and NEWTAXA is read in the DIMENSIONS command, then a new TaxaBlock will be allocated (instead of resetting the TAXA block). false by default.*/
+		NxsReader 				*nxsReader;
+	};
+// The following typedef maintains compatibility with existing code.
+// The TaxaBlock class name is deprecated; please use NxsTaxaBlock instead.
+//
+typedef NxsTaxaBlock TaxaBlock;
+class NxsTaxaBlockFactory	/* block factory whose GetBlockReaderForID returns a NxsTaxaBlock pointer */
+	:public NxsBlockFactory
+	{
+	public:
+		virtual NxsTaxaBlock  *	GetBlockReaderForID(const std::string & NCL_BLOCKTYPE_ATTR_NAME, NxsReader *reader, NxsToken *token);	/* declared only; no definition in this header */
+	};
+
+inline unsigned NxsTaxaBlock::GetNTax() const
+	{
+	return this->dimNTax;	/* the stored taxon count */
+	}
+
+inline unsigned NxsTaxaBlock::GetNTaxTotal() const
+	{
+	return this->dimNTax;	/* the stored taxon count */
+	}
+
+/*!	\returns a 1-based number of the taxon with label of `r` OR returns 0 to indicate that the label was not found.
+	Not for public usage
+
+	\warning{`r` must be capitalized for this function to work.}
+
+	\warning{does NOT apply the numeric interpretation of the label.}
+
+	\warning{ 1-based numbering}
+*/
+inline unsigned NxsTaxaBlock::CapitalizedTaxLabelToNumber(const std::string &r) const
+	{
+	const std::map<std::string, unsigned>::const_iterator found = labelToIndex.find(r);
+	/* stored indices are 0-based; the result is 1-based so that 0 can mean "not found" */
+	const bool missing = (found == labelToIndex.end());
+	return missing ? 0 : found->second + 1;
+	}
+
+
+/*!
+	Returns true if taxon `i' is active. If taxon `i' has been deleted, returns false. Assumes `i' is in the range
+	[0..`ntax').
+*/
+inline bool NxsTaxaBlockSurrogate::IsActiveTaxon(
+  unsigned taxInd) const	/* index of the taxon to query; forwarded unchanged to the taxa block */
+	{
+	if (taxa != NULL)
+		return taxa->IsActiveTaxon(taxInd);
+	throw NxsNCLAPIException("Calling IsActiveTaxon on uninitialized block");
+	}
+inline unsigned NxsTaxaBlockSurrogate::GetNumActiveTaxa() const
+	{
+	if (taxa != NULL)	/* delegate when a taxa block is attached */
+		return taxa->GetNumActiveTaxa();
+	throw NxsNCLAPIException("Calling GetNumActiveTaxa on uninitialized block");
+	}
+inline unsigned NxsTaxaBlockSurrogate::InactivateTaxa(const std::set<unsigned> &s)
+	{
+	if (taxa != NULL)	/* delegate when a taxa block is attached */
+		return taxa->InactivateTaxa(s);
+	throw NxsNCLAPIException("Calling InactivateTaxa on uninitialized block");
+	}
+inline unsigned NxsTaxaBlockSurrogate::ActivateTaxa(const std::set<unsigned> &s)
+	{
+	if (taxa != NULL)	/* delegate when a taxa block is attached */
+		return taxa->ActivateTaxa(s);
+	throw NxsNCLAPIException("Calling ActivateTaxa on uninitialized block");
+	}
+
+inline unsigned NxsTaxaBlockSurrogate::InactivateTaxon(unsigned i)
+	{
+	if (taxa != NULL)	/* delegate when a taxa block is attached */
+		return taxa->InactivateTaxon(i);
+	throw NxsNCLAPIException("Calling InactivateTaxon on uninitialized block");
+	}
+
+inline unsigned NxsTaxaBlockSurrogate::ActivateTaxon(unsigned i)
+	{
+	if (taxa != NULL)	/* delegate when a taxa block is attached */
+		return taxa->ActivateTaxon(i);
+	throw NxsNCLAPIException("Calling ActivateTaxon on uninitialized block");
+	}
+
+inline unsigned NxsTaxaBlockSurrogate::GetNTax() const
+	{
+	if (taxa != NULL)	/* delegate when a taxa block is attached */
+		return taxa->GetNTax();
+	throw NxsNCLAPIException("Calling GetNTax on uninitialized block");
+	}
+
+inline unsigned NxsTaxaBlockSurrogate::GetNTaxTotal() const
+	{
+	if (taxa != NULL)	/* delegate when a taxa block is attached */
+		return taxa->GetNTaxTotal();
+	throw NxsNCLAPIException("Calling GetNTaxTotal on uninitialized block");
+	}
+
+
+inline unsigned NxsTaxaBlock::GetNumActiveTaxa() const
+	{
+	return GetNTax() - static_cast<unsigned>(inactiveTaxa.size());	/* total taxa minus those flagged inactive */
+	}
+
+inline bool NxsTaxaBlock::IsActiveTaxon(unsigned i) const
+	{
+	return (i < GetNTax()) && (inactiveTaxa.find(i) == inactiveTaxa.end());	/* in range and not flagged inactive */
+	}
+
+inline unsigned NxsTaxaBlock::InactivateTaxa(const std::set<unsigned> &s)
+	{
+	for (std::set<unsigned>::const_iterator cur = s.begin(), last = s.end(); cur != last; ++cur)
+		InactivateTaxon(*cur);	/* throws on the first index InactivateTaxon rejects */
+	return GetNumActiveTaxa();
+	}
+
+inline unsigned NxsTaxaBlock::ActivateTaxa(const std::set<unsigned> &s)
+	{
+	for (std::set<unsigned>::const_iterator cur = s.begin(), last = s.end(); cur != last; ++cur)
+		ActivateTaxon(*cur);	/* throws on the first index ActivateTaxon rejects */
+	return GetNumActiveTaxa();
+	}
+inline unsigned NxsTaxaBlock::InactivateTaxon(unsigned i)	/* flags 0-based taxon `i' as inactive; \returns the number of taxa still active */
+	{
+	if (i >= GetNTax())	/* valid indices are [0, GetNTax()); the old `>' test let i == GetNTax() through */
+		throw NxsNCLAPIException("Taxon index out of range in InactivateTaxon");
+	inactiveTaxa.insert(i);	/* std::set: inserting an index that is already present changes nothing */
+	return GetNumActiveTaxa();
+	}
+inline unsigned NxsTaxaBlock::ActivateTaxon(unsigned i)	/* clears the inactive flag of 0-based taxon `i'; \returns the number of active taxa */
+	{
+	if (i >= GetNTax())	/* valid indices are [0, GetNTax()); the old `>' test let i == GetNTax() through */
+		throw NxsNCLAPIException("Taxon index out of range in ActivateTaxon");	/* message used to name InactivateTaxon */
+	inactiveTaxa.erase(i);	/* std::set: erasing an index that is not present changes nothing */
+	return GetNumActiveTaxa();
+	}
+
+
+#endif
diff --git a/src/ncl/nxstoken.h b/src/ncl/nxstoken.h
new file mode 100644
index 0000000..4589133
--- /dev/null
+++ b/src/ncl/nxstoken.h
@@ -0,0 +1,876 @@
+//	Copyright (C) 1999-2003 Paul O. Lewis
+//
+//	This file is part of NCL (Nexus Class Library) version 2.0.
+//
+//	NCL is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	NCL is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with NCL; if not, write to the Free Software Foundation, Inc.,
+//	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+
+#ifndef NCL_NXSTOKEN_H
+#define NCL_NXSTOKEN_H
+
+#include "ncl/nxsexception.h"
+class NxsToken;
+
+class NxsX_UnexpectedEOF: public NxsException	/* NxsException subtype; by its name, signals that input ended unexpectedly -- confirm where it is thrown */
+	{
+	public:
+		NxsX_UnexpectedEOF(NxsToken &);	/* built from the tokenizer; the constructor is defined outside this view */
+	};
+
+
+/*!
+     General notes on NexusTokenizing
+
+
+  File position information (pos, line and column) refer to the end of the token.
+
+  Note 1:  the GetEmbeddedComments methods of ProcessedNxsToken and NxsToken can be tricky to use if the detailed
+    position of a comment is required.  A vector of "embedded comments" in the NCL context is a collection of
+    all comments that were encountered during a GetNextToken operation.  The behavior depends on whether the tokenizer
+    can tell that a section of text has the potential to span a comment. Potentially comment-spanning tokens have to be
+    read until a token-breaker is found, so they include trailing comments.  As a result it is not always easy (or possible)
+    for client code to determine whether a specific comment belongs "with" a particular NEXUS token rather than the
+    previous or next token.
+    For example:
+  Text                            Result as (token, {"embedded comment"}) pairs    Explanation
+  ============================    =============================================   =====================================
+  ;a[1]b;                         (;, {}), (ab, {1}), (;, {})                     ab is a comment-spanning token
+  ;[1]a[2]b;                      (;, {}), (ab, {1, 2}), (;, {})                  tokenizer realizes that ; is always a single token
+                                                                                    so [1] is not encountered until the second GetNextToken() call.
+  a[1];[2]b;                      (a, {1}), (;, {}) (b, {2}), (;, {})             First GetNextToken() call reads "a" token until ; (thus reading "[1]")
+                                                                                    ; is a single character token, so no comments are read, thus making
+                                                                                    [2] part of the third GetNextToken call().
+
+    In some cases the comment position information and token position information may reveal the exact location of the
+    comments.  Fortunately the relative order of comments is retained and the exact position is rarely needed.
+
+  Note 2: Using the NxsToken class with the saveCommandComments LabileFlag causes [&comment text here] comments to be
+    returned as tokens ONLY if they are not preceded by potentially comment-spanning tokens. This "feature" is new to
+	 NCL v2.1 and is the result of a bug-fix (previous versions of NCL incorrectly broke tokens at the start of any comment).
+
+  Text                      Result as in saveCommandComments mode                Explanation
+  ========================= =============================================      =====================================
+  =[&R](1,                  ("=",{}) ("&R", {}), ("(",{}), ("1",{}), (",",{})  [&R] is not in the middle of potentially-comment-spanning token.
+  a[&R]b,                   ("ab",{"&R"}), (",",{})                            [&R] is in the middle of comment-spanning token "ab"
+  a[&R],                    ("a",{"&R"}), (",",{})                             [&R] is in on the trailing end of a potentially-comment-spanning token "a"
+                                                                                the tokenizer
+    This wart makes it more tedious to deal with command comments. However it is tolerable because the only supported use of command
+    comments in NCL is after single-character tokens (for example after = in a tree description).
+
+    The NHX command comments are not processed by NCL, but they occur in contexts in which it will be possible to determine
+    the correct location of the comment  (though it is necessary to check the embedded comments when processing NHX trees):
+
+ Text                      Result as NOT IN saveCommandComments mode                        Explanation
+ ========================= ===================================================      =====================================
+ ):3.5[&&NHXtext],         (")",{}) (":", {}), ("3.5",{"&&NHXtext"}), (",",{})      "3.5" is potentially-comment-spanning, but the comment still
+                                                                                      is stored with other metadata for the same edge.
+ )[&&NHXtext],             (")",{}) (",", {"&&NHXtext"})                            NHX comment is parsed with the second token, but
+                                                                                      because , is NOT potentially-comment-spanning we
+                                                                                      know that [&&NHXtext] must have preceded the comma (the
+                                                                                      token and comment column numbers would also make this clear).
+*/
+
+/*!
+ New in 2.1
+	 - See Note 2 above (bug-fix, but wart introduced).  This could lead to loss of backward compatibility if client
+		code relies on saveCommandComments in contexts in which command comments occur within potentially comment-spanning
+       tokens.
+	 - NxsComment class.
+    - Comments are stored in tokenization (the GetNextToken() call will trash the previous comments, so client code
+		must store comments if they are needed permanently).
+	 - NxsToken::SetEOFAllowed method added and SetEOFAllowed(false) is called when entering a block.  This means that
+		when parsing block contents the NxsToken tokenizer will raise an NxsException if it runs out of file (thus
+		Block reader code no longer needs to check for atEOF() constantly to issue an appropriate error).
+	 - ProcessedNxsToken class, NxsToken:ProcessAsSimpleKeyValuePairs, and NxsToken:ProcessAsCommand methods.  This makes
+		it easier to parse commands (by allowing random access to the tokens in a command). These methods are not appropriate
+		for very long commands (such as MATRIX) or commands that require fiddling with the tokenizing rules (such as disabling
+		the hyphen as a token breaker)
+	 - lazy NxsToken::GetFilePosition() and low level io operations dramatically speed up tokenization (~10-20 times faster).
+	 - some other utility functions were added, and some refactoring (delegation to NxsString) was done to clean up
+*/
+
+
+
+
+/*!
+   Storage for a comment text and (end of the comment) file position information
+*/
+class NxsComment
+	{
+	public:
+		/* Keeps a copy of `text' together with the line and column numbers supplied for it. */
+		NxsComment(const std::string & text, long lineNo, long columnNo)
+			:body(text), line(lineNo), col(columnNo)
+			{}
+		/* \returns the line number given at construction */
+		long		GetLineNumber() const
+			{ return this->line; }
+		/* \returns the column number given at construction */
+		long 		GetColumnNumber() const
+			{ return this->col; }
+		/* \returns the comment text, without the enclosing square brackets */
+		const std::string & GetText() const
+			{ return this->body; }
+		/* Writes the comment to `out' wrapped in [ and ]. */
+		void WriteAsNexus(std::ostream &out) const
+			{
+			out << '[';
+			out << this->body;
+			out << ']';
+			}
+
+	private:
+		std::string body;	/* comment text */
+		long line;
+		long col;
+	};
+
+
+/*!
+   Storage for a file position, line number and column number.
+*/
+class NxsTokenPosInfo
+	{
+	public:
+		/* Default state: file position 0, line and column both -1. */
+		NxsTokenPosInfo()
+			:pos(0), line(-1), col(-1)
+			{}
+		/* Stores the three supplied values unchanged. */
+		NxsTokenPosInfo(file_pos filePosition, long lineNo, long columnNo)
+			:pos(filePosition), line(lineNo), col(columnNo)
+			{}
+		/* Built from a tokenizer; defined outside this view. */
+		NxsTokenPosInfo(const NxsToken &);
+
+		file_pos 	GetFilePosition() const
+			{
+			return this->pos;
+			}
+
+		long		GetLineNumber() const
+			{
+			return this->line;
+			}
+
+		long 		GetColumnNumber() const
+			{
+			return this->col;
+			}
+
+
+
+		file_pos	pos;	/* current file position */
+		long		line;	/* current line in file */
+		long		col;	/* column of current line */
+	};
+
+
+/*!
+	A structure for storing the name of a command and maps from option names
+ 		to value strings.
+	Produced by ProcessedNxsToken::ParseSimpleCmd (see that method's comments for the rules on how it parses a NEXUS
+	command into a NxsSimpleCommandStrings struct).
+*/
+class NxsSimpleCommandStrings
+	{
+	public:
+		typedef std::vector<std::string> VecString;
+		typedef std::list<VecString> MatString;
+		typedef std::pair<NxsTokenPosInfo, std::string> SingleValFromFile;
+		typedef std::pair<NxsTokenPosInfo, VecString > MultiValFromFile;
+		typedef std::pair<NxsTokenPosInfo, MatString > MatFromFile;
+		typedef std::map<std::string, SingleValFromFile> StringToValFromFile;
+		typedef std::map<std::string,  MultiValFromFile> StringToMultiValFromFile;
+		typedef std::map<std::string,  MatFromFile> StringToMatFromFile;
+
+		// Looks for k in opts and multiOpts. Returns the position info and all of the values
+		// 	for the command option (the vector of strings is empty if the option was not found).
+		// Case-sensitive!
+		// If an option is in multiOpts and opts, then only the value from opts will be returned!
+		MultiValFromFile GetOptValue(const std::string &k) const
+			{
+			MultiValFromFile mvff;
+			StringToValFromFile::const_iterator s = this->opts.find(k);
+			if (s != this->opts.end())
+				{
+				const SingleValFromFile & v(s->second);
+				mvff.first = v.first;
+				mvff.second.push_back(v.second);	// a single value is returned as a one-element vector
+				}
+			else
+				{
+				StringToMultiValFromFile::const_iterator m = this->multiOpts.find(k);
+				if (m != this->multiOpts.end())
+					{
+					const MultiValFromFile & mv(m->second);
+					mvff.first = mv.first;
+					mvff.second  = mv.second;
+					}
+				}
+			return mvff;
+			}
+
+		MatFromFile GetMatOptValue(const std::string & k) const	// looks k up in matOpts only; default-constructed result if absent
+			{
+			StringToMatFromFile::const_iterator mIt = this->matOpts.find(k);
+			if (mIt ==  this->matOpts.end())
+				return MatFromFile();
+			return mIt->second;
+			}
+
+		bool HasKey(const std::string &k) const	// true if k is a key of opts, multiOpts or matOpts; k is taken by reference to avoid a copy per call
+			{
+			if (this->opts.find(k) !=  this->opts.end())
+				return true;
+			return ((this->multiOpts.find(k) !=  this->multiOpts.end()) || (this->matOpts.find(k) !=  this->matOpts.end()));
+			}
+
+	  	std::string cmdName;	// name of the command
+	  	NxsTokenPosInfo cmdPos;
+		StringToValFromFile opts;	// option name -> (position, single value)
+		StringToMultiValFromFile multiOpts;	// option name -> (position, vector of values)
+		StringToMatFromFile matOpts;	// option name -> (position, list of value vectors)
+	};
+
+/*!
+   Storage for a single NEXUS token, and embedded comments, along with end-of-the-token file position information.
+*/
+class ProcessedNxsToken
+	{
+	public:
+		static void IncrementNotLast(std::vector<ProcessedNxsToken>::const_iterator & it,
+									 const std::vector<ProcessedNxsToken>::const_iterator &endIt,
+									 const char * context);
+		static NxsSimpleCommandStrings ParseSimpleCmd(const std::vector<ProcessedNxsToken> &, bool convertToLower);
+
+		// Defined inline after the NxsToken class declaration.
+		ProcessedNxsToken(const NxsToken &t);
+		// The string constructors only copy s, so they accept it by const reference (const strings and temporaries bind too).
+		ProcessedNxsToken(const std::string &s)
+			:token(s)
+			{}
+
+		ProcessedNxsToken(const std::string &s, file_pos position,long lineno, long columnno)
+			:token(s),
+			posInfo(position, lineno, columnno)
+			{}
+
+		std::string GetToken() const
+			{
+			return token;
+			}
+
+		const std::vector<NxsComment> & GetEmbeddedComments() const
+			{
+			return embeddedComments;
+			}
+
+		NxsTokenPosInfo 	GetFilePosInfo() const
+			{
+			return posInfo;
+			}
+		const NxsTokenPosInfo & GetFilePosInfoConstRef() const
+			{
+			return posInfo;
+			}
+
+		file_pos 	GetFilePosition() const
+			{
+			return posInfo.GetFilePosition();
+			}
+
+		long		GetLineNumber() const
+			{
+			return posInfo.GetLineNumber();
+			}
+
+		long 		GetColumnNumber() const
+			{
+			return posInfo.GetColumnNumber();
+			}
+		// Equals delegates to NxsString::case_insensitive_equals; EqualsCaseSensitive is a plain strcmp.
+		bool		Equals(const char *c) const
+			{
+			return NxsString::case_insensitive_equals(token.c_str(), c);
+			}
+		bool		EqualsCaseSensitive(const char *c) const
+			{
+			return (strcmp(token.c_str(), c) == 0);
+			}
+
+		void 		SetEmbeddedComments(const std::vector<NxsComment> & c)
+			{
+			embeddedComments = c;
+			}
+		// Writes every embedded comment first, then the token as returned by NxsString::GetEscaped.
+		void WriteAsNexus(std::ostream &out) const
+			{
+			for(std::vector<NxsComment>::const_iterator cIt = embeddedComments.begin(); cIt != embeddedComments.end(); ++cIt)
+				cIt->WriteAsNexus(out);
+			out << NxsString::GetEscaped(token);
+			}
+	private:
+		std::string token;
+		NxsTokenPosInfo posInfo;
+		std::vector<NxsComment> embeddedComments; /* comments that were processed in the same GetToken operation that created this token. */
+	};
+
+/*!
+  ProcessedNxsCommand is merely a collection of ProcessedNxsToken objects. The NxsToken object can use a ; as a
+	separator to parse its input stream until the next ";" and return a ProcessedNxsCommand.
+
+	See NxsToken::ProcessAsCommand method.
+*/
+typedef std::vector<ProcessedNxsToken> ProcessedNxsCommand;
+bool WriteCommandAsNexus(std::ostream &, const ProcessedNxsCommand &);
+
+
+/**---------------------------------------------------------------------------------------------------------------------
+	NxsToken objects are used by NxsReader to extract words (tokens) from a NEXUS data file. NxsToken objects know to
+	correctly skip NEXUS comments and understand NEXUS punctuation, making reading a NEXUS file as simple as repeatedly
+	calling the GetNextToken() function and then interpreting the token returned. If the token object is not attached
+	to an input stream, calls to GetNextToken() will have no effect. If the token object is not attached to an output
+	stream, output comments will be discarded (i.e., not output anywhere) and calls to Write or Writeln will be
+	ineffective. If input and output streams have been attached to the token object, however, tokens are read one at a
+	time from the input stream, and comments are correctly read and either written to the output stream (if an output
+	comment) or ignored (if not an output comment). Sequences of characters surrounded by single quotes are read in as
+	single tokens. A pair of adjacent single quotes are stored as a single quote, and underscore characters are stored
+	as blanks.
+*/
+class NxsToken
+	{
+	public:
+		static std::string	EscapeString(const std::string &);
+		static bool 		NeedsQuotes(const std::string &);
+		static std::string	GetQuoted(const std::string &);
+		static void 		DemandEndSemicolon(NxsToken &token, NxsString & errormsg, const char *contextString);
+		static unsigned 	DemandPositiveInt(NxsToken &token, NxsString & errormsg, const char *contextString);
+		static std::map<std::string, std::string> ParseAsSimpleKeyValuePairs(const ProcessedNxsCommand & tv, const char *cmdName);
+
+		static std::vector<ProcessedNxsToken> Tokenize(const std::string & );
+
+		enum NxsTokenFlags	/* For use with the variable labileFlags */
+			{
+			saveCommandComments		= 0x0001,	/* if set, command comments of the form [&X] are not ignored but are instead saved as regular tokens (without the square brackets, however) */
+			parentheticalToken		= 0x0002,	/* if set, and if next character encountered is a left parenthesis, token will include everything up to the matching right parenthesis */
+			curlyBracketedToken		= 0x0004,	/* if set, and if next character encountered is a left curly bracket, token will include everything up to the matching right curly bracket */
+			doubleQuotedToken		= 0x0008,	/* if set, grabs entire phrase surrounded by double quotes */
+			singleCharacterToken	= 0x0010,	/* if set, next non-whitespace character returned as token */
+			newlineIsToken			= 0x0020,	/* if set, newline character treated as a token and atEOL set if newline encountered */
+			tildeIsPunctuation		= 0x0040,	/* if set, tilde character treated as punctuation and returned as a separate token */
+			useSpecialPunctuation	= 0x0080,	/* if set, character specified by the data member special is treated as punctuation and returned as a separate token */
+			hyphenNotPunctuation	= 0x0100,	/* if set, the hyphen character is not treated as punctuation (it is normally returned as a separate token) */
+			preserveUnderscores		= 0x0200,	/* if set, underscore characters inside tokens are not converted to blank spaces (normally, all underscores are automatically converted to blanks) */
+			ignorePunctuation		= 0x0400,	/* if set, the normal punctuation symbols are treated the same as any other darkspace characters */
+			spaceDoesNotBreakToken  = 0x0800    /* if set, then internal spaces in a token will not cause it to be broken (useful for reading newick strings which do not have the correct quoting or _ for spaces) */
+			};
+
+		NxsString		errormsg;
+
+						NxsToken(std::istream &i);
+		virtual			~NxsToken();
+
+		bool			AtEOF();
+		bool			AtEOL();
+		bool			Abbreviation(NxsString s);
+		bool			Begins(NxsString s, bool respect_case = false);
+		void			BlanksToUnderscores();
+		bool			Equals(NxsString s, bool respect_case = false) const;
+		bool		EqualsCaseSensitive(const char *c) const
+			{
+			return (strcmp(token.c_str(), c) == 0);
+			}
+
+		long			GetFileColumn() const;
+		file_pos		GetFilePosition() const;
+		long			GetFileLine() const;
+		void			GetNextToken();
+		NxsString		GetToken(bool respect_case = true);
+		const char		*GetTokenAsCStr(bool respect_case = true);
+		const NxsString	&GetTokenReference() const;
+		int				GetTokenLength() const;
+		bool			IsPlusMinusToken();
+		bool			IsPunctuationToken();
+		bool			IsWhitespaceToken();
+		bool			IsPlusMinusToken(const std::string & t);
+		bool			IsPunctuationToken(const std::string & t);
+		bool			IsWhitespaceToken(const std::string & t);
+		std::map<std::string, std::string> ProcessAsSimpleKeyValuePairs(const char *cmdName);
+		void 			ProcessAsCommand(ProcessedNxsCommand *tokenVec);
+		void			ReplaceToken(const NxsString s);
+		void			ResetToken();
+		void			SetSpecialPunctuationCharacter(char c);
+		void			SetLabileFlagBit(int bit);
+		bool			StoppedOn(char ch);
+		void			StripWhitespace();
+		void			ToUpper();
+		void			Write(std::ostream &out);
+		void			Writeln(std::ostream &out);
+
+		virtual void	OutputComment(const NxsString &msg);
+
+		void			SetEOFAllowed(bool e)
+			{
+			eofAllowed = e;
+			}
+		bool			GetEOFAllowed() const
+			{
+			return eofAllowed;
+			}
+		void			SetBlockName(const char *);
+		std::string 	GetBlockName();
+		const std::vector<NxsComment> & GetEmbeddedComments() const
+			{
+			return embeddedComments;
+			}
+		char			PeekAtNextChar() const;
+		
+		/// Calling with `true` will force the NxsToken to only consider newick's
+		//		punctuation characters to be punctuation (newick's punctuation
+		//		chars are ()[]':;, this is a subset of NEXUS punctuation).
+		//	Calling with `false` will restore NEXUS punctuation rules.
+		void UseNewickTokenization(bool v);
+
+	protected:
+
+		void			AppendToComment(char ch);
+		void			AppendToToken(char ch);
+		bool			GetComment();
+		void			GetCurlyBracketedToken();
+		void			GetDoubleQuotedToken();
+		void			GetQuoted();
+		void			GetQuotedWithInternalSingleQuotesDoubled();
+		void			GetParentheticalToken();
+		bool			IsPunctuation(char ch);
+		bool			IsWhitespace(char ch);
+
+	private:
+		void AdvanceToNextCharInStream();
+		char			GetNextChar();
+		//char ReadNextChar();
+
+		std::istream	&inputStream;		/* reference to input stream from which tokens will be read */
+		signed char		nextCharInStream;	/* value handed back by PeekAtNextChar() */
+		file_pos		posOffBy;			/* offset of the file pos (according to the stream) and the tokenizer (which is usually a character or two behind, due to saved chars) */
+		file_pos		usualPosOffBy;		/* default of posOffBy.  Usually this is -1, but it can be positive if a tokenizer is created from a substring of the file */
+		long			fileLine;			/* current file line */
+		long			fileColumn;			/* current column in current line (refers to column immediately following token just read) */
+		NxsString		token;				/* the character buffer used to store the current token */
+		NxsString		comment;			/* temporary buffer used to store output comments while they are being built */
+		bool			eofAllowed;			/* written by SetEOFAllowed(), read by GetEOFAllowed() */
+		signed char		saved;				/* either '\0' or is last character read from input stream */
+		bool			atEOF;				/* true if end of file has been encountered */
+		bool			atEOL;				/* true if newline encountered while newlineIsToken labile flag set */
+		char			special;			/* ad hoc punctuation character; default value is '\0' */
+		int				labileFlags;		/* storage for flags in the NxsTokenFlags enum */
+		char			whitespace[4];		/* stores the 3 whitespace characters: blank space, tab and newline */
+		std::string 	currBlock;			/* block name stored by SetBlockName() */
+		std::vector<NxsComment>		embeddedComments;
+		typedef bool (* CharPredFunc)(const char);
+		CharPredFunc    isPunctuationFn;	/* predicate that IsPunctuation() consults first */
+	};
+
+typedef NxsToken NexusToken;
+
+
+// Copies the tokenizer's current token text and position; embeddedComments is left default-constructed (empty).
+inline ProcessedNxsToken::ProcessedNxsToken(const NxsToken &t)
+	:token(t.GetTokenReference()), posInfo(t)
+	{}
+
+// Records the position, line and column that the tokenizer reports at the moment of construction.
+inline NxsTokenPosInfo::NxsTokenPosInfo(const NxsToken &t)
+	:pos(t.GetFilePosition()), line(t.GetFileLine()),
+	col(t.GetFileColumn())
+	{}
+
+/*!
+	Remembers the name of the block being read (used for better error reporting only).
+	Passing NULL leaves the stored name empty.
+*/
+inline void NxsToken::SetBlockName(const char *c)
+	{
+	// Assigning "" produces the same empty string that clearing would.
+	const char * const newName = (c != 0L ? c : "");
+	currBlock.assign(newName);
+	}
+
+/*!
+	Returns a copy of the stored block name `currBlock' (for better error reporting); SetBlockName is what stores it.
+*/
+inline std::string NxsToken::GetBlockName()
+	{
+	return currBlock;
+	}
+
+/*!
+	Thin wrapper around NxsString::GetEscaped: returns copy of s quoted per the NEXUS Standard iff s needs quoting.
+*/
+inline std::string NxsToken::EscapeString(const std::string &s)
+	{
+	return NxsString::GetEscaped(s);
+	}
+
+/*!
+	Returns a const reference to the stored token for callers that only need read access - no copy, unlike GetToken.
+*/
+inline const NxsString &NxsToken::GetTokenReference() const
+	{
+	return token;
+	}
+
+/**
+  This function is called whenever an output comment (i.e., a comment beginning with an exclamation point) is found
+	in the data file.
+
+  This base-class version of OutputComment suppresses these messages (its body is empty). You can override this
+    virtual function to display the output comment in the most appropriate way for the platform you are supporting.
+*/
+inline void NxsToken::OutputComment(
+  const NxsString &)	/* the contents of the printable comment discovered in the NEXUS data file (unused here) */
+	{
+	}
+
+/*!
+	Adds `ch' to the end of `comment', the buffer in which output comments are built.
+*/
+inline void NxsToken::AppendToComment(
+  char ch)	/* character to be appended to comment */
+	{
+	comment += ch;
+	}
+
+/*!
+	Adds `ch' to the end of the current token (the `token' buffer).
+*/
+inline void NxsToken::AppendToToken(
+  char ch)	/* character to be appended to token */
+	{
+	token.push_back(ch);
+	}
+
+
+/*!
+	Returns true if `ch' is found in the `whitespace' member array. Exception: '\n' is reported as darkspace
+	(false) while the newlineIsToken labile flag is in effect.
+*/
+inline bool NxsToken::IsWhitespace(
+  char ch)	/* the character in question */
+	{
+	// While newlines are being handed back as tokens they must not be skipped
+	// as whitespace, so this case is decided before consulting the array.
+	//
+	const bool newlineIsDarkspace = ((labileFlags & newlineIsToken) && ch == '\n');
+	if (newlineIsDarkspace)
+		return false;
+
+	// Everything else is whitespace exactly when strchr locates it in the
+	// whitespace array.
+	// NOTE(review): strchr treats the terminating '\0' as part of the string, so
+	// IsWhitespace('\0') would be true if the array is NUL terminated; confirm
+	// that callers expect this.
+	//
+	return (strchr(whitespace, ch) != NULL);
+	}
+
+/*!
+	Returns the `atEOF' flag: true if and only if the last call to GetNextToken encountered the end-of-file character
+	(or for some reason the input stream is now out of commission).
+*/
+inline bool NxsToken::AtEOF()
+	{
+	return atEOF;
+	}
+
+/*!
+	Returns the `atEOL' flag: true if and only if the last call to GetNextToken encountered the newline character
+	while the newlineIsToken labile flag was in effect.
+*/
+inline bool NxsToken::AtEOL()
+	{
+	return atEOL;
+	}
+
+/*!
+	Converts all blanks in token to underscore characters by calling NxsString::BlanksToUnderscores on it. Normally,
+	underscores read from a NEXUS file are converted to blanks as they are read; this reverts the blanks back.
+*/
+inline void NxsToken::BlanksToUnderscores()
+	{
+	token.BlanksToUnderscores();
+	}
+
+/*!
+	Returns value stored in `fileColumn', which keeps track of the current column in the data file (the member is
+	documented as the column immediately following the token just read).
+*/
+inline long  NxsToken::GetFileColumn() const
+	{
+	return fileColumn;
+	}
+
+/*!
+	Returns the input stream's current read position (queried with pubseekoff) plus the correction `posOffBy', which
+	accounts for the tokenizer usually being a character or two behind the stream.  Note: for Metrowerks compiler,
+	you must use the offset() method of the streampos class to use the value returned.
+*/
+inline file_pos  NxsToken::GetFilePosition() const
+	{
+	return inputStream.rdbuf()->pubseekoff(0,std::ios::cur, std::ios::in) + posOffBy;
+	}
+
+/*!
+	Returns value stored in `fileLine', which keeps track of the current line in the data file (i.e., number of new
+	lines encountered thus far).
+*/
+inline long  NxsToken::GetFileLine() const
+	{
+	return fileLine;
+	}
+
+/*!
+	Returns a copy of the data member `token'. Specifying false for `respect_case' first calls ToUpper() on this
+	object (documented as converting all characters in `token' to upper case), so the conversion is not limited to
+	the returned copy. Specifying true results in GetToken returning the token exactly as currently stored.
+*/
+inline NxsString NxsToken::GetToken(
+  bool respect_case)	/* determines whether token is converted to upper case before being returned */
+	{
+	if (respect_case)
+		return token;
+	ToUpper();
+	return token;
+	}
+
+/*!
+	Returns the data member `token' as a C-style string (the pointer comes from token.c_str(), so it refers to the
+	token's own buffer). Specifying false for `respect_case' first calls ToUpper() on this object; specifying true
+	results in GetTokenAsCStr returning the token exactly as currently stored.
+*/
+inline const char *NxsToken::GetTokenAsCStr(
+  bool respect_case)	/* determines whether token is converted to upper case before being returned */
+	{
+	if (respect_case)
+		return token.c_str();
+	ToUpper();
+	return token.c_str();
+	}
+
+/*!
+	Returns token.size() converted to int by an explicit cast.
+*/
+inline int NxsToken::GetTokenLength() const
+	{
+	return (int)token.size();
+	}
+
+/*!
+	Returns true if current token is a single character and this character is either '+' or '-' (tests `token').
+*/
+inline bool NxsToken::IsPlusMinusToken()
+	{
+	return IsPlusMinusToken(token);
+	}
+
+/*! Returns true if t is a single character and this character is either '+' or '-'. */
+inline bool NxsToken::IsPlusMinusToken(const std::string &t)
+	{
+	if (t.size() != 1)
+		return false;
+	return (t[0] == '+' || t[0] == '-');
+	}
+
+
+/*!
+	Returns true if character supplied is considered a punctuation character. The basic decision is delegated to the
+	`isPunctuationFn' predicate; the following twenty characters are documented as the NEXUS punctuation characters:
+>
+	()[]{}/\,;:=*'"`+-<>
+>
+	Exceptions:
+~
+	o The tilde character ('~') is also considered punctuation if the tildeIsPunctuation labile flag is set
+	o The special punctuation character (specified using the SetSpecialPunctuationCharacter) is also considered
+	  punctuation if the useSpecialPunctuation labile flag is set
+	o The hyphen ('-') is not considered punctuation if the hyphenNotPunctuation labile flag is set; neither is
+	  the plus sign ('+'), unless NCL_VERSION_2_STYLE_HYPHEN is defined to a nonzero value
+~
+	Use the SetLabileFlagBit method to set one or more NxsLabileFlags flags in `labileFlags'
+*/
+inline bool NxsToken::IsPunctuation(
+  char ch)	/* the character in question */
+	{
+
+	// PAUP 4.0b10
+	//  o allows ]`<> inside taxon names
+	//  o allows `<> inside taxset names
+	//
+	if (isPunctuationFn(ch))
+		{
+		if  (labileFlags & hyphenNotPunctuation)
+#			if defined(NCL_VERSION_2_STYLE_HYPHEN) && NCL_VERSION_2_STYLE_HYPHEN
+				return (ch != '-');
+#			else
+				return (ch != '-'  && ch != '+');
+#			endif
+		return true;
+		}
+	if (labileFlags & tildeIsPunctuation  && ch == '~')
+		return true;
+	return (labileFlags & useSpecialPunctuation  && ch == special);
+	}
+
+
+/*!
+	Returns true if current token is a single character and this character is a punctuation character (as defined in
+	IsPunctuation function). Forwards the stored `token' to the overload that takes a string.
+*/
+inline bool NxsToken::IsPunctuationToken()
+	{
+	return IsPunctuationToken(token);
+	}
+
+/*! Returns true if t is a single character and this character is a punctuation character (as defined in
+	IsPunctuation function). */
+inline bool NxsToken::IsPunctuationToken(const std::string &t)
+	{
+	if (t.size() != 1)
+		return false;
+	return IsPunctuation(t[0]);
+	}
+
+
+/*!
+	Returns true if current token is a single character and this character is a whitespace character (as defined in
+	IsWhitespace function). Forwards the stored `token' to the overload that takes a string.
+*/
+inline bool NxsToken::IsWhitespaceToken()
+	{
+	return IsWhitespaceToken(token);
+	}
+
+/*!
+	Returns true if t is exactly one character long and IsWhitespace reports that character as whitespace.
+*/
+inline bool NxsToken::IsWhitespaceToken(const std::string &t)
+	{
+	return (t.size() == 1 && IsWhitespace( t[0]));
+	}
+
+/*!
+	Replaces current token NxsString with s (plain assignment; the embedded comments are not touched).
+*/
+inline void NxsToken::ReplaceToken(
+  const NxsString s)	/* NxsString to replace current token NxsString */
+	{
+	token = s;
+	}
+
+/*!
+	Sets token to the empty NxsString ("") and also discards the embedded comments collected for it.
+*/
+inline void NxsToken::ResetToken()
+	{
+	embeddedComments.clear();
+	token.clear();
+	}
+
+/*!
+	Sets the special punctuation character to `c'. The character only takes effect while the labile bit
+	useSpecialPunctuation is set: IsPunctuation then reports it as punctuation in addition to the standard symbols,
+	so it is returned as a separate token like the other punctuation characters.
+*/
+inline void NxsToken::SetSpecialPunctuationCharacter(
+  char c)	/* the character to which `special' is set */
+	{
+	special = c;
+	}
+
+/*!
+	ORs `bit' into the variable `labileFlags', leaving bits that are already set untouched. The available bits are
+	specified in the NxsTokenFlags enum. All bits in `labileFlags' are cleared after each token is read.
+*/
+inline void NxsToken::SetLabileFlagBit(
+  int bit)	/* the bit (see NxsTokenFlags enum) to set in `labileFlags' */
+	{
+	labileFlags = (labileFlags | bit);
+	}
+
+/*!
+	Compares the character stored in the variable saved with the supplied character `ch'. Good for checking such
+	things as whether token stopped reading characters because it encountered a newline (and labileFlags bit
+	newlineIsToken was set):
+>
+	StoppedOn('\n');
+>
+	or whether token stopped reading characters because of a punctuation character such as a comma:
+>
+	StoppedOn(',');
+>
+*/
+inline bool NxsToken::StoppedOn(
+  char ch)	/* the character to compare with saved character */
+	{
+	// The comparison is the whole answer. Note that saved is a signed char
+	// while ch is a plain char; both are promoted to int before comparing.
+	const bool sameChar = (saved == ch);
+	return sameChar;
+	}
+inline char NxsToken::PeekAtNextChar() const // returns the nextCharInStream member; nothing is read or modified
+	{
+	return nextCharInStream;
+	}
+/*!
+	Simply outputs the current NxsString stored in `token' to the output stream `out', exactly as stored (no
+	escaping or quoting is applied here). Does not send a newline to the output stream afterwards.
+*/
+inline void NxsToken::Write(
+  std::ostream &out)	/* the output stream to which to write token NxsString */
+	{
+	out << token;
+	}
+
+/*!
+	Simply outputs the current NxsString stored in `token' to the output stream `out'. Sends a newline to the output
+	stream afterwards using std::endl, which also flushes the stream.
+*/
+inline void NxsToken::Writeln(
+  std::ostream &out)	/* the output stream to which to write `token' */
+	{
+	out << token << std::endl;
+	}
+
+inline std::map<std::string, std::string> NxsToken::ProcessAsSimpleKeyValuePairs(const char *cmdName)
+	{
+	ProcessedNxsCommand tokenVec; // receives the tokens gathered by ProcessAsCommand
+	ProcessAsCommand(&tokenVec);
+	return ParseAsSimpleKeyValuePairs(tokenVec, cmdName); // the static helper turns them into the key/value map
+	}
+
+/*!
+	Returns true if the whole token NxsString equals `s'. The comparison ignores case unless `respect_case' is true.
+	If abbreviations are to be allowed, either Begins or Abbreviation should be used instead of Equals.
+*/
+inline bool NxsToken::Equals(
+  NxsString s, /* the string for comparison to the string currently stored in this token */
+  bool respect_case) const	/* if true, comparison will be case-sensitive */
+	{
+	if (!respect_case)
+		return NxsString::case_insensitive_equals(token.c_str(), s.c_str());
+	return (strcmp(token.c_str(), s.c_str()) == 0);
+	}
+
+#endif
diff --git a/src/ncl/nxstreesblock.h b/src/ncl/nxstreesblock.h
new file mode 100644
index 0000000..081781c
--- /dev/null
+++ b/src/ncl/nxstreesblock.h
@@ -0,0 +1,1000 @@
+//	Copyright (C) 1999-2003 Paul O. Lewis
+//
+//	This file is part of NCL (Nexus Class Library) version 2.0.
+//
+//	NCL is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	NCL is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with NCL; if not, write to the Free Software Foundation, Inc.,
+//	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+
+#ifndef NCL_NXSTREESBLOCK_H
+#define NCL_NXSTREESBLOCK_H
+#include <climits>
+#include <cfloat>
+#include "ncl/nxsdefs.h"
+#include "ncl/nxstaxablock.h"
+
+
+class NxsTreesBlockAPI // abstract interface: every member function declared here is pure virtual
+  : public NxsBlock, public NxsLabelToIndicesMapper
+	{
+ 	public:
+		virtual unsigned	GetNumDefaultTree() = 0;
+		virtual unsigned	GetNumTrees() = 0;
+		virtual NxsString	GetTreeName(unsigned i) = 0; // NOTE(review): i is presumably a tree index below GetNumTrees() -- confirm
+		virtual NxsString	GetTreeDescription(unsigned i) = 0;
+		virtual NxsString	GetTranslatedTreeDescription(unsigned i) = 0;
+		virtual bool		IsDefaultTree(unsigned i) = 0;
+		virtual bool		IsRootedTree(unsigned i) = 0;
+	};
+/*! This function provides rudimentary support for parsing of NHX comments.
+	It is called during the creation of a NxsSimpleTree to handle any NHX comments.
+
+	It fills `infoMap` with the key value pairs parsed from a comment that starts with
+		&&NHX
+	\returns the unparsed portion of the comment
+*/
+std::string parseNHXComment(const std::string comment, /*!< the comment without the [] braces. If the comment does not start with &&NHX then the entire comment will be returned*/
+			std::map<std::string, std::string> *infoMap); /*!< the destination for key value pairs parsed out of the NHX comment */
+class NxsFullTreeDescription;
+class NxsSimpleNode;
+/*! The edge used by the NxsSimpleTree class. Holds either an int or a double length (see IsIntEdgeLen).
+*/
+class NxsSimpleEdge
+	{
+	public:
+		bool EdgeLenIsDefaultValue() const
+			{
+			return defaultEdgeLen;
+			}
+
+		bool IsIntEdgeLen() const
+			{
+			return hasIntEdgeLens;
+			}
+		// Returns the length as a double, converting from the int length when that is the one in use.
+		double GetDblEdgeLen() const
+			{
+			return hasIntEdgeLens ? (double) iEdgeLen : dEdgeLen ;
+			}
+		// Returns the length as an int, converting the double length with a cast when that is the one in use.
+		int GetIntEdgeLen() const
+			{
+			return hasIntEdgeLens ? iEdgeLen : (int) dEdgeLen ;
+			}
+
+		std::vector<NxsComment> GetUnprocessedComments() const
+			{
+			return unprocessedComments;
+			}
+
+		/*! \returns true if `key` was processed from a comment.
+			If the key was found and `value` pointer is not NULL, then the
+				*value will hold the value on exit
+		*/
+		bool GetInfo(const std::string &key, std::string *value) const
+			{
+			std::map<std::string, std::string>::const_iterator kvit = parsedInfo.find(key);
+			if (kvit == parsedInfo.end())
+				return false;
+			if (value != NULL)
+				*value = kvit->second;
+			return true;
+			}
+		/*! Returns a reference to the map that stores information in a generic
+			key to value mapping where both elements are strings.
+
+			This map is populated by the information from NHX comments during the creation of
+			a NxsSimpleTree.
+		*/
+		const std::map<std::string, std::string> & GetInfo() const
+			{
+			return parsedInfo;
+			}
+		const NxsSimpleNode * GetParent() const
+			{
+			return parent;
+			}
+		const NxsSimpleNode * GetChild() const
+			{
+			return child;
+			}
+		// Switches the edge to a double length and marks it as no longer the default; asString may be NULL.
+		void SetDblEdgeLen(double e, const char *asString)
+			{
+			defaultEdgeLen = false;
+			hasIntEdgeLens = false;
+			dEdgeLen = e;
+			if (asString)
+				lenAsString.assign(asString);
+
+			}
+		// Switches the edge to an int length and marks it as no longer the default; asString may be NULL.
+		void SetIntEdgeLen(int e, const char *asString)
+			{
+			defaultEdgeLen = false;
+			hasIntEdgeLens = true;
+			iEdgeLen = e;
+			if (asString)
+				lenAsString.assign(asString);
+			}
+		mutable void * scratch;
+		void SetParent(NxsSimpleNode *p)
+		    {
+		    this->parent = p;
+		    }
+	private:
+		void WriteAsNewick(std::ostream &out, bool nhx) const;
+		void DealWithNexusComments(const std::vector<NxsComment> & ecs, bool NHXComments);
+		// Both constructors set every scalar member: the edge is copied by value elsewhere, which reads them all.
+		NxsSimpleEdge(NxsSimpleNode  *par, NxsSimpleNode * des, double edgeLen)
+			:scratch(0L),
+			parent(par),
+			child(des),
+			defaultEdgeLen(true),
+			hasIntEdgeLens(false),
+			iEdgeLen(0), // not consulted while hasIntEdgeLens is false
+			dEdgeLen(edgeLen)
+			{}
+
+		NxsSimpleEdge(int edgeLen, NxsSimpleNode *par, NxsSimpleNode * des)
+			:scratch(0L),
+			parent(par),
+			child(des),
+			defaultEdgeLen(true),
+			hasIntEdgeLens(true),
+			iEdgeLen(edgeLen),
+			dEdgeLen(0.0) // not consulted while hasIntEdgeLens is true
+			{}
+
+		NxsSimpleNode * GetMutableParent() const
+			{
+			return parent;
+			}
+
+		NxsSimpleNode * parent;
+		NxsSimpleNode * child;
+		bool			defaultEdgeLen;
+		bool			hasIntEdgeLens;
+		int				iEdgeLen;
+		double			dEdgeLen;
+		std::string		lenAsString; /*easy (but inefficient) means of preserving the formatting of the input branch length */
+		std::vector<NxsComment> unprocessedComments;
+		std::map<std::string, std::string> parsedInfo;
+		friend class NxsSimpleTree;
+		friend class NxsSimpleNode;
+	};
+
+/*! The node used by the NxsSimpleTree class. Children are kept as a singly linked list: lChild, then rSib links.
+*/
+class NxsSimpleNode
+	{
+	public:
+		NxsSimpleEdge GetEdgeToParent() const
+			{
+			return edgeToPar;
+			}
+
+		const NxsSimpleEdge & GetEdgeToParentRef() const
+			{
+			return edgeToPar;
+			}
+
+		NxsSimpleEdge & GetMutableEdgeToParentRef()
+			{
+			return edgeToPar;
+			}
+
+		bool IsTip() const
+			{
+			return (lChild == 0L);
+			}
+		NxsSimpleNode *GetFirstChild() const
+			{
+			return lChild;
+			}
+		NxsSimpleNode * GetNextSib() const
+			{
+			return rSib;
+			}
+		NxsSimpleNode * GetLastChild() const
+			{
+			NxsSimpleNode * currNode = GetFirstChild();
+			if (!currNode)
+				return NULL;
+			NxsSimpleNode * nextNd = currNode->GetNextSib();
+			while (nextNd)
+				{
+				currNode = nextNd;
+				nextNd = currNode->GetNextSib();
+				}
+			return currNode;
+			}
+
+		std::vector<NxsSimpleNode *> GetChildren() const
+			{
+			std::vector<NxsSimpleNode *> children;
+			NxsSimpleNode * currNode = GetFirstChild();
+			while(currNode)
+				{
+				children.push_back(currNode);
+				currNode = currNode->GetNextSib();
+				}
+			return children;
+			}
+		// present for every leaf. UINT_MAX for internals labeled with taxlabels
+		unsigned GetTaxonIndex() const
+			{
+			return taxIndex;
+			}
+
+		// present for every leaf. UINT_MAX for internals labeled with taxlabels
+		void SetTaxonIndex(unsigned i)
+			{
+			taxIndex = i;
+			}
+
+		// non-empty only for internals that are labelled with names that are NOT taxLabels
+		std::string GetName() const
+			{
+			return name;
+			}
+		void SetName(const std::string &n)
+			{
+			name = n;
+			}
+		mutable void * scratch;
+
+		NxsSimpleNode(NxsSimpleNode *par, double edgeLen)
+			:scratch(0L),
+			lChild(0L),
+			rSib(0L),
+			edgeToPar(par, 0L, edgeLen),
+			taxIndex(UINT_MAX)
+			{
+			edgeToPar.child = this;
+			}
+
+
+	public:
+		void WriteAsNewick(std::ostream &out, bool nhx, bool useLeafNames, bool escapeNames, const NxsTaxaBlockAPI *taxa=0L, bool escapeInternals=true) const;
+
+
+		NxsSimpleNode(int edgeLen, NxsSimpleNode *par)
+			:scratch(0L),
+			lChild(0L),
+			rSib(0L),
+			edgeToPar(edgeLen, par, 0L),
+			taxIndex(UINT_MAX)
+			{
+			edgeToPar.child = this;
+			}
+
+		NxsSimpleNode * GetParent() const
+			{
+			return edgeToPar.GetMutableParent();
+			}
+
+		void AddSib(NxsSimpleNode *n)
+			{
+			if (rSib)
+				rSib->AddSib(n);
+			else
+				rSib = n;
+			}
+		void AddChild(NxsSimpleNode *n)
+			{
+			if (lChild)
+				lChild->AddSib(n);
+			else
+				lChild = n;
+			}
+		// Unlinks n from this node's child list and clears n's parent pointer (n->rSib is left untouched).
+		// Returns false, changing nothing, if n is NULL or is not a child of this node.
+		bool RemoveChild(NxsSimpleNode *n)
+			{
+			if (n == 0L || lChild == 0L)
+			    return false;
+			if (lChild == n)
+				lChild = lChild->rSib;
+			else
+			    {
+			    // Find the sibling that immediately precedes n. The cursor has to advance on every
+			    // pass; without that step the search never ends unless n is the second child.
+			    NxsSimpleNode * c = lChild;
+			    while (c->rSib != n)
+			        {
+			        if (c->rSib == 0L)
+			            return false;
+			        c = c->rSib;
+			        }
+			    c->rSib = n->rSib;
+			    }
+			n->edgeToPar.parent = 0L;
+			return true;
+			}
+		void AddSelfAndDesToPreorder(std::vector<const NxsSimpleNode *> &p) const;
+		NxsSimpleNode * FindTaxonIndex(unsigned leafIndex);
+
+        void LowLevelSetFirstChild(NxsSimpleNode *nd) {
+            lChild = nd;
+        }
+        void LowLevelSetNextSib(NxsSimpleNode *nd) {
+            rSib = nd;
+        }
+    private:
+		NxsSimpleNode * lChild;
+		NxsSimpleNode * rSib;
+		NxsSimpleEdge edgeToPar;
+		std::string name; // non-empty only for internals that are labelled with names that are NOT taxLabels
+		unsigned taxIndex; // present for every leaf. UINT_MAX for internals labeled with taxlabels
+		friend class NxsSimpleTree;
+	};
+/*! A simple tree class.
+	Internally NCL stores trees as newick strings with metadata (see the NxsFullTreeDescription class)
+	but you can create a NxsSimpleTree
+*/
+class NxsSimpleTree
+	{
+	public:
+		NxsSimpleTree(const NxsFullTreeDescription &ftd, const int defaultIntEdgeLen,
+					  const double defaultDblEdgeLen, bool treatInternalNodeLabelsAsStrings=false)
+			:root(NULL), // set before Initialize() runs so that root never holds an indeterminate pointer
+			defIntEdgeLen(defaultIntEdgeLen),
+			defDblEdgeLen(defaultDblEdgeLen),
+			realEdgeLens(false)
+			{
+			// Build the tree from the supplied description.
+			Initialize(ftd, treatInternalNodeLabelsAsStrings);
+			}
+		NxsSimpleTree(const int defaultIntEdgeLen, const double defaultDblEdgeLen)
+			:defIntEdgeLen(defaultIntEdgeLen),
+			defDblEdgeLen(defaultDblEdgeLen),
+			realEdgeLens(false)
+			{}
+		~NxsSimpleTree()
+			{
+			Clear();
+			}
+		void Initialize(const NxsFullTreeDescription &, bool treatInternalNodeLabelsAsStrings=false);
+
+
+		std::vector<const NxsSimpleNode *> GetPreorderTraversal() const;
+		std::vector<NxsSimpleNode *> & GetLeavesRef()
+			{
+			return leaves;
+			}
+		std::vector<std::vector<int> > GetIntPathDistances(bool toMRCA=false) const;
+		std::vector<std::vector<double> > GetDblPathDistances(bool toMRCA=false) const;
+
+		/** Writes just the newick description with numbers for leaf labels.
+			Neither the tree name or NEXUS ; are written
+		*/
+		void WriteAsNewick(std::ostream &out, bool nhx, bool useLeafNames, bool escapeNames, const NxsTaxaBlockAPI * taxa, bool escapeInternals=true) const
+			{
+			if (root)
+				root->WriteAsNewick(out, nhx, useLeafNames, escapeNames, taxa, escapeInternals);
+			}
+		NxsSimpleNode * RerootAt(unsigned leafIndex);
+        NxsSimpleNode * RerootAtNode(NxsSimpleNode *newRoot);
+
+		const NxsSimpleNode * GetRootConst() const
+			{
+			return root;
+			}
+	protected:
+		std::vector<NxsSimpleNode *> allNodes;
+		std::vector<NxsSimpleNode *> leaves;
+		NxsSimpleNode * root;
+		int defIntEdgeLen;
+		double defDblEdgeLen;
+		bool realEdgeLens;
+	public:
+		NxsSimpleNode * AllocNewNode(NxsSimpleNode *p)
+			{
+			NxsSimpleNode * nd;
+			if (realEdgeLens)
+				nd = new NxsSimpleNode(p, defDblEdgeLen);
+			else
+				nd = new NxsSimpleNode(defIntEdgeLen, p);
+			allNodes.push_back(nd);
+			return nd;
+			}
+
+		void Clear()
+			{
+			root = NULL;
+			for (std::vector<NxsSimpleNode *>::iterator nIt = allNodes.begin(); nIt != allNodes.end(); ++nIt)
+				delete *nIt;
+			allNodes.clear();
+			leaves.clear();
+			}
+		void FlipRootsChildToRoot(NxsSimpleNode *subRoot);
+		NxsSimpleTree(const NxsSimpleTree &); //not defined.  Not copyable
+		NxsSimpleTree & operator=(const NxsSimpleTree &); //not defined.  Not copyable
+	};
+
+/*! Encapsulates the newick string description of a tree together with metadata about that tree.
+
+	NxsTreesBlock (a friend of this class) stores its trees as NxsFullTreeDescription objects.
+	By default, NCL will "process" each tree -- converting the taxon labels to
+		numbers for the taxa (the number will be 1 + the taxon index).
+		During this processing, the trees block detects things about the tree such as whether
+		there are branch lengths on the tree, whether there are polytomies...
+
+	That information is stored in the NxsFullTreeDescription so that client code can
+	access it before (or without) parsing the newick string.
+
+	If you do not want to parse the newick string yourself, you can construct a
+		NxsSimpleTree object from a NxsFullTreeDescription object if the NxsFullTreeDescription
+		is "processed"
+
+	If the NxsTreesBlock is configured NOT to process trees (see NxsTreesBlock::SetProcessAllTreesDuringParse()),
+		the description is presumably left unprocessed; in that state every query below that calls
+		AssertProcessed() throws.
+*/
+class NxsFullTreeDescription
+	{
+	public:
+		/*! Bit values that are OR-ed together in the `flags` field. */
+		enum TreeDescFlags
+			{
+			NXS_IS_ROOTED_BIT                 = 0x0001,
+			NXS_HAS_SOME_EDGE_LENGTHS_BIT     = 0x0002,
+			NXS_MISSING_SOME_EDGE_LENGTHS_BIT = 0x0004,
+			NXS_EDGE_LENGTH_UNION             = 0x0006, /* mask: the "has some" and "missing some" bits together */
+			NXS_INT_EDGE_LENGTHS_BIT          = 0x0008,
+			NXS_HAS_ALL_TAXA_BIT              = 0x0010,
+			NXS_HAS_NHX_BIT                   = 0x0020,
+			NXS_HAS_DEG_TWO_NODES_BIT         = 0x0040,
+			NXS_HAS_POLYTOMY_BIT              = 0x0080,
+			NXS_HAS_INTERNAL_NAMES_BIT        = 0x0100,
+			NXS_HAS_NEW_INTERNAL_NAMES_BIT    = 0x0200,
+			NXS_KNOWN_INTERNAL_NAMES_BIT      = 0x0400,
+			NXS_SOME_ZERO_EDGE_LEN_BIT        = 0x0800,
+			NXS_SOME_NEGATIVE_EDGE_LEN_BIT    = 0x1000,
+			NXS_TREE_PROCESSED                = 0x2000
+			};
+
+		/*! Builds a description from a newick string, a tree name and an int whose bits carry
+			metadata about the tree. The minimum edge lengths start at INT_MAX / DBL_MAX and
+			newick-style name tokenizing starts switched off.
+		*/
+		NxsFullTreeDescription(const std::string & newickStr, /*!< the newick string */
+				const std::string &treeName, /*!< the name of the tree */
+				int infoFlags) /*!< union of the relevant bits from TreeDescFlags */
+			:newick(newickStr),
+			name(treeName),
+			flags(infoFlags),
+			minIntEdgeLen(INT_MAX),
+			minDblEdgeLen(DBL_MAX),
+			requireNewickNameTokenizing(false)
+			{
+			}
+
+		/*! Tokenizes the tree into a vector of NEXUS tokens, which makes the tree easier to parse. */
+		std::vector<std::string> GetTreeTokens() const;
+
+		/** \returns the newick string.
+			If the NxsFullTreeDescription is processed, then the string will have
+				1-based numbers corresponding to (1 + Taxa block's index of taxon)
+			If it is not processed, then it will correspond with the exact string
+				in the file. Handling unprocessed newick strings requires that the
+				client code consult the Translation table and implement NEXUS'
+				numeric interpretation of labels in order to decode all taxon
+				labels correctly.
+		*/
+		const std::string &	GetNewick() const
+			{
+			return newick;
+			}
+
+		/*! \returns the name of the tree */
+		const std::string &	GetName() const
+			{
+			return name;
+			}
+
+		/*! \returns true if the NXS_TREE_PROCESSED bit is set, i.e. the newick string has been processed. */
+		bool IsProcessed() const
+			{
+			return IsFlagSet(NXS_TREE_PROCESSED);
+			}
+
+		/*! Guard used by the metadata queries.
+			\throws NxsNCLAPIException if the tree has not been "processed"
+		*/
+		void AssertProcessed() const
+			{
+			if (IsProcessed())
+				return;
+			throw NxsNCLAPIException("Tree description queries are only supported on processed tree descriptions.");
+			}
+
+		/*! \returns true if the tree was rooted.
+			\throws NxsNCLAPIException if the tree has not been processed!
+		*/
+		bool IsRooted() const
+			{
+			AssertProcessed();
+			return IsFlagSet(NXS_IS_ROOTED_BIT);
+			}
+
+		/*! \returns true if all of the edges in the tree have an edge length (the "has some" bit
+				is set and the "missing some" bit is clear).
+			\throws NxsNCLAPIException if the tree has not been processed!
+		*/
+		bool AllEdgesHaveLengths() const
+			{
+			AssertProcessed();
+			const int lengthBits = flags & NXS_EDGE_LENGTH_UNION;
+			return lengthBits == NXS_HAS_SOME_EDGE_LENGTHS_BIT;
+			}
+
+		/*! \returns true if at least one edge in the tree has an edge length
+			\throws NxsNCLAPIException if the tree has not been processed!
+		*/
+		bool SomeEdgesHaveLengths() const
+			{
+			AssertProcessed();
+			return IsFlagSet(NXS_HAS_SOME_EDGE_LENGTHS_BIT);
+			}
+
+		/*! \returns true if all of the edge lengths that are specified can be read as integers
+			\throws NxsNCLAPIException if the tree has not been processed!
+		*/
+		bool EdgeLengthsAreAllIntegers() const
+			{
+			AssertProcessed();
+			return IsFlagSet(NXS_INT_EDGE_LENGTHS_BIT);
+			}
+
+		/*! \returns true if the tree contains all of the taxa listed in the NxsTaxaBlock associated with the trees block that generated this NxsFullTreeDescription
+			\throws NxsNCLAPIException if the tree has not been processed!
+		*/
+		bool AllTaxaAreIncluded() const
+			{
+			AssertProcessed();
+			return IsFlagSet(NXS_HAS_ALL_TAXA_BIT);
+			}
+
+		/*! \returns true if some of the edges in the tree have New Hampshire Extended style comments  (see http://www.phylosoft.org/NHX)
+			\throws NxsNCLAPIException if the tree has not been processed!
+		*/
+		bool HasNHXComments() const
+			{
+			AssertProcessed();
+			return IsFlagSet(NXS_HAS_NHX_BIT);
+			}
+
+		/*! \returns true if the tree has polytomies
+			\throws NxsNCLAPIException if the tree has not been processed!
+		*/
+		bool HasPolytomies() const
+			{
+			AssertProcessed();
+			return IsFlagSet(NXS_HAS_POLYTOMY_BIT);
+			}
+
+		/*! \returns true if the tree has some internal nodes that only have one child.
+			\throws NxsNCLAPIException if the tree has not been processed!
+		*/
+		bool HasDegreeTwoNodes() const
+			{
+			AssertProcessed();
+			return IsFlagSet(NXS_HAS_DEG_TWO_NODES_BIT);
+			}
+
+		/*! If EdgeLengthsAreAllIntegers returns true then this will return the
+			shortest edge length in the tree (useful as a means of checking for
+			constraints by programs that prohibit 0 or negative branch lengths).
+			Unlike the flag queries, this accessor does not call AssertProcessed().
+		*/
+		int smallestIntEdgeLength() const
+			{
+			return minIntEdgeLen;
+			}
+
+		/*!	If EdgeLengthsAreAllIntegers returns false then this will return the
+			shortest edge length in the tree (useful as a means of checking for
+			constraints by programs that prohibit 0 or negative branch lengths).
+			Unlike the flag queries, this accessor does not call AssertProcessed().
+		*/
+		double smallestRealEdgeLength() const
+			{
+			return minDblEdgeLen;
+			}
+
+		/*! \returns the current value of the newick-name-tokenizing switch (false unless changed). */
+		bool RequiresNewickNameTokenizing() const
+			{
+			return requireNewickNameTokenizing;
+			}
+
+		/*! Sets the newick-name-tokenizing switch to `v`. */
+		void SetRequiresNewickNameTokenizing(bool v)
+			{
+			requireNewickNameTokenizing = v;
+			}
+	private:
+		/* true when `bit` (one of the single-bit TreeDescFlags values) is set in flags */
+		bool IsFlagSet(int bit) const
+			{
+			return (flags & bit) != 0;
+			}
+
+		std::string newick; /*with 1-based numbers corresponding to (1 + Taxa block's index of taxon)*/
+		std::string name;
+		int flags; /* union of TreeDescFlags bits */
+		int minIntEdgeLen; /* if EdgeLengthsAreAllIntegers returns true then this will hold shortest edge length in the tree (useful as means of checking for constraints by programs that prohibit 0 or negative branch lengths)*/
+		double minDblEdgeLen; /* if EdgeLengthsAreAllIntegers returns false then this will hold shortest edge length in the tree (useful as means of checking for constraints by programs that prohibit 0 or negative branch lengths)*/
+		bool requireNewickNameTokenizing;  /* False by default. If true, then newick rather than NEXUS tokenizing rules should be used for the taxa names */
+
+	friend class NxsTreesBlock;
+	};
+class NxsTreesBlock;
+typedef bool (* ProcessedTreeValidationFunction)(NxsFullTreeDescription &, void *, NxsTreesBlock *);
+/*!
+	This class handles reading and storage for the NEXUS block TREES.
+	The class can read the TRANSLATE and TREE commands.
+
+	The tree is validated during the parse and then stored as a NxsFullTreeDescription
+		object which will hold the newick string. This newick string will have
+		numbers rather than names. The numbers in the tree string start at 1 (like other NEXUS numbering),
+		but they are simply 1 + the taxon index.
+
+	In previous versions of NCL (before v2.1), the client code would have to use the translate
+		table to convert the newick string into the taxon numbers.
+
+	As of v2.1, NCL now does this translation.
+
+*/
+class NxsTreesBlock
+  : public NxsTreesBlockAPI, public NxsTaxaBlockSurrogate
+	{
+	public:
+		NxsTreesBlock(NxsTaxaBlockAPI *tb);
+		virtual				~NxsTreesBlock();
+
+		void		ReplaceTaxaBlockPtr(NxsTaxaBlockAPI *tb);
+		/*! Looks `label` up among the tree sets (delegates to NxsLabelToIndicesMapper::GetIndicesFromSets). */
+		unsigned GetIndexSet(const std::string &label, NxsUnsignedSet * toFill) const
+			{
+			return NxsLabelToIndicesMapper::GetIndicesFromSets(label, toFill, treeSets);
+			}
+
+		/*! \returns the index of the default tree (the last tree in the TREES block with a * before its name)
+				if no default tree was specified then the first index (0) will be returned
+		*/
+		unsigned	GetNumDefaultTree();
+		/*! \returns the number of trees stored */
+		unsigned	GetNumTrees();
+		/*! \returns the number of trees stored */
+		unsigned	GetNumTrees() const;
+		/*! \returns the NxsFullTreeDescription for tree with index `i`
+		`i` should be in the range [0, num_trees)
+
+		If the NxsFullTreeDescription is processed (see NxsFullTreeDescription::IsProcessed())
+			then its newick string will have numbers rather than names. The numbers in the tree
+			string start at 1 (like other NEXUS numbering), but they are simply 1 + the taxon index.
+
+		In previous versions of NCL (before v2.1), the client code would have to use the translate
+			table to convert the newick string into the taxon numbers.
+
+
+		*/
+		const NxsFullTreeDescription & GetFullTreeDescription(unsigned i) const;
+		/*! \returns a 1-based number for the last tree read that has the name `name` */
+		unsigned	TreeLabelToNumber(const std::string & name) const;
+		/*! \returns the tree name for the tree with index `i`
+		i should be in the range [0, ntrees)
+		*/
+		NxsString	GetTreeName(unsigned i);
+		/*! \returns the tree description object for the tree with index `i`
+		i should be in the range [0, ntrees)
+		*/
+		NxsString	GetTreeDescription(unsigned i);
+		/*! \returns the newick string for the tree with index i. The string will have
+			the taxon names rather than numbers (or other translate table keys) in it.
+		i should be in the range [0, ntrees)
+		*/
+		NxsString	GetTranslatedTreeDescription(unsigned i);
+		/*! \returns true if the tree with index i is the default tree
+		i should be in the range [0, ntrees)
+		*/
+		bool		IsDefaultTree(unsigned i);
+		/*! \returns true if the tree is thought to be rooted (could be rooted
+			because this is NCL's default, or it could indicate that a [&R]
+			comment was encountered).
+		i should be in the range [0, ntrees)
+		*/
+		bool		IsRootedTree(unsigned i);
+		virtual void		Report(std::ostream &out) NCL_COULD_BE_CONST ; /*v2.1to2.2 1 */
+		virtual void		BriefReport(NxsString &s) NCL_COULD_BE_CONST ; /*v2.1to2.2 1 */
+		virtual void		Reset();
+		/*! Registers the reader with both base classes (NxsBlock and NxsTaxaBlockSurrogate). */
+		void				SetNexus(NxsReader *nxsptr)
+			{
+			NxsBlock::SetNexus(nxsptr);
+			NxsTaxaBlockSurrogate::SetNexusReader(nxsptr);
+			}
+		/*! \ref BlockTypeIDDiscussion */
+        virtual const std::string & GetBlockName() const
+            {
+            return NCL_BLOCKTYPE_ATTR_NAME;
+            }
+
+		void WriteAsNexus(std::ostream &out) const;
+
+		virtual VecBlockPtr	GetImpliedBlocks()
+			{
+			return GetCreatedTaxaBlocks();
+			}
+
+		/*only used if the linkAPI is enabled*/
+		virtual void		HandleLinkCommand(NxsToken & token)
+			{
+			HandleLinkTaxaCommand(token);
+			}
+		virtual void		WriteLinkCommand(std::ostream &out) const
+			{
+			WriteLinkTaxaCommand(out);
+			}
+
+		unsigned GetMaxIndex() const;
+		unsigned GetIndicesForLabel(const std::string &label, NxsUnsignedSet *inds) const;
+		bool AddNewIndexSet(const std::string &label, const NxsUnsignedSet & inds);
+		bool AddNewPartition(const std::string &label, const NxsPartition & inds);
+
+		bool GetAllowImplicitNames() const
+			{
+			return allowImplicitNames;
+			}
+		bool GetUseNewickTokenizingDuringParse() const
+		    {
+		    return useNewickTokenizingDuringParse;
+		    }
+		/*! \returns true if the block uses the v2.1 style of parsing in which the tree is interpreted and converted into
+				a newick string with standard taxon numbering
+			If false, then the NxsTreesBlock uses the v2.0 API in which the tree reader simply stores the tree string
+				as written in the file (so the client code has to check the translate table in order to interpret
+				the newick stream).
+			true by default.
+		*/
+		bool GetProcessAllTreesDuringParse() const
+			{
+			return processAllTreesDuringParse;
+			}
+		void SetAllowImplicitNames(bool s)
+			{
+			allowImplicitNames = s;
+			}
+		void SetUseNewickTokenizingDuringParse(bool v)
+		    {
+		    useNewickTokenizingDuringParse = v;
+		    }
+		void SetTreatIntegerLabelsAsNumbers(bool s)
+		    {
+		    treatIntegerLabelsAsNumbers = s;
+		    }
+		/*! If true then the block will use the v2.1 style of parsing in which the tree is interpreted and converted into
+				a newick string with standard taxon numbering
+			If false, then the NxsTreesBlock will use the v2.0 API in which the tree reader simply stores the tree string
+				as written in the file (so the client code has to check the translate table in order to interpret
+				the newick stream).
+			true by default.
+		*/
+		void SetProcessAllTreesDuringParse(bool s)
+			{
+			processAllTreesDuringParse = s;
+			}
+		/* Interprets the newick string as a tree. This converts the newick string
+			into one in which 1-based numbers are used for taxon labels (raw newick
+			strings can contain numbers, taxon labels, tax set names or translate
+			table keys as taxon identifiers).
+
+			\throws NxsException
+			This function builds trees in memory. It may reveal illegal newick strings that were not
+			detected as illegal on the parse, so NxsExceptions may be raised.
+
+			Explicitly calling this function is not necessary unless
+			processAllTreesDuringParse is false (because of a previous call to
+			SetProcessAllTreesDuringParse()).
+		*/
+		void ProcessTree(NxsFullTreeDescription &treeDesc) const;
+		/* Convenience function that calls ProcessTree() on each stored
+			NxsFullTreeDescription instance.
+
+			\throws NxsException
+			This function builds trees in memory. It may reveal illegal newick strings that were not
+			detected as illegal on the parse, so NxsExceptions may be raised.
+
+			Explicitly calling this function is not necessary unless
+			processAllTreesDuringParse is false (because of a previous call to
+			SetProcessAllTreesDuringParse()).
+		*/
+		void ProcessAllTrees() const
+			{
+			// `trees` is declared mutable, which is why a non-const iterator is available in this const member
+			std::vector<NxsFullTreeDescription>::iterator trIt = trees.begin();
+			for (; trIt != trees.end(); ++trIt)
+				ProcessTree(*trIt);
+			}
+
+
+		/*---------------------------------------------------------------------------------------
+		| Replaces the contents of this block with a copy of `other`.
+		| Results in aliasing of the taxa, assumptionsBlock blocks!
+		| Assigning a block to itself is a no-op.
+		*/
+		NxsTreesBlock & operator=(const NxsTreesBlock &other)
+			{
+			// Reset() runs before anything is copied, so without this guard a self-assignment
+			// would reset the block and then copy from the already-reset object
+			if (this == &other)
+				return *this;
+			Reset();
+			CopyBaseBlockContents(static_cast<const NxsBlock &>(other));
+			CopyTaxaBlockSurrogateContents(other);
+			CopyTreesBlockContents(other);
+			return *this;
+			}
+
+		/*---------------------------------------------------------------------------------------
+		| Copies every NxsTreesBlock-level field (settings, trees, sets, partitions, callback) from `other`.
+		| Results in aliasing of the taxa, assumptionsBlock blocks!
+		*/
+		virtual void CopyTreesBlockContents(const NxsTreesBlock &other)
+			{
+			allowImplicitNames = other.allowImplicitNames;
+			useNewickTokenizingDuringParse = other.useNewickTokenizingDuringParse;
+			treatIntegerLabelsAsNumbers = other.treatIntegerLabelsAsNumbers;
+			processAllTreesDuringParse = other.processAllTreesDuringParse;
+			writeFromNodeEdgeDataStructure = other.writeFromNodeEdgeDataStructure;
+			validateInternalNodeLabels = other.validateInternalNodeLabels;
+			allowNumericInterpretationOfTaxLabels = other.allowNumericInterpretationOfTaxLabels;
+			constructingTaxaBlock = other.constructingTaxaBlock;
+			newtaxa = other.newtaxa;
+			trees = other.trees;
+			capNameToInd = other.capNameToInd;
+			defaultTreeInd = other.defaultTreeInd;
+			writeTranslateTable = other.writeTranslateTable;
+			treeSets = other.treeSets;
+			treePartitions = other.treePartitions;
+			processedTreeValidationFunction = other.processedTreeValidationFunction;
+			ptvArg = other.ptvArg; // the callback blob pointer is shared, not duplicated
+			treatAsRootedByDefault = other.treatAsRootedByDefault;
+			allowUnquotedSpaces = other.allowUnquotedSpaces;
+			disambiguateDuplicateNames = other.disambiguateDuplicateNames;
+			}
+        bool GetTreatAsRootedByDefault() const {
+            return treatAsRootedByDefault;
+        }
+        void SetTreatAsRootedByDefault(bool v) {
+            this->treatAsRootedByDefault = v;
+        }
+		/*! \returns a new heap-allocated NxsTreesBlock that was constructed with the same taxa pointer
+			and then assigned from this block. The caller receives the raw pointer.
+		*/
+		virtual NxsTreesBlock * Clone() const
+			{
+			NxsTreesBlock * a = new NxsTreesBlock(taxa);
+			*a = *this;
+			return a;
+			}
+		static void ProcessTokenVecIntoTree(const ProcessedNxsCommand & token,
+		                                    NxsFullTreeDescription & ftd,
+		                                    NxsLabelToIndicesMapper *,
+		                                    std::map<std::string, unsigned> &capNameToInd,
+		                                    bool allowNewTaxa,
+		                                    NxsReader * nexusReader,
+		                                    const bool respectCase=false,
+		                                    const bool validateInternalNodeLabels=true,
+		                                    const bool treatIntegerLabelsAsNumbers=false,
+		                                    const bool allowNumericInterpretationOfTaxLabels=true,
+		                                    const bool autoNumberDuplicateNames=false);
+		static void ProcessTokenStreamIntoTree(NxsToken & token, NxsFullTreeDescription & ftd,
+		                                      NxsLabelToIndicesMapper *,
+		                                      std::map<std::string, unsigned> &capNameToInd,
+		                                      bool allowNewTaxa,
+		                                      NxsReader * nexusReader,
+		                                      const bool respectCase=false,
+		                                      const bool validateInternalNodeLabels=true,
+		                                      const bool treatIntegerLabelsAsNumbers=false,
+		                                      const bool allowNumericInterpretationOfTaxLabels=true,
+		                                      const bool allowUnquotedSpaces=false,
+		                                      const bool autoNumberDuplicateNames=false);
+
+		void SetWriteFromNodeEdgeDataStructure(bool v)
+			{
+			writeFromNodeEdgeDataStructure = v;
+			}
+		/* 	Processes all trees and then
+			provides low-level access to the "raw" vector of trees stored in the trees block
+		*/
+		std::vector<NxsFullTreeDescription> & GetProcessedTrees()
+			{
+			ProcessAllTrees();
+			return trees;
+			}
+
+		/*! This function allows you to register a callback function that is called after each tree is parsed.
+
+			The signature of your function should be:\n
+				\code
+				bool someFunctionName(NxsFullTreeDescription &treeDesc, void * blob, NxsTreesBlock * treesB);
+				\endcode
+			where:
+				- treeDesc is the NxsFullTreeDescription for the tree that was just read.
+				- blob is pointer to any object or 0L. You supply this blob of data as an argument in
+					setValidationCallbacks and the NxsTreesBlock passes it every time that it calls the callback.
+					By passing in your own object, you can do bookkeeping between calls without using global variables
+					(though you will have to cast the pointer to the blob of data, of course).
+				- treesB is a pointer to the block that is conducting the parse.
+
+			If your function returns false, then the trees block will not store the tree.
+			If your callback function returns true, then the tree will be stored.
+			In either case the NxsTreesBlock will continue parsing after your function returns.
+
+			This Callback hook is convenient for rejecting unwanted trees to save on memory, but it can also
+			be used as an optimization.
+
+			See the example executable in example/splitsinfile.  This NCL client uses this callback to store
+			the splits from a tree as the TREES block is being parsed.  It returns false in each case, so that
+			the trees are not stored after they are used.
+		*/
+
+		void setValidationCallbacks(
+			ProcessedTreeValidationFunction func, /*!< your pointer to your callback function */
+			void * blob) /*!< pointer to any object that you would like to access during parse */
+			{
+			this->processedTreeValidationFunction = func;
+			this->ptvArg = blob;
+			}
+		bool 		SwapEquivalentTaxaBlock(NxsTaxaBlockAPI * tb)
+		{
+			return SurrogateSwapEquivalentTaxaBlock(tb);
+		}
+		void ReadPhylipTreeFile(NxsToken & token);
+		void setWriteTranslateTable(bool wtt)
+		{
+			this->writeTranslateTable = wtt;
+		}
+		void setAllowNumericInterpretationOfTaxLabels(bool x) {
+			this->allowNumericInterpretationOfTaxLabels = x;
+		}
+		void SetAllowUnquotedSpaces(bool x) {
+		    this->allowUnquotedSpaces = x;
+		}
+		void SetDisambiguateDuplicateNames(bool x) {
+		    this->disambiguateDuplicateNames = x;
+		}
+		/*! Sets the boolean field that determines whether or not the trees
+			block will treat internal node labels as taxon labels during the
+			parse. In this case the labels will be checked against the taxa
+			block (true is the default).
+
+			This can cause problems if the internal node names are integers that
+			are not intended to be taxon labels (eg. support statements for the
+			subtending branches).
+		*/
+		void setValidateInternalNodeLabels(bool x) {
+			this->validateInternalNodeLabels = x; /** if true then labels that occur for internal nodes will be validated via the taxa block (true is the default).  This can cause problems if the internal node names are integer that are not intended to be taxon labels. */
+		}
+		/*! \returns true if the block will treat internal node labels
+			as taxon labels during the parse. In this case the labels will
+			be checked against the taxa block (true is the default).
+
+			This can cause problems if the internal node names are integers that
+			are not intended to be taxon labels (eg. support statements for the
+			subtending branches).
+		*/
+		bool getValidateInternalNodeLabels() const {
+			return this->validateInternalNodeLabels;
+		}
+		void WriteTranslateCommand(std::ostream & out) const;
+	protected :
+		void ReadTreeFromOpenParensToken(NxsFullTreeDescription &td, NxsToken & token);
+
+		void WriteTreesCommand(std::ostream & out) const;
+		void ConstructDefaultTranslateTable(NxsToken &token, const char * cmd);
+
+		bool allowImplicitNames; /** false by default, true causes the trees block to create a taxa block from the labels found in the trees. */
+		bool useNewickTokenizingDuringParse; /** false by default */
+		bool treatIntegerLabelsAsNumbers; // if true and allowImplicitNames is true, then new taxon labels that are integers will be treated as the taxon number (rather than arbitrary labels)
+		bool processAllTreesDuringParse; /** true by default, false speeds processing but disables detection of errors*/
+		bool constructingTaxaBlock; /** true if new names are being tolerated */
+		bool writeFromNodeEdgeDataStructure; /**this will probably only ever be set to true in testing code. If true the WriteTrees function will convert each tree to NxsSimpleTree object to write the newick*/
+		bool validateInternalNodeLabels; /** if true then labels that occur for internal nodes will be validated via the taxa block (true is the default).  This can cause problems if the internal node names are integer that are not intended to be taxon labels. */
+		bool allowNumericInterpretationOfTaxLabels;
+		bool allowUnquotedSpaces; // default false. If true, then spaces are not token breakers in tree strings
+		bool disambiguateDuplicateNames; // default false. Set via SetDisambiguateDuplicateNames(). NOTE(review): presumably supplies the autoNumberDuplicateNames argument of the ProcessToken*IntoTree functions -- confirm in the implementation
+
+		mutable std::vector<NxsFullTreeDescription> trees;
+		mutable std::map<std::string, unsigned> capNameToInd;
+		unsigned			defaultTreeInd;		/* 0-offset index of default tree specified by user, or 0 if user failed to specify a default tree using an asterisk in the NEXUS data file */
+		NxsUnsignedSetMap 	treeSets;
+		NxsPartitionsByName treePartitions;
+
+		bool writeTranslateTable ; // only affects writing to NEXUS. Default is true
+
+		ProcessedTreeValidationFunction processedTreeValidationFunction; // callback registered through setValidationCallbacks()
+		void * ptvArg; // opaque pointer handed back to the callback; not owned by the block as far as this header shows
+        bool treatAsRootedByDefault; /* true by default */
+		virtual	void		Read(NxsToken &token);
+		void				HandleTranslateCommand(NxsToken &token);
+		void				HandleTreeCommand(NxsToken &token, bool rooted);
+
+		friend class PublicNexusReader;
+	};
+
+typedef NxsTreesBlock TreesBlock;
+/*! Block factory (derived from NxsBlockFactory) whose GetBlockReaderForID() returns a NxsTreesBlock pointer.
+	Only the declaration appears in this class body.
+	NOTE(review): the first parameter is spelled with the NCL_BLOCKTYPE_ATTR_NAME macro, so its real
+	name depends on how that macro is defined -- confirm in the NCL configuration headers.
+*/
+class NxsTreesBlockFactory
+	:public NxsBlockFactory
+	{
+	public:
+		virtual NxsTreesBlock  *	GetBlockReaderForID(const std::string & NCL_BLOCKTYPE_ATTR_NAME, NxsReader *reader, NxsToken *token);
+	};
+
+#endif
diff --git a/src/ncl/nxsunalignedblock.h b/src/ncl/nxsunalignedblock.h
new file mode 100644
index 0000000..9826c41
--- /dev/null
+++ b/src/ncl/nxsunalignedblock.h
@@ -0,0 +1,332 @@
+//	Copyright (C) 2007 Paul O. Lewis
+//
+//	This file is part of NCL (Nexus Class Library) version 2.0.
+//
+//	NCL is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	NCL is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with NCL; if not, write to the Free Software Foundation, Inc.,
+//	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+
+#ifndef NCL_NXSUNALIGNEDBLOCK_H
+#define NCL_NXSUNALIGNEDBLOCK_H
+
+#include "ncl/nxsdefs.h"
+#include "ncl/nxstaxablock.h"
+#include "ncl/nxscharactersblock.h"
+//@POL Note: This file is not yet ready for use (Paul Lewis, 19-May-2007)
+
+class NxsTaxaBlockAPI;
+
+/*!
+	This class handles reading and storage for the NEXUS block UNALIGNED. It overrides the member functions Read and
+	Reset, which are abstract virtual functions in the base class NxsBlock.
+>
+	Below is a table showing the correspondence between the elements of an UNALIGNED block in a NEXUS file and the
+	variables and member functions of the NxsUnalignedBlock class that can be used to access each piece of information
+	stored. Items in parenthesis should be viewed as "see also" items.
+>
+	NEXUS		  Command		 Data			Member
+	Command		  Attribute		 Data			Member
+	---------------------------------------------------------------------
+	DIMENSIONS	  NEWTAXA		 newtaxa
+
+				  NTAX			 ntax			GetNTax
+
+	FORMAT		  DATATYPE		 datatype		GetDataType
+
+				  RESPECTCASE	 respectingCase IsRespectCase
+
+				  MISSING		 missing		GetMissingSymbol
+
+				  SYMBOLS		 symbols		GetSymbols
+
+				  EQUATE		 equates		GetEquateKey
+												GetEquateValue
+												GetNumEquates
+
+				  (NO)LABELS	 labels			IsLabels
+
+	TAXLABELS					 taxonLabels	GetTaxonLabels
+
+	MATRIX						 matrix			GetState
+												GetInternalRepresentation
+												GetNumStates
+												GetNumMatrixRows
+												IsPolymorphic
+>
+*/
+class NxsUnalignedBlock
+  : public NxsBlock, public NxsTaxaBlockSurrogate
+	{
+	public:
+
+		class NxsX_NoDataForTaxon
+			{
+			public:
+				NxsX_NoDataForTaxon(unsigned i) : taxon_index(i) {}
+				unsigned taxon_index;
+			};	/* thrown if a function is called with an index to a taxon for which no data is stored */
+
+								NxsUnalignedBlock(NxsTaxaBlockAPI * tb);
+		virtual					~NxsUnalignedBlock();
+
+		void					ShowStateLabels(std::ostream & out, NxsDiscreteDatum s);
+		NxsCharactersBlock::DataTypesEnum	GetDataType() const ;
+		NxsCharactersBlock::DataTypesEnum	GetOriginalDataType() const ;
+		const NxsDiscreteStateRow * GetDiscreteMatrixRow(unsigned taxInd) const
+			{
+			if (taxInd >= uMatrix.size())
+				return NULL;
+			return &uMatrix[taxInd];
+			}
+		NxsDiscreteStateRow		GetInternalRepresentation(unsigned i, unsigned j);
+		unsigned				GetNTaxWithData();
+		unsigned				GetNTaxTotal();
+		unsigned				GetNTaxTotal() const;
+		unsigned				GetNumEquates();
+		unsigned				GetNumMatrixRows();
+		unsigned				GetNumStates(unsigned i, unsigned j);
+		unsigned				NumCharsForTaxon(unsigned i);
+		char					GetMissingSymbol();
+		bool					IsLabels();
+		bool					IsMissingState(unsigned i, unsigned j);
+		bool					IsPolymorphic(unsigned i, unsigned j);
+		bool					IsRespectCase();
+		unsigned				GetStateSymbolIndex(unsigned i, unsigned j, unsigned k = 0);	// added by mth for standard data types
+		const char *			GetSymbols();
+		virtual void			DebugShowMatrix(std::ostream & out, const char * marginText = NULL) NCL_COULD_BE_CONST ; /*v2.1to2.2 1 */
+		virtual void			Report(std::ostream & out) NCL_COULD_BE_CONST ; /*v2.1to2.2 1 */
+		virtual void			Reset();
+		void					SetNexus(NxsReader *nxsptr)
+			{
+			NxsBlock::SetNexus(nxsptr);
+			NxsTaxaBlockSurrogate::SetNexusReader(nxsptr);
+			}
+		virtual const std::string & GetBlockName() const
+			{
+			return NCL_BLOCKTYPE_ATTR_NAME;
+			}
+
+		void					WriteAsNexus(std::ostream & out) const;
+		void					WriteFormatCommand(std::ostream & out) const;
+		void					WriteMatrixCommand(std::ostream & out) const;
+		const char *			GetDatatypeName() const
+			{
+			return NxsCharactersBlock::GetNameOfDatatype(datatype);
+			}
+
+		virtual VecBlockPtr		GetImpliedBlocks()
+			{
+			return GetCreatedTaxaBlocks();
+			}
+
+		/*only used it the linkAPI is enabled*/
+		virtual void		HandleLinkCommand(NxsToken & token)
+			{
+			HandleLinkTaxaCommand(token);
+			}
+		virtual void		WriteLinkCommand(std::ostream &out) const
+			{
+			WriteLinkTaxaCommand(out);
+			}
+
+		/*---------------------------------------------------------------------------------------
+		| Assignment: resets this block, then copies the NxsBlock, NxsTaxaBlockSurrogate and
+		| NxsUnalignedBlock contents of `other` into it. Returns *this.
+		| Results in aliasing of the taxa, assumptionsBlock blocks!
+		| Self-assignment is a no-op: without the guard, Reset() would run on the very object
+		| that the following Copy... calls read from.
+		*/
+		NxsUnalignedBlock & operator=(const NxsUnalignedBlock &other)
+			{
+			if (this == &other)
+				return *this;
+			Reset();
+			CopyBaseBlockContents(static_cast<const NxsBlock &>(other));
+			CopyTaxaBlockSurrogateContents(other);
+			CopyUnalignedBlockContents(other);
+			return *this;
+			}
+
+		/*---------------------------------------------------------------------------------------
+		| Copies the data members that NxsUnalignedBlock itself declares from `other` into this
+		| block (operator= copies the NxsBlock and NxsTaxaBlockSurrogate parts through separate calls).
+		| `originalDatatype` is copied together with `datatype` so that GetOriginalDataType()
+		| reports the same value on a copy (or on the result of Clone()) as on the source block.
+		| Results in aliasing of the taxa, assumptionsBlock blocks!
+		*/
+		virtual void CopyUnalignedBlockContents(const NxsUnalignedBlock &other)
+			{
+			nChar = other.nChar;
+			nTaxWithData = other.nTaxWithData;
+			matchchar = other.matchchar;
+			respectingCase = other.respectingCase;
+			transposing = other.transposing;
+			labels = other.labels;
+			missing = other.missing;
+			gap = other.gap;
+			symbols = other.symbols;
+			equates = other.equates;
+			mapper = other.mapper;
+			uMatrix = other.uMatrix;
+			datatype = other.datatype;
+			originalDatatype = other.originalDatatype;
+			statesFormat = other.statesFormat;
+			}
+
+		virtual NxsUnalignedBlock * Clone() const
+			{
+			NxsUnalignedBlock * a = new NxsUnalignedBlock(taxa);
+			*a = *this;
+			return a;
+			}
+		bool 		SwapEquivalentTaxaBlock(NxsTaxaBlockAPI * tb)
+		{
+			return SurrogateSwapEquivalentTaxaBlock(tb);
+		}
+        std::string GetMatrixRowAsStr(const unsigned rowIndex) const;
+	protected:
+		bool					IsInSymbols(char ch);
+		void					HandleDimensions(NxsToken & token);
+		void					HandleEndblock(NxsToken & token);
+		virtual void			HandleFormat(NxsToken & token);
+		virtual void			HandleMatrix(NxsToken & token);
+		virtual bool			HandleNextState(NxsToken & token, unsigned taxInd, unsigned charInd, NxsDiscreteStateRow & new_row, const NxsString &);
+		virtual void			Read(NxsToken & token);
+		void					ResetSymbols();
+		std::string				FormatState(NxsDiscreteDatum x) const;
+
+		void					WriteStatesForMatrixRow(std::ostream &out, unsigned currTaxonIndex) const;
+
+		unsigned				nChar;				/* number of columns in matrix	*/
+		unsigned				nTaxWithData;		/* number of non empty rows in the matrix*/
+
+		char					matchchar;			/* match symbol to use in matrix */
+		bool					respectingCase;		/* if true, RESPECTCASE keyword specified in FORMAT command */
+		bool					transposing;		/* indicates matrix will be in transposed format */
+		bool					labels;				/* indicates whether or not labels will appear on left side of matrix */
+
+		char					missing;			/* missing data symbol */
+		char                    gap; /* gap symbol, will often be \0, but can be - */
+
+		std::string				symbols;			/* list of valid character state symbols */
+		std::map<char, NxsString> equates;			/* list of associations defined by EQUATE attribute of FORMAT command */
+
+		NxsDiscreteDatatypeMapper mapper;
+		NxsDiscreteStateMatrix	uMatrix;		/* storage for unaligned data */
+
+	private:
+		NxsCharactersBlock::DataTypesEnum			datatype;			/* flag variable (see datatypes enum) */
+		NxsCharactersBlock::DataTypesEnum			originalDatatype;			/* flag variable (see datatypes enum) */
+		NxsCharactersBlock::StatesFormatEnum		statesFormat;
+
+		NxsDiscreteStateCell						GetStateIndex(unsigned i, unsigned j, unsigned k);
+		void					ResetDatatypeMapper();
+		bool					TaxonIndHasData(const unsigned ind) const;
+		friend class PublicNexusReader;
+		friend class MultiFormatReader;
+	};
+
+class NxsUnalignedBlockFactory
+	:public NxsBlockFactory
+	{
+	public:
+		virtual NxsUnalignedBlock  *GetBlockReaderForID(const std::string & NCL_BLOCKTYPE_ATTR_NAME, NxsReader *reader, NxsToken *token);
+	};
+
+/*!
+	Returns the `originalDatatype' data member.
+	NOTE(review): the previous comment said "datatype listed in the CHARACTERS block", which looks copied from
+	another class; for this class it is presumably the datatype as first declared for the UNALIGNED block -- confirm.
+	The original datatype can differ from the current datatype if the symbols list of a built in type was augmented
+	(thus converting it to standard).
+*/
+inline NxsCharactersBlock::DataTypesEnum NxsUnalignedBlock::GetOriginalDataType() const
+	{
+	return originalDatatype;
+	}
+
+
+/*!
+	Returns the `datatype' data member as a NxsCharactersBlock::DataTypesEnum value. If you want the name of the
+	datatype, you should call NxsUnalignedBlock::GetDatatypeName instead.
+*/
+inline NxsCharactersBlock::DataTypesEnum NxsUnalignedBlock::GetDataType() const
+	{
+	return datatype;
+	}
+
+
+/*!
+	Returns the missing data symbol currently in effect. If no missing data symbol specified, returns '\0'.
+*/
+inline char NxsUnalignedBlock::GetMissingSymbol()
+	{
+	return missing;
+	}
+
+/*!
+	Returns the number of taxa that have data (or will have data according to the Dimensions command, if the matrix
+		has not been read.
+*/
+inline unsigned NxsUnalignedBlock::GetNTaxWithData()
+	{
+	return nTaxWithData;
+	}
+
+/*!
+	Returns the number of rows currently held in `uMatrix'.
+	NOTE(review): presumably one row is kept per taxon of the associated taxa block, which would make this the
+	total number of taxa -- confirm against the code that sizes `uMatrix'.
+*/
+inline unsigned NxsUnalignedBlock::GetNTaxTotal()
+	{
+	return (unsigned)uMatrix.size();
+	}
+
+/*!
+	Const overload of GetNTaxTotal: returns the number of rows currently held in `uMatrix'.
+*/
+inline unsigned NxsUnalignedBlock::GetNTaxTotal() const
+	{
+	return (unsigned)uMatrix.size();
+	}
+
+/*!
+	Returns the number of stored equate associations.
+*/
+inline unsigned NxsUnalignedBlock::GetNumEquates()
+	{
+	return (unsigned)equates.size();
+	}
+
+/*!
+	Returns the number of rows in `uMatrix', i.e. the same value that GetNTaxTotal returns.
+	Use GetNTaxWithData to obtain `nTaxWithData' instead, which can be smaller because the user does not have to
+	provide data for all taxa specified in the TAXA block.
+*/
+inline unsigned NxsUnalignedBlock::GetNumMatrixRows()
+	{
+	return (unsigned)uMatrix.size();
+	}
+
+/*!
+	Returns data member `symbols' as a C string. The pointer comes from std::string::c_str(), so it is never NULL
+	(an empty symbols list yields an empty string) and it is only valid until `symbols' is next modified.
+*/
+inline const char * NxsUnalignedBlock::GetSymbols()
+	{
+	return symbols.c_str();
+	}
+
+/*!
+	Returns true if LABELS was specified in the FORMAT command, false otherwise.
+*/
+inline bool NxsUnalignedBlock::IsLabels()
+	{
+	return labels;
+	}
+
+/*!
+	Returns true if RESPECTCASE was specified in the FORMAT command, false otherwise.
+*/
+inline bool NxsUnalignedBlock::IsRespectCase()
+	{
+	return respectingCase;
+	}
+
+#endif
diff --git a/src/ncl/nxsutilcopy.h b/src/ncl/nxsutilcopy.h
new file mode 100644
index 0000000..eee67e3
--- /dev/null
+++ b/src/ncl/nxsutilcopy.h
@@ -0,0 +1,264 @@
+//	Copyright (C) 2008 Mark Holder
+//
+//	This file is part of NCL (Nexus Class Library) version 2.1
+//
+//	NCL is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	NCL is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with NCL; if not, write to the Free Software Foundation, Inc.,
+//	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+
+// This code is based on code developed by Mark Holder for the CIPRES project
+// Much of this file comes from Andrei Alexandrescu "Modern C++ Design"
+
+#if !defined NXS_UTIL_COPY_H
+#define NXS_UTIL_COPY_H
+#include <algorithm>
+#include <cstring>
+
+#if defined(_MSC_VER)
+#	undef	HAVE_COMPILE_TIME_DISPATCH
+#else
+#	define HAVE_COMPILE_TIME_DISPATCH
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+///	Int2Type<compile time constant integer> defines a unique (and stateless)
+///		class associated with a given integer.  Used for compile time dispatching
+///		of function calls or creation of appropriate templated classes.
+///
+///	defines an unnamed enum "value" that is equal to the integer used to
+///		define the class.
+///	\author	Andrei Alexandrescu "Modern C++ Design"
+//////////
+
+template<int v>
+class Int2Type
+	{
+		public:
+			enum {value = v};
+	};
+
+typedef Int2Type<true>  TrueAsAType;
+typedef Int2Type<false> FalseAsAType;
+
+////////////////////////////////////////////////////////////////////////////////
+///	Type2Type<typename> defines a unique (and stateless) class for each type
+///		that is specified as the template argument.
+///	This is useful in controlling the return type of templated functions in
+///		lieu of partial template specialization of templated functions (which is
+///		not allowed by the C++ standard)
+///	Defines the typedef OriginalType which corresponds to the template argument
+///	\author	Andrei Alexandrescu "Modern C++ Design"
+//////////
+
+template<typename T>
+class Type2Type
+	{
+		public:
+			typedef T OriginalType;
+	};
+
+
+namespace ncl
+{
+namespace hidden
+{
+// used by #COMPILE_TIME_ASSERT
+template<bool> struct CompileTimeChecker
+	{
+	CompileTimeChecker(...); //default
+	};
+// used by #COMPILE_TIME_ASSERT
+template<> struct CompileTimeChecker<false>{};
+}
+}
+
+////////////////////////////////////////////////////////////////////////////////
+///	\def COMPILE_TIME_ASSERT(condition, msg)
+///	\brief A error-detection macro for asserting that conditions are true at compile time.
+///
+///	Usage:
+///
+///		COMPILE_TIME_ASSERT(condition to test, error_clue)
+///
+///	\note	The error_clue must be one alphanumeric word (no spaces or punctuation).
+///	The condition to test must be known at compile time (if not, a cryptic error message such as
+///	"illegal non-type template argument" will be generated).
+///	If the compile time assertion evaluates to false, a message such as "Illegal conversion
+///		from ERROR_error_clue to CompileTimeChecker<false> ..." will be generated.
+///
+///	Implementation Details:
+///
+///		ncl::hidden::CompileTimeChecker is a boolean-templated class.
+///		The constructor of ncl::hidden::CompileTimeChecker<true> accepts any type
+///		The only constructor for ncl::hidden::CompileTimeChecker<false> is the default constructor
+///		The macro COMPILE_TIME_ASSERT(condition, msg):
+///			1	Declares a dummy class ERROR_msg
+///			2	checks if it can instantiate CompileTimeChecker<condition> from an ERROR_msg type object.
+///		If the condition is true, the construction will succeed, if not the error message will be generated
+///		Note that while CompileTimeChecker lives in the ncl::hidden namespace, the macro is visible
+///		to any file that includes this header. A class such as ERROR_msg is declared inside the braces that
+///		the macro expands to, so it is local to that block.
+///		The resulting code is not affected by the insertion of COMPILE_TIME_ASSERT because the entire construction
+///		is inside a sizeof() so no objects are really instantiated.
+///
+///	\author	Andrei Alexandrescu "Modern C++ Design"
+///
+//////////
+#define COMPILE_TIME_ASSERT(condition, msg) {class ERROR_##msg {}; (void)sizeof(ncl::hidden::CompileTimeChecker<(condition)>(ERROR_##msg()));}
+
+#if defined(HAVE_COMPILE_TIME_DISPATCH)
+template<typename T> struct SupportsBitwiseCopy { enum {kResult = false};	};
+template<typename T> struct SupportsBitwiseCopy<T*> {	enum {kResult = true}; 	};
+template<> struct SupportsBitwiseCopy<short int> {	enum {kResult = true}; 	};
+template<> struct SupportsBitwiseCopy<int> {	enum {kResult = true}; 	};
+template<> struct SupportsBitwiseCopy<char> {	enum {kResult = true}; 	};
+template<> struct SupportsBitwiseCopy<long int> {	enum {kResult = true}; 	};
+template<> struct SupportsBitwiseCopy<double> {	enum {kResult = true}; 	};
+template<> struct SupportsBitwiseCopy<unsigned short int> {	enum {kResult = true}; 	};
+template<> struct SupportsBitwiseCopy<unsigned int> {	enum {kResult = true}; 	};
+template<> struct SupportsBitwiseCopy<unsigned char> {	enum {kResult = true}; 	};
+template<> struct SupportsBitwiseCopy<unsigned long int> {	enum {kResult = true}; 	};
+template<> struct SupportsBitwiseCopy<bool> {	enum {kResult = true}; 	};
+template<> struct SupportsBitwiseCopy<wchar_t> {	enum {kResult = true}; 	};
+template<> struct SupportsBitwiseCopy<float> {	enum {kResult = true}; 	};
+template<> struct SupportsBitwiseCopy<long double> {	enum {kResult = true}; 	};
+
+
+//	This file uses tricks discussed in Andrei Alexandrescu's book to
+//	implement a call to memcpy for primitive types or any class which
+//  has the statement template<> struct SupportsBitwiseCopy<CLASS> {	enum {kResult = true}; 	};
+//	because of potential portability issues with TypeList, primitive types are
+//	have SupportsBitwiseCopy specialized here by brute force enumeration
+
+
+class NullType {};
+
+template <typename T>
+class TypeTraits
+	{
+	private :
+		template <class U> struct PointerTraits
+			{
+				enum {kResult = false};
+				enum {kCopyWithMemCopy = false};	// only allowing memcpy on bare pointers
+				enum {kSizeOfPointee = 0};	// only allowing memcpy on bare pointers
+			};
+		template <class U> struct PointerTraits<U*>
+			{
+				enum {kResult = true};
+				enum {kCopyWithMemCopy = SupportsBitwiseCopy<U>::kResult};
+				enum {kSizeOfPointee = sizeof(U)};
+			};
+		template <class U> struct PointerTraits<const U*>
+			{
+				enum {kResult = true};
+				enum {kCopyWithMemCopy = SupportsBitwiseCopy<U>::kResult};
+				enum {kSizeOfPointee = sizeof(U)};
+			};
+	public:
+		enum {kIsPointer = PointerTraits<T>::kResult};
+		enum {kCanUseMemCpyOnPointee = PointerTraits<T>::kCopyWithMemCopy};
+		enum {kPointeeSize = PointerTraits<T>::kSizeOfPointee}; //only valid if kIsPointer !!
+	//	typedef PointerTraits<T>::PointeeType  PointeeType;
+	};
+
+template<class T, class U>
+class Conversion
+	{
+	public:
+		enum {kSameType = false};
+	};
+
+template<class T>
+class Conversion<T,T>
+	{
+	public:
+		enum {kSameType = true};
+	};
+
+template<class T>
+class Conversion<const T*,T*>
+	{
+	public:
+		enum {kSameType = true};
+	};
+
+template<class T>
+class Conversion<T*, const T*>
+	{
+	public:
+		enum {kSameType = true};
+	};
+
+enum CopyAlgoSeclector
+	{
+		kConservative,
+		kFast
+	};
+
+template <typename InIt, typename OutIt>
+inline OutIt CopyImpl(InIt first, InIt last, OutIt resultP, Int2Type<kConservative>)
+	{
+	return std::copy(first, last, resultP);
+	}
+
+// Fast path of ncl_copy: both iterators are bare pointers to bitwise-copyable elements, so the
+//	whole range is moved as a single block of bytes.
+// Returns a pointer one past the last element written -- the same contract as std::copy, which the
+//	kConservative overload forwards to -- and not the start of the destination, which is what
+//	memcpy/memmove themselves return.
+// std::memmove is used instead of std::memcpy so that overlapping source and destination ranges
+//	stay well defined.
+template <typename InIt, typename OutIt>
+inline OutIt CopyImpl(InIt first, InIt last, OutIt resultP, Int2Type<kFast>)
+	{
+	const std::size_t nElements = (std::size_t) (last - first);
+	std::memmove(resultP, first, nElements * sizeof(*first));
+	return resultP + nElements;
+	}
+
+// Copies [first, last) to the range starting at resultP and returns the end of the written range.
+// The implementation is picked at compile time: the byte-wise CopyImpl overload is used only when
+//	both iterators are bare pointers, both pointee types are marked SupportsBitwiseCopy, and the two
+//	pointer types are the same apart from a const qualifier on the pointee (Conversion<>::kSameType).
+//	Every other combination goes through std::copy.
+// Matching on the element type, not merely on sizeof, matters: a byte copy between equally sized but
+//	different types (e.g. int -> float) would reinterpret bits instead of converting values.
+template <typename InIt, typename OutIt>
+OutIt ncl_copy(InIt first, InIt last, OutIt resultP)
+	{
+		enum { kUseMemCpy =(TypeTraits<InIt>::kIsPointer &&
+							TypeTraits<OutIt>::kIsPointer &&
+							TypeTraits<InIt>::kCanUseMemCpyOnPointee &&
+							TypeTraits<OutIt>::kCanUseMemCpyOnPointee &&
+							Conversion<InIt, OutIt>::kSameType) ? kFast : kConservative};
+		return CopyImpl(first, last, resultP, Int2Type<kUseMemCpy>());
+	}
+
+#else //HAVE_COMPILE_TIME_DISPATCH
+
+template <typename InIt, typename OutIt>
+inline OutIt ncl_copy(InIt first, InIt last, OutIt resultP)
+	{
+	return std::copy(first, last, resultP);
+	}
+
+#endif //HAVE_COMPILE_TIME_DISPATCH
+
+
+// In-place element-wise addition: each element of [first, last) is added to the element at the
+//	matching position of the range that starts at resultP.
+// Returns the output iterator advanced past the last element that was updated.
+template <typename InIt, typename OutIt>
+inline OutIt ncl_iadd(InIt first, InIt last, OutIt resultP)
+	{
+	while (first != last)
+		{
+		*resultP += *first;
+		++first;
+		++resultP;
+		}
+	return resultP;
+	}
+
+// In-place element-wise multiplication: each element of the range that starts at resultP is
+//	multiplied by the element at the matching position of [first, last).
+// Returns the output iterator advanced past the last element that was updated.
+template <typename InIt, typename OutIt>
+inline OutIt ncl_imult(InIt first, InIt last, OutIt resultP)
+	{
+	while (first != last)
+		{
+		*resultP *= *first;
+		++first;
+		++resultP;
+		}
+	return resultP;
+	}
+
+
+#endif
+
diff --git a/src/nxsassumptionsblock.cpp b/src/nxsassumptionsblock.cpp
new file mode 100644
index 0000000..6a32092
--- /dev/null
+++ b/src/nxsassumptionsblock.cpp
@@ -0,0 +1,2807 @@
+//	Copyright (C) 1999-2003 Paul O. Lewis
+//
+//	This file is part of NCL (Nexus Class Library) version 2.0.
+//
+//	NCL is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	NCL is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with NCL; if not, write to the Free Software Foundation, Inc.,
+//	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+#include <iomanip>
+#include <climits>
+
+#include "ncl/nxsassumptionsblock.h"
+#include "ncl/nxssetreader.h"
+#include "ncl/nxsreader.h"
+using namespace std;
+
+// A NxsLabelToIndicesMapper that recognizes no labels at all: every attempt to resolve, append or
+//	list labels sets the `queried` flag and throws an NxsException, and the index-set / partition
+//	hooks do nothing.
+class BogusToIndMapper: public NxsLabelToIndicesMapper
+	{
+	public:
+		BogusToIndMapper()
+			:queried(false)
+			{}
+		virtual ~BogusToIndMapper(){}
+
+		// Reports UINT_MAX as the largest index.
+		virtual unsigned GetMaxIndex() const
+			{
+			return UINT_MAX;
+			}
+		// Never returns: records the query, then throws "The symbol <label> was not recognized".
+		virtual unsigned GetIndicesForLabel(const std::string &label, NxsUnsignedSet *) const
+			{
+			queried=true;
+			NxsString e;
+			e << "The symbol " << label << " was not recognized";
+			throw NxsException(e);
+			}
+
+		// Always returns 0 and leaves the output set untouched.
+		virtual unsigned GetIndexSet(const std::string &, NxsUnsignedSet * ) const
+			{
+			return 0;
+			}
+		// Always returns false; nothing is stored.
+		virtual bool AddNewIndexSet(const std::string &, const NxsUnsignedSet & )
+			{
+			return false;
+			}
+		// Always returns false; nothing is stored.
+		virtual bool AddNewPartition(const std::string &, const NxsPartition & )
+			{
+			return false;
+			}
+
+		/// AppendNewLabel is only overloaded in Taxa and State LabelToIndexMappers
+		/// Here it never returns: it records the query, then throws "The symbol <label> was not recognized".
+		virtual unsigned AppendNewLabel(std::string &label)
+			{
+			queried=true;
+			NxsString e;
+			e << "The symbol " << label << " was not recognized";
+			throw NxsException(e);
+			}
+
+		// Never returns: records the query, then throws an NxsException built from an empty message.
+		std::vector<std::string> GetLabels() const
+			{
+			queried=true;
+			NxsString e;
+			throw NxsException(e);
+			}
+		mutable bool queried; // set to true by the throwing members above; mutable so const members can set it
+	};
+
+
+/*!
+	Restores the manager to its initial state: the set of standard type names is rebuilt, the set of all
+	type names is made equal to it, and every user type, weight set, type set and default name is cleared.
+*/
+void NxsTransformationManager::Reset()
+	{
+	// "SQUARED" and "LINEAR" are new in Mesquite
+	static const char * const builtinTypeNames[] = {
+		"UNORD", "ORD", "IRREV", "IRREV.UP", "IRREV.DOWN",
+		"DOLLO", "DOLLO.UP", "DOLLO.DOWN", "STRAT",
+		"SQUARED", "LINEAR"
+		};
+	const unsigned nBuiltin = (unsigned)(sizeof(builtinTypeNames)/sizeof(builtinTypeNames[0]));
+
+	standardTypeNames.clear();
+	for (unsigned k = 0; k < nBuiltin; ++k)
+		standardTypeNames.insert(builtinTypeNames[k]);
+
+	allTypeNames.clear();
+	allTypeNames.insert(standardTypeNames.begin(), standardTypeNames.end());
+
+	userTypeNames.clear();
+	dblUserTypes.clear();
+	intUserTypes.clear();
+
+	dblWtSets.clear();
+	intWtSets.clear();
+	typeSets.clear();
+
+	def_wtset.clear();
+	def_typeset.clear();
+	def_type.clear();
+	}
+
+/*!
+	Writes one "UserType <name> (Stepmatrix) = <nStates>" command to `out` for every entry of `dblUserTypes`
+	and then for every entry of `intUserTypes`. Nothing is written when both maps are empty.
+	Each command lists the escaped state symbols on one line, followed by one matrix row per state. A zero on
+	the diagonal is written as "." and the sentinel cost (DBL_MAX or INT_MAX) is written as "i".
+*/
+void NxsTransformationManager::WriteUserType(std::ostream &out) const
+	{
+	if (dblUserTypes.empty() && intUserTypes.empty())
+		return;
+	// real-valued step matrices
+	for (std::map<std::string, NxsRealStepMatrix>::const_iterator csIt = dblUserTypes.begin(); csIt != dblUserTypes.end(); ++csIt)
+		{
+		out << "    UserType ";
+		out << NexusToken::EscapeString(csIt->first) << " (Stepmatrix) = ";
+		const NxsRealStepMatrix & p = csIt->second;
+		const std::vector<std::string> & states = p.GetSymbols();
+		const NxsRealStepMatrix::DblMatrix & mat = p.GetMatrix();
+		const unsigned nStates = (const unsigned)states.size();
+		out << nStates;
+		out << "\n    ";
+		for (std::vector<std::string>::const_iterator sIt = states.begin(); sIt != states.end(); ++sIt)
+			out << "   "<< NxsString::GetEscaped(*sIt) ;
+		NxsString n;
+		// showpoint is switched on for the matrix body only; the previous flags are restored below
+		std::ios::fmtflags prevflags = out.setf(std::ios::showpoint);
+		for (unsigned i = 0; i < nStates; ++i)
+			{
+			out << "\n    ";
+			for (unsigned j = 0; j < nStates; ++j)
+				{
+				const double el = mat.at(i).at(j);
+				if (i == j && el == 0.0)
+					out << "   .";
+				else
+					{
+					// `n` is a scratch buffer reused for every cell
+					n.clear();
+					if (el == DBL_MAX)
+						n += "i";
+					else
+						n += el;
+					out << "   " << NxsString::GetEscaped(n);
+					}
+				}
+			}
+		out.flags(prevflags);
+		out << ";\n";
+		}
+
+	// integer step matrices
+	for (std::map<std::string, NxsIntStepMatrix>::const_iterator csIt = intUserTypes.begin(); csIt != intUserTypes.end(); ++csIt)
+		{
+		out << "    UserType ";
+		out << NexusToken::EscapeString(csIt->first) << " (Stepmatrix) = ";
+		const NxsIntStepMatrix & p = csIt->second;
+		const std::vector<std::string> & states = p.GetSymbols();
+		const NxsIntStepMatrix::IntMatrix & mat = p.GetMatrix();
+		const unsigned nStates = (const unsigned)states.size();
+		out << nStates;
+		out << "\n    ";
+		for (std::vector<std::string>::const_iterator sIt = states.begin(); sIt != states.end(); ++sIt)
+			out << "   "<< NxsString::GetEscaped(*sIt) ;
+		NxsString n;
+		for (unsigned i = 0; i < nStates; ++i)
+			{
+			out << "\n    ";
+			for (unsigned j = 0; j < nStates; ++j)
+				{
+				const int el = mat.at(i).at(j);
+				if (i == j && el == 0)
+					out << "   .";
+				else
+					{
+					if (el == INT_MAX)
+						n = "i";
+					else
+						{
+						n.clear();
+						n += el;
+						}
+					out << "   " << NxsString::GetEscaped(n);
+					}
+				}
+			}
+		out << ";\n";
+		}
+	}
+
+// Writes one "WtSet" command per entry of `wtSets`, a map from a set name to a list of
+//	(weight, index set) pairs. An entry whose name equals `defaultName` (case-insensitively) is
+//	flagged with "* ". Shared by the real-valued and the integer half of WriteWtSet.
+template <typename WtSetMap>
+static void WriteWtSetCommands(std::ostream &out, const WtSetMap & wtSets, const char * defaultName)
+	{
+	typedef typename WtSetMap::mapped_type WeightList;
+	for (typename WtSetMap::const_iterator setIt = wtSets.begin(); setIt != wtSets.end(); ++setIt)
+		{
+		out << "    WtSet ";
+		if (NxsString::case_insensitive_equals(setIt->first.c_str(), defaultName))
+			out << "* ";
+		out << NexusToken::EscapeString(setIt->first) << " =";
+		const WeightList & groups = setIt->second;
+		for (typename WeightList::const_iterator gIt = groups.begin(); gIt != groups.end(); ++gIt)
+			{
+			// groups after the first are separated by a comma
+			if (gIt != groups.begin())
+				out << ',';
+			out << " \'" << gIt->first << "' :";
+			NxsSetReader::WriteSetAsNexusValue(gIt->second, out);
+			}
+		out << ";\n";
+		}
+	}
+
+/*!
+	Writes every stored weight set to `out` as a NEXUS "WtSet" command: the real-valued sets first,
+	then the integer sets. The set named by `def_wtset` is marked with an asterisk.
+	Nothing is written when no weight sets are stored.
+*/
+void NxsTransformationManager::WriteWtSet(std::ostream &out) const
+	{
+	if (dblWtSets.empty() && intWtSets.empty())
+		return;
+	const char * dtp = (def_wtset.empty() ? NULL : def_wtset.c_str());
+	WriteWtSetCommands(out, dblWtSets, dtp);
+	WriteWtSetCommands(out, intWtSets, dtp);
+	}
+
+
+/*!
+	Stores `n` as the default type name. An empty `n` is always accepted; a non-empty `n` must satisfy
+	IsValidTypeName, otherwise an NxsException is raised and the stored default is left unchanged.
+*/
+void NxsTransformationManager::SetDefaultTypeName(const std::string &n)
+	{
+	const bool acceptable = (n.empty() || IsValidTypeName(n));
+	if (acceptable)
+		{
+		def_type = n;
+		return;
+		}
+	NxsString e(n.c_str());
+	e << " is not the name of a known type (and therefore is not a valid default type)";
+	throw NxsException(e);
+	}
+
+
+/*!
+	Returns true only when no user types, weight sets or type sets are stored and the default type name is
+	either empty or something other than "ORD" (compared case-insensitively).
+	NOTE(review): as written, "ORD" is the only default type that makes the manager non-empty, while any other
+	non-empty default is ignored -- confirm that this is the intended rule.
+*/
+bool NxsTransformationManager::IsEmpty() const
+	{
+	if (!userTypeNames.empty() || !dblWtSets.empty() || !intWtSets.empty() || !typeSets.empty())
+		return false;
+	if (def_type.empty())
+		return true;
+	return !NxsString::case_insensitive_equals(def_type.c_str(), "ORD");
+	}
+
+/*!
+	Returns the weight of the first group in `wtset` whose index set contains `index`,
+	or -1.0 if `index` is not in any group of `wtset`.
+*/
+double NxsTransformationManager::GetWeightForIndex(const ListOfDblWeights & wtset, unsigned index)
+	{
+	ListOfDblWeights::const_iterator groupIt = wtset.begin();
+	const ListOfDblWeights::const_iterator endIt = wtset.end();
+	while (groupIt != endIt)
+		{
+		const DblWeightToIndexSet & group = *groupIt;
+		if (group.second.count(index) != 0)
+			return group.first;
+		++groupIt;
+		}
+	return -1.0;
+	}
+
+/*!
+	Returns the weight of the first group in `wtset` whose index set contains `index`,
+	or -1 if `index` is not in any group of `wtset`.
+*/
+int NxsTransformationManager::GetWeightForIndex(const ListOfIntWeights & wtset, unsigned index)
+	{
+	ListOfIntWeights::const_iterator groupIt = wtset.begin();
+	const ListOfIntWeights::const_iterator endIt = wtset.end();
+	while (groupIt != endIt)
+		{
+		const IntWeightToIndexSet & group = *groupIt;
+		if (group.second.count(index) != 0)
+			return group.first;
+		++groupIt;
+		}
+	return -1;
+	}
+
+/*!
+	Returns true when `n` (upper-cased before lookup) names a standard type or an integer user type, and
+	false when it names a real-valued user type.
+	Raises an NxsNCLAPIException if `n` is none of these.
+*/
+bool NxsTransformationManager::IsIntType(const std::string & n) const
+	{
+	std::string upperName(n.c_str());
+	NxsString::to_upper(upperName);
+	const bool isStandard = (standardTypeNames.count(upperName) > 0);
+	if (isStandard || intUserTypes.count(upperName) > 0)
+		return true;
+	if (dblUserTypes.count(upperName) > 0)
+		return false;
+	NxsString errormsg = "Type name ";
+	errormsg << n << " not found.";
+	throw NxsNCLAPIException(errormsg);
+	}
+
+
+/*!
+	Returns true if the upper-cased form of `n` is present in `allTypeNames` (standard or user type), so the
+	check is not case-sensitive.
+*/
+bool NxsTransformationManager::IsValidTypeName(const std::string & n) const
+	{
+	std::string upperName(n.c_str());
+	NxsString::to_upper(upperName);
+	const bool known = (allTypeNames.count(upperName) > 0);
+	return known;
+	}
+
+/*!
+	Returns true if the upper-cased form of `n` is one of the standard (predefined) type names, and false
+	otherwise. Unknown names simply yield false; no exception is raised.
+*/
+bool NxsTransformationManager::IsStandardType(const std::string & n) const
+	{
+	std::string upperName(n.c_str());
+	NxsString::to_upper(upperName);
+	const bool predefined = (standardTypeNames.count(upperName) > 0);
+	return predefined;
+	}
+
+/*! Returns the integer Stepmatrix stored for the User Type with the name `n` (looked up in upper case).
+
+	Raises an NxsNCLAPIException if `n` is the name of a builtin type (like "ORD") or if no integer
+		user type is stored under that name.
+*/
+const NxsIntStepMatrix & NxsTransformationManager::GetIntType(const std::string & n) const
+	{
+	if (IsStandardType(n))
+		throw NxsNCLAPIException("Standard (predefined) types cannot be fetched using GetIntType.");
+
+	std::string upperName(n.c_str());
+	NxsString::to_upper(upperName);
+	const std::map<std::string, NxsIntStepMatrix>::const_iterator found = intUserTypes.find(upperName);
+	if (found == intUserTypes.end())
+		{
+		NxsString errormsg = "Type name ";
+		errormsg << n << " not found.";
+		throw NxsNCLAPIException(errormsg);
+		}
+	return found->second;
+	}
+
+/*! Returns the real-valued Stepmatrix stored for the User Type with the name `n` (looked up in upper case).
+
+	Raises an NxsNCLAPIException ("Type name ... not found.") for any name that is not a key of
+		`dblUserTypes`.
+*/
+const NxsRealStepMatrix & NxsTransformationManager::GetRealType(const std::string & n) const
+	{
+	std::string upperName(n.c_str());
+	NxsString::to_upper(upperName);
+	const std::map<std::string, NxsRealStepMatrix>::const_iterator found = dblUserTypes.find(upperName);
+	if (found == dblUserTypes.end())
+		{
+		NxsString errormsg = "Type name ";
+		errormsg << n << " not found.";
+		throw NxsNCLAPIException(errormsg);
+		}
+	return found->second;
+	}
+
+/*!
+	Returns the nStates x nStates step matrix of an ordered character: a change from state i to state j
+	costs |i - j| steps, so the diagonal is 0.
+*/
+const NxsIntStepMatrix::IntMatrix NxsTransformationManager::GetOrderedType(unsigned nStates)
+	{
+	NxsIntStepMatrix::IntVec v(nStates, 0);
+	NxsIntStepMatrix::IntMatrix m(nStates, v);
+	for (unsigned i = 0; i < nStates; ++i)
+		{
+		for (unsigned j = 0; j < nStates; ++j)
+			{
+			// i and j are unsigned: always subtract the smaller from the larger, otherwise the
+			//	difference wraps around and is stored as a negative cost
+			m[i][j] = (int)(i > j ? i - j : j - i);
+			}
+		}
+	return m;
+	}
+
+/*!
+	Returns the nStates x nStates step matrix of an unordered character: every change between two
+	different states costs 1 step and the diagonal is 0.
+*/
+const NxsIntStepMatrix::IntMatrix NxsTransformationManager::GetUnorderedType(unsigned nStates)
+	{
+	NxsIntStepMatrix::IntVec v(nStates, 1);
+	NxsIntStepMatrix::IntMatrix m(nStates, v);
+	for (unsigned i = 0; i < nStates; ++i)
+		m[i][i] = 0;
+	return m;
+	}
+
+
+/*!
+	Registers `s` as the integer step matrix of the user type `name`.
+	The type is stored under the upper-cased form of `name`.
+
+	Raises an NxsNCLAPIException if the `name` is the name of a "standard" type.
+	Returns true if another user type (integer or real) with the same name was replaced.
+*/
+bool NxsTransformationManager::AddIntType(const std::string &name, const NxsIntStepMatrix &s)
+	{
+	std::string capName(name.c_str());
+	NxsString::to_upper(capName);
+	if (standardTypeNames.count(capName) > 0)
+		{
+		NxsString errormsg(name.c_str());
+		errormsg <<  " is a predefined type which cannot be redefined";
+		throw NxsNCLAPIException(errormsg);
+		}
+	/*	std::map::insert leaves an existing entry untouched, so any previous
+		definition has to be erased first for the new matrix to take effect.
+		erase() returns the number of elements removed.
+	*/
+	bool replacing = (intUserTypes.erase(capName) > 0);
+	if (dblUserTypes.erase(capName) > 0)
+		replacing = true;
+	intUserTypes.insert(std::pair<std::string, NxsIntStepMatrix>(capName, s));
+	/* NOTE(review): AddRealType records capName here rather than name -- confirm which form the consumers of userTypeNames expect */
+	userTypeNames.insert(name);
+	allTypeNames.insert(capName);
+	return replacing;
+	}
+
+/*!
+	Registers `s` as the real-valued step matrix of the user type `name`.
+	The type is stored under the upper-cased form of `name`.
+
+	Raises an NxsNCLAPIException if the `name` is the name of a "standard" type.
+	Returns true if another user type (integer or real) with the same name was replaced.
+*/
+bool NxsTransformationManager::AddRealType(const std::string &name, const NxsRealStepMatrix &s)
+	{
+	std::string capName(name.c_str());
+	NxsString::to_upper(capName);
+	if (standardTypeNames.count(capName) > 0)
+		{
+		NxsString errormsg(name.c_str());
+		errormsg <<  " is a predefined type which cannot be redefined";
+		throw NxsNCLAPIException(errormsg);
+		}
+	/*	std::map::insert leaves an existing entry untouched, so any previous
+		definition has to be erased first for the new matrix to take effect.
+		erase() returns the number of elements removed.
+	*/
+	bool replacing = (dblUserTypes.erase(capName) > 0);
+	if (intUserTypes.erase(capName) > 0)
+		replacing = true;
+	dblUserTypes.insert(std::pair<std::string, NxsRealStepMatrix>(capName, s));
+	userTypeNames.insert(capName);
+	allTypeNames.insert(capName);
+	return replacing;
+	}
+
+/*!
+	Stores the integer weight set `ws` under the upper-cased form of `name`,
+	removing a real-valued weight set of the same name if there is one.
+	When `isDefault` is true, `name` (as given) becomes the default weight set name.
+	Returns true if a weight set (integer or real) with that name already existed.
+*/
+bool NxsTransformationManager::AddIntWeightSet(const std::string &name, const NxsTransformationManager::ListOfIntWeights &ws, bool isDefault)
+	{
+	std::string key(name.c_str());
+	NxsString::to_upper(key);
+	bool replaced = (intWtSets.count(key) > 0);
+	if (!replaced)
+		{
+		/* erase() reports how many entries were removed */
+		replaced = (dblWtSets.erase(key) > 0);
+		}
+	intWtSets[key] = ws;
+	if (isDefault)
+		{
+		def_wtset = name;
+		}
+	return replaced;
+	}
+
+/*!
+	Stores the real-valued weight set `ws` under the upper-cased form of `name`,
+	removing an integer weight set of the same name if there is one.
+	When `isDefault` is true, `name` (as given) becomes the default weight set name.
+	Returns true if a weight set (integer or real) with that name already existed.
+*/
+bool NxsTransformationManager::AddRealWeightSet(const std::string &name, const NxsTransformationManager::ListOfDblWeights &ws, bool isDefault)
+	{
+	std::string key(name.c_str());
+	NxsString::to_upper(key);
+	bool replaced = (dblWtSets.count(key) > 0);
+	if (!replaced)
+		{
+		/* erase() reports how many entries were removed */
+		replaced = (intWtSets.erase(key) > 0);
+		}
+	dblWtSets[key] = ws;
+	if (isDefault)
+		def_wtset = name;
+	return replaced;
+	}
+
+/*!
+	Stores the type set `ts` under the upper-cased form of `name`, overwriting
+	any type set already stored under that key.
+	When `isDefault` is true, `name` (as given) becomes the default type set name.
+	Returns true if a type set with that name already existed.
+*/
+bool NxsTransformationManager::AddTypeSet(const std::string &name, const NxsPartition &ts, bool isDefault)
+	{
+	std::string key(name.c_str());
+	NxsString::to_upper(key);
+	const bool replaced = (typeSets.count(key) > 0);
+	typeSets[key] = ts;
+	if (isDefault)
+		def_typeset = name;
+	return replaced;
+	}
+
+/*!
+	Returns the keys of all stored weight sets, real-valued and integer alike,
+	merged into a single set.
+*/
+std::set<std::string> NxsTransformationManager::GetWeightSetNames() const
+	{
+	typedef std::map<std::string, ListOfDblWeights>::const_iterator DblWtIter;
+	typedef std::map<std::string, ListOfIntWeights>::const_iterator IntWtIter;
+	std::set<std::string> names;
+	for (DblWtIter d = dblWtSets.begin(); d != dblWtSets.end(); ++d)
+		names.insert(d->first);
+	for (IntWtIter w = intWtSets.begin(); w != intWtSets.end(); ++w)
+		names.insert(w->first);
+	return names;
+	}
+
+/*!
+	Returns the keys of all stored type sets.
+*/
+std::set<std::string> NxsTransformationManager::GetTypeSetNames() const
+	{
+	typedef std::map<std::string, ListOfTypeNamesToSets>::const_iterator TypeSetIter;
+	std::set<std::string> names;
+	for (TypeSetIter t = typeSets.begin(); t != typeSets.end(); ++t)
+		names.insert(t->first);
+	return names;
+	}
+
+
+/*! Stores the character partition `p` under `name`, overwriting an entry of the same name. */
+void NxsAssumptionsBlock::AddCharPartition(const std::string & name, const NxsPartition &p)
+	{
+	NxsPartition & slot = charPartitions[name];
+	slot = p;
+	}
+/*! Stores the taxon partition `p` under `name`, overwriting an entry of the same name. */
+void NxsAssumptionsBlock::AddTaxPartition(const std::string & name, const NxsPartition &p)
+	{
+	NxsPartition & slot = taxPartitions[name];
+	slot = p;
+	}
+
+/*! Stores the tree partition `p` under `name`, overwriting an entry of the same name. */
+void NxsAssumptionsBlock::AddTreePartition(const std::string & name, const NxsPartition &p)
+	{
+	NxsPartition & slot = treePartitions[name];
+	slot = p;
+	}
+
+/*!
+	Stores the code set `p` under `name`, overwriting an entry of the same name.
+	When `asterisked` is true, `name` is also recorded as the default code set.
+*/
+void NxsAssumptionsBlock::AddCodeSet(const std::string & name, const NxsPartition &p, bool asterisked)
+	{
+	NxsPartition & slot = codeSets[name];
+	slot = p;
+	if (!asterisked)
+		return;
+	def_codeSet.assign(name.c_str());
+	}
+
+/*!
+	Stores the codon position set `p` under `name`, overwriting an entry of the same name.
+	When `asterisked` is true, `name` is also recorded as the default codon position set.
+*/
+void NxsAssumptionsBlock::AddCodonPosSet(const std::string & name, const NxsPartition &p, bool asterisked)
+	{
+	NxsPartition & slot = codonPosSets[name];
+	slot = p;
+	if (!asterisked)
+		return;
+	def_codonPosSet.assign(name.c_str());
+	}
+
+
+/*!
+	Writes an OPTIONS command to `out` when there is something to report: a
+	default type name that is non-empty and not "ORD" (compared without regard
+	to case), the gapsAsNewstate flag, or a PolyTCount setting other than
+	POLY_T_COUNT_UNKNOWN. Writes nothing otherwise.
+*/
+void NxsAssumptionsBlock::WriteOptions(std::ostream & out) const
+	{
+	const std::string defType = transfMgr.GetDefaultTypeName();
+	const bool hasDefType = !defType.empty();
+	const bool defTypeWorthWriting = hasDefType && !NxsString::case_insensitive_equals(defType.c_str(), "ORD");
+	const bool polyTCountSet = (polyTCountValue != POLY_T_COUNT_UNKNOWN);
+	if (!defTypeWorthWriting && !gapsAsNewstate && !polyTCountSet)
+		return;
+
+	out << "    OPTIONS";
+	/* once the command is being written, any non-empty default type is included, even "ORD" */
+	if (hasDefType)
+		out << " DefType = " << NxsString::GetEscaped(defType);
+	if (gapsAsNewstate)
+		out << " GapMode = NewState";
+	if (polyTCountValue == POLY_T_COUNT_MIN)
+		{
+		out << " PolyTCount = MinSteps";
+		}
+	else if (polyTCountValue == POLY_T_COUNT_MAX)
+		{
+		out << " PolyTCount = MaxSteps";
+		}
+	out << ";\n";
+	}
+
+/*!
+	Returns true if this object holds anything that is written in an ASSUMPTIONS
+	block: the gapsAsNewstate flag, transformation-manager content, exclusion
+	sets, or a PolyTCount setting other than POLY_T_COUNT_UNKNOWN.
+*/
+bool NxsAssumptionsBlock::HasAssumptionsBlockCommands() const
+	{
+	if (gapsAsNewstate)
+		return true;
+	if (!transfMgr.IsEmpty())
+		return true;
+	if (!exsets.empty())
+		return true;
+	return polyTCountValue != POLY_T_COUNT_UNKNOWN;
+	}
+/*!
+	Returns true if any character, taxon or tree set or partition is stored,
+	i.e. if there is anything to write in a SETS block.
+*/
+bool NxsAssumptionsBlock::HasSetsBlockCommands() const
+	{
+	const bool noSets = charsets.empty() && taxsets.empty() && treesets.empty();
+	if (!noSets)
+		return true;
+	const bool noPartitions = charPartitions.empty() && taxPartitions.empty() && treePartitions.empty();
+	return !noPartitions;
+	}
+/*!
+	Returns true if any code set, codon position set or genetic-code manager
+	content is stored, i.e. if there is anything to write in a CODONS block.
+*/
+bool NxsAssumptionsBlock::HasCodonsBlockCommands() const
+	{
+	if (!codeSets.empty())
+		return true;
+	if (!codonPosSets.empty())
+		return true;
+	return !codesMgr.IsEmpty();
+	}
+
+/*!
+	Returns true if this object can read the block named by `token`.
+	ASSUMPTIONS, SETS and CODONS are all accepted; for those the block type name
+	and `readAs` are updated to record which kind of block is being read.
+	Any other token is accepted only if it equals the current GetID().
+*/
+bool NxsAssumptionsBlock::CanReadBlockType(const NxsToken & token)
+	{
+	if (token.Equals("ASSUMPTIONS"))
+		{
+		NCL_BLOCKTYPE_ATTR_NAME = "ASSUMPTIONS";
+		readAs = ASSUMPTIONS_BLOCK_READ;
+		}
+	else if (token.Equals("SETS"))
+		{
+		NCL_BLOCKTYPE_ATTR_NAME = "SETS";
+		readAs = SETS_BLOCK_READ;
+		}
+	else if (token.Equals("CODONS"))
+		{
+		NCL_BLOCKTYPE_ATTR_NAME = "CODONS";
+		readAs = CODONS_BLOCK_READ;
+		}
+	else
+		{
+		return token.Equals(GetID());
+		}
+	return true;
+	}
+
+
+
+/*!
+	Writes the content of this object to `out` as up to three NEXUS blocks, in
+	the order SETS, CODONS, ASSUMPTIONS. A block is only written if the
+	corresponding Has...BlockCommands() query returns true.
+
+	The object's title is blanked while writing and put back only for the one
+	block that matches how this object was read (`readAs`), and only if the
+	title was not auto-generated. Skipped commands are written in that same
+	block. The title is always restored before returning, including when an
+	exception propagates.
+*/
+void NxsAssumptionsBlock::WriteAsNexus(std::ostream &out) const
+	{
+	/* the method is const, so the temporary title changes go through a non-const alias of this */
+	NxsAssumptionsBlock *t = const_cast<NxsAssumptionsBlock *>(this); /*title switching*/
+	const std::string ft = title; /* saved copy of the full title, restored on every exit path */
+	t->title.clear();
+	NameOfAssumpBlockAsRead treatAs = this->readAs;
+	if (treatAs == UNREAD_OR_GENERATED_BLOCK)
+		{
+		/*	Precedence order here is reverse the order of writing. This delays
+			skipped blocks to the last among SETS, CODONS, and ASSUMPTIONS blocks.
+		*/
+		if (HasAssumptionsBlockCommands())
+			treatAs = ASSUMPTIONS_BLOCK_READ;
+		else if (HasCodonsBlockCommands())
+			treatAs = CODONS_BLOCK_READ;
+		else if (HasSetsBlockCommands())
+			treatAs = SETS_BLOCK_READ;
+		}
+	try
+		{
+		if (HasSetsBlockCommands())
+			{
+			/* expose the real title only while writing the block that carries it */
+			if (treatAs == SETS_BLOCK_READ && !IsAutoGeneratedTitle())
+				t->title = ft;
+			out << "BEGIN SETS;\n";
+			WriteBasicBlockCommands(out);
+			this->WriteTaxSet(out);
+			this->WriteCharSet(out);
+			this->WriteTreeSet(out);
+			this->WriteTaxPartition(out);
+			this->WriteCharPartition(out);
+			this->WriteTreePartition(out);
+			if (treatAs == SETS_BLOCK_READ)
+				{
+				WriteSkippedCommands(out);
+				/* blank the title again so the following blocks are written without it */
+				t->title.clear();
+				}
+			out << "END;\n";
+			}
+		if (HasCodonsBlockCommands())
+			{
+			if (treatAs == CODONS_BLOCK_READ && !IsAutoGeneratedTitle())
+				t->title = ft;
+			out << "BEGIN CODONS;\n";
+			WriteBasicBlockCommands(out);
+			this->codesMgr.WriteGeneticCode(out);
+			this->WriteCodonPosSet(out);
+			this->WriteCodeSet(out);
+			if (treatAs == CODONS_BLOCK_READ)
+				{
+				WriteSkippedCommands(out);
+				t->title.clear();
+				}
+			out << "END;\n";
+			}
+		if (HasAssumptionsBlockCommands())
+			{
+			if (treatAs == ASSUMPTIONS_BLOCK_READ && !IsAutoGeneratedTitle())
+				t->title = ft;
+			out << "BEGIN ASSUMPTIONS;\n";
+			WriteBasicBlockCommands(out);
+			this->WriteExSet(out);
+			this->transfMgr.WriteUserType(out);
+			this->transfMgr.WriteWtSet(out);
+			this->transfMgr.WriteTypeSet(out);
+			this->WriteOptions(out);
+			/* last block written: no need to blank the title here, it is restored just below */
+			if (treatAs == ASSUMPTIONS_BLOCK_READ)
+				WriteSkippedCommands(out);
+			out << "END;\n";
+			}
+		}
+	catch (...)
+		{
+		/* put the title back before letting the exception continue */
+		t->title =  ft;
+		throw;
+		}
+	t->title =  ft;
+	}
+
+
+/*!
+	Returns the characters block this object is linked to (may be NULL).
+	If `status` is non-NULL, the current characters link status is written to it.
+*/
+NxsCharactersBlockAPI * NxsAssumptionsBlock::GetCharBlockPtr(int *status)
+	{
+	if (status != NULL)
+		{
+		*status = GetCharLinkStatus();
+		}
+	return charBlockPtr;
+	}
+/*!
+	Returns the taxa block this object is linked to (may be NULL).
+	If `status` is non-NULL, the current taxa link status is written to it.
+*/
+NxsTaxaBlockAPI * NxsAssumptionsBlock::GetTaxaBlockPtr(int *status)
+	{
+	if (status != NULL)
+		{
+		*status = GetTaxaLinkStatus();
+		}
+	return taxa;
+	}
+/*!
+	Returns the trees block this object is linked to (may be NULL).
+	If `status` is non-NULL, the current trees link status is written to it.
+*/
+NxsTreesBlockAPI * NxsAssumptionsBlock::GetTreesBlockPtr(int *status)
+	{
+	if (status != NULL)
+		{
+		*status = GetTreesLinkStatus();
+		}
+	return treesBlockPtr;
+	}
+
+/*!
+	Creates an additional assumptions block and records it in createdSubBlocks.
+	The reader's block factories are asked first (when a reader is attached);
+	if that yields nothing, a plain NxsAssumptionsBlock is allocated with the
+	same link-API setting as this block. The new block gets this block's reader,
+	and passedRefOfOwnedBlock is reset to false.
+*/
+NxsAssumptionsBlockAPI	*NxsAssumptionsBlock::CreateNewAssumptionsBlock(NxsToken &token)
+	{
+	NxsAssumptionsBlockAPI * created = NULL;
+	if (nexusReader)
+		{
+		NxsString blockTypeName("ASSUMPTIONS");
+		NxsBlock * fromFactory = nexusReader->CreateBlockFromFactories(blockTypeName, token, NULL);
+		created = static_cast<NxsAssumptionsBlockAPI *>(fromFactory);
+		}
+	if (!created)
+		{
+		NxsAssumptionsBlock * fallback = new NxsAssumptionsBlock(NULL);
+		fallback->SetImplementsLinkAPI(this->ImplementsLinkAPI());
+		created = fallback;
+		}
+	created->SetNexus(nexusReader);
+	passedRefOfOwnedBlock = false;
+	createdSubBlocks.push_back(created);
+	return created;
+	}
+
+/*!
+	Returns the assumptions block (this one or a sub-block) that is linked to the
+	characters block `cb` with link status `status`.
+
+	- If this block has no characters block yet, it adopts `cb` and is returned.
+	- If this block already points to `cb`, this block is always returned; its
+	  link status is updated in some cases (see the comments below).
+	- Otherwise the previously created sub-blocks are searched for one linked to
+	  `cb` with the same status, and a new sub-block is created if none matches.
+	`token` is only forwarded to CreateNewAssumptionsBlock.
+*/
+NxsAssumptionsBlockAPI	*NxsAssumptionsBlock::GetAssumptionsBlockForCharBlock(NxsCharactersBlockAPI *cb, NxsBlockLinkStatus status, NxsToken &token)
+	{
+	int cbstatus;
+	NxsCharactersBlockAPI * thisCB = GetCharBlockPtr(&cbstatus);
+	if (thisCB == NULL)
+		{
+		SetCharBlockPtr(cb, status);
+		return this;
+		}
+	else if (cb == thisCB)
+		{
+		/* f is the stored status with everything outside BLOCK_LINK_UNUSED_MASK removed */
+		int f = cbstatus & NxsBlock::BLOCK_LINK_UNUSED_MASK;
+		if (f == status)
+			return this;
+		if (!(cbstatus & NxsBlock::BLOCK_LINK_USED))
+			{
+			/* link not used yet: a not-yet-determined status may simply be overwritten */
+			if (cbstatus == BLOCK_LINK_UNINITIALIZED || cbstatus == BLOCK_LINK_UNKNOWN_STATUS)
+				SetCharLinkStatus(status);
+			}
+		else
+			{
+			/* return the same block for these two link statuses since they are both "safe" */
+			if ((f | status) == BLOCK_LINK_TO_ONLY_CHOICE)
+				charLinkStatus |= BLOCK_LINK_TO_ONLY_CHOICE;
+			}
+		/* NOTE(review): unlike the taxa/trees variants, this branch returns this block in every case -- confirm that is intended */
+		return this;
+		}
+	/* this block is tied to a different characters block: look for a matching sub-block */
+	for (VecAssumpBlockPtr::iterator bIt = createdSubBlocks.begin(); bIt != createdSubBlocks.end(); ++bIt)
+		{
+		NxsAssumptionsBlockAPI	* c = *bIt;
+		if (c && cb == c->GetCharBlockPtr(&cbstatus) && (cbstatus & BLOCK_LINK_UNUSED_MASK) == status)
+			return c;
+		}
+	
+	NxsAssumptionsBlockAPI	*effBlock = CreateNewAssumptionsBlock(token);
+	effBlock->SetCharBlockPtr(cb, status);
+	return effBlock;
+	}
+
+/*!
+	Returns the assumptions block (this one or a sub-block) that is linked to the
+	taxa block `cb` with link status `status`.
+
+	- If this block has no taxa block yet, it adopts `cb` and is returned.
+	- If this block already points to `cb`, it is returned when the statuses are
+	  compatible (see the comments below).
+	- In all remaining cases the previously created sub-blocks are searched for
+	  one linked to `cb` with the same status, and a new sub-block is created if
+	  none matches.
+	`token` is only forwarded to CreateNewAssumptionsBlock.
+*/
+NxsAssumptionsBlockAPI	*NxsAssumptionsBlock::GetAssumptionsBlockForTaxaBlock(NxsTaxaBlockAPI *cb, NxsBlockLinkStatus status, NxsToken &token)
+	{
+	int cbstatus;
+	NxsTaxaBlockAPI * thisCB = GetTaxaBlockPtr(&cbstatus);
+	if (thisCB == NULL)
+		{
+		SetTaxaBlockPtr(cb, status);
+		return this;
+		}
+	else if (cb == GetTaxaBlockPtr(&cbstatus))
+		{
+		/* f is the stored status with everything outside BLOCK_LINK_UNUSED_MASK removed */
+		int f = cbstatus & NxsBlock::BLOCK_LINK_UNUSED_MASK;
+		if (f == status)
+			return this;
+		if (!(cbstatus & NxsBlock::BLOCK_LINK_USED))
+			{
+			/* link not used yet: a not-yet-determined status may simply be overwritten */
+			if (cbstatus == BLOCK_LINK_UNINITIALIZED || cbstatus == BLOCK_LINK_UNKNOWN_STATUS)
+				{
+				SetTaxaLinkStatus(status);
+				return this;
+				}
+			}
+		else
+			{
+			/* return the same block for these two link statuses since they are both "safe" */
+			if ((f | status) == (BLOCK_LINK_FROM_LINK_CMD | BLOCK_LINK_TO_ONLY_CHOICE))
+				{
+				taxaLinkStatus |= (BLOCK_LINK_FROM_LINK_CMD | BLOCK_LINK_TO_ONLY_CHOICE);
+				return this;
+				}
+			}
+		/* same taxa block but incompatible status: fall through to the sub-block search */
+		}
+	for (VecAssumpBlockPtr::iterator bIt = createdSubBlocks.begin(); bIt != createdSubBlocks.end(); ++bIt)
+		{
+		NxsAssumptionsBlockAPI	* c = *bIt;
+		if (c && cb == c->GetTaxaBlockPtr(&cbstatus) && (cbstatus & BLOCK_LINK_UNUSED_MASK) == status)
+			return c;
+		}
+	NxsAssumptionsBlockAPI	*effBlock = CreateNewAssumptionsBlock(token);
+	effBlock->SetTaxaBlockPtr(cb, status);
+	return effBlock;
+	}
+/*!
+	Returns the assumptions block (this one or a sub-block) that is linked to the
+	trees block `cb` with link status `status`.
+
+	- If this block has no trees block yet, it adopts `cb` and is returned.
+	- If this block already points to `cb`, it is returned when the statuses are
+	  compatible (see the comments below).
+	- In all remaining cases the previously created sub-blocks are searched for
+	  one linked to `cb` with the same status, and a new sub-block is created if
+	  none matches.
+	`token` is only forwarded to CreateNewAssumptionsBlock.
+*/
+NxsAssumptionsBlockAPI	*NxsAssumptionsBlock::GetAssumptionsBlockForTreesBlock(NxsTreesBlockAPI *cb, NxsBlockLinkStatus status, NxsToken &token)
+	{
+	int cbstatus;
+	NxsTreesBlockAPI * thisCB = GetTreesBlockPtr(&cbstatus);
+	if (thisCB == NULL)
+		{
+		SetTreesBlockPtr(cb, status);
+		return this;
+		}
+	else if (cb == GetTreesBlockPtr(&cbstatus))
+		{
+		/* f is the stored status with everything outside BLOCK_LINK_UNUSED_MASK removed */
+		int f = cbstatus & NxsBlock::BLOCK_LINK_UNUSED_MASK;
+		if (f == status)
+			return this;
+		if (!(cbstatus & NxsBlock::BLOCK_LINK_USED))
+			{
+			/* link not used yet: a not-yet-determined status may simply be overwritten */
+			if (cbstatus == BLOCK_LINK_UNINITIALIZED || cbstatus == BLOCK_LINK_UNKNOWN_STATUS)
+				{
+				SetTreesLinkStatus(status);
+				return this;
+				}
+			}
+		else
+			{
+			/* return the same block for these two link statuses since they are both "safe" */
+			if ((f | status) == (BLOCK_LINK_FROM_LINK_CMD | BLOCK_LINK_TO_ONLY_CHOICE))
+				{
+				treesLinkStatus |= (BLOCK_LINK_FROM_LINK_CMD | BLOCK_LINK_TO_ONLY_CHOICE);
+				return this;
+				}
+			}
+		/* same trees block but incompatible status: fall through to the sub-block search */
+		}
+	for (VecAssumpBlockPtr::iterator bIt = createdSubBlocks.begin(); bIt != createdSubBlocks.end(); ++bIt)
+		{
+		NxsAssumptionsBlockAPI	* c = *bIt;
+		if (c && cb == c->GetTreesBlockPtr(&cbstatus) && (cbstatus & BLOCK_LINK_UNUSED_MASK) == status)
+			return c;
+		}
+	NxsAssumptionsBlockAPI	*effBlock = CreateNewAssumptionsBlock(token);
+	effBlock->SetTreesBlockPtr(cb, status);
+	return effBlock;
+	}
+
+/*!
+	Finds the assumptions block (this one or a sub-block) that a `cmd` command
+	should be stored in, given the title of the CHARACTERS block the command
+	refers to, and flags that block's characters link as used.
+
+	`charTitle` is the title given in the command, or NULL if the command does
+		not name a CHARACTERS block.
+	`token` is used for the position reported in warnings and errors.
+	`cmd` is the command name used in messages.
+
+	Throws NxsNCLAPIException if no reader is attached, and NxsException if no
+	suitable CHARACTERS block can be found. A warning is sent to the reader when
+	the choice of CHARACTERS block is ambiguous.
+*/
+NxsAssumptionsBlockAPI	*NxsAssumptionsBlock::GetAssumptionsBlockForCharTitle(const char *charTitle, NxsToken &token, const char *cmd)
+	{
+	/* the reader is dereferenced below, so its absence has to be reported as an exception */
+	if (!nexusReader)
+		throw NxsNCLAPIException("No NxsReader when reading Assumptions block.");
+	unsigned ncb = 0;
+	NxsCharactersBlockAPI * cb = nexusReader->GetCharBlockByTitle(charTitle, &ncb);
+	if (charTitle == NULL)
+		{
+		/* no title given: keep using the block this object is already committed to */
+		int cbstatus;
+		NxsCharactersBlockAPI * thisCB = GetCharBlockPtr(&cbstatus);
+		int ust = cbstatus & NxsBlock::BLOCK_LINK_UNUSED_MASK;
+		if (thisCB != NULL && ust != BLOCK_LINK_UNINITIALIZED && ust != BLOCK_LINK_UNKNOWN_STATUS)
+			{
+			if (ncb > 1 && !blockwideCharsLinkEstablished)
+				{
+				errormsg = "Multiple CHARACTERS blocks have been encountered, but a ";
+				errormsg += cmd;
+				errormsg += " command was found which does not specify which CHARACTERS block it uses.";
+				errormsg << " The first CHARACTERS block that was used by this " << this->GetID() << " block will be used";
+				nexusReader->NexusWarnToken(errormsg, NxsReader::AMBIGUOUS_CONTENT_WARNING, token);
+				errormsg.clear();
+				}
+			return this;
+			}
+		}
+	NxsAssumptionsBlockAPI * effectiveB = NULL;
+	if (cb == NULL)
+		{
+		/* the reader knows no matching block: fall back to the block already linked, if its ID fits */
+		if (charBlockPtr)
+			{
+			const NxsString	 t=charBlockPtr->GetID();
+			if (t.empty())
+				{
+				if (charTitle == NULL)
+					effectiveB = this;
+				}
+			else
+				{
+				/* charTitle may be NULL here; never hand a NULL string to the comparison */
+				if (charTitle != NULL && NxsString::case_insensitive_equals(charTitle, t.c_str()))
+					effectiveB = this;
+				}
+			}
+		if (effectiveB == NULL)
+			{
+			errormsg.clear();
+			errormsg << "A CHARACTERS (or DATA) block ";
+			if (charTitle)
+				errormsg << "with the title " << NxsString::GetEscaped(charTitle);
+			errormsg << " must precede an " << NCL_BLOCKTYPE_ATTR_NAME << " block with a " <<  cmd <<  " command.";
+			errormsg << "\n(If such a block exists, then this program may not be using an API for the NCL library that supports block linking).";
+			throw NxsException(errormsg, token);
+			}
+		}
+	else if (ncb > 1)
+		{
+		errormsg = "Multiple CHARACTERS blocks have been encountered, but a ";
+		errormsg += cmd;
+		errormsg += " command was found which does not specify which CHARACTERS block it uses.   The most recent CHARACTERS block will be used.";
+		nexusReader->NexusWarnToken(errormsg, NxsReader::AMBIGUOUS_CONTENT_WARNING, token);
+		errormsg.clear();
+		effectiveB = GetAssumptionsBlockForCharBlock(cb, NxsBlock::BLOCK_LINK_TO_MOST_RECENT, token);
+		}
+	else
+		{
+		NxsBlockLinkStatus statusRequested = (charTitle == NULL ? NxsBlock::BLOCK_LINK_TO_ONLY_CHOICE : NxsBlock::BLOCK_LINK_FROM_LINK_CMD);
+		effectiveB = GetAssumptionsBlockForCharBlock(cb, statusRequested, token);
+		}
+	effectiveB->FlagCharBlockAsUsed();
+	return effectiveB;
+	}
+
+/*!
+	Finds the assumptions block (this one or a sub-block) that a `cmd` command
+	should be stored in, given the title of the TAXA block the command refers
+	to, and flags that block's taxa link as used.
+
+	`taxTitle` is the title given in the command, or NULL if the command does
+		not name a TAXA block.
+	`token` is used for the position reported in warnings and errors.
+	`cmd` is the command name used in messages.
+
+	Throws NxsNCLAPIException if no reader is attached, and NxsException if no
+	suitable TAXA block can be found. A warning is sent to the reader when more
+	than one TAXA block matches.
+*/
+NxsAssumptionsBlockAPI	*NxsAssumptionsBlock::GetAssumptionsBlockForTaxaTitle(const char *taxTitle, NxsToken &token, const char *cmd)
+	{
+	/* the reader is dereferenced below, so its absence has to be reported as an exception */
+	if (!nexusReader)
+		throw NxsNCLAPIException("No NxsReader when reading Assumptions block.");
+	if (taxTitle == NULL)
+		{
+		/* no title given: keep using the block this object is already committed to */
+		int cbstatus;
+		NxsTaxaBlockAPI * thisCB = GetTaxaBlockPtr(&cbstatus);
+		int ust = cbstatus & NxsBlock::BLOCK_LINK_UNUSED_MASK;
+		if (thisCB != NULL && ust != BLOCK_LINK_UNINITIALIZED && ust != BLOCK_LINK_UNKNOWN_STATUS)
+			return this;
+		}
+	unsigned ncb = 0;
+	NxsTaxaBlockAPI * cb = nexusReader->GetTaxaBlockByTitle(taxTitle, &ncb);
+	NxsAssumptionsBlockAPI *effectiveB = NULL;
+	if (cb == NULL)
+		{
+		/* the reader knows no matching block: fall back to the block already linked, if its ID fits */
+		if (taxa)
+			{
+			const NxsString	 t=taxa->GetID();
+			if (t.empty())
+				{
+				if (taxTitle == NULL)
+					effectiveB = this;
+				}
+			else
+				{
+				/* taxTitle may be NULL here; never hand a NULL string to the comparison */
+				if (taxTitle != NULL && NxsString::case_insensitive_equals(taxTitle, t.c_str()))
+					effectiveB = this;
+				}
+			}
+		if (effectiveB == NULL)
+			{
+			errormsg.clear();
+			errormsg <<  "A TAXA block ";
+			if (taxTitle)
+				errormsg << "with the title " << NxsString::GetEscaped(taxTitle);
+			errormsg << " must precede an " << NCL_BLOCKTYPE_ATTR_NAME << " block with a " <<  cmd <<  " command.";
+			errormsg << "\n(If such a block exists, then this program may not be using an API for the NCL library that supports block linking).";
+			throw NxsException(errormsg, token);
+			}
+		}
+	else if (ncb > 1)
+		{
+		errormsg = "Multiple TAXA blocks have been encountered, but a ";
+		errormsg += cmd;
+		errormsg += " command was found which does not specify which TAXA block it uses.  The most recent TAXA block will be used.";
+		nexusReader->NexusWarnToken(errormsg, NxsReader::AMBIGUOUS_CONTENT_WARNING, token);
+		errormsg.clear();
+		effectiveB = GetAssumptionsBlockForTaxaBlock(cb, NxsBlock::BLOCK_LINK_TO_MOST_RECENT, token);
+		}
+	else
+		{
+		NxsBlockLinkStatus statusRequested = (taxTitle == NULL ? NxsBlock::BLOCK_LINK_TO_ONLY_CHOICE : NxsBlock::BLOCK_LINK_FROM_LINK_CMD);
+		effectiveB = GetAssumptionsBlockForTaxaBlock(cb, statusRequested, token);
+		}
+	effectiveB->FlagTaxaBlockAsUsed();
+	return effectiveB;
+	}
+
+/*!
+	Finds the assumptions block (this one or a sub-block) that a `cmd` command
+	should be stored in, given the title of the TREES block the command refers
+	to, and flags that block's trees link as used.
+
+	`treesTitle` is the title given in the command, or NULL if the command does
+		not name a TREES block.
+	`token` is used for the position reported in warnings and errors.
+	`cmd` is the command name used in messages.
+
+	Throws NxsNCLAPIException if no reader is attached, and NxsException if no
+	suitable TREES block can be found. A warning is sent to the reader when more
+	than one TREES block matches.
+*/
+NxsAssumptionsBlockAPI	*NxsAssumptionsBlock::GetAssumptionsBlockForTreesTitle(const char *treesTitle, NxsToken &token, const char *cmd)
+	{
+	/* the reader is dereferenced below, so its absence has to be reported as an exception */
+	if (!nexusReader)
+		throw NxsNCLAPIException("No NxsReader when reading Assumptions block.");
+	if (treesTitle == NULL)
+		{
+		/* no title given: keep using the block this object is already committed to */
+		int cbstatus;
+		NxsTreesBlockAPI * thisCB = GetTreesBlockPtr(&cbstatus);
+		int ust = cbstatus & NxsBlock::BLOCK_LINK_UNUSED_MASK;
+		if (thisCB != NULL && ust != BLOCK_LINK_UNINITIALIZED && ust != BLOCK_LINK_UNKNOWN_STATUS)
+			return this;
+		}
+	unsigned ncb = 0;
+	NxsTreesBlockAPI * cb = nexusReader->GetTreesBlockByTitle(treesTitle, &ncb);
+	NxsAssumptionsBlockAPI *effectiveB = NULL;
+	if (cb == NULL)
+		{
+		/* the reader knows no matching block: fall back to the block already linked, if its ID fits */
+		if (treesBlockPtr)
+			{
+			const NxsString	 t=treesBlockPtr->GetID();
+			if (t.empty())
+				{
+				if (treesTitle == NULL)
+					effectiveB = this;
+				}
+			else
+				{
+				/* treesTitle may be NULL here; never hand a NULL string to the comparison */
+				if (treesTitle != NULL && NxsString::case_insensitive_equals(treesTitle, t.c_str()))
+					effectiveB = this;
+				}
+			}
+		if (effectiveB == NULL)
+			{
+			errormsg.clear();
+			errormsg <<  "A TREES block";
+			if (treesTitle)
+				errormsg << " with the title " << NxsString::GetEscaped(treesTitle);
+			errormsg << " must precede an " << NCL_BLOCKTYPE_ATTR_NAME << " block with a " <<  cmd <<  " command.";
+			errormsg << "\n(If such a block exists, then this program may not be using an API for the NCL library that supports block linking).";
+			throw NxsException(errormsg, token);
+			}
+		}
+	else if (ncb > 1)
+		{
+		errormsg = "Multiple TREES blocks have been encountered, but a ";
+		errormsg += cmd;
+		errormsg += " command was found which does not specify which TREES block it uses.  The most recent TREES block will be used.";
+		nexusReader->NexusWarnToken(errormsg, NxsReader::AMBIGUOUS_CONTENT_WARNING, token);
+		errormsg.clear();
+		effectiveB = GetAssumptionsBlockForTreesBlock(cb, NxsBlock::BLOCK_LINK_TO_MOST_RECENT, token);
+		}
+	else
+		{
+		NxsBlockLinkStatus statusRequested = (treesTitle == NULL ? NxsBlock::BLOCK_LINK_TO_ONLY_CHOICE : NxsBlock::BLOCK_LINK_FROM_LINK_CMD);
+		effectiveB = GetAssumptionsBlockForTreesBlock(cb, statusRequested, token);
+		}
+	effectiveB->FlagTreesBlockAsUsed();
+	return effectiveB;
+	}
+
+/*!
+	Sets the characters link status to `s`.
+	Throws NxsNCLAPIException if the current status has the BLOCK_LINK_USED bit set.
+*/
+void NxsAssumptionsBlock::SetCharLinkStatus(NxsBlockLinkStatus s)
+	{
+	const bool alreadyUsed = ((charLinkStatus & NxsBlock::BLOCK_LINK_USED) != 0);
+	if (alreadyUsed)
+		throw NxsNCLAPIException("Resetting a used charLinkStatus");
+	charLinkStatus = s;
+	}
+
+/*!
+	Sets the taxa link status to `s`.
+	Throws NxsNCLAPIException if the current status has the BLOCK_LINK_USED bit set.
+*/
+void NxsAssumptionsBlock::SetTaxaLinkStatus(NxsBlockLinkStatus s)
+	{
+	const bool alreadyUsed = ((taxaLinkStatus & NxsBlock::BLOCK_LINK_USED) != 0);
+	if (alreadyUsed)
+		throw NxsNCLAPIException("Resetting a used taxaLinkStatus");
+	taxaLinkStatus = s;
+	}
+
+/*!
+	Sets the trees link status to `s`.
+	Throws NxsNCLAPIException if the current status has the BLOCK_LINK_USED bit set.
+*/
+void NxsAssumptionsBlock::SetTreesLinkStatus(NxsBlockLinkStatus s)
+	{
+	if (treesLinkStatus & NxsBlock::BLOCK_LINK_USED)
+		{
+		/* name the trees link in the message, not the characters link */
+		throw NxsNCLAPIException("Resetting a used treesLinkStatus");
+		}
+	treesLinkStatus = s;
+	}
+
+/*!
+	Links this block to the characters block `c` with link status `s`.
+	The status is set first, so if SetCharLinkStatus throws the pointer is left unchanged.
+*/
+void NxsAssumptionsBlock::SetCharBlockPtr(NxsCharactersBlockAPI * c, NxsBlockLinkStatus s)
+	{
+	this->SetCharLinkStatus(s);
+	this->charBlockPtr = c;
+	}
+
+/*!
+	Links this block to the taxa block `c` with link status `s`.
+	The status is set first, so if SetTaxaLinkStatus throws the pointer is left unchanged.
+*/
+void NxsAssumptionsBlock::SetTaxaBlockPtr(NxsTaxaBlockAPI *c, NxsBlockLinkStatus s)
+	{
+	this->SetTaxaLinkStatus(s);
+	this->taxa = c;
+	}
+
+/*!
+	Links this block to the trees block `c` with link status `s`.
+	The status is set first, so if SetTreesLinkStatus throws the pointer is left unchanged.
+*/
+void NxsAssumptionsBlock::SetTreesBlockPtr(NxsTreesBlockAPI * c, NxsBlockLinkStatus s)
+	{
+	this->SetTreesLinkStatus(s);
+	this->treesBlockPtr = c;
+	}
+
+/*!
+	Sets NCL_BLOCKTYPE_ATTR_NAME = "ASSUMPTIONS", charBlockPtr = NULL, treesBlockPtr = NULL and taxa = t, then calls
+	Reset(). `t` may be NULL: the taxa link status is then BLOCK_LINK_UNINITIALIZED, otherwise it is
+	BLOCK_LINK_UNKNOWN_STATUS.
+*/
+NxsAssumptionsBlock::NxsAssumptionsBlock(
+  NxsTaxaBlockAPI *t)	/* pointer to the taxa block */
+	:taxa(t),
+	charBlockPtr(NULL),
+	treesBlockPtr(NULL),
+	charLinkStatus(NxsBlock::BLOCK_LINK_UNINITIALIZED),
+	taxaLinkStatus(NxsBlock::BLOCK_LINK_UNINITIALIZED),
+	treesLinkStatus(NxsBlock::BLOCK_LINK_UNINITIALIZED),
+	passedRefOfOwnedBlock(false)
+	{
+	/* a taxa block supplied by the caller is linked, but how the link came about is not known */
+	taxaLinkStatus = (t == NULL ? NxsBlock::BLOCK_LINK_UNINITIALIZED : NxsBlock::BLOCK_LINK_UNKNOWN_STATUS);
+	NCL_BLOCKTYPE_ATTR_NAME = "ASSUMPTIONS";
+	/* NOTE(review): Reset() is defined outside this view -- confirm it does not undo the link state set above */
+	Reset();
+	}
+
+/*!
+	Nothing is done in the destructor.
+	NOTE(review): blocks allocated by CreateNewAssumptionsBlock and kept in createdSubBlocks are not deleted here;
+	presumably they are owned and released elsewhere -- confirm.
+*/
+NxsAssumptionsBlock::~NxsAssumptionsBlock()
+	{
+	}
+
+/*!
+	Points the data member taxa at 'tb' (which must be non-NULL) and marks the taxa link status as
+	BLOCK_LINK_UNKNOWN_STATUS. Does nothing if 'tb' is already the current taxa block.
+	NOTEAPICHANGE: In v2.1 this can throw a NxsNCLAPIException if the Assumptions block has been used, but not reset
+	before the pointer is reassigned.
+*/
+void NxsAssumptionsBlock::ReplaceTaxaBlockPtr(
+  NxsTaxaBlockAPI *tb)	/* pointer to new NxsTaxaBlockAPI object */
+	{
+	NCL_ASSERT(tb != NULL);
+	if (tb == taxa)
+		return;
+	SetTaxaBlockPtr(tb, NxsBlock::BLOCK_LINK_UNKNOWN_STATUS);
+	}
+
+/*!
+	Reports how many character sets are currently stored.
+*/
+int NxsAssumptionsBlock::GetNumCharSets() const
+	{
+	const int n = static_cast<int>(charsets.size());
+	return n;
+	}
+
+/*!
+	Empties 'names', then appends the name of every stored character set to it.
+*/
+void NxsAssumptionsBlock::GetCharSetNames(
+  NxsStringVector &names) const	/* the vector in which to store the names */ /*v2.1to2.2 3 */
+	{
+	names.clear();
+	for (NxsUnsignedSetMap::const_iterator cs = charsets.begin(); cs != charsets.end(); ++cs)
+		names.push_back(cs->first);
+	}
+
+/*!
+	Returns a pointer to the character set stored under the name 'n', or NULL if there is no such set.
+*/
+const NxsUnsignedSet *NxsAssumptionsBlock::GetCharSet(
+  NxsString n) const /* the name of the character set to return */ /*v2.1to2.2 4 */
+	{
+	const NxsUnsignedSetMap::const_iterator pos = charsets.find(n); /*v2.1to2.2 5 */
+	if (pos != charsets.end())
+		return &(pos->second);
+	return NULL;
+	}
+
+/*!
+	Reports how many character partitions are currently stored.
+*/
+int NxsAssumptionsBlock::GetNumCharPartitions() /*v2.1to2.2 6 */
+	{
+	const int n = static_cast<int>(charPartitions.size());
+	return n;
+	}
+
+/*!
+	Empties 'names', then appends the name of every stored character partition to it.
+*/
+void NxsAssumptionsBlock::GetCharPartitionNames(
+	vector<std::string> &names)	/* the vector in which to store the names */ /*v2.1to2.2 6 */
+	{
+	names.clear();
+	for (NxsPartitionsByName::const_iterator cp = charPartitions.begin(); cp != charPartitions.end(); ++cp)
+		names.push_back(cp->first);
+	}
+
+/*!
+	Returns a pointer to the character partition stored under the name 'nm', or NULL if there is no such partition.
+*/
+const NxsPartition *NxsAssumptionsBlock::GetCharPartition(
+	std::string nm) const /* the name of the character partition to return */
+	{
+	const NxsPartitionsByName::const_iterator pos = charPartitions.find(nm);
+	if (pos != charPartitions.end())
+		return &(pos->second);
+	return NULL;
+	}
+
+/*!
+	Reports how many taxon sets are currently stored.
+*/
+int NxsAssumptionsBlock::GetNumTaxSets()/*v2.1to2.2 6 */
+	{
+	const int n = static_cast<int>(taxsets.size());
+	return n;
+	}
+
+/*!
+	Empties 'names', then appends the name of every stored taxon set to it.
+*/
+void NxsAssumptionsBlock::GetTaxSetNames(
+  NxsStringVector &names)	/* the vector in which to store the names */ /*v2.1to2.2 7 */
+	{
+	names.clear();
+	for (NxsUnsignedSetMap::const_iterator ts = taxsets.begin(); ts != taxsets.end(); ++ts)
+		names.push_back(ts->first);
+	}
+
+/*!
+	Returns a reference to the taxon set stored under the name 'nm'.
+	The lookup uses operator[]; if taxsets is a std::map (as its other uses suggest), asking for an unknown name
+	creates an empty set under that name.
+*/
+NxsUnsignedSet &NxsAssumptionsBlock::GetTaxSet( /*v2.1to2.2 8 */
+  NxsString nm) /* the name of the taxon set to return */ /*v2.1to2.2 9 */
+	{
+	NxsUnsignedSet & found = taxsets[nm]; /*v2.1to2.2 10 */
+	return found;
+	}
+
+
+/*!
+	Reports how many exclusion sets are currently stored.
+*/
+int NxsAssumptionsBlock::GetNumExSets() /*v2.1to2.2 6 */
+	{
+	const int n = static_cast<int>(exsets.size());
+	return n;
+	}
+
+/*!
+	Empties 'names', then appends the name of every stored exclusion set to it.
+*/
+void NxsAssumptionsBlock::GetExSetNames(
+  NxsStringVector &names)	/* the vector in which to store the names */ /*v2.1to2.2 7 */
+	{
+	names.clear();
+	for (NxsUnsignedSetMap::const_iterator es = exsets.begin(); es != exsets.end(); ++es)
+		names.push_back(es->first);
+	}
+
+/*!
+	Returns a reference to the exclusion set stored under the name 'nm'.
+	The lookup uses operator[]; if exsets is a std::map (as its other uses suggest), asking for an unknown name
+	creates an empty set under that name.
+*/
+NxsUnsignedSet &NxsAssumptionsBlock::GetExSet( /*v2.1to2.2 8 */
+  NxsString nm) /* the name of the exclusion set to return */ /*v2.1to2.2 9 */
+	{
+	NxsUnsignedSet & found = exsets[nm]; /*v2.1to2.2 11 */
+	return found;
+	}
+
+/*!
+	Returns a copy of the name of the default exclusion set. A zero-length string means that no default exclusion
+	set was defined in the data set.
+*/
+NxsString NxsAssumptionsBlock::GetDefExSetName() /*v2.1to2.2 12 */
+	{
+	return this->def_exset;
+	}
+
+/*!
+	Applies the exclusion set named 'n' by passing it to the ApplyExset method of the characters block stored in
+	charBlockPtr, which must be non-NULL. The set is looked up with operator[]; if exsets is a std::map (as its
+	other uses suggest), an unknown name yields a newly created empty set.
+*/
+void NxsAssumptionsBlock::ApplyExset(
+  NxsString n) /* the name of the exclusion set to apply */ /*v2.1to2.2 4 */
+	{
+	NxsString nm(n.c_str()); // null-op needed for ease of generation of v2.2 from 2.1 code
+	NCL_ASSERT(charBlockPtr != NULL);
+	NxsUnsignedSet & toExclude = exsets[nm];
+	charBlockPtr->ApplyExset(toExclude);
+	}
+
+/*!
+	Parses the optional parenthesised qualifier list that may follow the name in a character-dependent command
+	(the caller must already have read the name) and returns the assumptions block that the command applies to.
+
+	`token` is advanced past the qualifier list; on return it must be at the "=" of the command (this is demanded).
+	`cmd` is the command name used in messages.
+	`unsupported` optionally lists qualifiers that must be rejected with an exception rather than skipped with a
+		warning; may be NULL.
+	`isVect` if non-NULL, receives true when the VECTOR qualifier was seen and false otherwise. If it is NULL, a
+		VECTOR qualifier is reported as an error.
+
+	Recognised qualifiers: CHARACTERS = <title> (selects the characters block), VECTOR, NOTOKENS (always an error),
+	STANDARD and TOKENS (accepted silently). Anything else is either rejected or skipped with a warning; the warning
+	is only issued when a reader is attached.
+*/
+NxsAssumptionsBlockAPI * NxsAssumptionsBlock::DealWithPossibleParensInCharDependentCmd(NxsToken &token, const char *cmd, const std::vector<std::string> *unsupported, bool * isVect)
+	{
+	token.GetNextToken();
+	NxsString charblock_name;
+	errormsg.clear();
+	if (isVect)
+		*isVect = false;
+	if (token.Equals("("))
+		{
+		token.GetNextToken();
+		while (!token.Equals(")"))
+			{
+			if (token.Equals("CHARACTERS"))
+				{
+				/* CHARACTERS = <title>: remember which characters block the command refers to */
+				NxsString t;
+				t << "after \"(Characters\" in a " << cmd << " command";
+				DemandEquals(token, t.c_str());
+				token.GetNextToken();
+				charblock_name = token.GetToken();
+				}
+			else if (token.Equals("VECTOR"))
+				{
+				/* VECTOR is only acceptable when the caller asked to be told about it */
+				if (!isVect)
+					GenerateNxsException(token, "VECTOR-style set definitions are not currently supported");
+				else
+					*isVect = true;
+				}
+			else if (token.Equals("NOTOKENS"))
+				GenerateNxsException(token, "NOTOKENS-style set definitions are not currently supported");
+			else if (token.Equals(";"))
+				{
+				NxsString s;
+				s << "; encountered in " << cmd << " command before parentheses were closed";
+				GenerateNxsException(token, s.c_str());
+				}
+			else if (!(token.Equals("STANDARD") || token.Equals("TOKENS")) && nexusReader)
+				{
+				/* unknown qualifier: an error if the caller listed it as unsupported, otherwise only a warning */
+				bool found = false;
+				if (unsupported)
+					{
+					for (std::vector<std::string>::const_iterator u = unsupported->begin(); u != unsupported->end(); ++u)
+						{
+						if (token.Equals(u->c_str()))
+							{
+							found = true;
+							break;
+							}
+						}
+					}
+				if (found)
+					{
+					NxsString s;
+					s << "The " << token.GetTokenReference()<< " as a " << cmd << " qualifier is not supported.";
+					GenerateNxsException(token, s.c_str());
+					}
+				else
+					{
+					errormsg  << "Skipping unknown " << cmd << " qualifier: "  << token.GetTokenReference();
+					nexusReader->NexusWarnToken(errormsg, NxsReader::SKIPPING_CONTENT_WARNING, token);
+					errormsg.clear();
+					}
+				}
+			token.GetNextToken();
+			}
+		/* step past the closing parenthesis */
+		token.GetNextToken();
+		}
+	/* a NULL title tells the lookup that the command did not name a characters block */
+	const char *cbn = (charblock_name.empty() ? NULL : charblock_name.c_str());
+	NxsString u;
+	u << "in " << cmd << " definition";
+	DemandIsAtEquals(token, u.c_str());
+	NxsAssumptionsBlockAPI * naba = this->GetAssumptionsBlockForCharTitle(cbn, token, cmd);
+	return naba;
+	}
+
+
+/*!
+	Reads and stores information contained in the command TypeSet within an ASSUMPTIONS block.
+
+	An optional leading "*" marks the type set as the default. Every group name in the parsed partition must be a
+	type name that the characters block's transformation manager accepts, otherwise an NxsException is thrown.
+	The type set is stored in the transformation manager of the characters block and in that of the effective
+	assumptions block.
+*/
+void NxsAssumptionsBlock::HandleTypeSet(
+  NxsToken &token)	/* the token used to read from in */
+	{
+	errormsg.clear();
+	bool asterisked = false;
+	token.GetNextToken();
+	if (token.Equals("*"))
+		{
+		asterisked = true;
+		token.GetNextToken();
+		}
+	NxsString typeset_name = token.GetToken();
+	//typeset_name.ToUpper();
+	/* handles the optional "(...)" qualifiers and picks the assumptions block linked to the right characters block */
+	NxsAssumptionsBlockAPI * effectiveAssumpBlock = DealWithPossibleParensInCharDependentCmd(token, "TypeSet");
+	token.GetNextToken();
+	NCL_ASSERT(effectiveAssumpBlock);
+	NxsPartition newPartition;
+	NxsCharactersBlockAPI *cbp = effectiveAssumpBlock->GetCharBlockPtr();
+	NCL_ASSERT(cbp);
+	effectiveAssumpBlock->ReadPartitionDef(newPartition, *cbp, typeset_name, "Character", "TypeSet", token, false, false, false);
+	NxsTransformationManager &ctm = cbp->GetNxsTransformationManagerRef();
+	/* each group of the partition is named after a type; reject names the characters block does not know */
+	for (NxsPartition::const_iterator groupIt = newPartition.begin(); groupIt != newPartition.end(); ++groupIt)
+		{
+		if (!ctm.IsValidTypeName(groupIt->first))
+			{
+			errormsg << "The group name " << groupIt->first << " found in a TypeSet command does not correspond to a known type";
+			throw NxsException(errormsg, token);
+			}
+		}
+	NxsTransformationManager &tm = effectiveAssumpBlock->GetNxsTransformationManagerRef();
+	/* record the type set in both the characters block's manager and the assumptions block's own manager */
+	ctm.AddTypeSet(typeset_name, newPartition, asterisked);
+	tm.AddTypeSet(typeset_name, newPartition, asterisked);
+	}
+
+
+/*!
+	Reads a UserType command and registers the step matrix that it describes.
+
+	Shape that is parsed: UserType [*] name [(qualifier ...)] = definition ;
+	- A leading * is skipped (a warning is issued when a reader is attached).
+	- Qualifiers handled inside the parentheses: CHARACTERS = <title>, CSTREE, REALMATRIX, STEPMATRIX and TOKENS.
+	  NOTOKENS raises an exception; any other qualifier is skipped with a warning.
+	- STEPMATRIX or REALMATRIX written outside of parentheses is accepted with a deprecation warning.
+	- With CSTREE the definition is read as a tree and the matrix is taken from the tree's path distances.
+	- Otherwise the definition is the number of states, the state symbols and an nStates x nStates table of costs.
+	  "." or "-" on the diagonal means 0; "i" or "inf" leaves the cell at its initial DBL_MAX/INT_MAX value.
+
+	In the stepmatrix form the type is stored as a real matrix when REALMATRIX was given or when some cost is not
+	an integer, and as an integer matrix otherwise. The type is added to the transformation managers of both the
+	characters block and the effective assumptions block.
+
+	Throws NxsException when the command is malformed.
+*/
+void NxsAssumptionsBlock::HandleUserType(NxsToken& token)
+	{
+	token.GetNextToken();
+	errormsg.clear();
+	if (token.Equals("*"))
+		{
+		errormsg << "An * is ignored in a UserType command";
+		if (nexusReader)
+			nexusReader->NexusWarnToken(errormsg, NxsReader::SKIPPING_CONTENT_WARNING, token);
+		token.GetNextToken();
+		errormsg.clear();
+		}
+	// The name is kept with the case that was used in the file.
+	NxsString user_type_name = token.GetToken();
+	token.GetNextToken();
+	NxsString charblock_name;
+	bool floatMat = false;
+	bool cstreeform = false;
+	if (token.Equals("("))
+		{
+		token.GetNextToken();
+		while (!token.Equals(")"))
+			{
+			if (token.Equals("CHARACTERS"))
+				{
+				NxsString t;
+				t << "after \"(Characters\" in a UserType command";
+				DemandEquals(token, t.c_str());
+				token.GetNextToken();
+				charblock_name = token.GetToken();
+				}
+			else if (token.Equals("CSTREE"))
+				cstreeform = true;
+			else if (token.Equals("NOTOKENS"))
+				GenerateNxsException(token, "NOTOKENS-style UserType are not supported");
+			else if (token.Equals("REALMATRIX"))
+				floatMat = true;
+			else if (token.Equals(";"))
+				{
+				NxsString s;
+				s << "; encountered in UserType command before parentheses were closed";
+				GenerateNxsException(token, s.c_str());
+				}
+			else if (!(token.Equals("STEPMATRIX") || token.Equals("TOKENS")) && nexusReader)
+				{
+				errormsg  << "Skipping unknown UserType qualifier: "  << token.GetTokenReference();
+				nexusReader->NexusWarnToken(errormsg, NxsReader::SKIPPING_CONTENT_WARNING, token);
+				errormsg.clear();
+				}
+			token.GetNextToken();
+			}
+		token.GetNextToken();
+		}
+	if (token.Equals("STEPMATRIX") || token.Equals("REALMATRIX"))
+		{
+		// Deprecated placement of the qualifier (outside of parentheses). It is still honored, so REALMATRIX
+		// has the same effect here as it has inside the parentheses.
+		if (token.Equals("REALMATRIX"))
+			floatMat = true;
+		if (nexusReader)
+			{
+			errormsg  << "UserType qualifier "<< token.GetTokenReference() << " should occur in parentheses ("<< token.GetTokenReference() <<") ";
+			nexusReader->NexusWarnToken(errormsg, NxsReader::DEPRECATED_WARNING, token);
+			errormsg.clear();
+			}
+		token.GetNextToken();
+		}
+	DemandIsAtEquals(token, "in UserType definition");
+
+	const char *cbn = (charblock_name.empty() ? NULL : charblock_name.c_str());
+	NxsAssumptionsBlockAPI * effectiveAssumpBlock =  this->GetAssumptionsBlockForCharTitle(cbn, token, "UserType");
+	NCL_ASSERT(effectiveAssumpBlock);
+	NxsCharactersBlockAPI *cbp = effectiveAssumpBlock->GetCharBlockPtr();
+	NCL_ASSERT(cbp);
+
+	NxsRealStepMatrix::DblMatrix dm;
+	NxsIntStepMatrix::IntMatrix im;
+	std::vector<std::string> symbolsOrder;
+	const std::vector<const NxsDiscreteDatatypeMapper *> mappers = cbp->GetAllDatatypeMappers();
+
+	if (cstreeform)
+		{
+		bool success = false;
+		ProcessedNxsCommand tokenVec;
+		token.GetNextToken();
+		token.ProcessAsCommand(&tokenVec);
+
+		// Try each datatype mapper in turn until the tree can be read with that mapper's symbols as labels.
+		for (std::vector<const NxsDiscreteDatatypeMapper *>::const_iterator mIt = mappers.begin(); !success && mIt != mappers.end(); ++mIt)
+			{
+			const NxsDiscreteDatatypeMapper * mapper = *mIt;
+			std::string s = mapper->GetSymbolsWithGapChar();
+			symbolsOrder.clear();
+			std::map<std::string, unsigned> symMap;
+			for (unsigned i = 0; i < s.length(); ++i)
+				{
+				std::string sym(1, s[i]);
+				symbolsOrder.push_back(sym);
+				symMap[sym] = i;
+				}
+			std::string newick;
+			NxsFullTreeDescription treeDesc(newick, user_type_name, 0);
+			BogusToIndMapper labelToIndMapper;
+			try
+				{
+				NxsTreesBlock::ProcessTokenVecIntoTree(tokenVec, treeDesc, &labelToIndMapper, symMap, false, nexusReader, mapper->IsRespectCase());
+				success = true;
+				}
+			catch (NxsException &)
+				{
+				// A failure before any label was looked up is not a symbol mismatch: pass it on unchanged.
+				// A bare throw rethrows the original exception object instead of a copy of it.
+				if (!labelToIndMapper.queried)
+					throw;
+				}
+			// NOTE(review): when the exception above is swallowed the code below still runs on treeDesc;
+			// confirm that this is safe for a description that was not completely processed.
+			NxsSimpleTree cstree(treeDesc, 1, 1.0);
+			if (treeDesc.SomeEdgesHaveLengths() && !treeDesc.EdgeLengthsAreAllIntegers())
+				{
+				floatMat = true;
+				dm = cstree.GetDblPathDistances(false);
+				if (dm.size() < symbolsOrder.size())
+					symbolsOrder.resize(dm.size());
+				else if (dm.size() > symbolsOrder.size())
+					success = false;
+				}
+			else
+				{
+				im = cstree.GetIntPathDistances();
+				if (im.size() < symbolsOrder.size())
+					symbolsOrder.resize(im.size());
+				else if (im.size() > symbolsOrder.size())
+					success = false;
+				}
+
+			}
+		if (!success)
+			{
+			errormsg << "No datatype was found with all of the symbols the UserType CSTree";
+			throw NxsException(errormsg, token);
+			}
+		}
+	else
+		{
+		/* BEGIN Read as Stepmatrix section  */
+		token.GetNextToken();
+		NxsString t = token.GetToken();
+		long longNstates;
+		if (!NxsString::to_long(t.c_str(), &longNstates) || longNstates < 2)
+			{
+			errormsg << "Expecting a number of states after the = in the UserType command (the number of states must be greater than one).  Found " << t;
+			throw NxsException(errormsg, token);
+			}
+		const bool respectCase = cbp->IsRespectCase();
+		unsigned nStates = (unsigned) longNstates;
+		// Cells start out as "infinite" cost; they keep that value when the file says i or inf.
+		NxsRealStepMatrix::DblVec dv(nStates, DBL_MAX);
+		NxsIntStepMatrix::IntVec iv(nStates, INT_MAX);
+		dm.assign(nStates, dv);
+		im.assign(nStates, iv);
+		std::set<char> symbolsSet;
+		// A single token may hold several one-character state symbols, so i advances inside the inner loop.
+		for (unsigned i = 0; i < nStates;)
+			{
+			token.GetNextToken();
+			if (token.Equals(";"))
+				{
+				errormsg << "; prematurely terminated the state declaration portion of a UserType stepmatrix.";
+				throw NxsException(errormsg, token);
+				}
+			NxsString tokStr = token.GetToken();
+			if (!respectCase)
+				tokStr.ToUpper();
+			for (NxsString::const_iterator cIt = tokStr.begin(); cIt != tokStr.end(); ++cIt, ++i)
+				{
+				if (i >= nStates)
+					{
+					// Without this check symbolsOrder would grow larger than the nStates x nStates matrix.
+					errormsg << "More than " << nStates << " state symbols were found in the state declaration portion of a UserType stepmatrix.";
+					throw NxsException(errormsg, token);
+					}
+				char s = *cIt;
+				if (symbolsSet.count(s) > 0)
+					{
+					errormsg << "State names cannot be repeated in a UserType stepmatrix.  " << s << " was encountered more than once.";
+					throw NxsException(errormsg, token);
+					}
+				bool found = false;
+				for (std::vector<const NxsDiscreteDatatypeMapper *>::const_iterator mIt = mappers.begin(); mIt != mappers.end(); ++mIt)
+					{
+					const NxsDiscreteDatatypeMapper * mapper = *mIt;
+					if (mapper->PositionInSymbolsOrGaps(s) != NXS_INVALID_STATE_CODE)
+						{
+						found = true;
+						break;
+						}
+					}
+				if (!found)
+					{
+					errormsg << "Unrecognized state " << s << " in UserType stepmatrix.";
+					throw NxsException(errormsg, token);
+					}
+				symbolsSet.insert(s);
+				symbolsOrder.push_back(std::string(1,s));
+				}
+			}
+
+		double currDblWt;
+		long currLongWt;
+
+		for (unsigned i = 0; i < nStates; ++i)
+			{
+			for (unsigned j = 0; j < nStates; ++j)
+				{
+				token.SetLabileFlagBit(NxsToken::hyphenNotPunctuation); // this allows us to deal with sci. not. in weights.
+				token.GetNextToken();
+				NxsString s = token.GetToken();
+				if (i == j && (token.Equals(".") || token.Equals("-")))
+					{
+					im[i][i] = 0;
+					dm[i][i] = 0.0;
+					}
+				else
+					{
+					bool v = NxsString::to_double(s.c_str(), &currDblWt);
+					if (!v)
+						{
+						if (!token.Equals("I") && !token.Equals("INF"))
+							{
+							errormsg << "Expecting a number or i (for infinity) as an element of the UserType stepmatrix. Found " << s;
+							throw NxsException(errormsg, token);
+							}
+						}
+					else
+						{
+						dm[i][j] = currDblWt;
+						// Once one cost fails to parse as an integer the whole matrix is treated as real.
+						if (!floatMat)
+							{
+							floatMat = !NxsString::to_long(s.c_str(), &currLongWt);
+							if (!floatMat)
+								im[i][j] = (int)currLongWt;
+							}
+						}
+					}
+				}
+			}
+		/* END Read as Stepmatrix section  */
+		token.GetNextToken();
+		if (!token.Equals(";"))
+			{
+			errormsg << "Expecting ; at the end of the UserType command. Found "  << token.GetTokenReference();
+			throw NxsException(errormsg, token);
+			}
+		}
+
+	NxsTransformationManager &ctm = cbp->GetNxsTransformationManagerRef();
+	NxsTransformationManager &tm = effectiveAssumpBlock->GetNxsTransformationManagerRef();
+	if (floatMat)
+		{
+		const NxsRealStepMatrix nrsm(symbolsOrder, dm);
+		ctm.AddRealType(user_type_name, nrsm);
+		tm.AddRealType(user_type_name, nrsm);
+		}
+	else
+		{
+		const NxsIntStepMatrix nism(symbolsOrder, im);
+		ctm.AddIntType(user_type_name, nism);
+		tm.AddIntType(user_type_name, nism);
+		}
+	}
+
+
+/*!
+	Fills standardCodeNames with the upper-case names of the built-in genetic codes.
+*/
+NxsGeneticCodesManager::NxsGeneticCodesManager()
+	{
+	static const char * const builtinCodes[] =
+		{
+		"UNIVERSAL",
+		"UNIVERSAL.EXT",
+		"MTDNA.DROS",
+		"MTDNA.DROS.EXT",
+		"MTDNA.MAM",
+		"MTDNA.MAM.EXT",
+		"MTDNA.YEAST"
+		};
+	const unsigned nBuiltin = (unsigned)(sizeof(builtinCodes)/sizeof(builtinCodes[0]));
+	for (unsigned k = 0; k < nBuiltin; ++k)
+		standardCodeNames.insert(std::string(builtinCodes[k]));
+	}
+
+/*!
+	Returns true when `n`, compared after conversion to upper case, is one of the standard code names or one of
+	the user-defined code names.
+*/
+bool NxsGeneticCodesManager::IsValidCodeName(const std::string &n) const
+	{
+	std::string upperName(n.c_str());
+	NxsString::to_upper(upperName);
+	if (standardCodeNames.count(upperName) > 0)
+		return true;
+	return userDefinedCodeNames.count(upperName) > 0;
+	}
+/*!
+	Reads and stores information contained in the command CodeSet within an ASSUMPTIONS block.
+
+	The definition is read as a partition of the characters whose group names must be genetic code names that
+	the effective block's NxsGeneticCodesManager accepts; an NxsException is thrown for any other group name.
+	A leading * is remembered and passed on to AddCodeSet.
+*/
+void NxsAssumptionsBlock::HandleCodeSet(
+  NxsToken &token)	/* the token used to read from in */
+	{
+	bool asterisked = false;
+	token.GetNextToken();
+	if (token.Equals("*"))
+		{
+		asterisked = true;
+		token.GetNextToken();
+		}
+	// Qualifiers handed to the helper as unsupported for this command.
+	std::vector<std::string> unsupported;
+	unsupported.push_back(std::string("TAXA"));
+	unsupported.push_back(std::string("UNALIGNED"));
+	NxsString codeset_name = token.GetToken();
+	//codeset_name.ToUpper();
+	// Presumably consumes any parenthesized qualifiers and the = sign, and picks the block that is linked to
+	// the requested characters block -- the helper is defined outside of this section.
+	NxsAssumptionsBlockAPI * effectiveAssumpBlock = DealWithPossibleParensInCharDependentCmd(token, "CodeSet", &unsupported);
+	token.GetNextToken();
+	NxsPartition newPartition;
+	NxsCharactersBlockAPI *cbp = effectiveAssumpBlock->GetCharBlockPtr();
+	NCL_ASSERT(cbp);
+	effectiveAssumpBlock->ReadPartitionDef(newPartition, *cbp, codeset_name, "Character", "CodeSet", token, false, false, false);
+	NxsGeneticCodesManager &gcm = effectiveAssumpBlock->GetNxsGeneticCodesManagerRef();
+	// Validate every group name before anything is stored.
+	for (NxsPartition::const_iterator groupIt = newPartition.begin(); groupIt != newPartition.end(); ++groupIt)
+		{
+		const std::string & s = groupIt->first;
+		if (!gcm.IsValidCodeName(s))
+			{
+			errormsg << "The Genetic code name " << groupIt->first << " found in a CodeSet command does not correspond to a known code";
+			throw NxsException(errormsg, token);
+			}
+		}
+	effectiveAssumpBlock->AddCodeSet(codeset_name, newPartition, asterisked);
+	}
+
+/*!
+	Reads and stores information contained in the command CodonPosSet within an ASSUMPTIONS block.
+
+	The definition is read as a partition of the characters. Each group name must be a single character out of
+	n, N, 1, 2, 3 or ?; an NxsException is thrown otherwise. The partition is stored in the effective assumptions
+	block and also in its characters block.
+*/
+void NxsAssumptionsBlock::HandleCodonPosSet(
+  NxsToken &token)	/* the token used to read from in */
+	{
+	bool asterisked = false;
+	token.GetNextToken();
+	if (token.Equals("*"))
+		{
+		asterisked = true;
+		token.GetNextToken();
+		}
+	NxsString codonPosSetName = token.GetToken();
+	//codonPosSetName.ToUpper();
+	// Presumably consumes any parenthesized qualifiers and the = sign -- the helper is defined outside of this section.
+	NxsAssumptionsBlockAPI * effectiveAssumpBlock = DealWithPossibleParensInCharDependentCmd(token, "CodonPosSet", NULL);
+	token.GetNextToken();
+	NxsPartition newPartition;
+	NxsCharactersBlockAPI *cbp = effectiveAssumpBlock->GetCharBlockPtr();
+	NCL_ASSERT(cbp);
+	effectiveAssumpBlock->ReadPartitionDef(newPartition, *cbp, codonPosSetName, "Character", "CodonPosSet", token, false, false, false);
+	for (NxsPartition::const_iterator groupIt = newPartition.begin(); groupIt != newPartition.end(); ++groupIt)
+		{
+		const std::string & s = groupIt->first;
+		bool legal = false;
+		if (s.length() == 1)
+			{
+			const char c = s[0];
+			// Note that '?' is accepted here although the error message below does not list it.
+			if (c == 'n' || c == 'N' || c == '1' || c == '2' || c == '3' || c == '?')
+				legal = true;
+			}
+		if (!legal)
+			{
+			errormsg << "The Codon Position category name " << groupIt->first << " found in a CodonPosSet command is not legal.  \"N\", \"1\", \"2\", or \"3\" were expected.";
+			throw NxsException(errormsg, token);
+			}
+		}
+	effectiveAssumpBlock->AddCodonPosSet(codonPosSetName, newPartition, asterisked);
+	cbp->AddNewCodonPosPartition(codonPosSetName, newPartition, asterisked);
+	}
+
+/*!
+	Interface used by NxsBlock::ReadVectorPartitionDef: convert() turns the current token into the string that is
+	used as the key of the subset to which the item belongs.
+*/
+class NxsSetVectorItemValidator
+	{
+	public:
+		virtual ~NxsSetVectorItemValidator()
+			{
+			}
+		virtual std::string convert(NxsToken &) = 0;
+	};
+
+/*!
+	Validator for the items of a vector-style WtSet: every item has to be readable as a number.
+*/
+class WtSetVectorItemValidator: public NxsSetVectorItemValidator
+	{
+	public:
+		virtual ~WtSetVectorItemValidator()
+			{
+			}
+
+		/* Returns the text of the current token if it parses as a long or as a double; throws NxsException otherwise. */
+		virtual std::string convert(NxsToken & token)
+			{
+			NxsString tokStr = token.GetToken();
+			const char * raw = tokStr.c_str();
+			long asLong;
+			double asDouble;
+			const bool numeric = NxsString::to_long(raw, &asLong) || NxsString::to_double(raw, &asDouble);
+			if (!numeric)
+				{
+				NxsString errormsg;
+				errormsg << "Expecting a number as a character weight.  Found \"" << raw << "\" instead.";
+				throw NxsException(errormsg, token);
+				}
+			return std::string(raw);
+			}
+
+	};
+
+/*!
+	Reads and stores information contained in the command WtSet within an ASSUMPTIONS block.
+
+	The definition is read either as a vector of weights (one per character) or as a partition whose group names
+	are the weights. The weights are stored as an integer weight set as long as every weight parses as an integer
+	and as a real weight set otherwise. The set is added to the transformation managers of both the characters
+	block and the effective assumptions block. Throws NxsException for a weight that is not a number.
+*/
+void NxsAssumptionsBlock::HandleWeightSet(
+  NxsToken &token)	/* the token used to read from in */
+	{
+	bool asterisked = false;
+	token.GetNextToken();
+	if (token.Equals("*"))
+		{
+		asterisked = true;
+		token.GetNextToken();
+		}
+	NxsString wtset_name = token.GetToken();
+	//wtset_name.ToUpper();
+	// NOTE(review): isVect is not initialized here; this relies on the helper always writing it -- confirm.
+	bool isVect;
+	NxsAssumptionsBlockAPI * effectiveAssumpBlock = DealWithPossibleParensInCharDependentCmd(token, "WtSet", NULL, &isVect);
+	token.SetLabileFlagBit(NxsToken::hyphenNotPunctuation); // this allows us to deal with sci. not. in weights.
+	token.GetNextToken();
+	NxsPartition newPartition;
+	NxsCharactersBlockAPI *cbp = effectiveAssumpBlock->GetCharBlockPtr();
+	NCL_ASSERT(cbp);
+	if (isVect)
+		{
+		WtSetVectorItemValidator validator;
+		effectiveAssumpBlock->ReadVectorPartitionDef(newPartition, *cbp, wtset_name, "Character", "WtSet", token, false, true, validator);
+		}
+	else
+		effectiveAssumpBlock->ReadPartitionDef(newPartition, *cbp, wtset_name, "Character", "WtSet", token, false, false, false);
+	bool floatWts = false;
+	long currLongWt;
+	double currDblWt;
+	NxsTransformationManager &ctm = cbp->GetNxsTransformationManagerRef();
+	NxsTransformationManager::ListOfIntWeights liw;
+	NxsTransformationManager::ListOfDblWeights diw;
+	for (NxsPartition::const_iterator groupIt = newPartition.begin(); groupIt != newPartition.end(); ++groupIt)
+		{
+		const std::string & s = groupIt->first;
+		// The integer list is only extended until the first weight that is not an integer; from then on
+		// only the list of doubles (which always receives every weight) is complete.
+		if (!floatWts)
+			{
+			floatWts = !NxsString::to_long(s.c_str(), &currLongWt);
+			if (!floatWts)
+				liw.push_back(NxsTransformationManager::IntWeightToIndexSet((int)currLongWt, groupIt->second));
+			}
+		bool v = NxsString::to_double(s.c_str(), &currDblWt);
+		if (!v)
+			{
+			errormsg << "Invalid weight " << groupIt->first << " found in a WtSet command.";
+			throw NxsException(errormsg, token);
+			}
+		diw.push_back(NxsTransformationManager::DblWeightToIndexSet(currDblWt, groupIt->second));
+		}
+	NxsTransformationManager &tm = effectiveAssumpBlock->GetNxsTransformationManagerRef();
+	if (floatWts)
+		{
+		ctm.AddRealWeightSet(wtset_name, diw, asterisked);
+		tm.AddRealWeightSet(wtset_name, diw, asterisked);
+		}
+	else
+		{
+		ctm.AddIntWeightSet(wtset_name, liw, asterisked);
+		tm.AddIntWeightSet(wtset_name, liw, asterisked);
+		}
+	}
+
+
+/*!
+	Parses a CharPartition command of an ASSUMPTIONS block and stores the resulting partition of the characters in
+	the effective assumptions block. A leading * only leads to a warning (it is forwarded to ReadPartitionDef as
+	its warnAsterisked argument).
+*/
+void NxsAssumptionsBlock::HandleCharPartition(
+  NxsToken &token)	/* the token used to read from in */
+	{
+	token.GetNextToken();
+	const bool sawAsterisk = token.Equals("*");
+	if (sawAsterisk)
+		token.GetNextToken();
+	NxsString partitionName = token.GetToken();
+	NxsAssumptionsBlockAPI * targetBlock = DealWithPossibleParensInCharDependentCmd(token, "CharPartition");
+	token.GetNextToken();
+
+	NxsCharactersBlockAPI * charBlock = targetBlock->GetCharBlockPtr();
+	NCL_ASSERT(charBlock);
+	NxsPartition groups;
+	targetBlock->ReadPartitionDef(groups, *charBlock, partitionName, "Character", "CharPartition", token, sawAsterisk, false, true);
+	targetBlock->AddCharPartition(partitionName, groups);
+	}
+
+/*!
+	Parses the start of a CharSet command (optional *, name, optional qualifiers) and hands the set definition to
+	ReadCharsetDef of the effective assumptions block.
+*/
+void NxsAssumptionsBlock::HandleCharSet(
+  NxsToken &token)	/* the token used to read from in */
+	{
+	token.GetNextToken();
+	const bool sawAsterisk = token.Equals("*");
+	if (sawAsterisk)
+		token.GetNextToken();
+	NxsString setName = token.GetToken();
+	NxsAssumptionsBlockAPI * targetBlock = DealWithPossibleParensInCharDependentCmd(token, "CharSet");
+	token.GetNextToken();
+	targetBlock->ReadCharsetDef(setName, token, sawAsterisk);
+	}
+
+/*!
+	Reads the body of a CharSet definition. Must only be called once charBlockPtr refers to the right characters
+	block. The set is stored in charsets and registered with the characters block; warnings are issued for an
+	ignored * and for the replacement of an earlier set with the same name (only when a reader is attached).
+*/
+void NxsAssumptionsBlock::ReadCharsetDef(NxsString charset_name, NxsToken &token, bool asterisked)
+	{
+	NCL_ASSERT(charBlockPtr != NULL);
+	NxsCharactersBlockAPI &owner = *charBlockPtr;
+	NxsUnsignedSet indices;
+	NxsSetReader::ReadSetDefinition(token, owner, "Character", "CharSet", &indices);
+	charsets[charset_name] = indices;
+	if (nexusReader != NULL && asterisked)
+		{
+		nexusReader->NexusWarnToken("An * is ignored in a CHARSET command", NxsReader::SKIPPING_CONTENT_WARNING, token);
+		errormsg.clear();
+		}
+	// The set is registered with the characters block even when there is no reader to warn.
+	const bool replacedOlder = owner.AddNewIndexSet(charset_name, indices);
+	if (!replacedOlder || !nexusReader)
+		return;
+	errormsg = "A CHARSET with the name ";
+	errormsg += charset_name;
+	errormsg += " has already been encountered.    The later definition will preempt the earlier definition(s).";
+	nexusReader->NexusWarnToken(errormsg, NxsReader::OVERWRITING_CONTENT_WARNING, token);
+	errormsg.clear();
+	}
+
+
+/*!
+	Parses the start of an ExSet command (optional *, name, optional qualifiers) and hands the set definition to
+	ReadExsetDef of the effective assumptions block.
+*/
+void NxsAssumptionsBlock::HandleExSet(
+  NxsToken &token)	/* the token used to read from in */
+	{
+	token.GetNextToken();
+	const bool sawAsterisk = token.Equals("*");
+	if (sawAsterisk)
+		token.GetNextToken();
+	NxsString setName = token.GetToken();
+	NxsAssumptionsBlockAPI * targetBlock = DealWithPossibleParensInCharDependentCmd(token, "ExSet");
+	token.GetNextToken();
+	targetBlock->ReadExsetDef(setName, token, sawAsterisk);
+	}
+
+/*!
+	Reads the body of an ExSet definition. Must only be called once charBlockPtr refers to the right characters
+	block. The set is stored in exsets and registered with the characters block. When the command was marked with
+	a *, the set becomes the default exclusion set and is applied right away.
+*/
+void NxsAssumptionsBlock::ReadExsetDef(NxsString charset_name, NxsToken &token, bool asterisked)
+	{
+	NCL_ASSERT(charBlockPtr != NULL);
+	NxsCharactersBlockAPI &owner = *charBlockPtr;
+	NxsUnsignedSet indices;
+	NxsSetReader::ReadSetDefinition(token, owner, "Character", "ExSet", &indices);
+	exsets[charset_name] = indices;
+	// The set is registered with the characters block even when there is no reader to warn.
+	const bool replacedOlder = owner.AddNewExSet(charset_name, indices);
+	if (replacedOlder && nexusReader)
+		{
+		errormsg = "A ExSet with the name ";
+		errormsg += charset_name;
+		errormsg += " has already been encountered.    The later definition will preempt the earlier definition(s).";
+		nexusReader->NexusWarnToken(errormsg, NxsReader::OVERWRITING_CONTENT_WARNING, token);
+		errormsg.clear();
+		}
+	if (!asterisked)
+		return;
+	def_exset = charset_name;
+	ApplyExset(charset_name);
+	}
+
+
+/*!
+	Reads and stores information contained in the command TaxPartition within an ASSUMPTIONS block.
+
+	Shape that is parsed: TaxPartition [*] name [(qualifier ...)] = definition ;
+	Qualifiers: TAXA = <title> selects the taxa block by title; STANDARD and TOKENS are accepted silently; VECTOR,
+	NOTOKENS and a premature ; raise an exception; anything else is skipped with a warning when a reader is
+	attached. A leading * only produces a warning (it is forwarded to ReadPartitionDef as warnAsterisked).
+*/
+void NxsAssumptionsBlock::HandleTaxPartition(
+  NxsToken &token)	/* the token used to read from in */
+	{
+	bool asterisked = false;
+	token.GetNextToken();
+	if (token.Equals("*"))
+		{
+		asterisked = true;
+		token.GetNextToken();
+		}
+	NxsString taxpart_name = token.GetToken();
+	//taxpart_name.ToUpper();
+
+	NxsAssumptionsBlockAPI *effectiveAssumpBlock = NULL;
+	token.GetNextToken();
+	NxsString taxblock_name;
+
+	if (token.Equals("("))
+		{
+		token.GetNextToken();
+		// Consume qualifiers up to the closing parenthesis.
+		while (!token.Equals(")"))
+			{
+			if (token.Equals("TAXA"))
+				{
+				DemandEquals(token, "after \"(Taxa\" in a TaxPartition command");
+				token.GetNextToken();
+				taxblock_name = token.GetToken();
+				}
+			else if (token.Equals("VECTOR"))
+				GenerateNxsException(token, "VECTOR-style set definitions are not currently supported");
+			else if (token.Equals("NOTOKENS"))
+				GenerateNxsException(token, "NOTOKENS-style set definitions are not currently supported");
+			else if (token.Equals(";"))
+				GenerateNxsException(token, "; encountered in TaxPartition command before parentheses were closed");
+			else if (!(token.Equals("STANDARD") || token.Equals("TOKENS")) && nexusReader)
+				{
+				errormsg = "Skipping unknown TaxPartition qualifier: ";
+				errormsg << token.GetTokenReference();
+				nexusReader->NexusWarnToken(errormsg, NxsReader::SKIPPING_CONTENT_WARNING, token);
+				errormsg.clear();
+				}
+			token.GetNextToken();
+			}
+		token.GetNextToken();
+		}
+	// NULL means that no taxa block title was given in the command.
+	const char *cbn = (taxblock_name.empty() ? NULL : taxblock_name.c_str());
+	effectiveAssumpBlock = this->GetAssumptionsBlockForTaxaTitle(cbn, token, "TaxPartition");
+	DemandIsAtEquals(token, "in TaxPartition definition");
+	token.GetNextToken();
+	NxsPartition newPartition;
+	// NOTE(review): the labels are resolved against this block's taxa pointer although the partition is stored
+	// in effectiveAssumpBlock; confirm that both always refer to the same taxa block.
+	NCL_ASSERT(taxa);
+	effectiveAssumpBlock->ReadPartitionDef(newPartition, *taxa, taxpart_name, "Taxa", "TaxPartition", token, asterisked, false, true);
+	effectiveAssumpBlock->AddTaxPartition(taxpart_name, newPartition);
+	}
+/*!
+	Reads and stores information contained in the command TreePartition within an ASSUMPTIONS block.
+
+	Shape that is parsed: TreePartition [*] name [(qualifier ...)] = definition ;
+	Qualifiers: TREES = <title> selects the trees block by title; STANDARD and TOKENS are accepted silently;
+	VECTOR, NOTOKENS and a premature ; raise an exception; anything else is skipped with a warning when a reader
+	is attached. A leading * only produces a warning (it is forwarded to ReadPartitionDef as warnAsterisked).
+*/
+void NxsAssumptionsBlock::HandleTreePartition(
+  NxsToken &token)	/* the token used to read from in */
+	{
+	bool asterisked = false;
+	token.GetNextToken();
+	if (token.Equals("*"))
+		{
+		asterisked = true;
+		token.GetNextToken();
+		}
+	NxsString treepart_name = token.GetToken();
+	//treepart_name.ToUpper();
+	NxsAssumptionsBlockAPI *effectiveAssumpBlock = NULL;
+	token.GetNextToken();
+	NxsString treeblock_name;
+
+	if (token.Equals("("))
+		{
+		token.GetNextToken();
+		// Consume qualifiers up to the closing parenthesis.
+		while (!token.Equals(")"))
+			{
+			if (token.Equals("TREES"))
+				{
+				DemandEquals(token, "after \"(Trees\" in a TreePartition command");
+				token.GetNextToken();
+				treeblock_name = token.GetToken();
+				}
+			else if (token.Equals("VECTOR"))
+				GenerateNxsException(token, "VECTOR-style set definitions are not currently supported");
+			else if (token.Equals("NOTOKENS"))
+				GenerateNxsException(token, "NOTOKENS-style set definitions are not currently supported");
+			else if (token.Equals(";"))
+				GenerateNxsException(token, "; encountered in TreePartition command before parentheses were closed");
+			else if (!(token.Equals("STANDARD") || token.Equals("TOKENS")) && nexusReader)
+				{
+				errormsg = "Skipping unknown TreePartition qualifier: ";
+				errormsg << token.GetTokenReference();
+				nexusReader->NexusWarnToken(errormsg, NxsReader::SKIPPING_CONTENT_WARNING, token);
+				errormsg.clear();
+				}
+			token.GetNextToken();
+			}
+		token.GetNextToken();
+		}
+	// NULL means that no trees block title was given in the command.
+	const char *cbn = (treeblock_name.empty() ? NULL : treeblock_name.c_str());
+	effectiveAssumpBlock = this->GetAssumptionsBlockForTreesTitle(cbn, token, "TreePartition");
+	DemandIsAtEquals(token, "in TreePartition definition");
+	token.GetNextToken();
+	NxsPartition newPartition;
+	// NOTE(review): the labels are resolved against this block's treesBlockPtr although the partition is stored
+	// in effectiveAssumpBlock; confirm that both always refer to the same trees block.
+	NCL_ASSERT(treesBlockPtr);
+	effectiveAssumpBlock->ReadPartitionDef(newPartition, *treesBlockPtr, treepart_name, "Tree", "TreePartition", token, asterisked, false, true);
+	effectiveAssumpBlock->AddTreePartition(treepart_name, newPartition);
+	}
+
+/*!
+	Reads a partition definition of the form "name : set , name : set ... ;" and appends one group per subset to
+	`np`. On entry `token` must hold the first subset name (or the terminating ;).
+
+	np					receives the groups in the order in which they occur
+	ltm					mapper used to resolve the labels and to determine the number of items
+	partName			name of the partition (used in messages and when storing it)
+	ptype				name of the kind of item (e.g. "Character"), used in messages
+	cmd					name of the command, used in messages
+	token				the token stream; left at the terminating ;
+	warnAsterisked		if true (and a reader is attached) a warning that the * is ignored is issued
+	demandAllInds		if true an exception is thrown when some item is in no subset; otherwise a warning is issued
+	storeAsPartition	if true the partition is registered with `ltm` through AddNewPartition
+
+	Throws NxsException when a subset name is not followed by a colon.
+*/
+void NxsBlock::ReadPartitionDef(
+  NxsPartition &np,
+  NxsLabelToIndicesMapper &ltm,
+  const std::string & partName,
+  const char * ptype,
+  const char * cmd,
+  NxsToken & token,
+  bool warnAsterisked,
+  bool demandAllInds,
+  bool storeAsPartition)
+	{
+	NxsUnsignedSet allInds;
+	const unsigned total = ltm.GetMaxIndex() + 1;
+	// NOTE(review): nothing in this function inserts into prevGroupNames, so the repetition check below can
+	// never fire. Enabling it would reject definitions that repeat a group name (e.g. the same weight twice in
+	// a WtSet), so confirm the intended behavior before changing it.
+	std::set<NxsString> prevGroupNames;
+	errormsg.clear();
+	for (;;)
+		{
+		if (token.Equals(";"))
+			break;
+		NxsString groupN = token.GetToken();
+		NxsString capGroupN = groupN;
+		capGroupN.ToUpper();
+		if (prevGroupNames.count(capGroupN) > 0)
+			{
+			errormsg << "Illegal repitition of a subset name (" << groupN << ") in the " << cmd << " definition of " << partName;
+			throw NxsException(errormsg, token);
+			}
+		token.GetNextToken();
+		if (!token.Equals(":"))
+			{
+			errormsg << "Expecting a : after the subset name " << groupN << " in the " << cmd  << " definition of " << partName << ". Found " << token.GetToken();
+			throw NxsException(errormsg, token);
+			}
+		token.GetNextToken();
+		NxsUnsignedSet s;
+		// allInds (the indices used by earlier subsets) is handed to the set reader as well.
+		NxsSetReader::ReadSetDefinition(token, ltm, ptype, cmd, &s, &allInds);
+		allInds.insert(s.begin(), s.end());
+		np.push_back(NxsPartitionGroup(groupN, s));
+		if (token.Equals(";"))
+			break;
+		// Presumably the set reader only stops at , or ; -- a different token is only caught by this assertion.
+		NCL_ASSERT(token.Equals(","));
+		 // this flag allows us to deal with sci. not. in WtSet commands.
+		 //	It shouldn't hurt in other contexts, though the parser will be
+		 //		more lax than it should (and will accept unquoted tokens-like-this as names).
+		token.SetLabileFlagBit(NxsToken::hyphenNotPunctuation);
+		token.GetNextToken();
+		}
+	if (allInds.size() < total)
+		{
+		// Find the first index that is in no subset so that it can be reported (1-based) in the message.
+		unsigned n = 0;
+		for (;n < total; ++n)
+			{
+			if (allInds.count(n) == 0)
+				break;
+			}
+		errormsg << partName << " is a not a valid "<< cmd <<". At least one " << ptype << " ("<< n+1 << ") is not included";
+		if (demandAllInds)
+			throw NxsException(errormsg, token);
+		else if (nexusReader)
+			{
+			nexusReader->NexusWarnToken(errormsg, NxsReader::ILLEGAL_CONTENT_WARNING, token);
+			errormsg.clear();
+			}
+		}
+	if (warnAsterisked && nexusReader != NULL)
+		{
+		errormsg << "An * is ignored in a " << cmd << " command";
+		nexusReader->NexusWarnToken(errormsg, NxsReader::SKIPPING_CONTENT_WARNING, token);
+		errormsg.clear();
+		}
+	// AddNewPartition is only called when storeAsPartition is set; the warning additionally needs a reader.
+	if (storeAsPartition && ltm.AddNewPartition(partName, np) && nexusReader)
+		{
+		errormsg << "A " << cmd << " with the name ";
+		errormsg += partName;
+		errormsg += " has already been encountered.    The later definition will preempt the earlier definition(s).";
+		nexusReader->NexusWarnToken(errormsg, NxsReader::OVERWRITING_CONTENT_WARNING, token);
+		errormsg.clear();
+		}
+	}
+
+/*!
+	Reads a vector-style partition definition: one item per index of `ltm`, in index order, up to the terminating
+	semicolon. Each token is passed to `v.convert`, and the returned string is the key of the subset that
+	receives the index. `np` is replaced by one group per distinct key and is registered with `ltm`.
+
+	np				receives the groups (any previous content is discarded)
+	ltm				mapper that determines the number of items and stores the partition
+	partName		name of the partition (used in messages and when storing it)
+	ptype			name of the kind of item (e.g. "Character"), used in messages
+	cmd				name of the command, used in messages
+	token			the token stream; on entry it must hold the first item
+	warnAsterisked	if true (and a reader is attached) a warning that the * is ignored is issued
+	demandAllInds	if true an exception is thrown when fewer items than indices were found; otherwise a warning
+					is issued
+	v				validator/converter for the items (it may throw NxsException)
+
+	Returns the number of items that were read.
+*/
+unsigned NxsBlock::ReadVectorPartitionDef(NxsPartition &np, NxsLabelToIndicesMapper &ltm, const std::string & partName, const char * ptype, const char * cmd, NxsToken & token, bool warnAsterisked, bool demandAllInds, NxsSetVectorItemValidator & v)
+	{
+	const unsigned total = ltm.GetMaxIndex() + 1;
+	std::map<std::string, NxsUnsignedSet> subsetMap;
+	errormsg.clear();
+	unsigned ind = 0;
+	for (; ind < total; ++ind)
+		{
+		if (token.Equals(";"))
+			break;
+		const std::string key = v.convert(token);
+		subsetMap[key].insert(ind);
+		token.GetNextToken();
+		}
+	if (ind < total)
+		{
+		// ind is the number of items that were consumed before the ; (indices 0 .. ind-1).
+		errormsg << partName << " is a not a valid "<< cmd <<". Only " << ind << " entries for " << ptype << "(s) were included in the definition";
+		if (demandAllInds)
+			throw NxsException(errormsg, token);
+		else if (nexusReader)
+			{
+			nexusReader->NexusWarnToken(errormsg, NxsReader::ILLEGAL_CONTENT_WARNING, token);
+			errormsg.clear();
+			}
+		}
+	if (warnAsterisked && nexusReader != NULL)
+		{
+		errormsg << "An * is ignored in a " << cmd << " command";
+		nexusReader->NexusWarnToken(errormsg, NxsReader::SKIPPING_CONTENT_WARNING, token);
+		errormsg.clear();
+		}
+	np.clear();
+	for (std::map<std::string, NxsUnsignedSet>::const_iterator sIt = subsetMap.begin(); sIt != subsetMap.end(); ++sIt)
+		{
+		const std::string & k = sIt->first;
+		const NxsUnsignedSet & valset = sIt->second;
+		np.push_back(NxsPartitionGroup(k, valset));
+		}
+	if (ltm.AddNewPartition(partName, np) && nexusReader)
+		{
+		errormsg << "A " << cmd << " with the name ";
+		errormsg += partName;
+		errormsg += " has already been encountered.    The later definition will preempt the earlier definition(s).";
+		nexusReader->NexusWarnToken(errormsg, NxsReader::OVERWRITING_CONTENT_WARNING, token);
+		errormsg.clear();
+		}
+	return ind;
+	}
+
+/*!
+	Writes one "cmd name = set;" line to `out` for every entry of `usetmap`. The entry whose name equals
+	`nameOfDef` (compared without regard to case) is marked with a * in front of its name.
+*/
+void NxsWriteSetCommand(const char *cmd, const NxsUnsignedSetMap & usetmap, std::ostream &out, const char * nameOfDef)
+	{
+	NxsUnsignedSetMap::const_iterator entry = usetmap.begin();
+	const NxsUnsignedSetMap::const_iterator stop = usetmap.end();
+	while (entry != stop)
+		{
+		const bool isDefault = NxsString::case_insensitive_equals(entry->first.c_str(), nameOfDef);
+		out << "    " << cmd << " ";
+		if (isDefault)
+			out << "* ";
+		out << NexusToken::EscapeString(entry->first) << " =";
+		NxsSetReader::WriteSetAsNexusValue(entry->second, out);
+		out << ";\n";
+		++entry;
+		}
+	}
+
+/*!
+	Writes one "cmd name = group : set, group : set ...;" line to `out` for every partition of `usetmap`. The
+	partition whose name equals `nameOfDef` (compared without regard to case) is marked with a * in front of its
+	name.
+*/
+void NxsWritePartitionCommand(const char *cmd, const NxsPartitionsByName & usetmap, std::ostream &out, const char * nameOfDef)
+	{
+	NxsPartitionsByName::const_iterator entry = usetmap.begin();
+	const NxsPartitionsByName::const_iterator stop = usetmap.end();
+	for (; entry != stop; ++entry)
+		{
+		const bool isDefault = NxsString::case_insensitive_equals(entry->first.c_str(), nameOfDef);
+		out << "    " << cmd << " ";
+		if (isDefault)
+			out << "* ";
+		out << NexusToken::EscapeString(entry->first) << " =";
+		const NxsPartition & groups = entry->second;
+		for (NxsPartition::const_iterator grp = groups.begin(); grp != groups.end(); ++grp)
+			{
+			// Groups are separated by commas; there is none in front of the first group.
+			if (grp != groups.begin())
+				out << ',';
+			out << ' ' << NxsString::GetEscaped(grp->first) << " :";
+			NxsSetReader::WriteSetAsNexusValue(grp->second, out);
+			}
+		out << ";\n";
+		}
+	}
+
+/*!
+	Reads and stores information contained in the command TAXSET within an ASSUMPTIONS block.
+
+	Shape that is parsed: TaxSet [*] name [(qualifier ...)] = definition ;
+	Qualifiers: TAXA = <title> selects the taxa block by title; STANDARD is accepted silently; VECTOR and a
+	premature ; raise an exception; anything else is skipped with a warning when a reader is attached. The set
+	definition itself is read by ReadTaxsetDef of the effective assumptions block.
+*/
+void NxsAssumptionsBlock::HandleTaxSet(
+  NxsToken &token)	/* the token used to read from in */
+	{
+	bool asterisked = false;
+	token.GetNextToken();
+	if (token.Equals("*"))
+		{
+		asterisked = true;
+		token.GetNextToken();
+		}
+	NxsString taxset_name = token.GetToken();
+	//taxset_name.ToUpper();
+	token.GetNextToken();
+	NxsAssumptionsBlockAPI *effectiveAssumpBlock = NULL;
+	NxsString taxblock_name;
+	if (token.Equals("("))
+		{
+		token.GetNextToken();
+		// Consume qualifiers up to the closing parenthesis.
+		while (!token.Equals(")"))
+			{
+			if (token.Equals("TAXA"))
+				{
+				DemandEquals(token, "after \"(Taxa\" in a TaxSet command");
+				token.GetNextToken();
+				taxblock_name = token.GetToken();
+				}
+			else if (token.Equals("VECTOR"))
+				GenerateNxsException(token, "VECTOR-style set definitions are not currently supported");
+			else if (token.Equals(";"))
+				GenerateNxsException(token, "; encountered in TaxSet command before parentheses were closed");
+			else if (!token.Equals("STANDARD") && nexusReader)
+				{
+				errormsg = "Skipping unknown TaxSet qualifier: ";
+				errormsg << token.GetTokenReference();
+				nexusReader->NexusWarnToken(errormsg, NxsReader::SKIPPING_CONTENT_WARNING, token);
+				errormsg.clear();
+				}
+			token.GetNextToken();
+			}
+		token.GetNextToken();
+		}
+	// NULL means that no taxa block title was given in the command.
+	const char *tbn = (taxblock_name.empty() ? NULL : taxblock_name.c_str());
+	effectiveAssumpBlock = this->GetAssumptionsBlockForTaxaTitle(tbn, token, "TAXSET");
+	DemandIsAtEquals(token, "in TAXSET definition");
+	token.GetNextToken();
+	effectiveAssumpBlock->ReadTaxsetDef(taxset_name, token, asterisked);
+	}
+
+/*!
+	Reads the body of a TaxSet definition. Called after verifying that the correct taxa block pointer is set.
+	The set is stored in taxsets and registered with the taxa block. When a reader is attached, a warning is
+	issued for an ignored * and for the replacement of an earlier set with the same name.
+*/
+void NxsAssumptionsBlock::ReadTaxsetDef(NxsString taxset_name, NxsToken &token, bool asterisked)
+	{
+	NCL_ASSERT(taxa != NULL);
+	NxsTaxaBlockAPI &taxaBlock = *taxa;
+	NxsUnsignedSet s;
+	NxsSetReader::ReadSetDefinition(token, taxaBlock, "Taxon", "TaxSet", &s);
+	taxsets[taxset_name] = s;
+	if (asterisked && nexusReader != NULL)
+		{
+		nexusReader->NexusWarnToken("An * is ignored in a TaxSet command", NxsReader::SKIPPING_CONTENT_WARNING, token);
+		errormsg.clear();
+		}
+	if (taxaBlock.AddNewIndexSet(taxset_name, s) && nexusReader)
+		{
+		errormsg = "A TaxSet with the name ";
+		errormsg += taxset_name;
+		errormsg += " has already been encountered.    The later definition will preempt the earlier definition(s).";
+		// Nothing is skipped here: an earlier definition is replaced, so this is reported in the same
+		// category that the CharSet, ExSet and partition readers use for the same situation.
+		nexusReader->NexusWarnToken(errormsg, NxsReader::OVERWRITING_CONTENT_WARNING, token);
+		errormsg.clear();
+		}
+	}
+
+/*!
+	Reads and stores information contained in the command TREESET within an ASSUMPTIONS block.
+
+	Shape that is parsed: TreeSet [*] name [(qualifier ...)] = definition ;
+	Qualifiers: TREES = <title> selects the trees block by title; STANDARD is accepted silently; VECTOR and a
+	premature ; raise an exception; anything else is skipped with a warning when a reader is attached. The set
+	definition itself is read by ReadTreesetDef of the effective assumptions block.
+*/
+void NxsAssumptionsBlock::HandleTreeSet(
+  NxsToken &token)	/* the token used to read from in */
+	{
+	bool asterisked = false;
+	token.GetNextToken();
+	if (token.Equals("*"))
+		{
+		asterisked = true;
+		token.GetNextToken();
+		}
+	NxsString treeset_name = token.GetToken();
+	//treeset_name.ToUpper();
+	token.GetNextToken();
+	NxsAssumptionsBlockAPI *effectiveAssumpBlock = NULL;
+	NxsString treeblock_name;
+	if (token.Equals("("))
+		{
+		token.GetNextToken();
+		// Consume qualifiers up to the closing parenthesis.
+		while (!token.Equals(")"))
+			{
+			if (token.Equals("TREES"))
+				{
+				DemandEquals(token, "after \"(Trees\" in a TreeSet command");
+				token.GetNextToken();
+				treeblock_name = token.GetToken();
+				}
+			else if (token.Equals("VECTOR"))
+				GenerateNxsException(token, "VECTOR-style set definitions are not currently supported");
+			else if (token.Equals(";"))
+				GenerateNxsException(token, "; encountered in TreeSet command before parentheses were closed");
+			else if (!token.Equals("STANDARD") && nexusReader)
+				{
+				errormsg = "Skipping unknown TreeSet qualifier: ";
+				errormsg << token.GetTokenReference();
+				nexusReader->NexusWarnToken(errormsg, NxsReader::SKIPPING_CONTENT_WARNING, token);
+				errormsg.clear();
+				}
+			token.GetNextToken();
+			}
+		token.GetNextToken();
+		}
+	// NULL means that no trees block title was given in the command.
+	const char *tbn = (treeblock_name.empty() ? NULL : treeblock_name.c_str());
+	effectiveAssumpBlock = this->GetAssumptionsBlockForTreesTitle(tbn, token, "TreeSet");
+	DemandIsAtEquals(token, "in TreeSet definition");
+	token.GetNextToken();
+	effectiveAssumpBlock->ReadTreesetDef(treeset_name, token, asterisked);
+	}
+
+/*!
+	Reads the body of a TreeSet definition and stores it under `treeset_name`, both in this block's `treesets` map and
+	in the linked trees block. Requires that treesBlockPtr has already been set.
+*/
+void NxsAssumptionsBlock::ReadTreesetDef(NxsString treeset_name, NxsToken &token, bool asterisked)
+	{
+	NCL_ASSERT(treesBlockPtr != NULL);
+	NxsTreesBlockAPI &linkedTrees = *treesBlockPtr;
+
+	NxsUnsignedSet members;
+	NxsSetReader::ReadSetDefinition(token, linkedTrees, "Trees", "TreeSet", &members);
+	treesets[treeset_name] = members;
+
+	if (asterisked && nexusReader != NULL)
+		{
+		nexusReader->NexusWarnToken("An * is ignored in a TreeSet command", NxsReader::SKIPPING_CONTENT_WARNING, token);
+		errormsg.clear();
+		}
+
+	// The set is always registered with the trees block; the warning is only issued when a reader is attached.
+	if (linkedTrees.AddNewIndexSet(treeset_name, members) && nexusReader)
+		{
+		errormsg = "A TreeSet with the name ";
+		errormsg += treeset_name;
+		errormsg += " has already been encountered.    The later definition will preempt the earlier definition(s).";
+		nexusReader->NexusWarnToken(errormsg, NxsReader::SKIPPING_CONTENT_WARNING, token);
+		errormsg.clear();
+		}
+	}
+
+/*!
+	Reads everything that follows the block name (which the NxsReader has already consumed) up to the END or ENDBLOCK
+	command, dispatching each command to its Handle* method. Commands that are not recognized are passed to
+	SkipCommand().
+*/
+void NxsAssumptionsBlock::Read(
+  NxsToken &token)	/* the token used to read from in */
+	{
+	isEmpty = false;
+	isUserSupplied = true;
+	NxsString beginCmd = "BEGIN ";
+	beginCmd << NCL_BLOCKTYPE_ATTR_NAME;
+	DemandEndSemicolon(token, beginCmd.c_str());
+
+	while (true)
+		{
+		token.GetNextToken();
+
+		// Snapshot the link flags so a LINK command handled below can be detected.
+		const int charsBefore = charLinkStatus;
+		const int taxaBefore = taxaLinkStatus;
+		const int treesBefore = treesLinkStatus;
+
+		const NxsBlock::NxsCommandResult res = HandleBasicBlockCommands(token);
+		if (res == NxsBlock::NxsCommandResult(STOP_PARSING_BLOCK))
+			return;
+
+		if (!(charsBefore & BLOCK_LINK_FROM_LINK_CMD) && (charLinkStatus & BLOCK_LINK_FROM_LINK_CMD))
+			blockwideCharsLinkEstablished = true;
+		if (!(treesBefore & BLOCK_LINK_FROM_LINK_CMD) && (treesLinkStatus & BLOCK_LINK_FROM_LINK_CMD))
+			blockwideTreesLinkEstablished = true;
+		if (!(taxaBefore & BLOCK_LINK_FROM_LINK_CMD) && (taxaLinkStatus & BLOCK_LINK_FROM_LINK_CMD))
+			blockwideTaxaLinkEstablished = true;
+
+		if (res == NxsBlock::NxsCommandResult(HANDLED_COMMAND))
+			continue;
+
+		// Block-specific commands.
+		if (token.Equals("CHARPARTITION"))
+			HandleCharPartition(token);
+		else if (token.Equals("CHARSET"))
+			HandleCharSet(token);
+		else if (token.Equals("CODESET"))
+			HandleCodeSet(token);
+		else if (token.Equals("CODONPOSSET"))
+			HandleCodonPosSet(token);
+		else if (token.Equals("EXSET"))
+			HandleExSet(token);
+		else if (token.Equals("OPTIONS"))
+			HandleOptions(token);
+		else if (token.Equals("TAXSET"))
+			HandleTaxSet(token);
+		else if (token.Equals("TAXPARTITION"))
+			HandleTaxPartition(token);
+		else if (token.Equals("TREESET"))
+			HandleTreeSet(token);
+		else if (token.Equals("TREEPARTITION"))
+			HandleTreePartition(token);
+		else if (token.Equals("TYPESET"))
+			HandleTypeSet(token);
+		else if (token.Equals("USERTYPE"))
+			HandleUserType(token);
+		else if (token.Equals("WTSET"))
+			HandleWeightSet(token);
+		else
+			SkipCommand(token);
+		}
+	}
+/*!
+	Handles the OPTIONS command: parses it into key/value pairs and applies the DEFTYPE, POLYTCOUNT and GAPMODE
+	settings. An invalid value for one of these keys raises a NxsException. Any other key produces a warning when a
+	reader is attached and is otherwise ignored.
+*/
+void NxsAssumptionsBlock::HandleOptions(NxsToken &token)
+	{
+	errormsg.clear();
+	token.GetNextToken();
+	std::map<std::string, std::string> kv = token.ProcessAsSimpleKeyValuePairs("OPTIONS");
+	std::map<std::string, std::string>::const_iterator kvIt = kv.begin();
+	for (; kvIt != kv.end(); ++kvIt)
+		{
+		if (NxsString::case_insensitive_equals(kvIt->first.c_str(), "DEFTYPE"))
+			{
+			NxsAssumptionsBlockAPI	* effAssumpB = GetAssumptionsBlockForCharTitle(NULL, token, "OPTIONS");
+			NCL_ASSERT(effAssumpB);
+			NxsCharactersBlockAPI * cb = effAssumpB->GetCharBlockPtr();
+			NCL_ASSERT(cb);
+			// The type name is validated against the characters block's transformation manager.
+			NxsTransformationManager & tmRef = cb->GetNxsTransformationManagerRef();
+			if (!tmRef.IsValidTypeName(kvIt->second.c_str()))
+				{
+				errormsg << kvIt->second << " is not a valid type name for OPTIONS DefType. Expecting one of:\n";
+				const std::set<std::string> & tn = tmRef.GetTypeNames();
+				for (std::set<std::string>::const_iterator tnIt = tn.begin(); tnIt != tn.end(); ++tnIt)
+					errormsg << ' ' << NxsString::GetEscaped(*tnIt);
+				throw NxsException(errormsg, token);
+				}
+			try
+				{
+				// The default is set on both the characters block's manager and the assumptions block's manager.
+				tmRef.SetDefaultTypeName(kvIt->second);
+				NxsTransformationManager & etmRef  = effAssumpB->GetNxsTransformationManagerRef();
+				etmRef.SetDefaultTypeName(kvIt->second);
+				}
+			catch (const NxsException & x)
+				{
+				// Re-throw with the current token so the error carries a file position.
+				throw NxsException(x.msg, token);
+				}
+			}
+		else if (NxsString::case_insensitive_equals(kvIt->first.c_str(), "POLYTCOUNT"))
+			{
+			if (NxsString::case_insensitive_equals(kvIt->second.c_str(), "MINSTEPS"))
+				polyTCountValue = POLY_T_COUNT_MIN;
+			else if (NxsString::case_insensitive_equals(kvIt->second.c_str(), "MAXSTEPS"))
+				polyTCountValue = POLY_T_COUNT_MAX;
+			else
+				{
+				errormsg << "Unknown value (" << kvIt->second << ") found for OPTIONS PolyTCount (expecting MINSTEPS or MAXSTEPS).";
+				throw NxsException(errormsg, token);
+				}
+			}
+		else if (NxsString::case_insensitive_equals(kvIt->first.c_str(), "GAPMODE"))
+			{
+			NxsAssumptionsBlockAPI	* effAssumpB = GetAssumptionsBlockForCharTitle(NULL, token, "OPTIONS");
+			NCL_ASSERT(effAssumpB);
+			NxsCharactersBlockAPI * cb = effAssumpB->GetCharBlockPtr();
+			NCL_ASSERT(cb);
+			// The gap mode is recorded on both the assumptions block and the characters block.
+			if (NxsString::case_insensitive_equals(kvIt->second.c_str(), "MISSING"))
+				{
+				effAssumpB->SetGapsAsNewstate(false);
+				cb->SetGapModeSetting(NxsCharactersBlockAPI::GAP_MODE_MISSING);
+				}
+			else if (NxsString::case_insensitive_equals(kvIt->second.c_str(), "NEWSTATE"))
+				{
+				effAssumpB->SetGapsAsNewstate(true);
+				cb->SetGapModeSetting(NxsCharactersBlockAPI::GAP_MODE_NEWSTATE);
+				}
+			else
+				{
+				errormsg << "Unknown value (" << kvIt->second << ") found for OPTIONS GapMode (expecting MISSING or NEWSTATE).";
+				throw NxsException(errormsg, token);
+				}
+			}
+		else if (nexusReader)
+			{
+			errormsg << "Skipping unknown subcommand (" << kvIt->first << ") in OPTIONS command of " << NCL_BLOCKTYPE_ATTR_NAME << " Block";
+			nexusReader->NexusWarnToken(errormsg, NxsReader::SKIPPING_CONTENT_WARNING, token);
+			errormsg.clear();
+			}
+		}
+	}
+
+/*!
+	Prepares for reading a new ASSUMPTIONS block: releases created sub-blocks that this block still owns, resets the
+	NxsBlock base state, empties every stored set/partition map, and drops block links that were established by a
+	LINK command.
+*/
+void NxsAssumptionsBlock::Reset()
+	{
+	// Sub-blocks are deleted only while this block still owns them. Once GetCreatedTaxaBlocks() has handed the
+	// pointers out, the recipient is presumably responsible for them.
+	if (!passedRefOfOwnedBlock)
+		{
+		for (VecAssumpBlockPtr::iterator bIt = createdSubBlocks.begin(); bIt != createdSubBlocks.end(); ++bIt)
+			delete *bIt;	// delete on a NULL pointer is a no-op
+		}
+	// Always forget the pointers. Leaving handed-out pointers in the vector while clearing the flag below would
+	// make the next Reset() delete blocks that this object no longer owns.
+	createdSubBlocks.clear();
+	passedRefOfOwnedBlock = false;
+	NxsBlock::Reset();
+	exsets.clear();
+	taxsets.clear();
+	charsets.clear();
+	treesets.clear();	// filled by ReadTreesetDef(); must not survive into the next block
+	def_exset.clear();
+	charPartitions.clear();
+	taxPartitions.clear();
+	treePartitions.clear();
+	readAs = UNREAD_OR_GENERATED_BLOCK;
+	charLinkStatus &= BLOCK_LINK_UNUSED_MASK;
+	taxaLinkStatus &= BLOCK_LINK_UNUSED_MASK;
+	treesLinkStatus &= BLOCK_LINK_UNUSED_MASK;
+	// Links that came from a LINK command belong to the block just discarded, so they are dropped.
+	if (charLinkStatus & NxsBlock::BLOCK_LINK_FROM_LINK_CMD)
+		SetCharBlockPtr(NULL, NxsBlock::BLOCK_LINK_UNINITIALIZED);
+	if (taxaLinkStatus & NxsBlock::BLOCK_LINK_FROM_LINK_CMD)
+		SetTaxaBlockPtr(NULL, NxsBlock::BLOCK_LINK_UNINITIALIZED);
+	if (treesLinkStatus & NxsBlock::BLOCK_LINK_FROM_LINK_CMD)
+		SetTreesBlockPtr(NULL, NxsBlock::BLOCK_LINK_UNINITIALIZED);
+	transfMgr.Reset();
+	codesMgr.Reset();
+	polyTCountValue = POLY_T_COUNT_UNKNOWN;
+	gapsAsNewstate = false;
+	blockwideCharsLinkEstablished = false;
+	blockwideTaxaLinkEstablished = false;
+	blockwideTreesLinkEstablished = false;
+
+	codonPosSets.clear();
+	def_codonPosSet.clear();
+	codeSets.clear();
+	def_codeSet.clear();
+	}
+
+/*!
+	Writes a brief report of this block to `out`: the names of the character sets, taxon sets and exclusion sets that
+	have been defined. When more than one exclusion set is listed, the one whose name matches def_exset is flagged
+	as the default.
+*/
+void NxsAssumptionsBlock::Report(
+  std::ostream &out)  NCL_COULD_BE_CONST /* the output stream to which to write the report */ /*v2.1to2.2 1 */
+	{
+	out << endl;
+	out << NCL_BLOCKTYPE_ATTR_NAME << " block contains the following:" << endl;
+
+	// Character sets
+	if (charsets.empty())
+		out << "  No character sets were defined" << endl;
+	else if (charsets.size() == 1)
+		{
+		out << "  1 character set defined:" << endl;
+		out << "   " << charsets.begin()->first << endl;
+		}
+	else
+		{
+		out << "  " << (unsigned)charsets.size() << " character sets defined:" << endl;
+		for (NxsUnsignedSetMap::const_iterator cIt = charsets.begin(); cIt != charsets.end(); ++cIt)
+			out << "   " << cIt->first << endl;
+		}
+
+	// Taxon sets
+	if (taxsets.empty())
+		out << "  No taxon sets were defined" << endl;
+	else if (taxsets.size() == 1)
+		{
+		out << "  1 taxon set defined:" << endl;
+		out << "   " << taxsets.begin()->first << endl;
+		}
+	else
+		{
+		out << "  " << (unsigned)taxsets.size() << " taxon sets defined:" << endl;
+		for (NxsUnsignedSetMap::const_iterator tIt = taxsets.begin(); tIt != taxsets.end(); ++tIt)
+			out << "   " << tIt->first << endl;
+		}
+
+	// Exclusion sets (the default marker is only written in the multi-set listing)
+	if (exsets.empty())
+		out << "  No exclusion sets were defined" << endl;
+	else if (exsets.size() == 1)
+		{
+		out << "  1 exclusion set defined:" << endl;
+		out << "   " << exsets.begin()->first << endl;
+		}
+	else
+		{
+		out << "  " << (unsigned)exsets.size() << " exclusion sets defined:" << endl;
+		for (NxsUnsignedSetMap::const_iterator eIt = exsets.begin(); eIt != exsets.end(); ++eIt)
+			{
+			out << "   " << eIt->first;
+			if (NxsString::case_insensitive_equals(eIt->first.c_str(), def_exset.c_str()))
+				out << " (default)";
+			out << endl;
+			}
+		}
+
+	out << endl;
+	}
+
+/*!
+	A CHARACTERS, DATA, or ALLELES block can call this function to specify that it is to receive notification when the
+	current taxon or character set changes (e.g., an "EXSET *" command is read or a program requests that one of the
+	predefined taxon sets, character sets, or exsets be applied). Normally, a NxsCharactersBlockAPI-derived object calls
+	this function upon entering its MATRIX command, since when that happens it becomes the primary data-containing block.
+*/
+void NxsAssumptionsBlock::SetCallback(
+  NxsCharactersBlockAPI* p) /* the object to be called in the event of a change in character status */
+	{
+	// Remember `p` as the characters block this assumptions block refers to.
+	charBlockPtr = p;
+	// Record the link status as BLOCK_LINK_TO_MOST_RECENT.
+	SetCharLinkStatus(NxsBlock::BLOCK_LINK_TO_MOST_RECENT);
+	}
+
+/*!
+	Converts a taxon label to a number corresponding to the taxon's position within the list maintained by the
+	NxsTaxaBlockAPI object. This method overrides the virtual function of the same name in the NxsBlock base class. If s
+	is not a valid taxon label, returns the value 0.
+*/
+unsigned NxsAssumptionsBlock::TaxonLabelToNumber(
+  NxsString s)	const /* the taxon label to convert */ /*v2.1to2.2 4 */
+	{
+	NCL_ASSERT(taxa != NULL);
+	try
+		{
+		// FindTaxon() yields a 0-based position; callers of this method expect 1-based numbers.
+		return 1 + taxa->FindTaxon(s);
+		}
+	catch (const NxsTaxaBlock::NxsX_NoSuchTaxon &)
+		{
+		// Caught by const reference (no copy, no slicing); 0 signals "not a valid taxon label".
+		return 0;
+		}
+	}
+
+/*!
+	Handles the LINK command (only used if the link API is enabled). Each key/value pair names a TAXA, CHARACTERS or
+	TREES block by title. For each recognized key:
+		- if a block of that kind is already linked and does not match the value, the old link is dropped, unless it
+		  has already been used (BLOCK_LINK_USED), in which case a NxsException is thrown;
+		- if no block of that kind is linked, the named block is looked up through the NxsReader and linked with
+		  status BLOCK_LINK_FROM_LINK_CMD; an unknown title raises a NxsException.
+	Unrecognized keys produce a warning. Throws NxsNCLAPIException if no NxsReader is attached.
+
+	NOTE(review): the existing link is compared through GetID(), while the lookup below is by title. Confirm that
+	this asymmetry is intended.
+*/
+void NxsAssumptionsBlock::HandleLinkCommand(NxsToken & token)
+	{
+	// The exception must actually be thrown: every branch below dereferences nexusReader.
+	if (!nexusReader)
+		throw NxsNCLAPIException("No NxsReader when reading Assumptions block.");
+
+	token.GetNextToken();
+	const std::map<std::string, std::string> kv = token.ProcessAsSimpleKeyValuePairs("LINK");
+	std::map<std::string, std::string>::const_iterator pairIt = kv.begin();
+	for (;pairIt != kv.end(); ++pairIt)
+		{
+		NxsString key(pairIt->first.c_str());
+		key.ToUpper();
+		NxsString value(pairIt->second.c_str());
+		if (key == "TAXA")
+			{
+			if (taxa && !taxa->GetID().EqualsCaseInsensitive(value))
+				{
+				if (GetTaxaLinkStatus() & NxsBlock::BLOCK_LINK_USED)
+					{
+					errormsg = "LINK to a Taxa block must occur before commands that use a taxa block";
+					throw NxsException(errormsg, token);
+					}
+				SetTaxaBlockPtr(NULL, NxsBlock::BLOCK_LINK_UNINITIALIZED);
+				}
+			if (!taxa)
+				{
+				NxsTaxaBlockAPI * cb = nexusReader->GetTaxaBlockByTitle(value.c_str(), NULL);
+				if (cb == NULL)
+					{
+					errormsg = "Unknown TAXA block (";
+					errormsg += value;
+					errormsg +=") referred to in the LINK command";
+					throw NxsException(errormsg, token);
+					}
+				SetTaxaBlockPtr(cb, NxsBlock::BLOCK_LINK_FROM_LINK_CMD);
+				}
+			}
+		else if (key == "CHARACTERS")
+			{
+			if (charBlockPtr && !charBlockPtr->GetID().EqualsCaseInsensitive(value))
+				{
+				if (GetCharLinkStatus() & NxsBlock::BLOCK_LINK_USED)
+					{
+					errormsg = "LINK to a CHARACTERS block must occur before commands that use a CHARACTERS block";
+					throw NxsException(errormsg, token);
+					}
+				SetCharBlockPtr(NULL, NxsBlock::BLOCK_LINK_UNINITIALIZED);
+				}
+			if (!charBlockPtr)
+				{
+				NxsCharactersBlockAPI * cb = nexusReader->GetCharBlockByTitle(value.c_str(), NULL);
+				if (cb == NULL)
+					{
+					errormsg = "Unknown CHARACTERS block (";
+					errormsg += value;
+					errormsg +=") referred to in the LINK command";
+					throw NxsException(errormsg, token);
+					}
+				SetCharBlockPtr(cb, NxsBlock::BLOCK_LINK_FROM_LINK_CMD);
+				}
+			}
+		else if (key == "TREES")
+			{
+			if (treesBlockPtr && !treesBlockPtr->GetID().EqualsCaseInsensitive(value))
+				{
+				if (GetTreesLinkStatus() & NxsBlock::BLOCK_LINK_USED)
+					{
+					errormsg = "LINK to a TREES block must occur before commands that use a TREES block";
+					throw NxsException(errormsg, token);
+					}
+				SetTreesBlockPtr(NULL, NxsBlock::BLOCK_LINK_UNINITIALIZED);
+				}
+			if (!treesBlockPtr)
+				{
+				NxsTreesBlockAPI * cb = nexusReader->GetTreesBlockByTitle(value.c_str(), NULL);
+				if (cb == NULL)
+					{
+					errormsg = "Unknown TREES block (";
+					errormsg += value;
+					errormsg +=") referred to in the LINK command";
+					throw NxsException(errormsg, token);
+					}
+				SetTreesBlockPtr(cb, NxsBlock::BLOCK_LINK_FROM_LINK_CMD);
+				}
+			}
+		else
+			{
+			errormsg = "Skipping unknown LINK subcommand: ";
+			errormsg += pairIt->first.c_str();
+			nexusReader->NexusWarnToken(errormsg, NxsReader::SKIPPING_CONTENT_WARNING, token);
+			errormsg.clear(); //this token pos will be off a bit.
+			}
+		}
+	}
+/*!
+	Writes a LINK command to `out` when at least one linked block (taxa, trees or characters) has a non-empty title.
+	Once the command is written it lists every linked block, including those whose title is empty.
+*/
+void NxsAssumptionsBlock::WriteLinkCommand(std::ostream &out) const
+	{
+	const bool anyTitled = (taxa && !(taxa->GetTitle().empty()))
+		|| (treesBlockPtr && !(treesBlockPtr->GetTitle().empty()))
+		|| (charBlockPtr && !(charBlockPtr->GetTitle().empty()));
+	if (!anyTitled)
+		return;
+
+	out << "    LINK";
+	if (taxa)
+		{
+		out << " TAXA = " << NxsString::GetEscaped(taxa->GetTitle());
+		}
+	if (charBlockPtr)
+		{
+		out << " CHARACTERS = " << NxsString::GetEscaped(charBlockPtr->GetTitle());
+		}
+	if (treesBlockPtr)
+		{
+		out << " TREES = " << NxsString::GetEscaped(treesBlockPtr->GetTitle());
+		}
+	out << ";\n";
+	}
+
+/*!
+	Returns the blocks stored in createdSubBlocks and sets passedRefOfOwnedBlock, which stops Reset() from deleting
+	those blocks.
+*/
+VecBlockPtr NxsAssumptionsBlock::GetCreatedTaxaBlocks()
+	{
+	passedRefOfOwnedBlock = true;
+	VecBlockPtr handedOut(createdSubBlocks.begin(), createdSubBlocks.end());
+	return handedOut;
+	}
+
+
+/*!
+	Creates a new NxsAssumptionsBlock, with the link API enabled, for the block ids ASSUMPTIONS, CODONS and SETS.
+	Returns NULL for any other id or when `reader` is NULL.
+*/
+NxsAssumptionsBlock *NxsAssumptionsBlockFactory::GetBlockReaderForID(const std::string & idneeded, NxsReader *reader, NxsToken *)
+	{
+	if (reader == NULL)
+		return NULL;
+	const bool handledId = (idneeded == "ASSUMPTIONS" || idneeded == "CODONS" || idneeded == "SETS");
+	if (!handledId)
+		return NULL;
+	NxsAssumptionsBlock * created = new NxsAssumptionsBlock(NULL);
+	created->SetImplementsLinkAPI(true);
+	return created;
+	}
diff --git a/src/nxsblock.cpp b/src/nxsblock.cpp
new file mode 100644
index 0000000..a78dbc8
--- /dev/null
+++ b/src/nxsblock.cpp
@@ -0,0 +1,644 @@
+//	Copyright (C) 1999-2003 Paul O. Lewis
+//
+//	This file is part of NCL (Nexus Class Library) version 2.0.
+//
+//	NCL is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	NCL is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with NCL; if not, write to the Free Software Foundation, Inc.,
+//	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+
+#include "ncl/nxsdefs.h"
+#include "ncl/nxsblock.h"
+#include "ncl/nxsreader.h"
+
+#include <Rcpp.h>
+
+#include <cstdlib>
+using namespace std;
+
+// Class-wide switch read by GetIndicesFromSetOrAsNumber(): when false, a numeric label is rejected with a
+// NxsException instead of being interpreted as a 1-based index.
+bool NxsLabelToIndicesMapper::allowNumberAsIndexPlusOne = true; //@TEMPORARY hack
+
+
+/* i18 */ /*v2.1to2.2 18 */
+
+#if !defined(IGNORE_NXS_ASSERT) && !defined(NDEBUG)
+	/*!
+		Reports a failed NCL assertion (expression, function, file and line). With ASSERTS_TO_EXCEPTIONS defined the
+		report is thrown as a NxsNCLAPIException; otherwise it is written to Rcpp::Rcerr and the process exits with
+		status 1.
+
+		NOTE(review): std::exit() ends the hosting R session; "Writing R Extensions" asks compiled code not to call
+		exit. Consider building with ASSERTS_TO_EXCEPTIONS.
+	*/
+	void ncl_assertion_failed(char const * expr, char const * function, char const * file, long line)
+		{
+#		if defined (ASSERTS_TO_EXCEPTIONS)
+			NxsString err;
+#		else
+			// Rcerr lives in namespace Rcpp and this file only imports namespace std, so it must be qualified.
+			std::ostream & err(Rcpp::Rcerr);
+#		endif
+			err << "\nNCL assertion failed:";
+			err << "\n  expr: " << expr;
+			err << "\n  func: " << function;
+			err << "\n  file: " << file;
+			err << "\n  line: " << line ;
+#		if defined (ASSERTS_TO_EXCEPTIONS)
+			throw NxsNCLAPIException(err);
+#		else
+			Rcpp::Rcerr << std::endl;
+			std::exit(1);
+#		endif
+		}
+#endif
+
+
+/// Looks `label` up (case-insensitively) among the names in `itemSets`. On the first match the set's indices are
+/// inserted into `inds` (when `inds` is not NULL) and the size of that set is returned. Returns 0 if no set name
+/// matches.
+
+unsigned NxsLabelToIndicesMapper::GetIndicesFromSets(const std::string &label,
+  NxsUnsignedSet *inds,
+  const NxsUnsignedSetMap & itemSets)
+	{
+	const unsigned labelLen = (unsigned)label.length();
+	for (NxsUnsignedSetMap::const_iterator setIt = itemSets.begin(); setIt != itemSets.end(); ++setIt)
+		{
+		const NxsString & candidate = setIt->first;
+		// Cheap length comparison first; the case-insensitive comparison only runs for equal lengths.
+		if (labelLen != candidate.length())
+			continue;
+		if (!NxsString::case_insensitive_equals(label.c_str(), candidate.c_str()))
+			continue;
+		const std::set<unsigned> & members = setIt->second;
+		if (inds)
+			inds->insert(members.begin(), members.end());
+		return (unsigned)members.size();
+		}
+	return 0;
+	}
+
+
+/// Resolves `label` either as a set name (see GetIndicesFromSets) or as a 1-based number no larger than
+/// `maxInd` + 1, which is stored as a 0-based index. Returns the number of indices found. Throws a NxsException if
+/// the label is neither a non-empty set name nor a number, if numbers are disallowed, or if the number is out of range.
+unsigned NxsLabelToIndicesMapper::GetIndicesFromSetOrAsNumber(const std::string &label,
+  NxsUnsignedSet *inds,
+  const NxsUnsignedSetMap & itemSets,
+  const unsigned maxInd,
+  const char * itemType) /* "tree", "character"... */
+	{
+	const unsigned fromSets = GetIndicesFromSets(label, inds, itemSets);
+	if (fromSets > 0)
+		return fromSets;
+
+	long oneBased;
+	if (!NxsString::to_long(label.c_str(), &oneBased))
+		{
+		NxsString emsg;
+		emsg << "Expecting a  number or " << itemType << " label, found " <<  label;
+		throw NxsException(emsg);
+		}
+	if (!NxsLabelToIndicesMapper::allowNumberAsIndexPlusOne)
+		{
+		NxsString emsg;
+		emsg << "Numbers are not to be used as labels to indicate " << itemType << " indices, but " << label << " was encountered.";
+		throw NxsException(emsg);
+		}
+
+	const long zeroBased = oneBased - 1;
+	if (zeroBased < 0 || zeroBased > (long)maxInd)
+		{
+		NxsString emsg = "Expecting a ";
+		emsg << itemType << " name or a number corresponding to a " << itemType << "\'s number (a number from 1 to ";
+		emsg << maxInd + 1 << "). Found " << label;
+		throw NxsException(emsg);
+		}
+	if (inds)
+		inds->insert((unsigned) zeroBased);
+	return 1;
+	}
+
+/*!
+	Reports `s` as a PROBABLY_INCORRECT_CONTENT_WARNING through the attached reader. If there is no reader, the
+	message is thrown as a NxsException instead.
+*/
+void NxsBlock::WarnDangerousContent(const std::string &s, const NxsToken &token)
+	{
+	if (!nexusReader)
+		throw NxsException(s, token);
+	nexusReader->NexusWarnToken(s, NxsReader::PROBABLY_INCORRECT_CONTENT_WARNING, token);
+	}
+
+/*!
+	ProcessedNxsToken overload: reports `s` as a PROBABLY_INCORRECT_CONTENT_WARNING through the attached reader, or
+	throws it as a NxsException when no reader is attached.
+*/
+void NxsBlock::WarnDangerousContent(const std::string &s, const ProcessedNxsToken &token)
+	{
+	if (!nexusReader)
+		throw NxsException(s, token);
+	nexusReader->NexusWarnToken(s, NxsReader::PROBABLY_INCORRECT_CONTENT_WARNING, token);
+	}
+/*!
+	Skips the command the token is currently in: warns through the reader (if any), notifies SkippingCommand()
+	unless the token is already ";", and consumes the command. The consumed command is kept in skippedCommands
+	when storeSkippedCommands is set.
+*/
+void NxsBlock::SkipCommand(NxsToken & token)
+	{
+	if (nexusReader)
+		{
+		errormsg = "Skipping command: ";
+		errormsg << token.GetTokenReference();
+		nexusReader->NexusWarnToken(errormsg, NxsReader::SKIPPING_CONTENT_WARNING, token);
+		errormsg.clear();
+		}
+	if (!token.Equals(";"))
+		SkippingCommand(token.GetToken());
+
+	if (!storeSkippedCommands)
+		{
+		token.ProcessAsCommand(NULL);
+		return;
+		}
+	ProcessedNxsCommand skipped;
+	token.ProcessAsCommand(&skipped);
+	skippedCommands.push_back(skipped);
+	}
+
+/*!
+	Writes every stored skipped command to `out` with WriteCommandAsNexus(), appending a newline after each command
+	for which that call returns true.
+*/
+void NxsBlock::WriteSkippedCommands(std::ostream & out) const
+	{
+	std::list<ProcessedNxsCommand>::const_iterator cmdIt = skippedCommands.begin();
+	while (cmdIt != skippedCommands.end())
+		{
+		const bool wrote = WriteCommandAsNexus(out, *cmdIt);
+		if (wrote)
+			out << '\n';
+		++cmdIt;
+		}
+	}
+
+
+
+/*!
+	Hook to consolidate the handling of commands that are common to all blocks (TITLE, LINK, END, ENDBLOCK).
+	The matching HandleXYZ() method is called for a recognized command.
+	Returns NxsCommandResult(HANDLED_COMMAND) for TITLE and LINK, NxsCommandResult(STOP_PARSING_BLOCK) for END and
+		ENDBLOCK, and NxsCommandResult(UNKNOWN_COMMAND) otherwise, to tell the caller whether the command was
+		recognized.
+	LINK is only recognized when ImplementsLinkAPI() is true. The BLOCKID branch is deliberately disabled.
+*/
+NxsBlock::NxsCommandResult NxsBlock::HandleBasicBlockCommands(NxsToken & token)
+	{
+	const NxsBlock::NxsCommandResult handled = NxsBlock::NxsCommandResult(HANDLED_COMMAND);
+
+	if (token.Equals("TITLE"))
+		{
+		HandleTitleCommand(token);
+		return handled;
+		}
+	if (false && token.Equals("BLOCKID")) /*now we are skipping this to put it at the end of blocks*/
+		{
+		HandleBlockIDCommand(token);
+		return handled;
+		}
+	if (token.Equals("LINK") && this->ImplementsLinkAPI())
+		{
+		HandleLinkCommand(token);
+		return handled;
+		}
+	if (token.Equals("END") || token.Equals("ENDBLOCK"))
+		{
+		HandleEndblock(token);
+		return NxsBlock::NxsCommandResult(STOP_PARSING_BLOCK);
+		}
+	return NxsBlock::NxsCommandResult(UNKNOWN_COMMAND);
+	}
+
+/*!
+	Stores the next token as the this->title field.
+*/
+void NxsBlock::HandleTitleCommand(NxsToken & token)
+	{
+	token.GetNextToken();
+	if (token.Equals(";"))
+		GenerateUnexpectedTokenNxsException(token, "a title for the block");
+
+	// A second TITLE replaces the first; the user is warned when a reader is attached.
+	const bool hadTitle = !title.empty();
+	if (hadTitle && nexusReader)
+		{
+		errormsg = "Multiple TITLE commands were encountered the title \"";
+		errormsg << title << "\" will be replaced by \"" << token.GetToken() << '\"';
+		nexusReader->NexusWarnToken(errormsg, NxsReader::OVERWRITING_CONTENT_WARNING, token);
+		errormsg.clear();
+		}
+
+	title = token.GetToken();
+	autoTitle = false;	// the title now comes from the file rather than being generated
+	DemandEndSemicolon(token, "TITLE");
+	}
+
+/*!
+	Stores the next token as the this->blockid field.
+*/
+void NxsBlock::HandleBlockIDCommand(NxsToken & token)
+	{
+	// Advance to the token that should hold the id; a bare ";" means the id is missing.
+	token.GetNextToken();
+	if (token.Equals(";"))
+		GenerateUnexpectedTokenNxsException(token, "an id for the block");
+	blockIDString = token.GetToken();
+	// Nothing else may follow the id before the terminating semicolon.
+	DemandEndSemicolon(token, "BLOCKID");
+	}
+
+/*!
+	The LINK command was invented for use in Mesquite where multiple instances of a particular type of data-containing
+	block are allowed. The NEXUS specification does not discuss the correct behavior in such cases.
+
+	Some of the problems caused by failing to specify how multiple data-containing block should be handled can
+	be avoided by explicitly linking blocks. For instance a CHARACTERS block may have a "LINK taxa=TaxaBlockTitle;"
+	to indicate which block of taxa it uses.
+
+	The NxsBlock version merely raises a NxsUnimplementedException.
+
+	Before version 2.1 Links between blocks were "off" by default (see below)
+	In version 2.1, the block scoping was made more robust, so the Link API was
+	enabled for all factory-created blocks in the commonly-used PublicNexusReader.
+	In 2.1 and greater it is safe to call SetImplementsLinkAPI(true) on any block (as far as we know).
+
+LINK API in NCL version > 2.0.04 and < 2.1
+	NCL versions after 2.0.04 will support LINK for the public blocks, but will have the functionality
+		turned off by default (for backwards-compatibility).  When turned-off, LINK commands will be skipped.
+		Calling SetImplementsLinkAPI(true) on an instance will enable the use of the HandleLinkCommand() and
+		WriteLinkCommand()
+
+	HandleLinkCommand should be a pure virtual function, but implementing it that way would break old code that uses
+		NCL.  Instead the ImplementsLinkAPI/SetImplementsLinkAPI mechanism was invented.
+		NCL components will only call HandleLinkCommand() or WriteLinkCommand() if ImplementsLinkAPI() returns true.
+		For backward compatibility default all blocks have linkAPI=false.
+
+	Client code should always call ImplementsLinkAPI() to check whether HandleLinkCommand() or WriteLinkCommand()
+		are available.  Failure to do this may result in NxsUnimplementedException() being thrown.
+*/
+void NxsBlock::HandleLinkCommand(NxsToken & )
+	{
+	// Base-class version: always throws; the token is not read.
+	throw NxsUnimplementedException("NxsBlock::HandleLinkCommand");
+	}
+/*!
+	Writes the commands shared by all blocks to `out`: calls WriteTitleCommand() and WriteBlockIDCommand(), then
+	WriteLinkCommand() only if ImplementsLinkAPI() returns true.
+*/
+void NxsBlock::WriteBasicBlockCommands(std::ostream &out) const
+	{
+	WriteTitleCommand(out);
+	WriteBlockIDCommand(out);
+	if (this->ImplementsLinkAPI())
+		WriteLinkCommand(out);
+	}
+
+/*!
+	Initializes the pointer data members (nexusReader, next) to NULL. isEmpty, isEnabled and storeSkippedCommands
+	start out true; isUserSupplied, linkAPI and autoTitle start out false.
+*/
+NxsBlock::NxsBlock()
+	:isEmpty(true),
+	isEnabled(true),
+	isUserSupplied(false),
+	nexusReader(NULL),
+	next(NULL),
+	linkAPI(false),
+	autoTitle(false),
+	storeSkippedCommands(true)
+	{
+	}
+
+
+/*!
+	Checks that the current token is an equals sign; the token is not advanced.
+
+	On failure sets errormsg and throws a NxsException carrying the token's file position.
+	`contextString` (may be NULL) is used in the error message:
+		"Expecting '=' ${contextString} but found..."
+*/
+void NxsBlock::DemandIsAtEquals(NxsToken &token, const char *contextString) const
+	{
+	if (token.Equals("="))
+		return;
+	errormsg = "Expecting '=' ";
+	if (contextString)
+		errormsg.append(contextString);
+	errormsg << " but found " << token.GetToken() << " instead";
+	throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+	}
+
+/*!
+	Advances `tokIt` by one and requires the token it then refers to to be "=". If the command ends first (`tokIt`
+	reaches `endIt`) or the token is something else, sets errormsg and throws a NxsException. In the end-of-command
+	case the iterator is stepped back so the exception refers to the last real token.
+*/
+void NxsBlock::DemandEquals(ProcessedNxsCommand::const_iterator & tokIt, const ProcessedNxsCommand::const_iterator & endIt, const char *contextString) const
+	{
+	++tokIt;
+	const bool ranOut = (tokIt == endIt);
+	if (!ranOut && tokIt->Equals("="))
+		return;
+
+	errormsg = "Expecting '=' ";
+	if (contextString)
+		errormsg.append(contextString);
+	if (ranOut)
+		{
+		errormsg << " but found ; instead";
+		--tokIt;
+		}
+	else
+		errormsg << " but found " << tokIt->GetToken() << " instead";
+	throw NxsException(errormsg, *tokIt);
+	}
+
+
+/*!
+ Throws a NxsException carrying the file position, line and column of `token`.
+ If `message` is not NULL it replaces this->errormsg first; if it is NULL the
+ current contents of this->errormsg are used as the exception text.
+
+ Always throws; never returns normally.
+*/
+void NxsBlock::GenerateNxsException(NxsToken &token, const char *message) const
+	{
+	if (message)
+		errormsg = message;
+	throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+	}
+
+/*!
+ throws a NxsException with the token info for `token`
+ `expected` should fill in the phrase "Expecting ${expected}, but found..."
+ expected can be NULL.
+
+ Sets this->errormsg
+*/
+void NxsBlock::GenerateUnexpectedTokenNxsException(NxsToken &token, const char *expected) const
+	{
+	errormsg = "Unexpected token";
+	if (expected == NULL)
+		errormsg += ": ";
+	else
+		errormsg << ". Expecting " << expected << ", but found: ";
+	// The offending token text is appended in both forms of the message.
+	errormsg << token.GetToken();
+	throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+	}
+
+
+
+/*!
+	Called when the END or ENDBLOCK command needs to be parsed from within a block.
+ 	Basically just checks to make sure the next token in the data file is a semicolon.
+*/
+void NxsBlock::HandleEndblock(
+  NxsToken &token)	/* the token used to read from in */
+	{
+	// The command name passed here is the text DemandEndSemicolon() is given for its error message.
+	DemandEndSemicolon(token, "END or ENDBLOCK");
+	}
+
+/*!
+	Detaches this block from its NxsReader, if one is attached.
+*/
+NxsBlock::~NxsBlock()
+	{
+	if (nexusReader)
+		nexusReader->Detach(this);
+	}
+
+/*! This base class version simply returns 0 but a derived class should override this function if it needs to construct
+	and run a NxsSetReader object to read a set involving characters. The NxsSetReader object may need to use this
+	function to look up a character label encountered in the set. A class that overrides this method should return the
+	character index in the range [1..nchar].
+*/
+unsigned NxsBlock::CharLabelToNumber(
+  NxsString) NCL_COULD_BE_CONST /* the character label to be translated to the character's number */ /*v2.1to2.2 1 */
+	{
+	// Base-class default: the label is ignored and 0 is returned.
+	return 0;
+	}
+
+/*!
+	Sets the value of isEnabled to false. A NxsBlock can be disabled (by calling this method) if blocks of that type
+	are to be skipped during execution of the NEXUS file. If a disabled block is encountered, the virtual
+	NxsReader::SkippingDisabledBlock function is called, giving your application the opportunity to inform the user
+	that a block was skipped.
+*/
+void NxsBlock::Disable()
+	{
+	// Only flips the flag; IsEnabled() reports it.
+	isEnabled = false;
+	}
+
+/*!
+	Sets the value of isEnabled to true. A NxsBlock can be disabled (by calling Disable) if blocks of that type are to
+	be skipped during execution of the NEXUS file. If a disabled block is encountered, the virtual
+	NxsReader::SkippingDisabledBlock function is called, giving your application the opportunity to inform the user
+	that a block was skipped.
+*/
+void NxsBlock::Enable()
+	{
+	// Only flips the flag; IsEnabled() reports it.
+	isEnabled = true;
+	}
+
+/*!
+	Returns value of isEnabled, which can be controlled through use of the Enable and Disable member functions. A
+	NxsBlock should be disabled if blocks of that type are to be skipped during execution of the NEXUS file. If a
+	disabled block is encountered, the virtual NxsReader::SkippingDisabledBlock function is called, giving your
+	application the opportunity to inform the user that a block was skipped.
+*/
+bool NxsBlock::IsEnabled() NCL_COULD_BE_CONST /*v2.1to2.2 1 */
+	{
+	// Plain accessor for the flag set by Enable(), Disable() and Reset().
+	return isEnabled;
+	}
+
+/*!
+	Returns value of isUserSupplied, which is true if and only if this block's Read function is called to process a
+	block of this type appearing in a data file. This is useful because in some cases, a block object may be created
+	internally (e.g. a NxsTaxaBlockAPI may be populated using taxon names provided in a DATA block), and such blocks do
+	not require permission from the user to delete data stored therein.
+*/
+bool NxsBlock::IsUserSupplied() NCL_COULD_BE_CONST /*v2.1to2.2 1 */
+	{
+	// Plain accessor; the flag is false after construction and after Reset().
+	return isUserSupplied;
+	}
+
+/*!
+	Returns true if Read function has not been called since the last Reset. This base class version simply returns the
+	value of the data member isEmpty. If you derive a new block class from NxsBlock, be sure to set isEmpty to true in
+	your Reset function and isEmpty to false in your Read function.
+*/
+bool NxsBlock::IsEmpty() NCL_COULD_BE_CONST /*v2.1to2.2 1 */
+	{
+	// Plain accessor; the flag is true after construction and after Reset().
+	return isEmpty;
+	}
+
+/*!
+	Returns the `NCL_BLOCKTYPE_ATTR_NAME` NxsString.  \ref BlockTypeIDDiscussion
+*/
+NxsString NxsBlock::GetID() const
+	{
+	// Returned by value, so the caller receives its own copy of the block-type name.
+	return NCL_BLOCKTYPE_ATTR_NAME;
+	}
+
+/*!
+	Hook for parsing the body of a block. Every derived class must override this virtual function so that it reads
+	everything that follows the block name (the name itself is consumed by the NxsReader object) up to the end or
+	endblock statement, taking its input from the supplied token. To have output comments displayed, derive a class
+	from NxsToken, override its OutputComment member function to display the supplied comment, and pass a reference
+	to an object of that derived class to this function.
+
+	The base-class version reads nothing.
+*/
+void NxsBlock::Read(NxsToken & /* the NxsToken to use for reading the block; unused here */)
+	{
+	// Deliberately empty: the base class has no block content of its own to parse.
+	}
+
+/*!
+	Returns the base-class data members to their initial values so that the object is ready to read another block of
+	this type. The NxsReader object calls this function just before calling the block's Read function. Derived
+	classes should override it to wipe their own state completely as well.
+*/
+void NxsBlock::Reset()
+	{
+	// Status flags: enabled, empty, and not (yet) read from a user's file.
+	this->isUserSupplied = false;
+	this->isEmpty = true;
+	this->isEnabled = true;
+
+	// Clear the title and the autoTitle flag.
+	this->title = std::string();
+	/* i19 */ /*v2.1to2.2 19 */
+	this->autoTitle = false;
+
+	// Discard the list of skipped commands and any pending error message.
+	this->skippedCommands.clear();
+	this->errormsg.clear();
+	}
+
+/*!
+	Virtual hook for writing a brief report of the block's contents to an output stream. The base-class version
+	writes nothing.
+*/
+void NxsBlock::Report(std::ostream & /* the output stream to which the report would be sent */) NCL_COULD_BE_CONST /*v2.1to2.2 1 */
+	{
+	// Deliberately empty: the base class has nothing to report.
+	}
+
+/*!
+	Notification hook that is called when an unknown command is about to be skipped; the argument is the name of that
+	command. The base-class version does nothing, so no warning about the unrecognized command is issued. Override
+	this virtual function in a derived class to warn the user.
+*/
+void NxsBlock::SkippingCommand(NxsString /* the name of the command being skipped */)
+	{
+	// Deliberately empty: skipped commands are silently ignored by default.
+	}
+
+/*!
+	Translates a taxon label to a taxon number. The base-class version always answers 0. A derived class should
+	override this function if it needs to construct and run a NxsSetReader object to read a set involving taxa,
+	because the NxsSetReader may call it to look up a taxon label encountered in the set. An overriding
+	implementation should return the taxon index in the range [1..ntax].
+*/
+unsigned NxsBlock::TaxonLabelToNumber(NxsString /* the taxon label to be translated to a taxon number */) const
+	{
+	const unsigned noTaxonNumber = 0;
+	return noTaxonNumber;
+	}
+
+/*!
+	Returns the blocks that were created as a by-product of reading this block (the prototypical case is the taxa
+	block that is implied by a data block). The base-class version returns an empty vector.
+*/
+VecBlockPtr	NxsBlock::GetImpliedBlocks()
+	{
+	VecBlockPtr noImpliedBlocks;
+	return noImpliedBlocks;
+	}
+
+/*!
+	Const counterpart of GetImpliedBlocks: returns the blocks that were created as a by-product of reading this block
+	(the prototypical case is the taxa block that is implied by a data block). The base-class version returns an
+	empty vector.
+*/
+VecConstBlockPtr NxsBlock::GetImpliedBlocksConst() const
+	{
+	VecConstBlockPtr noImpliedBlocks;
+	return noImpliedBlocks;
+	}
+
+/*!
+	Should return a new instance (a deep copy) of the same type of block with the same state.
+	Note that the NxsReader field should not be cloned (it should be aliased).
+
+	The NxsBlock version throws NxsUnimplementedException (in future versions of NCL this will be a pure virtual).
+
+	NxsBlocks are expected to clone their linked blocks, but the memo argument is passed in to avoid double cloning of
+	shared references. The memo is a mapper from an old block to its new instance (used when groups of blocks are
+	being cloned).
+*/
+NxsBlock * NxsBlock::CloneBlock(
+  NxsBlockMapper & /* memo: mapper from an old block to its new instance; unused here */
+  ) const
+	{
+	const NxsString unimplementedName("CloneBlock");
+	throw NxsUnimplementedException(unimplementedName);
+	}
+
+/*!
+	Should serialize the content of the block as NEXUS to the supplied stream.
+	The NxsBlock version throws NxsUnimplementedException (in future versions of NCL this will be a pure virtual).
+*/
+void NxsBlock::WriteAsNexus(std::ostream & /* destination stream; unused here */) const
+	{
+	const NxsString unimplementedName("NxsBlock::WriteAsNexus");
+	throw NxsUnimplementedException(unimplementedName);
+	}
+
+/*!
+	Writes a `TITLE <name>;` command line to `out`, using the escaped form of the name returned by GetInstanceName.
+	Nothing is written when that name is empty.
+*/
+void NxsBlock::WriteTitleCommand(std::ostream &out) const
+	{
+	const std::string & instanceName = this->GetInstanceName();
+	if (instanceName.empty())
+		return;
+	out << "    TITLE " << NxsString::GetEscaped(instanceName) << ";\n";
+	}
+
+/*!
+	Writes a `BLOCKID <id>;` command line to `out`, using the escaped form of the blockIDString member. Nothing is
+	written when blockIDString is empty.
+*/
+void NxsBlock::WriteBlockIDCommand(std::ostream &out) const
+	{
+	const std::string & idString = this->blockIDString;
+	if (idString.empty())
+		return;
+	out << "    BLOCKID " << NxsString::GetEscaped(idString) << ";\n";
+	}
+
+/*!
+	Should write the LINK command to the supplied stream.
+	The NxsBlock version merely raises NxsUnimplementedException.
+	See notes on HandleLinkCommand.
+*/
+void NxsBlock::WriteLinkCommand(std::ostream & /* destination stream; unused here */) const
+	{
+	const char * const unimplementedName = "NxsBlock::WriteLinkCommand";
+	throw NxsUnimplementedException(unimplementedName);
+	}
+
+/*!
+	Builds a short description of the block `b` of the form "<ID> block", followed by " (<instance name>)" when the
+	block's instance name is not empty.
+*/
+std::string GetBlockIDTitleString(NxsBlock &b)
+	{
+	const std::string & instanceName = b.GetInstanceName();
+	std::string description = b.GetID();
+	description += " block";
+	if (!instanceName.empty())
+		description += " (" + instanceName + ")";
+	return description;
+	}
+
+/*!
+	Accessor for the "linkAPI" field. See notes on HandleLinkCommand.
+*/
+bool NxsBlock::ImplementsLinkAPI() const
+	{
+	const bool linkFlag = linkAPI;
+	return linkFlag;
+	}
+
+/*!
+	Setter for the "linkAPI" field. SetImplementsLinkAPI(true) should be called by every block that overloads both
+		HandleLinkCommand() and
+		WriteLinkCommand()
+	and that wants LINK to be dealt with as a Basic block command.
+	See notes on HandleLinkCommand.
+*/
+void NxsBlock::SetImplementsLinkAPI(bool v)
+	{
+	linkAPI = v;
+	}
+/*!
+	Advances the token and returns the unsigned int that the token represents. The work is delegated to
+	NxsToken::DemandPositiveInt, which is handed this block's errormsg member.
+
+	Sets errormsg and raises a NxsException on failure.
+	`contextString` is used in error messages:
+		"${contextString} must be a number greater than 0"
+*/
+unsigned NxsBlock::DemandPositiveInt(NxsToken &token, const char *contextString) const
+	{
+	const unsigned parsedValue = NxsToken::DemandPositiveInt(token, errormsg, contextString);
+	return parsedValue;
+	}
+
+/*!
+	Convenience wrapper that forwards to NxsToken::DemandEndSemicolon, handing it `token`, this block's errormsg
+	member, and `contextString`.
+*/
+void NxsBlock::DemandEndSemicolon(NxsToken &token, const char *contextString) const
+	{
+	NxsToken::DemandEndSemicolon(token, errormsg, contextString);
+	}
diff --git a/src/nxscharactersblock.cpp b/src/nxscharactersblock.cpp
new file mode 100644
index 0000000..864bb94
--- /dev/null
+++ b/src/nxscharactersblock.cpp
@@ -0,0 +1,5518 @@
+//	Copyright (C) 1999-2003 Paul O. Lewis
+//
+//	This file is part of NCL (Nexus Class Library) version 2.0.
+//
+//	NCL is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	NCL is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with NCL; if not, write to the Free Software Foundation, Inc.,
+//	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+/**
+ * This file includes contributions by Brian O'Meara. August 2005.
+ * These changes include the ability to parse continuous data types.
+ */
+#include <iomanip>
+#include <climits>
+
+#include "ncl/nxscharactersblock.h"
+#include "ncl/nxsreader.h"
+#include "ncl/nxsassumptionsblock.h"
+#include "ncl/nxssetreader.h"
+#include <algorithm>
+#include <iterator>
+using namespace std;
+
+CodonRecodingStruct getCodonRecodingStruct(NxsGeneticCodesEnum gCode);
+std::vector<NxsDiscreteStateCell> getToCodonRecodingMapper(NxsGeneticCodesEnum gCode);
+
+
+/*!
+	Debugging aid that writes a plain-text description of this mapper to `out`: the number of "fundamental" states,
+	the symbols string, whether symbol comparison respects case, the gap character (if any), one line per state code
+	showing the symbol(s) that the code stands for, and finally any extra equates. Multi-state codes are wrapped in
+	parentheses when IsPolymorphic reports them as polymorphic and in braces otherwise. The stream is flushed before
+	returning.
+*/
+void NxsDiscreteDatatypeMapper::DebugWriteMapperFields(std::ostream & out) const
+{
+	// The space before the quoted word keeps the count from running into it (otherwise: `4"fundamental" states`).
+	out << nStates << " \"fundamental\" states\n";
+	out << "Symbols = \"" << symbols << "\"\n";
+	if (respectCase)
+		out << "Symbol comparison respects case (is case-sensitive)\n";
+	else
+		out << "Symbol comparison does not respect case (is case-insensitive)\n";
+	if (gapChar == '\0')
+		out << "No Gaps\n";
+	else
+		out << "Gap char is " << gapChar << "\n";
+
+	out << "State codes:\n";
+	const int nsc = (int)GetNumStateCodes();
+	for (int scc = 0; scc < nsc; ++scc)
+		{
+		// State codes are numbered starting at sclOffset rather than at zero.
+		const int sc = scc + sclOffset;
+		out << sc << ' ';
+		if (sc == NXS_MISSING_CODE)
+			{
+			out << missing << '\n';
+			continue;
+			}
+		if (sc == NXS_GAP_STATE_CODE)
+			{
+			out << gapChar << '\n';
+			continue;
+			}
+
+		const std::set<NxsDiscreteStateCell> & ssfc(GetStateSetForCode(sc));
+		std::set<NxsDiscreteStateCell>::const_iterator sIt = ssfc.begin();
+		if (ssfc.size() == 1)
+			{
+			// A code that stands for exactly one state is shown as that state's symbol.
+			out << symbols[*sIt];
+			}
+		else
+			{
+			// Query the polymorphism flag once; it selects both the opening and the closing delimiter.
+			const bool polymorphic = IsPolymorphic(sc);
+			out << (polymorphic ? '(' : '{');
+			for (; sIt != ssfc.end(); ++sIt)
+				{
+				if (*sIt == NXS_MISSING_CODE)
+					out << missing;
+				else if (*sIt == NXS_GAP_STATE_CODE)
+					out << gapChar;
+				else
+					out << symbols[*sIt];
+				}
+			out << (polymorphic ? ')' : '}');
+			}
+		out << '\n';
+		}
+
+	std::map<char, NxsString>::const_iterator eeIt = extraEquates.begin();
+	if (eeIt != extraEquates.end())
+		{
+		out << "Extra equates:\n";
+		for (; eeIt != extraEquates.end(); ++eeIt)
+			out << eeIt->first  << " -> " << eeIt->second << '\n';
+		}
+	out.flush();
+}
+
+static unsigned char lcBaseToInd(char );
+
+/*!
+	Maps a lower-case DNA base to its index: 'a' -> 0, 'c' -> 1, 'g' -> 2, 't' -> 3.
+	Throws NxsException for any other character (including upper-case letters).
+*/
+static unsigned char lcBaseToInd(char c) {
+	switch (c) {
+		case 'a':
+			return 0;
+		case 'c':
+			return 1;
+		case 'g':
+			return 2;
+		case 't':
+			return 3;
+		default:
+			break;
+	}
+	throw NxsException("Expecting a DNA base");
+}
+
+/*!
+	Builds a codon from a string of exactly three DNA bases. The string is passed through NxsString::to_lower and each
+	character is then converted with lcBaseToInd (a=0, c=1, g=2, t=3) to fill firstPos, secondPos and thirdPos.
+
+	Throws NxsException if `triplet` is a null pointer, if it is not exactly three characters long, or (from
+	lcBaseToInd) if any character is not one of the four bases.
+*/
+NxsCodonTriplet::NxsCodonTriplet(const char *triplet)
+{
+	// Constructing a std::string from a null pointer is undefined behavior, so reject it explicitly.
+	if (triplet == 0)
+		throw NxsException("Expecting a triplet of bases");
+	std::string s(triplet);
+	if (s.length() != 3)
+		throw NxsException("Expecting a triplet of bases");
+	NxsString::to_lower(s);
+	this->firstPos = lcBaseToInd(s[0]);
+	this->secondPos = lcBaseToInd(s[1]);
+	this->thirdPos = lcBaseToInd(s[2]);
+}
+
+
+/*!
+	Describes the single-base change that turns this codon into `other`.
+
+	Returns MutDescription(0, 0) when the two codons are identical, MutDescription(-1, -1) when they differ at more
+	than one position, and otherwise MutDescription(from, to), where `from` is this codon's base index at the one
+	differing position and `to` is the base index of `other` at that position.
+*/
+NxsCodonTriplet::MutDescription NxsCodonTriplet::getSingleMut(const NxsCodonTriplet & other) const {
+	const bool firstDiffers = !(firstPos == other.firstPos);
+	const bool secondDiffers = !(secondPos == other.secondPos);
+	const bool thirdDiffers = !(thirdPos == other.thirdPos);
+	const int numDiffering = (firstDiffers ? 1 : 0) + (secondDiffers ? 1 : 0) + (thirdDiffers ? 1 : 0);
+
+	if (numDiffering == 0)
+		return MutDescription(0,0);
+	if (numDiffering > 1)
+		return MutDescription(-1, -1);
+
+	// Exactly one position differs: report the bases found there.
+	if (firstDiffers)
+		return MutDescription((int)firstPos, (int)other.firstPos);
+	if (secondDiffers)
+		return MutDescription((int)secondPos, (int)other.secondPos);
+	return MutDescription((int)thirdPos, (int)other.thirdPos);
+}
+
+
+/*******************************************************************************
+ * Removes "fundamental" states (as opposed to gaps or ambiguity codes) from a
+ * datatype mapper. `deletedInds` holds the indices of the states to remove; an
+ * empty set is a no-op.
+ *
+ * Throws NxsException if any index lies outside [0, nStates), or if the datatype
+ * has equates: equates (default or user-defined) are not supported in the
+ * current version of the function, so this will only work on standard or
+ * codons data.
+ */
+
+void NxsDiscreteDatatypeMapper::DeleteStateIndices(const std::set<NxsDiscreteStateCell> & deletedInds)
+{
+	if (deletedInds.empty())
+		return;
+
+	// The set is ordered, so checking its smallest and largest members validates every index.
+	const NxsDiscreteStateCell numFundamental = (NxsDiscreteStateCell)this->nStates;
+	const NxsDiscreteStateCell lowestDeleted = *(deletedInds.begin());
+	const NxsDiscreteStateCell highestDeleted = *(deletedInds.rbegin());
+	if (lowestDeleted < 0 || highestDeleted >= numFundamental)
+		throw NxsException("DeleteStateIndices can only delete fundamental states");
+	if (!NxsCharactersBlock::GetDefaultEquates(this->datatype).empty() || !extraEquates.empty())
+		throw NxsException("DeleteStateIndices can not currently work on datatypes with equates");
+
+	// Build the old-index -> new-index table and the shortened symbols string in one pass. Deleted states are
+	// recorded as NXS_INVALID_STATE_CODE (presumably negative, given the `>= 0` filter below -- TODO confirm).
+	std::vector<NxsDiscreteStateCell> oldToNew;
+	std::string keptSymbols;
+	NxsDiscreteStateCell nextKeptIndex = 0;
+	for (NxsDiscreteStateCell oldInd = 0; oldInd < numFundamental; ++oldInd)
+		{
+		if (deletedInds.count(oldInd) > 0)
+			{
+			oldToNew.push_back(NXS_INVALID_STATE_CODE);
+			continue;
+			}
+		oldToNew.push_back(nextKeptIndex);
+		++nextKeptIndex;
+		keptSymbols += symbols[oldInd];
+		}
+
+	// Snapshot the old state before RefreshMappings rebuilds the mapper from the new symbols string.
+	const unsigned previousNStates = nStates;
+	const std::vector<NxsDiscreteStateSetInfo> previousStateSets(this->stateSetsVec);
+	symbols = keptSymbols;
+
+	this->RefreshMappings(0L);
+
+	// Re-add every state set that followed the fundamental states in the old mapper, translated to the new indices.
+	for (unsigned pos = previousNStates - sclOffset; pos < previousStateSets.size(); ++pos)
+		{
+		const NxsDiscreteStateSetInfo & oldInfo = previousStateSets[pos];
+		std::set<NxsDiscreteStateCell> translated;
+		std::set<NxsDiscreteStateCell>::const_iterator member = oldInfo.states.begin();
+		while (member != oldInfo.states.end())
+			{
+			const NxsDiscreteStateCell oldCode = *member;
+			++member;
+			if (oldCode < 0)
+				{
+				// Negative codes are copied across unchanged.
+				translated.insert(oldCode);
+				continue;
+				}
+			const NxsDiscreteStateCell newCode = oldToNew.at(oldCode);
+			if (newCode >= 0)
+				translated.insert(newCode);
+			}
+		// We have to add every "extra" state set, so that the indexing for the higher state codes is just shifted
+		// by the number of states deleted.
+		AddStateSet(translated, oldInfo.nexusSymbol, true, oldInfo.isPolymorphic);
+		}
+}
+
+/*!
+	Returns a 64-element vector that maps each index in the list of all 64 codons (enumerated in the order AAA, AAC,
+	AAG, AAT, ACA, ..., TTT) to that codon's index in the stop-free ("compressed") codon list of the genetic code
+	`gCode`. A codon that is a stop under `gCode` maps to -1; every other codon receives the next consecutive
+	compressed index, counting from 0.
+
+	Throws NxsException if `gCode` is not one of the genetic codes handled below.
+*/
+std::vector<NxsDiscreteStateCell> getToCodonRecodingMapper(NxsGeneticCodesEnum gCode)
+{
+	// Stop-codon positions in the 64-codon list, ascending, each list terminated by -1 (which never matches a codon
+	// index). Positions used: AGA=8, AGG=10, TAA=48, TAG=50, TCA=52, TGA=56, TTA=60.
+	static const int stopsTaaTagTga[] = {48, 50, 56, -1};
+	static const int stopsAgaAggTaaTag[] = {8, 10, 48, 50, -1};
+	static const int stopsTaaTag[] = {48, 50, -1};
+	static const int stopsTga[] = {56, -1};
+	static const int stopsTag[] = {50, -1};
+	static const int stopsTaaTga[] = {48, 56, -1};
+	static const int stopsTaaTcaTga[] = {48, 52, 56, -1};
+	static const int stopsTaaTagTgaTta[] = {48, 50, 56, 60, -1};
+
+	const int * nextStop = 0;
+	if (gCode == NXS_GCODE_STANDARD)
+		nextStop = stopsTaaTagTga;
+	else if (gCode == NXS_GCODE_VERT_MITO)
+		nextStop = stopsAgaAggTaaTag;
+	else if (gCode == NXS_GCODE_YEAST_MITO)
+		nextStop = stopsTaaTag;
+	else if (gCode == NXS_GCODE_MOLD_MITO)
+		nextStop = stopsTaaTag;
+	else if (gCode == NXS_GCODE_INVERT_MITO)
+		nextStop = stopsTaaTag;
+	else if (gCode == NXS_GCODE_CILIATE)
+		nextStop = stopsTga;
+	else if (gCode == NXS_GCODE_ECHINO_MITO)
+		nextStop = stopsTaaTag;
+	else if (gCode == NXS_GCODE_EUPLOTID)
+		nextStop = stopsTaaTag;
+	else if (gCode == NXS_GCODE_PLANT_PLASTID)
+		nextStop = stopsTaaTagTga;
+	else if (gCode == NXS_GCODE_ALT_YEAST)
+		nextStop = stopsTaaTagTga;
+	else if (gCode == NXS_GCODE_ASCIDIAN_MITO)
+		nextStop = stopsTaaTag;
+	else if (gCode == NXS_GCODE_ALT_FLATWORM_MITO)
+		nextStop = stopsTag;
+	else if (gCode == NXS_GCODE_BLEPHARISMA_MACRO)
+		nextStop = stopsTaaTga;
+	else if (gCode == NXS_GCODE_CHLOROPHYCEAN_MITO)
+		nextStop = stopsTaaTga;
+	else if (gCode == NXS_GCODE_TREMATODE_MITO)
+		nextStop = stopsTaaTag;
+	else if (gCode == NXS_GCODE_SCENEDESMUS_MITO)
+		nextStop = stopsTaaTcaTga;
+	else if (gCode == NXS_GCODE_THRAUSTOCHYTRIUM_MITO)
+		nextStop = stopsTaaTagTgaTta;
+	else
+		throw NxsException("Unrecognized genetic code.");
+
+	// Walk the 64 codons once: stops get -1, every other codon gets the next compressed index.
+	std::vector<NxsDiscreteStateCell> v;
+	v.reserve(64);
+	NxsDiscreteStateCell compressedInd = 0;
+	for (int codonInd = 0; codonInd < 64; ++codonInd)
+		{
+		if (codonInd == *nextStop)
+			{
+			v.push_back((NxsDiscreteStateCell) -1);
+			++nextStop;
+			}
+		else
+			v.push_back(compressedInd++);
+		}
+	return v;
+}
+
+
+CodonRecodingStruct getCodonRecodingStruct(NxsGeneticCodesEnum gCode)
+{
+	CodonRecodingStruct c;
+	unsigned n;
+
+	if(gCode == NXS_GCODE_STANDARD) {
+		const int ccitacnxs_gcode_standard[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 51, 52, 53, 54, 55, 57, 58, 59, 60, 61, 62, 63};
+		n = 61;
+		const int caaindnxs_gcode_standard[] = {8, 11, 8, 11, 16, 16, 16, 16, 14, 15, 14, 15, 7, 7, 10, 7, 13, 6, 13, 6, 12, 12, 12, 12, 14, 14, 14, 14, 9, 9, 9, 9, 3, 2, 3, 2, 0, 0, 0, 0, 5, 5, 5, 5, 17, 17, 17, 17, 19, 19, 15, 15, 15, 15, 1, 18, 1, 9, 4, 9, 4};
+		const char * ccodstrnxs_gcode_standard[] = {"AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAC", "TAT", "TCA", "TCC", "TCG", "TCT", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT"};
+		std::copy(ccitacnxs_gcode_standard, ccitacnxs_gcode_standard + n, std::back_inserter(c.compressedCodonIndToAllCodonsInd));
+		std::copy(caaindnxs_gcode_standard, caaindnxs_gcode_standard + n, std::back_inserter(c.aaInd));
+		std::copy(ccodstrnxs_gcode_standard, ccodstrnxs_gcode_standard + n, std::back_inserter(c.codonStrings));
+		return c;
+	}
+	if(gCode == NXS_GCODE_VERT_MITO) {
+		const int ccitacnxs_gcode_vert_mito[] = {0, 1, 2, 3, 4, 5, 6, 7, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63};
+		n = 60;
+		const int caaindnxs_gcode_vert_mito[] = {8, 11, 8, 11, 16, 16, 16, 16, 15, 15, 10, 7, 10, 7, 13, 6, 13, 6, 12, 12, 12, 12, 14, 14, 14, 14, 9, 9, 9, 9, 3, 2, 3, 2, 0, 0, 0, 0, 5, 5, 5, 5, 17, 17, 17, 17, 19, 19, 15, 15, 15, 15, 18, 1, 18, 1, 9, 4, 9, 4};
+		const char * ccodstrnxs_gcode_vert_mito[] = {"AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGC", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAC", "TAT", "TCA", "TCC", "TCG", "TCT", "TGA", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT"};
+		std::copy(ccitacnxs_gcode_vert_mito, ccitacnxs_gcode_vert_mito + n, std::back_inserter(c.compressedCodonIndToAllCodonsInd));
+		std::copy(caaindnxs_gcode_vert_mito, caaindnxs_gcode_vert_mito + n, std::back_inserter(c.aaInd));
+		std::copy(ccodstrnxs_gcode_vert_mito, ccodstrnxs_gcode_vert_mito + n, std::back_inserter(c.codonStrings));
+		return c;
+	}
+	if(gCode == NXS_GCODE_YEAST_MITO) {
+		const int ccitacnxs_gcode_yeast_mito[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63};
+		n = 62;
+		const int caaindnxs_gcode_yeast_mito[] = {8, 11, 8, 11, 16, 16, 16, 16, 14, 15, 14, 15, 7, 7, 10, 7, 13, 6, 13, 6, 12, 12, 12, 12, 14, 14, 14, 14, 9, 9, 9, 9, 3, 2, 3, 2, 0, 0, 0, 0, 5, 5, 5, 5, 17, 17, 17, 17, 19, 19, 15, 15, 15, 15, 18, 1, 18, 1, 9, 4, 9, 4};
+		const char * ccodstrnxs_gcode_yeast_mito[] = {"AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAC", "TAT", "TCA", "TCC", "TCG", "TCT", "TGA", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT"};
+		std::copy(ccitacnxs_gcode_yeast_mito, ccitacnxs_gcode_yeast_mito + n, std::back_inserter(c.compressedCodonIndToAllCodonsInd));
+		std::copy(caaindnxs_gcode_yeast_mito, caaindnxs_gcode_yeast_mito + n, std::back_inserter(c.aaInd));
+		std::copy(ccodstrnxs_gcode_yeast_mito, ccodstrnxs_gcode_yeast_mito + n, std::back_inserter(c.codonStrings));
+		return c;
+	}
+	if(gCode == NXS_GCODE_MOLD_MITO) {
+		const int ccitacnxs_gcode_mold_mito[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63};
+		n = 62;
+		const int caaindnxs_gcode_mold_mito[] = {8, 11, 8, 11, 16, 16, 16, 16, 14, 15, 14, 15, 7, 7, 10, 7, 13, 6, 13, 6, 12, 12, 12, 12, 14, 14, 14, 14, 9, 9, 9, 9, 3, 2, 3, 2, 0, 0, 0, 0, 5, 5, 5, 5, 17, 17, 17, 17, 19, 19, 15, 15, 15, 15, 18, 1, 18, 1, 9, 4, 9, 4};
+		const char * ccodstrnxs_gcode_mold_mito[] = {"AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAC", "TAT", "TCA", "TCC", "TCG", "TCT", "TGA", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT"};
+		std::copy(ccitacnxs_gcode_mold_mito, ccitacnxs_gcode_mold_mito + n, std::back_inserter(c.compressedCodonIndToAllCodonsInd));
+		std::copy(caaindnxs_gcode_mold_mito, caaindnxs_gcode_mold_mito + n, std::back_inserter(c.aaInd));
+		std::copy(ccodstrnxs_gcode_mold_mito, ccodstrnxs_gcode_mold_mito + n, std::back_inserter(c.codonStrings));
+		return c;
+	}
+	if(gCode == NXS_GCODE_INVERT_MITO) {
+		const int ccitacnxs_gcode_invert_mito[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63};
+		n = 62;
+		const int caaindnxs_gcode_invert_mito[] = {8, 11, 8, 11, 16, 16, 16, 16, 15, 15, 15, 15, 10, 7, 10, 7, 13, 6, 13, 6, 12, 12, 12, 12, 14, 14, 14, 14, 9, 9, 9, 9, 3, 2, 3, 2, 0, 0, 0, 0, 5, 5, 5, 5, 17, 17, 17, 17, 19, 19, 15, 15, 15, 15, 18, 1, 18, 1, 9, 4, 9, 4};
+		const char * ccodstrnxs_gcode_invert_mito[] = {"AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAC", "TAT", "TCA", "TCC", "TCG", "TCT", "TGA", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT"};
+		std::copy(ccitacnxs_gcode_invert_mito, ccitacnxs_gcode_invert_mito + n, std::back_inserter(c.compressedCodonIndToAllCodonsInd));
+		std::copy(caaindnxs_gcode_invert_mito, caaindnxs_gcode_invert_mito + n, std::back_inserter(c.aaInd));
+		std::copy(ccodstrnxs_gcode_invert_mito, ccodstrnxs_gcode_invert_mito + n, std::back_inserter(c.codonStrings));
+		return c;
+	}
+	if(gCode == NXS_GCODE_CILIATE) {
+		const int ccitacnxs_gcode_ciliate[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 57, 58, 59, 60, 61, 62, 63};
+		n = 63;
+		const int caaindnxs_gcode_ciliate[] = {8, 11, 8, 11, 16, 16, 16, 16, 14, 15, 14, 15, 7, 7, 10, 7, 13, 6, 13, 6, 12, 12, 12, 12, 14, 14, 14, 14, 9, 9, 9, 9, 3, 2, 3, 2, 0, 0, 0, 0, 5, 5, 5, 5, 17, 17, 17, 17, 13, 19, 13, 19, 15, 15, 15, 15, 1, 18, 1, 9, 4, 9, 4};
+		const char * ccodstrnxs_gcode_ciliate[] = {"AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAA", "TAC", "TAG", "TAT", "TCA", "TCC", "TCG", "TCT", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT"};
+		std::copy(ccitacnxs_gcode_ciliate, ccitacnxs_gcode_ciliate + n, std::back_inserter(c.compressedCodonIndToAllCodonsInd));
+		std::copy(caaindnxs_gcode_ciliate, caaindnxs_gcode_ciliate + n, std::back_inserter(c.aaInd));
+		std::copy(ccodstrnxs_gcode_ciliate, ccodstrnxs_gcode_ciliate + n, std::back_inserter(c.codonStrings));
+		return c;
+	}
+	if(gCode == NXS_GCODE_ECHINO_MITO) {
+		const int ccitacnxs_gcode_echino_mito[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63};
+		n = 62;
+		const int caaindnxs_gcode_echino_mito[] = {11, 11, 8, 11, 16, 16, 16, 16, 15, 15, 15, 15, 7, 7, 10, 7, 13, 6, 13, 6, 12, 12, 12, 12, 14, 14, 14, 14, 9, 9, 9, 9, 3, 2, 3, 2, 0, 0, 0, 0, 5, 5, 5, 5, 17, 17, 17, 17, 19, 19, 15, 15, 15, 15, 18, 1, 18, 1, 9, 4, 9, 4};
+		const char * ccodstrnxs_gcode_echino_mito[] = {"AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAC", "TAT", "TCA", "TCC", "TCG", "TCT", "TGA", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT"};
+		std::copy(ccitacnxs_gcode_echino_mito, ccitacnxs_gcode_echino_mito + n, std::back_inserter(c.compressedCodonIndToAllCodonsInd));
+		std::copy(caaindnxs_gcode_echino_mito, caaindnxs_gcode_echino_mito + n, std::back_inserter(c.aaInd));
+		std::copy(ccodstrnxs_gcode_echino_mito, ccodstrnxs_gcode_echino_mito + n, std::back_inserter(c.codonStrings));
+		return c;
+	}
+	if(gCode == NXS_GCODE_EUPLOTID) {
+		const int ccitacnxs_gcode_euplotid[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63};
+		n = 62;
+		const int caaindnxs_gcode_euplotid[] = {8, 11, 8, 11, 16, 16, 16, 16, 14, 15, 14, 15, 7, 7, 10, 7, 13, 6, 13, 6, 12, 12, 12, 12, 14, 14, 14, 14, 9, 9, 9, 9, 3, 2, 3, 2, 0, 0, 0, 0, 5, 5, 5, 5, 17, 17, 17, 17, 19, 19, 15, 15, 15, 15, 1, 1, 18, 1, 9, 4, 9, 4};
+		const char * ccodstrnxs_gcode_euplotid[] = {"AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAC", "TAT", "TCA", "TCC", "TCG", "TCT", "TGA", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT"};
+		std::copy(ccitacnxs_gcode_euplotid, ccitacnxs_gcode_euplotid + n, std::back_inserter(c.compressedCodonIndToAllCodonsInd));
+		std::copy(caaindnxs_gcode_euplotid, caaindnxs_gcode_euplotid + n, std::back_inserter(c.aaInd));
+		std::copy(ccodstrnxs_gcode_euplotid, ccodstrnxs_gcode_euplotid + n, std::back_inserter(c.codonStrings));
+		return c;
+	}
+	if(gCode == NXS_GCODE_PLANT_PLASTID) {
+		const int ccitacnxs_gcode_plant_plastid[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 51, 52, 53, 54, 55, 57, 58, 59, 60, 61, 62, 63};
+		n = 61;
+		const int caaindnxs_gcode_plant_plastid[] = {8, 11, 8, 11, 16, 16, 16, 16, 14, 15, 14, 15, 7, 7, 10, 7, 13, 6, 13, 6, 12, 12, 12, 12, 14, 14, 14, 14, 9, 9, 9, 9, 3, 2, 3, 2, 0, 0, 0, 0, 5, 5, 5, 5, 17, 17, 17, 17, 19, 19, 15, 15, 15, 15, 1, 18, 1, 9, 4, 9, 4};
+		const char * ccodstrnxs_gcode_plant_plastid[] = {"AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAC", "TAT", "TCA", "TCC", "TCG", "TCT", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT"};
+		std::copy(ccitacnxs_gcode_plant_plastid, ccitacnxs_gcode_plant_plastid + n, std::back_inserter(c.compressedCodonIndToAllCodonsInd));
+		std::copy(caaindnxs_gcode_plant_plastid, caaindnxs_gcode_plant_plastid + n, std::back_inserter(c.aaInd));
+		std::copy(ccodstrnxs_gcode_plant_plastid, ccodstrnxs_gcode_plant_plastid + n, std::back_inserter(c.codonStrings));
+		return c;
+	}
+	if(gCode == NXS_GCODE_ALT_YEAST) {
+		const int ccitacnxs_gcode_alt_yeast[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 51, 52, 53, 54, 55, 57, 58, 59, 60, 61, 62, 63};
+		n = 61;
+		const int caaindnxs_gcode_alt_yeast[] = {8, 11, 8, 11, 16, 16, 16, 16, 14, 15, 14, 15, 7, 7, 10, 7, 13, 6, 13, 6, 12, 12, 12, 12, 14, 14, 14, 14, 9, 9, 15, 9, 3, 2, 3, 2, 0, 0, 0, 0, 5, 5, 5, 5, 17, 17, 17, 17, 19, 19, 15, 15, 15, 15, 1, 18, 1, 9, 4, 9, 4};
+		const char * ccodstrnxs_gcode_alt_yeast[] = {"AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAC", "TAT", "TCA", "TCC", "TCG", "TCT", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT"};
+		std::copy(ccitacnxs_gcode_alt_yeast, ccitacnxs_gcode_alt_yeast + n, std::back_inserter(c.compressedCodonIndToAllCodonsInd));
+		std::copy(caaindnxs_gcode_alt_yeast, caaindnxs_gcode_alt_yeast + n, std::back_inserter(c.aaInd));
+		std::copy(ccodstrnxs_gcode_alt_yeast, ccodstrnxs_gcode_alt_yeast + n, std::back_inserter(c.codonStrings));
+		return c;
+	}
+	if(gCode == NXS_GCODE_ASCIDIAN_MITO) {
+		const int ccitacnxs_gcode_ascidian_mito[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63};
+		n = 62;
+		const int caaindnxs_gcode_ascidian_mito[] = {8, 11, 8, 11, 16, 16, 16, 16, 5, 15, 5, 15, 10, 7, 10, 7, 13, 6, 13, 6, 12, 12, 12, 12, 14, 14, 14, 14, 9, 9, 9, 9, 3, 2, 3, 2, 0, 0, 0, 0, 5, 5, 5, 5, 17, 17, 17, 17, 19, 19, 15, 15, 15, 15, 18, 1, 18, 1, 9, 4, 9, 4};
+		const char * ccodstrnxs_gcode_ascidian_mito[] = {"AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAC", "TAT", "TCA", "TCC", "TCG", "TCT", "TGA", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT"};
+		std::copy(ccitacnxs_gcode_ascidian_mito, ccitacnxs_gcode_ascidian_mito + n, std::back_inserter(c.compressedCodonIndToAllCodonsInd));
+		std::copy(caaindnxs_gcode_ascidian_mito, caaindnxs_gcode_ascidian_mito + n, std::back_inserter(c.aaInd));
+		std::copy(ccodstrnxs_gcode_ascidian_mito, ccodstrnxs_gcode_ascidian_mito + n, std::back_inserter(c.codonStrings));
+		return c;
+	}
+	if(gCode == NXS_GCODE_ALT_FLATWORM_MITO) {
+		const int ccitacnxs_gcode_alt_flatworm_mito[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63};
+		n = 63;
+		const int caaindnxs_gcode_alt_flatworm_mito[] = {11, 11, 8, 11, 16, 16, 16, 16, 15, 15, 15, 15, 7, 7, 10, 7, 13, 6, 13, 6, 12, 12, 12, 12, 14, 14, 14, 14, 9, 9, 9, 9, 3, 2, 3, 2, 0, 0, 0, 0, 5, 5, 5, 5, 17, 17, 17, 17, 19, 19, 19, 15, 15, 15, 15, 18, 1, 18, 1, 9, 4, 9, 4};
+		const char * ccodstrnxs_gcode_alt_flatworm_mito[] = {"AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAA", "TAC", "TAT", "TCA", "TCC", "TCG", "TCT", "TGA", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT"};
+		std::copy(ccitacnxs_gcode_alt_flatworm_mito, ccitacnxs_gcode_alt_flatworm_mito + n, std::back_inserter(c.compressedCodonIndToAllCodonsInd));
+		std::copy(caaindnxs_gcode_alt_flatworm_mito, caaindnxs_gcode_alt_flatworm_mito + n, std::back_inserter(c.aaInd));
+		std::copy(ccodstrnxs_gcode_alt_flatworm_mito, ccodstrnxs_gcode_alt_flatworm_mito + n, std::back_inserter(c.codonStrings));
+		return c;
+	}
+	if(gCode == NXS_GCODE_BLEPHARISMA_MACRO) {
+		const int ccitacnxs_gcode_blepharisma_macro[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 50, 51, 52, 53, 54, 55, 57, 58, 59, 60, 61, 62, 63};
+		n = 62;
+		const int caaindnxs_gcode_blepharisma_macro[] = {8, 11, 8, 11, 16, 16, 16, 16, 14, 15, 14, 15, 7, 7, 10, 7, 13, 6, 13, 6, 12, 12, 12, 12, 14, 14, 14, 14, 9, 9, 9, 9, 3, 2, 3, 2, 0, 0, 0, 0, 5, 5, 5, 5, 17, 17, 17, 17, 19, 13, 19, 15, 15, 15, 15, 1, 18, 1, 9, 4, 9, 4};
+		const char * ccodstrnxs_gcode_blepharisma_macro[] = {"AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAC", "TAG", "TAT", "TCA", "TCC", "TCG", "TCT", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT"};
+		std::copy(ccitacnxs_gcode_blepharisma_macro, ccitacnxs_gcode_blepharisma_macro + n, std::back_inserter(c.compressedCodonIndToAllCodonsInd));
+		std::copy(caaindnxs_gcode_blepharisma_macro, caaindnxs_gcode_blepharisma_macro + n, std::back_inserter(c.aaInd));
+		std::copy(ccodstrnxs_gcode_blepharisma_macro, ccodstrnxs_gcode_blepharisma_macro + n, std::back_inserter(c.codonStrings));
+		return c;
+	}
+	if(gCode == NXS_GCODE_CHLOROPHYCEAN_MITO) {
+		const int ccitacnxs_gcode_chlorophycean_mito[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 50, 51, 52, 53, 54, 55, 57, 58, 59, 60, 61, 62, 63};
+		n = 62;
+		const int caaindnxs_gcode_chlorophycean_mito[] = {8, 11, 8, 11, 16, 16, 16, 16, 14, 15, 14, 15, 7, 7, 10, 7, 13, 6, 13, 6, 12, 12, 12, 12, 14, 14, 14, 14, 9, 9, 9, 9, 3, 2, 3, 2, 0, 0, 0, 0, 5, 5, 5, 5, 17, 17, 17, 17, 19, 9, 19, 15, 15, 15, 15, 1, 18, 1, 9, 4, 9, 4};
+		const char * ccodstrnxs_gcode_chlorophycean_mito[] = {"AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAC", "TAG", "TAT", "TCA", "TCC", "TCG", "TCT", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT"};
+		std::copy(ccitacnxs_gcode_chlorophycean_mito, ccitacnxs_gcode_chlorophycean_mito + n, std::back_inserter(c.compressedCodonIndToAllCodonsInd));
+		std::copy(caaindnxs_gcode_chlorophycean_mito, caaindnxs_gcode_chlorophycean_mito + n, std::back_inserter(c.aaInd));
+		std::copy(ccodstrnxs_gcode_chlorophycean_mito, ccodstrnxs_gcode_chlorophycean_mito + n, std::back_inserter(c.codonStrings));
+		return c;
+	}
+	if(gCode == NXS_GCODE_TREMATODE_MITO) {
+		const int ccitacnxs_gcode_trematode_mito[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63};
+		n = 62;
+		const int caaindnxs_gcode_trematode_mito[] = {11, 11, 8, 11, 16, 16, 16, 16, 15, 15, 15, 15, 10, 7, 10, 7, 13, 6, 13, 6, 12, 12, 12, 12, 14, 14, 14, 14, 9, 9, 9, 9, 3, 2, 3, 2, 0, 0, 0, 0, 5, 5, 5, 5, 17, 17, 17, 17, 19, 19, 15, 15, 15, 15, 18, 1, 18, 1, 9, 4, 9, 4};
+		const char * ccodstrnxs_gcode_trematode_mito[] = {"AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAC", "TAT", "TCA", "TCC", "TCG", "TCT", "TGA", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT"};
+		std::copy(ccitacnxs_gcode_trematode_mito, ccitacnxs_gcode_trematode_mito + n, std::back_inserter(c.compressedCodonIndToAllCodonsInd));
+		std::copy(caaindnxs_gcode_trematode_mito, caaindnxs_gcode_trematode_mito + n, std::back_inserter(c.aaInd));
+		std::copy(ccodstrnxs_gcode_trematode_mito, ccodstrnxs_gcode_trematode_mito + n, std::back_inserter(c.codonStrings));
+		return c;
+	}
+	if(gCode == NXS_GCODE_SCENEDESMUS_MITO) {
+		const int ccitacnxs_gcode_scenedesmus_mito[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 50, 51, 53, 54, 55, 57, 58, 59, 60, 61, 62, 63};
+		n = 61;
+		const int caaindnxs_gcode_scenedesmus_mito[] = {8, 11, 8, 11, 16, 16, 16, 16, 14, 15, 14, 15, 7, 7, 10, 7, 13, 6, 13, 6, 12, 12, 12, 12, 14, 14, 14, 14, 9, 9, 9, 9, 3, 2, 3, 2, 0, 0, 0, 0, 5, 5, 5, 5, 17, 17, 17, 17, 19, 9, 19, 15, 15, 15, 1, 18, 1, 9, 4, 9, 4};
+		const char * ccodstrnxs_gcode_scenedesmus_mito[] = {"AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAC", "TAG", "TAT", "TCC", "TCG", "TCT", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT"};
+		std::copy(ccitacnxs_gcode_scenedesmus_mito, ccitacnxs_gcode_scenedesmus_mito + n, std::back_inserter(c.compressedCodonIndToAllCodonsInd));
+		std::copy(caaindnxs_gcode_scenedesmus_mito, caaindnxs_gcode_scenedesmus_mito + n, std::back_inserter(c.aaInd));
+		std::copy(ccodstrnxs_gcode_scenedesmus_mito, ccodstrnxs_gcode_scenedesmus_mito + n, std::back_inserter(c.codonStrings));
+		return c;
+	}
+	if(gCode == NXS_GCODE_THRAUSTOCHYTRIUM_MITO) {
+		const int ccitacnxs_gcode_thraustochytrium_mito[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 49, 51, 52, 53, 54, 55, 57, 58, 59, 61, 62, 63};
+		n = 60;
+		const int caaindnxs_gcode_thraustochytrium_mito[] = {8, 11, 8, 11, 16, 16, 16, 16, 14, 15, 14, 15, 7, 7, 10, 7, 13, 6, 13, 6, 12, 12, 12, 12, 14, 14, 14, 14, 9, 9, 9, 9, 3, 2, 3, 2, 0, 0, 0, 0, 5, 5, 5, 5, 17, 17, 17, 17, 19, 19, 15, 15, 15, 15, 1, 18, 1, 4, 9, 4};
+		const char * ccodstrnxs_gcode_thraustochytrium_mito[] = {"AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAC", "TAT", "TCA", "TCC", "TCG", "TCT", "TGC", "TGG", "TGT", "TTC", "TTG", "TTT"};
+		std::copy(ccitacnxs_gcode_thraustochytrium_mito, ccitacnxs_gcode_thraustochytrium_mito + n, std::back_inserter(c.compressedCodonIndToAllCodonsInd));
+		std::copy(caaindnxs_gcode_thraustochytrium_mito, caaindnxs_gcode_thraustochytrium_mito + n, std::back_inserter(c.aaInd));
+		std::copy(ccodstrnxs_gcode_thraustochytrium_mito, ccodstrnxs_gcode_thraustochytrium_mito + n, std::back_inserter(c.codonStrings));
+		return c;
+	}
+	throw NxsException("Unrecognized genetic code.");
+}
+
+/* Recodes the discrete matrix of an uncompressed codon block so that the
+   stop codons of `gCode` are no longer states, then deletes those state
+   indices from the datatype mapper of character 0.
+
+   Returns the CodonRecodingStruct obtained from getCodonRecodingStruct(gCode).
+
+   Throws NxsException if the block has no datatype mapper, is not of the codon
+   datatype, already has a genetic code assigned, or if any cell holds a codon
+   that the recoding table marks (with a negative value) as a stop codon.
+*/
+CodonRecodingStruct NxsCharactersBlock::RemoveStopCodons(NxsGeneticCodesEnum gCode)
+{
+	NxsDiscreteDatatypeMapper * codonMapper = this->GetMutableDatatypeMapperForChar(0);
+	if (codonMapper == 0L)
+		throw NxsException("Invalid characters block (no datatype mapper)");
+	if (codonMapper->GetDatatype() != codon)
+		throw NxsException("Characters block must be of the type codons when RemoveStopCodons is called");
+	if (codonMapper->geneticCode != NXS_GCODE_NO_CODE)
+		throw NxsException("Characters block must be an uncompressed codons type when RemoveStopCodons is called");
+
+	const std::vector<NxsDiscreteStateCell> recodeTable = getToCodonRecodingMapper(gCode);
+	CodonRecodingStruct recoding = getCodonRecodingStruct(gCode);
+	const unsigned numRetained = (unsigned)recoding.compressedCodonIndToAllCodonsInd.size();
+	/* Codes of 64 and above are shifted down by the number of codons that were dropped. */
+	const unsigned shift = 64 - numRetained;
+
+	/* Recode a copy; the block's own matrix is only replaced once every cell has been accepted. */
+	NxsDiscreteStateMatrix recodedMatrix(this->discreteMatrix);
+	unsigned taxonInd = 0;
+	NxsDiscreteStateMatrix::iterator rowIt = recodedMatrix.begin();
+	while (rowIt != recodedMatrix.end())
+		{
+		unsigned siteInd = 0;
+		for (NxsDiscreteStateRow::iterator cellIt = rowIt->begin(); cellIt != rowIt->end(); ++cellIt, ++siteInd)
+			{
+			const NxsDiscreteStateCell oldCode = *cellIt;
+			if (oldCode < 0)
+				continue;	/* negative codes are left as they are */
+			if (oldCode >= 64)
+				{
+				*cellIt = oldCode - shift;
+				continue;
+				}
+			const NxsDiscreteStateCell newCode = recodeTable[oldCode];
+			if (newCode < 0)
+				{
+				NxsString m;
+				m << "Stop codon found at character ";
+				m << siteInd + 1;
+				m << " for taxon ";
+				m << taxonInd + 1;
+				throw NxsException(m);
+				}
+			*cellIt = newCode;
+			}
+		++rowIt;
+		++taxonInd;
+		}
+	recodedMatrix.swap(this->discreteMatrix);
+
+	/* Every one of the 64 codons that the table maps to a negative value is removed from the mapper. */
+	std::set<NxsDiscreteStateCell> stopCodonInds;
+	for (NxsDiscreteStateCell codonInd = 0; codonInd < 64; ++codonInd)
+		{
+		if (recodeTable[(int)codonInd] < 0)
+			stopCodonInds.insert(codonInd);
+		}
+	codonMapper->DeleteStateIndices(stopCodonInds);
+	return recoding;
+}
+
+/* Decides whether a single state code is tallied by NumAmbigInTaxon.
+   Codes in [0, number of states) are never tallied.  Any other code is tallied
+   only if it is NXS_MISSING_CODE when `countOnlyCompletelyMissing` is set;
+   otherwise it is tallied unless it is a gap and gaps are not being treated
+   as missing.
+*/
+static bool nxsCellCountsAsAmbiguous(
+  const NxsDiscreteStateCell cell,
+  const NxsDiscreteDatatypeMapper * mapper,
+  const bool countOnlyCompletelyMissing,
+  const bool treatGapsAsMissing)
+{
+	if (cell >= 0 && cell < (NxsDiscreteStateCell) mapper->GetNumStates())
+		return false;
+	if (countOnlyCompletelyMissing)
+		return cell == NXS_MISSING_CODE;
+	return treatGapsAsMissing || cell != NXS_GAP_STATE_CODE;
+}
+
+/* Returns the number of cells in the row of taxon `taxInd` whose state code is
+   negative or not below the number of states of the character's datatype
+   mapper, filtered by the two flags (see nxsCellCountsAsAmbiguous).
+
+   If `charIndices` is NULL every cell of the row is examined; otherwise only
+   the listed character indices are (looked up with a bounds-checked at()).
+*/
+unsigned NxsCharactersBlock::NumAmbigInTaxon(const unsigned taxInd, const NxsUnsignedSet * charIndices, const bool countOnlyCompletelyMissing, const bool treatGapsAsMissing) const
+{
+	const NxsDiscreteStateRow & taxonRow = GetDiscreteMatrixRow(taxInd);
+	unsigned nCounted = 0;
+	if (charIndices != NULL)
+		{
+		for (NxsUnsignedSet::const_iterator indIt = charIndices->begin(); indIt != charIndices->end(); ++indIt)
+			{
+			const unsigned charInd = *indIt;
+			const NxsDiscreteDatatypeMapper * mapper = GetDatatypeMapperForChar(charInd);
+			const NxsDiscreteStateCell & cell = taxonRow.at(charInd);
+			if (nxsCellCountsAsAmbiguous(cell, mapper, countOnlyCompletelyMissing, treatGapsAsMissing))
+				++nCounted;
+			}
+		return nCounted;
+		}
+
+	unsigned charInd = 0;
+	for (NxsDiscreteStateRow::const_iterator cellIt = taxonRow.begin(); cellIt != taxonRow.end(); ++cellIt, ++charInd)
+		{
+		const NxsDiscreteDatatypeMapper * mapper = GetDatatypeMapperForChar(charInd);
+		NCL_ASSERT(mapper);
+		if (nxsCellCountsAsAmbiguous(*cellIt, mapper, countOnlyCompletelyMissing, treatGapsAsMissing))
+			++nCounted;
+		}
+	return nCounted;
+}
+
+/* Returns NXS_MISSING_CODE in place of `cell` when ambiguity is being treated
+   as missing and `cell` is not below `numStates`; otherwise returns `cell`.
+*/
+static NxsDiscreteStateCell nxsSubsetCollapseAmbig(
+  const NxsDiscreteStateCell cell,
+  const NxsDiscreteStateCell numStates,
+  const bool treatAmbigAsMissing)
+{
+	if (treatAmbigAsMissing && cell >= numStates)
+		return NXS_MISSING_CODE;
+	return cell;
+}
+
+/* Returns true if, for every examined character, the datatype mapper's
+   FirstIsSubset() accepts the state of the first taxon against the state of
+   the second taxon; returns false at the first character that fails.
+
+   If `charIndices` is NULL the two rows are walked in parallel over the full
+   length of the first row; otherwise only the listed character indices are
+   compared (looked up with a bounds-checked at()).
+   `treatAmbigAsMissing` replaces codes >= the number of states by
+   NXS_MISSING_CODE before the comparison; `treatGapAsMissing` is forwarded to
+   FirstIsSubset().
+*/
+bool NxsCharactersBlock::FirstTaxonStatesAreSubsetOfSecond(
+  const unsigned firstTaxonInd,
+  const unsigned secondTaxonInd,
+  const NxsUnsignedSet * charIndices,
+  const bool treatAmbigAsMissing,
+  const bool treatGapAsMissing) const
+{
+	const NxsDiscreteStateRow & rowA = GetDiscreteMatrixRow(firstTaxonInd);
+	const NxsDiscreteStateRow & rowB = GetDiscreteMatrixRow(secondTaxonInd);
+	if (charIndices != NULL)
+		{
+		for (NxsUnsignedSet::const_iterator indIt = charIndices->begin(); indIt != charIndices->end(); ++indIt)
+			{
+			const unsigned charInd = *indIt;
+			const NxsDiscreteDatatypeMapper * mapper = GetDatatypeMapperForChar(charInd);
+			const NxsDiscreteStateCell numStates = mapper->GetNumStates();
+			const NxsDiscreteStateCell a = nxsSubsetCollapseAmbig(rowA.at(charInd), numStates, treatAmbigAsMissing);
+			const NxsDiscreteStateCell b = nxsSubsetCollapseAmbig(rowB.at(charInd), numStates, treatAmbigAsMissing);
+			if (!mapper->FirstIsSubset(a, b, treatGapAsMissing))
+				return false;
+			}
+		return true;
+		}
+
+	unsigned charInd = 0;
+	NxsDiscreteStateRow::const_iterator itA = rowA.begin();
+	NxsDiscreteStateRow::const_iterator itB = rowB.begin();
+	while (itA != rowA.end())
+		{
+		const NxsDiscreteDatatypeMapper * mapper = GetDatatypeMapperForChar(charInd);
+		const NxsDiscreteStateCell numStates = (NxsDiscreteStateCell) mapper->GetNumStates();
+		const NxsDiscreteStateCell a = nxsSubsetCollapseAmbig(*itA, numStates, treatAmbigAsMissing);
+		const NxsDiscreteStateCell b = nxsSubsetCollapseAmbig(*itB, numStates, treatAmbigAsMissing);
+		if (!mapper->FirstIsSubset(a, b, treatGapAsMissing))
+			return false;
+		++itA;
+		++itB;
+		++charInd;
+		}
+	return true;
+}
+
+/* Counts the differences between the rows of two taxa.
+
+   Returns (nDiffs, nSites): nSites is the number of characters that were
+   compared and nDiffs is the number of those at which the two cells share no
+   state (the mapper's state intersection is empty).
+
+   A character is skipped (it counts toward neither number) when either cell is
+   NXS_MISSING_CODE, or when `treatGapAsMissing` is set and either cell is
+   NXS_GAP_STATE_CODE.  With `treatAmbigAsMissing`, any code that is not below
+   the number of states is first replaced by NXS_MISSING_CODE.
+
+   If `charIndices` is NULL the two rows are walked in parallel over the full
+   length of the first row; otherwise only the listed character indices are
+   compared (looked up with a bounds-checked at()).
+*/
+std::pair<unsigned, unsigned> NxsCharactersBlock::GetPairwiseDist(
+  const unsigned firstTaxonInd,
+  const unsigned secondTaxonInd,
+  const NxsUnsignedSet * charIndices,
+  const bool treatAmbigAsMissing,
+  const bool treatGapAsMissing) const
+{
+	const NxsDiscreteStateRow & firstRow = GetDiscreteMatrixRow(firstTaxonInd);
+	const NxsDiscreteStateRow & secondRow = GetDiscreteMatrixRow(secondTaxonInd);
+	const NxsDiscreteDatatypeMapper * m;
+	unsigned nDiffs = 0;
+	unsigned nSites = 0;
+	if (charIndices == NULL)
+		{
+		unsigned cInd = 0;
+		NxsDiscreteStateRow::const_iterator firstIt = firstRow.begin();
+		NxsDiscreteStateRow::const_iterator secondIt = secondRow.begin();
+		for (; firstIt != firstRow.end(); ++firstIt, ++secondIt)
+			{
+			m = GetDatatypeMapperForChar(cInd++);
+			const NxsDiscreteStateCell ns = (NxsDiscreteStateCell) m->GetNumStates();
+			NxsDiscreteStateCell f = *firstIt;
+			NxsDiscreteStateCell s = *secondIt;
+			if (treatAmbigAsMissing)
+				{
+				if (f >= ns)
+					f = NXS_MISSING_CODE;
+				if (s >= ns)
+					s = NXS_MISSING_CODE;
+				}
+			if (f < 0 || s < 0)
+				{
+				if (treatGapAsMissing && (f == NXS_GAP_STATE_CODE || s == NXS_GAP_STATE_CODE))
+					continue;
+				if (f == NXS_MISSING_CODE || s == NXS_MISSING_CODE)
+					continue;
+				}
+			nSites++;
+			/* The cells differ only when they have no state in common. */
+			const std::set<NxsDiscreteStateCell> & ssim = m->GetStateIntersection(f, s);
+			if (ssim.empty())
+				++nDiffs;
+			}
+		}
+	else
+		{
+		for (NxsUnsignedSet::const_iterator c = charIndices->begin(); c != charIndices->end(); ++c)
+			{
+			m = GetDatatypeMapperForChar(*c);
+			const NxsDiscreteStateCell ns = (NxsDiscreteStateCell) m->GetNumStates();
+			NxsDiscreteStateCell f = firstRow.at(*c);
+			NxsDiscreteStateCell s = secondRow.at(*c);
+			if (treatAmbigAsMissing)
+				{
+				if (f >= ns)
+					f = NXS_MISSING_CODE;
+				if (s >= ns)
+					s = NXS_MISSING_CODE;
+				}
+			if (f < 0 || s < 0)
+				{
+				if (treatGapAsMissing && (f == NXS_GAP_STATE_CODE || s == NXS_GAP_STATE_CODE))
+					continue;
+				if (f == NXS_MISSING_CODE || s == NXS_MISSING_CODE)
+					continue;
+				}
+			nSites++;
+			/* The cells differ only when they have no state in common. */
+			const std::set<NxsDiscreteStateCell> & ssi = m->GetStateIntersection(f, s);
+			if (ssi.empty())
+				++nDiffs;
+			}
+		}
+	return std::pair<unsigned, unsigned>(nDiffs, nSites);
+}
+
+
+/* Rebuilds isStateSubsetMatrix and isStateSubsetMatrixGapsMissing as square
+   boolean matrices with one row and column per entry of stateSetsVec.
+
+   Entry [i][j] of both matrices is true exactly when the corresponding entry
+   of stateIntersectionMatrix is a non-empty set (that matrix is built first if
+   it is still empty).  In addition, the "gaps missing" variant marks [0][1]
+   and [1][0] as true.
+   NOTE(review): indices 0 and 1 presumably stand for the gap and missing
+   codes, and "subset" is decided here by a non-empty intersection -- confirm
+   both against the callers.
+*/
+void NxsDiscreteDatatypeMapper::BuildStateSubsetMatrix() const
+{
+	if (stateIntersectionMatrix.empty())
+		BuildStateIntersectionMatrix();
+
+	const unsigned dim = (unsigned)stateSetsVec.size();
+	IsStateSubsetRow allFalseRow(dim, false);
+	isStateSubsetMatrix.clear();
+	isStateSubsetMatrixGapsMissing.clear();
+	isStateSubsetMatrix.assign(dim, allFalseRow);
+	isStateSubsetMatrixGapsMissing.assign(dim, allFalseRow);
+
+	for (unsigned rowInd = 0; rowInd < dim; ++rowInd)
+		{
+		for (unsigned colInd = 0; colInd < dim; ++colInd)
+			{
+			if (stateIntersectionMatrix[rowInd][colInd].empty())
+				continue;
+			isStateSubsetMatrix[rowInd][colInd] = true;
+			isStateSubsetMatrixGapsMissing[rowInd][colInd] = true;
+			}
+		}
+
+	isStateSubsetMatrixGapsMissing[0][1] = true;
+	isStateSubsetMatrixGapsMissing[1][0] = true;
+}
+
+/* Rebuilds stateIntersectionMatrix as a square matrix of state sets with one
+   row and column per entry of stateSetsVec.
+
+   Every entry starts out as the empty set.  The pairwise loop then stores the
+   set intersection of the state sets of two codes, symmetrically.  Finally
+   [0][0] is set to {NXS_GAP_STATE_CODE}, [1][1] to {NXS_MISSING_CODE}, and row
+   1 receives the full state set of every code visited by the last loop.
+*/
+void NxsDiscreteDatatypeMapper::BuildStateIntersectionMatrix() const
+{
+	const std::set<NxsDiscreteStateCell> emptySet;
+
+	stateIntersectionMatrix.clear();
+
+	const unsigned nsPlus = (unsigned const)stateSetsVec.size();
+	const unsigned offset = (unsigned)(sclOffset + 2);
+	StateIntersectionRow emptyRow(nsPlus, emptySet);
+	stateIntersectionMatrix.assign(nsPlus, emptyRow);
+	/* NOTE(review): the state sets are looked up with code (i + sclOffset), but the
+	   matrix is indexed with (i - NXS_GAP_STATE_CODE).  Confirm that these two
+	   conventions agree and that the indices stay below nsPlus for every i. */
+	for (unsigned i = offset; i < nsPlus; ++i)
+		{
+		for (unsigned j = i; j < nsPlus; ++j)
+			{
+			const unsigned offi = i + sclOffset;
+			const unsigned offj = j + sclOffset;
+			std::set<NxsDiscreteStateCell> intersect;
+			const std::set<NxsDiscreteStateCell>	&fs =  GetStateSetForCode(offi);
+			const std::set<NxsDiscreteStateCell>	&ss =  GetStateSetForCode(offj);
+			set_intersection(fs.begin(), fs.end(), ss.begin(), ss.end(), inserter(intersect, intersect.begin()));
+			stateIntersectionMatrix[i - NXS_GAP_STATE_CODE][j - NXS_GAP_STATE_CODE] = intersect;
+			/* mirror the entry so that the matrix is symmetric */
+			if (i != j)
+				stateIntersectionMatrix[j - NXS_GAP_STATE_CODE][i - NXS_GAP_STATE_CODE] = stateIntersectionMatrix[i - NXS_GAP_STATE_CODE][j - NXS_GAP_STATE_CODE];
+			}
+		}
+
+	std::set<NxsDiscreteStateCell> tmpSet;
+	/* The hard-coded indices 0 (gap) and 1 (missing) below rely on the missing
+	   code being exactly one greater than the gap code. */
+	NCL_ASSERT(1 == NXS_MISSING_CODE - NXS_GAP_STATE_CODE);
+	tmpSet.insert(NXS_GAP_STATE_CODE);
+	stateIntersectionMatrix[0][0] = tmpSet;
+
+	tmpSet.clear();
+	tmpSet.insert(NXS_MISSING_CODE);
+	stateIntersectionMatrix[1][1] = tmpSet;
+	/* Row 1 receives the whole state set of each code.
+	   NOTE(review): only row 1 is filled here, not the matching column -- confirm
+	   that lookups always place the missing code first. */
+	for (unsigned i = offset; i < nsPlus; ++i)
+		{
+		const unsigned offi = i + sclOffset;
+		stateIntersectionMatrix[1][i - NXS_GAP_STATE_CODE] = GetStateSetForCode(offi);
+		}
+}
+
+
+/* Maps the name of a genetic code to its NxsGeneticCodesEnum value.
+   The name is lower-cased before the comparison, so the match is
+   case-insensitive.  Throws NxsException if the name is not recognized.
+*/
+NxsGeneticCodesEnum geneticCodeNameToEnum(std::string n)
+{
+	struct NameAndCode
+		{
+		const char * lowerName;
+		NxsGeneticCodesEnum code;
+		};
+	static const NameAndCode knownCodes[] = {
+		{"standard", NXS_GCODE_STANDARD},
+		{"vertmito", NXS_GCODE_VERT_MITO},
+		{"yeastmito", NXS_GCODE_YEAST_MITO},
+		{"moldmito", NXS_GCODE_MOLD_MITO},
+		{"invertmito", NXS_GCODE_INVERT_MITO},
+		{"ciliate", NXS_GCODE_CILIATE},
+		{"echinomito", NXS_GCODE_ECHINO_MITO},
+		{"euplotid", NXS_GCODE_EUPLOTID},
+		{"plantplastid", NXS_GCODE_PLANT_PLASTID},
+		{"altyeast", NXS_GCODE_ALT_YEAST},
+		{"ascidianmito", NXS_GCODE_ASCIDIAN_MITO},
+		{"altflatwormmito", NXS_GCODE_ALT_FLATWORM_MITO},
+		{"blepharismamacro", NXS_GCODE_BLEPHARISMA_MACRO},
+		{"chlorophyceanmito", NXS_GCODE_CHLOROPHYCEAN_MITO},
+		{"trematodemito", NXS_GCODE_TREMATODE_MITO},
+		{"scenedesmusmito", NXS_GCODE_SCENEDESMUS_MITO},
+		{"thraustochytriummito", NXS_GCODE_THRAUSTOCHYTRIUM_MITO}
+	};
+	const unsigned numKnown = (unsigned)(sizeof(knownCodes)/sizeof(knownCodes[0]));
+
+	NxsString::to_lower(n);
+	for (unsigned k = 0; k < numKnown; ++k)
+		{
+		if (n == knownCodes[k].lowerName)
+			return knownCodes[k].code;
+		}
+	NxsString err = "Unrecognized genetic code name: ";
+	err << n;
+	throw NxsException(err);
+}
+
+/* Returns the mixed-case name of a genetic code (the same spelling that
+   getGeneticCodeNames() stores for that enum value).
+   Throws NxsException if `n` is not one of the recognized codes.
+*/
+std::string geneticCodeEnumToName(NxsGeneticCodesEnum n)
+{
+	if (n == NXS_GCODE_STANDARD)
+		return "Standard";
+	if (n == NXS_GCODE_VERT_MITO)
+		return "VertMito";
+	if (n == NXS_GCODE_YEAST_MITO)
+		return "YeastMito";
+	if (n == NXS_GCODE_MOLD_MITO)
+		return "MoldMito";
+	if (n == NXS_GCODE_INVERT_MITO)
+		return "InvertMito";
+	if (n == NXS_GCODE_CILIATE)
+		return "Ciliate";
+	if (n == NXS_GCODE_ECHINO_MITO)
+		return "EchinoMito";
+	if (n == NXS_GCODE_EUPLOTID)
+		return "Euplotid";
+	if (n == NXS_GCODE_PLANT_PLASTID)
+		return "PlantPlastid";
+	if (n == NXS_GCODE_ALT_YEAST)
+		return "AltYeast";
+	if (n == NXS_GCODE_ASCIDIAN_MITO)
+		return "AscidianMito";
+	if (n == NXS_GCODE_ALT_FLATWORM_MITO)
+		return "AltFlatwormMito";
+	if (n == NXS_GCODE_BLEPHARISMA_MACRO)
+		return "BlepharismaMacro";
+	if (n == NXS_GCODE_CHLOROPHYCEAN_MITO)
+		return "ChlorophyceanMito";
+	/* Spelled "TrematodeMito" so that it matches the entry in getGeneticCodeNames(). */
+	if (n == NXS_GCODE_TREMATODE_MITO)
+		return "TrematodeMito";
+	if (n == NXS_GCODE_SCENEDESMUS_MITO)
+		return "ScenedesmusMito";
+	if (n == NXS_GCODE_THRAUSTOCHYTRIUM_MITO)
+		return "ThraustochytriumMito";
+	NxsString err = "Unrecognized genetic code enumeration: ";
+	err << n;
+	throw NxsException(err);
+}
+
+/* Returns a vector of NXS_GCODE_CODE_ENUM_SIZE strings in which the element at
+   the index of each recognized genetic code enum value holds that code's
+   mixed-case name.  Elements at any other index are left as empty strings.
+*/
+std::vector<std::string> getGeneticCodeNames()
+{
+	struct CodeAndName
+		{
+		NxsGeneticCodesEnum code;
+		const char * name;
+		};
+	static const CodeAndName namedCodes[] = {
+		{NXS_GCODE_STANDARD, "Standard"},
+		{NXS_GCODE_VERT_MITO, "VertMito"},
+		{NXS_GCODE_YEAST_MITO, "YeastMito"},
+		{NXS_GCODE_MOLD_MITO, "MoldMito"},
+		{NXS_GCODE_INVERT_MITO, "InvertMito"},
+		{NXS_GCODE_CILIATE, "Ciliate"},
+		{NXS_GCODE_ECHINO_MITO, "EchinoMito"},
+		{NXS_GCODE_EUPLOTID, "Euplotid"},
+		{NXS_GCODE_PLANT_PLASTID, "PlantPlastid"},
+		{NXS_GCODE_ALT_YEAST, "AltYeast"},
+		{NXS_GCODE_ASCIDIAN_MITO, "AscidianMito"},
+		{NXS_GCODE_ALT_FLATWORM_MITO, "AltFlatwormMito"},
+		{NXS_GCODE_BLEPHARISMA_MACRO, "BlepharismaMacro"},
+		{NXS_GCODE_CHLOROPHYCEAN_MITO, "ChlorophyceanMito"},
+		{NXS_GCODE_TREMATODE_MITO, "TrematodeMito"},
+		{NXS_GCODE_SCENEDESMUS_MITO, "ScenedesmusMito"},
+		{NXS_GCODE_THRAUSTOCHYTRIUM_MITO, "ThraustochytriumMito"}
+	};
+	const unsigned numNamed = (unsigned)(sizeof(namedCodes)/sizeof(namedCodes[0]));
+
+	std::vector<std::string> names(NXS_GCODE_CODE_ENUM_SIZE);
+	for (unsigned k = 0; k < numNamed; ++k)
+		names[namedCodes[k].code] = namedCodes[k].name;
+	return names;
+}
+
+
+
+/*
+  code index 0 => "Standard"
+  code index 1 => "Vertebrate Mitochondrial"
+  code index 2 => "Yeast Mitochondrial"
+  code index 3 => "Mold Mitochondrial; Protozoan Mitochondrial; Coelenterate Mitochondrial; Mycoplasma; Spiroplasma"
+  code index 4 => "Invertebrate Mitochondrial"
+  code index 5 => "Ciliate Nuclear; Dasycladacean Nuclear; Hexamita Nuclear"
+  code index 8 => "Echinoderm Mitochondrial; Flatworm Mitochondrial"
+  code index 9 => "Euplotid Nuclear"
+  code index 10 => "Bacterial and Plant Plastid"
+  code index 11 => "Alternative Yeast Nuclear"
+  code index 12 => "Ascidian Mitochondrial"
+  code index 13 => "Alternative Flatworm Mitochondrial"
+  code index 14 => "Blepharisma Macronuclear"
+  code index 15 => "Chlorophycean Mitochondrial"
+  code index 20 => "Trematode Mitochondrial"
+  code index 21 => "Scenedesmus obliquus Mitochondrial"
+  code index 22 => "Thraustochytrium Mitochondrial"
+*/
+/* Returns the 64-character translation string of a genetic code: one amino
+   acid letter (or '*' for a stop codon) per codon.
+   NOTE(review): the codon order is presumably alphabetical (AAA, AAC, AAG,
+   AAT, ACA, ... TTT) -- confirm against the callers.
+
+   Enum values below NXS_GCODE_CODE_ENUM_SIZE that have no row in the table
+   yield an empty string; an index outside the table makes vector::at() throw
+   std::out_of_range.
+
+   NOTE(review): the YeastMito row differs from the Standard row only at TGA
+   (W); NCBI translation table 3 also appears to recode ATA and the CTN
+   codons -- confirm before relying on this row.
+*/
+std::string getGeneticCodeAAOrder(NxsGeneticCodesEnum codeIndex)
+{
+	struct CodeAndTranslation
+		{
+		NxsGeneticCodesEnum code;
+		const char * aminoAcids;
+		};
+	static const CodeAndTranslation rows[] = {
+		{NXS_GCODE_STANDARD, "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSS*CWCLFLF"},
+		{NXS_GCODE_VERT_MITO, "KNKNTTTT*S*SMIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLF"},
+		{NXS_GCODE_YEAST_MITO, "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLF"},
+		{NXS_GCODE_MOLD_MITO, "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLF"},
+		{NXS_GCODE_INVERT_MITO, "KNKNTTTTSSSSMIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLF"},
+		{NXS_GCODE_CILIATE, "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVVQYQYSSSS*CWCLFLF"},
+		{NXS_GCODE_ECHINO_MITO, "NNKNTTTTSSSSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLF"},
+		{NXS_GCODE_EUPLOTID, "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSCCWCLFLF"},
+		{NXS_GCODE_PLANT_PLASTID, "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSS*CWCLFLF"},
+		{NXS_GCODE_ALT_YEAST, "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLSLEDEDAAAAGGGGVVVV*Y*YSSSS*CWCLFLF"},
+		{NXS_GCODE_ASCIDIAN_MITO, "KNKNTTTTGSGSMIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLF"},
+		{NXS_GCODE_ALT_FLATWORM_MITO, "NNKNTTTTSSSSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVVYY*YSSSSWCWCLFLF"},
+		{NXS_GCODE_BLEPHARISMA_MACRO, "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*YQYSSSS*CWCLFLF"},
+		{NXS_GCODE_CHLOROPHYCEAN_MITO, "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*YLYSSSS*CWCLFLF"},
+		{NXS_GCODE_TREMATODE_MITO, "NNKNTTTTSSSSMIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLF"},
+		{NXS_GCODE_SCENEDESMUS_MITO, "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*YLY*SSS*CWCLFLF"},
+		{NXS_GCODE_THRAUSTOCHYTRIUM_MITO, "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSS*CWC*FLF"}
+	};
+	const unsigned numRows = (unsigned)(sizeof(rows)/sizeof(rows[0]));
+
+	std::vector<std::string> translations(NXS_GCODE_CODE_ENUM_SIZE);
+	for (unsigned k = 0; k < numRows; ++k)
+		translations[rows[k].code] = rows[k].aminoAcids;
+	const int requested = (int) codeIndex;
+	return translations.at(requested);
+}
+
+
+
+std::vector<NxsDiscreteStateCell> getGeneticCodeIndicesAAOrder(const NxsGeneticCodesEnum codeIndex);
+
+
+/* Returns, for each of the 64 codons, the index of the amino acid that the
+   codon encodes under the genetic code `codeIndex`.
+
+   The table starts from the values used for the standard code and is then
+   patched for the requested code.  Codes without a case below are returned
+   with the unpatched table.
+   NOTE(review): index 20 is presumably the stop-codon marker, since it sits at
+   the '*' positions of getGeneticCodeAAOrder() -- confirm.
+*/
+std::vector<NxsDiscreteStateCell> getGeneticCodeIndicesAAOrder(const NxsGeneticCodesEnum codeIndex)
+{
+	static const NxsDiscreteStateCell baseInds[64] = {
+		 8, 11,  8, 11, 16, 16, 16, 16, 14, 15, 14, 15,  7,  7, 10,  7,
+		13,  6, 13,  6, 12, 12, 12, 12, 14, 14, 14, 14,  9,  9,  9,  9,
+		 3,  2,  3,  2,  0,  0,  0,  0,  5,  5,  5,  5, 17, 17, 17, 17,
+		20, 19, 20, 19, 15, 15, 15, 15, 20,  1, 18,  1,  9,  4,  9,  4
+	};
+	std::vector<NxsDiscreteStateCell> aaInd(baseInds, baseInds + 64);
+
+	switch (codeIndex)
+		{
+		case NXS_GCODE_VERT_MITO:
+			aaInd[8] = 20;
+			aaInd[10] = 20;
+			aaInd[12] = 10;
+			aaInd[56] = 18;
+			break;
+		case NXS_GCODE_YEAST_MITO:
+		case NXS_GCODE_MOLD_MITO:
+			aaInd[56] = 18;
+			break;
+		case NXS_GCODE_INVERT_MITO:
+			aaInd[8] = 15;
+			aaInd[10] = 15;
+			aaInd[12] = 10;
+			aaInd[56] = 18;
+			break;
+		case NXS_GCODE_CILIATE:
+			aaInd[48] = 13;
+			aaInd[50] = 13;
+			break;
+		case NXS_GCODE_ECHINO_MITO:
+			aaInd[0] = 11;
+			aaInd[8] = 15;
+			aaInd[10] = 15;
+			aaInd[56] = 18;
+			break;
+		case NXS_GCODE_EUPLOTID:
+			aaInd[56] = 1;
+			break;
+		case NXS_GCODE_ALT_YEAST:
+			aaInd[30] = 15;
+			break;
+		case NXS_GCODE_ASCIDIAN_MITO:
+			aaInd[8] = 5;
+			aaInd[10] = 5;
+			aaInd[12] = 10;
+			aaInd[56] = 18;
+			break;
+		case NXS_GCODE_ALT_FLATWORM_MITO:
+			aaInd[0] = 11;
+			aaInd[8] = 15;
+			aaInd[10] = 15;
+			aaInd[48] = 19;
+			aaInd[56] = 18;
+			break;
+		case NXS_GCODE_BLEPHARISMA_MACRO:
+			aaInd[50] = 13;
+			break;
+		case NXS_GCODE_CHLOROPHYCEAN_MITO:
+			aaInd[50] = 9;
+			break;
+		case NXS_GCODE_TREMATODE_MITO:
+			aaInd[0] = 11;
+			aaInd[8] = 15;
+			aaInd[10] = 15;
+			aaInd[12] = 10;
+			aaInd[56] = 18;
+			break;
+		case NXS_GCODE_SCENEDESMUS_MITO:
+			aaInd[50] = 9;
+			aaInd[52] = 20;
+			break;
+		case NXS_GCODE_THRAUSTOCHYTRIUM_MITO:
+			aaInd[60] = 20;
+			break;
+		default:
+			/* every other code keeps the unpatched table */
+			break;
+		}
+	return aaInd;
+}
+
+
+/* Appends to `charIndices` the character indices of a codon-position
+   partition, interleaved codon by codon: the k-th element of subset "1", then
+   the k-th element of subset "2", then the k-th element of subset "3".
+
+   Does nothing if `charIndices` is a null pointer.  Throws NxsException if
+   `codonPos` lacks a subset named "1", "2" or "3", or if the three subsets do
+   not have the same size.  Subsets with any other name are ignored.
+*/
+void NxsCharactersBlock::CodonPosPartitionToPosList(const NxsPartition &codonPos, std::list<int> * charIndices)
+{
+	if (charIndices == 0L)
+		return;
+
+	const char * const posNames[3] = {"1", "2", "3"};
+	const NxsUnsignedSet * posSets[3] = {0L, 0L, 0L};
+	for (NxsPartition::const_iterator groupIt = codonPos.begin(); groupIt != codonPos.end(); ++groupIt)
+		{
+		for (unsigned k = 0; k < 3; ++k)
+			{
+			if (groupIt->first == posNames[k])
+				{
+				/* each position name is expected at most once */
+				NCL_ASSERT(posSets[k] == 0L);
+				posSets[k] = &(groupIt->second);
+				break;
+				}
+			}
+		}
+	if (posSets[0] == 0L || posSets[1] == 0L || posSets[2] == 0L)
+		throw NxsException("Expecting partition subsets named 1, 2, and 3");
+	if (posSets[0]->size() != posSets[1]->size() || posSets[0]->size() != posSets[2]->size())
+		throw NxsException("Expecting the partition subsets named 1, 2, and 3 to have the same size");
+
+	NxsUnsignedSet::const_iterator firstIt = posSets[0]->begin();
+	NxsUnsignedSet::const_iterator secondIt = posSets[1]->begin();
+	NxsUnsignedSet::const_iterator thirdIt = posSets[2]->begin();
+	const NxsUnsignedSet::const_iterator firstEnd = posSets[0]->end();
+	while (firstIt != firstEnd)
+		{
+		charIndices->push_back(*firstIt);
+		charIndices->push_back(*secondIt);
+		charIndices->push_back(*thirdIt);
+		++firstIt;
+		++secondIt;
+		++thirdIt;
+		}
+}
+
+/* Allocates a new characters block with amino acids for the codons in `codonBlock` (which should
+	have datatype = codon).
+
+	The codon-to-amino-acid table is obtained from getGeneticCodeIndicesAAOrder(codeIndex) and the
+	work is delegated to the overload that takes that table explicitly.
+*/
+NxsCharactersBlock * NxsCharactersBlock::NewProteinCharactersBlock(
+  const NxsCharactersBlock * codonBlock,
+  bool mapPartialAmbigToUnknown,
+  bool gapToUnknown,
+  NxsGeneticCodesEnum codeIndex)
+{
+	const std::vector<NxsDiscreteStateCell> aaOrder = getGeneticCodeIndicesAAOrder(codeIndex);
+	NxsCharactersBlock * const proteinBlock = NxsCharactersBlock::NewProteinCharactersBlock(
+		codonBlock,
+		mapPartialAmbigToUnknown,
+		gapToUnknown,
+		aaOrder);
+	return proteinBlock;
+}
+
+
+/* Allocates a new characters block with amino acids for the codons in `codonBlock` (which must
+	have datatype = codon).
+
+	codonBlock               - source block; if NULL, NULL is returned.
+	mapPartialAmbigToUnknown - must be true (see below).
+	gapToUnknown             - if true the new block has no gap symbol, otherwise it inherits the
+	                           gap symbol of `codonBlock`.
+	aaIndices                - amino acid state index for each codon, looked up by codon state code.
+
+	Every cell whose codon state code is outside 0..63 is written as NXS_MISSING_CODE; every other
+	cell is translated through `aaIndices`.
+
+	The caller is responsible for deleting the returned block.
+
+	Throws NxsException if `codonBlock` is not of codon datatype, or if the requested combination of
+	`mapPartialAmbigToUnknown` / `gapToUnknown` is not implemented. No block is leaked when an
+	exception is raised.
+*/
+NxsCharactersBlock * NxsCharactersBlock::NewProteinCharactersBlock(
+  const NxsCharactersBlock * codonBlock,
+  bool mapPartialAmbigToUnknown,
+  bool gapToUnknown,
+  const std::vector<NxsDiscreteStateCell> & aaIndices) /** the index of the amino acid symbols for the codon (where the order of codons is alphabetical: AAA, AAC, AAG, AAT, ACA, ...TTT **/
+{
+	if (!codonBlock)
+		return NULL;
+	if (codonBlock->GetDataType() != NxsCharactersBlock::codon)
+		throw NxsException("NewProteinCharactersBlock must be called with a block of codon datatype");
+	/* Reject the unsupported mode before anything is allocated, so that this path cannot leak.
+	   NOTE(review): the gap test reads "gap symbol is defined"; confirm that this (rather than
+	   "no gap symbol is defined") is the intended precondition. */
+	if (!(mapPartialAmbigToUnknown && (gapToUnknown || codonBlock->GetGapSymbol() != '\0')))
+		throw NxsException("NewProteinCharactersBlock is not implemented for cases in which you are not mapping any ambiguity to the missing state code.");
+	const unsigned nc = codonBlock->GetNCharTotal();
+
+	/* create a new characters block with the same TAXA, but no ASSUMPTIONS block */
+	NxsTaxaBlockAPI * taxa = codonBlock->GetTaxaBlockPtr(NULL);
+	NxsCharactersBlock * aaBlock = new NxsCharactersBlock(taxa, NULL);
+	try
+		{
+		aaBlock->SetNChar(nc);
+		aaBlock->SetNTax(codonBlock->GetNTaxWithData());
+		aaBlock->missing = codonBlock->missing;
+		aaBlock->gap = (gapToUnknown ? '\0' : codonBlock->gap);
+		aaBlock->gapMode = codonBlock->gapMode;
+		aaBlock->datatype = NxsCharactersBlock::protein;
+		aaBlock->originalDatatype = codonBlock->originalDatatype;
+		aaBlock->ResetSymbols();
+		aaBlock->tokens = false;
+
+		NxsPartition dummy;
+		std::vector<DataTypesEnum> dummyVec;
+		aaBlock->CreateDatatypeMapperObjects(dummy, dummyVec);
+		const NxsDiscreteDatatypeMapper * codonMapper = codonBlock->GetDatatypeMapperForChar(0);
+		NxsDiscreteDatatypeMapper * aaMapper = aaBlock->GetMutableDatatypeMapperForChar(0);
+		aaMapper->geneticCode = codonMapper->geneticCode;
+
+		const unsigned ntax = (taxa == 0L ? codonBlock->GetNTaxWithData() : taxa->GetNTax());
+		aaBlock->datatypeReadFromFormat = false;
+		aaBlock->statesFormat = STATES_PRESENT;
+		aaBlock->restrictionDataype = false;
+		aaBlock->supportMixedDatatype = false;
+		aaBlock->convertAugmentedToMixed = false;
+		aaBlock->writeInterleaveLen = INT_MAX;
+
+		NxsDiscreteStateRow matRow(nc, 0);
+		aaBlock->discreteMatrix.assign(ntax, matRow);
+		for (unsigned taxInd = 0; taxInd < ntax; ++taxInd)
+			{
+			const NxsDiscreteStateRow & sourceRow = codonBlock->discreteMatrix.at(taxInd);
+			NxsDiscreteStateRow & destRow = aaBlock->discreteMatrix.at(taxInd);
+			for (unsigned c = 0; c < nc ; ++c)
+				{
+				const NxsDiscreteStateCell codon = sourceRow[c];
+				/* anything that is not one of the 64 unambiguous codons becomes missing data */
+				if (codon < 0 || codon > 63)
+					destRow[c] = NXS_MISSING_CODE;
+				else
+					destRow[c] = aaIndices.at(codon);
+				}
+			}
+		}
+	catch (...)
+		{
+		/* the caller never sees the half-built block, so it must be freed here */
+		delete aaBlock;
+		throw;
+		}
+	return aaBlock;
+}
+
+
+/* Allocates a new characters block with all of the active characters in `dnaBlock`
+	but with a 64-state codon datatype. The order of codons is:
+	 0   1   2   3   4   5  ... 63
+	AAA AAC AAG AAT ACA ACC ... TTT
+	The caller is responsible for deleting the new NxsCharactersBlock object
+	(and the block returned through `spareNucs`, if any).
+
+	dnaBlock     - source block; NULL is returned if it is NULL or if its datatype is not dna, rna
+	               or nucleotide.
+	charIndices  - the character indices to read, consumed three at a time (first, second, third
+	               codon position). If NULL, all characters of `dnaBlock` are used in order. A
+	               negative index makes the codon it belongs to missing.
+	spareNucs    - if not NULL, receives a new block holding the characters that were not used in
+	               any codon (or NULL if there are none).
+
+	If honorCharActive is true, then inactive characters are simply skipped in the reading
+	frame (treated as if they were introns) rather than being treated as missing.
+
+	A codon is written as NXS_MISSING_CODE unless all three of its nucleotides hold one of the
+	four unambiguous state codes (0..3).
+
+	Throws NxsException if the number of characters to translate is not a multiple of 3, or if the
+	requested combination of `mapPartialAmbigToUnknown` / `gapsToUnknown` is not implemented. No
+	block is leaked when an exception is raised.
+*/
+NxsCharactersBlock * NxsCharactersBlock::NewCodonsCharactersBlock(
+  const NxsCharactersBlock * dnaBlock,
+  bool mapPartialAmbigToUnknown,
+  bool gapsToUnknown,
+  bool honorCharActive,
+  const std::list<int> * charIndices,
+  NxsCharactersBlock ** spareNucs)
+{
+	if (!dnaBlock)
+		return NULL;
+	DataTypesEnum nucType = dnaBlock->GetDataType();
+	if (nucType != NxsCharactersBlock::dna && nucType != NxsCharactersBlock::rna && nucType != NxsCharactersBlock::nucleotide)
+		return NULL;
+	std::list<int> charInds;
+	const std::list<int> * sourceChars;
+	std::list<int> culled;
+	NxsUnsignedSet untranslated;
+
+	unsigned nc = dnaBlock->GetNCharTotal();
+
+	if (charIndices == NULL)
+		{
+		for (unsigned i = 0; i < nc; ++i)
+			charInds.push_back((int)i);
+		sourceChars = &charInds;
+		}
+	else
+		sourceChars = charIndices;
+
+	if (honorCharActive)
+		{
+		/* negative indices are kept: they act as "missing" placeholders in the reading frame */
+		for (std::list<int>::const_iterator cIt = sourceChars->begin(); cIt != sourceChars->end(); ++cIt)
+			{
+			const int c = *cIt;
+			if (c < 0 || dnaBlock->IsActiveChar((unsigned) c))
+				culled.push_back(c);
+			}
+		if (spareNucs)
+			{
+			for (unsigned c = 0; c < nc; ++c)
+				{
+				if (dnaBlock->IsActiveChar((unsigned) c))
+					untranslated.insert(c);
+				}
+			}
+		sourceChars = &culled;
+		}
+	else if (spareNucs)
+		{
+		for (unsigned c = 0; c < nc; ++c)
+			untranslated.insert(c);
+		}
+
+	const unsigned nnucs = (const unsigned)sourceChars->size();
+	if (nnucs % 3)
+		throw NxsException("Cannot create a codons block with a number of characters that is not a multiple of 3");
+	const unsigned ncodons = nnucs/3;
+
+	/* Reject the unsupported mode before anything is allocated, so that this path cannot leak.
+	   The same condition governs both the codon matrix and the spare-nucleotide matrix below. */
+	if (!(mapPartialAmbigToUnknown && (gapsToUnknown || dnaBlock->GetGapSymbol() != '\0')))
+		throw NxsException("NewCodonsCharactersBlock is not implemented for cases in which you are not mapping any ambiguity to the missing state code.");
+
+	/* create a new characters block with the same TAXA, but no ASSUMPTIONS block */
+	NxsTaxaBlockAPI * taxa = dnaBlock->GetTaxaBlockPtr(NULL);
+	NxsCharactersBlock * codonsBlock = new NxsCharactersBlock(taxa, NULL);
+	NxsCharactersBlock * untBlock = 0L;
+	try
+		{
+		codonsBlock->SetNChar(ncodons);
+		codonsBlock->SetNTax(dnaBlock->GetNTaxWithData());
+		codonsBlock->missing = dnaBlock->missing;
+		codonsBlock->gap = (gapsToUnknown ? '\0' : dnaBlock->gap);
+		codonsBlock->gapMode = dnaBlock->gapMode;
+		codonsBlock->symbols.assign(64, '\0');
+		codonsBlock->tokens = false;
+		/* state labels in state-code order: index = 16*first + 4*second + third, with A=0, C=1, G=2, T=3 */
+		const char * gsl[] = {
+			"AAA",  "AAC",  "AAG",  "AAT",  "ACA",  "ACC",  "ACG",  "ACT",  "AGA",  "AGC",  "AGG",  "AGT",  "ATA",  "ATC",  "ATG",  "ATT",
+			"CAA",  "CAC",  "CAG",  "CAT",  "CCA",  "CCC",  "CCG",  "CCT",  "CGA",  "CGC",  "CGG",  "CGT",  "CTA",  "CTC",  "CTG",  "CTT",
+			"GAA",  "GAC",  "GAG",  "GAT",  "GCA",  "GCC",  "GCG",  "GCT",  "GGA",  "GGC",  "GGG",  "GGT",  "GTA",  "GTC",  "GTG",  "GTT",
+			"TAA",  "TAC",  "TAG",  "TAT",  "TCA",  "TCC",  "TCG",  "TCT",  "TGA",  "TGC",  "TGG",  "TGT",  "TTA",  "TTC",  "TTG",  "TTT"};
+
+		codonsBlock->globalStateLabels.reserve(64);
+		for (unsigned i = 0 ; i < 64; ++i)
+			codonsBlock->globalStateLabels.push_back(NxsString(gsl[i]));
+
+		/* equivalent of HandleFormat */
+		codonsBlock->datatype = NxsCharactersBlock::codon;
+		codonsBlock->originalDatatype = nucType;
+
+		const NxsPartition dummy;
+		const std::vector<DataTypesEnum> dummyVec;
+		codonsBlock->CreateDatatypeMapperObjects(dummy, dummyVec);
+		NxsDiscreteDatatypeMapper * codonMapper = codonsBlock->GetMutableDatatypeMapperForChar(0);
+		codonMapper->geneticCode = NXS_GCODE_NO_CODE;
+
+		const unsigned ntax = (taxa == 0L ? dnaBlock->GetNTaxWithData() : taxa->GetNTax());
+		codonsBlock->datatypeReadFromFormat = false;
+		codonsBlock->statesFormat = STATES_PRESENT;
+		codonsBlock->restrictionDataype = false;
+		codonsBlock->supportMixedDatatype = false;
+		codonsBlock->convertAugmentedToMixed = false;
+		codonsBlock->writeInterleaveLen = INT_MAX;
+
+		const int maxUnambigNucState = 3;
+		const NxsDiscreteStateCell codonMissingState = NXS_MISSING_CODE;
+		NxsDiscreteStateRow matRow(ncodons, 0);
+		codonsBlock->discreteMatrix.assign(ntax, matRow);
+		const std::list<int>::const_iterator endNucIt = sourceChars->end();
+		for (unsigned taxInd = 0; taxInd < ntax; ++taxInd)
+			{
+			std::list<int>::const_iterator nucIt = sourceChars->begin();
+			const NxsDiscreteStateRow & sourceRow = dnaBlock->discreteMatrix.at(taxInd);
+			NxsDiscreteStateRow & destRow = codonsBlock->discreteMatrix.at(taxInd);
+			for (unsigned codonInd = 0; codonInd < ncodons ; ++codonInd)
+				{
+				NCL_ASSERT(nucIt != endNucIt);
+				const int fInd = *nucIt++;
+				NCL_ASSERT(nucIt != endNucIt);
+				const int sInd = *nucIt++;
+				NCL_ASSERT(nucIt != endNucIt);
+				const int tInd = *nucIt++;
+				if (spareNucs)
+					{
+					/* whatever is consumed by a codon is no longer a "spare" nucleotide */
+					untranslated.erase(fInd);
+					untranslated.erase(sInd);
+					untranslated.erase(tInd);
+					}
+				if (fInd < 0 || sInd < 0 || tInd < 0)
+					destRow[codonInd] = codonMissingState;
+				else
+					{
+					const NxsDiscreteStateCell fb = sourceRow[fInd];
+					const NxsDiscreteStateCell sb = sourceRow[sInd];
+					const NxsDiscreteStateCell tb = sourceRow[tInd];
+					if (fb < 0 || sb < 0 || tb < 0 || fb > maxUnambigNucState || sb > maxUnambigNucState || tb > maxUnambigNucState)
+						destRow[codonInd] = codonMissingState;
+					else
+						destRow[codonInd] = 16*fb + 4*sb + tb;
+					}
+				}
+			}
+
+		/* `untranslated` can only be non-empty when spareNucs was supplied (it is only filled then) */
+		if (!untranslated.empty())
+			{
+			const unsigned nunt = (const unsigned)untranslated.size();
+
+			untBlock = new NxsCharactersBlock(taxa, NULL);
+			untBlock->SetNChar(nunt);
+			untBlock->SetNTax(ntax);
+			untBlock->missing = dnaBlock->missing;
+			untBlock->gap = (gapsToUnknown ? '\0' : dnaBlock->gap);
+			untBlock->gapMode = dnaBlock->gapMode;
+			untBlock->datatype = nucType;
+			untBlock->originalDatatype = dnaBlock->originalDatatype;
+			untBlock->ResetSymbols();
+			untBlock->tokens = false;
+
+			untBlock->CreateDatatypeMapperObjects(dummy, dummyVec);
+			untBlock->datatypeReadFromFormat = false;
+			untBlock->statesFormat = STATES_PRESENT;
+			untBlock->restrictionDataype = false;
+			untBlock->supportMixedDatatype = false;
+			untBlock->convertAugmentedToMixed = false;
+			untBlock->writeInterleaveLen = INT_MAX;
+
+			NxsDiscreteStateRow umatRow(nunt, 0);
+			untBlock->discreteMatrix.assign(ntax, umatRow);
+			for (unsigned taxInd = 0; taxInd < ntax; ++taxInd)
+				{
+				const NxsDiscreteStateRow & sourceRow = dnaBlock->discreteMatrix.at(taxInd);
+				NxsDiscreteStateRow & destRow = untBlock->discreteMatrix.at(taxInd);
+				unsigned untIndex = 0;
+				for (NxsUnsignedSet::const_iterator uIt  = untranslated.begin(); uIt != untranslated.end() ; ++uIt, ++untIndex)
+					{
+					const unsigned ind = *uIt;
+					destRow.at(untIndex) = sourceRow[ind];
+					}
+				}
+			}
+		}
+	catch (...)
+		{
+		/* neither block has been handed to the caller yet, so both must be freed here */
+		delete untBlock;
+		delete codonsBlock;
+		throw;
+		}
+	if (spareNucs)
+		*spareNucs = untBlock; /* NULL when every nucleotide was used in a codon */
+	return codonsBlock;
+}
+
+
+/*! Returns the real-valued weights of the weight set called `set_name` (matched without regard
+	to case) as a vector indexed by character index.
+
+	The vector is only as long as the largest index mentioned in the set requires; positions that
+	the set does not mention hold 1.0. An empty vector is returned if no such set exists.
+*/
+std::vector<double>  NxsTransformationManager::GetDoubleWeights(const std::string &set_name) const
+	{
+	std::vector<double> weights;
+	std::map<std::string, ListOfDblWeights>::const_iterator setIt = dblWtSets.begin();
+	while (setIt != dblWtSets.end()
+		   && !NxsString::case_insensitive_equals(setIt->first.c_str(), set_name.c_str()))
+		++setIt;
+	if (setIt == dblWtSets.end())
+		return weights;
+
+	const ListOfDblWeights & entries = setIt->second;
+	for (ListOfDblWeights::const_iterator entry = entries.begin(); entry != entries.end(); ++entry)
+		{
+		const double wt = entry->first;
+		const std::set<unsigned> & members = entry->second;
+		/* visit the largest index first so that at most one resize is needed per entry */
+		std::set<unsigned>::const_reverse_iterator indIt = members.rbegin();
+		while (indIt != members.rend())
+			{
+			const unsigned charInd = *indIt;
+			if (charInd >= weights.size())
+				weights.resize(1 + charInd, 1.0);
+			weights[charInd] = wt;
+			++indIt;
+			}
+		}
+	return weights;
+	}
+
+/*! Returns the integer weights of the weight set called `set_name` (matched without regard
+	to case) as a vector indexed by character index.
+
+	The vector is only as long as the largest index mentioned in the set requires; positions that
+	the set does not mention hold 1. An empty vector is returned if no such set exists.
+*/
+std::vector<int> NxsTransformationManager::GetIntWeights(const std::string &set_name) const
+	{
+	std::vector<int> weights;
+	std::map<std::string, ListOfIntWeights>::const_iterator setIt = intWtSets.begin();
+	while (setIt != intWtSets.end()
+		   && !NxsString::case_insensitive_equals(setIt->first.c_str(), set_name.c_str()))
+		++setIt;
+	if (setIt == intWtSets.end())
+		return weights;
+
+	const ListOfIntWeights & entries = setIt->second;
+	for (ListOfIntWeights::const_iterator entry = entries.begin(); entry != entries.end(); ++entry)
+		{
+		const int wt = entry->first;
+		const std::set<unsigned> & members = entry->second;
+		/* visit the largest index first so that at most one resize is needed per entry */
+		std::set<unsigned>::const_reverse_iterator indIt = members.rbegin();
+		while (indIt != members.rend())
+			{
+			const unsigned charInd = *indIt;
+			if (charInd >= weights.size())
+				weights.resize(1 + charInd, 1);
+			weights[charInd] = wt;
+			++indIt;
+			}
+		}
+	return weights;
+	}
+
+/*! Creates a datatype mapper from the parsing information (this is the ctor used
+		most frequently during a parse).
+
+	datatypeE      - the datatype the mapper is built for; `mixed` is rejected.
+	symbolsStr     - the symbols list; if empty, the default symbols for `datatypeE` are used.
+	missingChar    - stored as the missing-data character.
+	gap            - stored as the gap character.
+	matchingChar   - stored as the match character.
+	respectingCase - stored as the case-sensitivity flag.
+	moreEquates    - stored as the extra equates.
+
+	The genetic code starts out as NXS_GCODE_NO_CODE and both lookup pointers start out NULL;
+	RefreshMappings is called at the end of construction.
+
+	Throws NxsException if `datatypeE` is NxsCharactersBlock::mixed.
+*/
+NxsDiscreteDatatypeMapper::NxsDiscreteDatatypeMapper(
+	NxsCharactersBlock::DataTypesEnum datatypeE,
+	const std::string & symbolsStr,
+	char missingChar,
+	char gap,
+	char matchingChar,
+	bool respectingCase,
+	const std::map<char, NxsString> & moreEquates)
+	:geneticCode(NXS_GCODE_NO_CODE),
+	cLookup(NULL),
+	stateCodeLookupPtr(NULL),
+	symbols(symbolsStr),
+	nStates(0),
+	matchChar(matchingChar),
+	gapChar(gap),
+	missing(missingChar),
+	respectCase(respectingCase),
+	extraEquates(moreEquates),
+	datatype(datatypeE),
+	restrictionDataype(false),
+	userDefinedEquatesBeforeConversion(false)
+	{
+	// fall back to the built-in symbols list when the caller did not supply one
+	if (symbols.empty())
+		symbols = NxsCharactersBlock::GetDefaultSymbolsForType(datatype);
+	if (datatype == NxsCharactersBlock::mixed)
+		throw NxsException("Cannot create a mixed datatype mapper"); // this should be the only empty string-generating datatype
+	RefreshMappings(0L);
+	}
+
+/*! Writes a human-readable description of the mapper to `out` (a debugging aid).
+
+	The first line gives the number of states and whether a gap "state" is included; it is followed
+	by the number of state codes and a table with one row per state code. Each row shows the
+	characters (ASCII 0..126) that map to the code, the code itself, and the states it stands for:
+	bare for a single state, in parentheses for a polymorphic code and in braces otherwise.
+*/
+void NxsDiscreteDatatypeMapper::DebugPrint(std::ostream & out) const
+	{
+	out << GetNumStatesIncludingGap() << " states (";
+	if (gapChar == '\0')
+		out << "no gaps";
+	else
+		out << "including the gap \"state\"";
+	out << ')';
+	const int nsc = (int) stateSetsVec.size();
+	out << '\n' << nsc << " state codes.\n";
+	out << "NEXUS     State Code      States\n";
+	const std::string::size_type nexColumnWidth = 10;
+	for (int sc = sclOffset; sc < sclOffset + nsc; ++sc)
+		{
+		/* collect every character that is read as state code `sc` */
+		std::string nex;
+		for (int c = 0; c < 127; ++c)
+			{
+			if (cLookup[c] == sc)
+				nex.append(1, (char) c);
+			}
+		/* pad to the column width; when more characters than that share a code, no padding is
+		   added (a negative count must never reach append, which takes an unsigned count) */
+		if (nex.size() < nexColumnWidth)
+			nex.append(nexColumnWidth - nex.size(), ' ');
+		out << nex << "    " << sc << "     ";
+		const std::set<NxsDiscreteStateCell>	&ss = GetStateSetForCode(sc);
+		std::string decoded;
+		for (std::set<NxsDiscreteStateCell>::const_iterator s = ss.begin(); s != ss.end(); ++s)
+			decoded.append(StateCodeToNexusString(*s));
+		if (decoded.length() < 2)
+			out << decoded;
+		else if (IsPolymorphic(sc))
+			out << '(' << decoded << ')';
+		else
+			out << '{' << decoded << '}';
+		out << '\n';
+		}
+	}
+
+/*!
+	Builds the NxsDiscreteDatatypeMapper objects that will be used to encode the characters, from
+	the datatype settings that have already been parsed into this block.
+
+	For a non-mixed datatype a single mapper with an empty index set is created and `dtParts` and
+	`dtcodes` are not consulted. For a mixed datatype one mapper is created per subset of `dtParts`,
+	using the datatype found at the same position in `dtcodes`, and `mixedTypeMapping` records
+	which character indices belong to each datatype.
+
+	Any NxsException raised on the way is rethrown with an explanatory preamble prepended to its
+	message (position information is preserved).
+*/
+void NxsCharactersBlock::CreateDatatypeMapperObjects(const NxsPartition & dtParts, const std::vector<DataTypesEnum> & dtcodes)
+	{
+	try {
+		mixedTypeMapping.clear();
+		if (datatype == mixed)
+			{
+			datatypeMapperVec.clear();
+			NCL_ASSERT(dtParts.size() == dtcodes.size());
+			datatypeMapperVec.reserve(dtParts.size());
+			//@@@TMP add code to fill  DataTypesEnum -> NxsUnsignedSet map  here ! for DZ and DS.
+			std::vector<DataTypesEnum>::const_iterator typeIt = dtcodes.begin();
+			NxsPartition::const_iterator partIt = dtParts.begin();
+			while (partIt != dtParts.end())
+				{
+				const DataTypesEnum subsetType = *typeIt;
+				/* mrbayes is the only program to support MIXED and it uses a default (not extendable) symbols list of 0123456789 rather than 01 */
+				std::string subsetSymbols;
+				if (subsetType == standard)
+					subsetSymbols.assign("0123456789");
+				NxsDiscreteDatatypeMapper subsetMapper(subsetType, subsetSymbols, missing, gap, matchchar, respectingCase, userEquates);
+				const NxsUnsignedSet & subsetChars = partIt->second;
+				DatatypeMapperAndIndexSet entry(subsetMapper, subsetChars);
+				mixedTypeMapping[subsetType].insert(subsetChars.begin(), subsetChars.end());
+				datatypeMapperVec.push_back(entry);
+				++partIt;
+				++typeIt;
+				}
+			}
+		else
+			{
+			NxsDiscreteDatatypeMapper soleMapper(datatype, symbols, missing, gap, matchchar, respectingCase, userEquates);
+			datatype = soleMapper.GetDatatype();
+			DatatypeMapperAndIndexSet entry(soleMapper, NxsUnsignedSet());
+			datatypeMapperVec.clear();
+			datatypeMapperVec.push_back(entry);
+			}
+		}
+	catch (const NxsException & x)
+		{
+		std::string y = "An error was detected while trying to create a datatype mapping structure.  This portion of code tends to generate cryptic error messages, so if the following message is not helpful, double check the syntax in the FORMAT command of your block.\n";
+		y.append(x.msg);
+		throw NxsException(y, x.pos, x.line, x.col);
+		}
+	}
+
+
+
+
+/*!
+	If you say FORMAT DATATYPE=DNA SYMBOLS="01" ; then the valid symbols become "ACGT01"
+
+	AugmentedSymbolsToMixed tries to split such a matrix into a datatype=mixed(dna:charset_dna,standard:charset_std)
+	by inferring the charpartition (charset_dna,charset_std).  It does this by using GetNamedStateSetOfColumn to
+	detect which states were listed in a column.
+
+	Returns true if the translation to mixed was performed.  This will only occur if GetOriginalDataType() != GetDataType()
+ 	because this is the symptom that there was symbol augmentation of a built in datatype.
+	The conversion is also refused (false is returned) unless every augmenting symbol is a digit.
+
+	Note that in the GetNamedStateSetOfColumn
+	then ? will not expand the states present in a symbol. Thus when parsing:
+	Matrix 1:                     Matrix 2:
+	s   ACGT10{ACGT01-}           s   ACGT10?
+	t   ACGT100                   t   ACGT100
+		The last character of the first taxon would be parsed as having the potential to have states {ACGT01-}.
+		But when interpreted with GetNamedStateSetOfColumn, Matrix 2 can be "explained" by four DNA columns, and three
+		Standard (01) columns.  Matrix 1, on the other hand would be found to have four DNA columns, and two
+		Standard (01) columns, and one standard ("ACGT01") column.
+	Note: this function ignores the gap mode setting and treats gaps as newstates for the purposes of
+		the conversion. The gap mode of the block is restored before the function returns or throws.
+
+	Temporary:  Will return false if userDefinedEquatesBeforeConversion is true
+*/
+bool NxsCharactersBlock::AugmentedSymbolsToMixed()
+	{
+	DataTypesEnum odt = GetOriginalDataType();
+	if (IsMixedType() || (odt == GetDataType()))
+		return false;
+	const std::string origSymb = GetDefaultSymbolsForType(odt);
+	const std::string cutSymb = symbols.substr(0, origSymb.length());
+	if (origSymb != cutSymb)
+		return false;
+	const std::string augmentSymbols = symbols.substr(origSymb.length());
+	if (augmentSymbols.empty())
+		return false;
+	for (std::string::const_iterator a = augmentSymbols.begin(); a != augmentSymbols.end(); ++a)
+		{
+		/* isdigit requires a value representable as unsigned char; a plain (possibly negative) char is not safe */
+		if (!isdigit((unsigned char) *a))
+			return false;
+		}
+
+	NxsUnsignedSet stdTypeChars;
+	NxsUnsignedSet origTypeChars;
+	std::set<NxsDiscreteStateCell> torigStateInds;
+	std::set<NxsDiscreteStateCell> tstdStateInds;
+	torigStateInds.insert(NXS_GAP_STATE_CODE);
+	tstdStateInds.insert(NXS_GAP_STATE_CODE);
+	for (NxsDiscreteStateCell j = 0; j < (NxsDiscreteStateCell)origSymb.length(); ++j)
+		torigStateInds.insert(j);
+	for (NxsDiscreteStateCell j =  (NxsDiscreteStateCell)origSymb.length(); j < (NxsDiscreteStateCell)symbols.length(); ++j)
+		tstdStateInds.insert(j);
+	const std::set<NxsDiscreteStateCell> origStateInds(torigStateInds);
+	const unsigned nosi = (unsigned)origStateInds.size();
+	const std::set<NxsDiscreteStateCell> stdStateInds(tstdStateInds);
+	const unsigned nssi = (unsigned)stdStateInds.size();
+
+	/*Check each column for patterns that can not be mapped to origSymb or augmentSymbols */
+	const unsigned nChars = GetNCharTotal();
+	GapModeEnum cached_gap_mode = this->gapMode;
+	this->gapMode = GAP_MODE_NEWSTATE;
+	try {
+		for (unsigned colIndex = 0; colIndex < nChars; ++colIndex)
+			{
+			const std::set<NxsDiscreteStateCell> cs = GetNamedStateSetOfColumn(colIndex);
+			/* a column fits a type when its states add nothing to that type's state set */
+			std::set<NxsDiscreteStateCell> origUnion;
+			set_union(origStateInds.begin(), origStateInds.end(), cs.begin(), cs.end(), inserter(origUnion, origUnion.begin()));
+			if (origUnion.size() > nosi)
+				{
+				std::set<NxsDiscreteStateCell> stdUnion;
+				set_union(stdStateInds.begin(), stdStateInds.end(), cs.begin(), cs.end(), inserter(stdUnion, stdUnion.begin()));
+				if (stdUnion.size() > nssi)
+					{
+					/* the column mixes both symbol sets: give up, but do not leave the
+					   block stuck in the temporary gap mode */
+					this->gapMode = cached_gap_mode;
+					return false;
+					}
+				stdTypeChars.insert(colIndex);
+				}
+			else
+				origTypeChars.insert(colIndex);
+			}
+		}
+	catch (...)
+		{
+		this->gapMode = cached_gap_mode;
+		throw;
+		}
+	this->gapMode = cached_gap_mode;
+	/* If we get here then the mapping to mixed type will succeed */
+
+	/* copy the incoming matrix and mapper */
+	VecDatatypeMapperAndIndexSet mdm = datatypeMapperVec;
+	const NxsDiscreteDatatypeMapper & oldMapper = mdm[0].first;
+	if (oldMapper.GetUserDefinedEquatesBeforeConversion())
+		return false; /* dealing with equates correctly is not implemented below, so we'll bale out */
+
+	/* add the new mappers */
+	std::map<char, NxsString> noEquates;
+	datatypeMapperVec.clear();
+	NxsDiscreteDatatypeMapper o(odt, origSymb, missing, gap, matchchar, respectingCase, noEquates);
+	datatypeMapperVec.push_back(DatatypeMapperAndIndexSet(o, origTypeChars));
+	NxsDiscreteDatatypeMapper s(NxsCharactersBlock::standard, augmentSymbols, missing, gap, matchchar, respectingCase, noEquates);
+	datatypeMapperVec.push_back(DatatypeMapperAndIndexSet(s, stdTypeChars));
+
+
+	NxsDiscreteDatatypeMapper & newOrigTMapper = datatypeMapperVec[0].first;
+	NxsDiscreteDatatypeMapper & newStdTMapper = datatypeMapperVec[1].first;
+
+	/* now we recode discrete matrix with new state codes */
+	const NxsDiscreteStateCell nOrigStates = (NxsDiscreteStateCell) origSymb.size();
+	/* cache of translations already worked out, keyed by the old state code */
+	std::map<NxsDiscreteStateCell, NxsDiscreteStateCell> oldToNewStateCode;
+	for (NxsDiscreteStateMatrix::iterator rowIt = discreteMatrix.begin(); rowIt != discreteMatrix.end(); ++rowIt)
+		{
+		NxsDiscreteStateRow & row = *rowIt;
+		unsigned column = 0;
+		for (NxsDiscreteStateRow::iterator cell = row.begin(); cell != row.end(); ++cell, ++column)
+			{
+			const NxsDiscreteStateCell initStateCode = *cell;
+			if (initStateCode  >= 0 ) //gap and missing codes do not need translation
+				{
+				std::map<NxsDiscreteStateCell, NxsDiscreteStateCell>::const_iterator otnIt = oldToNewStateCode.find(initStateCode);
+				if (otnIt == oldToNewStateCode.end())
+					{
+					const bool isOrigT = origTypeChars.count(column) > 0;
+					const std::set<NxsDiscreteStateCell> oldSymbols = oldMapper.GetStateSetForCode(initStateCode);
+					const std::string oldNexusString = oldMapper.StateCodeToNexusString(initStateCode);
+					const char oldNexusChar = (oldNexusString.length() == 1 ? oldNexusString[0] : '\0');
+					const bool isPoly =  oldMapper.IsPolymorphic(initStateCode);
+					NxsDiscreteStateCell newStateCode ;
+					if (isOrigT)
+						{ //old symbol indices will still be the new symbol indices
+						newStateCode = newOrigTMapper.StateCodeForStateSet(oldSymbols, isPoly, true, oldNexusChar);
+						newOrigTMapper.StateCodeToNexusString(newStateCode);
+						}
+					else
+						{
+						/* augmenting symbols followed the original ones, so their indices shift down by nOrigStates */
+						std::set<NxsDiscreteStateCell> transSymbols;
+						for (std::set<NxsDiscreteStateCell>::const_iterator sIt = oldSymbols.begin(); sIt != oldSymbols.end(); ++sIt)
+							{
+							if (*sIt >= nOrigStates)
+								transSymbols.insert(*sIt - nOrigStates);
+							else
+								{
+								NCL_ASSERT(*sIt < 0);
+								transSymbols.insert(*sIt);
+								}
+							}
+						newStateCode = newStdTMapper.StateCodeForStateSet(transSymbols, isPoly, true, oldNexusChar);
+						newStdTMapper.StateCodeToNexusString(newStateCode);
+						}
+					oldToNewStateCode[initStateCode] = newStateCode;
+					*cell = newStateCode;
+					}
+				else
+					*cell = otnIt->second;
+				}
+			}
+		}
+	datatype = NxsCharactersBlock::mixed;
+	mixedTypeMapping.clear();
+	mixedTypeMapping[odt] = origTypeChars;
+	mixedTypeMapping[NxsCharactersBlock::standard] = stdTypeChars;
+	return true;
+	}
+/*!
+	Called when FORMAT command needs to be parsed from within the DIMENSIONS block. Deals with everything after the
+	token FORMAT up to and including the semicolon that terminates the FORMAT command.
+*/
+void NxsCharactersBlock::HandleFormat(
+  NxsToken &token)	/* the token used to read from `in' */
+	{
+	errormsg.clear();
+	ProcessedNxsCommand tokenVec;
+	token.ProcessAsCommand( &tokenVec);
+
+	const ProcessedNxsCommand::const_iterator tvEnd = tokenVec.end();
+	NxsPartition dtParts;
+	std::vector<DataTypesEnum> dtv;
+	std::vector<bool> isR;
+	if (!datatypeReadFromFormat)
+		{
+		bool standardDataTypeAssumed = true;
+		bool ignoreCaseAssumed = true;
+		datatype = standard;
+		originalDatatype = standard;
+		ResetSymbols();
+		respectingCase = false;
+		restrictionDataype = false;
+		for (ProcessedNxsCommand::const_iterator wIt = tokenVec.begin(); wIt != tvEnd; ++wIt)
+			{
+			if (wIt->Equals("DATATYPE"))
+				{
+				DemandEquals(wIt, tvEnd, " after keyword DATATYPE");
+				ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, " after \"DATATYPE =\" in FORMAT command");
+				if (wIt->Equals("STANDARD"))
+					{
+					datatype = standard;
+					symbols = "01";
+					}
+				else if (wIt->Equals("DNA"))
+					datatype = dna;
+				else if (wIt->Equals("RNA"))
+					datatype = rna;
+				else if (wIt->Equals("NUCLEOTIDE"))
+					datatype = nucleotide;
+				else if (wIt->Equals("PROTEIN"))
+					datatype = protein;
+				else if (wIt->Equals("RESTRICTION"))
+					{
+					datatype = standard;
+					restrictionDataype = true;
+					}
+				else if (wIt->Equals("CONTINUOUS"))
+					{
+					datatype = continuous;
+					statesFormat = INDIVIDUALS;
+					items = std::vector<std::string>(1, std::string("AVERAGE"));
+					tokens = true;
+					}
+				else if (supportMixedDatatype && wIt->Equals("MIXED"))
+					{
+					datatype = mixed;
+					ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, " after \"DATATYPE=MIXED\" in FORMAT command. Expecting (");
+					if (!wIt->Equals("("))
+						{
+						errormsg << "Expecting ( after \"DATATYPE=MIXED\" but found " << wIt->GetToken();
+						throw NxsException(errormsg, *wIt);
+						}
+					ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, " after \"DATATYPE=MIXED(\" in FORMAT command. Expecting a datatype");
+					ostringstream fakestream;
+					while (!wIt->Equals(")"))
+						{
+						fakestream << ' ' << NxsString::GetEscaped(wIt->GetToken());
+						ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, " in \"DATATYPE=MIXED\" in FORMAT command. Expecting a closing ) to terminate the list.");
+						}
+					fakestream << ';';
+					const std::string accumulated = fakestream.str();
+					istringstream fakeinput(accumulated);
+					NxsToken subToken(fakeinput);
+					try
+						{
+						std::string mt("mixed datatype definition");
+						subToken.GetNextToken();
+						this->ReadPartitionDef(dtParts, *this, mt, "Character", "Datatype=Mixed", subToken, false, true, false);
+						}
+					catch (NxsException & x)
+						{
+						errormsg = x.msg;
+						throw NxsException(errormsg, *wIt);
+						}
+					catch (...)
+						{
+						errormsg << "Error parsing \"DATATYPE=MIXED\" subcommand in FORMAT the command.";
+						throw NxsException(errormsg, *wIt);
+						}
+					for (NxsPartition::const_iterator pIt = dtParts.begin(); pIt != dtParts.end(); ++pIt)
+						{
+						NxsString name(pIt->first.c_str());
+						name.ToUpper();
+						if (name == "RESTRICTION")
+							{
+							dtv.push_back(standard);
+							isR.push_back(true);
+							}
+						else
+							{
+							isR.push_back(false);
+							if (name == "STANDARD")
+								dtv.push_back(standard);
+							else if (name == "DNA")
+								dtv.push_back(dna);
+							else if (name == "RNA")
+								dtv.push_back(rna);
+							else if (name == "NUCLEOTIDE")
+								dtv.push_back(nucleotide);
+							else if (name == "PROTEIN")
+								dtv.push_back(protein);
+							else
+								{
+								errormsg << pIt->first <<  " is not a valid DATATYPE within a " <<  NCL_BLOCKTYPE_ATTR_NAME << " block";
+								throw NxsException(errormsg, *wIt);
+								}
+							}
+						}
+					}
+				else
+					{
+					errormsg << wIt->GetToken() <<  " is not a valid DATATYPE within a " <<  NCL_BLOCKTYPE_ATTR_NAME << " block";
+					throw NxsException(errormsg, *wIt);
+					}
+				datatypeReadFromFormat = true;
+				originalDatatype = datatype;
+				ResetSymbols();
+				standardDataTypeAssumed = false;
+				if (!ignoreCaseAssumed)
+					break;
+				}
+			else if (wIt->Equals("RESPECTCASE"))
+				{
+				ignoreCaseAssumed = false;
+				respectingCase = true;
+				if (!standardDataTypeAssumed)
+					break;
+				}
+			}
+		}
+	for (ProcessedNxsCommand::const_iterator wIt = tokenVec.begin(); wIt != tvEnd; ++wIt)
+		{
+
+		if (wIt->Equals("DATATYPE"))// we should have already processed this
+			{
+			ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, "after DATATYPE in FORMAT command"); // =
+			ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, "after DATATYPE = in FORMAT command"); // datatype
+			}
+		else if (wIt->Equals("RESPECTCASE"))
+			{
+			if (!respectingCase)
+				{
+				errormsg << "Only one FORMAT command should occur per DATA or CHARACTERS block.";
+				throw NxsException(errormsg, *wIt);
+				}
+			}
+		else if (wIt->Equals("MISSING"))
+			{
+			DemandEquals(wIt, tvEnd, "after keyword MISSING");
+			ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, "after \"MISSING = \" in FORMAT command");
+			const std::string t = wIt->GetToken();
+			if (t.length() != 1)
+				{
+				errormsg << "MISSING symbol should be a single character, but " << t << " was specified";
+				WarnDangerousContent(errormsg, *wIt);
+				}
+			else if (token.IsPunctuationToken(t) && !token.IsPlusMinusToken(t))
+				{
+				errormsg << "MISSING symbol specified cannot be a punctuation token (" << t << " was specified)";
+				WarnDangerousContent(errormsg, *wIt);
+				}
+			else if (token.IsWhitespaceToken(t))
+				{
+				errormsg << "MISSING symbol specified cannot be a whitespace character (" << t << " was specified)";
+				WarnDangerousContent(errormsg, *wIt);
+				}
+			missing = t[0];
+			}
+		else if (wIt->Equals("GAP"))
+			{
+			DemandEquals(wIt, tvEnd, "after keyword GAP");
+			ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, "after \"GAP = \" in FORMAT command");
+			const std::string t = wIt->GetToken();
+			if (t.length() != 1)
+				{
+				errormsg << "GAP symbol should be a single character, but " << t << " was specified";
+				WarnDangerousContent(errormsg, *wIt);
+				}
+			else if (token.IsPunctuationToken(t) && !token.IsPlusMinusToken(t))
+				{
+				errormsg << "GAP symbol specified cannot be a punctuation token " << t << " was specified";
+				WarnDangerousContent(errormsg, *wIt);
+				}
+			else if (token.IsWhitespaceToken(t))
+				{
+				errormsg << "GAP symbol specified cannot be a whitespace character " << t << " was specified";
+				WarnDangerousContent(errormsg, *wIt);
+				}
+			gap = t[0];
+			}
+		else if (wIt->Equals("MATCHCHAR"))
+			{
+			DemandEquals(wIt, tvEnd, "after keyword MATCHCHAR");
+			ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, "after \"MATCHCHAR = \" in FORMAT command");
+			const std::string t = wIt->GetToken();
+			if (t.length() != 1)
+				{
+				errormsg << "MATCHCHAR symbol should be a single character, but " << t << " was specified";
+				WarnDangerousContent(errormsg, *wIt);
+				}
+			else if (token.IsPunctuationToken(t) && !token.IsPlusMinusToken(t))
+				{
+				errormsg << "MATCHCHAR symbol specified cannot be a punctuation token (" << t << " was specified)";
+				WarnDangerousContent(errormsg, *wIt);
+				}
+			else if (token.IsWhitespaceToken(t))
+				{
+				errormsg << "MATCHCHAR symbol specified cannot be a whitespace character (" << t << " was specified)";
+				WarnDangerousContent(errormsg, *wIt);
+				}
+			matchchar = t[0];
+			}
+		else if (wIt->Equals("SYMBOLS") || wIt->Equals("SYMBOL"))
+			{
+			if (datatype == NxsCharactersBlock::continuous)
+				throw NxsException("SYMBOLS subcommand not allowed for DATATYPE=CONTINUOUS", *wIt);
+			if (restrictionDataype)
+				throw NxsException("SYMBOLS subcommand not allowed for DATATYPE=RESTRICTION", *wIt);
+			//NxsDiscreteStateCell numDefStates;
+			unsigned maxNewStates = NCL_MAX_STATES;
+			switch(datatype)
+				{
+				case NxsCharactersBlock::dna:
+				case NxsCharactersBlock::rna:
+				case NxsCharactersBlock::nucleotide:
+				    //numDefStates = 4;
+					maxNewStates = NCL_MAX_STATES-4;
+					break;
+
+				case NxsCharactersBlock::protein:
+				    //numDefStates = 21;
+					maxNewStates = NCL_MAX_STATES-21;
+					break;
+
+				default:
+				    //numDefStates = 0; // replace symbols list for standard datatype
+					symbols.clear();
+					maxNewStates = NCL_MAX_STATES;
+				}
+			DemandEquals(wIt, tvEnd, "after keyword SYMBOLS");
+			ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, "\" to start the symbols list");
+			if (!wIt->Equals("\""))
+				{
+				errormsg << "Expecting \" after Symbols= but " << wIt->GetToken() << " was found";
+				throw NxsException(errormsg, *wIt);
+				}
+			ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, "closing \" of symbols list");
+			NxsString s;
+			while (!wIt->Equals("\""))
+				{
+				s += wIt->GetToken().c_str();
+				ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, "closing \" of symbols list");
+				}
+
+			const std::string tos = NxsString::strip_whitespace(s);
+			const char * to = tos.c_str();
+			unsigned tlen = (unsigned)tos.length();
+			if (tlen > maxNewStates)
+				{
+				errormsg << "SYMBOLS defines " << tlen << " new states but only " << maxNewStates << " new states allowed for this DATATYPE";
+				throw NxsException(errormsg, *wIt);
+				}
+			// Check to make sure user has not used any symbols already in the
+			// default symbols list for this data type
+			//
+			std::string preprocessedS;
+			for (unsigned i = 0; i < tlen; i++)
+				{
+				if (to[i] == '~')
+					{
+					if (i == 0 || i == tlen -1)
+						{
+						errormsg << "A ~ in a SYMBOLS list is interpreted as a range of symbols.  The ~ cannot be the first or last character in the symbols list";
+						throw NxsException(errormsg, token);
+						}
+					const int jj = i - 1 ;
+					const char prevChar = to[jj];
+					const char nextChar = to[i+1];
+					if ((isdigit(prevChar) && isdigit(nextChar)) || (isalpha(prevChar) && isalpha(nextChar)))
+						{
+						if (nextChar > prevChar)
+							{
+							for (char c = (char)((int)prevChar + 1) ; c < nextChar;)
+								{
+								preprocessedS.append(1, c);
+								c = (char) ((int)c + 1);
+								}
+							}
+						else
+							{
+							errormsg << "Endpoint of SYMBOLS range must be greater than the starting point.  This was not true of " << prevChar << '~' << nextChar;
+							throw NxsException(errormsg, token);
+							}
+						}
+					else
+						{
+						errormsg << prevChar << '~' << nextChar << " is an illegal SYMBOLS range. A range must go from a letter to a letter or from a number to number" ;
+						throw NxsException(errormsg, token);
+						}
+					}
+				else
+					preprocessedS += to[i];
+				}
+			NxsString processedS;
+			for (std::string::const_iterator pp = preprocessedS.begin(); pp != preprocessedS.end(); ++pp)
+				{
+				const char c = *pp;
+				if (IsInSymbols(c))
+					{
+					errormsg << "The character " << c << " defined in SYMBOLS is predefined for this DATATYPE and should not occur in a SYMBOLS statement";
+					if (nexusReader)
+						{
+						nexusReader->NexusWarnToken(errormsg, NxsReader::SKIPPING_CONTENT_WARNING, token);
+						errormsg.clear();
+						}
+					}
+				else if (   (respectingCase && (userEquates.find(c) != userEquates.end()))
+						 || (! respectingCase && (userEquates.find(toupper(c)) != userEquates.end() || userEquates.find(tolower(*pp)) != userEquates.end())))
+					{
+					errormsg << "The character " << *pp << " defined in SYMBOLS subcommand, has already been introduced as an EQUATE key.  The use of a character as both a state symbol and an equate key is not allowed.";
+					throw NxsException(errormsg, token);
+					}
+				else
+					processedS += *pp;
+				}
+			if (!processedS.empty())
+				{
+				if (this->datatype == dna || this->datatype == rna || this->restrictionDataype || this->datatype == protein)
+					{
+					if (this->allowAugmentingOfSequenceSymbols)
+						{
+						errormsg << "Adding symbols to the " << GetNameOfDatatype(this->datatype) << " datatype will cause the matrix to be treated as if it were a";
+						if (this->convertAugmentedToMixed)
+							errormsg << " MIXED datatype matrix";
+						else
+							errormsg << " STANDARD datatype matrix";
+						if (!this->convertAugmentedToMixed)
+							nexusReader->NexusWarnToken(errormsg, NxsReader::AMBIGUOUS_CONTENT_WARNING, token);
+						errormsg.clear();
+						}
+					else
+						{
+						errormsg << "Symbols cannot be added to the " << GetNameOfDatatype(this->datatype) << " datatype.";
+						throw NxsException(errormsg, token);
+						}
+					}
+				// If we've made it this far, go ahead and add the user-defined
+				// symbols to the end of the list of predefined symbols
+				//
+				symbols += processedS.c_str();
+				}
+			}
+
+		else if (wIt->Equals("EQUATE"))
+			{
+			if (datatype == NxsCharactersBlock::continuous)
+				throw NxsException("EQUATE subcommand not allowed for DATATYPE=CONTINUOUS", *wIt);
+
+			DemandEquals(wIt, tvEnd, "after keyword EQUATE");
+			ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, "\" to start the Equate definition");
+			if (!wIt->Equals("\""))
+				{
+				errormsg << "Expecting '\"' after keyword EQUATE but found " << wIt->GetToken() << " instead";
+				throw NxsException(errormsg, *wIt);
+				}
+			ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, "\" to end the Equate definition");
+			while (!wIt->Equals("\""))
+				{
+				std::string t = wIt->GetToken();
+				if (t.length() != 1)
+					{
+					errormsg << "Expecting single-character EQUATE symbol but found " << wIt->GetToken() << " instead";
+					throw NxsException(errormsg, *wIt);
+					}
+				const char ch = t[0];
+				bool badEquateSymbol = false;
+
+				// Equate symbols cannot be punctuation (except for + and -)
+				//
+				if (token.IsPunctuationToken(t) && !token.IsPlusMinusToken(t))
+					badEquateSymbol = true;
+				else if (ch == '^')
+					badEquateSymbol = true;
+				if (badEquateSymbol)
+					{
+					errormsg << "EQUATE symbol specified (" << wIt->GetToken() <<  ") is not valid. Equate symbols cannot be any of the following: ()[]{}/\\,;:=*'\"`<>^";
+					WarnDangerousContent(errormsg, *wIt);
+					}
+				if (ch == missing || ch == matchchar || ch == gap || IsInSymbols(ch))
+					{
+					errormsg << "EQUATE symbol specified (" << wIt->GetToken() <<  ") is not valid; An Equate symbol cannot be a state symbol or identical to the  missing,  gap, or matchchar symbols.";
+					throw NxsException(errormsg, *wIt);
+					}
+
+				DemandEquals(wIt, tvEnd, " in EQUATE definition");
+				ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, "State or set of states in Equate definition");
+				NxsString s;
+				s = wIt->GetToken().c_str();
+				if (wIt->Equals("{"))
+					{
+					while (!wIt->Equals("}"))
+						{
+						ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, "} to close the state set in an equate definition");
+						s += wIt->GetToken().c_str();
+						}
+					}
+				else if (wIt->Equals("("))
+					{
+					while (!wIt->Equals(")"))
+						{
+						ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, ") to close the state set in an equate definition");
+						s += wIt->GetToken().c_str();
+						}
+					}
+				const std::string nows = NxsString::strip_whitespace(s);
+				userEquates[ch] = NxsString(nows.c_str());
+				ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, "\" to end the Equate definition");
+				}
+			}
+
+		else if (wIt->Equals("LABELS"))
+			labels = true;
+		else if (wIt->Equals("NOLABELS"))
+			labels = false;
+		else if (wIt->Equals("TRANSPOSE"))
+			transposing = true;
+		else if (wIt->Equals("INTERLEAVE"))
+			interleaving = true;
+		else if (wIt->Equals("ITEMS"))
+			{
+			DemandEquals(wIt, tvEnd, "after keyword ITEMS");
+			items.clear();
+			// This should be STATES (no other item is supported at this time)
+			//
+			ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, "after \"ITEMS =\" in FORMAT command");
+			if (datatype == NxsCharactersBlock::continuous)
+				{
+				std::string s;
+				if (wIt->Equals("("))
+					{
+					ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, ") to close Items list in FORMAT command");
+					while (!wIt->Equals(")"))
+						{
+						s = wIt->GetToken();
+						NxsString::to_upper(s);
+						items.push_back(std::string(s.c_str()));
+						ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, ") to close Items list in FORMAT command");
+						}
+					}
+				else
+					{
+					s = wIt->GetToken();
+					NxsString::to_upper(s);
+					items.push_back(std::string(s.c_str()));
+					}
+				}
+			else
+				{
+				if (!wIt->Equals("STATES"))
+					throw NxsException("Sorry, only ITEMS=STATES is supported for discrete datatypes at this time", *wIt);
+				items = std::vector<std::string>(1, std::string("STATES"));
+				}
+			}
+		else if (wIt->Equals("STATESFORMAT"))
+			{
+			DemandEquals(wIt, tvEnd, "after keyword STATESFORMAT");
+			ProcessedNxsToken::IncrementNotLast(wIt, tvEnd, "after \"STATESFORMAT =\" in FORMAT command");
+			if (wIt->Equals("STATESPRESENT"))
+				statesFormat = STATES_PRESENT;
+			else
+				{
+				if (datatype == NxsCharactersBlock::continuous)
+					{
+					if (wIt->Equals("INDIVIDUALS"))
+						statesFormat = INDIVIDUALS;
+					else
+						throw NxsException("Sorry, only STATESFORMAT=STATESPRESENT or STATESFORMAT=INDIVIDUALS are supported for continuous datatypes at this time", *wIt);
+					}
+				else
+					throw NxsException("Sorry, only STATESFORMAT=STATESPRESENT supported for discrete datatypes at this time", *wIt);
+				}
+			}
+		else if (wIt->Equals("TOKENS"))
+			tokens = true;
+		else if (wIt->Equals("NOTOKENS"))
+			{
+			if (datatype == NxsCharactersBlock::continuous)
+				throw NxsException("NOTOKENS is not allowed for the CONTINUOUS datatype", *wIt);
+			tokens = false;
+			}
+		}
+	if (IsInSymbols(missing))
+		{
+		errormsg << "The \"missing\" character \'" << missing << "\' may not be included in the SYMBOLS list.";
+		throw NxsException(errormsg, *tokenVec.begin());
+		}
+	if (IsInSymbols(matchchar))
+		{
+		errormsg << "The \"matchchar\" character \'" << matchchar << "\' may not be included in the SYMBOLS list.";
+		throw NxsException(errormsg, *tokenVec.begin());
+		}
+	if (IsInSymbols(gap))
+		{
+		errormsg << "The \"gap\" character \'" << gap << "\' may not be included in the SYMBOLS list.";
+		throw NxsException(errormsg, *tokenVec.begin());
+		}
+
+	if (matchchar != '\0')
+		{
+		if ((matchchar == gap) || (!respectingCase && toupper(matchchar) == toupper(gap)))
+			{
+			errormsg << "MatchChar and Gap symbol cannot be identical!  Both were set to " << gap;
+			throw NxsException(errormsg, *tokenVec.begin());
+			}
+		if ((matchchar == missing) || (!respectingCase && toupper(matchchar) == toupper(missing)))
+			{
+			errormsg << "MatchChar and Missing symbol cannot be identical!  Both were set to " << missing;
+			throw NxsException(errormsg, *tokenVec.begin());
+			}
+		}
+	if ((gap != '\0') && ((gap == missing) || (!respectingCase && toupper(gap) == toupper(missing))))
+		{
+		errormsg << "Gap symbol and Missing symbol cannot be identical!  Both were set to " << missing;
+		throw NxsException(errormsg, *tokenVec.begin());
+		}
+
+	// Perform some last checks before leaving the FORMAT command
+	//
+	if (!tokens && datatype == continuous)
+		GenerateNxsException(token, "TOKENS must be defined for DATATYPE=CONTINUOUS");
+	if (tokens && (datatype == dna || datatype == rna || datatype == nucleotide))
+		GenerateNxsException(token, "TOKENS not allowed for the DATATYPEs DNA, RNA, or NUCLEOTIDE");
+	CreateDatatypeMapperObjects(dtParts, dtv);
+	if (IsMixedType() && tokens)
+		{
+		errormsg = "The combination of DATATYPE=Mixed  and TOKENS are not currently supported.";
+		throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+		}
+	unsigned mapInd = 0;
+	for (std::vector<bool>::const_iterator b = isR.begin(); b != isR.end(); ++b, ++mapInd)
+		{
+		if (*b)
+			{
+			DatatypeMapperAndIndexSet &mapper = datatypeMapperVec.at(mapInd);
+			mapper.first.SetWasRestrictionDataype(true);
+			}
+		}
+	}
+
+/*!
+	Default constructor: creates a mapper for the standard datatype.
+
+	The symbols list is "01", '?' is the missing-data symbol, no gap or match
+	character is defined, and symbols are not case-sensitive.  The lookup
+	tables are then built from these settings by RefreshMappings.
+*/
+NxsDiscreteDatatypeMapper::NxsDiscreteDatatypeMapper()
+	:geneticCode(NXS_GCODE_NO_CODE),
+	matchChar('\0'),
+	gapChar('\0'),
+	missing('?'),
+	respectCase(false),
+	datatype(NxsCharactersBlock::standard),
+	restrictionDataype(false),
+	userDefinedEquatesBeforeConversion(false)
+	{
+	symbols.assign("01");
+	/* no NEXUS token triggered this call, so pass a null token pointer */
+	RefreshMappings(NULL);
+	}
+
+/*!
+	Copy assignment.
+
+	Copies the symbols, the special characters (match, gap, missing), the
+	equates, the datatype information and both lookup containers from `other`,
+	then re-seats the raw lookup pointers (stateCodeLookupPtr and cLookup) so
+	that they point into this object's own containers rather than into other's.
+
+	The derived matrices (stateIntersectionMatrix, isStateSubsetMatrix and
+	isStateSubsetMatrixGapsMissing) are not copied; they are cleared here
+	because they were computed for the state sets this object held before the
+	assignment (the same invalidation that RefreshMappings and AddStateSet
+	perform whenever the state sets change).
+
+	\returns a reference to this object.
+*/
+NxsDiscreteDatatypeMapper & NxsDiscreteDatatypeMapper::operator=(const NxsDiscreteDatatypeMapper& other)
+	{
+	if (this == &other)
+		return *this;
+	symbols = other.symbols;
+	lcsymbols = other.lcsymbols;
+	nStates = other.nStates;
+	matchChar = other.matchChar;
+	gapChar = other.gapChar;
+	missing = other.missing;
+	respectCase = other.respectCase;
+	extraEquates = other.extraEquates;
+	datatype = other.datatype;
+	geneticCode = other.geneticCode;
+	sclOffset = other.sclOffset;
+	stateSetsVec = other.stateSetsVec;
+	/* sclOffset is negative, so -sclOffset is the index of the element holding state code 0 */
+	if (stateSetsVec.empty())
+		stateCodeLookupPtr = 0L;
+	else
+		stateCodeLookupPtr = &stateSetsVec[-sclOffset];
+	charToStateCodeLookup = other.charToStateCodeLookup;
+	/* 127 must stay identical to the offset used in RefreshMappings, because the
+		copied table was filled relative to that offset */
+	if (charToStateCodeLookup.empty())
+		cLookup = 0L;
+	else
+		cLookup = &charToStateCodeLookup[127];
+	restrictionDataype = other.restrictionDataype;
+	userDefinedEquatesBeforeConversion = other.userDefinedEquatesBeforeConversion;
+	/* drop any matrices derived from the previous state sets of this object */
+	stateIntersectionMatrix.clear();
+	isStateSubsetMatrix.clear();
+	isStateSubsetMatrixGapsMissing.clear();
+	return *this;
+	}
+
+/*!
+	Creates a mapper that uses the default symbols list of `datatypeE`.
+
+	'?' is the missing symbol, '.' is the match character and symbols are not
+	case-sensitive.  The lookup tables are built by RefreshMappings.
+
+	Throws NxsException if `datatypeE` is NxsCharactersBlock::mixed.
+*/
+NxsDiscreteDatatypeMapper::NxsDiscreteDatatypeMapper(
+	NxsCharactersBlock::DataTypesEnum datatypeE, /*!< the datatype enum facet -- cannot be mixed*/
+	bool usegaps) /*!< if true then '-' will be used as the gapChar*/
+	:geneticCode(NXS_GCODE_NO_CODE),
+	cLookup(NULL),
+	stateCodeLookupPtr(NULL),
+	matchChar('.'),
+	gapChar(usegaps ? '-' : '\0'),
+	missing('?'),
+	respectCase(false),
+	datatype(datatypeE),
+	restrictionDataype(false),
+	userDefinedEquatesBeforeConversion(false)
+	{
+	symbols = NxsCharactersBlock::GetDefaultSymbolsForType(datatype);
+	const bool isMixed = (datatype == NxsCharactersBlock::mixed);
+	if (isMixed)
+		throw NxsException("Cannot create a mixed datatype mapper"); // this should be the only empty string-generating datatype
+	RefreshMappings(NULL);
+	}
+
+/*! \returns true when this mapper and `other` agree on:
+		the datatype,
+		the symbols list,
+		whether a gap character is defined, and
+		the state set behind every state code from 0 up to the highest state code.
+
+	The "keys" used in equates are not compared: two mappers may spell an equate
+	differently (a syntactic difference) and still be reported as equivalent.
+
+	Useful for checking whether a datatype differs significantly from the default
+	type.  State-coded matrices of semantically equivalent mappers can be
+	concatenated, or manipulated with the same operations.
+*/
+bool NxsDiscreteDatatypeMapper::IsSemanticallyEquivalent(const NxsDiscreteDatatypeMapper &other) const
+	{
+	if (datatype != other.datatype || symbols != other.symbols)
+		return false;
+	/* only the presence of a gap matters, not which character represents it */
+	const bool gapPresenceDiffers = ((gapChar == '\0') != (other.gapChar == '\0'));
+	if (gapPresenceDiffers)
+		return false;
+	const NxsDiscreteStateCell highestCode = (NxsDiscreteStateCell)GetHighestStateCode();
+	if (highestCode != (NxsDiscreteStateCell) other.GetHighestStateCode())
+		return false;
+	NxsDiscreteStateCell code = 0;
+	while (code <= highestCode)
+		{
+		if (GetStateSetForCode(code) != other.GetStateSetForCode(code))
+			return false;
+		++code;
+		}
+	return true;
+	}
+
+/*!
+	Rebuilds every lookup structure of the mapper from its current settings.
+
+	Must be called when the symbols list changes.
+	Uses symbols, gap, missing, respectCase,  extraEquates, and datatype fields to establish new mappings.
+	token can be NULL if the call is not triggered by the reading of a NEXUS token.
+
+	Side effects visible in this function:
+		- symbols is upper-cased when respectCase is false, and lcsymbols is rebuilt;
+		- charToStateCodeLookup/cLookup, stateSetsVec/stateCodeLookupPtr and sclOffset are reset;
+		- the cached state matrices are cleared;
+		- if the datatype is dna, nucleotide, rna or protein but the symbols list is no longer the
+		  default one for that type, the datatype is changed to standard and the default equates
+		  are merged into extraEquates;
+		- extraEquates is reduced to the equates that produced a new mapping.
+
+	Throws NxsException if there are no symbols (unless the datatype is continuous, in which
+	case the function returns without building anything) or if the missing symbol is '\0'.
+*/
+void NxsDiscreteDatatypeMapper::RefreshMappings(NxsToken *token)
+	{
+	nStates = (unsigned)symbols.length();
+	if (nStates ==  0)
+		{
+		if (datatype != NxsCharactersBlock::continuous)
+			throw NxsException("Cannot create a datatype mapper with no symbols");
+		return;
+		}
+	/* case-insensitive mode: keep symbols in upper case and a lower-case twin in lcsymbols;
+		case-sensitive mode: lcsymbols is left empty (to_lower on the empty string is a no-op) */
+	if (!respectCase)
+		{
+		NxsString::to_upper(symbols);
+		lcsymbols = symbols;
+		}
+	else
+		lcsymbols.clear();
+
+	NxsString::to_lower(lcsymbols);
+
+	if (missing == '\0')
+		throw NxsException("Cannot create a datatype mapper with no missing data symbol");
+
+	/* NOTE(review): the comment below says "element 128" but the code points cLookup at element 127,
+		so an index of -128 (the lowest value of a signed 8-bit char) would fall one slot before the
+		start of the vector -- confirm. operator= uses the same offset of 127, so the two must change together. */
+	charToStateCodeLookup.assign(384, NXS_INVALID_STATE_CODE); /*256+128 = 384 -- this way we can deal with signed or unsigned chars by pointing cLookup to element 128*/
+	cLookup = &charToStateCodeLookup[127];
+	stateIntersectionMatrix.clear();
+	isStateSubsetMatrix.clear();
+	isStateSubsetMatrixGapsMissing.clear();
+
+	stateSetsVec.clear();
+	stateCodeLookupPtr = 0L;
+	/* sclOffset is the (negative) state code of the first element of stateSetsVec:
+		-1 when only the missing entry precedes the states, -2 when a gap entry comes first as well */
+	sclOffset = (gapChar == '\0' ? -1 : -2);
+
+	/* the calls below need a token reference; when the caller supplied none, a token over an empty stream stands in */
+	std::string bogus;
+	std::istringstream bogusStream(bogus);
+	NxsToken bogusToken(bogusStream);
+	token = (token == NULL ? &bogusToken : token);
+
+	/* add the "fundamental" states. */
+	std::set<NxsDiscreteStateCell> stSet;
+	std::set<NxsDiscreteStateCell> missingSet;
+	if (gapChar != '\0')
+		{
+		stSet.insert(NXS_GAP_STATE_CODE);
+		/* this is the one of only 2 times that  we don't call AddStateSet to add a state set
+			we do this to avoid illegal indexing of stateSets[1] when there
+			is only one element in the vector.
+		*/
+		stateSetsVec.push_back(NxsDiscreteStateSetInfo(stSet, false, gapChar));
+		cLookup[(int) gapChar] = NXS_GAP_STATE_CODE;
+
+		/* when gaps are in use, the missing state set also contains the gap state */
+		missingSet.insert(NXS_GAP_STATE_CODE);
+		}
+
+
+		/*
+			Add the missing state code
+			this is the other time that we don't call AddStateSet (to avoid illegal indexing).
+		*/
+	NCL_ASSERT(missing != '\0');
+	NCL_ASSERT(nStates > 0);
+	/* the missing state set holds every fundamental state */
+	for (NxsDiscreteStateCell s = 0; s < (NxsDiscreteStateCell) nStates; ++s)
+		missingSet.insert(s);
+
+	char sym = (respectCase ? missing : (char) toupper(missing));
+	stateSetsVec.push_back(NxsDiscreteStateSetInfo(missingSet, false, sym));
+	/* same formula as in AddStateSet: code of the element that was just appended */
+	const NxsDiscreteStateCell stateCode = (const NxsDiscreteStateCell)stateSetsVec.size() + sclOffset - 1;
+	NCL_ASSERT(NXS_MISSING_CODE == stateCode);
+	if (respectCase)
+		cLookup[(int) missing] = stateCode;
+	else
+		{
+		cLookup[(int) tolower(missing)] = stateCode;
+		cLookup[(int) toupper(missing)] = stateCode;
+		}
+	NCL_ASSERT(cLookup[(int) missing] == NXS_MISSING_CODE);
+	/* one single-state set per symbol, added in symbol order (AddStateSet requires this order) */
+	for (NxsDiscreteStateCell s = 0; s < (NxsDiscreteStateCell) nStates; ++s)
+		{
+		stSet.clear();
+		stSet.insert(s);
+		AddStateSet(stSet, symbols[s], respectCase, false);
+		}
+
+	/* add the default equates */
+	std::map<char, NxsString> defEq = NxsCharactersBlock::GetDefaultEquates(datatype);
+
+
+
+	/* a molecular datatype whose symbols list is no longer exactly its default list is treated as standard from here on */
+	bool convertToStandard = false;
+	if (((datatype == NxsCharactersBlock::nucleotide) || (datatype == NxsCharactersBlock::dna)) && symbols != "ACGT")
+		convertToStandard = true;
+	else if ((datatype == NxsCharactersBlock::rna) && symbols != "ACGU")
+		convertToStandard = true;
+	else if ((datatype == NxsCharactersBlock::protein) && symbols != "ACDEFGHIKLMNPQRSTVWY*")
+		convertToStandard = true;
+	if (convertToStandard)
+		{
+		if (!extraEquates.empty())
+			userDefinedEquatesBeforeConversion = true;
+		/* merge the user equates into the defaults (map::insert keeps the existing default entry on a key clash),
+			then swap: extraEquates now holds the merged map and defEq is left empty, so the two
+			passes over defEq further down add nothing in this case */
+		defEq.insert(extraEquates.begin(), extraEquates.end());
+		extraEquates.clear();
+		defEq.swap(extraEquates);
+		/* respectcase is only "applicable" to Standard datatype
+			Any symbol extension will be at the end of the symbols list,
+			so here we add the lower case symbols as equates.
+		*/
+		if (respectCase)
+			{
+			std::string lcsym = NxsCharactersBlock::GetDefaultSymbolsForType(datatype);
+			NxsString::to_lower(lcsym);
+			std::string ucsym = lcsym;
+			NxsString::to_upper(ucsym);
+			for (unsigned i = 0; i < ucsym.length(); ++i)
+				{
+				/* symbols without distinct upper/lower forms need no equate */
+				if (ucsym[i] != lcsym[i])
+					{
+					NxsString u;
+					u.append(1, ucsym[i]);
+					extraEquates[lcsym[i]] = u;
+					}
+				}
+			}
+		datatype =  NxsCharactersBlock::standard;
+		}
+
+	/* It is nice to put the all-states code at state code = num_states So here, we'll put this equate in that slot (if such an equate exists)*/
+	std::set<char> targetSet(symbols.begin(), symbols.end());
+	char allStateEquateKey = '\0';
+	std::map<char, NxsString>::const_iterator eqIt = defEq.begin();
+	NxsString taxonName;
+	/* first pass over the default equates: look for one of the form {<every symbol>} and register it before any other equate */
+	for (; eqIt != defEq.end(); ++eqIt)
+		{
+		const char c = eqIt->first;
+		const char u = toupper(c);
+		bool addEq = true;
+		/* an equate key may not shadow the missing, match or gap character */
+		if (c == missing || c == matchChar || c == gapChar)
+			addEq = false;
+		if (!respectCase && (u == toupper(missing) || u == toupper(matchChar) || u == toupper(gapChar)))
+			addEq = false;
+		if (addEq)
+			{
+			const NxsString & s = eqIt->second;
+			unsigned slen = (unsigned)s.length();
+			/* cheap pre-filter: braces plus one character per symbol */
+			if (slen == 2 + symbols.length())
+				{
+				if (s[0] == '{' && s[slen -1] == '}')
+					{
+					std::set<char> contained;
+					for (unsigned j = 1; j < slen - 1; ++j)
+						contained.insert(s[j]);
+					if (contained == targetSet)
+						{
+						allStateEquateKey = c;
+						/* presumably parses the expansion string and registers the state set -- the helper is defined elsewhere; verify */
+						NxsDiscreteStateCell sc = StateCodeForNexusPossibleMultiStateSet(allStateEquateKey, s, *token, UINT_MAX, UINT_MAX, 0L, taxonName);
+						cLookup[(int) allStateEquateKey] = sc;
+						break;
+						}
+					}
+				}
+
+			}
+		}
+
+	/* second pass over the default equates: register everything except the all-states equate handled above */
+	eqIt = defEq.begin();
+	for (; eqIt != defEq.end(); ++eqIt)
+		{
+		const char c = eqIt->first;
+		if (c == allStateEquateKey)
+			continue;
+		const char u = toupper(c);
+		bool addEq = true;
+		if (c == missing || c == matchChar || c == gapChar)
+			addEq = false;
+		if (!respectCase && (u == toupper(missing) || u == toupper(matchChar) || u == toupper(gapChar)))
+			addEq = false;
+		if (addEq)
+			{
+			const NxsString & s = eqIt->second;
+			NxsDiscreteStateCell sc = StateCodeForNexusPossibleMultiStateSet(c, s, *token, UINT_MAX, UINT_MAX, 0L, taxonName);
+			cLookup[(int) c] = sc;
+			}
+		}
+
+
+
+
+	/* add user-defined equates, and only retain the new ones (those that are not datatype defaults). */
+	std::map<char, NxsString> neededExtraEquates;
+	for (eqIt = extraEquates.begin(); eqIt != extraEquates.end(); ++eqIt)
+		{
+		const char c = eqIt->first;
+		const char u = toupper(c);
+		/* keys that are themselves state symbols are skipped */
+		if (PositionInSymbols(c) == NXS_INVALID_STATE_CODE)
+			{
+			bool addEq = true;
+			if (c == missing || c == matchChar || c == gapChar)
+				addEq = false;
+			if (!respectCase && (u == toupper(missing) || u == toupper(matchChar) || u == toupper(gapChar)))
+				addEq = false;
+			if (addEq)
+				{
+				/* remember what the key mapped to before, to detect whether this equate changes anything */
+				const NxsDiscreteStateCell prevCode = cLookup[(int) c];
+				const NxsString & s = eqIt->second;
+				NxsDiscreteStateCell sc = StateCodeForNexusPossibleMultiStateSet(c, s, *token, UINT_MAX, UINT_MAX, 0L, taxonName);
+				cLookup[(int) c] = sc;
+				if (sc != prevCode) /* the equate was new */
+					neededExtraEquates[c] = s;
+				}
+			}
+		else
+			{
+			NCL_ASSERT(convertToStandard); // a equate key that is equal to a symbol can happen if the symbols list is augmented (resulting in a conversion to standard datatype)
+			}
+		}
+	extraEquates = neededExtraEquates;
+	}
+
+/*!
+	Returns the state code for the state set `sset`, registering the set as a new
+	one if necessary (which may trigger the reallocation of mapping info).
+
+	A set with exactly one element is validated as a state index and that element
+	is returned as the code.  Otherwise the multi-state sets registered so far are
+	searched for one with the same members and the same polymorphism flag.
+
+	If nothing matches, every member is validated (unknown states cause an
+	exception: new "fundamental" states can NOT be introduced here).  Then:
+		- a non-polymorphic set that contains as many elements as there are states
+		  including the gap (in a datatype with gaps) is reported as NXS_MISSING_CODE;
+		- if `addToLookup` is false, NXS_INVALID_STATE_CODE is returned;
+		- otherwise the set is added via AddStateSet.
+
+	`nexusSymbol` can be '\0' if no single-character symbol represents the set.  When
+	a set is added with a symbol, the symbol is registered case-sensitively (this is
+	the mechanism for entering equates, and equates are always case sensitive).
+*/
+NxsDiscreteStateCell NxsDiscreteDatatypeMapper::StateCodeForStateSet(const std::set<NxsDiscreteStateCell> & sset, bool isPolymorphic, bool addToLookup, char nexusSymbol)
+	{
+	if (sset.size() == 1)
+		{
+		const NxsDiscreteStateCell onlyState = *sset.begin();
+		ValidateStateIndex(onlyState);
+		return onlyState;
+		}
+	NCL_ASSERT(stateCodeLookupPtr);
+	const NxsDiscreteStateCell vecLen = (NxsDiscreteStateCell)stateSetsVec.size();
+
+	/* codes below nStates are single states (handled above), so the scan begins at the first multi-state entry */
+	for (NxsDiscreteStateCell vecInd = nStates - sclOffset; vecInd < vecLen; ++vecInd)
+		{
+		const NxsDiscreteStateCell code = vecInd + sclOffset;
+		const NxsDiscreteStateSetInfo & candidate = stateCodeLookupPtr[code];
+		if (sset == candidate.states && isPolymorphic == candidate.isPolymorphic)
+			return code;
+		}
+	std::set<NxsDiscreteStateCell>::const_iterator member = sset.begin();
+	while (member != sset.end())
+		{
+		ValidateStateIndex(*member);
+		++member;
+		}
+	const bool sameAsMissing = (!isPolymorphic
+								&& gapChar != '\0'
+								&& (const unsigned)sset.size() == GetNumStatesIncludingGap());
+	if (sameAsMissing)
+		return NXS_MISSING_CODE;
+	if (addToLookup)
+		return AddStateSet(sset, nexusSymbol, true, isPolymorphic);
+	return NXS_INVALID_STATE_CODE;
+	}
+
+/*!
+	Appends a new state set to the mapper and returns the state code assigned to it.
+
+	No check is made for whether an identical state set is already registered.
+	It is MANDATORY that the fundamental states are added first (and in order)
+	before any multi-state set is added; RefreshMappings takes care of this.
+
+	The cached state matrices are cleared, because they no longer cover all codes.
+	If `nexusSymbol` is not '\0' the character lookup is updated for it: for the
+	symbol as given when `symRespectCase` is true, otherwise for both its lower-
+	and upper-case forms (and the upper-case form is what is stored with the set).
+	A set is only recorded as polymorphic if it holds more than one state.
+*/
+NxsDiscreteStateCell NxsDiscreteDatatypeMapper::AddStateSet(const std::set<NxsDiscreteStateCell> & states, char nexusSymbol, bool symRespectCase, bool isPolymorphic)
+	{
+	stateIntersectionMatrix.clear();
+	isStateSubsetMatrix.clear();
+	isStateSubsetMatrixGapsMissing.clear();
+
+	const bool storedAsPoly = (isPolymorphic && states.size() > 1);
+	const char storedSym = (symRespectCase ? nexusSymbol : (char) toupper(nexusSymbol));
+	stateSetsVec.push_back(NxsDiscreteStateSetInfo(states, storedAsPoly, storedSym));
+	/* push_back may have moved the vector's storage, so the lookup pointer is re-seated.
+		sclOffset is negative (RefreshMappings sets it to -1 without a gap and -2 with one),
+		so element -sclOffset is the one for state code 0 and negative codes index the
+		entries that precede it.
+	*/
+	stateCodeLookupPtr = &stateSetsVec[-sclOffset];
+
+	const NxsDiscreteStateCell stateCode = (const NxsDiscreteStateCell)stateSetsVec.size() + sclOffset - 1;
+	if (nexusSymbol == '\0')
+		return stateCode;
+	if (symRespectCase)
+		cLookup[(int) nexusSymbol] = stateCode;
+	else
+		{
+		cLookup[(int) tolower(nexusSymbol)] = stateCode;
+		cLookup[(int) toupper(nexusSymbol)] = stateCode;
+		}
+	return stateCode;
+	}
+
+
+
+/*!
+	Checks that `c` may be used as the index of a "fundamental" state of this datatype.
+
+	Accepted values are those from NXS_MISSING_CODE up to (but excluding) the number of
+	states, plus NXS_GAP_STATE_CODE when the datatype has a gap character.
+	Throws an NxsNCLAPIException for anything else.
+*/
+void NxsDiscreteDatatypeMapper::ValidateStateIndex(NxsDiscreteStateCell c) const
+	{
+	if (c >= NXS_MISSING_CODE)
+		{
+		if (c >= (NxsDiscreteStateCell) nStates)
+			throw NxsNCLAPIException("Illegal usage of state index >= the number of states");
+		return;
+		}
+	/* below NXS_MISSING_CODE only the gap code can be legal */
+	if (c == NXS_GAP_STATE_CODE)
+		{
+		if (gapChar == '\0')
+			throw NxsNCLAPIException("Illegal usage of NXS_GAP_STATE_CODE in a datatype without gaps");
+		return;
+		}
+	if (c == NXS_INVALID_STATE_CODE)
+		throw NxsNCLAPIException("Illegal usage of NXS_INVALID_STATE_CODE as a state index");
+	throw NxsNCLAPIException("Illegal usage of unknown negative state index");
+	}
+
+/*!
+	Checks that `c` is a state code this mapper currently knows about, i.e. that it
+	lies in the range [sclOffset, stateSetsVec.size() + sclOffset).
+	Throws an NxsNCLAPIException if it does not; the message distinguishes the gap
+	code in a datatype without gaps, the invalid-state code, any other code below
+	the range, and codes above the range.
+*/
+void NxsDiscreteDatatypeMapper::ValidateStateCode(NxsDiscreteStateCell c) const
+	{
+	if (c >= sclOffset)
+		{
+		const NxsDiscreteStateCell pastHighest = ((NxsDiscreteStateCell) stateSetsVec.size()) + sclOffset;
+		if (c < pastHighest)
+			return;
+		NxsString err = "Illegal usage of state code > the highest state code. c = ";
+		err << int(c) << " (NxsDiscreteStateCell) stateSetsVec.size() = " << (NxsDiscreteStateCell) stateSetsVec.size();
+		err << " sclOffset = " << sclOffset;
+		throw NxsNCLAPIException(err);
+		}
+	/* c is below the lowest code held by this mapper: pick the most specific message */
+	if (c == NXS_GAP_STATE_CODE && gapChar == '\0')
+		throw NxsNCLAPIException("Illegal usage of NXS_GAP_STATE_CODE in a datatype without gaps");
+	if (c == NXS_INVALID_STATE_CODE)
+		throw NxsNCLAPIException("Illegal usage of NXS_INVALID_STATE_CODE as a state code");
+	throw NxsNCLAPIException("Illegal usage of unknown negative state index");
+	}
+
+
+/*!
+	Always throws an NxsException describing a failure to read one matrix cell.
+
+	The message names the character and taxon using 1-based numbers (`charInd` and
+	`taxInd` are 0-based), adds the taxon name `nameStr` when it is non-empty and
+	differs from the taxon number, and ends with `message` on a new line.
+	If `token` is not NULL it is passed to the exception as well.
+*/
+void NxsDiscreteDatatypeMapper::GenerateNxsExceptionMatrixReading(char const* message, unsigned int taxInd, unsigned int charInd,
+NxsToken* token, const NxsString &nameStr)
+	{
+	const unsigned int charNumber = charInd + 1;
+	const unsigned int taxNumber = taxInd + 1;
+	NxsString e = "Error reading character ";
+	e << charNumber << " for taxon " << taxNumber;
+	if (!nameStr.empty())
+		{
+		/* a name that is just the taxon number adds no information */
+		NxsString numberAsName;
+		numberAsName << taxNumber;
+		if (numberAsName != nameStr)
+			e << " (name \""<< nameStr <<"\")";
+		}
+	e << ":\n" << message;
+	if (!token)
+		throw NxsException(e);
+	throw NxsException(e, *token);
+	}
+
+/*!
+	\returns true if the state set stored for state code `c` was flagged as polymorphic.
+	Generates a NxsNCLAPIException if `c` is not a valid state code.
+*/
+bool NxsDiscreteDatatypeMapper::IsPolymorphic(NxsDiscreteStateCell c) const
+	{
+	NCL_ASSERT(stateCodeLookupPtr);
+	ValidateStateCode(c);
+	const NxsDiscreteStateSetInfo & info = stateCodeLookupPtr[c];
+	return info.isPolymorphic;
+	}
+
+
+/*!
+	Returns NXS_INVALID_STATE_CODE or the index of `c` in the symbols list.
+	case-sensitivity is controlled by this->respectCase attribute: when respectCase
+	is false the lower-case copy of the symbols list is searched as well.
+
+	Only positions smaller than nStates are reported.
+
+	NOTE: the gap "state" and missing characters are NOT in the symbols list.
+*/
+NxsDiscreteStateCell NxsDiscreteDatatypeMapper::PositionInSymbols(char c) const
+	{
+	/* "not found" is detected by comparing against npos in the unsigned domain,
+		rather than by narrowing npos to the signed state-cell type and testing
+		for a negative value. */
+	const std::string::size_type pos = symbols.find(c);
+	if (pos != std::string::npos && pos < nStates)
+		return (NxsDiscreteStateCell) pos;
+	if (!respectCase)
+		{
+		const std::string::size_type lcPos = lcsymbols.find(c);
+		if (lcPos != std::string::npos && lcPos < nStates)
+			return (NxsDiscreteStateCell) lcPos;
+		}
+	return NXS_INVALID_STATE_CODE;
+	}
+
+
+/*!
+	Writes the NEXUS representation of the state code `scode` to `out`.
+
+	If the state set has a symbol of its own, that single character is written.
+	Otherwise the symbols of the member states are written as a multiple character
+	string such as {DNY}: parentheses enclose a polymorphic set, braces any other.
+
+	Generates a NxsNCLAPIException if `scode` is not a valid state code.
+	If a member state has no symbol the string cannot be expressed; `demandSymbols`
+	then controls the behavior.  If `demandSymbols` is true a NxsNCLAPIException is
+	thrown.  If `demandSymbols` is false nothing is written.
+*/
+void NxsDiscreteDatatypeMapper::WriteStateCodeAsNexusString(std::ostream & out, NxsDiscreteStateCell scode, bool demandSymbols) const
+	{
+	ValidateStateCode(scode);
+	const NxsDiscreteStateSetInfo & info =  stateCodeLookupPtr[scode];
+	NCL_ASSERT (&(stateSetsVec.at(scode-sclOffset)) == &info);
+	const char ownSymbol = info.nexusSymbol;
+	if (ownSymbol != '\0')
+		{
+		out << ownSymbol;
+		return;
+		}
+	/* collect all member symbols first, so that nothing is written if one is missing */
+	std::string memberSymbols;
+	typedef std::set<NxsDiscreteStateCell>::const_iterator StateIter;
+	const StateIter stop = info.states.end();
+	for (StateIter it = info.states.begin(); it != stop; ++it)
+		{
+		const NxsDiscreteStateCell member = *it;
+		const char memberSymbol = stateCodeLookupPtr[member].nexusSymbol;
+		if (memberSymbol == '\0')
+			{
+			if (!demandSymbols)
+				return;
+			NxsString err("No symbol found for state code ");
+			err << member;
+			throw NxsNCLAPIException(err);
+			}
+		memberSymbols.append(1, memberSymbol);
+		}
+
+	const bool poly = info.isPolymorphic;
+	out << (poly ? '(' : '{');
+	out << memberSymbols;
+	out << (poly ? ')' : '}');
+	}
+
+/*!
+	Returns the number of states held in the state set recorded for `scode`
+	(after `scode` has been checked with ValidateStateCode).
+*/
+unsigned NxsDiscreteDatatypeMapper::GetNumStatesInStateCode(NxsDiscreteStateCell scode) const
+	{
+	ValidateStateCode(scode);
+	return (unsigned)stateCodeLookupPtr[scode].states.size();
+	}
+
+/*!
+	Writes the beginning of a FORMAT command to `out`: the Datatype, then (only
+	when they differ from the defaults tested below) the Missing, Gap, Symbols and
+	Equate settings. The command is not terminated here.
+*/
+void NxsDiscreteDatatypeMapper::WriteStartOfFormatCommand(std::ostream & out) const
+	{
+	out << "    FORMAT Datatype=" << NxsCharactersBlock::GetNameOfDatatype(datatype);
+	if (missing != '?')
+		out << " Missing=" << missing;
+	if (gapChar != '\0')
+		out << "  Gap=" << gapChar;
+
+	// Symbols: only the symbols beyond the datatype's built-in count are written,
+	// and never for continuous or codon data.
+	if (datatype != NxsCharactersBlock::continuous && datatype != NxsCharactersBlock::codon)
+		{
+		unsigned builtinCount = 4;
+		if (datatype == NxsCharactersBlock::standard)
+			builtinCount = 0;
+		else if (datatype == NxsCharactersBlock::protein)
+			builtinCount = 21;
+		const unsigned symbolCount = (unsigned)symbols.length();
+		if (symbolCount > builtinCount)
+			{
+			out << " Symbols=\"";
+			for (unsigned pos = builtinCount; pos < symbolCount; ++pos)
+				{
+				const char sym = symbols[pos];
+				if (sym == '\0')
+					break;
+				out << sym;
+				}
+			out << "\"";
+			}
+		}
+
+	// Equates: keep only the entries that are absent from, or differ from, the datatype defaults.
+	typedef std::map<char, NxsString>::const_iterator EquateIter;
+	const std::map<char, NxsString> builtinEquates = NxsCharactersBlock::GetDefaultEquates(datatype);
+	std::map<char, NxsString> nonDefault;
+	for (EquateIter eq = extraEquates.begin(); eq != extraEquates.end(); ++eq)
+		{
+		const EquateIter builtin = builtinEquates.find(eq->first);
+		if (builtin == builtinEquates.end() || builtin->second != eq->second)
+			nonDefault[eq->first] = eq->second;
+		}
+	if (!nonDefault.empty())
+		{
+		out << " Equate=\"";
+		for (EquateIter w = nonDefault.begin(); w != nonDefault.end(); ++w)
+			out << ' ' << w->first << '=' << w->second;
+		out << "\"";
+		}
+	}
+
+/*!
+	Reads the next cell of a continuous MATRIX row from `token` and stores it in `row[charNum]`.
+
+	A cell is either a single token or a parenthesized list of tokens. Each token is classified as
+		0 - the missing or gap symbol,
+		1 - a number,
+		2 - the matchchar symbol (copy the value from the first taxon).
+	Anything else raises an "unexpected token" exception.
+
+	The first items.size() values are stored under the corresponding ITEMS name. Any surplus values are
+	appended to the vector of the last item; there a missing/gap symbol is stored as DBL_MAX.
+
+	\param token   tokenizer positioned just before the cell
+	\param taxNum  index of the taxon being read (a matchchar is illegal when this is 0)
+	\param charNum index of the character, or UINT_MAX if the character has been eliminated (the cell is
+	               then parsed but not stored)
+	\param row     row of the continuous matrix that receives the cell
+	\returns false if interleaving is on and the end of the line was reached instead of a cell; true otherwise.
+*/
+bool NxsCharactersBlock::HandleNextContinuousState(NxsToken &token, unsigned taxNum, unsigned charNum, ContinuousCharRow & row, const NxsString & )
+	{
+	if (interleaving)
+		token.SetLabileFlagBit(NxsToken::newlineIsToken);
+	token.SetLabileFlagBit(NxsToken::hyphenNotPunctuation);
+	std::vector<double> v;		// values read (DBL_MAX placeholder for non-numbers)
+	std::vector<int> scored;	// parallel to v: 0 = missing/gap, 1 = number, 2 = matchchar
+	token.GetNextToken();
+	NxsString t;
+	if (interleaving && token.AtEOL())
+		return false;
+	if (token.Equals("("))
+		{
+		token.SetLabileFlagBit(NxsToken::hyphenNotPunctuation);
+		token.GetNextToken();
+		while (!token.Equals(")"))
+			{
+			t = token.GetToken();
+			if (t.length() == 1 && (t[0] == missing || t[0] == gap))
+				{
+				v.push_back(DBL_MAX);
+				scored.push_back(0);
+				}
+			else if (t.length() == 1 && t[0] == matchchar)
+				{
+				v.push_back(DBL_MAX);
+				scored.push_back(2);
+				}
+			else if (!t.IsADouble())
+				GenerateUnexpectedTokenNxsException(token, "a number");
+			else
+				{
+				v.push_back(t.ConvertToDouble());
+				scored.push_back(1);
+				}
+			token.SetLabileFlagBit(NxsToken::hyphenNotPunctuation);
+			token.GetNextToken();
+			}
+		}
+	else
+		{
+		t = token.GetToken();
+		if (t.length() == 1 && (t[0] == missing || t[0] == gap))
+			{
+			v.push_back(DBL_MAX);
+			scored.push_back(0);
+			}
+		else if (t.length() == 1 && t[0] == matchchar)
+			{
+			v.push_back(DBL_MAX);
+			scored.push_back(2);
+			}
+		else if (!t.IsADouble())
+			GenerateUnexpectedTokenNxsException(token, "a number");
+		else
+			{
+			v.push_back(t.ConvertToDouble());
+			scored.push_back(1);
+			}
+		}
+	unsigned n_read = (unsigned)v.size();
+	if (n_read < items.size())
+		{
+		errormsg.clear();
+		errormsg << "For each cell of the MATRIX a value for each of the " << (unsigned)items.size() <<  " ITEMS listed in the FORMAT command is expected.\nOnly " <<  n_read << " values read.";
+		GenerateNxsException(token);
+		}
+	// We've read in the state now, so if this character has been eliminated, we don't want to go any further with it
+	//
+	if (charNum == UINT_MAX)
+		return true;
+
+	// row[charNum] is accessed below, so charNum == row.size() is already out of range.
+	if (charNum >= row.size())
+		GenerateNxsException(token, "Internal Error: character index out of range in continuousMatrix.");
+
+	ContinuousCharCell & cell = row[charNum];
+	cell.clear();
+
+	// One value per ITEM, in the order the items were declared.
+	std::vector<std::string >::const_iterator itemIt = items.begin();
+	std::string key;
+	unsigned curr_ind_in_v = 0;
+	for (; itemIt != items.end(); ++itemIt, ++curr_ind_in_v)
+		{
+		key = *itemIt;
+		if (scored[curr_ind_in_v] == 1)
+			cell[key] = vector<double>(1, v[curr_ind_in_v]);
+		else if (scored[curr_ind_in_v] == 0)
+			cell[key] = vector<double>();
+		else
+			{
+			if (taxNum == 0)
+				GenerateNxsException(token, "MATCHCHAR cannot be used in the first taxon");
+			const vector<double> & first_taxon_vector = continuousMatrix[0][charNum][key];
+			if (first_taxon_vector.empty())
+				GenerateNxsException(token, "First taxon does not have a value to copy, but a MATCHCHAR was found.");
+			else
+				cell[key] = vector<double>(1, first_taxon_vector[0]);
+			}
+		}
+	// Surplus values are appended to the vector of the last item (`key`).
+	// curr_ind_mapped is the position within that vector that the next value will occupy.
+	unsigned curr_ind_mapped = 1;
+	if (!key.empty() && curr_ind_in_v < n_read)
+		{
+		vector<double> & curr_cell_vector = cell[key];
+		for (; curr_ind_in_v < n_read; ++curr_ind_in_v, ++curr_ind_mapped)
+			{
+			if (scored[curr_ind_in_v] == 1)
+				curr_cell_vector.push_back(v[curr_ind_in_v]);
+			else if (scored[curr_ind_in_v] == 0)
+				curr_cell_vector.push_back(DBL_MAX);	// missing/gap placeholder
+			else
+				{
+				// matchchar: copy the value at the same position from the first taxon
+				if (taxNum == 0)
+					GenerateNxsException(token, "MATCHCHAR cannot be used in the first taxon");
+				const vector<double> & first_taxon_vector = continuousMatrix[0][charNum][key];
+				if (first_taxon_vector.size() < curr_ind_mapped+1)
+					GenerateNxsException(token, "First taxon does not have a value to copy, but a MATCHCHAR was found.");
+				else
+					curr_cell_vector.push_back(first_taxon_vector[curr_ind_mapped]);
+				}
+			}
+		}
+	return true;
+	}
+
+/*!
+	Translates the single NEXUS character `currChar` into a state code using the cLookup table.
+	If the table holds no code for it and it is the matchChar, the code stored for `charNum` in
+	`firstTaxonRow` is returned instead. Every other case is reported through
+	GenerateNxsExceptionMatrixReading.
+*/
+NxsDiscreteStateCell NxsDiscreteDatatypeMapper::StateCodeForNexusChar(
+  const char currChar,
+  NxsToken *token,
+  unsigned taxNum,		/* the taxon index, in range [0..`ntax') */
+  unsigned charNum,		/* the character index, in range [0..`nChar') */
+  const NxsDiscreteStateRow * firstTaxonRow,
+  const NxsString & nameStr) const
+  	{
+	const NxsDiscreteStateCell looked = cLookup[static_cast<int>(currChar)];
+	if (looked != NXS_INVALID_STATE_CODE)
+		return looked;
+
+	NxsString emsg;
+	if (currChar != matchChar)
+		{
+		emsg << "Invalid state specified \"" << currChar << "\"";
+		GenerateNxsExceptionMatrixReading(emsg, taxNum, charNum, token, nameStr);
+		return looked;
+		}
+
+	// matchChar: the state has to be copied from the first taxon that has data.
+	if (firstTaxonRow == NULL)
+		GenerateNxsExceptionMatrixReading("Unexpected use of MatchChar in first taxon with data.", taxNum, charNum, token, nameStr);
+	if (firstTaxonRow->size() <= charNum)
+		{
+		emsg << "MatchChar found for character number "  << charNum+1 << " but the first taxon does not have a character state stored for this character.";
+		GenerateNxsExceptionMatrixReading(emsg, taxNum, charNum, token, nameStr);
+		}
+	return (*firstTaxonRow)[charNum];
+	}
+
+/*!
+	Reads the next discrete cell (a single character, or a parenthesized / curly-bracketed group read
+	as one token) from `token`, encodes it with `mapper` and stores the code in `row[charNum]`.
+	\returns false if interleaving is on and the end of the line was reached instead of a cell; true otherwise.
+*/
+bool NxsCharactersBlock::HandleNextDiscreteState(
+  NxsToken &token,
+  unsigned taxNum,
+  unsigned charNum,
+  NxsDiscreteStateRow & row,
+  NxsDiscreteDatatypeMapper &mapper,
+  const NxsDiscreteStateRow * firstTaxonRow,
+  const NxsString & nameStr)
+  	{
+	if (interleaving)
+		token.SetLabileFlagBit(NxsToken::newlineIsToken);
+	NCL_ASSERT(!tokens);
+	token.SetLabileFlagBit(NxsToken::parentheticalToken);
+	token.SetLabileFlagBit(NxsToken::curlyBracketedToken);
+	token.SetLabileFlagBit(NxsToken::singleCharacterToken);
+	token.GetNextToken();
+
+	const bool hitEndOfLine = (interleaving && token.AtEOL());
+	if (hitEndOfLine)
+		return false;
+
+	const NxsString & cellText = token.GetTokenReference();
+	const NxsDiscreteStateCell encoded = mapper.EncodeNexusStateString(cellText, token, taxNum, charNum, firstTaxonRow, nameStr);
+	NCL_ASSERT(charNum < row.size());
+	row[charNum] = encoded;
+	return true;
+	}
+
+/*!
+	Converts `stateAsNexus` to a state code. Strings starting with '(' or '{' are handed to
+	StateCodeForNexusMultiStateSet; any other string must be exactly one character long and is resolved
+	with StateCodeForNexusChar, after which the result is recorded in cLookup under `nexusSymbol`.
+*/
+NxsDiscreteStateCell NxsDiscreteDatatypeMapper::StateCodeForNexusPossibleMultiStateSet(
+  const char nexusSymbol,
+  const std::string &stateAsNexus,
+  NxsToken & token,	/* the token used to read from `in' */
+  const unsigned taxNum,		/* the taxon index, in range [0..`ntax') */
+  const unsigned charNum,		/* the character index, in range [0..`nChar') */
+  const NxsDiscreteStateRow * firstTaxonRow, const NxsString &nameStr)
+	{
+	NCL_ASSERT(stateAsNexus.length() > 0);
+	const char lead = stateAsNexus[0];
+	const bool bracketed = (lead == '(' || lead == '{');
+	if (bracketed)
+		return StateCodeForNexusMultiStateSet(nexusSymbol, stateAsNexus, &token, taxNum, charNum, firstTaxonRow, nameStr);
+
+	if (stateAsNexus.length() > 1)
+		{
+		NxsString emsg;
+		emsg << "Expecting  {} or () around a multiple character state set.  Found " << stateAsNexus << " for taxon " << nameStr;
+		GenerateNxsExceptionMatrixReading(emsg, taxNum, charNum, &token, nameStr);
+		}
+
+	const NxsDiscreteStateCell resolved = StateCodeForNexusChar(lead, &token, taxNum, charNum, firstTaxonRow, nameStr);
+	cLookup[(int) nexusSymbol] = resolved;
+	return resolved;
+	}
+
+/*!
+	Parses a bracketed NEXUS state set such as {ACG}, (01) or {0~3} and returns the state code that
+	StateCodeForStateSet yields for it. A leading '(' marks the set as polymorphic, a leading '{' as
+	uncertain. Whitespace and commas inside the brackets are ignored; `a~b` adds every state index
+	from a to b inclusive.
+
+	Errors are reported through GenerateNxsExceptionMatrixReading: a missing opening bracket, a set that
+	is too short to contain a closing bracket, a range with an invalid start or end, a range whose end
+	precedes its start, a range left unterminated by a trailing '~', and an empty set.
+*/
+NxsDiscreteStateCell NxsDiscreteDatatypeMapper::StateCodeForNexusMultiStateSet(
+  const char nexusSymbol,
+  const std::string &stateAsNexus,
+  NxsToken * token,	/* the token used to read from `in' */
+  const unsigned taxNum,		/* the taxon index, in range [0..`ntax') */
+  const unsigned charNum,		/* the character index, in range [0..`nChar') */
+  const NxsDiscreteStateRow * firstTaxonRow,
+  const NxsString &nameStr)
+  	{
+	const char firstChar = (stateAsNexus.empty() ? '\0' : stateAsNexus[0]);
+	NxsString emsg;
+	const bool poly = (firstChar == '(');
+	if ((!poly) && firstChar != '{')
+		{
+		emsg << "Expecting a state symbol of set of symbols in () or  {} braces.  Found " << stateAsNexus;
+		GenerateNxsExceptionMatrixReading(emsg, taxNum, charNum, token, nameStr);
+		}
+	// The scan below runs from the character after the opening bracket up to the closing bracket,
+	// so the string must hold at least both brackets.
+	if (stateAsNexus.length() < 2)
+		{
+		emsg << "Incomplete state set found \"" << stateAsNexus << '\"';
+		GenerateNxsExceptionMatrixReading(emsg, taxNum, charNum, token, nameStr);
+		}
+	bool tildeFound = false;	// true between a '~' and the state that ends the range
+	NxsDiscreteStateCell prevState = NXS_INVALID_STATE_CODE;
+	char prevChar = firstChar;
+	std::string::const_iterator cIt = stateAsNexus.begin();
+	std::string::const_iterator endIt = stateAsNexus.end();
+	--endIt;
+	NCL_ASSERT((poly && *endIt == ')') || (!poly && *endIt == '}'));
+	std::set<NxsDiscreteStateCell> sset;
+	for (++cIt; cIt != endIt; ++cIt)
+		{
+		const char currChar = *cIt;
+		if ((strchr("\n\r \t", currChar) == NULL) && currChar != ',')
+			{
+			if (currChar == '~')
+				{
+				if (prevState < 0 || prevState >= (NxsDiscreteStateCell)nStates)
+					{
+					emsg << "A state range cannot start with " << prevChar;
+					GenerateNxsExceptionMatrixReading(emsg, taxNum, charNum, token, nameStr);
+					}
+				tildeFound = true;
+				}
+			else
+				{
+				// Add state symbol and record if it is the first or last one in case we encounter a tilde
+				NxsDiscreteStateCell currState;
+				if (tildeFound)
+					{
+					currState = PositionInSymbols(currChar);
+					if (currState == NXS_INVALID_STATE_CODE)
+						{
+						emsg << "A state range cannot end with " << currChar;
+						GenerateNxsExceptionMatrixReading(emsg, taxNum, charNum, token, nameStr);
+						}
+					if (currState < prevState)
+						{
+						emsg << prevChar << '~' << currChar << " is not a valid state range (the end state is a lower index than the start)";
+						GenerateNxsExceptionMatrixReading(emsg, taxNum, charNum, token, nameStr);
+						}
+					for (NxsDiscreteStateCell i = prevState; i <= currState; ++i)
+						sset.insert(i);
+					tildeFound = false;
+					}
+				else
+					{
+					currState = StateCodeForNexusChar(currChar, token, taxNum, charNum, firstTaxonRow, nameStr);
+					sset.insert(currState);
+					}
+				prevState = currState;
+				prevChar = currChar;
+				}
+			}
+		}
+	// prevChar is never set to '~' (only state symbols are recorded in it), so an unterminated
+	// range such as {A~} has to be detected through the pending tildeFound flag.
+	if (tildeFound)
+		{
+		emsg << "State range not terminated -- ending in ~" << *endIt;
+		GenerateNxsExceptionMatrixReading(emsg, taxNum, charNum, token, nameStr);
+		}
+	if (sset.empty())
+		{
+		emsg << "An illegal (empty) state range was found \"" << stateAsNexus << '\"';
+		GenerateNxsExceptionMatrixReading(emsg, taxNum, charNum, token, nameStr);
+		}
+	return StateCodeForStateSet(sset, poly, true, nexusSymbol);
+	}
+
+
+
+/*!
+	Reads the next cell of the MATRIX when TOKENS was specified. The token is looked up (via
+	HandleTokenState) in the state labels listed for the character, and the resulting state index
+	(0, 1, 2, ...) is stored in `row[charNum]`.
+
+	Polymorphic "(...)" and uncertain "{...}" cells are not supported in TOKENS mode: supporting them
+	requires more bookkeeping to allow the mapper to deal with anonymous fundamental states, which is
+	difficult because the number of symbols is not known in TOKENS mode. Such cells raise a NxsException.
+
+	\returns false if interleaving is on and the end of the line was reached instead of a cell; true otherwise.
+*/
+bool NxsCharactersBlock::HandleNextTokenState(
+  NxsToken &token,
+  unsigned taxNum,
+  unsigned charNum,
+  NxsDiscreteStateRow & row,
+  NxsDiscreteDatatypeMapper &mapper,
+  const NxsDiscreteStateRow * firstTaxonRow,
+  const NxsString & nameStr)
+  {
+	if (interleaving)
+		token.SetLabileFlagBit(NxsToken::newlineIsToken);
+	token.GetNextToken();
+	if (interleaving && token.AtEOL())
+		return false;
+	if (token.GetTokenLength() ==  0)
+		GenerateNxsException(token, "Unexpected empty token encountered");
+
+	const bool polymorphism = token.Equals("(");
+	const bool uncertainty = token.Equals("{");
+	if (polymorphism || uncertainty)
+		{
+		/*TODO - state sets and ranges are rejected until the mapper can represent them in TOKENS mode.*/
+		errormsg = "Currently polymorphism and ambiguity are not supported for matrices in TOKENS mode: ";
+		errormsg << token.GetToken() << " found while reading character " << charNum + 1 << " of taxon \"" << nameStr << '\"';
+		throw NxsException(errormsg, token);
+		}
+
+	const NxsDiscreteStateCell sc = HandleTokenState(token, taxNum, charNum, mapper, firstTaxonRow, nameStr);
+	NCL_ASSERT(charNum < row.size());
+	row[charNum] = sc;
+	return true;
+	}
+
+/*!
+	Looks up the current token among the state labels stored in `charStates` for character `charNum`
+	and returns its 0-based position in that list. The comparison honours `respectingCase`.
+
+	Throws a NxsException ("Unrecognized state ...") if the token matches none of the labels, or if
+	no labels at all are stored for `charNum`.
+*/
+NxsDiscreteStateCell NxsCharactersBlock::HandleTokenState(
+  NxsToken &token,	/* the token used to read from `in' */
+  unsigned taxNum,
+  unsigned charNum,
+  NxsDiscreteDatatypeMapper &,
+  const NxsDiscreteStateRow * ,
+  const NxsString & nameStr)
+  	{
+	// Token may be one of the character states listed for character charNum in charStates
+	const std::string t = token.GetToken(respectingCase);
+	NxsStringVectorMap::const_iterator bagIter	= charStates.find(charNum);
+	// find() returns end() when the character has no state labels; that iterator must not be
+	// dereferenced, so such a character falls straight through to the error below.
+	if (bagIter != charStates.end())
+		{
+		NxsStringVector::const_iterator ci_begin	= bagIter->second.begin();
+		NxsStringVector::const_iterator ci_end		= bagIter->second.end();
+		NxsDiscreteStateCell k = 0;
+		for (; ci_begin != ci_end; ++ci_begin, ++k)
+			{
+			if (respectingCase)
+				{
+				if (*ci_begin == t)
+					return k;
+				}
+			else
+				{
+				if (NxsString::case_insensitive_equals(t.c_str(), ci_begin->c_str()))
+					return k;
+				}
+			}
+		}
+	errormsg = "Unrecognized state ";
+	errormsg << t << " found while reading character " << charNum + 1 << " of taxon number " << taxNum + 1;
+	if (!nameStr.empty())
+		errormsg << "(name \"" << nameStr << "\")";
+	throw NxsException(errormsg, token);
+	}
+
+
+/*!
+	Returns the largest valid character index (GetNCharTotal() - 1), or UINT_MAX when there are no characters.
+*/
+unsigned NxsCharactersBlock::GetMaxIndex() const
+	{
+	const unsigned total = GetNCharTotal();
+	return (total == 0 ? UINT_MAX : total - 1);
+	}
+
+/*!
+ Returns the number of indices that correspond to the label (and the number
+ of items that would be added to *inds if inds points to an empty set).
+*/
+unsigned NxsCharactersBlock::GetIndicesForLabel(const std::string &label, NxsUnsignedSet *inds) const
+	{
+	// 1. A character label maps to exactly one index.
+	const unsigned oneBased = CharLabelToNumber(label);
+	if (oneBased != 0)
+		{
+		if (inds)
+			inds->insert(oneBased - 1);
+		return 1;
+		}
+
+	// 2. POS1/POS2/POS3/NONCODING refer to subsets of the default codon position partition (if one is named).
+	if (!defCodonPosPartitionName.empty())
+		{
+		std::string upperLabel(label);
+		NxsString::to_upper(upperLabel);
+		std::string subsetName;
+		if (upperLabel == "POS1")
+			subsetName.assign("1");
+		else if (upperLabel == "POS2")
+			subsetName.assign("2");
+		else if (upperLabel == "POS3")
+			subsetName.assign("3");
+		else if (upperLabel == "NONCODING")
+			subsetName.assign("N");
+		if (!subsetName.empty())
+			{
+			const NxsPartitionsByName::const_iterator partIt = codonPosPartitions.find(defCodonPosPartitionName);
+			if (partIt != codonPosPartitions.end())
+				{
+				const NxsPartition & partition = partIt->second;
+				for (NxsPartition::const_iterator group = partition.begin(); group != partition.end(); ++group)
+					{
+					if (!NxsString::case_insensitive_equals(subsetName.c_str(), group->first.c_str()))
+						continue;
+					const unsigned groupSize = (unsigned)group->second.size();
+					if (inds)
+						inds->insert(group->second.begin(), group->second.end());
+					return groupSize;
+					}
+				}
+			}
+		}
+
+	// 3. The names CONSTANT and GAPPED are computed from the matrix.
+	const bool wantsConstant = NxsString::case_insensitive_equals(label.c_str(), "CONSTANT");
+	const bool wantsGapped = (!wantsConstant && NxsString::case_insensitive_equals(label.c_str(), "GAPPED"));
+	if (wantsConstant || wantsGapped)
+		{
+		NxsUnsignedSet found;
+		if (wantsConstant)
+			FindConstantCharacters(found);
+		else
+			FindGappedCharacters(found);
+		if (inds)
+			inds->insert(found.begin(), found.end());
+		return (unsigned)found.size();
+		}
+
+	// 4. Otherwise the label is a character set name or a number.
+	return GetIndicesFromSetOrAsNumber(label, inds, charSets, GetMaxIndex(), "character");
+	}
+
+/*!
+	Returns true if this set replaces an older definition.
+*/
+bool NxsCharactersBlock::AddNewIndexSet(const std::string &label, const NxsUnsignedSet & inds)
+	{
+	// The label is used as the key exactly as given (no case folding).
+	const NxsString key(label.c_str());
+	const bool alreadyDefined = (charSets.find(key) != charSets.end());
+	charSets[key] = inds;
+	return alreadyDefined;
+	}
+
+/*!
+	Returns true if this set replaces an older definition.
+*/
+bool NxsCharactersBlock::AddNewPartition(const std::string &label, const NxsPartition & inds)
+	{
+	// Partitions are keyed by the upper-cased label.
+	NxsString key(label.c_str());
+	key.ToUpper();
+	const bool alreadyDefined = (charPartitions.find(key) != charPartitions.end());
+	charPartitions[key] = inds;
+	return alreadyDefined;
+	}
+
+/*!
+	Returns true if this set replaces an older definition.
+*/
+bool NxsCharactersBlock::AddNewCodonPosPartition(const std::string &label, const NxsPartition & inds, bool isDef)
+	{
+	// Codon position partitions are keyed by the upper-cased label.
+	NxsString key(label.c_str());
+	key.ToUpper();
+	const bool alreadyDefined = (codonPosPartitions.find(key) != codonPosPartitions.end());
+	codonPosPartitions[key] = inds;
+	// `isDef` makes this partition the default one.
+	if (isDef)
+		defCodonPosPartitionName = key;
+	return alreadyDefined;
+	}
+
+/*!
+	Returns true if this set replaces an older definition.
+*/
+bool NxsCharactersBlock::AddNewExSet(const std::string &label, const NxsUnsignedSet & inds)
+	{
+	// The label is used as the key exactly as given (no case folding).
+	const NxsString key(label.c_str());
+	const bool alreadyDefined = (exSets.find(key) != exSets.end());
+	exSets[key] = inds;
+	return alreadyDefined;
+	}
+
+/*! In v2.1 of the API, the NxsTaxaBlockAPI and NxsAssumptionsBlockAPI pointers
+	are usually NULL.  These block assignments are made during the parse.
+*/
+NxsCharactersBlock::NxsCharactersBlock(
+  NxsTaxaBlockAPI *tb,			/* the taxa block object to consult for taxon labels (can be 0L)*/
+  NxsAssumptionsBlockAPI *ab)	/* the assumptions block object to consult for exclusion sets (can be 0L) */
+  	:NxsTaxaBlockSurrogate(tb, NULL)
+	{
+	NCL_BLOCKTYPE_ATTR_NAME = "CHARACTERS";
+	assumptionsBlock = ab;
+
+	// Optional behaviours are all switched off by default.
+	allowAugmentingOfSequenceSymbols = false;
+	convertAugmentedToMixed = false;
+	supportMixedDatatype = false;
+	writeInterleaveLen = -1;
+
+	// Reset() is called last, after the settings above are in place.
+	Reset();
+	}
+/*! Excludes characters whose indices are contained in the set `exset'.
+	\returns number of characters actually excluded (some may have already been excluded).
+*/
+unsigned NxsCharactersBlock::ApplyExset(
+  NxsUnsignedSet &exset)	/* set of character indices to exclude in range [0..`nChar') */
+	{
+	// The previous exclusion state is discarded; the new one is `eliminated` plus `exset`.
+	excluded.clear();
+	excluded.insert(eliminated.begin(), eliminated.end());
+	excluded.insert(exset.begin(), exset.end());
+	return (unsigned) excluded.size();
+	}
+
+/*! Includes characters whose indices are contained in the set `inset'.
+	\returns number of characters that are included after the operation
+*/
+unsigned NxsCharactersBlock::ApplyIncludeset(
+  NxsUnsignedSet &inset)	/* set of character indices to include in range [0..`nChar') */
+	{
+	// Eliminated characters can never be re-included, so drop them from the request first.
+	// Elements are erased by value: the iterator-range overload of erase() only accepts iterators
+	// into the set being modified, so passing iterators of another set is undefined behaviour.
+	NxsUnsignedSet inc(inset);
+	for (NxsUnsignedSet::const_iterator elIt = eliminated.begin(); elIt != eliminated.end(); ++elIt)
+		inc.erase(*elIt);
+	for (NxsUnsignedSet::const_iterator incIt = inc.begin(); incIt != inc.end(); ++incIt)
+		excluded.erase(*incIt);
+	return nChar - (unsigned) excluded.size();
+	}
+
+/*! Converts a character label to a 1-offset number corresponding to the character's position based on data from
+	the CharLabels NEXUS command.
+	If `s' is not a valid character label, returns the value 0.
+*/
+unsigned NxsCharactersBlock::CharLabelToNumber(
+  const std::string &inp) const	/* the character label to convert */
+	{
+	// Labels are indexed by their upper-cased form, which makes the lookup case-insensitive.
+	NxsString upperLabel(inp.c_str());
+	upperLabel.ToUpper();
+	const std::map<std::string, unsigned>::const_iterator hit = ucCharLabelToIndex.find(upperLabel);
+	return (hit == ucCharLabelToIndex.end() ? 0 : 1 + hit->second);
+	}
+
+/*!
+	Transfers the data of `other' to this object and then resets `other'. Used to convert a NxsDataBlock
+	object to a NxsCharactersBlock object in programs where it is desirable to just have a NxsCharactersBlock for
+	storage but also allow users to enter the information in the form of the deprecated NxsDataBlock.
+
+	The assumptions block (if any) is re-pointed at this object through SetCallback. Every member listed
+	below is assigned from `other'; `other.Reset()' is then called and this object's `transfMgr' is reset.
+	`isEmpty' is always set to false.
+*/
+void NxsCharactersBlock::Consume(
+  NxsCharactersBlock &other)	/* NxsCharactersBlock object from which to copy */
+	{
+	// Detach our current assumptions block and take over the one owned by `other'.
+	if (assumptionsBlock)
+		assumptionsBlock->SetCallback(NULL);
+	assumptionsBlock = other.assumptionsBlock;
+	other.assumptionsBlock = NULL;
+	if (assumptionsBlock)
+		assumptionsBlock->SetCallback(this);
+
+	nChar = other.nChar;
+	nTaxWithData = other.nTaxWithData;
+	matchchar = other.matchchar;
+	respectingCase = other.respectingCase;
+	transposing = other.transposing;
+	interleaving = other.interleaving;
+	tokens = other.tokens;
+	labels = other.labels;
+	missing = other.missing;
+	gap = other.gap;
+	gapMode = other.gapMode;
+	symbols = other.symbols;
+	userEquates = other.userEquates;
+	defaultEquates = other.defaultEquates;
+	discreteMatrix = other.discreteMatrix;
+	continuousMatrix = other.continuousMatrix;
+	eliminated = other.eliminated;
+	excluded = other.excluded;
+	ucCharLabelToIndex = other.ucCharLabelToIndex;
+	indToCharLabel = other.indToCharLabel;
+	charStates = other.charStates;
+	globalStateLabels = other.globalStateLabels;
+	items = other.items;
+	charSets = other.charSets;
+	charPartitions = other.charPartitions;
+	// Codon position partitions are consulted by GetIndicesForLabel (POS1, POS2, ...), so they
+	// have to travel with the character sets and partitions.
+	codonPosPartitions = other.codonPosPartitions;
+	defCodonPosPartitionName = other.defCodonPosPartitionName;
+	exSets = other.exSets;
+	datatype = other.datatype;
+	originalDatatype = other.originalDatatype;
+	datatypeReadFromFormat = other.datatypeReadFromFormat;
+	statesFormat = other.statesFormat;
+	datatypeMapperVec = other.datatypeMapperVec;
+	isEmpty = false;
+	isUserSupplied = other.isUserSupplied;
+	supportMixedDatatype = other.supportMixedDatatype;
+	convertAugmentedToMixed = other.convertAugmentedToMixed;
+	allowAugmentingOfSequenceSymbols = other.allowAugmentingOfSequenceSymbols;
+	writeInterleaveLen = other.writeInterleaveLen;
+	other.Reset();
+	transfMgr.Reset();
+	}
+
+
+/*!
+	Writes the states of taxon `taxNum' for the characters in the half-open index range
+	[`beginCharInd', `endCharInd') to `out'. Nothing is written if the taxon's row is empty.
+
+	The output form depends on the block's settings:
+	  - continuous data: a space followed by the output of ShowStateLabels for each character;
+	  - codon data: the global state label of each cell, or three gap / three missing symbols;
+	  - mixed datatypes: each cell is written by the datatype mapper of its own character;
+	  - TOKENS mode: a space followed by the gap symbol or the escaped state label of each cell;
+	  - otherwise: the whole range is written in one call by the mapper of character 0.
+
+	Throws NxsNCLAPIException if a required datatype mapper is missing or, in TOKENS mode, if a
+	cell has no state label.
+*/
+void NxsCharactersBlock::WriteStatesForTaxonAsNexus(
+  std::ostream &out,			/* output stream on which to print matrix */
+  unsigned taxNum,
+  unsigned beginCharInd,
+  unsigned endCharInd) const	{
+  	NCL_ASSERT(endCharInd <= this->nChar);
+
+	if (datatype == continuous)
+		{
+		const ContinuousCharRow & row = GetContinuousMatrixRow(taxNum);
+		if (!row.empty())
+			{
+			NCL_ASSERT(endCharInd <= row.size());
+			for (unsigned charInd = beginCharInd; charInd < endCharInd; ++charInd)
+				{
+				out << ' ';
+				ShowStateLabels(out, taxNum, charInd, UINT_MAX);
+				}
+			}
+		}
+	else
+		{
+		const NxsDiscreteStateRow & row = GetDiscreteMatrixRow(taxNum);
+		const unsigned rs = (const unsigned)row.size();
+		NCL_ASSERT(endCharInd <= rs);
+		if (rs > 0)
+			{
+			if (this->datatype == NxsCharactersBlock::codon)
+				{
+				// Codon cells are written as a global state label; gaps and
+				// out-of-range codes are written as three gap or three missing symbols.
+				for (unsigned charInd = beginCharInd; charInd < endCharInd; ++charInd)
+					{
+					NxsDiscreteStateCell sc = row[charInd];
+					if (sc == NXS_GAP_STATE_CODE)
+						out << gap << gap << gap;
+					else if (sc >= 0 && sc < (NxsDiscreteStateCell) globalStateLabels.size())
+						out << globalStateLabels[sc];
+					else
+						out << missing << missing << missing;
+					}
+				}
+			else
+				{
+				// The mapper of character 0 is required even for mixed data, where it is
+				// replaced per character inside the loop below.
+				const NxsDiscreteDatatypeMapper * dm = GetDatatypeMapperForChar(0);
+				if (dm == NULL)
+					throw NxsNCLAPIException("No DatatypeMapper in WriteStatesForTaxonAsNexus");
+				if (IsMixedType())
+					{
+
+					for (unsigned charInd = beginCharInd; charInd < endCharInd; ++charInd)
+						{
+						dm = GetDatatypeMapperForChar(charInd);
+						if (dm == NULL)
+							{
+							errormsg = "No DatatypeMapper for character ";
+							errormsg << charInd + 1 << " in WriteStatesForTaxonAsNexus";
+							throw NxsNCLAPIException(errormsg);
+							}
+						const NxsDiscreteStateCell c = row.at(charInd);
+						dm->WriteStateCodeAsNexusString(out, c);
+						}
+					}
+				else
+					{
+					if (tokens)
+						{
+						for (unsigned charInd = beginCharInd; charInd < endCharInd; ++charInd)
+							{
+							NxsDiscreteStateCell sc = row[charInd];
+							out << ' ';
+							if (sc == NXS_GAP_STATE_CODE)
+								out << gap;
+							else
+								{
+								NxsString sl = GetStateLabel(charInd, sc); /*v2.1to2.2 4 */
+								// a single blank is treated here as "no label available"
+								if (sl == " ")
+									{
+									errormsg = "Writing character state ";
+									errormsg << 1 + sc << " for character " << 1+charInd << ", but no appropriate chararcter label or symbol was found.";
+									throw NxsNCLAPIException(errormsg);
+									}
+								else
+									out  << NxsString::GetEscaped(sl);
+								}
+							}
+						}
+					else
+						{
+						// Build the iterator pair [begIt, endIt) covering the requested character range.
+						std::vector<NxsDiscreteStateCell>::const_iterator endIt = row.begin() + beginCharInd;
+						std::vector<NxsDiscreteStateCell>::const_iterator begIt = endIt;
+						if (endCharInd == row.size())
+							endIt = row.end();
+						else
+							endIt += endCharInd - beginCharInd;
+						dm->WriteStateCodeRowAsNexus(out, begIt, endIt);
+						}
+					}
+				}
+			}
+		}
+	}
+
+
+/*!
+	Provides a dump of the contents of the `matrix' variable. Useful for testing whether data is being read as
+	expected. If marginText is NULL, matrix output is placed flush left. If each line of output should be prefaced with
+	a tab character, specify "\t" for `marginText'.
+*/
+void NxsCharactersBlock::DebugShowMatrix(
+  std::ostream &out,			/* output stream on which to print matrix */
+  bool ,	/* deprecated, matchchar no longer used for output */
+  const char *marginText) const /* for printing first on each line */
+	{
+	if (taxa == NULL)
+		return;
+	const unsigned labelColumnWidth = taxa->GetMaxTaxonLabelLength();
+	const unsigned taxonCount = GetNTaxTotal();
+	for (unsigned taxInd = 0; taxInd < taxonCount; ++taxInd)
+		{
+		// Taxa whose row holds no data are not printed.
+		const bool rowIsEmpty = (datatype == continuous
+			? GetContinuousMatrixRow(taxInd).empty()
+			: GetDiscreteMatrixRow(taxInd).empty());
+		if (rowIsEmpty)
+			continue;
+
+		if (marginText != NULL)
+			out << marginText;
+		const NxsString currTaxonLabel = taxa->GetTaxonLabel(taxInd); /*v2.1to2.2 4 */
+		out << currTaxonLabel;
+
+		// Pad every label to the width of the longest one, plus five blanks, so the states line up.
+		const unsigned padding = labelColumnWidth - (unsigned)currTaxonLabel.size() + 5;
+		out << std::string(padding, ' ');
+
+		WriteStatesForTaxonAsNexus(out, taxInd, 0, nChar);
+		out << endl;
+		}
+	}
+
+/*!
+	Returns the largest value of GetObsNumStates over the characters (restricted to active characters
+	when `onlyActiveChars' is true). The result is never smaller than 1.
+*/
+unsigned NxsCharactersBlock::GetMaxObsNumStates(bool countMissingStates, bool onlyActiveChars) NCL_COULD_BE_CONST /*v2.1to2.2 1 */
+	{
+	unsigned widest = 1;
+	for (unsigned charInd = 0; charInd < nChar; ++charInd)
+		{
+		if (onlyActiveChars && !IsActiveChar(charInd))
+			continue;
+		const unsigned observed = GetObsNumStates(charInd, countMissingStates);
+		if (widest < observed)
+			widest = observed;
+		}
+	return widest;
+	}
+
+/*!
+	Performs a count of the number of active characters.
+*/
+unsigned NxsCharactersBlock::GetNumActiveChar() NCL_COULD_BE_CONST /*v2.1to2.2 1 */
+	{
+	// Tally the indices in [0, nChar) for which IsActiveChar holds.
+	unsigned activeCount = 0;
+	for (unsigned charInd = 0; charInd < nChar; ++charInd)
+		{
+		if (!IsActiveChar(charInd))
+			continue;
+		++activeCount;
+		}
+	return activeCount;
+	}
+
+
+
+/* Returns label for character state `j' at character `i', if a label has been specified. If no label was specified,
+	returns string containing a single blank (i.e., " ").
+*/
+NxsString NxsCharactersBlock::GetStateLabelImpl( /*v2.1to2.2 4 */
+  unsigned i,	/* the locus in range [0..`nChar') */
+  unsigned j) const	/* the 0-offset index of the state of interest */
+	{
+	// Labels given for this particular character take precedence ...
+	const NxsStringVectorMap::const_iterator perChar = charStates.find(i);
+	if (perChar != charStates.end() && j < static_cast<unsigned>(perChar->second.size()))
+		return perChar->second[j];
+	// ... then the labels shared by all characters ...
+	if (j < globalStateLabels.size())
+		return globalStateLabels[j];
+	// ... and a single blank signals that no label exists.
+	return NxsString(" ");
+	}
+
+
+/*!
+	Returns true if `ch' occurs in the `symbols' string. When `respectingCase' is false, both `ch' and each symbol
+	are upper-cased before being compared, so the search is case-insensitive; otherwise the comparison is exact.
+*/
+bool NxsCharactersBlock::IsInSymbols(
+  char ch) NCL_COULD_BE_CONST /* the symbol character to search for */ /*v2.1to2.2 1 */
+	{
+	// toupper has undefined behavior for negative arguments other than EOF, so every char is routed through
+	// unsigned char before the call.
+	const char char_in_question = (respectingCase ? ch : (char)toupper((unsigned char)ch));
+	for (std::string::const_iterator sIt = symbols.begin(); sIt != symbols.end(); ++sIt)
+		{
+		const char char_in_symbols = (respectingCase ? *sIt : (char)toupper((unsigned char)*sIt));
+		if (char_in_symbols == char_in_question)
+			return true;
+		}
+	return false;
+	}
+
+/*!
+	Called when a CHARLABELS command needs to be parsed from within the CHARACTERS block. Consumes every token after
+	CHARLABELS up to and including the semicolon that terminates the command. Previously stored character labels
+	are discarded first. The k-th token is stored as the label of character k (0-offset) in `indToCharLabel', and its
+	upper-cased form is entered in `ucCharLabelToIndex'. A token consisting of a single blank means "no label": nothing
+	is stored, but the character index still advances. An exception is generated if more than `nChar' labels appear.
+*/
+void NxsCharactersBlock::HandleCharlabels(
+  NxsToken &token)	/* the token used to read from `in' */
+	{
+	ucCharLabelToIndex.clear();
+	indToCharLabel.clear();
+	unsigned charInd = 0;
+	token.GetNextToken();
+	while (!token.Equals(";"))
+		{
+		if (charInd >= nChar)
+			GenerateNxsException(token, "Number of character labels exceeds NCHAR specified in DIMENSIONS command");
+		NxsString label = token.GetToken();
+		if (label != " ")
+			{
+			indToCharLabel[charInd] = label;
+			label.ToUpper();
+			ucCharLabelToIndex[label] = charInd;
+			}
+		++charInd;
+		token.GetNextToken();
+		}
+	}
+
+/*!
+	Called when a CHARSTATELABELS command needs to be parsed from within the CHARACTERS block. Deals with everything
+	after the token CHARSTATELABELS up to and including the semicolon that terminates the command. Previously stored
+	character labels and state labels are discarded first. Each comma-separated entry has the form
+		<character number> [<character label>] [/ <state label> ...]
+	Character numbers are 1-offset in the file, must lie in [1..`nChar'] and must be strictly increasing. Labels are
+	stored under the 0-offset character index in `indToCharLabel', `ucCharLabelToIndex' and `charStates'.
+	Throws NxsException for an invalid character number or an unexpected token, and generates an exception if state
+	labels are supplied when the datatype is continuous.
+*/
+void NxsCharactersBlock::HandleCharstatelabels(
+  NxsToken &token)	/* the token used to read from `in' */
+	{
+	unsigned currChar = 0;
+	bool semicolonFoundInInnerLoop = false;
+	bool tokenAlreadyRead = false;
+
+	charStates.clear();
+	ucCharLabelToIndex.clear();
+	indToCharLabel.clear();
+
+	for (;;)
+		{
+		if (semicolonFoundInInnerLoop)
+			break;
+
+		if (tokenAlreadyRead)
+			tokenAlreadyRead = false;
+		else
+			token.GetNextToken();
+
+		if (token.Equals(";"))
+			break;
+
+		// Token should be the character number; create a new association.
+		// sn keeps the value -1 (and is rejected below) when the token is not an integer.
+		//
+		int sn = -1;
+		try
+			{
+			sn = token.GetToken().ConvertToInt();
+			}
+		catch (NxsString::NxsX_NotANumber &)
+			{
+			}
+		unsigned n = (unsigned)sn;
+		if (sn < 1 || n > nChar || n <= currChar)
+			{
+			errormsg = "Invalid character number (";
+			errormsg += token.GetToken();
+			errormsg += ") found in CHARSTATELABELS command (either out of range or not interpretable as an integer)";
+			throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+			}
+
+		currChar = n;
+
+		token.GetNextToken();
+
+		// Token should be the character label or / if there is no label
+		//	' ' is a placeholder for no label.
+		//
+		NxsString t = token.GetToken();
+		if (t != " " && !token.Equals("/"))
+			{
+			indToCharLabel[currChar - 1] = t;
+			t.ToUpper();
+			ucCharLabelToIndex[t] = currChar - 1;
+			}
+		if (!token.Equals("/"))
+			token.GetNextToken();
+
+		// Token should be a slash character if state labels were provided for this character; otherwise,
+		// token should be one of the following:
+		// 1) the comma separating information for different characters, in which case we read in the
+		//	  next token (which should be the next character number)
+		// 2) the semicolon indicating the end of the command
+		//
+		if (!token.Equals("/"))
+			{
+			if (!token.Equals(",") && !token.Equals(";"))
+				{
+				errormsg = "Expecting a comma or semicolon here, but found \"";
+				errormsg += token.GetToken();
+				errormsg += "\" instead";
+				throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+				}
+			if (token.Equals(","))
+				token.GetNextToken();
+			// the token now held is either the next character number or the terminating semicolon; the top of
+			// the outer loop must examine it rather than read past it
+			tokenAlreadyRead = true;
+			continue;
+			}
+
+		// Now create a new association for the character states list
+
+		for (;;)
+			{
+			token.GetNextToken();
+
+			if (token.Equals(";"))
+				{
+				semicolonFoundInInnerLoop = true;
+				break;
+				}
+
+			if (token.Equals(","))
+				break;
+
+			if (datatype == continuous)
+				GenerateNxsException(token, "State Labels cannot be specified when the datatype is continuous");
+
+			// Token should be a character state label; add it to the list
+			NxsString cslabel = token.GetToken();
+			charStates[n - 1].push_back(cslabel);
+
+			} // inner for (;;) loop (grabbing state labels for character n)
+		} // outer for (;;) loop
+	}
+
+/*!
+	Called when a DIMENSIONS command needs to be parsed from within the CHARACTERS block. Deals with everything after
+	the token DIMENSIONS up to and including the semicolon that terminates the command. `newtaxaLabel', `ntaxLabel'
+	and `ncharLabel' are the keywords to recognise; Read passes "NEWTAXA", "NTAX" and "NCHAR". Tokens that match
+	none of the keywords and are not the semicolon are ignored.
+
+	On success `nChar' and `nTaxWithData' are set. With NEWTAXA in effect the taxa block is given the number of taxa
+	read from the command; otherwise the command's NTAX (if any) must not exceed the number of taxa in the taxa
+	block. Throws NxsException if NCHAR is missing, if NTAX is missing while NEWTAXA is in effect, if no taxa are
+	known, or if NTAX exceeds the number of taxa in the taxa block.
+*/
+void NxsCharactersBlock::HandleDimensions(
+  NxsToken &token,			/* the token used to read from `in' */
+  NxsString newtaxaLabel,	/* the label used in data file for `newtaxa' */
+  NxsString ntaxLabel,		/* the label used in data file for `ntax' */
+  NxsString ncharLabel)		/* the label used in data file for `nChar' */
+	{
+	nChar = 0;
+	unsigned ntaxInCommand = 0;
+	bool atSemicolon = false;
+	while (!atSemicolon)
+		{
+		token.GetNextToken();
+		if (token.Equals(newtaxaLabel))
+			newtaxa = true;
+		else if (token.Equals(ntaxLabel))
+			{
+			DemandEquals(token, "after NTAX in DIMENSIONS command");
+			ntaxInCommand = DemandPositiveInt(token, ntaxLabel.c_str());
+			}
+		else if (token.Equals(ncharLabel))
+			{
+			DemandEquals(token, "in DIMENSIONS command");
+			nChar = DemandPositiveInt(token, ncharLabel.c_str());
+			}
+		else
+			atSemicolon = token.Equals(";");
+		}
+
+	if (nChar == 0)
+		{
+		errormsg = "DIMENSIONS command must have an NCHAR subcommand .";
+		throw NxsException(errormsg, token);
+		}
+
+	if (!newtaxa)
+		{
+		// The taxa must already be known; NTAX in the command may only select a subset of them.
+		AssureTaxaBlock(false, token, "Dimensions");
+		const unsigned ntaxinblock = taxa->GetNTax();
+		if (ntaxinblock == 0)
+			{
+			errormsg = "A TAXA block must be read before character data, or the DIMENSIONS command must use the NEWTAXA.";
+			throw NxsException(errormsg, token);
+			}
+
+		if (ntaxinblock < ntaxInCommand)
+			{
+			errormsg = ntaxLabel;
+			errormsg << " in " << NCL_BLOCKTYPE_ATTR_NAME;
+			errormsg << " block must be less than or equal to NTAX in TAXA block\nNote: one circumstance that can cause this error is \nforgetting to specify ";
+			errormsg << ntaxLabel << " in DIMENSIONS command when \na TAXA block has not been provided";
+			throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+			}
+		if (ntaxInCommand == 0)
+			nTaxWithData = ntaxinblock;
+		else
+			nTaxWithData = ntaxInCommand;
+		return;
+		}
+
+	// NEWTAXA is in effect: the taxa are being introduced by this block.
+	if (ntaxInCommand == 0)
+		{
+		errormsg = "DIMENSIONS command must have an NTAX subcommand when the NEWTAXA option is in effect.";
+		throw NxsException(errormsg, token);
+		}
+	AssureTaxaBlock(createImpliedBlock, token, "Dimensions");
+	if (!createImpliedBlock)
+		{
+		taxa->Reset();
+		if (nexusReader)
+			nexusReader->RemoveBlockFromUsedBlockList(taxa);
+		}
+	taxa->SetNtax(ntaxInCommand);
+	nTaxWithData = ntaxInCommand;
+	}
+
+/*!
+	Called when an ELIMINATE command needs to be parsed from within the CHARACTERS block. Deals with everything after
+	the token ELIMINATE up to and including the semicolon that terminates the command. The character set is read
+	by NxsSetReader::ReadSetDefinition into the NxsUnsignedSet `eliminated', and every eliminated index is then also
+	added to `excluded'. Judging by the original documentation the stored indices are 0-offset (e.g. "eliminate 4-7;"
+	yields 3, 4, 5 and 6); that conversion is done by the set reader, not here.
+
+	If `eliminated' is already non-empty when the command is met, a warning is issued through the reader (when one is
+	attached) and the new eliminations are added to the existing ones.
+*/
+void NxsCharactersBlock::HandleEliminate(
+  NxsToken &token)	/* the token used to read from `in' */
+	{
+	const bool repeatedCommand = !eliminated.empty();
+	if (repeatedCommand && nexusReader)
+		nexusReader->NexusWarnToken("Only one ELIMINATE command should be used in a CHARACTERS or DATA block (it must appear before the MATRIX command).\n   New character eliminations will be added to the previous eliminated characters (the previously eliminated characters will continue to be excluded).", NxsReader::UNCOMMON_SYNTAX_WARNING, token);
+	token.GetNextToken();
+	NxsSetReader::ReadSetDefinition(token, *this, "Character", "Eliminate", &eliminated);
+	NCL_ASSERT(eliminated.size() <= nChar);
+	// every eliminated character is also excluded
+	excluded.insert(eliminated.begin(), eliminated.end());
+	}
+
+
+
+/*!
+	Called from HandleMatrix function to read in a standard (i.e., non-transposed) matrix. Interleaving, if
+	applicable, is dealt with herein: each pass of the outer loop reads one interleave page (a non-interleaved
+	matrix is a single page), and each pass of the middle loop reads the row of one taxon within that page.
+
+	`toInMem' maps the position of a taxon within the MATRIX command to its index in the taxa block. It is filled in
+	on the first page and used on later pages to verify that the taxa appear in the same order. If the taxa block has
+	no labels yet, the labels met on the first page are added to it.
+
+	Throws NxsException on malformed input and NxsSignalCanceledParseException (after clearing the matrix) if signal
+	checking is enabled and the signal count changes while reading.
+*/
+void NxsCharactersBlock::HandleStdMatrix(
+  NxsToken &token)	/* the token used to read from `in' */
+	{
+	NCL_ASSERT(taxa != NULL);
+	unsigned indOfTaxInCommand;
+	unsigned indOfTaxInMemory;
+	unsigned currChar = 0;
+	unsigned firstChar = 0;
+	unsigned lastChar = nChar;
+	unsigned nextFirst = 0;
+	unsigned page = 0;
+	const unsigned ntlabels = taxa->GetNumTaxonLabels();
+	errormsg.clear();
+	bool taxaBlockNeedsLabels = (ntlabels == 0);
+	if (!taxaBlockNeedsLabels && ntlabels < nTaxWithData)
+		{
+		errormsg << "Not enough taxlabels are known to read characters for " << nTaxWithData << " taxa in the Matrix command.";
+		throw NxsException(errormsg, token);
+		}
+	ContinuousCharRow emptyContRow;
+	NxsDiscreteStateRow emptyDiscRow;
+	ContinuousCharRow *contRowPtr = NULL;
+	NxsDiscreteStateRow *discRowPtr = NULL;
+	NxsDiscreteStateRow *ftDiscRowPtr = NULL;	/* row of the first taxon read; handed to the state handlers */
+	const bool isContinuous = (datatype == NxsCharactersBlock::continuous);
+	if (isContinuous)
+		emptyContRow.resize(nChar);
+	else
+		emptyDiscRow.assign(nChar, NXS_INVALID_STATE_CODE);
+	std::vector<unsigned> toInMem(nTaxWithData, UINT_MAX);
+	std::vector<unsigned> nCharsRead(nTaxWithData, 0);
+
+	unsigned numSigInts = NxsReader::getNumSignalIntsCaught();
+	const bool checkingSignals = NxsReader::getNCLCatchesSignals();
+	const unsigned MAX_NUM_CHARS_BETWEEN_SIGNAL_CHECKS = 1000;
+	for (; currChar < nChar; page++)
+		{
+		for (indOfTaxInCommand = 0; indOfTaxInCommand < nTaxWithData ; indOfTaxInCommand++)
+			{
+			unsigned numCharsSinceLastSignalCheck = 0;
+			if (checkingSignals && NxsReader::getNumSignalIntsCaught() != numSigInts)
+				{
+				if (isContinuous)
+					continuousMatrix.clear();
+				else
+					discreteMatrix.clear();
+				throw NxsSignalCanceledParseException("Reading Characters Block");
+				}
+			NxsString nameStr;
+			if (labels)
+				{
+				token.GetNextToken();
+				nameStr = token.GetToken();
+				if (taxaBlockNeedsLabels)
+					{
+					if (taxa->IsAlreadyDefined(nameStr))
+						{
+						errormsg << "Data for this taxon (" << nameStr << ") has already been saved";
+						throw NxsException(errormsg, token);
+						}
+					try {
+						indOfTaxInMemory = taxa->AddTaxonLabel(nameStr);
+						}
+					catch (NxsException &x)
+						{
+						if (nameStr == ";")
+							{
+							errormsg << "Unexpected ; after only " << indOfTaxInCommand << " taxa were read (expecting characters for " << nTaxWithData << " taxa).";
+							throw NxsException(errormsg, token);
+							}
+						x.addPositionInfo(token);
+						// rethrow the caught object itself (with the position just added) rather than a copy
+						throw;
+						}
+					}
+				else
+					{
+					unsigned numOfTaxInMemory = taxa->TaxLabelToNumber(nameStr);
+					if (numOfTaxInMemory == 0)
+						{
+						if (token.Equals(";"))
+							{
+							if (currChar != nChar)
+								errormsg << "Unexpected ; (after only " << currChar << " characters were read)";
+							else
+								errormsg << "Unexpected ; (after characters were read for only " << indOfTaxInCommand << " out of " << nTaxWithData << " taxa)";
+							}
+						else
+							errormsg << "Could not find taxon named \"" << nameStr << "\" among stored taxon labels";
+						// The expected taxon is only known once this position has been seen on the first page;
+						// before that toInMem still holds UINT_MAX, which is not a valid taxon index.
+						if (currChar > 0 && toInMem[indOfTaxInCommand] != UINT_MAX)
+							errormsg << "\n   Expecting data for taxon \"" << taxa->GetTaxonLabel(toInMem[indOfTaxInCommand]) << "\"";
+						throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+						}
+					indOfTaxInMemory = numOfTaxInMemory - 1;
+					}
+				}
+			else
+				{
+				indOfTaxInMemory = indOfTaxInCommand;
+				nameStr << (indOfTaxInMemory + 1);
+				}
+			if (page == 0)
+				{
+				if (isContinuous)
+					{
+					NCL_ASSERT(indOfTaxInMemory < continuousMatrix.size());
+					continuousMatrix[indOfTaxInMemory] = emptyContRow;
+					}
+				else
+					{
+					NCL_ASSERT(indOfTaxInMemory < discreteMatrix.size());
+					discreteMatrix[indOfTaxInMemory] = emptyDiscRow;
+					}
+				if (toInMem[indOfTaxInCommand] != UINT_MAX)
+					{
+					errormsg << "Characters for taxon \"" << nameStr << "\" (number " << indOfTaxInMemory + 1 << " and \"" << taxa->GetTaxonLabel(indOfTaxInMemory) << "\" according to the taxa block) have already been stored";
+					throw NxsException(errormsg, token);
+					}
+				toInMem[indOfTaxInCommand] = indOfTaxInMemory;
+				}
+			else
+				{
+				if (toInMem[indOfTaxInCommand] != indOfTaxInMemory)
+					{
+					errormsg << "Ordering of taxa must be identical to that in first interleave page. Taxon \"" << nameStr << "\" was not expected.";
+					throw NxsException(errormsg, token);
+					}
+				}
+
+			if (firstChar > 0 && nCharsRead[indOfTaxInCommand] >= firstChar)
+				{
+				errormsg << "Data for this taxon (" << nameStr << ") have already been saved";
+				throw NxsException(errormsg, token);
+				}
+			if (isContinuous)
+				contRowPtr = &continuousMatrix[indOfTaxInMemory];
+			else
+				{
+				discRowPtr = &discreteMatrix[indOfTaxInMemory];
+				if (ftDiscRowPtr == NULL)
+					ftDiscRowPtr = discRowPtr;
+				}
+
+			//******************************************************
+			//******** Beginning of loop through characters ********
+			//******************************************************
+			bool atEOL = false;
+			for (currChar = firstChar; currChar < lastChar; currChar++)
+				{
+				if (checkingSignals)
+					{
+					if (numCharsSinceLastSignalCheck >= MAX_NUM_CHARS_BETWEEN_SIGNAL_CHECKS)
+						{
+						if (NxsReader::getNumSignalIntsCaught() != numSigInts)
+							{
+							if (isContinuous)
+								continuousMatrix.clear();
+							else
+								discreteMatrix.clear();
+							throw NxsSignalCanceledParseException("Reading Characters Block");
+							}
+						numCharsSinceLastSignalCheck = 0;
+						}
+					else
+						numCharsSinceLastSignalCheck++;
+					}
+
+				NxsDiscreteDatatypeMapper * currMapper =  GetMutableDatatypeMapperForChar(currChar);
+				// atEOL will be false only if a newline character is encountered before character j processed
+				// (NOTE(review): i.e. despite its name a false value marks the end of the line -- confirm against
+				// the HandleNext*State functions)
+				if (isContinuous)
+					atEOL = HandleNextContinuousState(token, indOfTaxInMemory, currChar, *contRowPtr, nameStr);
+				else
+					{
+					NCL_ASSERT(currMapper);
+					if (tokens)
+						atEOL = HandleNextTokenState(token, indOfTaxInMemory, currChar, *discRowPtr, *currMapper, ftDiscRowPtr, nameStr);
+					else
+						atEOL = HandleNextDiscreteState(token, indOfTaxInMemory, currChar, *discRowPtr, *currMapper, ftDiscRowPtr, nameStr);
+					}
+				if (interleaving && !atEOL)
+					{
+					if (lastChar < nChar && currChar != lastChar)
+						{
+						errormsg << "Each line within an interleave page must comprise the same number of characters.  Error reading taxon \"" << nameStr << '\"';
+						throw NxsException(errormsg, token);
+						}
+
+					// currChar should be firstChar in next go around
+					nextFirst = currChar;
+
+					// Set lastChar to currChar so that we can check to make sure the remaining lines
+					// in this interleave page end at the same place
+					lastChar = currChar;
+					}
+				}
+			if (lastChar > 0)
+				nCharsRead[indOfTaxInCommand] = lastChar - 1;
+			if (lastChar < nChar && indOfTaxInCommand > 0)
+				{
+				token.SetLabileFlagBit(NxsToken::newlineIsToken);
+				token.GetNextToken();
+				if (!token.AtEOL())
+					{
+					errormsg << "Each line within an interleave page must comprise the same number of characters\n. Expecting the end of a line, but found " << token.GetToken() << " when reading data for taxon \"" << nameStr << '\"';
+					throw NxsException(errormsg, token);
+					}
+				}
+			else
+				{
+				const char nextch = token.PeekAtNextChar();
+				if (indOfTaxInCommand > 0 && (!atEOL) && (strchr(";[\n\r \t", nextch) == NULL) && nexusReader)
+					{
+					errormsg << "Expecting a whitespace character at the end of the characters for taxon \""<< nameStr << "\" but found " << nextch;
+					nexusReader->NexusWarnToken(errormsg, NxsReader::UNCOMMON_SYNTAX_WARNING, token);
+					errormsg.clear();
+					}
+				}
+			}
+		firstChar = nextFirst;
+		lastChar = nChar;
+		taxaBlockNeedsLabels = false; /* taxaBlockNeedsLabels can only be true on the first page */
+		}
+	}
+
+/*!
+	Called from HandleMatrix function to read in a transposed matrix. Interleaving, if applicable, is dealt with herein.
+
+	In a transposed matrix each row of the command holds the states of one character for all taxa. Every row of every
+	taxon in [0..`nTaxWithData') is first reset to an empty row of `nChar' cells. Each pass of the outer loop then
+	reads one interleave page; the loop ends once a page has run through the last taxon. Throws NxsException on
+	malformed input.
+*/
+void NxsCharactersBlock::HandleTransposedMatrix(
+  NxsToken &token)	/* the token used to read from in */
+	{
+	NCL_ASSERT(taxa);
+	unsigned currTaxon = 0;
+	unsigned firstTaxon = 0;	/* first taxon (column) read on the current page */
+	unsigned lastTaxon = nTaxWithData;	/* one past the last taxon to read on the current page */
+	unsigned nextFirst = 0;	/* value firstTaxon will take on the next page */
+	unsigned page = 0;
+	const bool continuousData =  (datatype == NxsCharactersBlock::continuous);
+	unsigned indOfCharInCommand, indOfCharInMemory;
+	const bool isContinuous = (datatype == NxsCharactersBlock::continuous);
+
+	if (isContinuous)
+		{
+		ContinuousCharRow emptyContRow(nChar);
+		for (unsigned i = 0; i < nTaxWithData; ++ i)
+			continuousMatrix[i] = emptyContRow;
+		}
+	else
+		{
+		NxsDiscreteStateRow emptyDiscRow(nChar, NXS_INVALID_STATE_CODE);
+		for (unsigned i = 0; i < nTaxWithData; ++ i)
+			discreteMatrix[i] = emptyDiscRow;
+		}
+	// toInMem maps the position of a character within the command to its index in memory; it is filled in on the
+	// first page and checked on later pages. UINT_MAX marks a position that has not been seen yet.
+	vector<unsigned> toInMem(nChar, UINT_MAX);
+	vector<unsigned> nTaxRead(nChar, 0);
+	// character labels are taken from the matrix itself only if none are stored when reading starts
+	bool needsCharLabels = indToCharLabel.empty();
+	for (;;	page++)
+		{
+		for (indOfCharInCommand = 0; indOfCharInCommand < nChar; indOfCharInCommand++)
+			{
+			NxsString rawToken;
+			if (labels)
+				{
+				token.GetNextToken();
+				if (needsCharLabels)
+					{
+					// first page and no labels stored yet: this token defines the label of the character
+					rawToken = token.GetToken();
+					NxsString s = rawToken;
+					s.ToUpper();
+					if (ucCharLabelToIndex.count(s) > 0)
+						{
+						errormsg << "Data for this character (" << token.GetToken() << ") has already been saved";
+						throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+						}
+					ucCharLabelToIndex[s] = indOfCharInCommand;
+					indToCharLabel[indOfCharInCommand] = rawToken;
+					indOfCharInMemory = indOfCharInCommand;
+					}
+				else // either not the first interleaved page, or character labels were already defined before the matrix
+					{
+					// the token must match a stored label (case-insensitively, via the upper-cased lookup)
+					rawToken = token.GetToken();
+					NxsString s = rawToken;
+					s.ToUpper();
+					LabelToIndexMap::const_iterator iter = ucCharLabelToIndex.find(s);
+					if (iter == ucCharLabelToIndex.end())
+						{
+						errormsg << "Could not find character named " << token.GetToken() <<  " among stored character labels";
+						throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+						}
+					indOfCharInMemory = iter->second;
+					}
+				}
+			else
+				indOfCharInMemory = indOfCharInCommand;
+
+			if (page == 0)
+				{
+				if (toInMem[indOfCharInCommand] != UINT_MAX)
+					{
+					errormsg << "States for character " << indOfCharInCommand;
+					if (!rawToken.empty())
+						errormsg << " (" << rawToken << ") ";
+					errormsg << "have already been stored";
+					throw NxsException(errormsg, token);
+					}
+				toInMem[indOfCharInCommand] = indOfCharInMemory;
+				}
+			else
+				{
+				// later pages must list the characters in the order recorded on the first page
+				if (toInMem[indOfCharInCommand] != indOfCharInMemory)
+					{
+					errormsg << "The order of characters must be in the same order in each page of the interleaved matrix. Character " << rawToken << " was unexpected.";
+					throw NxsException(errormsg, token);
+					}
+				}
+			if (firstTaxon > 0 && nTaxRead[indOfCharInCommand] >= firstTaxon)
+				{
+				errormsg << "Data for this character ";
+				if (!rawToken.empty())
+					errormsg << '(' << rawToken << ") ";
+				errormsg << "has already been saved";
+				throw NxsException(errormsg, token);
+				}
+
+			// only dereferenced below for discrete data
+			NxsDiscreteDatatypeMapper * currMapper =  GetMutableDatatypeMapperForChar(indOfCharInMemory);
+
+			for (currTaxon = firstTaxon; currTaxon < lastTaxon; currTaxon++)
+				{
+				// NOTE(review): despite its name, a false value of atEOL is what the code below treats as the end
+				// of the current interleave line -- confirm against the HandleNext*State functions
+				bool atEOL = false;
+				NxsString nameStr;
+				// taxa are identified by their 1-offset number here; no taxon labels appear in a transposed row
+				nameStr << 1+currTaxon;
+				if (continuousData)
+					{
+					ContinuousCharRow *contRowPtr = &continuousMatrix[currTaxon];
+					atEOL = HandleNextContinuousState(token, currTaxon, indOfCharInMemory, *contRowPtr, nameStr);
+					}
+				else
+					{
+					NxsDiscreteStateRow *discRowPtr = &discreteMatrix[currTaxon];
+					if (tokens)
+						atEOL = HandleNextTokenState(token,  currTaxon, indOfCharInMemory, *discRowPtr, *currMapper, NULL, nameStr);
+					else
+						atEOL = HandleNextDiscreteState(token, currTaxon, indOfCharInMemory, *discRowPtr, *currMapper, NULL, nameStr);
+					}
+				if (interleaving && !atEOL)
+					{
+					if (lastTaxon < nTaxWithData && currTaxon != lastTaxon)
+						GenerateNxsException(token, "Each line within an interleave page must comprise the same number of taxa");
+
+					// currTaxon should be firstTaxon in next go around
+					nextFirst = currTaxon;
+
+					// Set lastTaxon to currTaxon so that we can check to make sure the
+					// remaining lines in this interleave page end at the same place
+					lastTaxon = currTaxon;
+					}
+				}
+			if (currTaxon > 0)
+				nTaxRead[indOfCharInCommand] = currTaxon - 1;
+			if (lastTaxon < nTaxWithData && indOfCharInCommand > 0)
+				{
+				// the page stops short of the last taxon, so this row must end exactly at a newline
+				token.SetLabileFlagBit(NxsToken::newlineIsToken);
+				token.GetNextToken();
+				if (!token.AtEOL())
+					{
+					errormsg = "Each line within an interleave page must comprise the same number of taxa\n.";
+					errormsg << "Expecting the end of a line, but found " << token.GetToken();
+					throw NxsException(errormsg, token);
+					}
+				}
+			}
+		// set up the next page: it starts where this one stopped and may run to the last taxon
+		firstTaxon = nextFirst;
+		lastTaxon = nTaxWithData;
+		if (currTaxon == nTaxWithData)
+			break;
+		needsCharLabels = false;
+		}
+	}
+
+/*!
+	Called when a MATRIX command needs to be parsed from within the CHARACTERS block. Deals with everything after the
+	token MATRIX up to and including the semicolon that terminates the command.
+
+	The steps are: make sure datatype mapper objects and a taxa block exist; enlarge `symbols' if state labels
+	imply more states than there are symbols (tokens + standard datatype only); discard any stored matrix and size
+	the matrix for the active datatype to one row per taxon in the taxa block; read the body with
+	HandleTransposedMatrix or HandleStdMatrix; and finally demand the closing semicolon. Throws NxsException if the
+	taxa block holds no taxa, and NxsUnimplementedException for a transposed matrix of mixed datatype.
+*/
+void NxsCharactersBlock::HandleMatrix(
+  NxsToken &token)	/* the token used to read from `in' */
+	{
+	const NxsPartition noPartition;
+	const std::vector<DataTypesEnum> noDatatypes;
+	if (datatypeMapperVec.empty())
+		CreateDatatypeMapperObjects(noPartition, noDatatypes);
+	if (taxa == NULL)
+		AssureTaxaBlock(false, token, "Matrix");
+
+	if (tokens && GetDataType() == standard)
+		{
+		/* we can run into trouble here because the number of states can be larger than the
+		symbols list in the NxsDiscreteDatatypeMapper object (because CharState labels can be
+		used in a matrix, and symbols don't have to be introduced for each character).
+
+		We deal with that here, by introducing \0 symbols
+		*/
+		const unsigned nSymbols = (unsigned)symbols.length();
+		unsigned nStatesNeeded = nSymbols;
+		NxsStringVectorMap::const_iterator csIt = this->charStates.begin();
+		for (; csIt != this->charStates.end(); ++csIt)
+			nStatesNeeded = std::max(nStatesNeeded, (unsigned)csIt->second.size());
+		if (nStatesNeeded > nSymbols)
+			{
+			symbols.append(nStatesNeeded - nSymbols, '\0');
+			CreateDatatypeMapperObjects(noPartition, noDatatypes);
+			}
+		}
+
+	const unsigned ntax = taxa->GetNTax();
+	if (ntax == 0)
+		{
+		errormsg = "Must precede ";
+		errormsg << NCL_BLOCKTYPE_ATTR_NAME << " block with a TAXA block or specify NEWTAXA and NTAX in the DIMENSIONS command";
+		throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+		}
+
+	// start from empty matrices; only the one matching the datatype gets rows
+	discreteMatrix.clear();
+	continuousMatrix.clear();
+	if (datatype == NxsCharactersBlock::continuous)
+		continuousMatrix.resize(ntax);
+	else
+		discreteMatrix.resize(ntax);
+
+	if (IsMixedType() && transposing)
+		throw NxsUnimplementedException("Reading of transposed, mixed datatype matrices will probably never be supported by NCL");
+
+	if (transposing)
+		HandleTransposedMatrix(token);
+	else
+		HandleStdMatrix(token);
+	DemandEndSemicolon(token, "MATRIX");
+
+	if (assumptionsBlock)
+		assumptionsBlock->SetCallback(this);
+	if (convertAugmentedToMixed)
+		AugmentedSymbolsToMixed();
+	}
+
+/*!
+	Called when a STATELABELS command needs to be parsed from within the CHARACTERS block. Deals with everything after
+	the token STATELABELS up to and including the semicolon that terminates the command. Each comma-separated entry
+	is a character number followed by the state labels of that character. Character numbers are 1-offset in the
+	file and are shifted back by one before being stored, so the keys of `charStates' are 0-offset.
+
+	Previously stored state labels are discarded. Generates an exception if the datatype is continuous, and throws
+	NxsException if a character number is not an integer in [1..`nChar'].
+*/
+void NxsCharactersBlock::HandleStatelabels(
+  NxsToken &token)	/* the token used to read from `in' */
+	{
+	if (datatype == continuous)
+		GenerateNxsException(token, "STATELABELS cannot be specified when the datatype is continuous");
+	charStates.clear();
+	for (;;)
+		{
+		token.GetNextToken();
+		if (token.Equals(";"))
+			break;
+		// n keeps the value -1 (and is rejected below) when the token is not an integer
+		int n = -1;
+		try
+			{
+			n = token.GetToken().ConvertToInt();
+			}
+		catch (NxsString::NxsX_NotANumber &)
+			{
+			}
+		if (n < 1 || n > (int)nChar)
+			{
+			errormsg = "Invalid character number (";
+			errormsg << token.GetToken() << ") found in STATELABELS command (either out of range or not interpretable as an integer)";
+			throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+			}
+		NxsStringVector & v = charStates[n - 1];
+		bool semicolonFoundInInnerLoop = false;
+		for (;;)
+			{
+			token.GetNextToken();
+			if (token.Equals(";"))
+				{
+				semicolonFoundInInnerLoop = true;
+				break;
+				}
+			if (token.Equals(","))
+				break;
+			v.push_back(token.GetToken());
+			}
+		// The last entry may be closed directly by the semicolon that ends the command. In that case the command
+		// is finished; reading another token here would consume the start of the next command.
+		if (semicolonFoundInInnerLoop)
+			break;
+		}
+	}
+
+/*!
+	Reads everything following the block name (which is read by the NxsReader object) up to the END or ENDBLOCK
+	statement, dispatching each command to its handler. Commands already dealt with by HandleBasicBlockCommands are
+	not processed again, and commands that are not recognised are skipped. Throws NxsException if the block ends
+	without a matrix having been stored. Overrides the abstract virtual function in the base class.
+*/
+void NxsCharactersBlock::Read(
+  NxsToken &token)	/* the token used to read from `in' */
+	{
+	isEmpty = false;
+	isUserSupplied = true;
+
+	NxsString beginCmd("BEGIN ");
+	beginCmd += NCL_BLOCKTYPE_ATTR_NAME;
+	DemandEndSemicolon(token, beginCmd.c_str());
+	nTaxWithData = 0;
+
+	for (;;)
+		{
+		token.GetNextToken();
+		const NxsBlock::NxsCommandResult res = HandleBasicBlockCommands(token);
+		if (res == NxsBlock::NxsCommandResult(STOP_PARSING_BLOCK))
+			{
+			const bool noMatrixRead = (discreteMatrix.empty() && continuousMatrix.empty());
+			if (noMatrixRead)
+				{
+				errormsg.clear();
+				errormsg << "\nA " << NCL_BLOCKTYPE_ATTR_NAME << " block must contain a Matrix command";
+				throw NxsException(errormsg, token);
+				}
+			return;
+			}
+		if (res == NxsBlock::NxsCommandResult(HANDLED_COMMAND))
+			continue;
+
+		if (token.Equals("DIMENSIONS"))
+			HandleDimensions(token, "NEWTAXA", "NTAX", "NCHAR");
+		else if (token.Equals("FORMAT"))
+			HandleFormat(token);
+		else if (token.Equals("ELIMINATE"))
+			HandleEliminate(token);
+		else if (token.Equals("TAXLABELS"))
+			HandleTaxLabels(token);
+		else if (token.Equals("CHARSTATELABELS"))
+			HandleCharstatelabels(token);
+		else if (token.Equals("CHARLABELS"))
+			HandleCharlabels(token);
+		else if (token.Equals("STATELABELS"))
+			HandleStatelabels(token);
+		else if (token.Equals("MATRIX"))
+			HandleMatrix(token);
+		else
+			SkipCommand(token);
+		}
+	}
+
+/*!
+	Writes a brief, human-readable report of the contents of this block to `out': the numbers of taxa and characters,
+	the datatype and format settings, character and state labels (if any character labels are stored), the equate
+	macros, the eliminated and excluded characters (printed 1-offset), and finally the data matrix via
+	DebugShowMatrix. Overrides the abstract virtual function in the base class.
+*/
+void NxsCharactersBlock::Report(
+  std::ostream &out) NCL_COULD_BE_CONST  /* the output stream to which to write the report */ /*v2.1to2.2 1 */
+	{
+	out << '\n' << NCL_BLOCKTYPE_ATTR_NAME << " block contains ";
+	switch (nTaxWithData)
+		{
+		case 0:
+			out << "no taxa";
+			break;
+		case 1:
+			out << "one taxon";
+			break;
+		default:
+			out << nTaxWithData << " taxa";
+		}
+	out << " and ";
+	switch (nChar)
+		{
+		case 0:
+			out << "no characters";
+			break;
+		case 1:
+			out << "one character";
+			break;
+		default:
+			out << nChar << " characters";
+		}
+	out << endl;
+
+	out << "  Data type is \"" << this->GetDatatypeName() << "\"" << endl;
+
+	out << (respectingCase ? "  Respecting case" : "  Ignoring case") << endl;
+
+	out << (tokens ? "  Multicharacter tokens allowed in data matrix" : "  Data matrix entries are expected to be single symbols") << endl;
+
+	if (!labels)
+		out << "  No labels are expected on left side of matrix" << endl;
+	else if (transposing)
+		out << "  Character labels are expected on left side of matrix" << endl;
+	else
+		out << "  Taxon labels are expected on left side of matrix" << endl;
+
+	if (!indToCharLabel.empty())
+		{
+		out << "  Character and character state labels:" << endl;
+		for (unsigned charInd = 0; charInd < nChar; ++charInd)
+			{
+			const unsigned charNum = 1 + charInd;
+			const std::map<unsigned, std::string>::const_iterator labelIt = indToCharLabel.find(charInd);
+			out << "    " << charNum << "    ";
+			if (labelIt != indToCharLabel.end())
+				out << labelIt->second << endl;
+			else
+				out << "(no label provided for this character)" << endl;
+
+			// Output state labels if any are defined for this character
+			//
+			const NxsStringVectorMap::const_iterator statesIt = charStates.find(charInd);
+			if (statesIt == charStates.end())
+				continue;
+			const NxsStringVector & stateLabels = statesIt->second;
+			for (unsigned m = 0; m < stateLabels.size(); ++m)
+				out << "        " << stateLabels[m] << endl;
+			}
+		}
+
+	if (transposing)
+		{
+		if (interleaving)
+			out << "  Matrix transposed and interleaved" << endl;
+		else
+			out << "  Matrix transposed but not interleaved" << endl;
+		}
+	else
+		{
+		if (interleaving)
+			out << "  Matrix interleaved but not transposed" << endl;
+		else
+			out << "  Matrix neither transposed nor interleaved" << endl;
+		}
+
+	out << "  Missing data symbol is '" << missing << '\'' << endl;
+
+	if (matchchar == '\0')
+		out << "  No match character specified" << endl;
+	else
+		out << "  Match character is '" << matchchar << '\'' << endl;
+
+	if (gap == '\0')
+		out << "  No gap character specified" << endl;
+	else
+		out << "  Gap character specified is '" << gap << '\'' << endl;
+
+	out << "  Valid symbols are: " << symbols << endl;
+
+	if (userEquates.empty() && defaultEquates.empty())
+		out << "  No equate macros have been defined" << endl;
+	else
+		{
+		// default equates are listed before user-supplied ones
+		out << "  Equate macros in effect:" << endl;
+		std::map<char, NxsString>::const_iterator eqIt;
+		for (eqIt = defaultEquates.begin(); eqIt != defaultEquates.end(); ++eqIt)
+			out << "   " << eqIt->first << " = " << eqIt->second << endl;
+		for (eqIt = userEquates.begin(); eqIt != userEquates.end(); ++eqIt)
+			out << "   " << eqIt->first << " = " << eqIt->second << endl;
+		}
+
+	if (eliminated.empty())
+		out << "  No characters were eliminated" << endl;
+	else
+		{
+		out << "  The following characters were eliminated:" << endl;
+		for (NxsUnsignedSet::const_iterator elIt = eliminated.begin(); elIt != eliminated.end(); ++elIt)
+			out << "   " << ((*elIt)+1) << endl;
+		}
+
+	if (excluded.empty())
+		out << "  no characters excluded" << endl;
+	else
+		{
+		out << "  The following characters have been excluded:\n";
+		for (NxsUnsignedSet::const_iterator exIt = excluded.begin(); exIt != excluded.end(); ++exIt)
+			out << "   " << (*exIt+1) << endl;
+		}
+
+	out << "  Data matrix:" << endl;
+	DebugShowMatrix(out, false, "    ");
+	}
+
+/*!
+	Writes this block to `out' as a NEXUS CHARACTERS block: the DIMENSIONS command followed by the ELIMINATE, FORMAT,
+	CHARSTATELABELS (or CHARLABELS) and MATRIX commands and any skipped commands.
+*/
+void NxsCharactersBlock::WriteAsNexus(std::ostream &out) const
+	{
+	out << "BEGIN CHARACTERS;\n";
+	WriteBasicBlockCommands(out);
+	out << "    DIMENSIONS";
+	if (taxa != NULL)
+		{
+		const unsigned numWithData = GetNTaxWithData();
+		// NTax is only written when some, but not all, of the taxa have data in this block
+		if (numWithData > 0 && numWithData != taxa->GetNTax())
+			out << " NTax=" << numWithData;
+		}
+	// codon data is reported with three characters per stored column
+	unsigned numCharsToReport = nChar;
+	if (datatype == NxsCharactersBlock::codon)
+		numCharsToReport *= 3;
+	out << " NChar=" << numCharsToReport << ";\n";
+	WriteEliminateCommand(out);
+	WriteFormatCommand(out);
+	WriteCharStateLabelsCommand(out);
+	WriteMatrixCommand(out);
+	WriteSkippedCommands(out);
+	out << "END;\n";
+	}
+
+
+/*!
+	Writes the ELIMINATE command listing every eliminated character as a 1-based number. Writes nothing when no
+	characters were eliminated.
+*/
+void NxsCharactersBlock::WriteEliminateCommand(
+  std::ostream &out) const /* output stream that receives the command */
+	{
+	if (!eliminated.empty())
+		{
+		out << "    ELIMINATE";
+		NxsUnsignedSet::const_iterator elimIt = eliminated.begin();
+		const NxsUnsignedSet::const_iterator elimEnd = eliminated.end();
+		while (elimIt != elimEnd)
+			{
+			// indices are stored 0-based but written 1-based
+			out << ' ' << (*elimIt + 1);
+			++elimIt;
+			}
+		out << ";\n";
+		}
+	}
+
+
+/*!
+	Writes the MATRIX command to `out'. Only taxa for which TaxonIndHasData() is true get a row. The characters are
+	written in pages of `writeInterleaveLen' columns (a single page holding all `nChar' columns when
+	`writeInterleaveLen' is less than 1), with a blank line between pages. For continuous data the stream precision
+	is raised to 10 while the matrix is written and restored afterwards. Does nothing if there is no taxa block.
+*/
+void NxsCharactersBlock::WriteMatrixCommand(
+  std::ostream &out) const /* output stream on which to print matrix */
+	{
+	if (taxa == NULL)
+		return;
+	const unsigned width = taxa->GetMaxTaxonLabelLength();
+	const unsigned ntaxTotal = taxa->GetNTax();
+	out << "Matrix\n";
+	int prec = 6;
+	if (datatype == continuous)
+		prec = (int)out.precision(10);
+	unsigned stride = (this->writeInterleaveLen < 1 ? this->nChar : this->writeInterleaveLen);
+	unsigned begChar = 0;
+	// Column at which the states start: the widest label plus five spaces of padding.
+	const std::string::size_type labelColumnWidth = (std::string::size_type)width + 5;
+	while (begChar < this->nChar)
+		{
+		if (begChar > 0)
+			out << '\n';
+		unsigned endChar  = std::min(begChar + stride, this->nChar);
+		for (unsigned i = 0; i < ntaxTotal; i++)
+			{
+			if (this->TaxonIndHasData(i))
+				{
+				const std::string currTaxonLabel = NxsString::GetEscaped(taxa->GetTaxonLabel(i));
+				out << currTaxonLabel;
+				// The label written is the escaped form, whose length may exceed `width' (presumably `width' is
+				// measured on the raw labels -- TODO confirm). Computing the padding as an unsigned difference
+				// would wrap around to a huge count in that case, so clamp it and always keep at least one
+				// space between the label and the states.
+				const std::string::size_type currTaxonLabelLen = currTaxonLabel.size();
+				const std::string::size_type numSpaces = (currTaxonLabelLen < labelColumnWidth ? labelColumnWidth - currTaxonLabelLen : 1);
+				for (std::string::size_type k = 0; k < numSpaces; k++)
+					out << ' ';
+
+				WriteStatesForMatrixRow(out, i, UINT_MAX, begChar, endChar);
+				out << '\n';
+				}
+			}
+		begChar = endChar;
+		}
+	out << ";\n";
+	if (datatype == continuous)
+		out.precision(prec);
+	}
+
+/*!
+	Returns the states of all `nChar' characters for the taxon `rowIndex' as they would be written in a matrix row.
+	Returns an empty string when that taxon has no data.
+*/
+std::string NxsCharactersBlock::GetMatrixRowAsStr(const unsigned rowIndex) const /* index of the taxon whose row is wanted */
+	{
+	std::string rowText;
+	if (this->TaxonIndHasData(rowIndex))
+		{
+		std::ostringstream buffer;
+		WriteStatesForMatrixRow(buffer, rowIndex, UINT_MAX, 0, this->nChar);
+		rowText = buffer.str();
+		}
+	return rowText;
+	}
+
+/*!
+	Writes one matrix row to `out' by forwarding `currTaxonIndex', `beginChar' and `endChar' unchanged to
+	WriteStatesForTaxonAsNexus. The unnamed third argument is accepted but never read.
+*/
+void NxsCharactersBlock::WriteStatesForMatrixRow(
+  std::ostream &out,				/* the output stream on which to write */
+  unsigned currTaxonIndex,	/* the taxon, in range [0..`ntax') */
+  unsigned ,					/* ignored; callers in this file pass UINT_MAX */
+  unsigned beginChar,			/* index of the first character to write */
+  unsigned endChar) const		/* end of the character range; presumably exclusive (callers in this file pass `nChar' or the start of the next page) */
+	{
+	WriteStatesForTaxonAsNexus(out, currTaxonIndex, beginChar, endChar);
+	}
+
+
+/*!
+	Writes the CHARLABELS command. Characters without a label that precede a labelled character are written as the
+	placeholder `_'; unlabelled characters after the last labelled one are not written at all. Writes nothing when
+	no character labels are stored.
+*/
+void NxsCharactersBlock::WriteCharLabelsCommand(std::ostream &out) const
+	{
+	if (indToCharLabel.empty())
+		return;
+	out << "    CHARLABELS";
+	// The map is ordered by character index, so walking it visits the labelled characters in increasing order;
+	// entries at or beyond nChar are ignored.
+	unsigned nextIndex = 0;
+	std::map<unsigned, std::string>::const_iterator labelIt = indToCharLabel.begin();
+	for (; labelIt != indToCharLabel.end() && labelIt->first < nChar; ++labelIt)
+		{
+		// fill the gap of unlabelled characters since the previous label
+		for (; nextIndex < labelIt->first; ++nextIndex)
+			out << " _";
+		out << ' ' << NxsString::GetEscaped(labelIt->second);
+		nextIndex = labelIt->first + 1;
+		}
+	out << ";\n";
+	}
+
+/*!
+	Writes the CHARSTATELABELS command: one comma-separated entry per character consisting of the 1-based character
+	number, the escaped character label (if any) and, when state labels are stored for that character, a `/'
+	followed by the escaped state labels. A character with neither kind of label is written as its number followed
+	by `/'. If no state labels are stored at all, the CHARLABELS command is written instead.
+*/
+void NxsCharactersBlock::WriteCharStateLabelsCommand(std::ostream &out) const
+	{
+	if (charStates.empty())
+		{
+		this->WriteCharLabelsCommand(out);
+		return;
+		}
+	bool isFirst = true;
+	std::map<unsigned, std::string>::const_iterator resultSearchIt;
+	const std::map<unsigned, std::string>::const_iterator endIt = indToCharLabel.end();
+	const NxsStringVectorMap::const_iterator endCSIt = this->charStates.end();
+	for (unsigned oit = 0; oit < nChar; ++oit)
+		{
+		resultSearchIt = indToCharLabel.find(oit);
+		NxsString escapedCLabel;
+		if (resultSearchIt != endIt)
+			escapedCLabel = NxsString::GetEscaped(resultSearchIt->second).c_str();
+		const NxsStringVectorMap::const_iterator cib = this->charStates.find(oit);
+		// the command name precedes the first entry; later entries are separated by commas
+		if (isFirst)
+			{
+			out << "    CharStateLabels \n      ";
+			isFirst = false;
+			}
+		else
+			out << ",\n      ";
+		out << 1 + oit << ' ';
+		if (cib != endCSIt)
+			{
+			const NxsStringVector & stateLabelsVec = cib->second;
+			unsigned ns = (unsigned)stateLabelsVec.size();
+			if (!escapedCLabel.empty())
+				out << escapedCLabel;
+			out << " / ";
+			for (unsigned m = 0; m < ns; m++)
+				out << " " << NxsString::GetEscaped(stateLabelsVec[m]);
+			}
+		else if (!escapedCLabel.empty())
+			out << escapedCLabel;
+		else out << '/';
+		}
+	out << ";\n";
+	}
+
+/*!
+	Writes the FORMAT command to `out'. For a mixed-datatype block the MIXED(...) datatype description and the
+	Missing/Gap settings are written here; otherwise the start of the command is delegated to the datatype mapper of
+	the first character. The RespectCase, MatchChar, Items, StatesFormat, Tokens and Interleave settings follow when
+	they apply.
+*/
+void NxsCharactersBlock::WriteFormatCommand(std::ostream &out) const
+	{
+	const NxsDiscreteDatatypeMapper * mapper =  GetDatatypeMapperForChar(0);
+	if (!IsMixedType())
+		mapper->WriteStartOfFormatCommand(out);
+	else
+		{
+		out << "    FORMAT Datatype=MIXED(";
+		typedef std::vector<DatatypeMapperAndIndexSet>::const_iterator MapperIter;
+		for (MapperIter mIt = datatypeMapperVec.begin(); mIt != datatypeMapperVec.end(); ++mIt)
+			{
+			// comma-separate every entry after the first
+			if (mIt != datatypeMapperVec.begin())
+				out << ", ";
+			out << GetNameOfDatatype(mIt->first.GetDatatype()) << ':';
+			NxsSetReader::WriteSetAsNexusValue(mIt->second, out);
+			}
+		out << ')';
+		if (this->missing != '?')
+			out << " Missing=" << this->missing;
+		if (this->gap != '\0')
+			out << "  Gap=" << this->gap;
+		}
+
+	if (this->respectingCase)
+		out << " RespectCase";
+	if (this->matchchar != '\0')
+		out << "  MatchChar=" << this->matchchar;
+
+	const bool isContinuous = (this->datatype == continuous);
+	if (isContinuous)
+		{
+		out << " Items = (";
+		for (std::vector<std::string>::size_type itemInd = 0; itemInd < items.size(); ++itemInd)
+			out << items[itemInd] << ' ';
+		out << ")";
+		if (this->statesFormat == STATES_PRESENT)
+			out << " StatesFormat=StatesPresent";
+		}
+	else
+		{
+		if (this->statesFormat == INDIVIDUALS)
+			out << " StatesFormat=Individuals";
+		/*TOKENS is the only choice for continuous data, so it is only written for other datatypes*/
+		if (this->tokens)
+			out << " Tokens";
+		}
+
+	const bool willInterleave = (this->writeInterleaveLen > 1 && (this->nChar > (unsigned)this->writeInterleaveLen ));
+	if (willInterleave)
+		out << " Interleave";
+	out << ";\n";
+	}
+
+/*!
+	Returns the default equate macros for datatype `dt': the ambiguity codes for the dna, rna and nucleotide
+	datatypes (written with T for dna/nucleotide and with U for rna), B, Z and X for protein, and an empty map for
+	every other datatype. Each key is present in both upper and lower case.
+*/
+std::map<char, NxsString> NxsCharactersBlock::GetDefaultEquates(DataTypesEnum dt)
+	{
+	std::map<char, NxsString> defEquates;
+	const bool isRNA = (dt == NxsCharactersBlock::rna);
+	const bool usesThymine = (dt == NxsCharactersBlock::dna || dt == NxsCharactersBlock::nucleotide);
+	if (isRNA || usesThymine)
+		{
+		// codes that involve neither T nor U are shared by all three datatypes
+		defEquates['R'] = NxsString("{AG}");
+		defEquates['M'] = NxsString("{AC}");
+		defEquates['S'] = NxsString("{CG}");
+		defEquates['V'] = NxsString("{ACG}");
+
+		// the remaining codes differ only in whether T or U is the fourth base
+		struct FourthBaseCode
+			{
+			char key;
+			const char * withT;
+			const char * withU;
+			};
+		static const FourthBaseCode fourthBaseCodes[] =
+			{
+			{'Y', "{CT}", "{CU}"},
+			{'K', "{GT}", "{GU}"},
+			{'W', "{AT}", "{AU}"},
+			{'H', "{ACT}", "{ACU}"},
+			{'B', "{CGT}", "{CGU}"},
+			{'D', "{AGT}", "{AGU}"},
+			{'N', "{ACGT}", "{ACGU}"},
+			{'X', "{ACGT}", "{ACGU}"}
+			};
+		const unsigned numFourthBaseCodes = (unsigned)(sizeof(fourthBaseCodes)/sizeof(fourthBaseCodes[0]));
+		for (unsigned idx = 0; idx < numFourthBaseCodes; ++idx)
+			{
+			const FourthBaseCode & code = fourthBaseCodes[idx];
+			defEquates[code.key] = NxsString(isRNA ? code.withU : code.withT);
+			}
+		if (dt == NxsCharactersBlock::nucleotide)
+			defEquates['U'] ='T';
+		}
+	else if (dt == NxsCharactersBlock::protein)
+		{
+		defEquates['B'] = NxsString("{DN}");
+		defEquates['Z'] = NxsString("{EQ}");
+		defEquates['X'] = NxsString("{ACDEFGHIKLMNPQRSTVWY*}");
+		}
+
+	/* molecular datatypes are the only datatypes with default equates and
+		keys of either case are equivalent, so mirror every (upper case) key
+		under its lower case form. A snapshot is walked so that the map is not
+		modified while it is being iterated.
+	*/
+	const std::map<char, NxsString> upperCaseEquates(defEquates);
+	for (std::map<char, NxsString>::const_iterator eqIt = upperCaseEquates.begin(); eqIt != upperCaseEquates.end(); ++eqIt)
+		{
+		const char lowerKey = (char)tolower(eqIt->first);
+		defEquates[lowerKey] = eqIt->second;
+		}
+
+	return defEquates;
+	}
+
+/*!
+	Returns the name used in the FORMAT command for `datatype'. Codon data is reported as "DNA" and any datatype
+	not listed here is reported as "Standard".
+*/
+const char * NxsCharactersBlock::GetNameOfDatatype(DataTypesEnum datatype)
+	{
+	if (datatype == NxsCharactersBlock::codon || datatype == NxsCharactersBlock::dna)
+		return "DNA";
+	if (datatype == NxsCharactersBlock::rna)
+		return "RNA";
+	if (datatype == NxsCharactersBlock::nucleotide)
+		return "Nucleotide";
+	if (datatype == NxsCharactersBlock::protein)
+		return "Protein";
+	if (datatype == NxsCharactersBlock::continuous)
+		return "Continuous"; // do not change!  phylobase uses this!!!
+	return "Standard";
+	}
+
+/*!
+	Returns NxsCharactersBlock object to the state it was in when first created: the surrogate and base-class state
+	is reset, the FORMAT settings go back to their defaults (standard datatype, `?' as the missing symbol, no gap or
+	match character, labels expected, no tokens) and all stored labels, sets, partitions, equates and matrices are
+	emptied.
+
+	NOTE(review): `excluded' and `writeInterleaveLen' are not touched here -- confirm whether ResetSurrogate() or
+	NxsBlock::Reset() takes care of them or whether they are meant to survive a reset.
+*/
+void NxsCharactersBlock::Reset()
+	{
+	ResetSurrogate();
+	NxsBlock::Reset();
+	nTaxWithData = 0;
+	nChar = 0;
+	newtaxa				= false;
+	interleaving		= false;
+	transposing			= false;
+	respectingCase		= false;
+	labels				= true;
+	tokens				= false;
+	datatype			= NxsCharactersBlock::standard;
+	originalDatatype	= NxsCharactersBlock::standard;
+	datatypeReadFromFormat = false;
+	missing				= '?';
+	gap					= '\0';
+	gapMode = GAP_MODE_MISSING;
+	matchchar			= '\0';
+	// ResetSymbols() derives the symbols and default equates from `datatype', so it must run after the
+	// datatype has been put back to standard above (it also overwrites `symbols', making the clear() redundant).
+	symbols.clear();
+	ResetSymbols();
+
+	ucCharLabelToIndex.clear();
+	indToCharLabel.clear();
+	charSets.clear();
+	charPartitions.clear();
+	codonPosPartitions.clear();
+	defCodonPosPartitionName.clear();
+	exSets.clear();
+	charStates.clear();
+	globalStateLabels.clear();
+	userEquates.clear();
+	// this discards the default equates that ResetSymbols() has just installed
+	defaultEquates.clear();
+	eliminated.clear();
+	datatypeMapperVec.clear();
+	discreteMatrix.clear();
+	continuousMatrix.clear();
+	// a single item named STATES is the default ITEMS setting
+	items = std::vector<std::string>(1, std::string("STATES"));
+	statesFormat = STATES_PRESENT;
+	restrictionDataype = false;
+	}
+
+/*!
+	Returns the default symbols list for datatype `dt'. An empty string is returned for datatypes that have no
+	default symbols listed here.
+*/
+std::string NxsCharactersBlock::GetDefaultSymbolsForType(NxsCharactersBlock::DataTypesEnum dt)
+	{
+	const char * defaultSymbols = "";
+	if (dt == NxsCharactersBlock::nucleotide || dt == NxsCharactersBlock::dna)
+		defaultSymbols = "ACGT";
+	else if (dt == NxsCharactersBlock::rna)
+		defaultSymbols = "ACGU";
+	else if (dt == NxsCharactersBlock::protein)
+		defaultSymbols = "ACDEFGHIKLMNPQRSTVWY*";
+	else if (dt == NxsCharactersBlock::standard)
+		defaultSymbols = "01";
+	return std::string(defaultSymbols);
+	}
+/*!
+	Re-derives everything that depends on `datatype' after it has been changed: the user-defined equates and the
+	datatype mappers are discarded, and the symbols list and the default equate macros are replaced by the defaults
+	for the current `datatype'.
+*/
+void NxsCharactersBlock::ResetSymbols()
+	{
+	// throw away the state tied to the previous datatype ...
+	this->userEquates.clear();
+	this->datatypeMapperVec.clear();
+	// ... and install the defaults of the current one
+	this->symbols = GetDefaultSymbolsForType(this->datatype);
+	this->defaultEquates = GetDefaultEquates(this->datatype);
+	}
+
+/*!
+	Looks up the state(s) at row `taxInd', column `charInd' of the matrix and writes it (or them) to `out'.
+
+	For continuous data, the values of every item of the cell are written separated by spaces, with the missing
+	symbol standing in for an absent item, an item without values, or a value equal to DBL_MAX. The output is wrapped
+	in parentheses when there is more than one item or when the single item holds more than one value.
+
+	For discrete data with TOKENS in effect and a state code in the range [0..number of states), the state label is
+	written after a space: the label stored for this character if there is one for that state, otherwise the global
+	state label, otherwise `_'. In every other case the datatype mapper writes the state code as a NEXUS string
+	(after a space if TOKENS is in effect).
+
+	Throws std::out_of_range (from at()) if `taxInd' or `charInd' is outside the stored matrix.
+*/
+void NxsCharactersBlock::ShowStateLabels(
+  std::ostream &out,				/* the output stream on which to write */
+  unsigned taxInd,				/* the taxon, in range [0..`ntax') */
+  unsigned charInd,				/* the character, in range [0..`nChar') */
+  unsigned ) const		/* ignored */
+	{
+	if (datatype == continuous)
+		{
+		const ContinuousCharCell & cell = continuousMatrix.at(taxInd).at(charInd);
+		std::vector<std::string>::const_iterator itemIt = items.begin();
+		bool parensNeeded = items.size() > 1;
+		if (items.size() == 1)
+			{
+			ContinuousCharCell::const_iterator oit = cell.find(*itemIt);
+			if (oit != cell.end() && oit->second.size() > 1)
+				parensNeeded = true;
+			}
+		if (parensNeeded)
+			out	 << '(';
+		for (; itemIt != items.end(); ++itemIt)
+			{
+			ContinuousCharCell::const_iterator cit = cell.find(*itemIt);
+			if (cit == cell.end())
+				out << missing << ' ';
+			else
+				{
+				if (cit->second.empty())
+					out << missing << ' ';
+				else
+					{
+					vector<double>::const_iterator vIt = cit->second.begin();
+					for(; vIt != cit->second.end(); ++vIt)
+						{
+						// DBL_MAX is the stored marker for a missing value
+						if (*vIt == DBL_MAX)
+							out << missing << ' ';
+						else
+							out << *vIt << ' ';
+						}
+					}
+				}
+			}
+		if (parensNeeded)
+			out	 << ") ";
+		else
+			out << ' ';
+		return;
+		}
+	const NxsDiscreteDatatypeMapper * mapper = GetDatatypeMapperForChar(charInd);
+	NCL_ASSERT(mapper != NULL);
+	const NxsDiscreteStateCell currStateCode = discreteMatrix.at(taxInd).at(charInd);
+	if (tokens)
+		{
+		out << ' ';
+		// Only a code in [0..number of states) can have a label; every other code (negative codes and the
+		// codes above the fundamental states) falls through to the mapper below.
+		if (currStateCode >= 0 && currStateCode < (NxsDiscreteStateCell) mapper->GetNumStates())
+			{
+			NxsStringVectorMap::const_iterator ci = charStates.find(charInd);
+			if (ci != charStates.end() && ((NxsDiscreteStateCell) ci->second.size()) > currStateCode)
+				out << ci->second[currStateCode];
+			else if (globalStateLabels.size() > (unsigned) currStateCode)
+				out << globalStateLabels[currStateCode];
+			else
+				out << '_';
+			return;
+			}
+		}
+	mapper->WriteStateCodeAsNexusString(out, currStateCode);
+	}
+
+/*!
+	Writes the state (or states) of the matrix cell identified by `d' (its `taxInd' and `charInd' members) to the
+	buffer `s' as a NUL-terminated string, using ShowStates to translate the stored state code into symbols.
+
+	Throws NxsNCLAPIException if `s' is NULL or if a buffer of `slen' characters cannot hold the text together with
+	its terminating NUL.
+*/
+void NxsCharactersBlock::WriteStates(
+  NxsDiscreteDatum &d,	/* the datum to be queried */
+  char *s,				/* the buffer to which to print */
+  unsigned slen) NCL_COULD_BE_CONST /* the length of the buffer `s' */ /*v2.1to2.2 1 */
+	{
+	std::ostringstream outs;
+	ShowStates(outs, d.taxInd, d.charInd);
+	const std::string sfo = outs.str();
+	// strcpy writes sfo.length() + 1 characters (the text plus the NUL), so text that is exactly `slen'
+	// characters long does not fit either.
+	if (s == NULL || sfo.length() >= slen)
+		throw NxsNCLAPIException("Char buffer too small in NxsCharactersBlock::WriteStates");
+	strcpy(s, sfo.c_str());
+	}
+
+/*!
+	This function is no longer the most efficient way to access parsed data (see notes on NxsCharacterBlock and
+	GetMatrix() and GetMatrixDecoder() methods.
+
+	Returns the number of states in the state code stored for taxon `taxInd', character `charInd'. Throws
+	std::out_of_range (from at()) if either index is outside the stored discrete matrix.
+*/
+unsigned NxsCharactersBlock::GetNumStates(
+  unsigned taxInd,	/* the taxon in range [0..`ntax') */
+  unsigned charInd) NCL_COULD_BE_CONST /* the character in range [0..`nChar') */ /*v2.1to2.2 1 */
+	{
+	const NxsDiscreteDatatypeMapper * const charMapper = GetDatatypeMapperForChar(charInd);
+	NCL_ASSERT(charMapper != NULL);
+	const NxsDiscreteStateRow & taxonRow = discreteMatrix.at(taxInd);
+	const NxsDiscreteStateCell storedCode = taxonRow.at(charInd);
+	return charMapper->GetNumStatesInStateCode(storedCode);
+	}
+
+/*! Excludes the character with index `i` by adding it to the `excluded` set.
+	Throws NxsNCLAPIException if `i` is not less than `nChar`.
+*/
+void NxsCharactersBlock::ExcludeCharacter(
+  unsigned i)	/* index of character to exclude in range [0..`nChar') */
+	{
+	if (i < nChar)
+		{
+		excluded.insert(i);
+		return;
+		}
+	// invalid index: report the valid upper bound to the caller
+	errormsg  = "Character index is ExcludeCharacter out-of-range.   Must be < ";
+	errormsg << nChar;
+	throw NxsNCLAPIException(errormsg);
+	}
+/*! Includes (or "activates") character with index `i` by removing it from the `excluded` set.
+	Throws NxsNCLAPIException if `i` is not less than `nChar`.
+*/
+void NxsCharactersBlock::IncludeCharacter(
+  unsigned i)	/* index of character to include in range [0..`nChar') */
+	{
+	if (i >= nChar)
+		{
+		// name this function in the message so that the failing call can be told apart from ExcludeCharacter
+		errormsg  = "Character index in IncludeCharacter out-of-range.   Must be < ";
+		errormsg << nChar;
+		throw NxsNCLAPIException(errormsg);
+		}
+	excluded.erase(i);
+	}
+
+/*!
+	Returns true if the cell for taxon `taxInd', character `charInd' holds the gap state code. Always false for
+	continuous data and for a character index beyond the end of the taxon's row. Throws std::out_of_range (from
+	at()) if `taxInd' is outside the discrete matrix.
+*/
+bool NxsCharactersBlock::IsGapState(
+  unsigned taxInd,	/* the taxon, in range [0..`ntax') */
+  unsigned charInd) NCL_COULD_BE_CONST /* the character, in range [0..`nChar') */ /*v2.1to2.2 1 */
+	{
+	if (this->datatype != continuous)
+		{
+		const NxsDiscreteStateRow & taxonRow = discreteMatrix.at(taxInd);
+		if (charInd < taxonRow.size())
+			return taxonRow[charInd] == NXS_GAP_STATE_CODE;
+		}
+	return false;
+	}
+
+/*!
+	Reports whether the cell for taxon `taxInd', character `charInd' counts as missing. For discrete data that is
+	the case when the taxon's row is too short to contain `charInd' or when the cell holds NXS_MISSING_CODE.
+	Throws std::out_of_range (from at()) if `taxInd' is outside the matrix that is consulted.
+*/
+bool NxsCharactersBlock::IsMissingState(
+  unsigned taxInd,	/* the taxon, in range [0..`ntax') */
+  unsigned charInd) NCL_COULD_BE_CONST /* the character, in range [0..`nChar') */ /*v2.1to2.2 1 */
+	{
+	if (this->datatype == continuous)
+		{
+		// NOTE(review): for continuous data this returns true whenever the taxon's row is NON-empty and never
+		// looks at `charInd'. That reads like an inverted test -- confirm the intended meaning before relying on it.
+		return !continuousMatrix.at(taxInd).empty();
+		}
+	const NxsDiscreteStateRow & row = discreteMatrix.at(taxInd);
+	return (row.size() <= charInd || (row[charInd] == NXS_MISSING_CODE));
+	}
+
+
+/*!
+	Adds to `c' the index of every character that is constant, i.e. for which at least one state is compatible with
+	the state code of every taxon. Rows too short to contain the character are skipped. Throws NxsNCLAPIException
+	if a character has no datatype mapper.
+*/
+void NxsCharactersBlock::FindConstantCharacters(NxsUnsignedSet &c) const
+	{
+	typedef std::set<NxsDiscreteStateCell> StateSet;
+	for (unsigned colIndex = 0; colIndex < nChar; ++colIndex)
+		{
+		const NxsDiscreteDatatypeMapper * mapper = GetDatatypeMapperForChar(colIndex);
+		if (mapper == NULL)
+			throw NxsNCLAPIException("No DatatypeMapper in FindConstantCharacters");
+
+		// start from the states of the missing code and narrow the set down row by row
+		StateSet sharedStates = mapper->GetStateSetForCode(NXS_MISSING_CODE);
+		NxsDiscreteStateMatrix::const_iterator rowIt = discreteMatrix.begin();
+		for (; rowIt != discreteMatrix.end(); ++rowIt)
+			{
+			if (rowIt->size() <= colIndex)
+				continue;
+			const StateSet cellStates = mapper->GetStateSetForCode((*rowIt)[colIndex]);
+			StateSet narrowed;
+			std::set_intersection(cellStates.begin(), cellStates.end(), sharedStates.begin(), sharedStates.end(), std::inserter(narrowed, narrowed.begin()));
+			sharedStates.swap(narrowed);
+			// no state is shared by all rows seen so far: the character is variable
+			if (sharedStates.empty())
+				break;
+			}
+		if (!sharedStates.empty())
+			c.insert(colIndex);
+		}
+	}
+
+/*!
+	Adds to `c' the index of every character for which at least one taxon has the gap state code. Rows too short to
+	contain the character are skipped.
+*/
+void NxsCharactersBlock::FindGappedCharacters(NxsUnsignedSet &c) const
+	{
+	for (unsigned colIndex = 0; colIndex < nChar; ++colIndex)
+		{
+		for (NxsDiscreteStateMatrix::const_iterator rowIt = discreteMatrix.begin(); rowIt != discreteMatrix.end(); ++rowIt)
+			{
+			const NxsDiscreteStateRow & row = *rowIt;
+			if (row.size() > colIndex && row[colIndex] == NXS_GAP_STATE_CODE)
+				{
+				// one gap is enough; move on to the next character
+				c.insert(colIndex);
+				break;
+				}
+			}
+		}
+	}
+
+/* Behaves like GetMaximalStateSetOfColumn except that cells holding the missing code do not contribute to
+	the returned state set.
+	If the gap mode is GAP_MODE_MISSING, then cells holding the gap code do not contribute either.
+	Throws NxsNCLAPIException if the column has no datatype mapper.
+*/
+std::set<NxsDiscreteStateCell> NxsCharactersBlock::GetNamedStateSetOfColumn(const unsigned colIndex) const
+	{
+	const NxsDiscreteDatatypeMapper * mapper = GetDatatypeMapperForChar(colIndex);
+	if (mapper == NULL)
+		throw NxsNCLAPIException("No DatatypeMapper in GetNamedStateSetOfColumn");
+
+	std::set<NxsDiscreteStateCell> statesSeen;
+	std::set<NxsDiscreteStateCell> codesSeen;
+	const unsigned maxnstates = mapper->GetNumStatesIncludingGap();
+	const bool gapsAreMissing = (this->gapMode == GAP_MODE_MISSING);
+	NxsDiscreteStateMatrix::const_iterator rowIt = discreteMatrix.begin();
+	for (; rowIt != discreteMatrix.end(); ++rowIt)
+		{
+		if (rowIt->size() <= colIndex)
+			continue;
+		const NxsDiscreteStateCell sc = (*rowIt)[colIndex];
+		if (sc == NXS_MISSING_CODE)
+			continue;
+		if (gapsAreMissing && sc == NXS_GAP_STATE_CODE)
+			continue;
+		// insert() reports whether the code is new; a code seen before has nothing to add
+		if (!codesSeen.insert(sc).second)
+			continue;
+		const std::set<NxsDiscreteStateCell> & codeStates = mapper->GetStateSetForCode(sc);
+		statesSeen.insert(codeStates.begin(), codeStates.end());
+		// every possible state has been found; the remaining rows cannot add anything
+		if (statesSeen.size() == maxnstates)
+			break;
+		}
+	return statesSeen;
+	}
+/* Returns the union, over all rows long enough to contain the column, of the state sets of the state codes
+	found in column `colIndex'.
+	Throws NxsNCLAPIException if the column has no datatype mapper.
+*/
+std::set<NxsDiscreteStateCell> NxsCharactersBlock::GetMaximalStateSetOfColumn(const unsigned colIndex) const
+	{
+	const NxsDiscreteDatatypeMapper * mapper = GetDatatypeMapperForChar(colIndex);
+	if (mapper == NULL)
+		throw NxsNCLAPIException("No DatatypeMapper in GetMaximalStateSetOfColumn");
+
+	std::set<NxsDiscreteStateCell> statesSeen;
+	std::set<NxsDiscreteStateCell> codesSeen;
+	const unsigned maxnstates = mapper->GetNumStatesIncludingGap();
+	NxsDiscreteStateMatrix::const_iterator rowIt = discreteMatrix.begin();
+	for (; rowIt != discreteMatrix.end(); ++rowIt)
+		{
+		if (rowIt->size() <= colIndex)
+			continue;
+		const NxsDiscreteStateCell sc = (*rowIt)[colIndex];
+		// insert() reports whether the code is new; a code seen before has nothing to add
+		if (!codesSeen.insert(sc).second)
+			continue;
+		const std::set<NxsDiscreteStateCell> & codeStates = mapper->GetStateSetForCode(sc);
+		statesSeen.insert(codeStates.begin(), codeStates.end());
+		// every possible state has been found; the remaining rows cannot add anything
+		if (statesSeen.size() == maxnstates)
+			break;
+		}
+	return statesSeen;
+	}
+
+ /*!
+	Returns whether the datatype mapper of character `charInd' classifies the state code stored for taxon `taxInd'
+	as polymorphic. Throws NxsNCLAPIException if either index is outside the stored discrete matrix.
+*/
+ bool NxsCharactersBlock::IsPolymorphic(
+  unsigned taxInd,	/* the taxon in range [0..`ntax') */
+  unsigned charInd) NCL_COULD_BE_CONST /* the character in range [0..`nChar') */ /*v2.1to2.2 1 */
+	{
+	const NxsDiscreteDatatypeMapper * const charMapper = GetDatatypeMapperForChar(charInd);
+	NCL_ASSERT(charMapper);
+	const bool taxonInRange = (taxInd < discreteMatrix.size());
+	if (!taxonInRange)
+		throw NxsNCLAPIException("Taxon index out of range of NxsCharactersBlock::IsPolymorphic");
+	const NxsDiscreteStateRow & taxonRow = discreteMatrix[taxInd];
+	const bool charInRange = (charInd < taxonRow.size());
+	if (!charInRange)
+		throw NxsNCLAPIException("Character index out of range of NxsCharactersBlock::IsPolymorphic");
+	const NxsDiscreteStateCell storedCode = taxonRow[charInd];
+	return charMapper->IsPolymorphic(storedCode);
+	}
+
+
+/*!
+	Shows the states for taxon `taxInd', character `charInd', on the stream `out' as symbols rather than state
+	labels. This is done by calling ShowStateLabels with the `tokens' flag temporarily switched off; the flag is put
+	back to its previous value afterwards, also when ShowStateLabels throws (it reports an out-of-range index with
+	an exception), in which case the exception is passed on to the caller.
+*/
+void NxsCharactersBlock::ShowStates(
+  std::ostream &out, /* the stream on which to show the state(s) */
+  unsigned taxInd,	/* the (0-offset) index of the taxon in question */
+  unsigned charInd) NCL_COULD_BE_CONST /* the (0-offset) index of the character in question */ /*v2.1to2.2 1 */
+	{
+	const bool ft = tokens;
+	tokens = false;
+	try
+		{
+		ShowStateLabels(out, taxInd, charInd, UINT_MAX);
+		}
+	catch (...)
+		{
+		// do not leave the block with TOKENS silently switched off
+		tokens = ft;
+		throw;
+		}
+	tokens = ft;
+	}
+
+/*---------------------------------------------------------------------------------------
+ Copies the character data and settings of `other' into this block by plain member-wise
+ assignment.
+
+ Results in aliasing of the taxa, assumptionsBlock blocks!
+ (`assumptionsBlock' is assigned below; no assignment of `taxa' is visible in this
+ function -- presumably it is handled elsewhere, TODO confirm.)
+
+ NOTE(review): `defaultEquates', `newtaxa', `originalDatatype' and `datatypeReadFromFormat'
+ are reset by Reset() but are not copied here -- confirm that this is intended.
+*/
+void NxsCharactersBlock::CopyCharactersContents(const NxsCharactersBlock &other)
+	{
+	assumptionsBlock = other.assumptionsBlock;
+	nChar = other.nChar;
+	nTaxWithData = other.nTaxWithData;
+	matchchar = other.matchchar;
+	respectingCase = other.respectingCase;
+	transposing = other.transposing;
+	interleaving = other.interleaving;
+	tokens = other.tokens;
+	labels = other.labels;
+	missing = other.missing;
+	gap = other.gap;
+	gapMode = other.gapMode;
+	symbols = other.symbols;
+	userEquates = other.userEquates;
+	datatypeMapperVec = other.datatypeMapperVec;
+	discreteMatrix = other.discreteMatrix;
+	continuousMatrix = other.continuousMatrix;
+	eliminated = other.eliminated;
+	excluded = other.excluded;
+	ucCharLabelToIndex = other.ucCharLabelToIndex;
+	indToCharLabel = other.indToCharLabel;
+	charStates = other.charStates;
+	globalStateLabels = other.globalStateLabels;
+	items = other.items;
+	charSets = other.charSets;
+	exSets = other.exSets;
+	charPartitions = other.charPartitions;
+	codonPosPartitions = other.codonPosPartitions;
+	defCodonPosPartitionName = other.defCodonPosPartitionName;
+	transfMgr = other.transfMgr;
+	datatype = other.datatype;
+	statesFormat = other.statesFormat;
+	supportMixedDatatype = other.supportMixedDatatype;
+	convertAugmentedToMixed = other.convertAugmentedToMixed;
+	allowAugmentingOfSequenceSymbols = other.allowAugmentingOfSequenceSymbols;
+	restrictionDataype = other.restrictionDataype;
+	writeInterleaveLen = other.writeInterleaveLen;
+	}
+
+
+/*!
+	Returns a newly allocated NxsCharactersBlock (created without taxa or assumptions block, with implied-block
+	creation and the link API switched on) when `idneeded' is "CHARACTERS" and a reader is supplied; returns NULL
+	otherwise. The token argument is not used.
+*/
+NxsCharactersBlock *NxsCharactersBlockFactory::GetBlockReaderForID(const std::string & idneeded, NxsReader *reader, NxsToken *)
+	{
+	if (reader != NULL && idneeded == "CHARACTERS")
+		{
+		NxsCharactersBlock * const created = new NxsCharactersBlock(NULL, NULL);
+		created->SetCreateImpliedBlock(true);
+		created->SetImplementsLinkAPI(true);
+		return created;
+		}
+	return NULL;
+	}
+
+// Returns, for each state code, the vector of the states it stands for, with one slot per state code.
+// 	Only the slots for the codes 0, 1, ... up to (but excluding) stateSetsVec.size() + sclOffset are filled.
+// 	The second to the last element will be empty to correspond to  NXS_GAP_STATE_CODE = -2
+// 	The last element will be empty to correspond to NXS_MISSING_CODE = -1
+
+std::vector<std::vector<int> > NxsDiscreteDatatypeMapper::GetPythonicStateVectors() const
+	{
+	std::vector<std::vector<int> > stateVectors(this->GetNumStateCodes());
+
+	const int numNonNegativeCodes = (((int) stateSetsVec.size()) + sclOffset);
+	for (int code = 0; code < numNonNegativeCodes; ++code)
+		{
+		const NxsDiscreteStateRow codeStates = this->GetStateVectorForCode(code);
+		std::vector<int> & dest = stateVectors[code];
+		dest.reserve(codeStates.size());
+		for (NxsDiscreteStateRow::size_type k = 0; k < codeStates.size(); ++k)
+			dest.push_back((int) codeStates[k]);
+		}
+	return stateVectors;
+	}
diff --git a/src/nxscxxdiscretematrix.cpp b/src/nxscxxdiscretematrix.cpp
new file mode 100644
index 0000000..7071084
--- /dev/null
+++ b/src/nxscxxdiscretematrix.cpp
@@ -0,0 +1,511 @@
+//	Copyright (C) 2008 Mark Holder
+//
+//	This file is part of NCL (Nexus Class Library) version 2.1
+//
+//	NCL is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	NCL is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with NCL; if not, write to the Free Software Foundation, Inc.,
+//	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+#include <iterator>
+#include "ncl/nxscxxdiscretematrix.h"
+#include "ncl/nxsutilcopy.h"
+#include <cassert>
+using std::string;
+using std::vector;
+using std::cout;
+using std::endl;
+
+/**===========================================================================
+| Appends every pattern in `patternSet` to `compressedTransposedMatrix` and
+|   empties `patternSet`. Patterns that were already in
+|   `compressedTransposedMatrix` keep their slots; the new ones follow them.
+|
+| If `originalIndexToCompressed` or `compressedIndexToOriginal` are requested
+|   then the `compressedIndexPattern` mapping must be supplied (an NxsException
+|   is thrown otherwise). `compressedIndexPattern` must contain pointers to
+|   the keys in `patternSet.` Note that these will be invalid after the call
+|   because patternSet will be emptied.
+|
+| When mappings are requested each pattern is stamped with the index that it
+|   will occupy in `compressedTransposedMatrix`, and both mappings are
+|   expressed in terms of that index.
+*/
+void NxsConsumePatternSetToPatternVector(
+  std::set<NxsCharacterPattern> & patternSet, /* INPUT set of patterns to move into the vector; empty on return */
+  std::vector<NxsCharacterPattern> & compressedTransposedMatrix, /* OUTPUT matrix that will hold the compressed columns */
+  const std::vector<const NxsCharacterPattern *> * compressedIndexPattern, /** INPUT This mapping must be provided if either  `originalIndexToCompressed` or `compressedIndexToOriginal` is requested */
+  std::vector<int> * originalIndexToCompressed, /** OUTPUT if not 0L, this will be filled to provide map an index in `mat` to the corresponding index in `compressedTransposedMatrix` (-1 in the vector indicates that the character was not included) */
+  std::vector<std::set<unsigned> > * compressedIndexToOriginal) /** OUTPUT  if not 0L, this will be filled to provide a map from an index in `compressedTransposedMatrix` to the original character indices (slots of patterns that were already in the vector are left empty) */
+{
+    const unsigned patternIndexOffset = (unsigned const)compressedTransposedMatrix.size();
+    const unsigned numCompressedPatterns = (unsigned const)patternSet.size();
+    // Pattern indices address slots of compressedTransposedMatrix, so they are bounded by
+    // the size that the vector will have once the new patterns have been appended.
+    const unsigned totalNumPatterns = patternIndexOffset + numCompressedPatterns;
+    if (originalIndexToCompressed != 0L || compressedIndexToOriginal != 0L)
+        {
+        if (compressedIndexPattern == 0L)
+            throw NxsException("compressedIndexPattern must be provided in ConsumePatternSetToPatternVector if mappings are requested");
+        // Stamp each pattern with its future position in the vector (set order == append order).
+        unsigned patternIndex = 0;
+        for (std::set<NxsCharacterPattern>::iterator pIt = patternSet.begin(); pIt != patternSet.end(); ++pIt, ++patternIndex)
+            {
+            pIt->patternIndex = patternIndex + patternIndexOffset;
+            }
+        if (originalIndexToCompressed)
+            originalIndexToCompressed->resize(compressedIndexPattern->size());
+        if (compressedIndexToOriginal)
+            {
+            compressedIndexToOriginal->clear();
+            // Sized by the largest stamped index + 1; sizing by numCompressedPatterns alone
+            // made the lookup below fail whenever the output vector was not empty on entry.
+            compressedIndexToOriginal->resize(totalNumPatterns);
+            }
+        for (unsigned i = 0; i < compressedIndexPattern->size(); ++ i)
+            {
+            const NxsCharacterPattern * pat = (*compressedIndexPattern)[i];
+            if (pat)
+                {
+                if (originalIndexToCompressed)
+                    (*originalIndexToCompressed)[i] = pat->patternIndex;
+                if (compressedIndexToOriginal)
+                    {
+                    NCL_ASSERT(pat->patternIndex < totalNumPatterns);
+                    compressedIndexToOriginal->at(pat->patternIndex).insert(i);
+                    }
+                }
+            else
+                {
+                // character i was not assigned to any pattern
+                if (originalIndexToCompressed)
+                    (*originalIndexToCompressed)[i] = -1;
+                }
+            }
+        }
+    // reserve() takes the total capacity, so the elements already present must be counted too
+    compressedTransposedMatrix.reserve(totalNumPatterns);
+    // Copy and erase one pattern at a time so that the set shrinks as the vector grows.
+    for (std::set<NxsCharacterPattern>::iterator pIt = patternSet.begin(); pIt != patternSet.end();)
+        {
+        compressedTransposedMatrix.push_back(*pIt);
+        std::set<NxsCharacterPattern>::iterator prevIt = pIt++;
+        patternSet.erase(prevIt);
+        }
+    patternSet.clear();
+}
+
+
+/**===========================================================================
+| Collapses the columns (characters) of `mat` into distinct patterns stored in
+|   `patternSet`. A column is used only if its acting weight is > 0 (integer
+|   weights if `mat` has any, otherwise double weights if present, otherwise
+|   1.0; characters in the excluded set of `mat` get weight 0) and, when
+|   `charactersToInclude` is given, its index is in that set.
+| A column whose pattern is already in `patternSet` increments that pattern's
+|   `count` and adds its weight to `sumOfPatternWeights`.
+|
+| Returns the number of patterns that this call added to `patternSet`
+|   (0 if `taxaToInclude` or `charactersToInclude` is supplied but empty).
+| Throws NxsException if the largest index in `taxaToInclude` or
+|   `charactersToInclude` is out of range for `mat`.
+*/
+unsigned NxsCompressDiscreteMatrix(
+  const NxsCXXDiscreteMatrix & mat,			/**< is the data source */
+  std::set<NxsCharacterPattern> & patternSet, /* set that will hold the compressed columns (entries already present are kept) */
+  std::vector<const NxsCharacterPattern *> * compressedIndexPattern, /** if not 0L, this will be resized to the number of characters in `mat` and element j will be pointed at the pattern in `patternSet` used for character j (elements of characters that are skipped are not written) */
+  const NxsUnsignedSet * taxaToInclude,	/**< if not 0L, this should be  the indices of the taxa in `mat` to include (if 0L all taxa will be included). Excluding taxa will result in shorter patterns (the skipped taxa will not be filled with empty codes, instead the taxon indexing will be frameshifted -- the client code must keep track of these frameshifts). */
+  const NxsUnsignedSet * charactersToInclude) /**< if not 0L, the indices of the characters in `mat` to consider (if 0L all characters are considered) */
+    {
+    const unsigned origNumPatterns = (unsigned) patternSet.size();
+	unsigned ntax = mat.getNTax();
+	unsigned patternLength = ntax;
+	unsigned nchar = mat.getNChar();
+	if (compressedIndexPattern)
+	    {
+	    compressedIndexPattern->resize(nchar);
+	    }
+	NxsUnsignedSet allTaxaInds;
+	if (taxaToInclude)
+	    {
+	    if (taxaToInclude->empty())
+	        return 0; // might want to warn about this!
+	    const unsigned lastTaxonIndex = *(taxaToInclude->rbegin());
+	    if (lastTaxonIndex >= ntax)
+	        throw NxsException("Taxon index in taxaToInclude argument to NxsCompressDiscreteMatrix is out of range");
+	    // One state code is stored per included taxon, so the pattern length is the
+	    // size of the set (subtracting it from ntax gave the number of *excluded* taxa).
+        patternLength = (unsigned) taxaToInclude->size();
+	    }
+    else
+        {
+        for (unsigned i = 0; i < ntax; ++i)
+            allTaxaInds.insert(i);
+        taxaToInclude = &allTaxaInds;
+        }
+	if (charactersToInclude)
+	    {
+	    if (charactersToInclude->empty())
+	        return 0; // might want to warn about this!
+	    const unsigned lastColumnIndex = *(charactersToInclude->rbegin());
+	    if (lastColumnIndex >= nchar)
+	        throw NxsException("Character index in charactersToInclude argument to NxsCompressDiscreteMatrix is out of range");
+	    }
+
+    // Create actingWeights vector and copy the integer weights from mat into it
+    // If there are no integer weights in mat, copy the floating point weights instead
+    // if floating point weights have been defined
+	const std::vector<int> & iwts = mat.getIntWeightsConst();
+	std::vector<double> actingWeights(nchar, 1.0);
+	if (!iwts.empty())
+		{
+		NCL_ASSERT(iwts.size() >= nchar);
+		for (unsigned j = 0; j < nchar; ++j)
+			actingWeights[j] = (double)iwts.at(j);
+		}
+	else
+		{
+		const std::vector<double> & dwts = mat.getDblWeightsConst();
+		if (!dwts.empty())
+			{
+			actingWeights = dwts;
+			NCL_ASSERT(actingWeights.size() == nchar);
+			}
+		}
+
+    // Set corresponding actingWeights elements to zero if any characters have been excluded in mat
+	const NxsUnsignedSet & excl = mat.getExcludedCharIndices();
+	for (NxsUnsignedSet::const_iterator eIt = excl.begin(); eIt != excl.end(); ++eIt)
+		{
+		NCL_ASSERT(*eIt < nchar);
+		// Do not rely on the assertion alone: an index beyond the matrix must never be written.
+		if (*eIt < nchar)
+			actingWeights[*eIt] = 0.0;
+		}
+
+	NxsCharacterPattern patternTemp;
+    patternTemp.count = 1;
+	for (unsigned j = 0; j < nchar; ++j)
+		{
+        // Index the vector directly; taking &actingWeights[0] is not valid when nchar is 0.
+        double patternWeight = actingWeights[j];
+        bool shouldInclude = (charactersToInclude == 0L || (charactersToInclude->find(j) != charactersToInclude->end()));
+        if (patternWeight > 0.0 &&  shouldInclude)
+            {
+            // Build up a vector representing the pattern of state codes at this site
+            patternTemp.stateCodes.clear();
+            patternTemp.stateCodes.reserve(patternLength);
+            patternTemp.sumOfPatternWeights = patternWeight;
+
+            unsigned indexInPattern = 0;
+            for (NxsUnsignedSet::const_iterator taxIndIt = taxaToInclude->begin(); taxIndIt != taxaToInclude->end(); ++taxIndIt, ++indexInPattern)
+                {
+                const unsigned taxonIndex = *taxIndIt;
+                const NxsCDiscreteStateSet * row	= mat.getRow(taxonIndex);
+                const NxsCDiscreteStateSet code = row[j];
+                patternTemp.stateCodes.push_back(code);
+                }
+            NCL_ASSERT(indexInPattern == patternLength);
+
+            // lower_bound doubles as the insertion hint when the pattern is new
+            std::set<NxsCharacterPattern>::iterator lowBoundLoc = patternSet.lower_bound(patternTemp);
+            if ((lowBoundLoc == patternSet.end()) || (patternTemp < *lowBoundLoc))
+                {
+                std::set<NxsCharacterPattern>::iterator insertedIt = patternSet.insert(lowBoundLoc, patternTemp);
+                if (compressedIndexPattern)
+                    {
+                    const NxsCharacterPattern & patInserted = *insertedIt;
+                    (*compressedIndexPattern)[j] = &patInserted;
+                    }
+                }
+            else
+                {
+                // Same pattern seen before: fold this column into the existing entry.
+                NCL_ASSERT(patternTemp == *lowBoundLoc);
+                lowBoundLoc->sumOfPatternWeights += patternWeight;
+                lowBoundLoc->count += 1;
+                if (compressedIndexPattern)
+                    {
+                    (*compressedIndexPattern)[j] = &(*lowBoundLoc);
+                    }
+                }
+            }
+		}
+	return (unsigned)patternSet.size() - origNumPatterns;
+    }
+
+/*----------------------------------------------------------------------------------------------------------------------
+|	Convenience overload that compresses the columns of `mat' into distinct patterns and appends them to
+|	`compressedTransposedMatrix'. The patterns are first gathered in a temporary std::set by the set-based overload
+|	of NxsCompressDiscreteMatrix and then moved into the vector by NxsConsumePatternSetToPatternVector.
+|	`originalIndexToCompressed' and `compressedIndexToOriginal' are optional outputs (pass 0L to skip them); a
+|	character-to-pattern map is only built when at least one of them is requested. `taxaToInclude' and
+|	`charactersToInclude' are forwarded unchanged. Returns the number of patterns held by the temporary set.
+*/
+unsigned NxsCompressDiscreteMatrix(
+  const NxsCXXDiscreteMatrix & mat,
+  std::vector<NxsCharacterPattern> & compressedTransposedMatrix,
+  std::vector<int> * originalIndexToCompressed,
+  std::vector<std::set<unsigned> > * compressedIndexToOriginal,
+  const NxsUnsignedSet * taxaToInclude,
+  const NxsUnsignedSet * charactersToInclude)
+	{
+	const bool mappingsWanted = (originalIndexToCompressed != 0L || compressedIndexToOriginal != 0L);
+
+	std::set<NxsCharacterPattern> uniquePatterns;
+	std::vector<const NxsCharacterPattern *> columnToPattern;
+	std::vector<const NxsCharacterPattern *> *columnToPatternPtr = 0L;
+	if (mappingsWanted)
+		columnToPatternPtr = &columnToPattern;
+
+	// step 1: gather the distinct patterns
+	NxsCompressDiscreteMatrix(mat, uniquePatterns, columnToPatternPtr, taxaToInclude, charactersToInclude);
+	// the set is emptied in step 2, so its size has to be recorded first
+	const unsigned numPatternsFound = (unsigned const)uniquePatterns.size();
+
+	// step 2: move them into the caller's vector and fill in the requested mappings
+	NxsConsumePatternSetToPatternVector(uniquePatterns, compressedTransposedMatrix, columnToPatternPtr, originalIndexToCompressed, compressedIndexToOriginal);
+	return numPatternsFound;
+	}
+
+/**
+ *	Writes the patterns of `compressedTransposedMatrix` (one pattern per element, one state code per taxon) into
+ *	`destination` as a taxa x patterns matrix. The number of taxa is taken from the length of the first pattern.
+ *	If `patternCounts` / `patternWeights` are not 0L they are resized to the number of patterns and filled with
+ *	each pattern's `count` / `sumOfPatternWeights`. With no patterns, `destination` becomes 0 x 0 and the optional
+ *	outputs are emptied.
+ */
+void NxsTransposeCompressedMatrix(
+  const std::vector<NxsCharacterPattern> & compressedTransposedMatrix,
+  ScopedTwoDMatrix<NxsCDiscreteStateSet> & destination,
+  std::vector<unsigned> * patternCounts,
+  std::vector<double> * patternWeights)
+{
+	const unsigned npatterns = (unsigned const)compressedTransposedMatrix.size();
+	if (npatterns == 0)
+	    {
+	    destination.Initialize(0, 0);
+	    // Keep the optional outputs in step with the (empty) matrix instead of
+	    // leaving whatever the caller's vectors happened to contain.
+	    if (patternCounts)
+	        patternCounts->clear();
+	    if (patternWeights)
+	        patternWeights->clear();
+	    return;
+	    }
+	const unsigned ntaxa = (unsigned const)compressedTransposedMatrix[0].stateCodes.size();
+	destination.Initialize(ntaxa, npatterns);
+    NxsCDiscreteStateSet ** matrix = destination.GetAlias();			/** taxa x characters matrix of indices of state sets */
+    if (patternCounts)
+        patternCounts->resize(npatterns);
+    if (patternWeights)
+        patternWeights->resize(npatterns);
+	for (unsigned p = 0; p < npatterns; ++p)
+		{
+		const NxsCharacterPattern & pattern = compressedTransposedMatrix[p];
+		const std::vector<NxsCDiscreteState_t> & states = pattern.stateCodes;
+		// pattern p becomes column p of the destination
+		for (unsigned t = 0; t < ntaxa; ++t)
+		    matrix[t][p] = states[t];
+        if (patternCounts)
+            (*patternCounts)[p] = pattern.count;
+        if (patternWeights)
+            (*patternWeights)[p] = pattern.sumOfPatternWeights;
+		}
+}
+
+/**
+ *	Constructs the matrix from a characters block. All of the work is done by Initialize(), which is handed the
+ *	address of `cb` together with the remaining arguments, unchanged.
+ */
+NxsCXXDiscreteMatrix::NxsCXXDiscreteMatrix(const NxsCharactersBlock & cb, bool gapsToMissing, const NxsUnsignedSet * toInclude, bool standardizeCoding)
+	{
+	const NxsCharactersBlock * const source = &cb;
+	this->Initialize(source, gapsToMissing, toInclude, standardizeCoding);
+	}
+
+/**
+ *	Resets this object to an empty matrix and, if `cb` is not NULL, refills it from that characters block.
+ *
+ *	`toInclude` lists the character indices to copy (0L means every character, 0 .. cb->GetNChar() - 1).
+ *	`standardizeCoding` selects the code used for missing data: the number of fundamental states when true,
+ *	NXS_MISSING_CODE when false. `gapsToMissing` only has an effect together with `standardizeCoding`: gaps are
+ *	then recoded to the missing code instead of -1.
+ *
+ *	Throws NxsException when the block has no datatype mapper, when the included characters use more than one
+ *	mapper (or none), when the mapper's datatype is outside [LowestNxsCDatatype, HighestNxsCDatatype], or when
+ *	the symbols string is shorter than the number of states.
+ */
+void NxsCXXDiscreteMatrix::Initialize(const NxsCharactersBlock * cb, bool gapsToMissing, const NxsUnsignedSet * toInclude, bool standardizeCoding)
+{
+	// Start from a completely empty state so that a NULL `cb` leaves a valid, empty object.
+	this->nativeCMatrix.stateList = 0L;
+	this->nativeCMatrix.stateListPos = 0L;
+	this->nativeCMatrix.matrix = 0L;
+	this->nativeCMatrix.symbolsList = 0L;
+	this->nativeCMatrix.nStates = 0;
+	this->nativeCMatrix.nChar = 0;
+	this->nativeCMatrix.nTax = 0L;
+	this->nativeCMatrix.nObservedStateSets = 0;
+	this->nativeCMatrix.datatype = NxsAltGeneric_Datatype;
+	this->symbolsStringAlias.clear();
+	this->matrixAlias.Initialize(0, 0);
+	this->stateListAlias.clear();
+	this->stateListPosAlias.clear();
+	this->intWts.clear();
+	this->dblWts.clear();
+	this->activeExSet.clear();
+	if (cb == NULL)
+		return;
+	std::vector<const NxsDiscreteDatatypeMapper *> mappers = cb->GetAllDatatypeMappers();
+	if (mappers.empty() || mappers[0] == NULL)
+		throw NxsException("no mappers");
+
+	// Collect the distinct mappers used by the included characters; exactly one is supported.
+	std::set <const NxsDiscreteDatatypeMapper * > usedMappers;
+	NxsUnsignedSet scratchSet;
+	if (toInclude == 0L)
+		{
+		// no explicit selection: include every character of the block
+		for (unsigned i = 0; i < cb->GetNChar(); ++i)
+			scratchSet.insert(i);
+		toInclude = & scratchSet;
+	 	}
+	for (NxsUnsignedSet::const_iterator indIt = toInclude->begin(); indIt != toInclude->end(); ++indIt)
+		{
+		unsigned charIndex = *indIt;
+		usedMappers.insert(cb->GetDatatypeMapperForChar(charIndex));
+		}
+
+
+	if (usedMappers.size() > 1)
+		throw NxsException("too many mappers");
+	if (usedMappers.empty())
+		throw NxsException("no mappers - or empty charset");
+
+
+	const NxsDiscreteDatatypeMapper & mapper = **usedMappers.begin();
+	const NxsDiscreteStateMatrix & rawMatrix = cb->GetRawDiscreteMatrixRef();
+
+	NxsCharactersBlock::DataTypesEnum inDatatype = mapper.GetDatatype();
+	if (inDatatype < LowestNxsCDatatype || inDatatype > HighestNxsCDatatype)
+		throw NxsException("Datatype cannot be converted to NxsCDiscreteMatrix");
+	this->nativeCMatrix.datatype = NxsAltDatatypes(inDatatype);
+	this->nativeCMatrix.nStates = mapper.GetNumStates();
+	const std::string fundamentalSymbols = mapper.GetSymbols();
+	const std::string fundamentalSymbolsPlusGaps = mapper.GetSymbolsWithGapChar();
+	// the mapper is treated as using gaps when adding the gap char changes the symbols string
+	const bool hadGaps = !(fundamentalSymbols == fundamentalSymbolsPlusGaps);
+
+	this->symbolsStringAlias = fundamentalSymbols;
+	char missingSym = cb->GetMissingSymbol();
+	const NxsCDiscreteState_t newMissingStateCode = (standardizeCoding ? (NxsCDiscreteState_t) this->nativeCMatrix.nStates : (NxsCDiscreteState_t) NXS_MISSING_CODE);
+	NCL_ASSERT((int)NXS_MISSING_CODE < 0);
+	NCL_ASSERT((int)NXS_GAP_STATE_CODE < 0);
+	// sclOffset is the most negative raw state code that is provided for (gap and/or missing).
+	NxsDiscreteStateCell sclOffsetV;
+	if (hadGaps)
+		sclOffsetV = std::min((NxsDiscreteStateCell)NXS_GAP_STATE_CODE, (NxsDiscreteStateCell)NXS_MISSING_CODE);
+	else
+		sclOffsetV = NXS_MISSING_CODE;
+	const NxsDiscreteStateCell sclOffset(sclOffsetV);
+
+	// recodeArr points negSCLOffset elements into recodeVec so that it can be indexed directly with raw
+	// state codes, including the negative ones. Every slot starts out as -2.
+	const NxsDiscreteStateCell negSCLOffset = -sclOffset;
+	const unsigned nMapperStateCodes = mapper.GetNumStateCodes();
+	const unsigned recodeVecLen = nMapperStateCodes;
+	const unsigned nMapperPosStateCodes = nMapperStateCodes + sclOffset;
+	std::vector<NxsCDiscreteState_t> recodeVec(recodeVecLen + negSCLOffset, -2);
+	NxsCDiscreteState_t * recodeArr = &recodeVec[negSCLOffset];
+
+	if (fundamentalSymbols.length() < this->nativeCMatrix.nStates)
+		throw NxsException("Fundamental states missing from the symbols string");
+	const unsigned nfun_sym = (const unsigned)fundamentalSymbols.length();
+	// Fundamental states map to themselves. Each one gets a two-element record {1, i} in stateListAlias,
+	// whose start position (2*i) is recorded in stateListPosAlias.
+	for (NxsCDiscreteState_t i = 0; i < (NxsCDiscreteState_t)this->nativeCMatrix.nStates; ++i)
+		{
+		// NOTE(review): this condition requires fundamentalSymbols[i] == '\0', which makes the assertion
+		// inside trivially true; `!= '\0'` (the negation of the assertion) may have been intended -- confirm.
+		if (i < (NxsCDiscreteState_t)nfun_sym && (NxsCDiscreteState_t)fundamentalSymbols[i] == '\0' && mapper.PositionInSymbols(fundamentalSymbols[i]) != (NxsDiscreteStateCell) i)
+			{
+			NCL_ASSERT(i >= (NxsCDiscreteState_t)nfun_sym || fundamentalSymbols[i] == '\0' || mapper.PositionInSymbols(fundamentalSymbols[i]) == (NxsDiscreteStateCell) i);
+			}
+#		if !defined (NDEBUG)
+			const std::set<NxsDiscreteStateCell>	 & ss =  mapper.GetStateSetForCode(i);
+			NCL_ASSERT(ss.size() == 1);
+			NCL_ASSERT(*ss.begin() == i);
+#		endif
+		stateListAlias.push_back(1);
+		stateListAlias.push_back(i);
+		stateListPosAlias.push_back((unsigned) 2*i);
+		recodeArr[i] = i;
+		}
+
+	//NXS_INVALID_STATE_CODE
+
+	// Gap recoding: with standardized coding a gap becomes the missing code (gapsToMissing) or -1;
+	// otherwise the gap code is kept as it is.
+	if (hadGaps)
+		{
+		if (standardizeCoding)
+		    recodeArr[NXS_GAP_STATE_CODE] = ((hadGaps && gapsToMissing)? newMissingStateCode : -1);
+        else
+		    recodeArr[NXS_GAP_STATE_CODE] = NXS_GAP_STATE_CODE;
+        }
+
+	// If the block reports no missing symbol, fall back to the gap symbol when gaps are in use, else '?'.
+	if (missingSym == '\0')
+		missingSym = (hadGaps ? mapper.GetGapSymbol() : '?');
+	else
+		{
+		NCL_ASSERT(NXS_MISSING_CODE == mapper.GetStateCodeStored(missingSym));
+		}
+	recodeArr[NXS_MISSING_CODE] = newMissingStateCode;
+	// Number of entries listed for the missing code: every fundamental state, plus one entry for the
+	// gap (-1) unless gaps are folded into missing.
+	const unsigned nCodesInMissing  = this->nativeCMatrix.nStates + (gapsToMissing ?  0 : 1);
+	if (standardizeCoding)
+	    {
+	    // The missing code follows the fundamental states: add its symbol and its state-list record
+	    // (count, then -1 for the gap if applicable, then every fundamental state).
+	    this->symbolsStringAlias.append(1, missingSym);
+        stateListPosAlias.push_back(2*this->nativeCMatrix.nStates);
+        stateListAlias.push_back(nCodesInMissing);
+        if (!gapsToMissing)
+            stateListAlias.push_back(-1);
+        for (NxsCDiscreteState_t i = 0; i < (NxsCDiscreteState_t)this->nativeCMatrix.nStates; ++i)
+            stateListAlias.push_back(i);
+        }
+
+	// Remaining (multi-state) codes of the mapper: a non-polymorphic code whose state set has
+	// nCodesInMissing or nCodesInMissing + 1 members is recoded to the missing code; every other code
+	// gets the next free code, a state-list record, and a symbol (' ' when its NEXUS string is not a
+	// single character).
+	NxsCDiscreteState_t nextStateCode = (standardizeCoding ? (newMissingStateCode + 1) : this->nativeCMatrix.nStates);
+	for (NxsDiscreteStateCell i = (NxsDiscreteStateCell)this->nativeCMatrix.nStates; i < (NxsDiscreteStateCell) nMapperPosStateCodes; ++i)
+		{
+		const std::set<NxsDiscreteStateCell>	 &ss = mapper.GetStateSetForCode( i);
+		const unsigned ns = (const unsigned)ss.size();
+		const bool mapToMissing  = (!mapper.IsPolymorphic(i) && (nCodesInMissing + 1 == ns || nCodesInMissing == ns));
+		if (mapToMissing)
+			recodeArr[i] = newMissingStateCode;
+		else
+			{
+			recodeArr[i] = nextStateCode++;
+			stateListPosAlias.push_back((unsigned)stateListAlias.size());
+			stateListAlias.push_back(ns);
+			for (std::set<NxsDiscreteStateCell>::const_iterator sIt = ss.begin(); sIt != ss.end(); ++sIt)
+				stateListAlias.push_back((NxsCDiscreteState_t) *sIt);
+			std::string stateName = mapper.StateCodeToNexusString(i);
+			if (stateName.length() != 1)
+				this->symbolsStringAlias.append(1, ' ');
+			else
+				this->symbolsStringAlias.append(1, stateName[0]);
+			}
+		}
+	NCL_ASSERT(stateListPosAlias.size() == (unsigned)nextStateCode);
+	NCL_ASSERT(symbolsStringAlias.size() == (unsigned)nextStateCode);
+	this->nativeCMatrix.nObservedStateSets = nextStateCode;
+
+	// Allocate the nTax x nChar matrix and fill it by recoding the raw matrix, one taxon (row) at a time.
+	this->nativeCMatrix.nTax = (unsigned)rawMatrix.size();
+	this->nativeCMatrix.nChar = (this->nativeCMatrix.nTax == 0 ? 0 : toInclude->size());
+	this->matrixAlias.Initialize(this->nativeCMatrix.nTax, this->nativeCMatrix.nChar);
+	nativeCMatrix.matrix = matrixAlias.GetAlias();
+	const unsigned nt = this->nativeCMatrix.nTax;
+	const unsigned nc = this->nativeCMatrix.nChar;
+	for (unsigned r = 0; r < nt; ++r)
+		{
+		NxsCDiscreteStateSet	 * recodedRow = nativeCMatrix.matrix[r];
+		const std::vector<NxsDiscreteStateCell> & rawRowVec = rawMatrix[r];
+		if (rawRowVec.empty())
+			{
+			// a taxon without any raw data is filled with the (recoded) missing code
+			NxsCDiscreteState_t recodedMissing = recodeArr[NXS_MISSING_CODE];
+			for (unsigned c = 0; c < nc; ++c)
+				*recodedRow++ = recodedMissing;
+			}
+		else
+			{
+			NCL_ASSERT(rawRowVec.size() >= nc);
+			const NxsDiscreteStateCell * rawRow = &rawRowVec[0];
+		    NxsUnsignedSet::const_iterator includedIt = toInclude->begin();
+			// output column c holds the c-th included character (toInclude is walked in iteration order)
+			for (unsigned c = 0; c < nc; ++c)
+				{
+				unsigned charIndex = *includedIt++;
+				const NxsDiscreteStateCell rawC = rawRow[charIndex];
+				// NOTE(review): this block only re-asserts its own condition, so it is empty if
+				// NCL_ASSERT expands to nothing -- presumably a debugging anchor; confirm.
+				if ((unsigned)(rawC +  negSCLOffset) >= recodeVecLen)
+					{
+					NCL_ASSERT((unsigned)(rawC +  negSCLOffset) < recodeVecLen);
+					}
+				NCL_ASSERT(rawC >= sclOffset);
+				const NxsCDiscreteState_t recodedC = recodeArr[rawC];
+				NCL_ASSERT(recodedC > -2 || !standardizeCoding);
+				NCL_ASSERT(recodedC < nextStateCode);
+				*recodedRow++ = recodedC;
+				}
+			}
+		}
+	// Point the C struct at the storage owned by this object.
+	nativeCMatrix.symbolsList = symbolsStringAlias.c_str();
+	nativeCMatrix.stateListPos = &stateListPosAlias[0];
+	nativeCMatrix.stateList = &stateListAlias[0];
+
+	// Character weights: the default integer weights are used when there are any, otherwise the default
+	// double weights. The excluded-character set is copied from the block as well.
+	intWts.clear();
+	dblWts.clear();
+	const NxsTransformationManager &tm = cb->GetNxsTransformationManagerRef();
+	intWts = tm.GetDefaultIntWeights();
+	if (intWts.empty())
+		dblWts = tm.GetDefaultDoubleWeights();
+	activeExSet = cb->GetExcludedIndexSet();
+}
+
+/**
+ *	Constructs  from the native C struct NxsCDiscreteMatrix
+ *		by deep copy.
+ *
+ *	The symbols string, the state-list position array, the state list and the matrix cells are copied into
+ *	storage owned by this object, and the pointers of the embedded C struct are redirected to those copies.
+ *	The length of the state list is derived from the last state set: its start position, plus the count stored
+ *	there, plus one. When `mat` has no observed state sets the state-list pointers are set to 0L; a NULL
+ *	`symbolsList` is treated as an empty string.
+ */
+NxsCXXDiscreteMatrix::NxsCXXDiscreteMatrix(const NxsCDiscreteMatrix & mat)
+	:nativeCMatrix(mat),//aliases pointers, but we'll fix this below
+	symbolsStringAlias(mat.symbolsList ? mat.symbolsList : ""), // std::string cannot be built from a NULL pointer
+	matrixAlias(mat.nTax, mat.nChar),
+	stateListPosAlias(mat.stateListPos, (mat.stateListPos + mat.nObservedStateSets))
+	{
+	nativeCMatrix.symbolsList = symbolsStringAlias.c_str();
+	if (mat.nObservedStateSets > 0)
+		{
+		nativeCMatrix.stateListPos = &stateListPosAlias[0];
+		const unsigned lastStateIndex = nativeCMatrix.stateListPos[nativeCMatrix.nObservedStateSets - 1];
+		const unsigned lenAmbigList = lastStateIndex + mat.stateList[lastStateIndex] + 1;
+		//	cout << "lenAmbigList = "<< lenAmbigList <<endl;
+		stateListAlias.reserve(lenAmbigList);
+		ncl_copy(mat.stateList, (mat.stateList + lenAmbigList), std::back_inserter(stateListAlias));
+		// lenAmbigList is at least 1, so stateListAlias is not empty here
+		nativeCMatrix.stateList = &stateListAlias[0];
+		}
+	else
+		{
+		// Both vectors are empty: taking the address of element 0 would be invalid, and the
+		// pointers copied from `mat` must not be kept because this object does not own them.
+		nativeCMatrix.stateListPos = 0L;
+		nativeCMatrix.stateList = 0L;
+		}
+	nativeCMatrix.matrix = matrixAlias.GetAlias();
+
+	// cout << "Matrix in NxsCXXDiscreteMatrix ctor:" << mat.nTax << ' '<< mat.nChar<< endl;
+	for (unsigned i = 0; i < mat.nTax; ++i)
+		{
+		if (mat.nChar > 0)
+			ncl_copy(mat.matrix[i], mat.matrix[i] + mat.nChar, nativeCMatrix.matrix[i]);
+		}
+
+	}
diff --git a/src/nxsdatablock.cpp b/src/nxsdatablock.cpp
new file mode 100644
index 0000000..6f59c6b
--- /dev/null
+++ b/src/nxsdatablock.cpp
@@ -0,0 +1,63 @@
+//	Copyright (C) 1999-2003 Paul O. Lewis
+//
+//	This file is part of NCL (Nexus Class Library) version 2.0.
+//
+//	NCL is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	NCL is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with NCL; if not, write to the Free Software Foundation, Inc.,
+//	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+
+#include "ncl/nxsdatablock.h"
+
+/*!
+	Forwards both block pointers to the NxsCharactersBlock constructor, sets `NCL_BLOCKTYPE_ATTR_NAME' to "DATA" and
+	then calls Reset(), which leaves `newtaxa' set to true.
+*/
+NxsDataBlock::NxsDataBlock(
+  NxsTaxaBlockAPI *tb,			/* the taxa block object for storing taxon labels */
+  NxsAssumptionsBlockAPI *ab)	/* the assumptions block object for storing exsets */
+  : NxsCharactersBlock(tb, ab)
+	{
+	NCL_BLOCKTYPE_ATTR_NAME = "DATA";
+	this->Reset();
+	}
+
+/*!
+	Returns the block to its pristine state so that another DATA block can be read: the parent class
+	(NxsCharactersBlock) is reset first, and `newtaxa' is then switched back to true.
+*/
+void NxsDataBlock::Reset()
+	{
+	// the parent reset has to run first; newtaxa is assigned afterwards so that the value sticks
+	this->NxsCharactersBlock::Reset();
+	this->newtaxa = true;
+	}
+
+/*!
+	Hands the contents of this NxsDataBlock over to the supplied NxsCharactersBlock: the receiver is Reset() and then
+	asked to Consume() this object (viewed as its NxsCharactersBlock base). As documented for the original
+	implementation, this NxsDataBlock will subsequently say it is empty when asked.
+*/
+void NxsDataBlock::TransferTo(
+  NxsCharactersBlock &charactersblock)	/* the NxsCharactersBlock object that will receive all the data from this object */
+	{
+	NxsCharactersBlock & selfAsCharacters = static_cast<NxsCharactersBlock &>(*this);
+	charactersblock.Reset();
+	charactersblock.Consume(selfAsCharacters);
+	}
+
+/*!
+	Factory entry point for DATA blocks. Returns NULL unless a reader was supplied and `idneeded' is exactly "DATA";
+	otherwise returns a newly allocated NxsDataBlock (constructed with NULL taxa and assumptions blocks) on which
+	SetCreateImpliedBlock(true) and SetImplementsLinkAPI(true) have been called. The token argument is not used.
+*/
+NxsDataBlock *NxsDataBlockFactory::GetBlockReaderForID(const std::string & idneeded, NxsReader *reader, NxsToken *)
+	{
+	const bool canServe = (reader != NULL && idneeded == "DATA");
+	if (!canServe)
+		return NULL;
+	NxsDataBlock * const freshBlock = new NxsDataBlock(NULL, NULL);
+	freshBlock->SetCreateImpliedBlock(true);
+	freshBlock->SetImplementsLinkAPI(true);
+	return freshBlock;
+	}
diff --git a/src/nxsdistancedatum.cpp b/src/nxsdistancedatum.cpp
new file mode 100644
index 0000000..f788a4b
--- /dev/null
+++ b/src/nxsdistancedatum.cpp
@@ -0,0 +1,36 @@
+//	Copyright (C) 1999-2003 Paul O. Lewis
+//
+//	This file is part of NCL (Nexus Class Library) version 2.0.
+//
+//	NCL is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	NCL is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with NCL; if not, write to the Free Software Foundation, Inc.,
+//	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+
+#include "ncl/ncl.h"
+
+// /*----------------------------------------------------------------------------------------------------------------------
+// |	Initializes value to 0.0 and missing to true.
+// */
+// NxsDistanceDatum::NxsDistanceDatum()
+// 	{
+// 	missing	= true;
+// 	value	= 0.0;
+// 	}
+
+// /*----------------------------------------------------------------------------------------------------------------------
+// |	Does nothing.
+// */
+// NxsDistanceDatum::~NxsDistanceDatum()
+// 	{
+// 	}
diff --git a/src/nxsdistancesblock.cpp b/src/nxsdistancesblock.cpp
new file mode 100644
index 0000000..eb4e38b
--- /dev/null
+++ b/src/nxsdistancesblock.cpp
@@ -0,0 +1,714 @@
+//	Copyright (C) 1999-2003 Paul O. Lewis
+//
+//	This file is part of NCL (Nexus Class Library) version 2.0.
+//
+//	NCL is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	NCL is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with NCL; if not, write to the Free Software Foundation, Inc.,
+//	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+#include <iomanip>
+#include <climits>
+#include <cstdlib>
+
+#include "ncl/nxsdistancesblock.h"
+#include "ncl/nxsreader.h"
+using namespace std;
+
+
+void NxsDistancesBlock::WriteFormatCommand(std::ostream &out) const
+	{
+	out << "    FORMAT Missing = " << this->missing << " Triangle = Lower Diagonal;\n";	// the layout WriteMatrixCommand() emits
+	}
+
+void NxsDistancesBlock::WriteMatrixCommand(std::ostream &out) const
+	{
+	// Writes the MATRIX command: per taxon the escaped label, padding, the cells below the diagonal, then " 0.0".
+	if (taxa == NULL)
+		return;
+	const unsigned width = taxa->GetMaxTaxonLabelLength();
+	const unsigned ntaxTotal = taxa->GetNTax();
+	out << "MATRIX";
+	const int prec = (int)out.precision(10);	// remember the caller's precision; it is restored before returning
+	for (unsigned i = 0; i < ntaxTotal; i++)
+		{
+		const std::string currTaxonLabel = NxsString::GetEscaped(taxa->GetTaxonLabel(i));
+		out << '\n' << currTaxonLabel;
+		const unsigned currTaxonLabelLen = (unsigned)currTaxonLabel.size();
+		// The escaped label may be longer than `width'; clamp so the unsigned subtraction cannot wrap around.
+		const unsigned diff = (width > currTaxonLabelLen ? width - currTaxonLabelLen : 0U);
+		out << std::string(diff + 5, ' ');
+		for (unsigned j = 0; j < i; j++)
+			{
+			if (IsMissing(i,j))
+				out << ' ' << missing << "         ";
+			else
+				out << ' ' << GetDistance(i, j);
+			}
+		out << " 0.0";
+		}
+	out << ";\n";
+	out.precision(prec);
+	}
+
+void NxsDistancesBlock::WriteAsNexus(std::ostream &out) const	// writes the whole block, "BEGIN DISTANCES;" through "END;", to `out'
+	{
+	out << "BEGIN DISTANCES;\n";
+	WriteBasicBlockCommands(out);
+	if (nchar > 0)	// a DIMENSIONS line is only written when an NChar value has been recorded
+		out << "    DIMENSIONS NChar = " << nchar << ";\n";
+	WriteFormatCommand(out);
+	WriteMatrixCommand(out);
+	WriteSkippedCommands(out);
+	out << "END;\n";
+	}
+
+
+/*!
+	Constructs a DISTANCES block reader that uses the taxa block `t'. Sets the block type name to "DISTANCES" and then calls Reset(), which installs the defaults.
+*/
+NxsDistancesBlock::NxsDistancesBlock(
+  NxsTaxaBlockAPI *t)	/* the NxsTaxaBlockAPI that will keep track of taxon labels */
+  : NxsBlock(),
+  NxsTaxaBlockSurrogate(t, NULL)
+	{
+	NCL_BLOCKTYPE_ATTR_NAME = "DISTANCES";
+	Reset();
+	}
+
+/*!
+	Destructor. Simply calls Reset(), which among other things clears `matrix'.
+*/
+NxsDistancesBlock::~NxsDistancesBlock()
+	{
+	Reset();
+	}
+
+/*!
+	Called when DIMENSIONS command needs to be parsed from within the DISTANCES block. Deals with everything after the
+	token DIMENSIONS up to and including the terminating semicolon. NEWTAXA, NTAX=<n> and NCHAR=<n> are recognized and any
+	other token is skipped. Throws NxsException if the file ends before the semicolon or the taxon counts are unusable.
+*/
+void NxsDistancesBlock::HandleDimensionsCommand(
+  NxsToken &token)	/* the token used to read from `in' */
+	{
+	nchar = 0;
+	unsigned ntaxRead = 0;
+	for (token.GetNextToken(); !token.Equals(";"); token.GetNextToken())
+		{
+		if (token.AtEOF())	// a missing ';' must not keep this loop running (presumably forever) past the end of the input
+			throw NxsException("Unexpected end of file encountered in DIMENSIONS command", token);
+		if (token.Equals("NEWTAXA"))
+			newtaxa = true;
+		else if (token.Equals("NTAX"))
+			{
+			DemandEquals(token, "after NTAX in DIMENSIONS command");
+			ntaxRead = DemandPositiveInt(token, "NTAX");
+			}
+		else if (token.Equals("NCHAR"))
+			{
+			DemandEquals(token, "in DIMENSIONS command");
+			nchar = DemandPositiveInt(token, "NCHAR");
+			}
+		}
+	if (newtaxa)
+		{
+		if (ntaxRead == 0)	// NEWTAXA means the labels come from this block, so the count has to be given here
+			{
+			errormsg = "DIMENSIONS command must have an NTAX subcommand when the NEWTAXA option is in effect.";
+			throw NxsException(errormsg, token);
+			}
+		expectedNtax = ntaxRead;
+		AssureTaxaBlock(createImpliedBlock, token, "Dimensions");
+		if (!createImpliedBlock)
+			{
+			taxa->Reset();	// an existing taxa block is being reused: empty it first
+			if (nexusReader)
+				nexusReader->RemoveBlockFromUsedBlockList(taxa);
+			}
+		taxa->SetNtax(expectedNtax);
+		}
+	else
+		{
+		AssureTaxaBlock(false, token, "Dimensions");
+		const unsigned ntaxinblock = taxa->GetNumTaxonLabels();
+		if (ntaxinblock == 0)
+			{
+			errormsg = "A TAXA block must be read before character data, or the DIMENSIONS command must use the NEWTAXA.";
+			throw NxsException(errormsg, token);
+			}
+		if (ntaxinblock < ntaxRead)
+			{
+			errormsg = "NTAX in ";
+			errormsg << NCL_BLOCKTYPE_ATTR_NAME << " block must be less than or equal to NTAX in TAXA block\nNote: one circumstance that can cause this error is \nforgetting to specify NTAX in DIMENSIONS command when \na TAXA block has not been provided";
+			throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+			}
+		expectedNtax = (ntaxRead == 0 ? ntaxinblock : ntaxRead);	// no NTAX given: use every taxon of the taxa block
+		}
+	}
+
+/*!
+	Called when FORMAT command needs to be parsed from within the DISTANCES block. Deals with everything after the token FORMAT up to and including the terminating semicolon.
+	Throws NxsException for an unknown subcommand, a bad TRIANGLE value, or a MISSING symbol that is not exactly one non-digit character.
+*/
+void NxsDistancesBlock::HandleFormatCommand(
+  NxsToken &token)	/* the token used to read from `in' */
+	{
+	for (;;)
+		{
+		token.GetNextToken();
+		if (token.Equals(";"))
+			break;
+		if (token.Equals("TRIANGLE"))
+			{
+			DemandEquals(token, "after TRIANGLE");
+			token.GetNextToken();
+			if (token.Equals("LOWER"))
+				triangle = NxsDistancesBlockEnum(lower);
+			else if (token.Equals("UPPER"))
+				triangle = NxsDistancesBlockEnum(upper);
+			else if (token.Equals("BOTH"))
+				triangle = NxsDistancesBlockEnum(both);
+			else
+				{
+				errormsg = "Expecting UPPER, LOWER, or BOTH but found ";
+				errormsg += token.GetToken();
+				errormsg += " instead";
+				throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+				}
+			}
+		else if (token.Equals("DIAGONAL"))
+			diagonal = true;
+		else if (token.Equals("NODIAGONAL"))
+			diagonal = false;
+		else if (token.Equals("LABELS"))
+			labels = true;
+		else if (token.Equals("NOLABELS"))
+			labels = false;
+		else if (token.Equals("INTERLEAVE"))
+			interleave = true;
+		else if (token.Equals("NOINTERLEAVE"))
+			interleave = false;
+		else if (token.Equals("MISSING"))
+			{
+			DemandEquals(token, "after MISSING");
+			token.GetNextToken();
+			// isdigit() is only defined for unsigned char values (and EOF), hence the cast; the length is checked first so [0] is valid.
+			if (token.GetTokenLength() != 1 || isdigit(static_cast<unsigned char>(token.GetTokenReference()[0])))
+				{
+				errormsg = "Missing data symbol specified (";
+				errormsg += token.GetToken();
+				errormsg += ") is invalid (must be a single character)";
+				throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+				}
+			missing = token.GetTokenReference()[0];
+			}
+		else	// anything unrecognized is an error here, which also ends the loop if the input runs out
+			{
+			errormsg = "Token specified (";
+			errormsg += token.GetToken();
+			errormsg += ") is an invalid subcommand for the FORMAT command";
+			throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+			}
+		}
+	}
+
+/*!
+	Called from within HandleMatrixCommand, this function reads one pass through the taxa, which is how interleaved matrices are dealt with. Each row is an optional taxon label followed by distance values.
+	The local variable `jmax' records the number of columns read in the current interleaved page; it is added to `offset' on exit and so determines the offset used for j in subsequent pages.
+	Returns the local flag `done', which the column loop sets once it decides the last cell has been reached. Throws NxsException on malformed input (NxsNCLAPIException for an unresolved row index).
+*/
+bool NxsDistancesBlock::HandleNextPass(
+  NxsToken &token,	/* the token we are using for reading the data file */
+  unsigned &offset,	/* in/out: the column offset of this page; incremented by `jmax' before returning */
+  vector<unsigned> & fileMatrixCmdOrderToTaxInd,	/* row position in the MATRIX command -> taxon index; UINT_MAX while unknown */
+  set<unsigned> & taxIndsRead)	/* taxon indices whose label has already been read */
+	{
+	unsigned jmax = 0;
+	bool done = false;
+
+	unsigned i_first = 0;
+	if (triangle == NxsDistancesBlockEnum(lower))
+		i_first = offset;	// lower triangle: this pass starts at row `offset'
+	unsigned i_last = expectedNtax;
+	errormsg.clear();
+	for (unsigned i = i_first; i < i_last; i++)
+		{
+		// Deal with taxon label if provided. Here are the four situations we need to deal with:
+		//   newtaxa  (offset > 0)  handled by
+		//      0           0         case 1
+		//      0           1         case 1
+		//      1           0         case 2
+		//      1           1         case 1
+		//
+		if (labels && (!newtaxa || offset > 0))
+			{
+			// Case 1: Expecting taxon labels, and also expecting them to already be in taxa
+			//
+			do
+				{
+				token.SetLabileFlagBit(NxsToken::newlineIsToken);	// presumably makes line ends visible to AtEOL() below -- confirm
+				token.GetNextToken();
+				}
+			while(token.AtEOL());	// skip over empty lines before the label
+
+			try
+				{
+				// Look up position of taxon in NxsTaxaBlockAPI list
+				//
+				unsigned k = taxa->FindTaxon(token.GetToken());
+				if (k != i && triangle != NxsDistancesBlockEnum(lower))	// taxon order is only enforced when the matrix is not lower-triangular
+					{
+					errormsg << "Taxon " << token.GetToken() << " was not expected in the DISTANCES matrix.\nTaxa should be in the same order as in the Taxon block";
+					throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+					}
+
+				// Entries of fileMatrixCmdOrderToTaxInd start out as UINT_MAX and are filled in as taxa are encountered
+				//
+				if (fileMatrixCmdOrderToTaxInd[i] == UINT_MAX)
+					{
+					fileMatrixCmdOrderToTaxInd[i] = k;
+					if (taxIndsRead.count(k) > 0)
+						{
+						errormsg << "Taxon " << token.GetToken() << " was encountered more than one time in the Distances Matrix.";
+						throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+						}
+					taxIndsRead.insert(k);
+					}
+				else if (fileMatrixCmdOrderToTaxInd[i] != k)	// this row was labelled differently on an earlier page
+					{
+					errormsg << "Taxon labeled " << token.GetToken() << " is out of order compared to previous interleave pages";
+					throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+					}
+				}
+			catch (NxsTaxaBlock::NxsX_NoSuchTaxon)	// raised inside the try block when the label is unknown; reported as a parse error
+				{
+				errormsg = "Could not find ";
+				errormsg += token.GetToken();
+				errormsg += " among taxa previously defined";
+				throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+				}
+			}
+
+		else if (labels && newtaxa)
+			{
+			// Case 2: Expecting taxon labels, and also expecting taxa block to be empty
+			//
+			do
+				{
+				token.SetLabileFlagBit(NxsToken::newlineIsToken);
+				token.GetNextToken();
+				}
+			while(token.AtEOL());
+			const NxsString t(token.GetToken().c_str());
+			taxa->AddTaxonLabel(t);
+			fileMatrixCmdOrderToTaxInd[i] = i;	// a newly added taxon is recorded under its own row number
+			taxIndsRead.insert(i);
+			}
+
+		// Now deal with the row of distance values
+		//
+		unsigned true_j = 0;
+		for (unsigned j = 0; j < expectedNtax; j++)
+			{
+			if (i == expectedNtax - 1)	// last row: decide whether the whole matrix is finished
+				{
+				if (j == expectedNtax - 1)
+					done = true;
+				if (true_j == expectedNtax - 1 || (!diagonal && triangle == NxsDistancesBlockEnum(upper)))
+					{
+					done = true;
+					break;
+					}
+				}
+			if (!diagonal && triangle == NxsDistancesBlockEnum(lower) && j == expectedNtax - offset - 1)
+				{
+				done = true;
+				break;
+				}
+
+			token.SetLabileFlagBit(NxsToken::newlineIsToken);
+			token.GetNextToken();
+
+			if (token.AtEOL())	// end of line: this row of the current page is complete
+				{
+				if (j > jmax)
+					{
+					jmax = j;
+					if (!diagonal && triangle == NxsDistancesBlockEnum(upper) && i >= offset)
+						jmax++;
+					if (interleave && triangle == NxsDistancesBlockEnum(upper))
+						i_last = jmax + offset;
+					}
+				break;
+				}
+
+			true_j = j + offset;	// column index within the full matrix, adjusted below for the upper-triangle layouts
+			if (triangle == NxsDistancesBlockEnum(upper) && i > offset)
+				true_j += (i - offset);
+			if (!diagonal && triangle == NxsDistancesBlockEnum(upper) && i >= offset)
+				true_j++;
+
+			if (true_j == expectedNtax)
+				{
+				errormsg = "Too many distances specified in row just read in";
+				throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+				}
+
+			string t = token.GetToken();
+			unsigned corrected_i = fileMatrixCmdOrderToTaxInd.at(i);	// translate the file row into a taxon index
+			unsigned corrected_j = true_j;
+			if (triangle == NxsDistancesBlockEnum(lower))
+				corrected_j = fileMatrixCmdOrderToTaxInd.at(true_j);
+			if (corrected_i == UINT_MAX || corrected_j == UINT_MAX)	// no label was ever recorded for this row or column
+				{
+				errormsg = "Illegal internal row number for taxon in Distance Matrix.";
+				throw NxsNCLAPIException(errormsg, token);
+				}
+			if (token.GetTokenLength() == 1 && t[0] == missing)
+				SetMissing(corrected_i, corrected_j);
+			else
+				SetDistance(corrected_i, corrected_j, atof(t.c_str()));	// NOTE(review): atof reports no errors, so a non-numeric token is stored as 0.0
+			}
+		}
+	offset += jmax;
+	return done;
+	}
+
+void NxsDistancesBlock::CopyDistancesContents(const NxsDistancesBlock &other)
+	{
+	nchar = other.nchar;	// the two counts first
+	expectedNtax = other.expectedNtax;
+	missing = other.missing;	// then the settings a FORMAT command controls
+	triangle = other.triangle;
+	labels = other.labels;
+	interleave = other.interleave;
+	diagonal = other.diagonal;
+	matrix = other.matrix;	// and finally the distances themselves
+	}
+
+/*!
+	Parses a MATRIX command inside the DISTANCES block: everything after the token MATRIX, up to and including the
+	semicolon that terminates the command. The cells themselves are read by repeated calls to HandleNextPass().
+*/
+void NxsDistancesBlock::HandleMatrixCommand(
+  NxsToken &token)	/* the token used to read from `in' */
+	{
+	errormsg.clear();
+	if (expectedNtax == 0 || taxa == NULL)
+		{
+		// Nothing usable yet (no taxon count, or no taxa block): fall back on the taxa block's label count.
+		AssureTaxaBlock(false, token, "Matrix");
+		expectedNtax = taxa->GetNumTaxonLabels();
+		}
+	if (expectedNtax == 0)
+		{
+		errormsg = "MATRIX command cannot be read if NTAX is zero";
+		throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+		}
+
+	if (!diagonal && triangle == NxsDistancesBlockEnum(both))
+		{
+		errormsg = "Cannot specify NODIAGONAL and TRIANGLE=BOTH at the same time";
+		throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+		}
+	if (newtaxa)
+		taxa->Reset();
+
+	vector<unsigned> rowToTaxon(expectedNtax, UINT_MAX);	// UINT_MAX marks a row whose taxon is not known yet
+	set<unsigned> seenTaxa;
+	const unsigned labelCount = taxa->GetNumTaxonLabels();
+	if (labelCount < expectedNtax)
+		{
+		errormsg << "NTAX in " << NCL_BLOCKTYPE_ATTR_NAME << " block must be less than or equal to NTAX in TAXA block\nNote: one circumstance that can cause this error is \nforgetting to specify NTAX in DIMENSIONS command when \na TAXA block has not been provided";
+		throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+		}
+	// Start from a labelCount x labelCount matrix whose rows are all copies of one freshly constructed row.
+	matrix.assign(labelCount, NxsDistanceDatumRow(labelCount));
+	unsigned pageOffset = 0;
+	// One pass through the taxa per call; HandleNextPass() answers true once the matrix is complete.
+	while (!HandleNextPass(token, pageOffset, rowToTaxon, seenTaxa))
+		continue;
+	DemandEndSemicolon(token, "MATRIX");
+	}
+
+/*!
+	Reads the body of a DISTANCES block: everything following the block name (which is read by the NEXUS object) to
+	the end or endblock statement. Generic commands go to HandleBasicBlockCommands(); DIMENSIONS, FORMAT, TAXLABELS
+	and MATRIX have handlers of their own; anything else is skipped. Overrides the virtual function in the base class.
+*/
+void NxsDistancesBlock::Read(
+  NxsToken &token)	/* the token used to read from `in' */
+	{
+	isEmpty = false;
+
+	DemandEndSemicolon(token, "BEGIN DISTANCES");
+
+	for (;;)
+		{
+		token.GetNextToken();
+		const NxsBlock::NxsCommandResult outcome = HandleBasicBlockCommands(token);
+		if (outcome == NxsBlock::NxsCommandResult(STOP_PARSING_BLOCK))
+			break;
+		if (outcome == NxsBlock::NxsCommandResult(HANDLED_COMMAND))
+			continue;
+		// Not handled generically, so dispatch on the command name.
+		if (token.Equals("DIMENSIONS"))
+			HandleDimensionsCommand(token);
+		else if (token.Equals("FORMAT"))
+			HandleFormatCommand(token);
+		else if (token.Equals("TAXLABELS"))
+			HandleTaxLabels(token);
+		else if (token.Equals("MATRIX"))
+			HandleMatrixCommand(token);
+		else
+			SkipCommand(token);
+		}
+	}
+
+/*!
+	Writes a brief, human-readable report of this DISTANCES block to `out': the number of taxa, the FORMAT settings and, unless `expectedNtax' is 0,
+	the distances themselves. The stream's formatting flags are put back before returning. Overrides the abstract virtual function in the base class.
+*/
+void NxsDistancesBlock::Report(
+  std::ostream &out) NCL_COULD_BE_CONST /* the output stream to which to write the report */ /*v2.1to2.2 1 */
+	{
+	// Like WriteMatrixCommand(), tolerate a block that has no taxa block attached.
+	const unsigned ntaxTotal = (taxa == NULL ? 0U : taxa->GetNumTaxonLabels());
+
+	out << endl;
+	out << NCL_BLOCKTYPE_ATTR_NAME << " block contains ";
+	if (ntaxTotal == 0)
+		out << "no taxa" << endl;
+	else if (ntaxTotal == 1)
+		out << "one taxon" << endl;
+	else
+		out << ntaxTotal << " taxa" << endl;
+
+	if (IsLowerTriangular())
+		out << "  Matrix is lower-triangular" << endl;
+	else if (IsUpperTriangular())
+		out << "  Matrix is upper-triangular" << endl;
+	else
+		out << "  Matrix is rectangular" << endl;
+
+	if (IsInterleave())
+		out << "  Matrix is interleaved" << endl;
+	else
+		out << "  Matrix is non-interleaved" << endl;
+
+	if (IsLabels())
+		out << "  Taxon labels provided" << endl;
+	else
+		out << "  No taxon labels provided" << endl;
+
+	if (IsDiagonal())
+		out << "  Diagonal elements specified" << endl;
+	else
+		out << "  Diagonal elements not specified" << endl;
+
+	out << "  Missing data symbol is " << missing << endl;
+
+	if (expectedNtax == 0)
+		return;
+
+	// fixed/showpoint are needed for the table only; remember the caller's flags so they can be restored below.
+	const std::ios_base::fmtflags callerFlags = out.flags();
+	out.setf(ios::fixed, ios::floatfield);
+	out.setf(ios::showpoint);
+	for (unsigned i = 0; i < ntaxTotal; i++)
+		{
+		if (labels)
+			out << setw(20) << taxa->GetTaxonLabel(i);
+		else
+			out << "        ";
+
+		for (unsigned j = 0; j < ntaxTotal; j++)
+			{
+			if (triangle == NxsDistancesBlockEnum(upper) && j < i)
+				out << setw(12) << " ";	// blank cell below the diagonal keeps the columns aligned
+			else if (triangle == NxsDistancesBlockEnum(lower) && j > i)
+				continue;
+			else if (!diagonal && i == j)
+				out << setw(12) << " ";
+			else if (IsMissing(i, j))
+				out << setw(12) << missing;
+			else
+				out << setw(12) << GetDistance(i, j);
+			}
+
+		out << endl;
+		}
+	out.flags(callerFlags);
+	}
+
+/*!
+	Returns the block to its initial state. NxsBlock::Reset() and ResetSurrogate() are called first, `matrix' is
+	emptied, and the DIMENSIONS/FORMAT settings go back to their defaults:
+	`triangle' becomes `NxsDistancesBlockEnum(lower)',
+	`missing' becomes '?',
+	`labels' and `diagonal' become true, `interleave' becomes false,
+	`expectedNtax' and `nchar' become 0.
+	NOTE(review): `newtaxa' is not assigned here; presumably ResetSurrogate() takes care of it -- confirm.
+*/
+void NxsDistancesBlock::Reset()
+	{
+	NxsBlock::Reset();
+	ResetSurrogate();
+	matrix.clear();
+	triangle = NxsDistancesBlockEnum(lower);
+	missing = '?';
+	labels = true;
+	diagonal = true;
+	interleave = false;
+	expectedNtax = 0;
+	nchar = 0;
+	}
+
+/*!
+	Accessor for `nchar', the value last set by a DIMENSIONS command, SetNchar() or Reset().
+*/
+unsigned NxsDistancesBlock::GetNchar() NCL_COULD_BE_CONST /*v2.1to2.2 1 */
+	{
+	return this->nchar;
+	}
+
+/*!
+	Looks up the cell at row `i', column `j' through GetCell() and returns its `value'. The caller is expected to pass
+	indices in the range [0..`ntax') and to have checked IsMissing(i, j) first; neither is verified here.
+*/
+double NxsDistancesBlock::GetDistance(
+  unsigned i,	/* the row */
+  unsigned j) const /* the column */
+	{
+	return this->GetCell(i, j).value;
+	}
+
+/*!
+	Accessor for `missing', the character that stands for a missing distance ('?' after Reset()).
+*/
+char NxsDistancesBlock::GetMissingSymbol() NCL_COULD_BE_CONST /*v2.1to2.2 1 */
+	{
+	return this->missing;
+	}
+
+/*!
+	Accessor for `triangle' (one of the NxsDistancesBlockEnum values lower, upper or both), returned as an unsigned.
+*/
+unsigned NxsDistancesBlock::GetTriangle() NCL_COULD_BE_CONST /*v2.1to2.2 1 */
+	{
+	return this->triangle;
+	}
+
+/*!
+	Tells whether the matrix layout is rectangular, i.e. whether `triangle' equals NxsDistancesBlockEnum(both).
+*/
+bool NxsDistancesBlock::IsRectangular() NCL_COULD_BE_CONST /*v2.1to2.2 1 */
+	{
+	return triangle == NxsDistancesBlockEnum(both);
+	}
+
+/*!
+	Tells whether the matrix layout is upper-triangular, i.e. whether `triangle' equals NxsDistancesBlockEnum(upper).
+*/
+bool NxsDistancesBlock::IsUpperTriangular() NCL_COULD_BE_CONST /*v2.1to2.2 1 */
+	{
+	return triangle == NxsDistancesBlockEnum(upper);
+	}
+
+/*!
+	Tells whether the matrix layout is lower-triangular, i.e. whether `triangle' equals NxsDistancesBlockEnum(lower).
+*/
+bool NxsDistancesBlock::IsLowerTriangular() NCL_COULD_BE_CONST /*v2.1to2.2 1 */
+	{
+	return triangle == NxsDistancesBlockEnum(lower);
+	}
+
+/*!
+	Accessor for `diagonal': true after DIAGONAL (and after Reset()), false after NODIAGONAL.
+*/
+bool NxsDistancesBlock::IsDiagonal() NCL_COULD_BE_CONST /*v2.1to2.2 1 */
+	{
+	return this->diagonal;
+	}
+
+/*!
+	Accessor for `interleave': true after INTERLEAVE, false after NOINTERLEAVE (and after Reset()).
+*/
+bool NxsDistancesBlock::IsInterleave() NCL_COULD_BE_CONST /*v2.1to2.2 1 */
+	{
+	return this->interleave;
+	}
+
+/*!
+	Accessor for `labels': true after LABELS (and after Reset()), false after NOLABELS.
+*/
+bool NxsDistancesBlock::IsLabels() NCL_COULD_BE_CONST /*v2.1to2.2 1 */
+	{
+	return this->labels;
+	}
+
+/*!
+	Tells whether the cell at row `i', column `j' is flagged as missing. The indices are handed to GetCell() as they
+	are, so the caller is expected to keep both in the range [0..`ntax').
+*/
+bool NxsDistancesBlock::IsMissing(
+  unsigned i,	/* the row */
+  unsigned j) const	/* the column */
+	{
+	return static_cast<bool>(GetCell(i, j).missing);
+	}
+
+/*!
+	Stores `d' as the distance in the cell at row `i', column `j' and flags that cell as not missing. The indices are
+	handed to GetCell() as they are, so the caller is expected to keep both in the range [0..`ntax').
+*/
+void NxsDistancesBlock::SetDistance(
+  unsigned i,	/* the row */
+  unsigned j,	/* the column */
+  double d)		/* the distance value */
+	{
+	NxsDistanceDatum & cell = GetCell(i, j);
+	cell.missing = false;
+	cell.value = d;
+	}
+
+/*!
+	Flags the cell at row `i', column `j' as missing and zeroes its stored value. The indices are handed to GetCell()
+	as they are, so the caller is expected to keep both in the range [0..`ntax').
+*/
+void NxsDistancesBlock::SetMissing(
+  unsigned i,	/* the row */
+  unsigned j)	/* the column */
+	{
+	NxsDistanceDatum & cell = GetCell(i, j);
+	cell.value = 0.0;
+	cell.missing = 1;
+	}
+
+/*!
+	Overwrites `nchar' with `n'; no validation is performed.
+*/
+void NxsDistancesBlock::SetNchar(
+  unsigned n)	/* the number of characters */
+	{
+	this->nchar = n;
+	}
+
+NxsDistancesBlock *NxsDistancesBlockFactory::GetBlockReaderForID(const std::string & idneeded, NxsReader *reader, NxsToken *)
+	{
+	if (!(reader != NULL && idneeded == "DISTANCES"))
+		return NULL;	// no reader to serve, or a block type other than DISTANCES was requested
+	NxsDistancesBlock * const freshBlock = new NxsDistancesBlock(NULL);
+	freshBlock->SetCreateImpliedBlock(true);
+	freshBlock->SetImplementsLinkAPI(true);
+	return freshBlock;
+	}
diff --git a/src/nxsemptyblock.cpp b/src/nxsemptyblock.cpp
new file mode 100644
index 0000000..34f8233
--- /dev/null
+++ b/src/nxsemptyblock.cpp
@@ -0,0 +1,158 @@
+//	Copyright (C) 1999-2002 Paul O. Lewis
+//
+//	This file is part of NCL (Nexus Class Library).
+//
+//	NCL is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	NCL is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with NCL; if not, write to the Free Software Foundation, Inc.,
+//	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+
+#include "ncl/ncl.h"
+
+// not used in rncl/phylobase
+#if 0
+
+/*----------------------------------------------------------------------------------------------------------------------
+|	Constructor. Sets the data member `id' to "EMPTY", the name of this block in NEXUS data files.
+*/
+NxsEmptyBlock::NxsEmptyBlock()
+	{
+	id = "EMPTY";
+	}
+
+/*----------------------------------------------------------------------------------------------------------------------
+|	Destructor. The body is empty: nothing needs to be done here.
+*/
+NxsEmptyBlock::~NxsEmptyBlock()
+	{
+	}
+
+/*----------------------------------------------------------------------------------------------------------------------
+|	Stub: ignores `s' and always returns 0. The original author notes that this matches the base class version, so the
+|	code here should either be modified or this derived version eliminated altogether. When would it need modifying?
+|	This function should be modified to something meaningful if this derived class needs to construct and run
+|	a NxsSetReader object to read a set involving characters. The NxsSetReader object may need to use this function to
+|	look up a character label encountered in the set. A class that overrides this method should return the character
+|	index in the range [1..`nchar']; i.e., add one to the 0-offset index.
+*/
+unsigned NxsEmptyBlock::CharLabelToNumber(
+  NxsString s)	/* the character label to be translated to character number */
+	{
+	return 0;
+	}
+
+/*----------------------------------------------------------------------------------------------------------------------
+|	Called when the END or ENDBLOCK command needs to be parsed from within the EMPTY block. All the work is delegated
+|	to DemandEndSemicolon(), which is given "END or ENDBLOCK" as the name of the command being terminated.
+*/
+void NxsEmptyBlock::HandleEndblock(
+  NxsToken &token)	/* the token used to read from `in' */
+	{
+	DemandEndSemicolon(token, "END or ENDBLOCK");
+	}
+
+/*----------------------------------------------------------------------------------------------------------------------
+|	This function provides the ability to read everything following the block name (which is read by the NxsReader
+|	object) to the END or ENDBLOCK statement. Every other command is reported to SkippingCommand() and skipped up to
+|	its semicolon; NxsException is thrown if the file ends first. Overrides the pure virtual function in the base class.
+*/
+void NxsEmptyBlock::Read(
+  NxsToken &token)	/* the token used to read from `in'*/
+	{
+	isEmpty = false;
+	NxsString s;
+	s = "BEGIN ";	// this statement used to lack its terminating semicolon
+	s += id;
+	DemandEndSemicolon(token, s.c_str());
+
+	for(;;)
+		{
+		token.GetNextToken();
+
+		if (token.Equals("END"))
+			{
+			HandleEndblock(token);
+			break;
+			}
+
+		else if(token.Equals("ENDBLOCK"))	// ENDBLOCK is handled exactly like END
+			{
+			HandleEndblock(token);
+			break;
+			}
+
+		else
+			{
+			SkippingCommand(token.GetToken());
+
+			do	// discard the rest of the unrecognized command, up to and including its semicolon
+				{
+				token.GetNextToken();
+				}
+			while (!token.AtEOF() && !token.Equals(";"));
+
+			if (token.AtEOF())
+				{
+				errormsg = "Unexpected end of file encountered";
+				throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+				}
+			}
+		}
+	}
+
+/*----------------------------------------------------------------------------------------------------------------------
+|	Prepares for reading a new EMPTY block by delegating to NxsBlock::Reset(); presumably that is where `isEmpty' is
+|	set back to true (confirm in the base class). Overrides the pure virtual function in the base class.
+*/
+void NxsEmptyBlock::Reset()
+	{
+	NxsBlock::Reset();
+	}
+
+/*----------------------------------------------------------------------------------------------------------------------
+|	This function outputs a brief report of the contents of this EMPTY block: a line break followed by the block `id'
+|	and the text " block contains...". Overrides the pure virtual function in the base class.
+*/
+void NxsEmptyBlock::Report(
+  ostream &out)	/* the output stream to which to write the report */
+	{
+	out << endl;
+	out << id << " block contains...";
+	}
+
+/*----------------------------------------------------------------------------------------------------------------------
+|	This function is called when an unknown command named `commandName' is about to be skipped. The body is empty, so
+|	no warning is issued that a command was unrecognized (the original author notes that the base class version behaves
+|	the same way). Modify this virtual function to provide such warnings to the user, or eliminate it altogether if the
+|	base class version already does what is wanted.
+*/
+void NxsEmptyBlock::SkippingCommand(
+  NxsString commandName)	/* the name of the command being skipped */
+	{
+	}
+
+/*----------------------------------------------------------------------------------------------------------------------
+|	Stub: ignores `s' and always returns 0. The original author notes that this matches the base class version, so the
+|	code here should either be modified or this derived version eliminated altogether. When would it need modifying?
+|	This function should be modified to something meaningful if this derived class needs to construct and run
+|	a NxsSetReader object to read a set involving taxa. The NxsSetReader object may need to use this function to look
+|	up a taxon label encountered in the set. A class that overrides this method should return the taxon index in the
+|	range [1..ntax]; i.e., add one to the 0-offset index.
+*/
+unsigned NxsEmptyBlock::TaxonLabelToNumber(
+  NxsString s)	/* the taxon label to be translated to a taxon number */
+	{
+	return 0;
+	}
+
+#endif
diff --git a/src/nxsexception.cpp b/src/nxsexception.cpp
new file mode 100644
index 0000000..3b3521a
--- /dev/null
+++ b/src/nxsexception.cpp
@@ -0,0 +1,106 @@
+//	Copyright (C) 1999-2003 Paul O. Lewis
+//
+//	This file is part of NCL (Nexus Class Library) version 2.0.
+//
+//	NCL is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	NCL is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with NCL; if not, write to the Free Software Foundation, Inc.,
+//	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+
+#include "ncl/nxsexception.h"
+#include "ncl/nxstoken.h"
+
+using namespace std;
+/*!
+	Builds an exception whose message is a copy of `s' and whose pos, line and col are the file position, line and
+	column handed in by the caller (normally the place in the file where parsing stopped).
+*/
+NxsException::NxsException(
+  const std::string & s,	/* the message for the user */
+  file_pos fp,	/* the current file position */
+  long fl,		/* the current file line */
+  long fc)		/* the current file column */
+	{
+	this->addPositionInfo(fp, fl, fc);
+	this->msg.assign(s);
+	}
+
+/*!
+	Builds an exception from the message `s', taking the file position, line and column from the NxsToken `t'.
+*/
+NxsException::NxsException(
+  const std::string &s,		/* message that describes the error */
+  const NxsToken &t)		/* NxsToken that was supplied the last token (the token that caused the error) */
+	{
+	addPositionInfo(t);
+	msg = NxsString(s.c_str());
+	}
+
+NxsException::NxsException(const std::string &s, const ProcessedNxsToken &t)
+	{
+	addPositionInfo(t);	// same as above, with the position taken from a ProcessedNxsToken
+	msg = NxsString(s.c_str());
+	}
+
+NxsException::NxsException(const std::string &s, const NxsTokenPosInfo &t)
+	{
+	addPositionInfo(t);	// same as above, with the position taken from a NxsTokenPosInfo
+	msg = NxsString(s.c_str());
+	}
+const char * NxsException::nxs_what () const	// returns msg with the prefix and the position added; msg itself is rewritten
+	{
+	const std::string prefix = "Nexus Parsing error: ";
+	if (msg.compare(0, prefix.size(), prefix) == 0)
+		return msg.c_str();	// an earlier call already decorated msg; doing it again would duplicate prefix and position
+	msg.insert(0, prefix);
+	if (line >= 0)
+		msg << " at line " << line;
+	if (col >= 0)
+		msg << " column " << col;
+	return msg.c_str();
+	}
+
+NxsSignalCanceledParseException::NxsSignalCanceledParseException(const std::string & s)
+	:NxsException(s)
+	{
+	msg = "Signal detected during NEXUS class library";	// replaces whatever the base constructor stored
+	if (s.size() > 0)
+		msg << " in the processing step: " << s;	// `s' names the processing step, when one was given
+	msg << '.';
+	}
+
+
+void NxsException::addPositionInfo(const NxsToken & t)	// take line, column and file position from a NxsToken
+	{
+	line = t.GetFileLine();
+	col = t.GetFileColumn();
+	pos = t.GetFilePosition();
+	}
+void NxsException::addPositionInfo(const ProcessedNxsToken & t)	// ... from a ProcessedNxsToken
+	{
+	line = t.GetLineNumber();
+	col = t.GetColumnNumber();
+	pos = t.GetFilePosition();
+	}
+void NxsException::addPositionInfo(const NxsTokenPosInfo & t)	// ... from a NxsTokenPosInfo
+	{
+	line = t.GetLineNumber();
+	col = t.GetColumnNumber();
+	pos = t.GetFilePosition();
+	}
+void NxsException::addPositionInfo(file_pos fp, long fl, long fc)	// ... from explicit values
+	{
+	line = fl;
+	col = fc;
+	pos = fp;
+	}
diff --git a/src/nxsmultiformat.cpp b/src/nxsmultiformat.cpp
new file mode 100644
index 0000000..9f0d05a
--- /dev/null
+++ b/src/nxsmultiformat.cpp
@@ -0,0 +1,1606 @@
+//	Copyright (C) 1999-2003 Paul O. Lewis and Mark T. Holder
+//
+//	This file is part of NCL (Nexus Class Library) version 2.0.
+//
+//	NCL is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	NCL is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with NCL; if not, write to the Free Software Foundation, Inc.,
+//	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+
+
+/* The phylip parser is based on code from PHYLIP which is:
+ * version 3.6. (c) Copyright 1993-2004 by the University of Washington.
+ * Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, Andrew Keeffe,
+ * Mike Palczewski, Doug Buxton and Dan Fineman. Permission is granted to
+ * copy and use this program provided no fee is charged for it and provided
+ * that this copyright notice is not removed.
+ */
+
+
+/*
+ * This file is a phylip to NEXUS converter that consists of code from PHYLIP
+ * 3.6.5 (see copyright above) tweaked by Mark Holder to output NEXUS.
+ *
+ * This file was created by concatenating the headers, and .c files:
+ *	phylip.h,
+ *	seq.h,
+ *	discrete.h
+ *	phylip.c
+ *	seq.c,
+ *	pars.c, and
+ *	discrete.c concatenated
+ *	followed by removal of code that is unused in this simple program, and
+ * the addition of routines for printing out NEXUS.
+ */
+
+
+#include <cassert>
+#include <fstream>
+#include <algorithm>
+#include "ncl/nxsmultiformat.h"
+#include "ncl/nxsstring.h"
+
+// Upper bound on the number of characters FileToCharBuffer allocates for its first chunk (0x80000 == 524288).
+const unsigned long MAX_BUFFER_SIZE = 0x80000;
+
+
+
+
+
+
+
+// Lower-case names of the formats accepted by MultiFormatReader. formatNameToCode() converts the index of a name
+// in this array directly into a DataFormatType, so the position of each entry is significant.
+const char * gFormatNames[] = {	"nexus",
+								"dnafasta",
+								"aafasta",
+								"rnafasta",
+								"dnaphylip",
+								"rnaphylip",
+								"aaphylip",
+								"discretephylip",
+								"dnaphylipinterleaved",
+								"rnaphylipinterleaved",
+								"aaphylipinterleaved",
+								"discretephylipinterleaved",
+								"dnarelaxedphylip",
+								"rnarelaxedphylip",
+								"aarelaxedphylip",
+								"discreterelaxedphylip",
+								"dnarelaxedphylipinterleaved",
+								"rnarelaxedphylipinterleaved",
+								"aarelaxedphylipinterleaved",
+								"discreterelaxedphylipinterleaved",
+								"dnaaln",
+								"rnaaln",
+								"aaaln",
+								"phyliptree",
+								"relaxedphyliptree",
+								"nexml",
+								"dnafin",
+								"aafin",
+								"rnafin"
+							};
+// Number of entries in gFormatNames (currently 29); derived from the array so the two cannot drift apart.
+const unsigned gNumFormats = sizeof(gFormatNames) / sizeof(gFormatNames[0]);
+// Number of characters readPhylipName() consumes for a strict (non-relaxed) PHYLIP taxon name.
+const unsigned PHYLIP_NMLNGTH = 10;
+
+/*! Returns a copy of every entry of gFormatNames, in array order, as std::string objects. */
+std::vector<std::string> MultiFormatReader::getFormatNames()
+	{
+	return std::vector<std::string>(gFormatNames, gFormatNames + gNumFormats);
+	}
+
+
+
+/*
+	Chunked, forward-only reader over a std::istream. One chunk of the stream is held in `buffer` at a time;
+	advance() steps through it one character at a time, asks refillBuffer() for the next chunk when the current
+	one is used up, and keeps a running line number and the position of the most recent newline so that callers
+	can report position(), line() and column() in error messages.
+*/
+class FileToCharBuffer
+{
+		char prevChar;	// last character of the previous chunk (initially '\n'); what prev() returns when pos == 0
+		std::istream & inf;	// stream being read
+		unsigned long remaining;	// characters of the stream that have not been loaded into `buffer` yet
+		unsigned long pos;	// index of the current character inside `buffer`
+	public:
+		unsigned long totalSize;	// characters between the initial stream position and the end of the stream
+	protected:
+		unsigned long lineNumber;	// starts at 1; incremented when a newline is stepped onto
+		unsigned long prevNewlinePos;	// position() of the most recent '\r' or '\n'
+	public:
+		unsigned long inbuffer;	// number of characters of the current chunk held in `buffer`
+		char * buffer;	// heap array owned by this object (null when nothing could be read); freed by the destructor
+
+		/* Measures the stream from its current position to its end and loads the first chunk
+		(at most MAX_BUFFER_SIZE characters). `buffer` is left null when there is nothing to read.
+		*/
+
+		FileToCharBuffer(std::istream & instream);
+
+		/* Loads the next chunk of `inf` into `buffer`, starting at index `offset`, and makes `offset`
+		the current index. Returns false (changing nothing) when no characters remain in the stream.
+		*/
+		bool refillBuffer(unsigned long offset);
+		/* Returns the current character. There is no bounds or null check: `buffer` must be non-null. */
+		char current() const
+			{
+			return buffer[pos];
+			}
+		/* Steps to the next character, loading the next chunk when the current one is exhausted.
+		Returns false when the end of the input has been reached. Stepping onto a CR (13) counts a new line;
+		stepping onto a LF (10) counts one only when the previous character was not a CR, so that a
+		CR-LF pair is counted once.
+		*/
+		bool advance()
+			{
+			if (pos + 1 >= inbuffer)
+				{
+				if (!refillBuffer(0))
+					return false;
+				}
+			else
+				++pos;
+			const char c = current();
+			if (c == 13)
+				{
+				++lineNumber;
+				prevNewlinePos = position();
+				}
+			else if (c == 10)
+				{
+				if (prev() != 13)
+					++lineNumber;
+				prevNewlinePos = position();
+				}
+			return true;
+			}
+		/* advance() followed by a copy of the new current character into `c`; `c` is untouched on failure. */
+		bool advance_then_store(char & c)
+			{
+			if (!this->advance())
+				return false;
+			c = this->current();
+			return true;
+			}
+		bool skip_to_beginning_of_line(char & next);
+		/* Returns the character before the current one; at the start of a chunk that is `prevChar`. */
+		char prev() const
+			{
+			if (pos == 0)
+				return prevChar;
+			return buffer[pos - 1];
+			}
+		~FileToCharBuffer()
+			{
+			delete [] buffer;
+			}
+		/* Offset of the current character from the point at which reading started:
+		(totalSize - remaining - inbuffer) is the offset of the first character of the current chunk.
+		*/
+		unsigned long position() const
+			{
+			return totalSize +  pos - remaining - inbuffer;
+			}
+		unsigned long line() const
+			{
+			return lineNumber;
+			}
+		/* Distance from the most recent newline to the current character (0 if the newline is not behind us). */
+		unsigned long column() const
+			{
+			unsigned long p = position();
+			if (p < prevNewlinePos)
+				return 0;
+			return p - prevNewlinePos;
+			}
+
+	private:
+		// Not copyable: the destructor frees `buffer`, so a member-wise copy would release the same array twice.
+		// Declared but intentionally never defined.
+		FileToCharBuffer(const FileToCharBuffer &);
+		FileToCharBuffer & operator=(const FileToCharBuffer &);
+};
+
+
+/*!
+	Translates `formatName` with formatNameToCode() and forwards to the ReadFilepath overload that takes a
+	DataFormatType. Returns without doing anything when `formatName` is null; throws NxsException when the
+	name maps to UNSUPPORTED_FORMAT.
+*/
+void MultiFormatReader::ReadFilepath(const char * filepath, const char * formatName)
+	{
+	if (formatName == NULL)
+		return;
+	const DataFormatType formatCode = formatNameToCode(formatName);
+	if (formatCode != UNSUPPORTED_FORMAT)
+		{
+		this->ReadFilepath(filepath, formatCode);
+		return;
+		}
+	NxsString errormsg;
+	errormsg << "Unsupported format: ";
+	errormsg << formatName;
+	throw NxsException(errormsg);
+	}
+
+/*!
+	Translates `formatName` with formatNameToCode() and forwards to the ReadStream overload that takes a
+	DataFormatType. Returns without doing anything when `formatName` is null; throws NxsException when the
+	name maps to UNSUPPORTED_FORMAT.
+*/
+void MultiFormatReader::ReadStream(std::istream & inf, const char * formatName)
+	{
+	if (formatName == NULL)
+		return;
+	const DataFormatType formatCode = formatNameToCode(formatName);
+	if (formatCode != UNSUPPORTED_FORMAT)
+		{
+		this->ReadStream(inf, formatCode);
+		return;
+		}
+	NxsString errormsg;
+	errormsg << "Unsupported format: ";
+	errormsg << formatName;
+	throw NxsException(errormsg);
+	}
+
+/*!
+	Measures how many characters lie between the stream's current position and its end, seeks back, and loads
+	the first chunk (at most MAX_BUFFER_SIZE characters) into a newly allocated `buffer`. When the end position
+	is not beyond the starting position, `buffer` stays null and `inbuffer` and `remaining` are set to zero.
+	The first character receives the same newline bookkeeping that advance() applies to later characters.
+*/
+FileToCharBuffer::FileToCharBuffer(std::istream & instream)
+	:prevChar('\n'),
+	inf(instream),
+	pos(0),
+	totalSize(0),
+	lineNumber(1),
+	prevNewlinePos(0),
+	buffer(0L)
+	{
+	const std::streampos startPos = inf.tellg();
+	inf.seekg(0, std::ios::end);
+	const std::streampos endPos = inf.tellg();
+	if (endPos <= startPos)
+		{
+		remaining = 0;
+		inbuffer = 0;
+		return;
+		}
+	inf.seekg(startPos);
+	totalSize = static_cast<unsigned long>(endPos - startPos);
+	inbuffer = std::min(MAX_BUFFER_SIZE, totalSize);
+	remaining = totalSize - inbuffer;
+	buffer = new char [inbuffer];
+	inf.read(buffer, inbuffer);
+
+	// 13 is CR, 10 is LF; a LF only starts a new line when it does not follow a CR.
+	const char firstChar = current();
+	const bool isCR = (firstChar == 13);
+	const bool isLF = (firstChar == 10);
+	if (isCR || (isLF && prev() != 13))
+		++lineNumber;
+	if (isCR || isLF)
+		prevNewlinePos = position();
+	}
+
+/*!
+	Loads the next chunk of the stream into `buffer` starting at index `offset` and makes `offset` the current
+	index. The chunk length is the smaller of (`inbuffer` - `offset`) and the number of characters remaining.
+	When `offset` is 0 the last character of the outgoing chunk is saved in `prevChar` first.
+	Returns false, changing nothing, when no characters remain.
+	NOTE(review): advance() is the only visible caller and always passes 0; behaviour with a non-zero `offset`
+	has not been checked against position() and advance() -- confirm before relying on it.
+*/
+bool FileToCharBuffer::refillBuffer(unsigned long offset)
+	{
+	const bool exhausted = (remaining == 0);
+	if (exhausted)
+		return false;
+	if (offset == 0)
+		prevChar = buffer[inbuffer-1];
+	const unsigned long room = inbuffer - offset;
+	const unsigned long nToRead = std::min(room, remaining);
+	inbuffer = nToRead;
+	remaining -= nToRead;
+	inf.read(buffer + offset, nToRead);
+	pos = offset;
+	return true;
+	}
+
+
+/*!
+	Looks up a lower-cased copy of `s` in gFormatNames and returns its index converted to a DataFormatType,
+	or UNSUPPORTED_FORMAT when the lookup reports a negative index.
+*/
+MultiFormatReader::DataFormatType MultiFormatReader::formatNameToCode(const std::string &s)
+	{
+	std::string lowered(s);
+	NxsString::to_lower(lowered);
+	const int formatIndex = NxsString::index_in_array(lowered, gFormatNames, gNumFormats);
+	if (formatIndex >= 0)
+		{
+		NCL_ASSERT(formatIndex < UNSUPPORTED_FORMAT);
+		return MultiFormatReader::DataFormatType(formatIndex);
+		}
+	return UNSUPPORTED_FORMAT;
+	}
+
+
+
+/*!
+	Reads FASTA-style records from `ftcb`, which must hold a non-null buffer.
+
+	A record starts with '>' immediately after a '\n' or '\r' (the start of the input counts as such). The rest of
+	that line is passed through NxsString::strip_surrounding_whitespace() -- and through UnderscoresToBlanks() when
+	coerceUnderscoresToSpaces is set -- and appended to `taxaNames`. Every following character for which isgraph()
+	is true, up to the next record start or the end of the input, is translated with dm.GetStateCodeStored() and
+	appended to a new row pushed onto the back of `matList`. `longest` is raised to the length of the longest row.
+
+	Throws NxsException when a character maps to NXS_INVALID_STATE_CODE, or when a non-whitespace character is
+	found outside of a record.
+
+	Returns true when no row of `matList` is shorter than `longest`, false otherwise. No padding is applied.
+*/
+bool  MultiFormatReader::readFastaSequences(
+	FileToCharBuffer & ftcb,
+	const NxsDiscreteDatatypeMapper &dm,
+	std::list<std::string> & taxaNames,
+	std::list<NxsDiscreteStateRow> & matList,
+	size_t & longest)
+	{
+	NCL_ASSERT(ftcb.buffer);
+	NxsString err;
+	for (;;)
+		{
+		if (ftcb.current() == '>' && ( ftcb.prev() == '\n' ||  ftcb.prev() == '\r'))
+			{
+			// collect the name: everything after '>' up to the end of the line
+			std::string n;
+			if (!ftcb.advance())
+				break;
+			for (;;)
+				{
+				char c = ftcb.current();
+				if (c == '\n' || c == '\r')
+					break;
+				n.append(1, c);
+				if (!ftcb.advance())
+					break;
+				}
+			std::string nameStripped = NxsString::strip_surrounding_whitespace(n);
+			if (this->coerceUnderscoresToSpaces)
+			    {
+			    NxsString x(nameStripped.c_str());
+			    x.UnderscoresToBlanks();
+			    nameStripped = x;
+			    }
+			taxaNames.push_back(nameStripped);
+
+			matList.push_back(NxsDiscreteStateRow());
+			if (!ftcb.advance())
+				break;
+			NxsDiscreteStateRow & row = *(matList.rbegin());
+			row.reserve(longest);
+			// collect the sequence: printable characters until the next '>' at the start of a line
+			for (;;)
+				{
+				char c = ftcb.current();
+				if (c == '>' && (ftcb.prev() == '\n' || ftcb.prev() == '\r'))
+					break;
+				// NOTE(review): `c` is a plain char; isgraph() is only defined for values representable as
+				// unsigned char (or EOF) -- confirm that non-ASCII bytes cannot reach this call.
+				if (isgraph(c))
+					{
+					NxsDiscreteStateCell stateCode = dm.GetStateCodeStored(c);
+					if (stateCode == NXS_INVALID_STATE_CODE)
+						{
+						err << "Illegal state code \"" << c << "\" found when reading character " << (unsigned) row.size() << " for taxon " << n;
+						throw NxsException(err, ftcb.position(), ftcb.line(), ftcb.column());
+						}
+					row.push_back(stateCode);
+					}
+				if (!ftcb.advance())
+					break;
+				}
+			longest = std::max(longest, row.size());
+			}
+		else
+			{
+			if (isgraph(ftcb.current()))
+				{
+				err << "Illegal non-whitespace occurring outside of a name/sequence pair.  Expecting the first name to startwith > but found \"" << ftcb.current() << "\".";
+				throw NxsException(err, ftcb.position(), ftcb.line(), ftcb.column());
+				}
+			if (!ftcb.advance())
+				break;
+			}
+		}
+	// nothing is padded here: just report whether every row reaches `longest`
+	std::list<NxsDiscreteStateRow>::iterator sIt = matList.begin();
+	bool allSameLength = true;
+	for (; sIt != matList.end(); ++sIt)
+		{
+		NxsDiscreteStateRow & row = *sIt;
+		if (row.size() < longest)
+			{
+			allSameLength = false;
+			break;
+			}
+		}
+	return allSameLength;
+	}
+
+/*!
+	Reads the name of taxon number `i` (0-based; reported as i+1 in messages) starting at the current character
+	of `ftcb`.
+
+	Relaxed names: the current character and every following isgraph() character form the name; the
+	non-printable characters after it are then skipped, leaving `ftcb` on the first character of the data.
+	Strict names: exactly PHYLIP_NMLNGTH characters are consumed; non-printable characters are kept only when
+	a printable character follows them inside that window, so trailing ones are dropped.
+
+	Underscores are converted with UnderscoresToBlanks() when coerceUnderscoresToSpaces is set.
+	Throws NxsException if the input ends before the name (or the gap after a relaxed name) is complete.
+*/
+std::string  MultiFormatReader::readPhylipName(FileToCharBuffer & ftcb, unsigned i, bool relaxedNames)
+	{
+	NxsString errormsg;
+	std::string taxonName;
+	if (!relaxedNames)
+		{
+		std::string pendingWhitespace;
+		unsigned nConsumed = 0;
+		while (nConsumed < PHYLIP_NMLNGTH)
+			{
+			const char ch = ftcb.current();
+			if (!isgraph(ch))
+				pendingWhitespace.append(1, ch);
+			else
+				{
+				taxonName.append(pendingWhitespace);
+				taxonName.append(1, ch);
+				pendingWhitespace.clear();
+				}
+			if (!ftcb.advance())
+				{
+				errormsg << "End of file found when reading the name for taxon " << i+1 << ", \"" << taxonName << "\"";
+				throw NxsException(errormsg, ftcb.position(), ftcb.line(), ftcb.column());
+				}
+			++nConsumed;
+			}
+		}
+	else
+		{
+		// the name: the current character plus the run of printable characters after it
+		for (;;)
+			{
+			taxonName.append(1, ftcb.current());
+			if (!ftcb.advance())
+				{
+				errormsg << "End of file found when reading the name of taxon " << i+1 << ", \"" << taxonName << "\"";
+				throw NxsException(errormsg, ftcb.position(), ftcb.line(), ftcb.column());
+				}
+			if (!isgraph(ftcb.current()))
+				break;
+			}
+		// the gap between the name and the data
+		for (;;)
+			{
+			if (isgraph(ftcb.current()))
+				break;
+			if (!ftcb.advance())
+				{
+				errormsg << "End of file found when expecting the beginning of the data for taxon " << i+1 << ", \"" << taxonName << "\"";
+				throw NxsException(errormsg, ftcb.position(), ftcb.line(), ftcb.column());
+				}
+			}
+		}
+	if (this->coerceUnderscoresToSpaces)
+		{
+		NxsString blanked(taxonName.c_str());
+		blanked.UnderscoresToBlanks();
+		taxonName = blanked;
+		}
+
+	return taxonName;
+	}
+
+/*!
+	Reads a sequential (non-interleaved) PHYLIP matrix of `n_taxa` rows by `n_char` columns from `ftcb`.
+
+	`matList` is replaced by `n_taxa` rows, each pre-filled with `n_char` copies of NXS_INVALID_STATE_CODE.
+	For each taxon the name is read with readPhylipName() and appended to `taxaNames`; then `n_char`
+	non-digit printable characters are translated with dm.GetStateCodeStored() and stored in the row.
+	Digits inside a sequence are skipped with a warning, and '.' copies the state of the first row
+	(it is an error in the first taxon). Printable characters left on the line after `n_char` states produce
+	a warning and are ignored.
+
+	Throws NxsException on an illegal state code, and from the checks at `funcExit`.
+*/
+void  MultiFormatReader::readPhylipData(
+	FileToCharBuffer & ftcb,
+	const NxsDiscreteDatatypeMapper &dm,
+	std::list<std::string> & taxaNames,
+	std::list<NxsDiscreteStateRow> & matList,
+	const unsigned n_taxa,
+	const unsigned n_char,
+	bool relaxedNames)
+	{
+	NCL_ASSERT(n_taxa > 0 && n_char > 0);
+	NxsString err;
+	matList.clear();
+	matList.assign(n_taxa, NxsDiscreteStateRow(n_char, NXS_INVALID_STATE_CODE));
+	std::list<NxsDiscreteStateRow>::iterator mIt = matList.begin();
+	unsigned currentTaxon;
+	// skip leading whitespace; currentTaxon is given a value here because funcExit reads it
+	while (!isgraph(ftcb.current()))
+		{
+		    currentTaxon = 0;
+		    if (!ftcb.advance())
+		    {
+			goto funcExit;
+		    }
+		}
+	for (currentTaxon = 0; currentTaxon < n_taxa; ++currentTaxon)
+		{
+		std::string n = readPhylipName(ftcb, currentTaxon, relaxedNames);
+        taxaNames.push_back(n);
+		NCL_ASSERT(mIt != matList.end());
+		NxsDiscreteStateRow & row = *mIt++;
+		for (unsigned j = 0; j < n_char; ++j)
+			{
+			// consume characters until one state has been stored in row[j]
+			bool readChar = false;
+			for (;;)
+				{
+				const char c = ftcb.current();
+				if (isgraph(c))
+					{
+					if (isdigit(c))// I don't know why PHYLIP allows digits in the midst of the sequence, but it seems to.
+						{
+						err << "Number encountered (and ignored) within sequence for taxon " << n;
+						NexusWarn(err, NxsReader::PROBABLY_INCORRECT_CONTENT_WARNING, ftcb.position(), ftcb.line(), ftcb.column());
+						err.clear();
+						}
+					else
+						{
+						const NxsDiscreteStateCell stateCode = dm.GetStateCodeStored(c);
+						if (stateCode == NXS_INVALID_STATE_CODE)
+							{
+							if (c == '.')
+								{
+								// '.' is the match character: copy the state of the first taxon
+								if (currentTaxon == 0)
+									{
+									err << "Illegal match character state code  \".\" found in the first taxon for character " << j + 1 ;
+									throw NxsException(err, ftcb.position(), ftcb.line(), ftcb.column());
+									}
+								NxsDiscreteStateRow & firstRow = *(matList.begin());
+								row[j] = firstRow.at(j);
+								}
+							else
+								{
+								err << "Illegal state code \"" << c << "\" found when reading site " << j + 1 << " for taxon " << n;
+								throw NxsException(err, ftcb.position(), ftcb.line(), ftcb.column());
+								}
+							}
+						else
+							row[j] = stateCode;
+						readChar = true;
+						}
+					}
+				if (!ftcb.advance())
+					goto funcExit;
+				if (readChar)
+					break;
+				}
+			}
+		// consume the rest of the line, warning about any extra printable characters
+		char f = ftcb.current();
+		while (f != '\r' && f != '\n')
+			{
+			if (isgraph(f))
+				{
+				err << "Sequence longer than " << n_char << " found for taxon " << n << ". The character \""<< f << "\" was found, and will be ignored. If the file position of this error corresponds to sequences for the next taxon in the matrix, then that is an indication that the sequences for taxon " << n << " are too short.";
+				NexusWarn(err, NxsReader::PROBABLY_INCORRECT_CONTENT_WARNING, ftcb.position(), ftcb.line(), ftcb.column());
+				err.clear();
+				}
+			if (!ftcb.advance())
+				goto funcExit;
+			f = ftcb.current();
+			}
+		// skip to the first printable character of the next taxon (or the end of the input)
+		while (!isgraph(ftcb.current()))
+			{
+			if (!ftcb.advance())
+				goto funcExit;
+			}
+		}
+	funcExit:
+		// Reached by goto when the input ends, or by falling out of the taxon loop.
+		// NOTE(review): when the taxon loop runs to completion (a printable character follows the last taxon's
+		// line) currentTaxon == n_taxa here, so this test throws and reports n_taxa + 1 taxa -- confirm intended.
+		if (currentTaxon + 1 != n_taxa)
+			{
+			err << "Unexpected end of file.\nExpecting data for " << n_taxa << " taxa, but only found data for " << currentTaxon + 1;
+			throw NxsException(err, ftcb.position(), ftcb.line(), ftcb.column());
+			}
+		// NOTE(review): every row was created with n_char cells above and is never resized in this function, so
+		// this size test cannot fail; a last sequence cut short by the end of the input appears to go
+		// undetected -- confirm.
+		const NxsDiscreteStateRow & lastRow = *matList.rbegin();
+		if (lastRow.size() != n_char)
+			{
+			err << "Unexpected end of file.\nExpecting " << n_char << " characters for taxon " <<  *(taxaNames.rbegin()) << ", but only found " << (unsigned) lastRow.size() << " characters.";
+			throw NxsException(err, ftcb.position(), ftcb.line(), ftcb.column());
+			}
+	}
+
+
+/*!
+	Reads an interleaved PHYLIP matrix of `n_taxa` rows by `n_char` columns from `ftcb`.
+
+	`matList` is replaced by `n_taxa` rows, each pre-filled with `n_char` copies of NXS_INVALID_STATE_CODE.
+	The data are read page by page: each page holds one line per taxon, covering the columns
+	[startCharIndex, endCharIndex). Names are read with readPhylipName() (and appended to `taxaNames`) only on
+	the first page. The number of states on the first taxon's line fixes endCharIndex for that page; every
+	other taxon must supply the same number. Digits are skipped with a warning and '.' copies the state of the
+	first row (it is an error in the first taxon).
+
+	Throws NxsException on an illegal state code, on lines of inconsistent length, and from the checks at `funcExit`.
+*/
+void  MultiFormatReader::readInterleavedPhylipData(
+	FileToCharBuffer & ftcb,
+	const NxsDiscreteDatatypeMapper &dm,
+	std::list<std::string> & taxaNames,
+	std::list<NxsDiscreteStateRow> & matList,
+	const unsigned n_taxa,
+	const unsigned n_char,
+	bool relaxedNames)
+	{
+	NCL_ASSERT(n_taxa > 0 && n_char > 0);
+	NxsString err;
+	matList.clear();
+	matList.assign(n_taxa, NxsDiscreteStateRow(n_char, NXS_INVALID_STATE_CODE));
+	std::list<NxsDiscreteStateRow>::iterator mIt = matList.begin();
+	unsigned startCharIndex = 0;
+	unsigned endCharIndex = n_char;
+	unsigned currentTaxon;
+	// skip leading whitespace; currentTaxon is given a value here because funcExit reads it
+	while (!isgraph(ftcb.current()))
+		{
+		    currentTaxon = 0;
+		    if (!ftcb.advance())
+		    {
+			goto funcExit;
+		    }
+		}
+	// one iteration per interleave page
+	while (startCharIndex < n_char)
+		{
+		for (currentTaxon = 0; currentTaxon < n_taxa; ++currentTaxon)
+			{
+			// names only appear on the first page
+			if (startCharIndex == 0)
+				{
+				std::string n = readPhylipName(ftcb, currentTaxon, relaxedNames);
+				taxaNames.push_back(n);
+				}
+			if (currentTaxon == 0)
+				mIt = matList.begin();
+			NCL_ASSERT(mIt != matList.end());
+			NxsDiscreteStateRow & row = *mIt++;
+			unsigned j = startCharIndex;
+			// read this taxon's line of the page
+			for (;;)
+				{
+				const char c = ftcb.current();
+				if (isgraph(c))
+					{
+					if (j >= endCharIndex)
+						{
+						if (currentTaxon == 0)
+							{
+							err << "Too many characters were found for the taxon " << *(taxaNames.begin());
+							throw NxsException(err, ftcb.position(), ftcb.line(), ftcb.column());
+							}
+						else
+							{
+							std::list<std::string>::const_iterator nIt = taxaNames.begin();
+							for (unsigned q = 0; q < currentTaxon ; ++q)
+								++nIt;
+							err << "Illegal character \"" << c << "\" found, after all of the data for this interleave page has been read for the taxon " << *nIt;
+							throw NxsException(err, ftcb.position(), ftcb.line(), ftcb.column());
+							}
+						}
+					if (isdigit(c))// I don't know why PHYLIP allows digits in the midst of the sequence, but it seems to.
+						{
+						std::list<std::string>::const_iterator nIt = taxaNames.begin();
+						for (unsigned q = 0; q < currentTaxon ; ++q)
+							++nIt;
+						err << "Number encountered (and ignored) within sequence for taxon " << *nIt;
+						NexusWarn(err, NxsReader::PROBABLY_INCORRECT_CONTENT_WARNING, ftcb.position(), ftcb.line(), ftcb.column());
+						err.clear();
+						}
+					else
+						{
+						const NxsDiscreteStateCell stateCode = dm.GetStateCodeStored(c);
+						if (stateCode == NXS_INVALID_STATE_CODE)
+							{
+							if (c == '.')
+								{
+								// '.' is the match character: copy the state of the first taxon
+								if (currentTaxon == 0)
+									{
+									err << "Illegal match character state code  \".\" found in the first taxon for character " << j + 1 ;
+									throw NxsException(err, ftcb.position(), ftcb.line(), ftcb.column());
+									}
+								NxsDiscreteStateRow & firstRow = *(matList.begin());
+								row[j] = firstRow.at(j);
+								}
+							else
+								{
+								std::list<std::string>::const_iterator nIt = taxaNames.begin();
+								for (unsigned q = 0; q < currentTaxon ; ++q)
+									++nIt;
+								err << "Illegal state code \"" << c << "\" found when reading site " << j + 1 << " for taxon " << *nIt;
+								throw NxsException(err, ftcb.position(), ftcb.line(), ftcb.column());
+								}
+							}
+						else
+							row[j] = stateCode;
+						j++;
+						}
+					}
+				else if (c == '\r' || c == '\n')
+					{
+					// end of the line: the first taxon defines the page width, the others must match it
+					if (currentTaxon == 0)
+						endCharIndex = j;
+					else if (j != endCharIndex)
+						{
+						std::list<std::string>::const_iterator nIt = taxaNames.begin();
+						for (unsigned q = 0; q < currentTaxon ; ++q)
+							++nIt;
+						err << "Expecting " << endCharIndex -  startCharIndex << "characters  in this interleave page (based on the number of characters in the first taxon), but only found " << j - startCharIndex << " for taxon " << *nIt;
+						throw NxsException(err, ftcb.position(), ftcb.line(), ftcb.column());
+						}
+					break;
+					}
+				if (!ftcb.advance())
+					goto funcExit;
+				}
+			// skip to the first printable character of the next line (or the end of the input)
+			while (!isgraph(ftcb.current()))
+				{
+				if (!ftcb.advance())
+					goto funcExit;
+				}
+			}
+		// the next page starts where this one ended
+		startCharIndex = endCharIndex;
+		endCharIndex = n_char;
+		}
+	funcExit:
+		// Reached by goto when the input ends, or by falling out of the page loop.
+		// NOTE(review): when the page loop runs to completion (a printable character follows the last line of
+		// the last page) currentTaxon == n_taxa here, so this test throws and reports n_taxa + 1 taxa -- confirm
+		// intended.
+		if (currentTaxon + 1 != n_taxa)
+			{
+			err << "Unexpected end of file.\nExpecting data for " << n_taxa << " taxa, but only found data for " << currentTaxon + 1;
+			throw NxsException(err, ftcb.position(), ftcb.line(), ftcb.column());
+			}
+		// NOTE(review): every row was created with n_char cells above and is never resized in this function, so
+		// this size test cannot fail; input that ends before all pages were read appears to go undetected -- confirm.
+		const NxsDiscreteStateRow & lastRow = *matList.rbegin();
+		if (lastRow.size() != n_char)
+			{
+			err << "Unexpected end of file.\nExpecting " << n_char << " characters for taxon " <<  *(taxaNames.rbegin()) << ", but only found " << (unsigned) lastRow.size() << " characters.";
+			throw NxsException(err, ftcb.position(), ftcb.line(), ftcb.column());
+			}
+	}
+
+/*!
+	Advances past the end of the current line. Characters are consumed until a '\n', a '\r', or a "\r\n" pair
+	has been stepped over; on success `next` holds the first character of the following line and true is
+	returned. Returns false if the input ends first, in which case `next` holds the last character that was
+	read successfully.
+*/
+bool FileToCharBuffer::skip_to_beginning_of_line(char & next)
+	{
+	next = this->current();
+	bool atLineStart = false;
+	while (!atLineStart)
+		{
+		const char justPassed = next;
+		if (!this->advance_then_store(next))
+			return false;
+		switch (justPassed)
+			{
+			case '\n':
+				atLineStart = true;
+				break;
+			case '\r':
+				// a LF directly after the CR belongs to the same line ending
+				if (next == '\n' && (!this->advance_then_store(next)))
+					return false;
+				atLineStart = true;
+				break;
+			default:
+				break;
+			}
+		}
+	return true;
+	}
+
+/*!
+	Reads a CLUSTAL (.aln) alignment from `ftcb`, which must hold a non-null buffer.
+
+	The input has to start with the word "CLUSTAL" (compared case-insensitively). After the header, the data are
+	read as blocks of lines separated by lines that start with a non-printable character. Each data line is a
+	taxon name, whitespace, and a run of states; a digit inside the states ends the line (the rest of the line
+	is skipped). The first block defines the taxa: each name is appended to `taxaNames` (underscores converted
+	when coerceUnderscoresToSpaces is set) and a new row is pushed onto `matList`. In later blocks the names
+	must reappear in the same order (compared case-insensitively) and the states are appended to the existing rows.
+
+	Throws NxsException on a missing header, an illegal state code, an unexpected or extra taxon name, or a
+	premature end of the input.
+
+	Returns true if all rows of `matList` end up with the same length, false otherwise. No padding is applied.
+*/
+bool  MultiFormatReader::readAlnData(
+	FileToCharBuffer & ftcb,
+	const NxsDiscreteDatatypeMapper &dm,
+	std::list<std::string> & taxaNames,
+	std::list<NxsDiscreteStateRow> & matList)
+	{
+	taxaNames.clear();
+	NCL_ASSERT(ftcb.buffer);
+	NxsString err;
+	char c;
+	if (!ftcb.current())
+		throw NxsException("Could not read from file", ftcb.position(), ftcb.line(), ftcb.column());
+
+	// the file must start with the word CLUSTAL
+	c = ftcb.current();
+	unsigned index = 0;
+	const char * firstWord = "CLUSTAL";
+	std::string found;
+	const unsigned lenFirstWord = (unsigned const)strlen(firstWord);
+	while (index < lenFirstWord)
+		{
+		found.append(1, c);
+		if (toupper(c) != firstWord[index] || !ftcb.advance())
+			{
+			err << "Expecting file to start \"CLUSTAL\" found \"" << found << "\"";
+			throw NxsException(err, ftcb.position(), ftcb.line(), ftcb.column());
+			}
+		++index;
+		c = ftcb.current();
+		}
+	// skip the rest of the header line and any following lines that start with whitespace
+	do {
+		if (!ftcb.skip_to_beginning_of_line(c))
+			throw NxsException("Expecting multi-line file",ftcb.position(), ftcb.line(), ftcb.column());
+	} while (!isgraph(c));
+	bool readingFirstBlock = true;
+	for (;;)
+		{
+		// skip lines starting with whitespace
+		while (!isgraph(c))
+			{
+			if (!ftcb.skip_to_beginning_of_line(c))
+				{
+				if (taxaNames.empty())
+					throw NxsException("Sequences after clustal header", ftcb.position(), ftcb.line(), ftcb.column());
+				goto funcExit;
+				}
+			}
+		unsigned curr_tax_ind = 0;
+		std::list<std::string>::const_iterator taxNameIt;
+		std::list<NxsDiscreteStateRow>::iterator matRowIt;
+		if (!readingFirstBlock)
+			{
+			taxNameIt = taxaNames.begin();
+			matRowIt = matList.begin();
+			}
+		NxsDiscreteStateRow * row = NULL;
+		// this is the loop over taxa for a "page" of interleave data
+		for (;isgraph(c);)
+			{
+			// the taxon name is the run of printable characters at the start of the line
+			std::string n;
+			for (;;)
+				{
+				n.append(1, c);
+				if (!ftcb.advance())
+					break;
+				c = ftcb.current();
+				if (!isgraph(c))
+					break;
+				}
+			if (readingFirstBlock)
+				{
+				if (this->coerceUnderscoresToSpaces)
+					{
+					NxsString x(n.c_str());
+					x.UnderscoresToBlanks();
+					n = x;
+					}
+				taxaNames.push_back(n);
+				matList.push_back(NxsDiscreteStateRow());
+				row = &(*(matList.rbegin()));
+				}
+			// `>=` (not `>`): once curr_tax_ind equals the number of known taxa, taxNameIt and matRowIt are at
+			// end() and must not be dereferenced -- an extra data line in a later block is an error.
+			else if (curr_tax_ind >= taxaNames.size())
+				{
+				err << "Expecting a line beginning with whitespace (or a blank line), but found \"" << n << "\"";
+				throw NxsException(err, ftcb.position(), ftcb.line(), ftcb.column());
+				}
+			else
+				{
+				std::string prev_name = *taxNameIt++;
+				if (!NxsString::case_insensitive_equals(prev_name.c_str(), n.c_str()))
+					{
+					err << "Expecting data for taxon # " << (1 + curr_tax_ind) << " \"" << prev_name << "\" but got \"" << n << "\"";
+					throw NxsException(err, ftcb.position(), ftcb.line(), ftcb.column());
+					}
+				row = &(*matRowIt++);
+				}
+
+
+			// skip the whitespace between the name and the states
+			while (ftcb.advance_then_store(c))
+				{
+				if (isgraph(c))
+					break;
+				}
+			if  (!isgraph(c))
+				{
+				err << "Unexpected end-of-file after taxon name \"" << n << "\"";
+				throw NxsException(err, ftcb.position(), ftcb.line(), ftcb.column());
+				}
+			// this is the loop over states for a given taxon
+			bool eof = false;
+			bool eoseq = false;
+			for (;!eoseq;)
+				{
+				if (isgraph(c))
+					{
+					if (isdigit(c))
+						{
+						// a digit ends the states on this line; the rest of the line is skipped
+						if (!ftcb.skip_to_beginning_of_line(c))
+							{
+							if (!readingFirstBlock && (curr_tax_ind + 1) != taxaNames.size())
+								{
+								err << "Unexpected End of file. Expecting data for " << (unsigned) taxaNames.size() << " sequences";
+								throw NxsException(err, ftcb.position(), ftcb.line(), ftcb.column());
+								}
+							goto funcExit;
+							}
+						break;
+						}
+					else
+						{
+						NxsDiscreteStateCell stateCode = dm.GetStateCodeStored(c);
+						if (stateCode == NXS_INVALID_STATE_CODE)
+							{
+							err << "Illegal state code \"" << c << "\" found when reading character " << (unsigned long) row->size() << " for taxon " << n;
+							throw NxsException(err, ftcb.position(), ftcb.line(), ftcb.column());
+							}
+						row->push_back(stateCode);
+						eof = !ftcb.advance_then_store(c);
+						}
+					}
+				if ((!eof) && (!isgraph(c)))
+					{
+					// a line ending (LF, CR or CR-LF) terminates the sequence; other whitespace is skipped
+					if (c == '\n')
+						{
+						eof = !ftcb.advance_then_store(c);
+						eoseq = true;
+						}
+					else if (c == '\r')
+						{
+						eof = !ftcb.advance_then_store(c);
+						if (!eof && c == '\n')
+							eof = !ftcb.advance_then_store(c);
+						eoseq = true;
+						}
+					else
+						eof = !ftcb.advance_then_store(c);
+					}
+				if (eof)
+					{
+					if (!readingFirstBlock && (curr_tax_ind + 1) != taxaNames.size())
+						{
+						err << "Unexpected End of file. Expecting data for " << (unsigned) taxaNames.size() << " sequences";
+						throw NxsException(err, ftcb.position(), ftcb.line(), ftcb.column());
+						}
+					goto funcExit;
+					}
+				}
+			// a printable first character means another taxon line follows; anything else ends the block
+			if (isgraph(c))
+				curr_tax_ind++;
+			else
+				{
+				if (!readingFirstBlock && (1 + curr_tax_ind) != taxaNames.size())
+					{
+					err << "Unexpected line beginning with whitespace. Expecting data for " << (unsigned) taxaNames.size() << " sequences";
+					throw NxsException(err, ftcb.position(), ftcb.line(), ftcb.column());
+					}
+				curr_tax_ind = 0;
+				readingFirstBlock = false;
+				}
+			}
+		}
+
+	funcExit:
+		// nothing is padded here: just report whether every row has the same length
+		std::list<NxsDiscreteStateRow>::iterator sIt = matList.begin();
+		long longest = -1;
+		for (; sIt != matList.end(); ++sIt)
+			{
+			NxsDiscreteStateRow & row = *sIt;
+			if (longest == -1)
+				longest = (long) row.size();
+			else if (longest != (long) row.size())
+				return false;
+			}
+		return true;
+	}
+
+/*!
+	Adds every name of `taxaNames`, in order, to `taxa` with AddTaxonLabel().
+
+	When a label is rejected with DuplicatedLabelNxsException and
+	conversionOutputRecord.addNumbersToDisambiguateNames is set, the original name followed by 1, 2, 3, ... is
+	tried until a label is accepted; otherwise the exception is rethrown. With that option set, one
+	(original name, name used) pair is recorded per taxon, and the pairs are passed to
+	conversionOutputRecord.writeNameTranslation() if any name had to be changed and
+	conversionOutputRecord.writeNameTranslationFile is set.
+*/
+void MultiFormatReader::addTaxaNames(const std::list<std::string> & taxaNames, NxsTaxaBlockAPI * taxa)
+	{
+	NCL_ASSERT(taxa);
+
+	std::vector<NxsNameToNameTrans> nameTrans;
+	bool nameTransNeeded = false;
+	NxsString numbered;
+
+	std::list<std::string>::const_iterator labelIt = taxaNames.begin();
+	while (labelIt != taxaNames.end())
+		{
+		std::string candidate = *labelIt;
+		NxsNameToNameTrans translation(candidate, candidate);
+		unsigned suffix = 1;
+		bool added = false;
+		while (!added)
+			{
+			try {
+				taxa->AddTaxonLabel(candidate);
+				added = true;
+				}
+			catch (DuplicatedLabelNxsException &)
+				{
+				if (!this->conversionOutputRecord.addNumbersToDisambiguateNames)
+					throw;
+				// retry with the original name followed by the next number
+				nameTransNeeded = true;
+				numbered.assign(*labelIt);
+				numbered << suffix;
+				translation.second = numbered;
+				candidate = numbered;
+				++suffix;
+				}
+			}
+		if (this->conversionOutputRecord.addNumbersToDisambiguateNames)
+			nameTrans.push_back(translation);
+		++labelIt;
+		}
+
+
+	// write out a name translation file if we need to
+	const bool writeTranslation = nameTransNeeded && this->conversionOutputRecord.writeNameTranslationFile;
+	if (writeTranslation)
+		this->conversionOutputRecord.writeNameTranslation(nameTrans, taxa);
+	}
+
+/*!
+	Replaces the contents of `mat` with the rows of `matList`, in order. Each row is transferred with swap(),
+	so the rows left behind in `matList` are empty afterwards.
+*/
+void MultiFormatReader::moveDataToMatrix(std::list<NxsDiscreteStateRow> & matList,  NxsDiscreteStateMatrix &mat)
+	{
+	mat.clear();
+	mat.resize(matList.size());
+	NxsDiscreteStateMatrix::iterator destIt = mat.begin();
+	std::list<NxsDiscreteStateRow>::iterator srcIt = matList.begin();
+	while (srcIt != matList.end())
+		{
+		destIt->swap(*srcIt);
+		++srcIt;
+		++destIt;
+		}
+	}
+
+/*!
+	Fills the data block `dataB` from parsed input: a "Dimensions ntax = ... nchar = ... ;" command is
+	synthesized from the number of rows in `matList` and from `nchar` and handed to dataB->HandleDimensions();
+	then `taxaNames` are added to the block's taxa with addTaxaNames() and the rows of `matList` are moved into
+	dataB->discreteMatrix with moveDataToMatrix().
+*/
+void  MultiFormatReader::moveDataToDataBlock(const std::list<std::string> & taxaNames, std::list<NxsDiscreteStateRow> & matList, const unsigned nchar, NxsDataBlock * dataB)
+	{
+	NCL_ASSERT(dataB);
+
+	// build the text of the fake DIMENSIONS command
+	NxsString dimensionsCmd;
+	dimensionsCmd << "Dimensions ntax = ";
+	dimensionsCmd << (unsigned) matList.size();
+	dimensionsCmd << " nchar = ";
+	dimensionsCmd << nchar;
+	dimensionsCmd << " ; ";
+
+	std::istringstream dimStream(dimensionsCmd);
+	NxsToken dimToken(dimStream);
+	NxsString newTaxLabel("NewTaxa");
+	NxsString ntaxLabel("NTax");
+	NxsString ncharLabel("NChar");
+	dataB->HandleDimensions(dimToken, newTaxLabel, ntaxLabel, ncharLabel);
+
+	NCL_ASSERT(dataB->taxa);
+	addTaxaNames(taxaNames, dataB->taxa);
+
+	moveDataToMatrix(matList, dataB->discreteMatrix);
+	}
+
+/*!
+	Fills the unaligned block `uB` from parsed input: a "Dimensions NewTaxa ntax = ... ;" command is synthesized
+	from the number of rows in `matList` and handed to uB->HandleDimensions(); then `taxaNames` are added to the
+	block's taxa with addTaxaNames() and the rows of `matList` are moved into uB->uMatrix with moveDataToMatrix().
+*/
+void  MultiFormatReader::moveDataToUnalignedBlock(const std::list<std::string> & taxaNames, std::list<NxsDiscreteStateRow> & matList, NxsUnalignedBlock * uB)
+	{
+	NCL_ASSERT(uB);
+
+	// build the text of the fake DIMENSIONS command
+	NxsString dimensionsCmd;
+	dimensionsCmd << "Dimensions NewTaxa ntax = ";
+	dimensionsCmd << (unsigned) matList.size();
+	dimensionsCmd << " ; ";
+
+	std::istringstream dimStream(dimensionsCmd);
+	NxsToken dimToken(dimStream);
+	uB->HandleDimensions(dimToken);
+
+	NCL_ASSERT(uB->taxa);
+	addTaxaNames(taxaNames, uB->taxa);
+
+	moveDataToMatrix(matList, uB->uMatrix);
+	}
+
+/*!
+	Reads FASTA sequences of datatype `dt` from `inf` and stores them in a newly obtained block.
+
+	A "DATA" block reader is requested from cloneFactory and configured (datatype `dt`, gap '-'). The sequences
+	are parsed with readFastaSequences(). If that function reports rows of equal length, the data are moved into
+	the DATA block and BlockReadHook() is called for it. Otherwise the DATA block is handed to
+	cloneFactory.BlockError(), an "UNALIGNED" block reader is requested and configured the same way, and the
+	data go there instead.
+
+	When nothing can be read from `inf`, the block is handed to cloneFactory.BlockError() and NexusError() is
+	called. Exceptions from readFastaSequences() are rethrown after cloneFactory.BlockError().
+*/
+void  MultiFormatReader::readFastaFile(std::istream & inf, NxsCharactersBlock::DataTypesEnum dt)
+	{
+	NxsString blockID("DATA");
+	NxsBlock * const dataReader = cloneFactory.GetBlockReaderForID(blockID, this, NULL);
+	NCL_ASSERT(dataReader);
+	if (dataReader == NULL)
+		return;
+	dataReader->SetNexus(this);
+
+	// this should be safe because we know that the PublicNexusReader has a DataBlock assigned to "DATA" -- unless the caller has replaced that clone template (gulp)
+	NxsDataBlock * dataB = static_cast<NxsDataBlock *>(dataReader);
+	FileToCharBuffer ftcb(inf);
+	if (!ftcb.buffer)
+		{
+		// nothing was loaded from the stream
+		cloneFactory.BlockError(dataB);
+		NxsString err;
+		err << "No Data read -- file appears to be empty";
+		this->NexusError(err, 0, -1, -1);
+		return;
+		}
+
+	dataB->Reset();
+	dataB->datatype = dt;
+	dataB->ResetSymbols();
+	dataB->gap = '-';
+	NxsPartition dtParts;
+	std::vector<NxsCharactersBlock::DataTypesEnum> dtv;
+	dataB->CreateDatatypeMapperObjects(dtParts, dtv);
+
+	const NxsDiscreteDatatypeMapper * dm = dataB->GetDatatypeMapperForChar(0);
+
+	std::list<std::string> taxaNames;
+	std::list<NxsDiscreteStateRow> matList;
+	size_t longest = 0;
+	bool aligned = true;
+	try {
+		aligned = readFastaSequences(ftcb, *dm, taxaNames, matList, longest);
+		}
+	catch (...)
+		{
+		cloneFactory.BlockError(dataB);
+		throw;
+		}
+
+	if (!aligned)
+		{
+		// rows of different lengths: give up on the DATA block and use an UNALIGNED block instead
+		cloneFactory.BlockError(dataB);
+		blockID.assign("UNALIGNED");
+		NxsBlock * nub = cloneFactory.GetBlockReaderForID(blockID, this, NULL);
+		if (!nub)
+			{
+			NCL_ASSERT(nub);
+			return;
+			}
+		nub->SetNexus(this);
+
+		// as above, this relies on the clone template registered for "UNALIGNED" being a NxsUnalignedBlock
+		NxsUnalignedBlock * unalignedB = static_cast<NxsUnalignedBlock *>(nub);
+		unalignedB->Reset();
+		unalignedB->datatype = dt;
+		unalignedB->ResetSymbols();
+		unalignedB->gap = '-';
+		unalignedB->ResetDatatypeMapper();
+		moveDataToUnalignedBlock(taxaNames, matList, unalignedB);
+		BlockReadHook(blockID, unalignedB);
+		return;
+		}
+
+	moveDataToDataBlock(taxaNames, matList, (unsigned int)longest, dataB);
+	BlockReadHook(blockID, dataB);
+	}
+
+
+/* Assumes that `contents` was returned by a call to readFileToMemory() made
+	with `inf`, and that `len` refers to the size of the buffer allocated by
+	readFileToMemory().
+*/
+bool  MultiFormatReader::readFinSequences(
+	FileToCharBuffer & ftcb,
+	NxsDiscreteDatatypeMapper &dm,
+	std::list<std::string> & taxaNames,
+	std::list<NxsDiscreteStateRow> & matList,
+	size_t & longest)
+	{
+	NCL_ASSERT(ftcb.buffer);
+	NxsString err;
+
+	std::string firstLine;
+	for (;;)
+		{
+		char c = ftcb.current();
+		if (c == '\n' || c == '\r')
+			break;
+		firstLine.append(1, c);
+		if (!ftcb.advance())
+			break;
+		}
+	std::string sfl = NxsString::strip_surrounding_whitespace(firstLine);
+	if (!NxsString::case_insensitive_equals(sfl.c_str(), "label data"))
+		{
+		err << "Expecting the first line of the file to contain just the words \"label data\", but found \"" << sfl << '\"';
+		throw NxsException(err, ftcb.position(), ftcb.line(), ftcb.column());
+		}
+
+	for (;;)
+		{
+		const char cc = ftcb.current();
+		if (!isgraph(cc))
+			{
+			if (!ftcb.advance())
+				break;
+			}
+		else if (cc == '@')
+			break;
+		else
+			{
+			std::string name;
+			bool commentLine= false;
+			if (ftcb.current() == '/')
+				{
+				if (!ftcb.advance())
+					{
+					err << "Unexpected end of file after / character";
+					throw NxsException(err, ftcb.position(), ftcb.line(), ftcb.column());
+					}
+				if (ftcb.current() == '*')
+					{
+					commentLine = true;
+					bool prevStar = false;
+					for (;;)
+						{
+						if (!ftcb.advance())
+							{
+							err << "Unexpected end of file in comment";
+							throw NxsException(err, ftcb.position(), ftcb.line(), ftcb.column());
+							}
+						char cmtc = ftcb.current();
+						if (prevStar && cmtc == '/')
+							break;
+						prevStar = (cmtc == '*');
+						}
+					if (!ftcb.advance())
+						break;
+					}
+				else
+					name.append(1, '/');
+				}
+			if (commentLine)
+				continue;
+			// read taxon name -- no escaping of characters will be done
+			for (;;)
+				{
+				char c = ftcb.current();
+				if (!isgraph(c))
+					break;
+				name.append(1, c);
+				if (!ftcb.advance())
+					break;
+				}
+			// skip ws
+			for (;;)
+				{
+				char sc = ftcb.current();
+				if (isgraph(sc))
+					break;
+				if (sc == '\n' || sc == '\r' || !ftcb.advance())
+					{
+					err << "Unexpected end of line (or end of file).  Expecting sequence for " << name;
+					throw NxsException(err, ftcb.position(), ftcb.line(), ftcb.column());
+					}
+				}
+			if (this->coerceUnderscoresToSpaces)
+			    {
+			    NxsString x(name.c_str());
+			    x.UnderscoresToBlanks();
+			    name = x;
+			    }
+			taxaNames.push_back(name);
+			matList.push_back(NxsDiscreteStateRow());
+			NxsDiscreteStateRow & row = *(matList.rbegin());
+			row.reserve(longest);
+			// read sequence
+			for (;;)
+				{
+				char seqc = ftcb.current();
+				if (isgraph(seqc))
+					{
+					NxsDiscreteStateCell stateCode;
+					if (seqc == '[')
+						{
+						std::string recoded;
+						recoded.append(1, '{');
+						if (!ftcb.advance())
+							{
+							err << "Unexpected end of file is [ group!";
+							throw NxsException(err, ftcb.position(), ftcb.line(), ftcb.column());
+							}
+						while (ftcb.current() != ']')
+							{
+							recoded.append(1, ftcb.current());
+							if (!ftcb.advance())
+								{
+								err << "Unexpected end of file is [ group!";
+								throw NxsException(err, ftcb.position(), ftcb.line(), ftcb.column());
+								}
+							}
+						recoded.append(1, '}');
+						try{
+							NxsString nn;
+							nn << name;
+							stateCode = dm.StateCodeForNexusMultiStateSet('\0',
+  																	  recoded,
+  																	  0L,
+  																	  (unsigned int)taxaNames.size(),
+  																	  (unsigned int)row.size(),
+  																	  0L,
+  																	  nn);
+							}
+						catch (NxsException & x)
+							{
+							x.addPositionInfo(ftcb.position(), ftcb.line(), ftcb.column());
+							throw x;
+							}
+						}
+					else
+						{
+						stateCode = dm.GetStateCodeStored(seqc);
+						if (stateCode == NXS_INVALID_STATE_CODE)
+							{
+							err << "Illegal state code \"" << seqc << "\" found when reading character " << (unsigned) row.size() << " for taxon \"" << name << "\".";
+							throw NxsException(err, ftcb.position(), ftcb.line(), ftcb.column());
+							}
+						}
+					row.push_back(stateCode);
+					}
+				else if (seqc == '\n' || seqc == '\r')
+					break;
+				if (!ftcb.advance())
+					break;
+				}
+			longest = std::max(longest, row.size());
+			}
+		}
+	// pad with missing data to make even rows
+	std::list<NxsDiscreteStateRow>::iterator sIt = matList.begin();
+	bool allSameLength = true;
+	for (; sIt != matList.end(); ++sIt)
+		{
+		NxsDiscreteStateRow & row = *sIt;
+		if (row.size() < longest)
+			{
+			allSameLength = false;
+			break;
+			}
+		}
+	return allSameLength;
+	}
+
+
+void  MultiFormatReader::readFinFile(std::istream & inf, NxsCharactersBlock::DataTypesEnum dt)
+	{ // Reads "fin" data of datatype dt from inf into a "DATA" block, or into an "UNALIGNED" block when the rows differ in length.
+	NxsString blockID("DATA");
+	NxsBlock *nb = cloneFactory.GetBlockReaderForID(blockID, this, NULL);
+	NCL_ASSERT(nb);
+	if (!nb)
+		return;
+	nb->SetNexus(this);
+
+	NxsDataBlock * dataB = static_cast<NxsDataBlock *>(nb); // this should be safe because we know that the PublicNexusReader has a DataBlock assigned to "DATA" -- unless the caller has replaced that clone template (gulp)
+	FileToCharBuffer ftcb(inf);
+	if (ftcb.buffer)
+		{
+		dataB->Reset();
+		dataB->datatype = dt;
+		dataB->ResetSymbols();
+		dataB->gap = '-';
+		NxsPartition dtParts;
+		std::vector<NxsCharactersBlock::DataTypesEnum> dtv;
+		dataB->CreateDatatypeMapperObjects(dtParts, dtv);
+
+		NxsDiscreteDatatypeMapper * dm = dataB->GetMutableDatatypeMapperForChar(0); // NOTE(review): dereferenced below without a NULL check
+
+		std::list<std::string> taxaNames;
+		std::list<NxsDiscreteStateRow> matList;
+		size_t longest = 0;
+		bool aligned = true;
+		try {
+			aligned = readFinSequences(ftcb, *dm, taxaNames, matList, longest);
+			}
+		catch (...)
+			{
+			cloneFactory.BlockError(dataB); // hand the unused block back to the factory before propagating the error
+			throw;
+			}
+
+		if (aligned)
+			{
+			moveDataToDataBlock(taxaNames, matList, (unsigned int)longest, dataB);
+			BlockReadHook(blockID, dataB);
+			}
+		else
+			{
+			cloneFactory.BlockError(dataB); // the DATA block is not used for ragged rows; switch to an UNALIGNED block
+			blockID.assign("UNALIGNED");
+			NxsBlock * nub = cloneFactory.GetBlockReaderForID(blockID, this, NULL);
+			if (!nub)
+				{
+				NCL_ASSERT(nub);
+				return;
+				}
+			nub->SetNexus(this);
+
+			NxsUnalignedBlock * unalignedB = static_cast<NxsUnalignedBlock *>(nub); // assumes the clone template registered for "UNALIGNED" is a NxsUnalignedBlock -- unless the caller has replaced that clone template (gulp)
+			unalignedB->Reset();
+			unalignedB->datatype = dt;
+			unalignedB->ResetSymbols(); // NOTE(review): unlike the DATA branch above, no gap symbol is assigned here -- confirm this is intended
+			unalignedB->ResetDatatypeMapper();
+			moveDataToUnalignedBlock(taxaNames, matList, unalignedB);
+			BlockReadHook(blockID, unalignedB);
+			}
+		}
+	else
+		{
+		cloneFactory.BlockError(dataB); // nothing was read: give the block back, then report the error
+		NxsString err;
+		err << "No Data read -- file appears to be empty";
+		this->NexusError(err, 0, -1, -1);
+		}
+	}
+
+void  MultiFormatReader::ReadFilepath(const char * filepath, DataFormatType format)
+	{ // Reads the file at filepath in the given format; every failure on the non-NEXUS path is reported through NexusError.
+	if (format == NEXUS_FORMAT)
+		{
+		NxsReader::ReadFilepath(filepath); // NEXUS files are handled entirely by the base class
+		}
+	else
+		{
+		std::ifstream inf;
+		try{
+			inf.open(filepath, std::ios::binary);
+			if (!inf.good())
+				{
+				NxsString err;
+				err << "Could not open the file \"" << filepath <<"\"";
+				this->NexusError(err, 0, -1, -1);
+				}
+			else
+				this->ReadStream(inf, format, filepath);
+			}
+		catch (NxsException & x)
+			{
+			this->NexusError(x.msg, x.pos, x.line, x.col); // forward the parser's message and position to the error handler
+			}
+		catch (...)
+			{
+			NxsString err;
+			err << "Unknown error occurred while reading \"" << filepath <<"\"." ;
+			this->NexusError(err, 0, -1, -1);
+			}
+
+		}
+	}
+
+void  MultiFormatReader::ReadStream(std::istream & inf, DataFormatType format, const char * filepath)
+	{ // Dispatches inf to the reader for `format`; filepath is only used in the error message and may be NULL.
+	if (format == NEXUS_FORMAT)
+		{
+		NxsReader::ReadFilestream(inf);
+		}
+	else
+		{
+		if (format == FASTA_DNA_FORMAT)
+			readFastaFile(inf, NxsCharactersBlock::dna);
+		else if (format == FASTA_RNA_FORMAT)
+			readFastaFile(inf, NxsCharactersBlock::rna);
+		else if (format == FASTA_AA_FORMAT)
+			readFastaFile(inf, NxsCharactersBlock::protein);
+		else if (format == PHYLIP_DNA_FORMAT) // readPhylipFile arguments: (stream, datatype, relaxedNames, interleaved)
+			readPhylipFile(inf, NxsCharactersBlock::dna, false, false);
+		else if (format == PHYLIP_RNA_FORMAT)
+			readPhylipFile(inf, NxsCharactersBlock::rna, false, false);
+		else if (format == PHYLIP_AA_FORMAT)
+			readPhylipFile(inf, NxsCharactersBlock::protein, false, false);
+		else if (format == PHYLIP_DISC_FORMAT)
+			readPhylipFile(inf, NxsCharactersBlock::standard, false, false);
+		else if (format == INTERLEAVED_PHYLIP_DNA_FORMAT)
+			readPhylipFile(inf, NxsCharactersBlock::dna, false, true);
+		else if (format == INTERLEAVED_PHYLIP_RNA_FORMAT)
+			readPhylipFile(inf, NxsCharactersBlock::rna, false, true);
+		else if (format == INTERLEAVED_PHYLIP_AA_FORMAT)
+			readPhylipFile(inf, NxsCharactersBlock::protein, false, true);
+		else if (format == INTERLEAVED_PHYLIP_DISC_FORMAT)
+			readPhylipFile(inf, NxsCharactersBlock::standard, false, true);
+		else if (format == RELAXED_PHYLIP_DNA_FORMAT)
+			readPhylipFile(inf, NxsCharactersBlock::dna, true, false);
+		else if (format == RELAXED_PHYLIP_RNA_FORMAT)
+			readPhylipFile(inf, NxsCharactersBlock::rna, true, false);
+		else if (format == RELAXED_PHYLIP_AA_FORMAT)
+			readPhylipFile(inf, NxsCharactersBlock::protein, true, false);
+		else if (format == RELAXED_PHYLIP_DISC_FORMAT)
+			readPhylipFile(inf, NxsCharactersBlock::standard, true, false);
+		else if (format == INTERLEAVED_RELAXED_PHYLIP_DNA_FORMAT)
+			readPhylipFile(inf, NxsCharactersBlock::dna, true, true);
+		else if (format == INTERLEAVED_RELAXED_PHYLIP_RNA_FORMAT)
+			readPhylipFile(inf, NxsCharactersBlock::rna, true, true);
+		else if (format == INTERLEAVED_RELAXED_PHYLIP_AA_FORMAT)
+			readPhylipFile(inf, NxsCharactersBlock::protein, true, true);
+		else if (format == INTERLEAVED_RELAXED_PHYLIP_DISC_FORMAT)
+			readPhylipFile(inf, NxsCharactersBlock::standard, true, true);
+		else if (format == ALN_DNA_FORMAT)
+			readAlnFile(inf, NxsCharactersBlock::dna);
+		else if (format == ALN_RNA_FORMAT)
+			readAlnFile(inf, NxsCharactersBlock::rna);
+		else if (format == ALN_AA_FORMAT)
+			readAlnFile(inf, NxsCharactersBlock::protein);
+		else if (format == RELAXED_PHYLIP_TREE_FORMAT) // readPhylipTreeFile arguments: (stream, relaxedNames)
+			readPhylipTreeFile(inf, true);
+		else if (format == PHYLIP_TREE_FORMAT)
+			readPhylipTreeFile(inf, false);
+		else if (format == FIN_DNA_FORMAT)
+			readFinFile(inf, NxsCharactersBlock::dna);
+		else if (format == FIN_RNA_FORMAT)
+			readFinFile(inf, NxsCharactersBlock::rna);
+		else if (format == FIN_AA_FORMAT)
+			readFinFile(inf, NxsCharactersBlock::protein);
+		else
+			{
+			NxsString m;
+			if (filepath)
+				m << "The file " << filepath << " is not in a supported format.";
+			else
+				m << "Unsupported format.";
+			NexusError(m, 0, -1, -1);
+			return; // an unsupported format skips PostExecuteHook
+			}
+		PostExecuteHook(); // run once after any non-NEXUS reader above returns normally
+		}
+	}
+// More tolerant than strict PHYLIP (tolerates any amount of whitespace before or
+// between ntax and nchar).
+// Throws a NxsException if the header cannot be read or if either count is less than 1.
+// Returns the stream position (inf.tellg()) reached after reading nchar.
+unsigned MultiFormatReader::readPhylipHeader(std::istream & inf, unsigned & ntax, unsigned & nchar)
+	{
+	int ntaxi = 0; // read as signed so that a negative count can be detected and rejected below
+	int nchari = 0;
+	if (inf.good())
+		{
+		inf >> ntaxi;
+		}
+	else
+		{
+		NxsString err("Invalid file stream (this probably indicates an error occurred while opening the file).");
+		throw NxsException(err, 0, -1, -1);
+		}
+
+	if (inf.good())
+		inf >> nchari;
+	else
+		{
+		NxsString err("A file error occurred while reading ntax.");
+		throw NxsException(err, 0, -1, -1);
+		}
+	if (!inf.good())
+		{
+		NxsString err("A file error occurred while reading nchar.");
+		throw NxsException(err, 0, -1, -1);
+		}
+	if (ntaxi < 1 || nchari < 1)
+		{
+		NxsString err("Expecting the file to start with positive number of taxa then the number of characters.");
+		throw NxsException(err, 0, -1, -1);
+		}
+	ntax = (unsigned) ntaxi;
+	nchar = (unsigned) nchari;
+	return (unsigned) inf.tellg();
+	}
+
+void MultiFormatReader::readPhylipTreeFile(std::istream & inf, bool relaxedNames)
+	{ // Reads a PHYLIP tree file from inf into a "TREES" block; unless relaxedNames is set, labels longer than PHYLIP_NMLNGTH are rejected.
+	NxsString blockID("TREES");
+	NxsBlock *nb = cloneFactory.GetBlockReaderForID(blockID, this, NULL);
+	NCL_ASSERT(nb);
+	if (!nb)
+		return;
+	nb->SetNexus(this);
+
+	/* this should be safe because we know that the PublicNexusReader has a
+		NxsTreesBlock assigned to "TREES" -- unless the caller has replaced that
+		clone template (gulp)
+	*/
+	NxsTreesBlock * treesB = static_cast<NxsTreesBlock *>(nb);
+	NxsString err;
+	try {
+		treesB->Reset();
+		NxsToken inTokens(inf);
+		treesB->ReadPhylipTreeFile(inTokens);
+		if (!relaxedNames)
+			{
+			const NxsTaxaBlockAPI * taxa = treesB->GetTaxaBlockPtr(0L);
+			if (!taxa)
+				{
+				err << "No taxa found in tree description (which probably means that no tree was found).";
+				throw NxsException(err, inTokens);
+				}
+			const std::vector<std::string> l = taxa->GetAllLabels();
+			for (std::vector<std::string>::const_iterator lIt = l.begin(); lIt != l.end(); ++lIt)
+				{
+				if (lIt->length() > PHYLIP_NMLNGTH) // strict mode: enforce the PHYLIP name-length limit
+					{
+					err << "The taxon label " << *lIt << " has more than the allowed number of charcters (" << PHYLIP_NMLNGTH << ')';
+					throw NxsException(err);
+					}
+				}
+			}
+		BlockReadHook(blockID, treesB);
+		}
+	catch (...)
+		{
+		cloneFactory.BlockError(nb); // hand the block back to the factory before propagating the error
+		throw;
+		}
+	}
+
+/* Reads ALN data of datatype `dt` from `inf` into a new "DATA" block and passes that block to BlockReadHook.
+	Nothing is returned: on any exception the block is handed to cloneFactory.BlockError and the exception is rethrown.
+*/
+void MultiFormatReader::readAlnFile(std::istream & inf, NxsCharactersBlock::DataTypesEnum dt)
+	{
+	NxsString blockID("DATA");
+	NxsBlock *nb = cloneFactory.GetBlockReaderForID(blockID, this, NULL);
+	NCL_ASSERT(nb);
+	if (!nb)
+		return;
+	nb->SetNexus(this);
+	/* this should be safe because we know that the PublicNexusReader has a
+		DataBlock assigned to "DATA" -- unless the caller has replaced that
+		clone template (gulp)
+	*/
+	NxsDataBlock * dataB = static_cast<NxsDataBlock *>(nb);
+
+	try {
+		dataB->Reset();
+		dataB->datatype = dt;
+		dataB->ResetSymbols();
+		dataB->gap = '-';
+		NxsPartition dtParts;
+		std::vector<NxsCharactersBlock::DataTypesEnum> dtv;
+		dataB->CreateDatatypeMapperObjects(dtParts, dtv);
+
+		const NxsDiscreteDatatypeMapper * dm = dataB->GetDatatypeMapperForChar(0);
+		NCL_ASSERT(dm);
+		FileToCharBuffer ftcb(inf);
+		if (ftcb.buffer) // NOTE(review): with an empty buffer the block is neither registered nor passed to BlockError -- confirm ownership
+			{
+			std::list<std::string> taxaNames;
+			std::list<NxsDiscreteStateRow> matList;
+			if (!readAlnData(ftcb, *dm, taxaNames, matList))
+				throw NxsException("Expecting the same number of characters for all sequences in the ALN file");
+			const unsigned nchar = (unsigned const)matList.begin()->size(); // NOTE(review): assumes matList is non-empty whenever readAlnData succeeds -- confirm
+			moveDataToDataBlock(taxaNames, matList, nchar, dataB);
+			BlockReadHook(blockID, dataB);
+			}
+		}
+	catch (...)
+		{
+		cloneFactory.BlockError(nb);
+		throw;
+		}
+}
+
+/* Reads a PHYLIP matrix of datatype `dt` from `inf` into a new "DATA" block (header first, then sequential or interleaved rows).
+	Nothing is returned: on any exception the block is handed to cloneFactory.BlockError and the exception is rethrown.
+*/
+void MultiFormatReader::readPhylipFile(std::istream & inf, NxsCharactersBlock::DataTypesEnum dt, bool relaxedNames, bool interleaved)
+	{
+	NxsString blockID("DATA");
+	NxsBlock *nb = cloneFactory.GetBlockReaderForID(blockID, this, NULL);
+	NCL_ASSERT(nb);
+	if (!nb)
+		return;
+	nb->SetNexus(this);
+	/* this should be safe because we know that the PublicNexusReader has a
+		DataBlock assigned to "DATA" -- unless the caller has replaced that
+		clone template (gulp)
+	*/
+	NxsDataBlock * dataB = static_cast<NxsDataBlock *>(nb);
+
+	try {
+		dataB->Reset();
+		dataB->datatype = dt;
+		dataB->ResetSymbols();
+		dataB->gap = '-';
+		NxsPartition dtParts;
+		std::vector<NxsCharactersBlock::DataTypesEnum> dtv;
+		dataB->CreateDatatypeMapperObjects(dtParts, dtv);
+
+		const NxsDiscreteDatatypeMapper * dm = dataB->GetDatatypeMapperForChar(0);
+		NCL_ASSERT(dm);
+		unsigned ntax = 0;
+		unsigned nchar = 0;
+		unsigned headerLen = readPhylipHeader(inf, ntax, nchar); // stream position after the "ntax nchar" header
+		FileToCharBuffer ftcb(inf);
+		ftcb.totalSize += headerLen; // presumably accounts for the header bytes consumed before the buffer was created -- TODO confirm
+		if (ftcb.buffer) // NOTE(review): with an empty buffer the block is neither registered nor passed to BlockError -- confirm ownership
+			{
+			std::list<std::string> taxaNames;
+			std::list<NxsDiscreteStateRow> matList;
+			if (interleaved)
+				readInterleavedPhylipData(ftcb, *dm, taxaNames, matList, ntax, nchar, relaxedNames);
+			else
+				readPhylipData(ftcb, *dm, taxaNames, matList, ntax, nchar, relaxedNames);
+			moveDataToDataBlock(taxaNames, matList, nchar, dataB);
+			BlockReadHook(blockID, dataB);
+			}
+		}
+	catch (...)
+		{
+		cloneFactory.BlockError(nb);
+		throw;
+		}
+}
diff --git a/src/nxspublicblocks.cpp b/src/nxspublicblocks.cpp
new file mode 100644
index 0000000..91d416f
--- /dev/null
+++ b/src/nxspublicblocks.cpp
@@ -0,0 +1,811 @@
+//	Copyright (C) 1999-2003 Paul O. Lewis
+//
+//	This file is part of NCL (Nexus Class Library) version 2.0.
+//
+//	NCL is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	NCL is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with NCL; if not, write to the Free Software Foundation, Inc.,
+//	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+
+
+
+#include <istream>
+#include <fstream>
+#include "ncl/nxspublicblocks.h"
+#include "ncl/nxsreader.h"
+
+#include <Rcpp.h>
+
+using namespace std;
+
+
+/*! Registers the strings in the vector as taxon labels.
+
+	This is a convenience function that creates a NxsTaxaBlock, fills it, and
+	then calls AddReadTaxaBlock.
+
+	\returns the block created, or NULL (no block is created) when `tl` is empty.
+*/
+NxsTaxaBlock * PublicNexusReader::RegisterTaxa(const std::vector<std::string> & tl) {
+	if (tl.empty()) {
+		return 0L;
+	}
+	NxsTaxaBlock *tb = new NxsTaxaBlock();
+	tb->SetNtax( (unsigned)tl.size() );
+	for (std::vector<std::string>::const_iterator labelIt = tl.begin(); labelIt != tl.end(); ++labelIt)
+		tb->AddTaxonLabel(*labelIt);
+	AddReadTaxaBlock(tb);
+	return tb;
+}
+
+/*! Convenience function: parses `filepath` with a temporary PublicNexusReader (built with warning mode `mode`) and returns the NxsBlocks read, so the caller need not create and dispose of a reader. */
+BlockReaderList PublicNexusReader::parseFileOrThrow(
+    const char *filepath, /* path of file to parse */
+    NxsReader::WarningHandlingMode mode,
+    bool parsePrivateBlocks, /* true to store the commands found in  private blocks */
+    bool storeTokenInfo)
+    {
+    PublicNexusReader nexusReader(mode);
+    return NxsReader::parseFileWithReader(nexusReader, filepath, parsePrivateBlocks, storeTokenInfo);
+    }
+
+BlockReaderList DefaultErrorReportNxsReader::parseFile(
+    const char *filepath, /* path of file to parse */
+    std::ostream * stdOutstream, /* forwarded to the DefaultErrorReportNxsReader constructor */
+    std::ostream * errOutstream, /* forwarded to the DefaultErrorReportNxsReader constructor */
+    bool parsePrivateBlocks, /* true to store the commands found in  private blocks */
+    bool storeTokenInfo)
+    { // Parses filepath with a temporary DefaultErrorReportNxsReader and returns the blocks read.
+    DefaultErrorReportNxsReader nexusReader(stdOutstream, errOutstream);
+    return NxsReader::parseFileWithReader(nexusReader, filepath, parsePrivateBlocks, storeTokenInfo);
+    }
+
+/*! Convenience function for reading a filepath with the supplied reader.
+   Returns a list of NxsBlock pointers (which the caller must delete)
+   corresponding to the NxsBlocks found in the file.
+   Problems are reported through nexusReader.NexusError(); exceptions from Execute() propagate to the caller.
+*/
+BlockReaderList NxsReader::parseFileWithReader(
+    NxsReader & nexusReader,
+    const char *filepath, /*!< path of file to parse */
+    bool parsePrivateBlocks, /*!< true to store the commands found in  private blocks */
+    bool storeTokenInfo) /*!< true for storage of full token info (such as file position) for private blocks */
+    {
+    if (!filepath)
+        nexusReader.NexusError("Invalid (NULL) file specified to be parsed", 0, -1, -1); // NOTE(review): if NexusError returns instead of throwing, the NULL path is still used below -- confirm every reader throws
+    ifstream inf(filepath, ios::binary);
+    if (!inf.good())
+        {
+        NxsString err;
+        err << "Could not parse the file \"" << filepath <<"\"";
+        nexusReader.NexusError(err, 0, -1, -1);
+        }
+    nexusReader.statusMessage("Creating token");
+	NxsToken token(inf);
+	NxsDefaultPublicBlockFactory factory(parsePrivateBlocks, storeTokenInfo);
+	nexusReader.AddFactory(&factory); // the factory is a local object, so it is removed again on every exit path below
+	try {
+        nexusReader.statusMessage("Executing");
+	    nexusReader.Execute(token);
+	    }
+	catch(...)
+	    {
+        nexusReader.RemoveFactory(&factory);
+        throw;
+	    }
+	nexusReader.RemoveFactory(&factory);
+	BlockReaderList brl = nexusReader.GetBlocksFromLastExecuteInOrder();
+	return brl;
+    }
+
+
+void NxsStoreTokensBlockReader::Reset()
+	{ // Resets the base-class state and discards the stored commandsRead.
+	NxsBlock::Reset();
+	commandsRead.clear(); // NOTE(review): justTokens is not cleared here -- confirm whether that is intended
+	}
+
+void NxsStoreTokensBlockReader::ReportConst(std::ostream &out) const
+	{ // Writes the block name, the number of stored commands and the first token of each command to out.
+	out << NCL_BLOCKTYPE_ATTR_NAME << " block contains ";
+	if (storeAllTokenInfo)
+		{
+		out << (unsigned)commandsRead.size() << " commands:\n";
+		for (std::list<ProcessedNxsCommand>::const_iterator cIt = commandsRead.begin(); cIt != commandsRead.end(); ++cIt)
+			{
+			const ProcessedNxsToken & t = (*cIt)[0]; // ReadCommand only stores non-empty commands, so element 0 exists for those
+			out << "    " << t.GetToken() << "\n";
+			}
+		}
+	else
+		{
+		out << (unsigned)justTokens.size() << " commands:\n";
+		for (ListVecString::const_iterator cIt = justTokens.begin(); cIt != justTokens.end(); ++cIt)
+			out << "    " << cIt->at(0) << "\n";
+		}
+	}
+
+void NxsStoreTokensBlockReader::ReadCommand(
+  NxsToken &token)	/* the token stream positioned on the first token of the command */
+	{ // Stores one command, as processed tokens or as plain strings depending on storeAllTokenInfo; empty commands are dropped.
+	if (storeAllTokenInfo)
+		{
+		ProcessedNxsCommand fullTokens;
+		token.ProcessAsCommand(&fullTokens);
+		if (!fullTokens.empty())
+			commandsRead.push_back(fullTokens);
+		}
+	else
+		{
+		VecString justString;
+		while (!token.Equals(";")) // NOTE(review): no end-of-file check is visible here -- confirm a missing ';' cannot make this loop spin
+			{
+			justString.push_back(token.GetToken());
+			token.GetNextToken();
+			}
+		if (!justString.empty())
+			justTokens.push_back(justString);
+		}
+	}
+
+void NxsStoreTokensBlockReader::Read(
+  NxsToken &token)	/* the token stream to read the block from */
+	{ // Reads the whole block: every command up to END/ENDBLOCK is passed to ReadCommand.
+	isEmpty = false;
+	isUserSupplied = true;
+	NxsString begcmd("BEGIN ");
+	begcmd += this->NCL_BLOCKTYPE_ATTR_NAME;
+	DemandEndSemicolon(token, begcmd.c_str());
+
+	for(;;)
+		{
+		token.GetNextToken();
+        if (token.Equals("END") || token.Equals("ENDBLOCK"))
+            {
+            HandleEndblock(token);
+            return ;
+            }
+		this->ReadCommand(token); // the only exit in this loop is the END/ENDBLOCK branch above
+		}
+	}
+
+void NxsStoreTokensBlockReader::WriteAsNexus(std::ostream &out) const
+	{ // Writes the stored commands back out as a "BEGIN <name>; ... END;" block.
+	out << "BEGIN " << NxsString::GetEscaped(this->NCL_BLOCKTYPE_ATTR_NAME) << ";\n";
+	if (storeAllTokenInfo)
+		{
+		for (std::list<ProcessedNxsCommand>::const_iterator cIt = commandsRead.begin(); cIt != commandsRead.end(); ++cIt)
+			{
+			const ProcessedNxsCommand & t = *cIt;
+			if (WriteCommandAsNexus(out, t)) // a newline is added only when WriteCommandAsNexus returns true
+    			out << '\n';
+			}
+		}
+	else
+		{
+		for (ListVecString::const_iterator cIt = justTokens.begin(); cIt != justTokens.end(); ++cIt)
+			{
+			const VecString & t = *cIt;
+			out << "   ";
+			for (VecString::const_iterator wIt = t.begin(); wIt != t.end(); ++wIt)
+				out << ' ' << NxsString::GetEscaped(*wIt); // each stored token is written in escaped form
+			out << ";\n";
+			}
+		}
+	WriteSkippedCommands(out);
+	out << "END;\n";
+	}
+
+/*! Returns a block reader for the block type NCL_BLOCKTYPE_ATTR_NAME by delegating to the matching per-type factory; unknown types yield a NxsStoreTokensBlockReader when tokenizeUnknownBlocks is set and NULL otherwise. \ref BlockTypeIDDiscussion
+*/
+NxsBlock  *NxsDefaultPublicBlockFactory::GetBlockReaderForID(
+  const std::string & NCL_BLOCKTYPE_ATTR_NAME, /*! \ref BlockTypeIDDiscussion */
+  NxsReader *reader,
+  NxsToken *token)
+	{
+	if (NCL_BLOCKTYPE_ATTR_NAME == "ASSUMPTIONS" || NCL_BLOCKTYPE_ATTR_NAME == "SETS") // both names go to the assumptions factory
+		return assumpBlockFact.GetBlockReaderForID(NCL_BLOCKTYPE_ATTR_NAME, reader, token);
+	if (NCL_BLOCKTYPE_ATTR_NAME == "CHARACTERS")
+		return charBlockFact.GetBlockReaderForID(NCL_BLOCKTYPE_ATTR_NAME, reader, token);
+	if (NCL_BLOCKTYPE_ATTR_NAME == "DATA")
+		return dataBlockFact.GetBlockReaderForID(NCL_BLOCKTYPE_ATTR_NAME, reader, token);
+	if (NCL_BLOCKTYPE_ATTR_NAME == "DISTANCES")
+		return distancesBlockFact.GetBlockReaderForID(NCL_BLOCKTYPE_ATTR_NAME, reader, token);
+	if (NCL_BLOCKTYPE_ATTR_NAME == "TAXA")
+		return taxaBlockFact.GetBlockReaderForID(NCL_BLOCKTYPE_ATTR_NAME, reader, token);
+	if (NCL_BLOCKTYPE_ATTR_NAME == "TREES")
+		return treesBlockFact.GetBlockReaderForID(NCL_BLOCKTYPE_ATTR_NAME, reader, token);
+	if (NCL_BLOCKTYPE_ATTR_NAME == "TAXAASSOCIATION")
+		return taxaAssociationBlockFact.GetBlockReaderForID(NCL_BLOCKTYPE_ATTR_NAME, reader, token);
+	if (NCL_BLOCKTYPE_ATTR_NAME == "UNALIGNED")
+		return unalignedBlockFact.GetBlockReaderForID(NCL_BLOCKTYPE_ATTR_NAME, reader, token);
+	if (tokenizeUnknownBlocks)
+		{
+		NxsStoreTokensBlockReader * nb = new NxsStoreTokensBlockReader(NCL_BLOCKTYPE_ATTR_NAME, storeTokenInfoArg); // fallback reader for any other block name
+		nb->SetImplementsLinkAPI(false);
+    	return nb;
+        }
+	return NULL;
+	}
+
+/*! Creates a reader for the specified blocks.
+	The first argument is an integer composed of bits from PublicNexusReader::NexusBlocksToRead,
+		indicating which of the public blocks should be read. Either compose the
+		argument by ORing together bits (such as NEXUS_TREES_BLOCK_BIT|NEXUS_TAXA_BLOCK_BIT)
+		or simply pass in -1 to read all public blocks. A TAXA template is always registered.
+	The "standard" NCL NxsBlock (NxsCharactersBlock, NxsTaxaBlock...) instances will be
+		created as initial clone block templates in the contained NxsCloneBlockFactory.
+		These instances can be altered by getting pointers to them using the
+		GetAssumptionsBlockTemplate(), GetTaxaBlockTemplate()... methods.
+*/
+PublicNexusReader::PublicNexusReader(
+  const int blocksToRead, /*!< integer composed of bits from PublicNexusReader::NexusBlocksToRead, indicating which of the public blocks should be read*/
+  NxsReader::WarningHandlingMode warnModeArg) /*!< warning mode (passed to ExceptionRaisingNxsReader::ExceptionRaisingNxsReader() */
+	:ExceptionRaisingNxsReader(warnModeArg),
+	bitsForBlocksToRead(blocksToRead),
+	assumptionsBlockTemplate(0L),
+	charactersBlockTemplate(0L),
+	dataBlockTemplate(0L),
+	distancesBlockTemplate(0L),
+	storerBlockTemplate(0L),
+	taxaBlockTemplate(0L),
+	taxaAssociationBlockTemplate(0L),
+	treesBlockTemplate(0L),
+	unalignedBlockTemplate(0L)
+{
+	this->AddFactory(&cloneFactory);
+
+	taxaBlockTemplate = new NxsTaxaBlock(); // created regardless of blocksToRead
+	taxaBlockTemplate->SetImplementsLinkAPI(false);
+	cloneFactory.AddPrototype(taxaBlockTemplate);
+
+	if (blocksToRead & NEXUS_ASSUMPTIONS_BLOCK_BIT)
+		{
+		assumptionsBlockTemplate = new NxsAssumptionsBlock(0L);
+		assumptionsBlockTemplate->SetImplementsLinkAPI(true);
+		cloneFactory.AddPrototype(assumptionsBlockTemplate, "ASSUMPTIONS"); // one template serves three block names
+		cloneFactory.AddPrototype(assumptionsBlockTemplate, "SETS");
+		cloneFactory.AddPrototype(assumptionsBlockTemplate, "CODONS");
+		}
+
+	if (blocksToRead & NEXUS_TREES_BLOCK_BIT)
+		{
+		treesBlockTemplate = new NxsTreesBlock(NULL);
+		treesBlockTemplate->SetCreateImpliedBlock(true);
+		treesBlockTemplate->SetImplementsLinkAPI(true);
+		treesBlockTemplate->SetProcessAllTreesDuringParse(true);
+		treesBlockTemplate->SetAllowImplicitNames(true);
+		treesBlockTemplate->SetWriteFromNodeEdgeDataStructure(true);
+		cloneFactory.AddPrototype(treesBlockTemplate);
+		}
+	if (blocksToRead & NEXUS_CHARACTERS_BLOCK_BIT) // this single bit enables both the CHARACTERS and the DATA templates
+		{
+		charactersBlockTemplate = new NxsCharactersBlock(NULL, NULL);
+		charactersBlockTemplate->SetCreateImpliedBlock(true);
+		charactersBlockTemplate->SetImplementsLinkAPI(true);
+		charactersBlockTemplate->SetSupportMixedDatatype(true);
+		charactersBlockTemplate->SetConvertAugmentedToMixed(true);
+
+		dataBlockTemplate = new NxsDataBlock(NULL, NULL);
+		dataBlockTemplate->SetCreateImpliedBlock(true);
+		dataBlockTemplate->SetImplementsLinkAPI(true);
+		dataBlockTemplate->SetSupportMixedDatatype(true);
+		dataBlockTemplate->SetConvertAugmentedToMixed(true);
+		cloneFactory.AddPrototype(charactersBlockTemplate, "CHARACTERS");
+		cloneFactory.AddPrototype(dataBlockTemplate, "DATA");
+		}
+	if (blocksToRead & NEXUS_UNALIGNED_BLOCK_BIT)
+		{
+		unalignedBlockTemplate = new NxsUnalignedBlock(NULL);
+		unalignedBlockTemplate->SetCreateImpliedBlock(true);
+		unalignedBlockTemplate->SetImplementsLinkAPI(true);
+		cloneFactory.AddPrototype(unalignedBlockTemplate);
+		}
+	if (blocksToRead & NEXUS_DISTANCES_BLOCK_BIT)
+		{
+		distancesBlockTemplate = new NxsDistancesBlock(NULL);
+		distancesBlockTemplate->SetCreateImpliedBlock(true);
+		distancesBlockTemplate->SetImplementsLinkAPI(true);
+		cloneFactory.AddPrototype(distancesBlockTemplate);
+		}
+	if (blocksToRead & NEXUS_TAXAASSOCIATION_BLOCK_BIT)
+		{
+		taxaAssociationBlockTemplate = new NxsTaxaAssociationBlock();
+		cloneFactory.AddPrototype(taxaAssociationBlockTemplate);
+		}
+	if (blocksToRead & NEXUS_UNKNOWN_BLOCK_BIT)
+		{
+		std::string emptyString;
+		storerBlockTemplate = new NxsStoreTokensBlockReader(emptyString, true);
+		storerBlockTemplate->SetImplementsLinkAPI(false);
+		cloneFactory.AddDefaultPrototype(storerBlockTemplate); // registered as the factory's default prototype rather than under a block name
+		}
+}
+
+/*! Runs NxsReader::Execute() with the same arguments and then calls PostExecuteHook().
+		If NxsReader::Execute() throws, PostExecuteHook() is not reached.
+*/
+void PublicNexusReader::Execute(NxsToken& token, bool notifyStartStop)
+{
+	NxsReader::Execute(token, notifyStartStop);
+	PostExecuteHook();
+}
+
+/*! Called after a successful execute.
+	In the PublicNexusReader, this function down-casts each block from the last execute to the type that
+	its (upper-cased) ID implies and appends it to the matching per-type vector.
+
+	\warn if you derive from PublicNexusReader and change the type of the clone templates, then
+	you must override this function so that the casts in this function will be safe.
+*/
+void PublicNexusReader::PostExecuteHook()
+{
+	BlockReaderList blocks = GetBlocksFromLastExecuteInOrder();
+	for (BlockReaderList::const_iterator bIt = blocks.begin(); bIt != blocks.end(); ++bIt)
+		{
+		NxsBlock * b = *bIt;
+		const std::string NCL_BLOCKTYPE_ATTR_NAME = b->GetID();
+		const std::string capId = NxsString::get_upper(NCL_BLOCKTYPE_ATTR_NAME); // compare IDs case-insensitively
+		const char * capIdP = capId.c_str();
+		if (strcmp(capIdP, "TAXA") == 0)
+			taxaBlockVec.push_back(static_cast<NxsTaxaBlock *>(b));
+		else if (strcmp(capIdP, "TREES") == 0)
+			treesBlockVec.push_back(static_cast<NxsTreesBlock *>(b));
+		else if ((strcmp(capIdP, "CHARACTERS") == 0) || (strcmp(capIdP, "DATA") == 0)) // DATA blocks are stored with the characters blocks
+			charactersBlockVec.push_back(static_cast<NxsCharactersBlock *>(b));
+		else if ((strcmp(capIdP, "ASSUMPTIONS") == 0) || (strcmp(capIdP, "SETS") == 0) || (strcmp(capIdP, "CODONS") == 0))
+			assumptionsBlockVec.push_back(static_cast<NxsAssumptionsBlock *>(b));
+		else if (strcmp(capIdP, "DISTANCES") == 0)
+			distancesBlockVec.push_back(static_cast<NxsDistancesBlock *>(b));
+		else if (strcmp(capIdP, "TAXAASSOCIATION") == 0)
+			taxaAssociationBlockVec.push_back(static_cast<NxsTaxaAssociationBlock *>(b));
+		else if (strcmp(capIdP, "UNALIGNED") == 0)
+			unalignedBlockVec.push_back(static_cast<NxsUnalignedBlock *>(b));
+		else
+			{
+			storerBlockVec.push_back(static_cast<NxsStoreTokensBlockReader *>(b)); // any other ID is assumed to be a token-storing block
+			}
+		}
+}
+
+void PublicNexusReader::AddFactory(NxsBlockFactory *f)
+{ // Only the reader's own cloneFactory is accepted; any other factory trips NCL_ASSERT and is not added.
+	if (f == &cloneFactory)
+		NxsReader::AddFactory(f);
+	else
+		{
+		NCL_ASSERT(false);
+		}
+}
+
+PublicNexusReader::~PublicNexusReader()
+{ // Deletes the block templates allocated by the constructor; templates that were never created are NULL.
+	delete assumptionsBlockTemplate;
+	delete charactersBlockTemplate;
+	delete dataBlockTemplate;
+	delete distancesBlockTemplate;
+	delete storerBlockTemplate;
+	delete taxaBlockTemplate;
+	delete taxaAssociationBlockTemplate;
+	delete treesBlockTemplate;
+	delete unalignedBlockTemplate;
+}
+
+unsigned PublicNexusReader::GetNumAssumptionsBlocks(const NxsTaxaBlock *taxa) const
+	{
+	// Tally the stored assumptions blocks whose `taxa` member equals `taxa`;
+	// a NULL `taxa` acts as a wildcard, so every stored block is counted.
+	typedef std::vector<NxsAssumptionsBlock *>::const_iterator BlockIt;
+	unsigned matches = 0;
+	for (BlockIt it = assumptionsBlockVec.begin(); it != assumptionsBlockVec.end(); ++it)
+		{
+		if (!taxa || taxa == (*it)->taxa)
+			++matches;
+		}
+	return matches;
+	}
+
+NxsAssumptionsBlock * PublicNexusReader::GetAssumptionsBlock(const NxsTaxaBlock *taxa, unsigned index) const
+	{
+	// Walk the stored assumptions blocks, considering only those whose `taxa`
+	// member equals `taxa` (a NULL `taxa` accepts all), and hand back the one at
+	// 0-based position `index` among them; NULL when there are too few matches.
+	typedef std::vector<NxsAssumptionsBlock *>::const_iterator BlockIt;
+	unsigned seen = 0;
+	for (BlockIt it = assumptionsBlockVec.begin(); it != assumptionsBlockVec.end(); ++it)
+		{
+		NxsAssumptionsBlock * const candidate = *it;
+		if ((!taxa || taxa == candidate->taxa) && seen++ == index)
+			return candidate;
+		}
+	return 0L;
+	}
+
+unsigned PublicNexusReader::GetNumAssumptionsBlocks( const NxsCharactersBlock * chars) const
+	{
+	// Tally the stored assumptions blocks whose `charBlockPtr` equals `chars`;
+	// a NULL `chars` acts as a wildcard, so every stored block is counted.
+	typedef std::vector<NxsAssumptionsBlock *>::const_iterator BlockIt;
+	unsigned matches = 0;
+	for (BlockIt it = assumptionsBlockVec.begin(); it != assumptionsBlockVec.end(); ++it)
+		{
+		if (!chars || chars == (*it)->charBlockPtr)
+			++matches;
+		}
+	return matches;
+	}
+
+NxsAssumptionsBlock * PublicNexusReader::GetAssumptionsBlock(const NxsCharactersBlock * chars, unsigned index) const
+	{
+	// Walk the stored assumptions blocks, considering only those whose
+	// `charBlockPtr` equals `chars` (a NULL `chars` accepts all), and hand back the
+	// one at 0-based position `index` among them; NULL when there are too few matches.
+	typedef std::vector<NxsAssumptionsBlock *>::const_iterator BlockIt;
+	unsigned seen = 0;
+	for (BlockIt it = assumptionsBlockVec.begin(); it != assumptionsBlockVec.end(); ++it)
+		{
+		NxsAssumptionsBlock * const candidate = *it;
+		if ((!chars || chars == candidate->charBlockPtr) && seen++ == index)
+			return candidate;
+		}
+	return 0L;
+	}
+
+unsigned PublicNexusReader::GetNumAssumptionsBlocks(const NxsTreesBlock *tree) const
+	{
+	// Tally the stored assumptions blocks whose `treesBlockPtr` equals `tree`;
+	// a NULL `tree` acts as a wildcard, so every stored block is counted.
+	typedef std::vector<NxsAssumptionsBlock *>::const_iterator BlockIt;
+	unsigned matches = 0;
+	for (BlockIt it = assumptionsBlockVec.begin(); it != assumptionsBlockVec.end(); ++it)
+		{
+		if (!tree || tree == (*it)->treesBlockPtr)
+			++matches;
+		}
+	return matches;
+	}
+
+/*! \returns the `index`-th (0-based) stored ASSUMPTIONS block whose `treesBlockPtr` equals
+	`tree` (any block qualifies when `tree` is NULL), or a null pointer if there are
+	not that many qualifying blocks.
+*/
+NxsAssumptionsBlock * PublicNexusReader::GetAssumptionsBlock(const NxsTreesBlock *tree, unsigned index) const
+	{
+	typedef std::vector<NxsAssumptionsBlock *>::const_iterator BlockIt;
+	unsigned seen = 0;
+	for (BlockIt it = assumptionsBlockVec.begin(); it != assumptionsBlockVec.end(); ++it)
+		{
+		NxsAssumptionsBlock * candidate = *it;
+		if (!tree || tree == candidate->treesBlockPtr)
+			{
+			if (seen == index)
+				return candidate;
+			++seen;
+			}
+		}
+	return 0L;
+	}
+
+/*! \returns how many stored CHARACTERS blocks have a `taxa` pointer equal to `taxa`
+	(all of them when `taxa` is NULL).
+*/
+unsigned PublicNexusReader::GetNumCharactersBlocks(const NxsTaxaBlock *taxa) const
+	{
+	unsigned count = 0;
+	std::vector<NxsCharactersBlock *>::const_iterator it = charactersBlockVec.begin();
+	while (it != charactersBlockVec.end())
+		{
+		NxsCharactersBlock * block = *it++;
+		if (taxa && taxa != block->taxa)
+			continue;
+		++count;
+		}
+	return count;
+	}
+
+/*! \returns the `index`-th (0-based) stored CHARACTERS block whose `taxa` pointer equals
+	`taxa` (any block qualifies when `taxa` is NULL), or a null pointer if `index`
+	is past the last qualifying block.
+*/
+NxsCharactersBlock * PublicNexusReader::GetCharactersBlock(const NxsTaxaBlock *taxa, unsigned index) const
+	{
+	unsigned seen = 0;
+	std::vector<NxsCharactersBlock *>::const_iterator it = charactersBlockVec.begin();
+	while (it != charactersBlockVec.end())
+		{
+		NxsCharactersBlock * block = *it++;
+		if (taxa && taxa != block->taxa)
+			continue;
+		if (seen++ == index)
+			return block;
+		}
+	return 0L;
+	}
+
+/*! \returns how many stored DISTANCES blocks have a `taxa` pointer equal to `taxa`
+	(all of them when `taxa` is NULL).
+*/
+unsigned PublicNexusReader::GetNumDistancesBlocks(const NxsTaxaBlock *taxa) const
+	{
+	unsigned count = 0;
+	std::vector<NxsDistancesBlock *>::const_iterator it = distancesBlockVec.begin();
+	while (it != distancesBlockVec.end())
+		{
+		NxsDistancesBlock * block = *it++;
+		if (taxa && taxa != block->taxa)
+			continue;
+		++count;
+		}
+	return count;
+	}
+
+/*! \returns the `index`-th (0-based) stored DISTANCES block whose `taxa` pointer equals
+	`taxa` (any block qualifies when `taxa` is NULL), or a null pointer if `index`
+	is past the last qualifying block.
+*/
+NxsDistancesBlock * PublicNexusReader::GetDistancesBlock(const NxsTaxaBlock *taxa, unsigned index) const
+	{
+	unsigned seen = 0;
+	std::vector<NxsDistancesBlock *>::const_iterator it = distancesBlockVec.begin();
+	while (it != distancesBlockVec.end())
+		{
+		NxsDistancesBlock * block = *it++;
+		if (taxa && taxa != block->taxa)
+			continue;
+		if (seen++ == index)
+			return block;
+		}
+	return 0L;
+	}
+
+/*! \returns how many stored UNALIGNED blocks have a `taxa` pointer equal to `taxa`
+	(all of them when `taxa` is NULL).
+*/
+unsigned PublicNexusReader::GetNumUnalignedBlocks(const NxsTaxaBlock *taxa) const
+	{
+	unsigned count = 0;
+	std::vector<NxsUnalignedBlock *>::const_iterator it = unalignedBlockVec.begin();
+	while (it != unalignedBlockVec.end())
+		{
+		NxsUnalignedBlock * block = *it++;
+		if (taxa && taxa != block->taxa)
+			continue;
+		++count;
+		}
+	return count;
+	}
+
+/*! \returns the `index`-th (0-based) stored UNALIGNED block whose `taxa` pointer equals
+	`taxa` (any block qualifies when `taxa` is NULL), or a null pointer if `index`
+	is past the last qualifying block.
+*/
+NxsUnalignedBlock * PublicNexusReader::GetUnalignedBlock(const NxsTaxaBlock *taxa, unsigned index) const
+	{
+	unsigned seen = 0;
+	std::vector<NxsUnalignedBlock *>::const_iterator it = unalignedBlockVec.begin();
+	while (it != unalignedBlockVec.end())
+		{
+		NxsUnalignedBlock * block = *it++;
+		if (taxa && taxa != block->taxa)
+			continue;
+		if (seen++ == index)
+			return block;
+		}
+	return 0L;
+	}
+
+
+/*! \returns how many stored TAXAASSOCIATION blocks report `taxa` as either their first
+	or their second taxa block (all stored blocks are counted when `taxa` is NULL).
+*/
+unsigned PublicNexusReader::GetNumTaxaAssociationBlocks(const NxsTaxaBlock *taxa) const
+	{
+	unsigned count = 0;
+	std::vector<NxsTaxaAssociationBlock *>::const_iterator it = taxaAssociationBlockVec.begin();
+	while (it != taxaAssociationBlockVec.end())
+		{
+		NxsTaxaAssociationBlock * block = *it++;
+		if (taxa && taxa != block->GetFirstTaxaBlock() && taxa != block->GetSecondTaxaBlock())
+			continue;
+		++count;
+		}
+	return count;
+	}
+
+/*! \returns the `index`-th (0-based) stored TAXAASSOCIATION block that reports `taxa` as
+	its first or second taxa block (any block qualifies when `taxa` is NULL), or a
+	null pointer if `index` is past the last qualifying block.
+*/
+NxsTaxaAssociationBlock * PublicNexusReader::GetTaxaAssociationBlock(const NxsTaxaBlock *taxa, unsigned index) const
+	{
+	unsigned seen = 0;
+	std::vector<NxsTaxaAssociationBlock *>::const_iterator it = taxaAssociationBlockVec.begin();
+	while (it != taxaAssociationBlockVec.end())
+		{
+		NxsTaxaAssociationBlock * block = *it++;
+		if (taxa && taxa != block->GetFirstTaxaBlock() && taxa != block->GetSecondTaxaBlock())
+			continue;
+		if (seen++ == index)
+			return block;
+		}
+	return 0L;
+	}
+
+/*! \returns how many stored TREES blocks have a `taxa` pointer equal to `taxa`
+	(all of them when `taxa` is NULL).
+*/
+unsigned PublicNexusReader::GetNumTreesBlocks(const NxsTaxaBlock *taxa) const
+	{
+	unsigned count = 0;
+	std::vector<NxsTreesBlock *>::const_iterator it = treesBlockVec.begin();
+	while (it != treesBlockVec.end())
+		{
+		NxsTreesBlock * block = *it++;
+		if (taxa && taxa != block->taxa)
+			continue;
+		++count;
+		}
+	return count;
+	}
+
+/*! \returns the `index`-th (0-based) stored TREES block whose `taxa` pointer equals
+	`taxa` (any block qualifies when `taxa` is NULL), or a null pointer if `index`
+	is past the last qualifying block.
+*/
+NxsTreesBlock * PublicNexusReader::GetTreesBlock(const NxsTaxaBlock *taxa, unsigned index) const
+	{
+	unsigned seen = 0;
+	std::vector<NxsTreesBlock *>::const_iterator it = treesBlockVec.begin();
+	while (it != treesBlockVec.end())
+		{
+		NxsTreesBlock * block = *it++;
+		if (taxa && taxa != block->taxa)
+			continue;
+		if (seen++ == index)
+			return block;
+		}
+	return 0L;
+	}
+
+/*! \returns the number of entries in `storerBlockVec`. */
+unsigned PublicNexusReader::GetNumUnknownBlocks() const
+	{
+	const unsigned count = (unsigned)storerBlockVec.size();
+	return count;
+	}
+
+/*! \returns element `index` of `storerBlockVec`, or a null pointer when `index` is out of range. */
+NxsStoreTokensBlockReader * PublicNexusReader::GetUnknownBlock(unsigned index) const
+	{
+	if (index >= storerBlockVec.size())
+		return 0L;
+	return storerBlockVec[index];
+	}
+
+/*! \returns the number of entries in `taxaBlockVec`. */
+unsigned PublicNexusReader::GetNumTaxaBlocks() const
+	{
+	const unsigned count = (unsigned)taxaBlockVec.size();
+	return count;
+	}
+
+/*! \returns element `index` of `taxaBlockVec`, or a null pointer when `index` is out of range. */
+NxsTaxaBlock * PublicNexusReader::GetTaxaBlock(unsigned index) const
+	{
+	if (index >= taxaBlockVec.size())
+		return 0L;
+	return taxaBlockVec[index];
+	}
+
+/*! Calls NxsReader::ClearUsedBlockList() and then empties every per-type block vector
+	held by this reader. Only the vectors are cleared; nothing is deleted here.
+*/
+void PublicNexusReader::ClearUsedBlockList()
+	{
+	NxsReader::ClearUsedBlockList();
+
+	// blocks that carry or refer to taxa
+	taxaBlockVec.clear();
+	taxaAssociationBlockVec.clear();
+	treesBlockVec.clear();
+
+	// character-like data
+	charactersBlockVec.clear();
+	dataBlockVec.clear();
+	distancesBlockVec.clear();
+	unalignedBlockVec.clear();
+
+	// everything else
+	assumptionsBlockVec.clear();
+	storerBlockVec.clear();
+	}
+
+
+
+
+bool fileExists(const std::string &fn);
+
+// Reports whether `fn` could be opened for reading. This is only an approximation of
+// "exists": a file that is present but unreadable is reported as missing.
+bool fileExists(const std::string &fn)
+{
+	std::ifstream probe(fn.c_str());
+	// the stream is closed by its destructor on return
+	return probe.good();
+}
+
+/*! \returns the first name in the sequence `prefix`, `prefix`1, `prefix`2, ...,
+	`prefix`MAX_SUFFIX for which fileExists() is false.
+
+	Every name in that sequence, including the last one, is tested before giving up,
+	so the error message below is accurate.
+
+	\throws NxsException if all of the candidate names refer to existing files.
+*/
+std::string NxsConversionOutputRecord::getUniqueFilenameWithLowestIndex(const char * prefix)
+{
+	NxsString fn;
+	fn.assign(prefix);
+	const unsigned MAX_SUFFIX =  10000;
+	for (unsigned i = 1; ; ++i)
+		{
+		if (!fileExists(fn))
+			return fn;
+		// fn now holds the candidate with suffix (i - 1); stop once the candidate
+		// with suffix MAX_SUFFIX has itself been tested.
+		if (i > MAX_SUFFIX)
+			break;
+		fn.assign(prefix);
+		fn << i;
+		}
+	fn.clear();
+	fn << "Files \"" << prefix << "\" through \"" << prefix << MAX_SUFFIX << "\" exist, and I am afraid to write any more files to that directory. I quit.";
+	throw NxsException(fn);
+}
+
+// Opens the file at path `fn` for writing and emits the name translation for taxa
+//	block `tb` into it via writeTaxonNameTranslationStream(). When `verbose` is true a
+//	notice naming the file is printed to Rcpp::Rcerr. Throws NxsException if the file
+//	cannot be opened.
+void NxsConversionOutputRecord::writeTaxonNameTranslationFilepath(const char * fn, const std::vector<NxsNameToNameTrans> & nameTrans, const NxsTaxaBlockAPI *tb, bool verbose)
+{
+	std::ofstream translationFile(fn);
+	if (!translationFile.good())
+		{
+		NxsString failure;
+		failure << "Could not open the file " << fn << " for writing translation of names";
+		throw NxsException(failure);
+		}
+	if (verbose)
+		{
+		Rcpp::Rcerr << "Writing \"" << fn << "\" to store the translation of names\n";
+		}
+	writeTaxonNameTranslationStream(translationFile, nameTrans, tb);
+	translationFile.close();
+}
+
+
+// Writes an XML document to `tnf`: a <taxa> element labelled with the title of `tb`,
+// containing one <taxon src=... dest=... /> element per entry of `nameTrans`.
+void NxsConversionOutputRecord::writeTaxonNameTranslationStream(std::ostream & tnf, const std::vector<NxsNameToNameTrans> & nameTrans, const NxsTaxaBlockAPI *tb)
+{
+	typedef std::vector<NxsNameToNameTrans>::const_iterator TransIt;
+	const std::string blockLabel = tb->GetTitle();
+
+	tnf << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
+	tnf << "<taxa label=";
+	writeAttributeValue(tnf, blockLabel);
+	tnf << " >\n";
+
+	TransIt pairIt = nameTrans.begin();
+	const TransIt pairEnd = nameTrans.end();
+	while (pairIt != pairEnd)
+		{
+		tnf << " <taxon src=";
+		writeAttributeValue(tnf, pairIt->first);
+		tnf << " dest=";
+		writeAttributeValue(tnf, pairIt->second);
+		tnf << " />\n";
+		++pairIt;
+		}
+
+	tnf << "</taxa>\n";
+}
+
+/*! Writes the name translation for `taxa` to a file, unless one has already been
+	recorded for that taxa block in `taxaBlocksToConversionFiles`.
+
+	The file name is `translationFilename`, or (when `numberTranslationFiles` is set) the
+	name chosen by getUniqueFilenameWithLowestIndex() for that prefix. The name used is
+	recorded in `taxaBlocksToConversionFiles` after the file has been written.
+*/
+void NxsConversionOutputRecord::writeNameTranslation(std::vector<NxsNameToNameTrans> nameTrans, const NxsTaxaBlockAPI * taxa)
+	{
+	const bool alreadyWritten = (taxaBlocksToConversionFiles.find(taxa) != taxaBlocksToConversionFiles.end());
+	if (alreadyWritten)
+		return;
+	std::string fn;
+	if (!this->numberTranslationFiles)
+		fn = this->translationFilename;
+	else
+		fn = getUniqueFilenameWithLowestIndex(this->translationFilename.c_str());
+	writeTaxonNameTranslationFilepath(fn.c_str(), nameTrans, taxa, this->verboseWritingOfNameTranslationFile);
+	taxaBlocksToConversionFiles[taxa] = fn;
+	}
+
+/*! Writes `v` to `out` as a quoted XML attribute value.
+
+	- Values without any of the characters ' " & < are written verbatim inside single quotes.
+	- Otherwise the value is delimited by double quotes if it contains an apostrophe and by
+	  single quotes if it does not, so the delimiter itself never needs escaping unless both
+	  kinds of quote occur; in that case embedded double quotes are written as &quot;.
+	- '&' is always written as &amp; and '<' as &lt;, since neither may appear literally
+	  in an attribute value.
+
+	An empty `v` produces ''.
+*/
+void writeAttributeValue(ostream & out, const std::string & v)
+	{
+	// Fast path: nothing in the value needs escaping.
+	if (v.find_first_of("\'\"&<") == string::npos)
+		{
+		out << '\'' << v << '\'';
+		return;
+		}
+
+	const bool hasApostrophe = (v.find('\'') != string::npos);
+	const char delimiter = (hasApostrophe ? '\"' : '\'');
+
+	out << delimiter;
+	for (std::string::const_iterator cIt = v.begin(); cIt != v.end(); ++cIt)
+		{
+		const char c = *cIt;
+		if (c == '&')
+			out << "&amp;";
+		else if (c == '<')
+			out << "&lt;";
+		else if (c == '\"' && hasApostrophe)
+			out << "&quot;";	// only needed when '"' is the delimiter
+		else
+			out << c;
+		}
+	out << delimiter;
+	}
+
+
+
diff --git a/src/nxsreader.cpp b/src/nxsreader.cpp
new file mode 100644
index 0000000..35367c6
--- /dev/null
+++ b/src/nxsreader.cpp
@@ -0,0 +1,1396 @@
+//	Copyright (C) 1999-2003 Paul O. Lewis
+//
+//	This file is part of NCL (Nexus Class Library) version 2.0.
+//
+//	NCL is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	NCL is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with NCL; if not, write to the Free Software Foundation, Inc.,
+//	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+#include <csignal>
+#include <algorithm>
+#include <set>
+#include <fstream>
+#include <climits>
+#include <sstream>
+#include <iterator>
+
+#include <Rcpp.h>
+
+#include "ncl/nxsreader.h"
+#include "ncl/nxsdefs.h"
+#include "ncl/nxscharactersblock.h"
+#include "ncl/nxstaxablock.h"
+#include "ncl/nxstreesblock.h"
+
+using namespace std;
+
+// The function below is compiled only when NCL_CONST_FUNCS is defined and non-zero.
+// It takes no arguments and always returns 1.
+// NOTE(review): presumably a marker symbol for builds with NCL_CONST_FUNCS enabled -
+// confirm against the headers that define that macro.
+#if defined(NCL_CONST_FUNCS) && NCL_CONST_FUNCS
+	int onlyDefinedInCouldBeConst()
+	{
+		return 1;
+	}
+
+#endif
+
+
+// SIGINT handler installed by NxsReader::installNCLSignalHandler() (defined below).
+static void NxsHandleSignalCallback(int);
+
+// Handler that was active before NCL installed its own; only meaningful while
+// prevSignalStored is true.
+NxsReader::SignalHandlerFuncPtr NxsReader::prevSignalCatcher = 0L;
+// When true, Execute() installs the NCL SIGINT handler for the duration of a parse.
+bool NxsReader::nclCatchesSignals = false;
+// Running count of SIGINTs seen by NxsHandleSignalCallback().
+unsigned NxsReader::numSigIntsCaught = 0;
+// Starts out false because no previous handler has been captured yet. If it started
+// out true, an uninstallNCLSignalHandler() call made before any handler was stored
+// (e.g. when SIGINT was being ignored at install time) would pass the null
+// prevSignalCatcher to std::signal() and replace the caller's SIGINT disposition.
+bool NxsReader::prevSignalStored = false;
+
+/*! \returns the current value of the static SIGINT counter `numSigIntsCaught`. */
+unsigned NxsReader::getNumSignalIntsCaught()
+	{
+	const unsigned caught = numSigIntsCaught;
+	return caught;
+	}
+
+/*! Overwrites the static SIGINT counter `numSigIntsCaught` with `n`. */
+void NxsReader::setNumSignalsIntsCaught(unsigned n)
+	{
+	numSigIntsCaught = n;
+	}
+
+/*! Sets the static flag `nclCatchesSignals`, which Execute() consults to decide whether
+	to install the NCL SIGINT handler while parsing.
+*/
+void NxsReader::setNCLCatchesSignals(bool v)
+	{
+	nclCatchesSignals = v;
+	}
+
+/*! \returns the current value of the static flag `nclCatchesSignals`. */
+bool NxsReader::getNCLCatchesSignals()
+	{
+	const bool catches = nclCatchesSignals;
+	return catches;
+	}
+
+
+/*! Signal callback: adds one to NxsReader's count of caught SIGINTs. The signal
+	number argument is ignored.
+*/
+void NxsHandleSignalCallback(int)
+	{
+	NxsReader::setNumSignalsIntsCaught(NxsReader::getNumSignalIntsCaught() + 1);
+	}
+
+/*! Makes NxsHandleSignalCallback the SIGINT handler, remembering the handler it
+	replaces. If SIGINT was being ignored, it is left ignored and nothing is stored.
+*/
+void NxsReader::installNCLSignalHandler()
+	{
+	// Setting SIG_IGN first is how the previous handler is read back.
+	NxsReader::SignalHandlerFuncPtr previous = std::signal(SIGINT, SIG_IGN);
+	if (previous == SIG_IGN)
+		return;
+	NxsReader::prevSignalCatcher = previous;
+	NxsReader::prevSignalStored = true;
+	std::signal(SIGINT, NxsHandleSignalCallback);
+	}
+
+/*! Restores the SIGINT handler recorded in `prevSignalCatcher` and forgets it.
+	Does nothing when `prevSignalStored` is false.
+*/
+void NxsReader::uninstallNCLSignalHandler()
+	{
+	if (!prevSignalStored)
+		return;
+	std::signal(SIGINT, NxsReader::prevSignalCatcher);
+	NxsReader::prevSignalCatcher = 0L;
+	NxsReader::prevSignalStored = false;
+	}
+
+
+
+/*! Opens the file at `filename` in binary mode and passes the stream to ReadFilestream().
+
+	NexusError() is called (with position 0 and line/column -1) if the stream is not
+	good() after opening, or if anything is thrown inside the try block.
+*/
+void NxsReader::ReadFilepath(const char *filename)
+	{
+	std::ifstream inf;
+	try{
+		inf.open(filename, ios::binary);
+		if (!inf.good())
+			{
+			NxsString err;
+			err << "Could not open the file \"" << filename <<"\"";
+			this->NexusError(err, 0, -1, -1);
+			}
+		}
+	// NOTE(review): this catch-all also encloses the NexusError() call above, so if
+	// NexusError() reports by throwing, the "Could not open" error is replaced by the
+	// message built here - confirm whether that is intended.
+	catch (...)
+		{
+		NxsString err;
+		err << '\"' << filename <<"\" does not refer to a valid file." ;
+		this->NexusError(err, 0, -1, -1);
+		}
+	// Reached whenever the NexusError() calls above return normally, including after
+	// a failed open.
+	this->ReadFilestream(inf);
+	}
+
+
+/*! Parses the text held in `s` as NEXUS content by wrapping it in an input string
+	stream and passing that stream to ReadFilestream().
+*/
+void NxsReader::ReadStringAsNexusContent(const std::string & s)
+	{
+	std::istringstream content(s);
+	ReadFilestream(content);
+	}
+
+/*! Parses the stream `inf`: a NxsToken is constructed on top of it and handed to
+	NxsReader::Execute().
+*/
+void NxsReader::ReadFilestream(std::istream & inf)
+	{
+	NxsToken tokenizer(inf);
+	Execute(tokenizer);
+	}
+
+/*! Returns the set of blocks in `blocksInOrder` that were created by factories (i.e. for
+	which BlockIsASingeltonReader() is false), and removes the references to them from
+	the NxsReader's used-block collections via RemoveBlockFromUsedBlockList().
+
+	The blocks themselves are not deleted; the caller receives the pointers.
+*/
+std::set<NxsBlock *> NxsReader::RemoveBlocksFromFactoriesFromUsedBlockLists()
+	{
+	std::set<NxsBlock *> todel;
+	// First pass: only collect. The removal is done in a second pass so that
+	// blocksInOrder is not touched while it is being iterated.
+	for (BlockReaderList::iterator bIt = blocksInOrder.begin(); bIt != blocksInOrder.end(); ++bIt)
+		{
+		NxsBlock * b  = *bIt;
+		if (!BlockIsASingeltonReader(b))
+			todel.insert(b);
+		}
+	for (std::set<NxsBlock *>::iterator d = todel.begin(); d != todel.end(); ++d)
+		{
+		RemoveBlockFromUsedBlockList(*d);
+		}
+	return todel;
+	}
+
+/*! Detaches every factory-created block from the reader's used-block collections
+	(see RemoveBlocksFromFactoriesFromUsedBlockLists()) and then deletes each of them.
+*/
+void NxsReader::DeleteBlocksFromFactories()
+	{
+	typedef std::set<NxsBlock *> BlockSet;
+	const BlockSet doomed = RemoveBlocksFromFactoriesFromUsedBlockLists();
+	BlockSet::const_iterator victim = doomed.begin();
+	while (victim != doomed.end())
+		{
+		delete *victim;
+		++victim;
+		}
+	}
+
+/*! \returns true if `b` is one of the nodes of the linked list starting at `blockList`
+	(i.e. a registered block reader rather than a block obtained from a factory),
+	false otherwise.
+*/
+bool NxsReader::BlockIsASingeltonReader(NxsBlock *b) const
+	{
+	for (NxsBlock * registered = blockList; registered; registered = registered->next)
+		{
+		if (registered == b)
+			return true;
+		}
+	return false;
+	}
+
+/*! \returns the last block in the list produced by FindAllBlocksByTitle() for
+	`chosenBlockList` and `title`, or NULL if that list is empty.
+
+	A NULL `title` matches any block. When `nMatches` is not NULL, *nMatches receives
+	the number of matching blocks (0 if there are none).
+*/
+NxsBlock *NxsReader::FindBlockByTitle(const BlockReaderList & chosenBlockList, const char *title, unsigned *nMatches)
+	{
+	BlockReaderList  matches = FindAllBlocksByTitle(chosenBlockList, title);
+	const bool none = matches.empty();
+
+	if (nMatches)
+		*nMatches = (none ? 0 : (unsigned)matches.size());
+	if (none)
+		return NULL;
+	return (NxsBlock *) matches.back();
+	}
+
+
+
+/*! \returns the blocks of `chosenBlockList` that match `title` (as decided by
+	FindAllBlocksByTitleNoPrioritization()) and share the highest value of
+	GetBlockPriority() among those matches, in their original relative order.
+	An empty list is returned when nothing matches.
+*/
+BlockReaderList NxsReader::FindAllBlocksByTitle(const BlockReaderList & chosenBlockList, const char *title)
+	{
+	typedef map<int, BlockReaderList> PriorityToBlocks;
+	BlockReaderList candidates = FindAllBlocksByTitleNoPrioritization(chosenBlockList, title);
+	if (candidates.empty())
+		return candidates;
+
+	// Bucket the matches by priority; the map keeps the keys sorted.
+	PriorityToBlocks grouped;
+	BlockReaderList::const_iterator cIt = candidates.begin();
+	while (cIt != candidates.end())
+		{
+		NxsBlock * block = *cIt;
+		const int priority = GetBlockPriority(block);
+		grouped[priority].push_back(block);
+		++cIt;
+		}
+	NCL_ASSERT(!grouped.empty());
+	// The last key is the largest priority.
+	PriorityToBlocks::reverse_iterator highest = grouped.rbegin();
+	return highest->second;
+	}
+
+/*! \returns every block of `chosenBlockList` for which one of the titles reported by
+	GetAllTitlesForBlock() equals `title` ignoring case. An empty `title` matches
+	blocks that have an empty title. A NULL `title` matches everything, in which case
+	a copy of `chosenBlockList` is returned.
+*/
+BlockReaderList NxsReader::FindAllBlocksByTitleNoPrioritization(const BlockReaderList & chosenBlockList, const char *title)
+	{
+	if (chosenBlockList.empty() || title == NULL)
+		return chosenBlockList;
+
+	BlockReaderList matches;
+	const bool wantEmptyTitle = (title[0] == '\0');
+	for (BlockReaderList::const_iterator bIt = chosenBlockList.begin(); bIt != chosenBlockList.end(); ++bIt)
+		{
+		NxsBlock * block = *bIt;
+		const std::vector<std::string> titles = this->GetAllTitlesForBlock(block);
+		bool matched = false;
+		// stop at the first title that matches so each block is added at most once
+		for (std::vector<std::string>::const_iterator tIt = titles.begin(); !matched && tIt != titles.end(); ++tIt)
+			{
+			matched = (wantEmptyTitle && tIt->empty())
+				|| NxsString::case_insensitive_equals(title, tIt->c_str());
+			}
+		if (matched)
+			matches.push_back(block);
+		}
+	return matches;
+	}
+
+/*! \returns all of the TITLEs that have been used for block `b`: its own title first,
+	followed by any alias titles recorded for it in `blockTitleAliases`.
+
+	Identical blocks with different titles can be stored once, with the additional
+	titles kept as a list of "alias titles".
+
+	This will only happen for TAXA blocks, currently.
+*/
+std::vector<std::string> NxsReader::GetAllTitlesForBlock(const NxsBlock *b) const
+	{
+	typedef std::map<const NxsBlock *, std::list<std::string> >::const_iterator AliasIt;
+	std::vector<std::string> titles;
+	titles.push_back(b->GetTitle());
+	const AliasIt aliases = blockTitleAliases.find(b);
+	if (aliases == blockTitleAliases.end())
+		return titles;
+	titles.insert(titles.end(), aliases->second.begin(), aliases->second.end());
+	return titles;
+	}
+
+/*! Used internally: appends `t` to the list of "alias titles" kept for block `b`
+	(the list is created if `b` has none yet).
+*/
+void NxsReader::RegisterAltTitle(const NxsBlock * b, std::string t)
+	{
+	blockTitleAliases[b].push_back(t);
+	}
+
+/*! \returns the block registered under the type ID `btype` (TAXA, CHARACTERS, ...) that
+	FindBlockByTitle() selects for `title`, or NULL if no block list exists for `btype`
+	or nothing in it matches.
+	If `nMatches` is not NULL, *nMatches is set to the number of blocks that met the
+	criteria (0 when there is no list for `btype`).
+*/
+NxsBlock *NxsReader::FindBlockOfTypeByTitle(const std::string &btype, const char *title, unsigned *nMatches)
+	{
+	const BlockTypeToBlockList::const_iterator entry = blockTypeToBlockList.find(btype);
+	if (entry != blockTypeToBlockList.end())
+		return FindBlockByTitle(entry->second, title, nMatches);
+	if (nMatches)
+		*nMatches = 0;
+	return NULL;
+	}
+
+/*!
+	Looks up a "TAXA" block by title using FindBlockOfTypeByTitle().
+
+	NOTE: the result is cast to NxsTaxaBlockAPI *. This should only be called by NCL when
+	factories and the Link API are in effect. When using these APIs, block readers that
+	read "TAXA" blocks in a NEXUS file must inherit from NxsTaxaBlockAPI, or the behavior
+	will be undefined. This requirement also applies to "implied" taxa blocks that are
+	returned from CHARACTERS (or other) blocks.
+*/
+NxsTaxaBlockAPI *NxsReader::GetTaxaBlockByTitle(const char *title, unsigned *nMatches)
+	{
+	NxsBlock * found = FindBlockOfTypeByTitle(std::string("TAXA"), title, nMatches);
+	return static_cast<NxsTaxaBlockAPI *>(found);
+	}
+
+/*!
+	Looks up a block registered under the type ID "CHARACTERS" by title using
+	FindBlockOfTypeByTitle().
+
+	NOTE: the result is cast to NxsCharactersBlockAPI *. This should only be called by NCL
+	when factories and the Link API are in effect. When using these APIs, block readers
+	that read "CHARACTERS" or "DATA" blocks in a NEXUS file must inherit from
+	NxsCharactersBlockAPI, or the behavior will be undefined.
+*/
+NxsCharactersBlockAPI	*NxsReader::GetCharBlockByTitle(const char *title, unsigned *nMatches)
+	{
+	NxsBlock * found = FindBlockOfTypeByTitle(std::string("CHARACTERS"), title, nMatches);
+	return static_cast<NxsCharactersBlockAPI *>(found);
+	}
+/*!
+	Looks up a "TREES" block by title using FindBlockOfTypeByTitle().
+
+	NOTE: the result is cast to NxsTreesBlockAPI *. This should only be called by NCL when
+	factories and the Link API are in effect. In this case block readers that read "TREES"
+	blocks in a NEXUS file must inherit from NxsTreesBlockAPI (the type of the cast
+	below), or the behavior will be undefined.
+*/
+NxsTreesBlockAPI *NxsReader::GetTreesBlockByTitle(const char *title, unsigned *nMatches)
+	{
+	NxsBlock * found = FindBlockOfTypeByTitle(std::string("TREES"), title, nMatches);
+	return static_cast<NxsTreesBlockAPI *>(found);
+	}
+
+/*! Creates a reader with no registered blocks: `blockList', `currBlock' and
+	`taxaBlockFactory' start out NULL, `destroyRepeatedTaxaBlocks' and
+	`alwaysReportStatusMessages' start out false, and the warning level is
+	UNCOMMON_SYNTAX_WARNING.
+*/
+NxsReader::NxsReader() : currentWarningLevel(UNCOMMON_SYNTAX_WARNING), alwaysReportStatusMessages(false)
+	{
+	destroyRepeatedTaxaBlocks = false;
+	taxaBlockFactory = NULL;
+	currBlock = NULL;
+	blockList = NULL;
+	}
+
+/*! Clears the back-pointer of every block that still names this reader as its
+	NxsReader - both the registered readers in `blockList' and the blocks in
+	`blocksInOrder'. No block is deleted here.
+*/
+NxsReader::~NxsReader()
+	{
+	for (NxsBlock *registered = blockList; registered != NULL; registered = registered->next)
+		{
+		if (registered->GetNexus() == this)
+			registered->SetNexus(NULL);
+		}
+	BlockReaderList::iterator used = blocksInOrder.begin();
+	while (used != blocksInOrder.end())
+		{
+		NxsBlock * block = *used;
+		++used;
+		if (block->GetNexus() == this)
+			block->SetNexus(NULL);
+		}
+	}
+
+/*!
+	Adds the NEXUS block-reader factory `f` to the front of the factories list.
+	A NULL `f` is ignored.
+*/
+void NxsReader::AddFactory(NxsBlockFactory *f)
+	{
+	if (f == NULL)
+		return;
+	factories.push_front(f);
+	}
+/*!
+	Removes the NEXUS block-reader factory `f` from the factories list by calling
+	remove(f) on it. The factory object itself is not deleted.
+*/
+void NxsReader::RemoveFactory(NxsBlockFactory *f)
+	{
+	factories.remove(f);
+	}
+
+
+/*!
+	Appends `newBlock' to the linked list of NxsBlock objects that starts at `blockList'
+	(or makes it the head if the list is empty). Before linking, SetNexus is called on
+	`newBlock' so the block knows which NxsReader now owns it. That is what lets the
+	block talk to the outside world through the reader, e.g. to issue progress reports
+	while it reads its contents.
+*/
+void NxsReader::Add(
+  NxsBlock *newBlock)	/* a pointer to an existing block object */
+	{
+	NCL_ASSERT(newBlock != NULL);
+
+	newBlock->SetNexus(this);
+
+	if (blockList == NULL)
+		{
+		blockList = newBlock;
+		return;
+		}
+
+	// Walk to the last node and hang the new block off it
+	//
+	NxsBlock *tail = blockList;
+	while (tail->next != NULL)
+		tail = tail->next;
+	NCL_ASSERT(tail && !tail->next);
+	tail->next = newBlock;
+	}
+
+/*!
+	\deprecated
+	Returns the 0-based position of block `b' in the linked list starting at `blockList',
+	or UINT_MAX if `b' is not in that list.
+*/
+unsigned NxsReader::PositionInBlockList(
+  NxsBlock *b)	/* a pointer to an existing block object */
+	{
+	unsigned pos = 0;
+	for (NxsBlock *curr = blockList; curr != NULL; curr = curr->next, ++pos)
+		{
+		if (curr == b)
+			return pos;
+		}
+	// ran off the end of the list without meeting `b'
+	return UINT_MAX;
+	}
+
+/*!
+	Replaces `oldb' with `newb' in the linked list starting at `blockList'. Call this when
+	`oldb' is about to be deleted (perhaps to make way for new data): create `newb' first,
+	call Reassign, then delete `oldb'. `newb' takes over the list position of `oldb' and
+	is told that this reader owns it; `oldb' is unlinked and its reader pointer is set
+	to NULL.
+	Assumes `oldb' exists and is in `blockList'.
+
+	This function is necessary in v2.0, but replacement of blocks is more easily done
+	with block factories in NCL v2.1 and higher.
+*/
+void NxsReader::Reassign(
+  NxsBlock *oldb,	/* a pointer to the block object soon to be deleted */
+  NxsBlock *newb)	/* a pointer to oldb's replacement */
+	{
+	newb->SetNexus(this);
+
+	// Locate `oldb' and the node in front of it (NULL if `oldb' is the head).
+	NxsBlock *before = NULL;
+	NxsBlock *target = blockList;
+	while (target != NULL && target != oldb)
+		{
+		before = target;
+		target = target->next;
+		}
+
+	NCL_ASSERT(target != NULL);
+
+	newb->next = target->next;
+	if (before != NULL)
+		before->next = newb;
+	else
+		blockList = newb;
+	target->next = NULL;
+	target->SetNexus(NULL);
+	}
+
+/*! \returns true when no block is registered, i.e. `blockList' is NULL. */
+bool NxsReader::BlockListEmpty()
+	{
+	return blockList == NULL;
+	}
+
+/*!
+	This function was created for purposes of debugging a new NxsBlock. This version does nothing; to create an active
+	DebugReportBlock function, override this version in the derived class and call the Report function of `nexusBlock'.
+	This function is called whenever the parsing loop of NxsReader (CoreExecutionTasks) encounters the `&SHOWALL' token
+	between blocks in the data file (older documentation refers to this as the [&spillall] command comment). The loop
+	goes through all blocks in `blockList' and passes them, in turn, to this DebugReportBlock function so that their
+	contents are displayed. Placing that command comment between different versions of a block allows multiple blocks
+	of the same type to be tested using one long data file. Say you are interested in testing whether the normal,
+	transpose, and interleave format of a matrix can all be read correctly. If you put three versions of the block in
+	the data file one after the other, the second one will wipe out the first, and the third one will wipe out the
+	second, unless you have a way to report on each one before the next one is read. This function provides that
+	ability.
+*/
+void NxsReader::DebugReportBlock(
+  NxsBlock &)	/* the block that should be reported */
+	{
+	}
+
+/*!
+	Unlinks `oldBlock' from the linked list of NxsBlock objects starting at `blockList', after first removing it from
+	the used-block collections (RemoveBlockFromUsedBlockList). If `oldBlock' is the head of the list, `blockList' is
+	advanced to `oldBlock->next'. The object pointed to by `oldBlock' is not deleted, only detached. Detaching a block
+	that is not in the list (for example one that was detached earlier) is harmless: the function returns quietly.
+	When `oldBlock' is found, its SetNexus method is called with NULL to record that it is no longer owned by
+	(i.e., attached to) a NxsReader object.
+*/
+void NxsReader::Detach(
+  NxsBlock *oldBlock)	/* a pointer to an existing block object */
+	{
+	NCL_ASSERT(oldBlock != NULL);
+	RemoveBlockFromUsedBlockList(oldBlock);
+
+	// Nothing is attached, so there is nothing to unlink
+	//
+	if (blockList == NULL)
+		return;
+
+	// Head of the list
+	//
+	if (blockList == oldBlock)
+		{
+		blockList = oldBlock->next;
+		oldBlock->SetNexus(NULL);
+		return;
+		}
+
+	// Find the node just in front of oldBlock (bug fix MTH 6/17/2002: the old
+	// version detached intervening blocks as well)
+	//
+	NxsBlock *pred = blockList;
+	while (pred->next != NULL && pred->next != oldBlock)
+		pred = pred->next;
+
+	// An assert that pred->next == oldBlock could be placed here to catch calls made
+	// with pointers that are not in the list; doing so would make the "returns
+	// quietly" part of the comment above untrue in builds where asserts are active.
+	//
+	if (pred->next != oldBlock)
+		return;
+	pred->next = oldBlock->next;
+	oldBlock->SetNexus(NULL);
+	}
+
+/*!
+	Called by the NxsReader object when a block named `blockName' is entered. Allows a derived class overriding this
+	function to notify the user of progress in parsing the NEXUS file. Also gives the program the opportunity to ask
+	the user if it is ok to purge data currently contained in this block. If the user is asked whether existing data
+	should be deleted, and the answer comes back no, then the overriding function should return false, otherwise it
+	should return true.
+	This (base class) version ignores its argument and always returns true.
+*/
+bool NxsReader::EnteringBlock(
+  NxsString )	/* the name of the block just entered */
+	{
+	return true;
+	}
+
+/*!
+	Called by the NxsReader object when a block named `blockName' is being exited. Allows a derived class overriding
+	this function to notify the user of progress in parsing the NEXUS file.
+	This (base class) version ignores its argument and does nothing.
+*/
+void NxsReader::ExitingBlock(
+  NxsString )	/* the name of the block being exited */
+	{
+	}
+
+/*!
+	Called after `block' has returned from NxsBlock::Read().
+	This (base class) version ignores its argument and does nothing.
+*/
+void NxsReader::PostBlockReadingHook(
+  NxsBlock & /*block*/) /// the block that was just read
+	{
+	}
+
+/*! Asks the registered factories, in list order, for a reader for blocks named
+	`currBlockName'. The first reader that a factory supplies and that also reports
+	CanReadBlockType(token) is attached to this NxsReader (SetNexus) and returned. A
+	reader that cannot read the block type is handed back to its factory through
+	BlockError() and the search continues.
+	If `sourceOfBlock' is not NULL, *sourceOfBlock is set to the factory that supplied
+	the returned reader.
+	Returns NULL (and does not modify *sourceOfBlock) if no factory supplies a suitable
+	reader. Factories are only consulted while the member `currBlock' is NULL.
+*/
+NxsBlock *NxsReader::CreateBlockFromFactories(const std::string & currBlockName, NxsToken &token, NxsBlockFactory **sourceOfBlock)
+	{
+	BlockFactoryList::iterator fIt = factories.begin();
+	while (currBlock == NULL && fIt != factories.end())
+		{
+		NxsBlock *candidate = (*fIt)->GetBlockReaderForID(currBlockName, this, &token);
+		if (candidate != NULL)
+			{
+			if (!candidate->CanReadBlockType(token))
+				(*fIt)->BlockError(candidate);
+			else
+				{
+				if (sourceOfBlock)
+					*sourceOfBlock = *fIt;
+				candidate->SetNexus(this);
+				return candidate;
+				}
+			}
+		++fIt;
+		}
+	return NULL;
+	}
+
+/*!
+	Reads NEXUS content from the input stream provided by `token'. This function is responsible for reading
+	through the name of each block. Once it has read a block name, it searches for a block reader to
+	handle reading the remainder of the block's contents.
+
+	The block object's Read() method is responsible for reading the END or ENDBLOCK command as well as the trailing semicolon.
+
+	Execute() handles reading comments that are outside of blocks, as well as the initial "#NEXUS" keyword.
+
+	As discussed in \ref NexusErrors exceptions raised during parsing result in calls to ::NexusError()
+
+	If `notifyStartStop' is false then the ExecuteStarting and ExecuteStopping functions will not be called.
+
+	The order of operations is:
+		-# Read until next Begin command.
+		-# Search through the registered block instances to find one that returns
+			true from NxsBlock::CanReadBlockType()
+		-# If none is found then CreateBlockFromFactories is called.
+		-# If no appropriate block reader has been created, then the SkippingBlock hook will
+			be called (and the NxsReader will call ReadUntilEndblock to read until
+			the END of the block) before returning to step 1.
+		-# If an appropriate block reader was found in steps 2 or 3 then ExecuteBlock() will be called.
+
+	The parsing itself is done by CoreExecutionTasks(). When `nclCatchesSignals' is set, this wrapper installs the
+	NCL SIGINT handler around that call, always uninstalls it again, and throws NxsSignalCanceledParseException if
+	the count of caught SIGINTs changed during the parse.
+*/
+void NxsReader::Execute(
+  NxsToken	&token,				/*!< the token object used to grab NxsReader tokens */
+  bool		notifyStartStop)	/*!< if true, ExecuteStarting and ExecuteStopping will be called */
+	{
+	// Snapshot the flag so install and uninstall stay paired for this call.
+	const bool handlerInstalled = NxsReader::nclCatchesSignals;
+	unsigned sigIntsBefore = 0;
+	if (handlerInstalled)
+		{
+		sigIntsBefore = getNumSignalIntsCaught();
+		installNCLSignalHandler();
+		}
+	try {
+		CoreExecutionTasks(token, notifyStartStop);
+		}
+	catch (...)
+		{
+		if (handlerInstalled)
+			uninstallNCLSignalHandler();
+		throw;
+		}
+	if (!handlerInstalled)
+		return;
+	uninstallNCLSignalHandler();
+	if (sigIntsBefore != getNumSignalIntsCaught())
+		throw NxsSignalCanceledParseException("Reading NEXUS content");
+	}
+
+/*! Used internally to do most of the work of Execute().
+
+	Reads tokens from `token' until EOF, a `&LEAVE' token, or until ExecuteBlock()
+	returns false. For each BEGIN command the registered readers in `blockList' are
+	tried first (CanReadBlockType), then the factories (CreateBlockFromFactories).
+	Blocks without a reader, and blocks whose reader is disabled, are skipped with
+	ReadUntilEndblock(). A `&SHOWALL' token passes every registered block to
+	DebugReportBlock().
+
+	NxsExceptions raised while fetching the first token or while creating a reader from
+	the factories are reported through NexusError(), after which the function returns.
+	They are caught by reference so that the exception object is neither copied nor
+	sliced.
+
+	\throws NxsSignalCanceledParseException if signal checking is enabled and the count
+		of caught SIGINTs changes while parsing.
+*/
+void NxsReader::CoreExecutionTasks(
+  NxsToken	&token,				/* the token object used to grab NxsReader tokens */
+  bool		notifyStartStop)	/* if true, ExecuteStarting and ExecuteStopping will be called */
+	{
+	unsigned numSigInts = NxsReader::getNumSignalIntsCaught();
+	const bool checkingSignals = NxsReader::getNCLCatchesSignals();
+
+	lastExecuteBlocksInOrder.clear();
+	currBlock = NULL;
+
+	NxsString errormsg;
+	token.SetEOFAllowed(true);
+
+	try
+		{
+		token.SetLabileFlagBit(NxsToken::saveCommandComments);
+		token.GetNextToken();
+		}
+	catch (NxsException &)
+		{
+		// the message is taken from the token, not from the exception
+		NexusError(token.errormsg, 0, 0, 0);
+		return;
+		}
+
+	if (token.Equals("#NEXUS"))
+		{
+		token.SetLabileFlagBit(NxsToken::saveCommandComments);
+		token.GetNextToken();
+		}
+	else
+		{
+		errormsg = "Expecting #NEXUS to be the first token in the file, but found ";
+		errormsg += token.GetToken();
+		errormsg += " instead";
+		/*mth changed this to a warning instead of an error	 because of the large number
+			of files that violate this requirement.
+		*/
+		NexusWarn(errormsg,  NxsReader::AMBIGUOUS_CONTENT_WARNING, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+		}
+
+	if (notifyStartStop)
+		ExecuteStarting();
+	bool keepReading = true;
+	for (;keepReading && !token.AtEOF();)
+		{
+		// abandon the parse if a SIGINT arrived since this function started
+		if (checkingSignals && NxsReader::getNumSignalIntsCaught() != numSigInts)
+			{
+			throw NxsSignalCanceledParseException("Reading NEXUS content");
+			}
+		if (token.Equals("BEGIN"))
+			{
+			token.SetEOFAllowed(false); /*must exit the block before an EOF*/
+			token.GetNextToken();
+			token.SetBlockName(token.GetTokenReference().c_str());
+			// registered readers get the first chance at the block
+			for (currBlock = blockList; currBlock != NULL; currBlock = currBlock->next)
+				{
+				if (currBlock->CanReadBlockType(token))
+					break;
+				}
+			NxsString currBlockName = token.GetToken();
+			currBlockName.ToUpper();
+			NxsBlockFactory * sourceOfBlock = NULL;
+			if (currBlock == NULL)
+				{
+				try
+					{
+					currBlock = CreateBlockFromFactories(currBlockName, token, &sourceOfBlock);
+					}
+				catch (NxsException & x)
+					{
+					NexusError(x.msg, x.pos, x.line, x.col);
+					token.SetBlockName(0L);
+					token.SetEOFAllowed(true);
+					return;
+					}
+			    }
+			if (currBlock == NULL)
+				{
+				// no reader at all for this block type
+				SkippingBlock(currBlockName);
+				if (!ReadUntilEndblock(token, currBlockName))
+					{
+					token.SetBlockName(0L);
+					token.SetEOFAllowed(true);
+					return;
+					}
+				}
+			else if (currBlock->IsEnabled())
+				keepReading = ExecuteBlock(token, currBlockName, currBlock, sourceOfBlock);
+			else
+				{
+				SkippingDisabledBlock(token.GetToken());
+				if (sourceOfBlock)
+					sourceOfBlock->BlockSkipped(currBlock);
+				if (!ReadUntilEndblock(token, currBlockName))
+					{
+					token.SetBlockName(0L);
+					token.SetEOFAllowed(true);
+					return;
+					}
+				}
+			currBlock = NULL;
+			token.SetEOFAllowed(true);
+			token.SetBlockName(0L);
+			}	// if (token.Equals("BEGIN"))
+		else if (token.Equals("&SHOWALL"))
+			{
+			for (NxsBlock*	showBlock = blockList; showBlock != NULL; showBlock = showBlock->next)
+				DebugReportBlock(*showBlock);
+			}
+		else if (token.Equals("&LEAVE"))
+			break;
+		if (keepReading)
+			{
+			token.SetLabileFlagBit(NxsToken::saveCommandComments);
+			token.GetNextToken();
+			}
+		}
+	if (notifyStartStop)
+		ExecuteStopping();
+
+	currBlock = NULL;
+	}
+
+/*! Calls Reset() on every block reader in the reader's block list and then
+	empties all of the containers that record which blocks have been read
+	(order, priorities, per-type lists, title history and title aliases).
+	On return `currBlock` refers to the head of the block list.
+*/
+void NxsReader::ClearContent()
+	{
+	currBlock = blockList;
+	while (currBlock)
+		{
+		currBlock->Reset();
+		currBlock = currBlock->next;
+		}
+	currBlock = blockList;
+
+	// Bookkeeping about previously stored blocks.
+	blocksInOrder.clear();
+	blockPriorities.clear();
+	lastExecuteBlocksInOrder.clear();
+	blockTypeToBlockList.clear();
+
+	// Bookkeeping about block titles.
+	blockTitleHistoryMap.clear();
+	blockTitleAliases.clear();
+	}
+
+
+/*! \returns a pointer to a previously stored TAXA block that has the same taxon
+	labels as `testB`, or 0L if `testB` is NULL, no TAXA block has been stored,
+	or no stored block matches.
+
+	Two blocks match when they report the same number of taxon labels and their
+	label vectors are equal element by element under a case-insensitive
+	comparison. The comparison is positional: labels are compared in the order
+	returned by GetAllLabels().
+
+	TAXA blocks are often repeated in sets of NEXUS files (because a bare TREES block
+	constitutes an illegal NEXUS file, and because NCL spawns implied Taxa blocks
+	if it reads just a Trees block).
+
+	When culling of repeated taxa blocks is enabled (`destroyRepeatedTaxaBlocks`),
+	ExecuteBlock() and BlockReadHook() use this function to decide whether a
+	newly read taxa block duplicates an earlier one.
+
+	\warning: this is a hole in the const-correctness because the function is const
+		but hands back a non-const pointer; the caller could (but shouldn't)
+		modify the Taxa block.
+*/
+NxsTaxaBlockAPI * NxsReader::GetOriginalTaxaBlock(const NxsTaxaBlockAPI * testB) const
+	{
+	if (testB == NULL)
+		return 0L;
+	const std::string taxaKey("TAXA");
+	BlockTypeToBlockList::const_iterator found = blockTypeToBlockList.find(taxaKey);
+	if (found == blockTypeToBlockList.end())
+		return 0L;
+	const BlockReaderList & storedTaxaBlocks = found->second;
+	const unsigned numTestLabels = testB->GetNumTaxonLabels();
+	const std::vector<std::string> testLabels = testB->GetAllLabels();
+	BlockReaderList::const_iterator readerIt = storedTaxaBlocks.begin();
+	for (; readerIt != storedTaxaBlocks.end(); ++readerIt)
+		{
+		const NxsBlock * asBlock = *readerIt;
+		const NxsTaxaBlockAPI * candidate = (const NxsTaxaBlockAPI *) asBlock;
+		if (candidate->GetNumTaxonLabels() != numTestLabels)
+			continue;
+		const std::vector<std::string> candidateLabels = candidate->GetAllLabels();
+		std::vector<std::string>::const_iterator cIt = candidateLabels.begin();
+		std::vector<std::string>::const_iterator tIt = testLabels.begin();
+		// Walk both label vectors in lock step until one ends or a pair differs.
+		while (tIt != testLabels.end() && cIt != candidateLabels.end())
+			{
+			if (!NxsString::case_insensitive_equals(tIt->c_str(), cIt->c_str()))
+				break;
+			++tIt;
+			++cIt;
+			}
+		// Every test label was matched, so this is the original block.
+		if (tIt == testLabels.end())
+			return const_cast<NxsTaxaBlockAPI *>(candidate);
+		}
+
+	return 0L;
+	}
+
+
+/*! Called internally when the NxsReader has found the correct NxsBlock to read
+	a block in a file.
+
+	`token` will be at the block ID.
+	`currBlockName` will be the block ID as a string.
+	`currentBlock` will be the block reader to be used
+	`sourceOfBlock` is the factory  that created the block (or 0L). If sourceOfBlock
+		is not NULL then it will be alerted if the block is skipped (BlockSkipped() method)
+		or there was an error in the read (BlockError() method). The factory is expected
+		to delete the block instances in these cases (NxsReader will not refer to those
+		instances again).
+
+	The following steps occur:
+		- the EnteringBlock hook is called (if it returns false, the block will be skipped:
+			NxsReader::SkippingBlock is called and the content is read up to the END command)
+		- the block is removed from the used-block lists and NxsBlock::Reset() is called on it
+		- NxsBlock::Read() method of the reader block is called
+		- If an unexpected EOF is hit and the block tolerates it (TolerateEOFInBlock()),
+			a warning is issued through the block and the content read so far is kept.
+		- If any other NxsException is generated, the block is Reset(), the factory (if any)
+			is alerted via BlockError(), the token's block name and EOF flag are restored,
+			and NexusError is called.
+		- If no exception is generated by Read then the block is processed:
+			- if NxsReader::cullIdenticalTaxaBlocks(true) has been called before Execute and this
+				is a repeated TAXA block, its title is registered as an alias of the original
+				block, the factory is alerted via BlockError() and the function returns true
+				without storing the block.
+			- the BlockReadHook() will store all of the implied blocks
+				(by calling NxsBlock::GetImpliedBlocks()) and the block itself.
+			- each stored block will generate a call to NxsReader::AddBlockToUsedBlockList()
+		- ExitingBlock() is called
+		- PostBlockReadingHook() is called
+
+	\returns false if the caller should stop reading the file (failure while skipping,
+		an error while reading, or a tolerated EOF inside the block); true otherwise.
+*/
+bool NxsReader::ExecuteBlock(NxsToken &token, const NxsString &currBlockName, NxsBlock *currentBlock, NxsBlockFactory * sourceOfBlock)
+	{
+	if (!EnteringBlock(currBlockName))
+		{
+		SkippingBlock(currBlockName);
+		if (sourceOfBlock)
+			sourceOfBlock->BlockSkipped(currentBlock);
+		if (!ReadUntilEndblock(token, currBlockName))
+			{
+			token.SetBlockName(0L);
+			token.SetEOFAllowed(true);
+			return false;
+			}
+		return true;
+		}
+	this->RemoveBlockFromUsedBlockList(currentBlock);
+	currentBlock->Reset();
+	// The block is passed in as `currentBlock` (rather than read from a member field)
+	// because the Read statement might trigger a recursive call to Execute (if the
+	// block contains instructions to execute another file, the same NxsReader object
+	// may be used and any member field could be trashed).
+	bool eofFound = false;
+	try
+		{
+		try
+			{
+			currentBlock->Read(token);
+			}
+		catch (NxsX_UnexpectedEOF &)
+			{
+			// Rethrow the exception object itself (a bare `throw;`) rather than a
+			// copy, so that its dynamic type is preserved for the outer handler.
+			if (!currentBlock->TolerateEOFInBlock())
+				throw;
+			NxsString m;
+			m << "Unexpected End of file in " << currBlockName << " block";
+			currentBlock->WarnDangerousContent(m, token);
+			eofFound = true;
+			}
+		if (destroyRepeatedTaxaBlocks && currBlockName.EqualsCaseInsensitive("TAXA"))
+			{
+			NxsTaxaBlockAPI * oldTB = this->GetOriginalTaxaBlock((NxsTaxaBlockAPI *) currentBlock);
+			if (oldTB)
+				{
+				// Duplicate TAXA block: keep the original and remember the new title as an alias.
+				const std::string altTitle = currentBlock->GetTitle();
+				this->RegisterAltTitle(oldTB, altTitle);
+				if (sourceOfBlock)
+					sourceOfBlock->BlockError(currentBlock);
+				return true;
+				}
+			}
+		BlockReadHook(currBlockName, currentBlock, &token);
+		}
+	catch (NxsException &x)
+		{
+		// Prefer the message recorded by the block; capture it before Reset().
+		NxsString m;
+		if (currentBlock->errormsg.length() > 0)
+			m = currentBlock->errormsg;
+		else
+			m = x.msg;
+		currentBlock->Reset();
+		if (sourceOfBlock != 0)
+			sourceOfBlock->BlockError(currentBlock);
+		// Restore the token state on every error path, as the skip paths above do.
+		token.SetBlockName(0L);
+		token.SetEOFAllowed(true);
+		currentBlock = NULL;
+		NexusError(m, x.pos, x.line, x.col);
+		return false;
+		}	// catch (NxsException &x)
+	ExitingBlock(currBlockName);
+	PostBlockReadingHook(*currentBlock);
+	return !eofFound;
+	}
+
+/*! Called by NxsReader::ExecuteBlock() after a successful read to store the
+	block and its implied blocks \ref NxsReader::ExecuteBlock()
+
+	Each block returned by NxsBlock::GetImpliedBlocks() is stored via
+	AddBlockToUsedBlockList(). The one exception is an implied TAXA block that
+	duplicates an earlier one while culling of repeated taxa blocks is enabled:
+	its title is registered as an alias of the original and, if
+	`currentBlock->SwapEquivalentTaxaBlock()` reports success, the duplicate is
+	deleted instead of stored. Finally `currentBlock` itself is stored.
+
+	`token` is forwarded to AddBlockToUsedBlockList().
+*/
+void NxsReader::BlockReadHook(const NxsString &currBlockName, NxsBlock *currentBlock, NxsToken * token)
+	{
+	VecBlockPtr impliedBlocks = currentBlock->GetImpliedBlocks();
+	VecBlockPtr::iterator it = impliedBlocks.begin();
+	for (; it != impliedBlocks.end(); ++it)
+		{
+		NxsBlock * impliedBlock = *it;
+		NCL_ASSERT(impliedBlock);
+		NxsString impliedID = impliedBlock->GetID();
+		if (destroyRepeatedTaxaBlocks && impliedID.EqualsCaseInsensitive("TAXA"))
+			{
+			NxsTaxaBlockAPI * original = this->GetOriginalTaxaBlock((NxsTaxaBlockAPI *) impliedBlock);
+			if (original)
+				{
+				const bool swapped = currentBlock->SwapEquivalentTaxaBlock(original);
+				const std::string altTitle = impliedBlock->GetTitle();
+				this->RegisterAltTitle(original, altTitle);
+				if (swapped)
+					{
+					// currentBlock now refers to the original; drop the duplicate.
+					delete impliedBlock;
+					continue;
+					}
+				}
+			}
+		NxsString impliedNote;
+		impliedNote << "storing implied block: " << impliedID;
+		this->statusMessage(impliedNote);
+		this->AddBlockToUsedBlockList(impliedID, impliedBlock, token);
+		}
+	NxsString readNote;
+	readNote << "storing read block: " << currentBlock->GetID();
+	this->statusMessage(readNote);
+	this->AddBlockToUsedBlockList(currBlockName, currentBlock, token);
+	}
+
+/*!
+	\returns the NCL_COPYRIGHT string: the copyright notice for the Nexus Class Library.
+	Useful for programs that report their use of this library to the user.
+*/
+const char * NxsReader::NCLCopyrightNotice()
+	{
+	const char * const notice = NCL_COPYRIGHT;
+	return notice;
+	}
+
+/*!
+	\returns the NCL_HOMEPAGEURL string: the URL of the Nexus Class Library home page.
+*/
+const char * NxsReader::NCLHomePageURL()
+	{
+	const char * const url = NCL_HOMEPAGEURL;
+	return url;
+	}
+
+/*!
+	\returns the NCL_NAME_AND_VERSION string: the name and current version of the
+	Nexus Class Library. Useful for programs that report their use of this
+	library to the user.
+*/
+const char * NxsReader::NCLNameAndVersion()
+	{
+	const char * const nameAndVersion = NCL_NAME_AND_VERSION;
+	return nameAndVersion;
+	}
+
+/*!
+	Hook called by Execute once the first token of the file has been examined (Execute
+	only warns when that token is not "#NEXUS", so the hook is still called in that
+	case) and before any block is processed. It is called only when Execute's
+	`notifyStartStop' flag is true.
+
+	This implementation does nothing. Override it if your application needs to act
+	at this point in the execution of a NEXUS data file (e.g. a good opportunity to
+	pop up a dialog box showing progress).
+*/
+void NxsReader::ExecuteStarting()
+	{
+	}
+
+/*!
+	Hook called by Execute when its main reading loop finishes: at the end of the NEXUS
+	data file, when the special comment [&LEAVE] is found between NEXUS blocks, or when
+	reading of a block asks Execute to stop. It is called only when Execute's
+	`notifyStartStop' flag is true, and it is not called on the paths where Execute
+	returns early after reporting an error.
+
+	This implementation does nothing. Override it if your application needs to act at
+	this point (e.g. a good opportunity to hide or destroy a dialog box showing progress).
+*/
+void NxsReader::ExecuteStopping()
+	{
+	}
+
+/*!
+	Called when an error is encountered in a NEXUS file. Allows program to give user details of the error as well as
+	the precise location of the error.
+
+	This implementation ignores all four arguments and does nothing; override it to
+	report the error.
+*/
+void NxsReader::NexusError(
+  NxsString ,	/* the error message to be displayed */
+  file_pos	,	/* the current file position */
+  long	,	/* the current file line */
+  long	)	/* the current column within the current file line */
+	{
+	}
+
+/*!
+	This function may be used to report progress while reading through a file. For example, the NxsAllelesBlock class
+	uses this function to report the name of the population it is currently reading so the user doesn't think the
+	program has hung on large data sets.
+
+	This implementation ignores its argument and does nothing.
+*/
+void NxsReader::OutputComment(
+  const NxsString &)	/* a comment to be shown on the output */
+	{
+	}
+
+/*!
+	This function is called when an unknown block named `blockName' is about to be skipped. Override this
+	function to provide an indication of progress as the NEXUS file is being read.
+
+	This implementation ignores its argument and does nothing.
+	NOTE(review): earlier documentation called this function "pure virtual", but a
+	do-nothing definition is provided here -- confirm against the class declaration.
+*/
+void NxsReader::SkippingBlock(
+  NxsString )	/* the name of the block being skipped */
+	{
+	}
+
+/*!
+	This function is called when a disabled block named `blockName' is encountered in a NEXUS data file being executed.
+	Override this function to handle this event in an appropriate manner. For example, the program may
+	wish to inform the user that a data block was encountered in what is supposed to be a tree file.
+
+	This implementation ignores its argument and does nothing.
+	NOTE(review): earlier documentation called this function "pure virtual", but a
+	do-nothing definition is provided here -- confirm against the class declaration.
+*/
+void NxsReader::SkippingDisabledBlock(
+  NxsString )	/* the name of the disabled block being skipped */
+	{
+	}
+
+
+/*! Used internally to skip the rest of a block: commands are consumed one at a
+	time until the END or ENDBLOCK token is found.
+
+	\returns true if END/ENDBLOCK was followed by ';'. Otherwise NexusError() is
+		called with the token's position and false is returned.
+
+	The second (block name) argument is unused.
+*/
+bool NxsReader::ReadUntilEndblock(NxsToken &token, const std::string & )
+	{
+	token.GetNextToken();
+	while (!(token.Equals("END") || token.Equals("ENDBLOCK")))
+		{
+		// Not the end of the block yet: consume this command and move on.
+		token.ProcessAsCommand(NULL);
+		token.GetNextToken();
+		}
+	token.GetNextToken();
+	if (token.Equals(";"))
+		return true;
+	std::string errormsg = "Expecting ';' after END or ENDBLOCK command, but found ";
+	errormsg += token.GetToken();
+	errormsg += " instead";
+	NexusError(NxsString(errormsg.c_str()), token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+	return false;
+	}
+
+/*! Convenience function for replacing the NxsTaxaBlockFactory: the previously
+	registered taxa block factory (if any) is removed with RemoveFactory(), and
+	`f` (if not NULL) is registered with AddFactory().
+*/
+void NxsReader::SetTaxaBlockFactory(NxsTaxaBlockFactory *f)
+	{
+	if (taxaBlockFactory != NULL)
+		RemoveFactory(taxaBlockFactory);
+	taxaBlockFactory = f;
+	if (taxaBlockFactory != NULL)
+		AddFactory(taxaBlockFactory);
+	}
+/*! \returns the most recently stored TAXA block (the result of
+	GetLastStoredBlockByID("TAXA")).
+	\warning: static_cast is used, so this should only be called when the client
+	knows that the stored TAXA blocks inherit from NxsTaxaBlockAPI. This will be
+	true if the client code has not derived its own NxsBlock for reading TAXA blocks.
+*/
+NxsTaxaBlockAPI *NxsReader::GetLastStoredTaxaBlock()
+	{
+	NxsBlock * const lastBlock = GetLastStoredBlockByID(std::string("TAXA"));
+	return static_cast<NxsTaxaBlockAPI *>(lastBlock);
+	}
+
+/*! \returns the most recently stored CHARACTERS/DATA block (the result of
+	GetLastStoredBlockByID("CHARACTERS")).
+	\warning: static_cast is used, so this should only be called when the client
+	knows that the stored CHARACTERS blocks inherit from NxsCharactersBlockAPI.
+	This will be true if the client code has not derived its own NxsBlock for
+	reading CHARACTERS blocks.
+*/
+NxsCharactersBlockAPI *NxsReader::GetLastStoredCharactersBlock()
+	{
+	NxsBlock * const lastBlock = GetLastStoredBlockByID(std::string("CHARACTERS"));
+	return static_cast<NxsCharactersBlockAPI *>(lastBlock);
+	}
+
+/*! \returns the most recently stored TREES block (the result of
+	GetLastStoredBlockByID("TREES")).
+	\warning: static_cast is used, so this should only be called when the client
+	knows that the stored TREES blocks inherit from NxsTreesBlockAPI. This will be
+	true if the client code has not derived its own NxsBlock for reading TREES blocks.
+*/
+NxsTreesBlockAPI *NxsReader::GetLastStoredTreesBlock()
+	{
+	NxsBlock * const lastBlock = GetLastStoredBlockByID(std::string("TREES"));
+	return static_cast<NxsTreesBlockAPI *>(lastBlock);
+	}
+
+/*! \returns the last block stored under the block ID indicated by `key`
+	("TAXA", "CHARACTERS", "TREES"...), or NULL if no block is currently stored
+	under that ID.
+
+	The lookup is an exact match on the keys of `blockTypeToBlockList`.
+*/
+
+NxsBlock *NxsReader::GetLastStoredBlockByID(const std::string &key)
+	{
+	BlockTypeToBlockList::iterator bttblIt = blockTypeToBlockList.find(key);
+	// Calling back() on an empty list is undefined, so treat an empty entry
+	// exactly like a missing one.
+	if (bttblIt == blockTypeToBlockList.end() || bttblIt->second.empty())
+		return NULL;
+	return bttblIt->second.back();
+	}
+
+/*! Used internally. Called by AddBlockToUsedBlockList(); enforces that block titles
+	are unique (case-insensitively) among blocks stored under the same `blockname`.
+
+	If the block `p` has a title that has already been recorded for `blockname`,
+	a NxsException is thrown (positioned at `token` when `token` is not NULL).
+
+	If the block has no title, an automatically generated title is assigned with the form
+		Untitled <block type ID> Block #
+	where # is the first counter value that yields an unused title.
+
+	In both cases the upper-cased title is appended to the title history.
+*/
+void NxsReader::NewBlockTitleCheckHook(const std::string &blockname, NxsBlock *p, NxsToken *token)
+	{
+	// Make sure a history entry exists for this block type (the counter starts at 1).
+	if (blockTitleHistoryMap.find(blockname) == blockTitleHistoryMap.end())
+		{
+		std::list<std::string> noTitles;
+		blockTitleHistoryMap[blockname] = NxsBlockTitleHistory(1, noTitles);
+		}
+	NxsBlockTitleHistoryMap::iterator histIt = blockTitleHistoryMap.find(blockname);
+	NCL_ASSERT(histIt != blockTitleHistoryMap.end());
+	NxsBlockTitleHistory & history = histIt->second;
+	std::list<std::string> & usedTitles = history.second;
+
+	const std::string givenTitle = p->GetTitle();
+	std::string upperTitle = givenTitle;
+	NxsString::to_upper(upperTitle);
+
+	if (!upperTitle.empty())
+		{
+		if (find(usedTitles.begin(), usedTitles.end(), upperTitle) != usedTitles.end())
+			{
+			NxsString msg = "Block titles cannot be repeated. The TITLE ";
+			msg += givenTitle;
+			msg += " has already been used for a ";
+			msg += blockname;
+			msg += " block.";
+			if (token)
+				throw NxsException(msg, *token);
+			throw NxsException(msg, 0, -1, -1);
+			}
+		}
+	else
+		{
+		// Untitled block: try successive counter values until the title is unused.
+		unsigned counter = history.first;
+		do
+			{
+			NxsString autoName = "Untitled ";
+			autoName += p->GetID().c_str();
+			autoName += " Block ";
+			autoName += counter++;
+			upperTitle.assign(autoName.c_str());
+			NxsString::to_upper(upperTitle);
+			if (find(usedTitles.begin(), usedTitles.end(), upperTitle) != usedTitles.end())
+				upperTitle.clear();
+			else
+				{
+				p->SetTitle(autoName, true);
+				history.first = counter;
+				}
+			}
+		while (upperTitle.empty());
+		}
+	usedTitles.push_back(upperTitle);
+	}
+
+/*! Used internally to store the correctly read block `p`.
+
+	`blockTypeID` is the block type; "DATA" is recorded under "CHARACTERS".
+	`token` is the token that is being parsed (or 0L); it is only used to position
+	an exception raised by the title check.
+
+	The block is appended to the per-type list, moved to the end of both ordered
+	lists (`blocksInOrder` and `lastExecuteBlocksInOrder`), and given priority 0 if
+	GetBlockPriority() reports a negative priority for it.
+
+	\warning This can generate NxsExceptions if there are clashes in the block title
+*/
+void NxsReader::AddBlockToUsedBlockList(const std::string &blockTypeID, NxsBlock *p, NxsToken *token)
+	{
+	NCL_ASSERT(p);
+	const std::string typeKey = (blockTypeID == "DATA" ? std::string("CHARACTERS") : blockTypeID);
+	NewBlockTitleCheckHook(typeKey, p, token);
+
+	BlockTypeToBlockList::iterator typeIt = blockTypeToBlockList.find(typeKey);
+	if (typeIt != blockTypeToBlockList.end())
+		typeIt->second.push_back(p);
+	else
+		blockTypeToBlockList[typeKey] = BlockReaderList(1, p);
+
+	// Move (or add) the block to the end of the overall ordering.
+	blocksInOrder.remove(p);
+	blocksInOrder.push_back(p);
+	if (this->GetBlockPriority(p) < 0)
+		AssignBlockPriority(p, 0);
+
+	// Same for the ordering of the most recent Execute.
+	lastExecuteBlocksInOrder.remove(p);
+	lastExecuteBlocksInOrder.push_back(p);
+	}
+
+/*! Removes a block from the NxsReader's records. Does NOT delete the block!
+
+	The block is removed from every per-type list (types whose list ends up empty
+	are dropped), from both ordered lists and from the priority map. Any entry in
+	the title history of the block's ID that equals the block's title
+	(case-insensitively) is erased as well.
+
+	\returns the number of times the block was in the reader's per-type block
+		lists (usually either 0 or 1).
+*/
+unsigned NxsReader::RemoveBlockFromUsedBlockList(NxsBlock *p)
+	{
+	unsigned numRemoved = 0;
+	std::vector<std::string> emptiedTypes;
+	BlockTypeToBlockList::iterator typeIt = blockTypeToBlockList.begin();
+	for (; typeIt != blockTypeToBlockList.end(); ++typeIt)
+		{
+		BlockReaderList & readers = typeIt->second;
+		const unsigned sizeBefore = (unsigned)readers.size();
+		readers.remove(p);
+		const unsigned sizeAfter = (unsigned)readers.size();
+		numRemoved += sizeBefore - sizeAfter;
+		// Erasing is deferred so that the map is not modified while iterating.
+		if (sizeAfter == 0)
+			emptiedTypes.push_back(typeIt->first);
+		}
+	std::vector<std::string>::const_iterator emptiedIt = emptiedTypes.begin();
+	for (; emptiedIt != emptiedTypes.end(); ++emptiedIt)
+		blockTypeToBlockList.erase(*emptiedIt);
+
+	blocksInOrder.remove(p);
+	blockPriorities.erase(p);
+	lastExecuteBlocksInOrder.remove(p);
+
+	const std::string blockID = p->GetID();
+	NxsBlockTitleHistoryMap::iterator histIt = blockTitleHistoryMap.find(blockID);
+	if (histIt == blockTitleHistoryMap.end())
+		return numRemoved;
+
+	const std::string title = p->GetTitle();
+	std::list<std::string> & usedTitles = histIt->second.second;
+	for (std::list<std::string>::iterator tIt = usedTitles.begin(); tIt != usedTitles.end();)
+		{
+		if (NxsString::case_insensitive_equals(tIt->c_str(), title.c_str()))
+			tIt = usedTitles.erase(tIt);
+		else
+			++tIt;
+		}
+	return numRemoved;
+	}
+
+/*! \returns a set holding every block pointer found in any of the per-type lists
+	of stored blocks (i.e. all of the blocks that have been successfully read).
+*/
+std::set<NxsBlock*> NxsReader::GetSetOfAllUsedBlocks()
+	{
+	std::set<NxsBlock*> allBlocks;
+	BlockTypeToBlockList::iterator typeIt = blockTypeToBlockList.begin();
+	while (typeIt != blockTypeToBlockList.end())
+		{
+		allBlocks.insert(typeIt->second.begin(), typeIt->second.end());
+		++typeIt;
+		}
+	return allBlocks;
+	}
+
+/*! Reports a warning according to the reader's warning settings.
+
+	- Warnings below `currentWarningLevel` are dropped.
+	- Warnings at or above `warningToErrorThreshold` are converted to a NxsException.
+	- Otherwise the `warnMode` decides: IGNORE_WARNINGS drops the warning,
+		WARNINGS_TO_STDERR writes it to Rcpp::Rcerr, WARNINGS_TO_STDOUT writes it to
+		Rcpp::Rcout, and any other mode forwards it to NexusError() prefixed
+		with "WARNING:".
+
+	`pos`, `line` and `col` locate the warning in the file; the location is only
+	printed when `line` or `pos` is positive.
+*/
+void ExceptionRaisingNxsReader::NexusWarn(const std::string &msg, NxsWarnLevel warnLevel, file_pos pos, long line, long col)
+	{
+	if (warnLevel < currentWarningLevel)
+		return;
+	if (warnLevel >= this->warningToErrorThreshold)
+		{
+		NxsString e(msg.c_str());
+		throw NxsException(e, pos, line, col);
+		}
+
+	if (warnMode == NxsReader::IGNORE_WARNINGS)
+		return;
+	if (warnMode == NxsReader::WARNINGS_TO_STDERR)
+		{
+		Rcpp::Rcerr << "\nWarning:  ";
+		Rcpp::Rcerr << "\n " << msg << std::endl;
+		if (line > 0 || pos > 0)
+			Rcpp::Rcerr << "at line " << line << ", column (approximately) " << col << " (file position " << pos << ")\n";
+		}
+	// This test used to be `!=`, which routed WARNINGS_TO_STDOUT to NexusError()
+	// and printed to stdout for the remaining mode instead.
+	else if (warnMode == NxsReader::WARNINGS_TO_STDOUT)
+		{
+		Rcpp::Rcout << "\nWarning:  ";
+		if (line > 0 || pos > 0)
+			Rcpp::Rcout << "at line " << line << ", column " << col << " (file position " << pos << "):\n";
+		Rcpp::Rcout << "\n " << msg << '\n';
+		if (line > 0 || pos > 0)
+			Rcpp::Rcout << "at line " << line << ", column (approximately) " << col << " (file position " << pos << ')' << std::endl;
+		}
+	else
+		{
+		NxsString m("WARNING:\n ");
+		m += msg.c_str();
+		NexusError(m, pos, line, col);
+		}
+	}
+
+/*! Announces that the unknown block `blockName` is being skipped.
+
+	Nothing is printed in IGNORE_WARNINGS mode. The notice goes to Rcpp::Rcerr in
+	WARNINGS_TO_STDERR mode and to Rcpp::Rcout in WARNINGS_TO_STDOUT mode; any
+	other mode prints nothing.
+*/
+void ExceptionRaisingNxsReader::SkippingBlock(NxsString blockName)
+	{
+	if (warnMode == NxsReader::IGNORE_WARNINGS)
+		return;
+	if (warnMode == NxsReader::WARNINGS_TO_STDERR)
+		Rcpp::Rcerr << "[!Skipping unknown block (" << blockName << ")...]" << std::endl;
+	// Was `!=`, which silenced stdout mode and printed for the remaining mode instead.
+	else if (warnMode == NxsReader::WARNINGS_TO_STDOUT)
+		Rcpp::Rcout << "[!Skipping unknown block (" << blockName << ")...]" << std::endl;
+	}
+
+/*! Announces that the disabled block `blockName` is being skipped.
+
+	Nothing is printed in IGNORE_WARNINGS mode. The notice goes to Rcpp::Rcerr in
+	WARNINGS_TO_STDERR mode and to Rcpp::Rcout in WARNINGS_TO_STDOUT mode; any
+	other mode prints nothing.
+*/
+void ExceptionRaisingNxsReader::SkippingDisabledBlock(NxsString blockName)
+	{
+	if (warnMode == NxsReader::IGNORE_WARNINGS)
+		return;
+	if (warnMode == NxsReader::WARNINGS_TO_STDERR)
+		Rcpp::Rcerr << "[!Skipping disabled block (" << blockName << ")...]" << std::endl;
+	// Was `!=`, which silenced stdout mode and printed for the remaining mode instead.
+	else if (warnMode == NxsReader::WARNINGS_TO_STDOUT)
+		Rcpp::Rcout << "[!Skipping disabled block (" << blockName << ")...]" << std::endl;
+	}
+
+/*! Hook for progress/status messages such as those produced by BlockReadHook().
+
+	The only statement that would emit the message `m` is commented out, so this
+	function currently produces no output regardless of `alwaysReportStatusMessages`
+	or the current warning level.
+*/
+void NxsReader::statusMessage(const std::string & m) const
+{
+	if (alwaysReportStatusMessages || currentWarningLevel == UNCOMMON_SYNTAX_WARNING) {
+	    //Rcpp::Rcout << m << std::endl;
+	}
+}
+
+/*! Clears the lists of all of the blocks that have been read.
+	NOTE: does NOT free any memory or call Reset() on any blocks!
+
+	RemoveBlocksFromFactoriesFromUsedBlockLists() is called first; then the ordered
+	lists, the priority map and the per-type lists are emptied.
+
+	This call can be used to "tell" a reader instance that you have taken
+	over the memory management for all of the blocks that it has read (or created).
+*/
+void NxsReader::ClearUsedBlockList()
+	{
+	this->RemoveBlocksFromFactoriesFromUsedBlockLists();
+
+	this->blocksInOrder.clear();
+	this->blockPriorities.clear();
+	this->lastExecuteBlocksInOrder.clear();
+	this->blockTypeToBlockList.clear();
+	}
+
+/*! Records `priorityLevel` as the priority of block `b`, replacing any priority
+	previously recorded for it.
+*/
+void NxsReader::AssignBlockPriority(NxsBlock *b, int priorityLevel)
+	{
+	std::map<NxsBlock *, int>::iterator entry = blockPriorities.find(b);
+	if (entry == blockPriorities.end())
+		blockPriorities.insert(std::make_pair(b, priorityLevel));
+	else
+		entry->second = priorityLevel;
+	}
+
+/*! \returns the priority recorded for block `b`, or 0 if none has been recorded. */
+int	NxsReader::GetBlockPriority(NxsBlock *b) const
+	{
+	const std::map<NxsBlock *, int>::const_iterator entry = blockPriorities.find(b);
+	return (entry != blockPriorities.end() ? entry->second : 0);
+	}
+
+/*! Assigns `priorityLevel` to every block returned by GetUsedBlocksInOrder(). */
+void NxsReader::DemoteBlocks(int priorityLevel)
+	{
+	BlockReaderList usedBlocks = GetUsedBlocksInOrder();
+	BlockReaderList::iterator blockIt = usedBlocks.begin();
+	while (blockIt != usedBlocks.end())
+		{
+		AssignBlockPriority(*blockIt, priorityLevel);
+		++blockIt;
+		}
+	}
diff --git a/src/nxssetreader.cpp b/src/nxssetreader.cpp
new file mode 100644
index 0000000..f4fe41d
--- /dev/null
+++ b/src/nxssetreader.cpp
@@ -0,0 +1,536 @@
+//	Copyright (C) 1999-2003 Paul O. Lewis
+//
+//	This file is part of NCL (Nexus Class Library) version 2.0.
+//
+//	NCL is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	NCL is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with NCL; if not, write to the Free Software Foundation, Inc.,
+//	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+#include <climits>
+#include "ncl/nxssetreader.h"
+#include "ncl/nxstoken.h"
+#include <algorithm>
+#include <iterator>
+using namespace std;
+
+/*! Inserts the 0-based indices first, first + stride, first + 2*stride, ... (none
+	greater than `last`) into `destination`.
+
+	\param first the first index of the range (always inserted).
+	\param last the largest index that may be inserted.
+	\param stride the positive step between consecutive indices.
+	\param destination the set to be filled; if NULL the call does nothing.
+	\param taboo if not NULL, indices that must not be added. Every index after
+		`first` is checked against it; `first` itself is not checked here.
+	\param token used only to position any exception that is thrown.
+
+	\throws NxsException if an index after `first` is in `taboo`, or if `stride` is 0.
+*/
+void NxsSetReader::AddRangeToSet(unsigned first, unsigned last, unsigned stride, NxsUnsignedSet * destination, const NxsUnsignedSet * taboo, NxsToken &token)
+	{
+	NCL_ASSERT (last >= first);
+	NCL_ASSERT (last !=UINT_MAX);
+	NCL_ASSERT (stride !=UINT_MAX);
+	if (destination == NULL)
+		return;
+	if (stride == 0)
+		{
+		// A zero stride would never advance, so the loop below would not terminate.
+		NxsString errormsg;
+		errormsg << "The stride of a range must be a positive number.";
+		throw NxsException(errormsg, token);
+		}
+	NxsUnsignedSet::iterator dIt = destination->insert(first).first;
+	if (last < first)
+		return;
+	// `last - curr >= stride` is the overflow-safe form of `curr + stride <= last`:
+	// the sum is only computed once it is known not to exceed `last`.
+	for (unsigned curr = first; last - curr >= stride;)
+		{
+		curr += stride;
+		if (taboo != NULL && taboo->count(curr) > 0)
+			{
+			NxsString errormsg;
+			errormsg << "Illegal repitition of an index (" << curr + 1 << ") in multiple subsets.";
+			throw NxsException(errormsg, token);
+			}
+		// Indices are ascending, so the previous position is a good insertion hint.
+		dIt = destination->insert(dIt, curr);
+		}
+	}
+
+/**
+	Interprets the current token as a set descriptor and adds the indices that it
+	denotes to `destination`.
+
+	The token "ALL" (case-insensitive) denotes every index from 0 through
+	mapper.GetMaxIndex(). Any other token is resolved by
+	mapper.GetIndicesForLabel().
+
+	`setType` and `cmdName` are used in error messages only.
+	`destination` may be NULL, in which case the "ALL" branch only counts indices.
+
+	returns the number of indices denoted by the token (for "ALL" this is
+	GetMaxIndex() + 1; otherwise the value returned by GetIndicesForLabel()).
+
+	throws NxsException (positioned at `token`) if the token cannot be interpreted.
+*/
+unsigned NxsSetReader::InterpretTokenAsIndices(NxsToken &token,
+  const NxsLabelToIndicesMapper & mapper,
+  const char * setType,
+  const char * cmdName,
+  NxsUnsignedSet * destination)
+	{
+	try {
+		const std::string t = token.GetToken();
+		if (NxsString::case_insensitive_equals(t.c_str(), "ALL"))
+			{
+			// NOTE(review): assumes GetMaxIndex() is a valid index smaller than
+			// UINT_MAX here -- confirm the behavior for an empty mapper.
+			const unsigned m = mapper.GetMaxIndex();
+			if (destination != NULL)
+				{
+				// Insert directly; no temporary set is needed.
+				for (unsigned i = 0; i <= m; ++i)
+					destination->insert(i);
+				}
+			return m + 1;
+			}
+		return mapper.GetIndicesForLabel(t, destination);
+		}
+	catch (const NxsException & x)
+		{
+		// Add the command context to the mapper's message.
+		NxsString errormsg = "Error in the ";
+		errormsg << setType << " descriptor of a " << cmdName << " command.\n";
+		errormsg += x.msg;
+		throw NxsException(errormsg, token);
+		}
+	catch (...)
+		{
+		NxsString errormsg = "Expecting a ";
+		errormsg << setType << " descriptor (number or label) in the " << cmdName << ".  Encountered ";
+		errormsg <<  token.GetToken();
+		throw NxsException(errormsg, token);
+		}
+	}
+
+/*! Reads a set definition from `token` (starting with the current token) up to the
+	terminating ';', adding the 0-based indices to `destination`.
+
+	Each element is a number or label (resolved by InterpretTokenAsIndices()),
+	optionally followed by a range: "- end" or "- end \ stride", where "end" is a
+	single number/label or "." (meaning mapper.GetMaxIndex()).
+
+	If `taboo` is not NULL, reading also stops (without consuming it) at a ','
+	token, and an exception is raised if an element that would be added is in `taboo`.
+
+	If `destination` is NULL the definition is parsed and validated but the
+	indices are discarded.
+
+	\throws NxsException on any syntax error.
+*/
+void NxsSetReader::ReadSetDefinition(
+  NxsToken &token,
+  const NxsLabelToIndicesMapper & mapper,
+  const char * setType, /* "TAXON" or "CHARACTER" -- for error messages only */
+  const char * cmdName, /* command name -- "TAXSET" or "EXSET"-- for error messages only */
+  NxsUnsignedSet * destination, /** to be filled */
+  const NxsUnsignedSet * taboo)
+	{
+	NxsString errormsg;
+	NxsUnsignedSet tmpset;
+	NxsUnsignedSet dummy;
+	if (destination == NULL)
+		destination = & dummy;
+	// Index of the preceding single element, i.e. a possible start of a range.
+	// UINT_MAX means "no valid range start".
+	unsigned previousInd = UINT_MAX;
+	std::vector<unsigned> intersectVec;
+	while (!token.Equals(";"))
+		{
+		if (taboo && token.Equals(","))
+			return;
+		if (token.Equals("-"))
+			{
+			if (previousInd == UINT_MAX)
+				{
+				errormsg = "The '-' must be preceded by number or a ";
+				errormsg << setType << " label in the " << cmdName << " command.";
+				throw NxsException(errormsg, token);
+				}
+			token.GetNextToken();
+			if (token.Equals(";") || token.Equals("\\"))
+				{
+				errormsg = "Range in the ";
+				errormsg << setType << " set definition in the " << cmdName << " command must be closed with a number or label.";
+				throw NxsException(errormsg, token);
+				}
+			unsigned endpoint;
+			if (token.Equals("."))
+				endpoint = mapper.GetMaxIndex();
+			else
+				{
+				tmpset.clear();
+				unsigned nAdded = NxsSetReader::InterpretTokenAsIndices(token, mapper, setType, cmdName, &tmpset);
+				if (nAdded != 1)
+					{
+					errormsg = "End of a range in a ";
+					errormsg << setType << " set definition in the " << cmdName << " command must be closed with a single number or label (not a set).";
+					throw NxsException(errormsg, token);
+					}
+				endpoint = *(tmpset.begin());
+				if (endpoint < previousInd)
+					{
+					errormsg = "End of a range in a ";
+					errormsg << setType << " set definition in the " << cmdName << " command must be a larger index than the start of the range (found ";
+					// Close the parenthesis opened by "(found".
+					errormsg << previousInd + 1 << " - " << token.GetToken() << ")";
+					throw NxsException(errormsg, token);
+					}
+				}
+			token.GetNextToken();
+			if (token.Equals("\\"))
+				{
+				token.GetNextToken();
+				NxsString t = token.GetToken();
+				// A stride that is not a number is left at 0 and rejected below.
+				unsigned stride = 0;
+				try
+					{
+					stride = t.ConvertToUnsigned();
+					}
+				catch (const NxsString::NxsX_NotANumber &)
+					{}
+				if (stride == 0)
+					{
+					errormsg = "Expecting a positive number indicating the 'stride' after the \\ in the ";
+					errormsg << setType << " set definition in the " << cmdName << " command. Encountered ";
+					errormsg << t;
+					throw NxsException(errormsg, token);
+					}
+				AddRangeToSet(previousInd, endpoint, stride, destination, taboo, token);
+				token.GetNextToken();
+				}
+			else
+				AddRangeToSet(previousInd, endpoint, 1, destination, taboo, token);
+			// A range cannot serve as the start of another range.
+			previousInd = UINT_MAX;
+			}
+		else
+			{
+			tmpset.clear();
+			const unsigned nAdded = NxsSetReader::InterpretTokenAsIndices(token, mapper, setType, cmdName, &tmpset);
+			if (taboo != NULL)
+				{
+				set_intersection(taboo->begin(), taboo->end(), tmpset.begin(), tmpset.end(), back_inserter(intersectVec));
+				if (!intersectVec.empty())
+					{
+					errormsg << "Illegal repitition of an index (" << 1 + *(intersectVec.begin()) << ") in multiple subsets.";
+					throw NxsException(errormsg, token);
+					}
+				}
+			if (nAdded == 1 )
+				{
+				// A single index may start a range if the next token is '-'.
+				previousInd = *(tmpset.begin());
+				destination->insert(previousInd);
+				}
+			else
+				{
+				previousInd = UINT_MAX;
+				destination->insert(tmpset.begin(), tmpset.end());
+				}
+			token.GetNextToken();
+			}
+		}
+	}
+
+/*!
+	Binds the reader to the token stream `t`, the block `nxsblk` and the output set `iset`,
+	records `maxValue` as `max' and `type' as `settype', and then empties the output set.
+*/
+NxsSetReader::NxsSetReader(
+  NxsToken			&t,			/* reference to the NxsToken being used to read in the NEXUS data file */
+  unsigned			maxValue,	/* maximum possible value allowed in this set (e.g. nchar or ntax) */
+  NxsUnsignedSet	&iset,		/* reference to the set object to store the set defined in the NEXUS data file */
+  NxsBlock			&nxsblk,	/* reference to the NxsBlock object (used for looking up taxon or character labels when encountered in the set definition) */
+  unsigned			type)		/* one of the elements in the NxsSetReaderEnum enumeration */
+  : block(nxsblk), token(t), nxsset(iset)
+	{
+	this->max = maxValue;
+	this->settype = type;
+	// Start from an empty set, whatever the caller's set held before.
+	this->nxsset.clear();
+	}
+
+/*!
+	Adds the range described by `first', `last' and `modulus' to `nxsset'. `first' and `last' are the 1-based
+	numbers as written in the data file (valid range [1..`max']); the values stored in `nxsset' are 0-based.
+	When `modulus' is non-zero only every `modulus'-th member, counting from `first', is stored; a `modulus' of
+	zero stores every member. For example "4-10\2" (`first' = 4, `last' = 10, `modulus' = 2) stores 3, 5, 7, 9.
+
+	Returns false, storing nothing, if `last' is greater than `max', `first' is less than 1, or `first' is
+	greater than `last'; returns true otherwise.
+*/
+bool NxsSetReader::AddRange(
+  unsigned first,		/* the first member of the range (inclusive, offset 1) */
+  unsigned last,		/* the last member of the range (inclusive, offset 1) */
+  unsigned modulus)		/* the modulus to use (if non-zero) */
+	{
+	const bool validRange = (first >= 1) && (first <= last) && (last <= max);
+	if (!validRange)
+		return false;
+
+	const unsigned firstIndex = first - 1;			/* 0-based index of the first member */
+	const unsigned numMembers = last - firstIndex;	/* i.e. last - first + 1 */
+	for (unsigned offset = 0; offset < numMembers; ++offset)
+		{
+		if (modulus == 0 || offset % modulus == 0)
+			nxsset.insert(firstIndex + offset);
+		}
+
+	return true;
+	}
+
+/*!
+	Converts the current token to a positive number. The token is first read as an integer; if that does not
+	yield a positive value and the set type is not generic, the token is looked up as a character label
+	(charset) or taxon label (taxset) in `block'. If no positive value results, an explanatory message is
+	stored in `block.errormsg' and a NxsException carrying that message and the token's position is thrown.
+*/
+unsigned NxsSetReader::GetTokenValue()
+	{
+	int parsed = -1;
+	try
+		{
+		parsed = token.GetToken().ConvertToInt();
+		}
+	catch (NxsString::NxsX_NotANumber &)
+		{
+		// Not a number: fall through to the label lookup below.
+		}
+
+	unsigned value = (parsed > 0 ? (unsigned) parsed : 0);
+
+	const bool isCharSet = (settype == NxsSetReader::charset);
+	const bool isTaxSet = (settype == NxsSetReader::taxset);
+	if (value == 0 && settype != NxsSetReader::generic)
+		{
+		if (isCharSet)
+			value = block.CharLabelToNumber(token.GetToken());
+		else if (isTaxSet)
+			value = block.TaxonLabelToNumber(token.GetToken());
+		}
+
+	if (value != 0)
+		return value;
+
+	block.errormsg = "Set element (";
+	block.errormsg += token.GetToken();
+	block.errormsg += ") not a number ";
+	if (isCharSet)
+		block.errormsg += "and not a valid character label";
+	else if (isTaxSet)
+		block.errormsg += "and not a valid taxon label";
+
+	throw NxsException(block.errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+	}
+
+std::vector<unsigned> NxsSetReader::GetSetAsVector(const NxsUnsignedSet &s)
+	{
+	// Copy the members of `s' into a vector, in the set's own iteration order.
+	//
+	return std::vector<unsigned>(s.begin(), s.end());
+	}
+
+/*!
+	Writes the members of `nxsset' to `out' as a NEXUS set value. Every member is written as its stored value plus one,
+	and every item written is preceded by a single space. Nothing is written for an empty set. Runs of three or more
+	members separated by a constant stride are written as "first-last", followed by " \ stride" when the stride is
+	greater than one; other members are written individually.
+	NOTE(review): the run detection assumes the set iterates in increasing order (presumably NxsUnsignedSet is a
+	std::set<unsigned>) -- confirm against the typedef.
+*/
+void NxsSetReader::WriteSetAsNexusValue(const NxsUnsignedSet & nxsset, std::ostream & out)
+	{
+	NxsUnsignedSet::const_iterator currIt = nxsset.begin();
+	const NxsUnsignedSet::const_iterator endIt = nxsset.end();
+	if (currIt == endIt)
+		return;
+	// One member only: write it and stop
+	unsigned rangeBegin = 1 + *currIt++;
+	if (currIt == endIt)
+		{
+		out << ' ' << rangeBegin;
+		return;
+		}
+	// Two members only: write both individually (never as a range)
+	unsigned prev = 1 + *currIt++;
+	if (currIt == endIt)
+		{
+		out << ' ' << rangeBegin << ' ' << prev;
+		return;
+		}
+	// Three or more members: `rangeBegin' is the start of the candidate run, `stride' its step,
+	// `prev' and `curr' the two most recently read members (`curr' has not been written yet).
+	unsigned stride = prev - rangeBegin;
+	unsigned curr = 1 + *currIt++;
+	bool inRange = true;
+	while (currIt != endIt)
+		{
+		if (inRange)
+			{
+			if (curr - prev != stride)
+				{
+				// `curr' breaks the stride of the candidate run
+				if (prev - rangeBegin == stride)
+					{
+					// The run held only two members: write the first one alone and restart the run at `prev'
+					out << ' ' << rangeBegin;
+					rangeBegin = prev;
+					stride = curr - prev;
+					}
+				else
+					{
+					// The run held three or more members: write it as a range ending at `prev'
+					if (stride > 1)
+						out << ' ' << rangeBegin << '-' << prev << " \\ " << stride;
+					else
+						out << ' ' << rangeBegin << '-' << prev ;
+					inRange = false;
+					}
+				}
+			}
+		else
+			{
+			// A range was just written; start a new candidate run at `prev'
+			inRange = true;
+			rangeBegin = prev;
+			stride = curr - prev;
+			}
+		prev = curr;
+		curr = 1 + *currIt;
+		currIt++;
+		}
+	// All members have been read; flush whatever has not been written yet
+	if (inRange)
+		{
+		if (curr - prev != stride)
+			{
+			// The last member does not continue the run: write the run, then the last member alone
+			if (prev - rangeBegin == stride)
+				out << ' ' << rangeBegin << ' ' << prev;
+			else
+				{
+				if (stride > 1)
+					out << ' ' << rangeBegin << '-' << prev << " \\ " << stride;
+				else
+					out << ' ' << rangeBegin << '-' << prev ;
+				}
+			out << ' ' << curr;
+			}
+		else
+			{
+			// The last member continues the run: write the run ending at the last member
+			if (stride > 1)
+				out << ' ' << rangeBegin << '-' << curr << " \\ " << stride;
+			else
+				out << ' ' << rangeBegin << '-' << curr ;
+			}
+		}
+	else
+		out << ' ' << prev << ' ' << curr;
+	}
+/*!
+	Reads in a set from a NEXUS data file, one token at a time, storing its members through AddRange. Returns true if
+	the set was terminated by a semicolon, false otherwise (reading also stops, returning false, when a comma is
+	encountered). Throws NxsException (after filling in `block.errormsg') when '-', '.' or '\' appears out of place,
+	when AddRange rejects a member or range, or when GetTokenValue cannot interpret a token.
+*/
+bool NxsSetReader::Run()
+	{
+	bool ok;
+	bool retval = false;
+
+	// UINT_MAX serves as the "not yet read" marker for both ends of the pending member/range.
+	// A pending single member or range is only stored once the token that follows it has been read.
+	//
+	unsigned rangeBegin = UINT_MAX;
+	unsigned rangeEnd = rangeBegin;
+	bool insideRange = false;
+	unsigned modValue = 1;
+
+	for (;;)
+		{
+		// Next token should be one of the following:
+		//   ';'        --> set definition finished
+		//   '-'        --> range being defined
+		//   <integer>  --> member of set (or beginning or end of a range)
+		//   '.'        --> signifies the number max
+		//   '\'        --> signifies modulus value coming next
+		//
+		token.GetNextToken();
+
+		if (token.Equals("-"))
+			{
+			// We should not be inside a range when we encounter a hyphenation symbol.
+			// The hyphen is what _puts_ us inside a range!
+			//
+			if (insideRange)
+				{
+				block.errormsg = "The symbol '-' is out of place here";
+				throw NxsException(block.errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+				}
+			insideRange = true;
+			}
+
+		else if (token.Equals("."))
+			{
+			// We _should_ be inside a range if we encounter a period, as this
+			// is a range termination character
+			//
+			if (!insideRange)
+				{
+				block.errormsg = "The symbol '.' can only be used to specify the end of a range";
+				throw NxsException(block.errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+				}
+			rangeEnd = max;
+			}
+
+		else if (token.Equals("\\"))
+			{
+			// The backslash character is used to specify a modulus to a range, and
+			// thus should only be encountered if currently inside a range
+			//
+			if (!insideRange)
+				{
+				block.errormsg = "The symbol '\\' can only be used after the end of a range has been specified";
+				throw NxsException(block.errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+				}
+
+			// This should be the modulus value
+			//
+			modValue = NxsToken::DemandPositiveInt(token, block.errormsg, "The modulus value");
+			}
+
+		else if (insideRange && rangeEnd == UINT_MAX)
+			{
+			// The beginning of the range and the hyphen symbol have been read
+			// already, just need to store the end of the range at this point
+			//
+			rangeEnd = GetTokenValue();
+			}
+
+		else if (insideRange)
+			{
+			// If insideRange is true, we must have already stored the beginning
+			// of the range and read in the hyphen character. We would not have
+			// made it this far if we had also not already stored the range end.
+			// Thus, we can go ahead and add the range.
+			//
+			ok = AddRange(rangeBegin, rangeEnd, modValue);
+			modValue = 1;
+
+			if (!ok)
+				{
+				block.errormsg = "Character number out of range (or range incorrectly specified) in set specification";
+				throw NxsException(block.errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+				}
+
+			// We have actually already read in the next token, so deal with it
+			// now so that we don't end up skipping a token
+			//
+			if (token.Equals(";"))
+				{
+				retval = true;
+				break;
+				}
+			else if (token.Equals(","))
+				{
+				break;
+				}
+
+			rangeBegin = GetTokenValue();
+			rangeEnd = UINT_MAX;
+			insideRange = false;
+			}
+
+		else if (rangeBegin != UINT_MAX)
+			{
+			// If we were inside a range, we would have not gotten this far.
+			// If not in a range, we are either getting ready to begin a new
+			// range or have previously read in a single value. Handle the
+			// latter possibility here.
+			//
+			ok = AddRange(rangeBegin, rangeBegin, modValue);
+			modValue = 1;
+
+			if (!ok)
+				{
+				block.errormsg = "Number out of range (or range incorrectly specified) in set specification";
+				throw NxsException(block.errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+				}
+
+			if (token.Equals(";"))
+				{
+				retval = true;
+				break;
+				}
+			else if (token.Equals(","))
+				{
+				break;
+				}
+
+			rangeBegin = GetTokenValue();
+			rangeEnd = UINT_MAX;
+			}
+
+		else if (token.Equals(";"))
+			{
+			retval = true;
+			break;
+			}
+
+		else if (token.Equals(","))
+			{
+			break;
+			}
+
+		else if (token.Equals("ALL"))
+			{
+			// Store every member [1..max] right away.
+			// NOTE(review): the result of this AddRange call is assigned to `ok' but not checked here.
+			// rangeBegin is left at 1, so the next token is handled by the single-value branch above.
+			//
+			rangeBegin = 1;
+			rangeEnd = max;
+			ok = AddRange(rangeBegin, rangeEnd);
+			}
+
+		else
+			{
+			// Can only get here if rangeBegin still equals UINT_MAX and thus we
+			// are reading in the very first token and that token is neither
+			// the word "all" nor is it a semicolon
+			//
+			rangeBegin = GetTokenValue();
+			rangeEnd = UINT_MAX;
+			}
+		}
+
+	return retval;
+	}
diff --git a/src/nxsstring.cpp b/src/nxsstring.cpp
new file mode 100644
index 0000000..bb05cde
--- /dev/null
+++ b/src/nxsstring.cpp
@@ -0,0 +1,1041 @@
+//	Copyright (C) 1999-2003 Paul O. Lewis and Mark T. Holder
+//
+//	This file is part of NCL (Nexus Class Library) version 2.0.
+//
+//	NCL is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	NCL is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with NCL; if not, write to the Free Software Foundation, Inc.,
+//	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+#include <climits>
+#include <cstdarg>
+#include <cmath>
+#include <cfloat>
+#include <cstdlib>
+#include "ncl/nxsdefs.h"
+#include "ncl/nxsstring.h"
+
+using namespace std;
+
+// Splits `s' on whitespace and appends each run of graphical (visible, non-space) characters
+//	to the back of `r'. Leading and trailing whitespace is lost, so no empty strings are ever
+//	added to the list. `r' must not be NULL (asserted); a NULL `r' is otherwise ignored.
+void NxsString::split(const std::string &s, std::list<std::string> * r)
+	{
+	NCL_ASSERT(r);
+	if (r == NULL)
+		return;
+	std::string current;
+	for (std::string::const_iterator sIt = s.begin(); sIt != s.end(); ++sIt)
+		{
+		// The <cctype> classifiers are undefined for negative arguments other than EOF, so the
+		// (possibly signed) char is converted to unsigned char before it is classified.
+		//
+		const unsigned char c = static_cast<unsigned char>(*sIt);
+		if (isgraph(c))
+			current.append(1, *sIt);
+		else if (!current.empty())
+			{
+			// Whitespace ends the word collected so far
+			r->push_back(current);
+			current.clear();
+			}
+		}
+	// Do not lose a final word that runs to the end of the string
+	if (!current.empty())
+		r->push_back(current);
+	}
+
+// Returns the index of the first element of `v' that equals `t', or -1 if there is no such element.
+int NxsString::index_in_vector(const std::string &t, const std::vector<std::string> &v)
+	{
+	int pos = 0;
+	std::vector<std::string>::const_iterator it = v.begin();
+	while (it != v.end())
+		{
+		if (*it == t)
+			return pos;
+		++it;
+		++pos;
+		}
+	return -1;
+	}
+
+// Returns the index of the first of the `n' C-strings in `v' that equals `t', or -1 if there is none
+//	(or if `v' is NULL or `n' is zero). NULL entries in `v' are skipped.
+int NxsString::index_in_array(const std::string &t, const char * * v, const unsigned n)
+	{
+	if (n == 0 || !v)
+		return -1;
+
+	const int count = (int) n;
+	for (int pos = 0; pos < count; ++pos)
+		{
+		const char * candidate = v[pos];
+		if (candidate && t == candidate)
+			return pos;
+		}
+	return -1;
+	}
+
+
+/// Returns a copy of `s' with whitespace removed from both its front and its end
+std::string NxsString::strip_surrounding_whitespace(const std::string & s)
+	{
+	// Trim the front first, then the end of that intermediate result
+	return strip_trailing_whitespace(strip_leading_whitespace(s));
+	}
+
+/// Returns a copy of `s' without its leading run of non-graphical characters (whitespace)
+std::string NxsString::strip_leading_whitespace(const std::string & s)
+	{
+	// Find the first graphical character. The <cctype> classifiers are undefined for negative
+	// arguments other than EOF, so each char is converted to unsigned char before the test.
+	//
+	std::string::const_iterator firstGraph = s.begin();
+	while (firstGraph != s.end() && !isgraph(static_cast<unsigned char>(*firstGraph)))
+		++firstGraph;
+
+	// Everything from there to the end is kept (an empty string if nothing graphical was found)
+	return std::string(firstGraph, s.end());
+	}
+
+
+/// Returns a copy of `s' without its trailing run of non-graphical characters (whitespace)
+std::string NxsString::strip_trailing_whitespace(const std::string & s)
+	{
+	// Walk backwards to the last graphical character. The <cctype> classifiers are undefined for
+	// negative arguments other than EOF, so each char is converted to unsigned char before the test.
+	//
+	std::string::const_reverse_iterator lastGraph = s.rbegin();
+	while (lastGraph != s.rend() && !isgraph(static_cast<unsigned char>(*lastGraph)))
+		++lastGraph;
+
+	// base() refers to the position just after the last graphical character
+	return std::string(s.begin(), lastGraph.base());
+	}
+
+/// Returns a copy of `s' from which every non-graphical character (all whitespace) has been removed
+std::string NxsString::strip_whitespace(const std::string & s)
+	{
+	std::string	t;
+	t.reserve(s.length());
+	for (std::string::const_iterator sIt = s.begin(); sIt != s.end(); ++sIt)
+		{
+		// isgraph is undefined for negative arguments other than EOF, hence the unsigned char conversion
+		if (isgraph(static_cast<unsigned char>(*sIt)))
+			t.push_back(*sIt);
+		}
+	return t;
+	}
+
+/*!
+	Returns true if `o` points to a non-empty string that represents a long in base 10 (and `o` has no other characters
+	than the long: no leading whitespace and no trailing characters). If n is not NULL, then when the function returns
+	true, *n will be the long. Returns false for NULL and for the empty string.
+*/
+bool NxsString::to_long(const char *o, long *n)
+	{
+	// The empty string must be rejected explicitly: strchr also matches the terminating '\0' of the
+	// string it searches, so "" would otherwise pass the first-character test and be reported as 0.
+	//
+	if (o == NULL || *o == '\0')
+		return false;
+
+	// strtol skips leading whitespace, but that is not accepted here: the first character has to be
+	// a digit or a sign.
+	//
+	if (strchr("0123456789-+", *o) == NULL)
+		return false;
+
+	char * pEnd;
+	const long i = strtol (o, &pEnd, 10);
+
+	// Fail if nothing was converted or if anything follows the number
+	//
+	if (pEnd == o || *pEnd != '\0')
+		return false;
+	if (n != NULL)
+		*n = i;
+	return true;
+	}
+
+/*!
+	Returns true if `o` points to a non-empty string that represents a double (and `o` has no other characters than
+	the double: no leading whitespace and no trailing characters). If n is not NULL, then when the function returns
+	true, *n will be the double. Returns false for NULL and for the empty string.
+*/
+bool NxsString::to_double(const char *o, double *n)
+	{
+	// The empty string must be rejected explicitly: strchr also matches the terminating '\0' of the
+	// string it searches, so "" would otherwise pass the first-character test and be reported as 0.0.
+	//
+	if (o == NULL || *o == '\0')
+		return false;
+
+	// strtod skips leading whitespace, but that is not accepted here: the first character has to be
+	// a digit, a sign or a decimal point.
+	//
+	if (strchr("0123456789-.+", *o) == NULL)
+		return false;
+
+	char * pEnd;
+	const double i = strtod (o, &pEnd);
+
+	// Fail if nothing was converted or if anything follows the number
+	//
+	if (pEnd == o || *pEnd != '\0')
+		return false;
+	if (n != NULL)
+		*n = i;
+	return true;
+	}
+
+// Returns true if the C-strings `o' and `t' are equal when case is ignored. Returns false if either
+//	pointer is NULL.
+bool NxsString::case_insensitive_equals(const char *o, const char * t)
+	{
+	if (o == 0L || t == 0L)
+		return false;
+	// toupper is undefined for negative arguments other than EOF, so each char is converted to
+	// unsigned char first. The loop ends at the first mismatch (which includes one string ending
+	// before the other); reaching the terminator of `o' while still matching means equality.
+	//
+	for (; toupper(static_cast<unsigned char>(*o)) == toupper(static_cast<unsigned char>(*t)); ++o, ++t)
+		{
+		if (*o == '\0')
+			return true;
+		}
+	return false;
+	}
+
+/*!
+	Capitalizes every character in s (in place) and returns a reference to s.
+*/
+std::string & NxsString::to_upper(std::string &s)
+	{
+	for (std::string::iterator sIt = s.begin(); sIt != s.end(); ++sIt)
+		{
+		// toupper is undefined for negative arguments other than EOF, hence the unsigned char conversion
+		*sIt = (char) toupper(static_cast<unsigned char>(*sIt));
+		}
+	return s;
+	}
+/*!
+	Converts every character in s to lower case (in place) and returns a reference to s.
+*/
+std::string & NxsString::to_lower(std::string &s)
+	{
+	for (std::string::iterator sIt = s.begin(); sIt != s.end(); ++sIt)
+		{
+		// tolower is undefined for negative arguments other than EOF, hence the unsigned char conversion
+		*sIt = (char) tolower(static_cast<unsigned char>(*sIt));
+		}
+	return s;
+	}
+
+/*!
+	Appends a string representation of the supplied double to the stored string and returns a reference to itself.
+	The value is written in fixed notation with six decimals, after which trailing zeros are removed (one digit is
+	always left after the decimal point).
+*/
+NxsString &NxsString::operator+=(
+  const double d)	/* the double value to append */
+	{
+	// "%f" writes every digit of the integer part, so the largest finite double needs
+	// DBL_MAX_10_EXP + 1 digits plus a sign, the decimal point, six decimals and the
+	// terminating '\0'. A fixed 81 byte buffer would be overrun by large magnitudes.
+	//
+	char tmp[DBL_MAX_10_EXP + 20];
+
+	// Create a C-string representing the supplied double value.
+	// The # causes a decimal point to always be output.
+	//
+	std::sprintf(tmp, "%#3.6f", d);
+	unsigned tmplen = (unsigned)strlen(tmp);
+
+	// If the C-string has a lot of trailing zeros, lop them off, but keep the
+	// zero that directly follows the decimal point
+	//
+	while (tmplen >= 3 && tmp[tmplen-1] == '0' && tmp[tmplen-2] != '.')
+		{
+		tmp[tmplen-1] = '\0';
+		tmplen--;
+		}
+
+	append(tmp);
+	return *this;
+	}
+
+/*!
+	Appends the character `c' to the stored string `n' times and returns a reference to itself.
+*/
+NxsString &NxsString::AddTail(
+  char c,		/* the character to use in the appended tail */
+  unsigned n)	/* the number of times `c' is to be appended */
+	{
+	// A '\0' tail character contributes nothing (it would be an empty C-string), so only
+	// real characters are appended.
+	//
+	if (c != '\0')
+		{
+		unsigned remaining = n;
+		while (remaining > 0)
+			{
+			append(1, c);
+			--remaining;
+			}
+		}
+
+	return *this;
+	}
+
+#if defined(_MSC_VER)
+#	pragma warning(disable:4786)
+#	pragma warning(disable:4291)
+#	if _MSC_VER >= 1500
+#		include <cstdio>
+#		if !defined(vsnprintf)
+#			define vsnprintf _vsnprintf_s
+#		endif
+#		define sprintf sprintf_s
+#   else
+#       define vsnprintf _vsnprintf
+#   endif
+#endif
+
+
+/*!
+	Formats the arguments printf-style and appends the result to the end of this NxsString. The formatting is done in
+	a fixed buffer of 256 bytes; output that does not fit is truncated to what the buffer holds. The return value is
+	whatever vsnprintf reported for the formatting step. For example, the following code would result in the string s
+	being set to "ts-tv rate ratio = 4.56789":
+>
+	double kappa = 4.56789;
+	NxsString s;
+	s.PrintF("ts-tv rate ratio = %.5f", kappa);
+>
+*/
+int NxsString::PrintF(
+  const char *formatStr,	/* the printf-style format string */
+  ...)						/* other arguments referred to by the format string */
+	{
+	const int kBufferSize = 256;
+	char formatted[kBufferSize];
+
+	// Format the optional arguments (everything after formatStr) into the local buffer
+	//
+	va_list args;
+	va_start(args, formatStr);
+	const int nWritten = vsnprintf(formatted, kBufferSize, formatStr, args);
+	va_end(args);
+
+	// A negative result or one that does not fit means the buffer was too small; make sure
+	// the truncated text is terminated before it is appended
+	//
+	const bool fits = (nWritten >= 0 && nWritten < kBufferSize);
+	if (!fits)
+		formatted[kBufferSize - 1] = '\0';
+
+	*this << formatted;
+
+	return nWritten;
+	}
+
+/*!
+	Returns true if the stored string is an abbreviation (or complete copy) of the argument `s', i.e. if the stored
+	string is a prefix of `s'. Always returns false if the stored string is empty.
+*/
+bool NxsString::IsStdAbbreviation(
+  const NxsString &s,	/* the string for which the stored string is potentially an abbreviation */
+  bool respectCase)		/* if true, comparison will be case-sensitive */
+  const
+	{
+	if (empty())
+		return false;
+
+	// s is the unabbreviated comparison string, t is the stored string
+	//
+	const unsigned slen = static_cast<unsigned>(s.size());
+	const unsigned tlen = static_cast<unsigned>(size());
+
+	// t cannot be an abbreviation of s if it is longer than s
+	//
+	if (tlen > slen)
+		return false;
+
+	// Examine each character in t and return false (meaning "not an abbreviation")
+	// if at any point the corresponding character in s is different
+	//
+	for (unsigned k = 0; k < tlen; k++)
+		{
+		const char mine = (*this)[k];
+		const char theirs = s[k];
+		if (respectCase)
+			{
+			if (mine != theirs)
+				return false;
+			}
+		// toupper is undefined for negative arguments other than EOF, hence the unsigned char conversions
+		else if (toupper(static_cast<unsigned char>(mine)) != toupper(static_cast<unsigned char>(theirs)))
+			return false;
+		}
+
+	return true;
+	}
+
+/*!
+	Returns true if the stored string is a case-insensitive abbreviation (or complete copy) of `s' and the stored string
+ 	has all of the characters that are in the initial capitalized portion of `s'. For example if `s' is "KAPpa" then
+	"kappa", "kapp", or "kap" (with any capitalization pattern) will return true and all other strings will return false.
+	Always returns false if the stored string has length of zero.
+*/
+bool NxsString::IsCapAbbreviation(
+  const NxsString &s)	/* the string for which the stored string is potentially an abbreviation */
+  const
+	{
+	if (empty())
+		return false;
+
+	// s is the unabbreviated comparison string, t is the stored string
+	//
+	const unsigned slen = static_cast<unsigned>(s.size());
+	const unsigned tlen = static_cast<unsigned>(size());
+
+	// If the stored string is longer than s then it cannot be an abbreviation of s
+	//
+	if (tlen > slen)
+		return false;
+
+	// Note: the <cctype> functions are undefined for negative arguments other than EOF, so every
+	// char is converted to unsigned char before it is classified or case-converted below.
+	//
+	unsigned k = 0;
+	for (; k < slen; k++)
+		{
+		const unsigned char sChar = static_cast<unsigned char>(s[k]);
+		if (isupper(sChar))
+			{
+			// If still in the uppercase portion of s and we've run out of characters
+			// in t, then t is not a valid abbrevation of s
+			//
+			if (k >= tlen)
+				return false;
+
+			// If kth character in t is not equal to kth character in s, then
+			// t is not an abbrevation of s
+			//
+			char tokenChar = (char)toupper(static_cast<unsigned char>((*this)[k]));
+			if (tokenChar != s[k])
+				return false;
+			}
+		else if (!isalpha(sChar))
+			{
+			// Get here if we are no longer in the upper case portion of s and
+			// s[k] is not an alphabetic character. This section is necessary because
+			// we are dealing with a section of s that is not alphabetical and thus
+			// we cannot tell whether this should be part of the abbrevation or not
+			// (i.e. we cannot tell if it is capitalized or not). In this case, we
+			// pretend that we are still in the upper case portion of s and return
+			// false if we have run out of characters in t (meaning that the abbreviation
+			// was too short) or we find a mismatch.
+			//
+			if (k >= tlen)
+				return false;
+
+			if ((*this)[k] != s[k])
+				return false;
+			}
+		else
+			{
+			// Get here if we are no longer in the upper case portion of s and
+			// s[k] is an alphabetic character. Just break because we have determined
+			// that t is in fact a valid abbreviation of s.
+			//
+			break;
+			}
+		}
+
+	// Check the lower case portion of s and any corresponding characters in t for mismatches
+	// Even though the abbreviation is valid up to this point, it will become invalid if
+	// any mismatches are found beyond the upper case portion of s
+	//
+	for (; k < tlen; k++)
+		{
+  		const char tokenChar = (char)toupper(static_cast<unsigned char>((*this)[k]));
+  		const char otherChar = (char)toupper(static_cast<unsigned char>(s[k]));
+		if (tokenChar != otherChar)
+			return false;
+		}
+
+	return true;
+	}
+
+/*!
+	Right-justifies `x' in a field `w' characters wide, using blank spaces to fill in unused portions on the left-hand
+	side of the field. Specify true for `clear_first' to first empty the string. Assumes `w' is large enough to
+	accommodate the string representation of `x' (this is asserted); if it is not, `x' is appended without any padding.
+*/
+NxsString &NxsString::RightJustifyLong(
+  long x,			/* long value to right justify */
+  unsigned int w,	/* width of field */
+  bool clear_first)	/* if true, initialize string first to empty string */
+	{
+	const bool x_negative = (x < 0L);
+
+	// Take the absolute value in unsigned arithmetic: negating LONG_MIN as a long would overflow
+	//
+	const unsigned long xabs = (x_negative ? 0UL - static_cast<unsigned long>(x) : static_cast<unsigned long>(x));
+
+	// Count the decimal digits of |x| exactly. (Taking log10 of the value converted to double can
+	// be off by one for large values that are rounded up by the conversion.)
+	// If w = 10 and x = 123, x needs 3 characters and we need 7 blank spaces before it
+	//
+	unsigned x_width = 1;
+	for (unsigned long rest = xabs / 10UL; rest > 0UL; rest /= 10UL)
+		x_width++;
+	if (x_negative)
+		x_width++;	// for the minus sign
+
+	// Never let the unsigned subtraction wrap around when the field is too narrow
+	//
+	NCL_ASSERT(x_width <= w);
+	const unsigned num_spaces = (w > x_width ? w - x_width : 0);
+
+	if (clear_first)
+		erase();
+
+	for (unsigned k = 0; k < num_spaces; k++)
+		*this += ' ';
+
+	if (x_negative)
+		*this += '-';
+
+	*this += xabs;
+	return *this;
+	}
+
+/*!
+	Right-justifies `x' in a field `w' characters wide with precision `p', using blank spaces to fill in unused
+	portions on the left-hand side of the field. Specify true for `clear_first' to first empty the string. Assumes that
+	the specified width is enough to accommodate the string representation of `x' (this is asserted); if it is not,
+	`x' is appended without any padding.
+*/
+NxsString &NxsString::RightJustifyDbl(
+  double x,				/* double value to right justify */
+  unsigned w,			/* width of field */
+  unsigned p,			/* precision to use when displaying `x' */
+  bool clear_first)		/* if true, initialize stored string first to the empty string */
+	{
+	if (clear_first)
+		erase();
+
+	// Build a format such as "%.3f" for the requested precision (`p' is unsigned, hence %u),
+	// then use it to format `x'
+	//
+	char fmtstr[81];
+	sprintf(fmtstr, "%%.%uf", p);
+	NxsString tmp;
+	tmp.PrintF(fmtstr, x);
+
+	// Never let the unsigned subtraction wrap around when the field is too narrow
+	//
+	NCL_ASSERT(w >= tmp.length());
+	const unsigned tmplen = (unsigned)tmp.length();
+	const unsigned num_spaces = (w > tmplen ? w - tmplen : 0);
+
+	for (unsigned k = 0; k < num_spaces; k++)
+		*this += ' ';
+
+	*this += tmp;
+	return *this;
+	}
+
+/*!
+	Right-justifies `s' in a field `w' characters wide, using blank spaces to fill in unused portions on the left-hand
+	side of the field. Specify true for `clear_first' to first empty the string. Assumes that the specified width is
+	enough to accommodate `s' (this is asserted); if it is not, `s' is appended without any padding.
+*/
+NxsString &NxsString::RightJustifyString(
+  const NxsString &s,	/* string to right justify */
+  unsigned w,			/* width of field */
+  bool clear_first)		/* if true, initialize string first to the empty string */
+	{
+	if (clear_first)
+		erase();
+
+	// Never let the unsigned subtraction wrap around when the field is too narrow
+	//
+	NCL_ASSERT(w >= s.length());
+	const unsigned slen = (unsigned)s.length();
+	const unsigned num_spaces = (w > slen ? w - slen : 0);
+
+	for (unsigned k = 0; k < num_spaces; k++)
+		*this += ' ';
+
+	*this += s;
+	return *this;
+	}
+// Replaces every blank space in `s' with an underscore (in place).
+void NxsString::blanks_to_underscores(std::string &s)
+	{
+	// Jump from one blank to the next instead of inspecting every character
+	std::string::size_type pos = s.find(' ');
+	while (pos != std::string::npos)
+		{
+		s[pos] = '_';
+		pos = s.find(' ', pos + 1);
+		}
+	}
+
+// Replaces `s' with a single-quoted version of itself: the text is wrapped in apostrophes and every
+//	apostrophe inside it is doubled.
+void NxsString::add_nxs_quotes(std::string &s)
+	{
+	std::string quoted;
+	quoted.reserve(s.length() + 4);
+
+	quoted.push_back('\'');
+	const std::string::size_type total = s.length();
+	for (std::string::size_type pos = 0; pos < total; ++pos)
+		{
+		const char c = s[pos];
+		quoted.push_back(c);
+		if (c == '\'')
+			quoted.push_back('\'');	// an embedded apostrophe is written twice
+		}
+	quoted.push_back('\'');
+
+	s.swap(quoted);
+	}
+
+// Scans `s' and reports what has to be done to write it as a single NEXUS token: nothing, replacing
+//	blanks by underscores, or surrounding it with single quotes. The scan stops at the first
+//	character that settles the answer.
+NxsString::NxsQuotingRequirements NxsString::determine_quoting_requirements(const std::string & s)
+	{
+	NxsQuotingRequirements nrq = kNoQuotesNeededForNexus;
+	for (std::string::const_iterator sIt = s.begin(); sIt != s.end(); ++sIt)
+		{
+		const char c = *sIt;
+		// isgraph is undefined for negative arguments other than EOF, hence the unsigned char conversion
+		//
+		if (!isgraph(static_cast<unsigned char>(c)))
+			{
+			// Any non-graphical character other than a blank forces quotes; blanks alone
+			// can be handled with underscores (unless a later character forces quotes)
+			//
+			if (c != ' ')
+				return kSingleQuotesNeededForNexus;
+			nrq  = kUnderscoresSufficeForNexus;
+			}
+		else if (strchr("(){}\"-]/\\,;:=*`+<>", c) != NULL)
+			{
+			// Get here if c is any NEXUS punctuation mark except left square bracket ([) or apostrophe (').
+			// A punctuation mark needs no quotes when it is all there is to the string, but forces
+			// quotes when the string is longer than that.
+			//
+			return (s.length() > 1 ? kSingleQuotesNeededForNexus : kNoQuotesNeededForNexus);
+			}
+		else if (strchr("\'[_", c) != NULL)
+			{
+			// Get here if c is an apostrophe, a left square bracket or an underscore. Quotes are
+			// always needed for these, even if one of them is all there is to the string.
+			//
+			return kSingleQuotesNeededForNexus;
+			}
+		}
+	return nrq;
+	}
+
+/*!
+	Returns true if the string needs to be surrounded by single-quotes to make it a single nexus token: that is, if it
+	contains a non-graphical character, an apostrophe, an underscore or a left square bracket, or if it is longer than
+	one character and contains one of the other NEXUS punctuation marks.
+*/
+bool NxsString::QuotesNeeded() const
+	{
+	for (NxsString::const_iterator sIt = begin(); sIt != end(); sIt++)
+		{
+		const char c = (*sIt);
+		// isgraph is undefined for negative arguments other than EOF, hence the unsigned char conversion
+		if (!isgraph(static_cast<unsigned char>(c)))
+			return true;
+		else if (strchr("(){}\"-]/\\,;:=*`+<>", c) != NULL && length() > 1)
+			return true;
+		else if (c == '\'' || c == '_' || c == '[')
+			return true;
+		}
+	return false;
+	}
+
+/*!
+	Replaces every blank space in the stored string with an underscore and returns a reference to itself.
+*/
+NxsString &NxsString::BlanksToUnderscores()
+	{
+	for (NxsString::iterator it = begin(); it != end(); ++it)
+		{
+		if (*it == ' ')
+			*it = '_';
+		}
+	return *this;
+	}
+
+/*!
+	Replaces every underscore in the stored string with a blank space and returns a reference to itself.
+*/
+NxsString &NxsString::UnderscoresToBlanks()
+	{
+	for (NxsString::iterator it = begin(); it != end(); ++it)
+		{
+		if (*it == '_')
+			*it = ' ';
+		}
+	return *this;
+	}
+
+/*!
+	Shortens the stored string to `n' characters in total: its first `n' - 3 characters followed by "...". If the string
+	is already at most `n' characters in length, this function has no effect. This is useful when it is desirable to
+	show some of the contents of a string, even when the string will not fit in its entirety into the space available
+	for displaying it. Assumes that `n' is at least 4 (this is asserted); for a smaller `n' none of the original
+	characters are kept and the result is just "...".
+*/
+NxsString &NxsString::ShortenTo(
+  unsigned n)	/* maximum number of characters available for displaying the string */
+	{
+	NCL_ASSERT(n > 3);
+	if (length() <= static_cast<unsigned>(n))
+		return *this;
+
+	// Number of leading characters that survive; guarded so that the unsigned subtraction
+	// cannot wrap around when `n' is smaller than the ellipsis itself
+	//
+	const unsigned keep = (n > 3 ? n - 3 : 0);
+
+	// The string is known to be longer than `n' here, so this only ever truncates
+	//
+	resize(keep);
+	append("...");
+	return *this;
+	}
+
+
+/*!
+	Returns true if the stored string can be interpreted as a double value, and returns false otherwise. Accepted are
+	an optional leading sign, digits with at most one decimal point, and an optional exponent: `e' or `E', an optional
+	sign and at least one digit (for example "-1.5", ".5", "3e8", "1.0e+05" or "2E-3"). At least one digit must appear
+	before the exponent.
+*/
+bool NxsString::IsADouble() const
+	{
+	const char	*str			= c_str();
+	unsigned	i				= 0;
+	bool		hadDecimalPt	= false;
+	bool		hadExp			= false;
+	bool		hadDigit		= false;
+	bool		hadDigitInExp	= false;
+
+	//	First char can be a sign
+	//
+	if (str[i]=='-' || str[i] == '+')
+		i++;
+
+	while (str[i])
+		{
+		// isdigit is undefined for negative arguments other than EOF, hence the unsigned char conversion
+		//
+		if (isdigit(static_cast<unsigned char>(str[i])))
+			{
+			//	Digits are always OK
+			//
+			if (hadExp)
+				hadDigitInExp = true;
+			else
+				hadDigit = true;
+			}
+		else if (str[i] == '.')
+			{
+			//	One decimal point is allowed and it must be before the exponent
+			//
+			if (hadExp || hadDecimalPt)
+				return false;
+			hadDecimalPt = true;
+			}
+		else if (str[i] == 'e' || str[i] == 'E')
+			{
+			//	One e is allowed, but it must be after at least one digit
+			//
+			if (hadExp || !hadDigit)
+				return false;
+			hadExp = true;
+			}
+		else if (str[i] == '-' || str[i] == '+')
+			{
+			//	Another sign is allowed only if it is directly preceded by the e of the exponent
+			//	(hadExp is tested first, so str[i-1] is only read when i is at least 1)
+			//
+			if (!hadExp || (str[i-1] != 'e' && str[i-1] != 'E')	)
+				return false;
+			}
+		else
+			return false;
+		i++;
+		}
+
+	//	An exponent needs at least one digit of its own; otherwise one digit anywhere suffices
+	//
+	if (hadExp)
+		return hadDigitInExp;
+	return hadDigit;
+	}
+
+/*!
+	Returns true if stored string can be interpreted as a long integer: an optional leading minus sign followed by one
+	or more digits and nothing else.
+*/
+bool NxsString::IsALong() const
+	{
+	const char *str	= c_str();
+	unsigned i		= 0;
+
+	//	First char can be -
+	//
+	if (str[i]=='-')
+		i++;
+
+	//	At least one digit is required, and every remaining character must be a digit.
+	//	isdigit is undefined for negative arguments other than EOF, hence the unsigned char conversions.
+	//
+	if (!isdigit(static_cast<unsigned char>(str[i])))
+		return false;
+
+	for (; str[i]; i++)
+		{
+		if (!isdigit(static_cast<unsigned char>(str[i])))
+			return false;
+		}
+
+	return true;
+	}
+
+/*!
+	Returns true if the stored string is a non-case-sensitive copy of the argument `s'. Note: will return true if both the
+	stored string and `s' are empty strings.
+*/
+bool NxsString::EqualsCaseInsensitive(
+  const NxsString &s)	/* the comparison string */
+  const
+	{
+	// Strings of different length can never be copies of each other
+	//
+	const unsigned slen = (unsigned)s.size();
+	const unsigned tlen = (unsigned)size();
+	if (slen != tlen)
+		return false;
+
+	for (unsigned k = 0; k < tlen; k++)
+		{
+		// toupper is undefined for negative arguments other than EOF, hence the unsigned char conversions
+		//
+		const char mine = (char)toupper(static_cast<unsigned char>((*this)[k]));
+		const char theirs = (char)toupper(static_cast<unsigned char>(s[k]));
+		if (mine != theirs)
+			return false;
+		}
+
+	return true;
+	}
+
+/*!
+	Creates a string representation of the hexadecimal version of the long integer `p'. For example, if `p' equals 123,
+	and if 2 was specified for `nFours', the resulting string would be "7B". If 4 was specified for `nFours', then the
+	resulting string would be "007B". Only the lowest `nFours' hexadecimal digits are shown. If more digits are
+	requested than a long holds, the extra leading digits are '0' for non-negative values and 'F' for negative ones.
+*/
+NxsString NxsString::ToHex(
+  long p,			/* the value to display in hexadecimal */
+  unsigned nFours)	/* the number of hexadecimal digits to display */
+	{
+	static const char decod[] = "0123456789ABCDEF";
+
+	// Work on the unsigned bit pattern: right-shifting a negative long is implementation-defined,
+	// and shifting by the full width of the type (or more) is undefined.
+	//
+	const unsigned long bits = static_cast<unsigned long>(p);
+	const unsigned digitsInLong = (unsigned)(sizeof(long) * CHAR_BIT / 4);
+	const char fillDigit = (p < 0 ? 'F' : '0');
+
+	NxsString s;
+	for (unsigned i = nFours; i > 0; i--)
+		{
+		// Digit 0 is the least significant one; the most significant requested digit comes first
+		//
+		const unsigned digit = i - 1;
+		if (digit >= digitsInLong)
+			s += fillDigit;
+		else
+			s += decod[(bits >> (4*digit)) & 0x000f];
+		}
+	return s;
+	}
+
+/*!
+	Checks to see if the stored string begins with upper case letters and, if so, returns all of the contiguous capitalized
+	prefix. If the stored string does not begin with an upper case letter, an empty string is returned.
+*/
+NxsString NxsString::UpperCasePrefix() const
+	{
+	NxsString x;
+	// isupper is undefined for negative arguments other than EOF, hence the unsigned char conversion
+	//
+	for (unsigned i = 0; i < size() && isupper(static_cast<unsigned char>((*this)[i])); i++)
+		x += (*this)[i];
+	return x;
+	}
+
+/*!
+	Converts the stored string to an unsigned int by way of ConvertToLong (which throws NxsX_NotANumber if the
+	conversion fails). Returns UINT_MAX if the number is negative or if it is INT_MAX or larger.
+*/
+unsigned NxsString::ConvertToUnsigned() const
+	{
+	const long asLong = ConvertToLong();
+
+	// Only values in [0, INT_MAX) are passed through unchanged
+	//
+	const bool passThrough = (asLong >= 0 && asLong < (long) INT_MAX);
+	return (passThrough ? static_cast<unsigned> (asLong) : UINT_MAX);
+	}
+
+/*!
+	Converts the stored string to an int by way of ConvertToLong (which throws NxsX_NotANumber if the conversion
+	fails). The result is limited to the range [-INT_MAX, INT_MAX]: INT_MAX is returned if the number is too large to
+	fit in an int and -INT_MAX if it is too small.
+*/
+int NxsString::ConvertToInt() const
+	{
+	const long asLong = ConvertToLong();
+
+	const bool tooLarge = (asLong == LONG_MAX || asLong > INT_MAX);
+	if (tooLarge)
+		return INT_MAX;
+
+	const bool tooSmall = (asLong == -LONG_MAX || asLong <-INT_MAX);
+	if (tooSmall)
+		return -INT_MAX;
+
+	return static_cast<int> (asLong);
+	}
+
+/*!
+	Converts the stored string to a long using the standard C function strtol (base 10), throwing NxsX_NotANumber if the
+	conversion fails: that is, if the string is empty, does not start with a digit or a minus sign, or contains no
+	digits that strtol could convert. Characters that follow the number are ignored.
+*/
+long NxsString::ConvertToLong() const
+	{
+	// isdigit is undefined for negative arguments other than EOF, hence the unsigned char conversion
+	//
+	if (length() == 0 || !(isdigit(static_cast<unsigned char>(at(0))) || at(0) == '-'))
+		throw NxsX_NotANumber();
+	const char *b = c_str();
+	char *endP;
+	long l = strtol(b, &endP, 10);
+
+	// strtol leaves endP at the start of the string (and returns 0) when nothing could be converted
+	//
+	if (l == 0 && endP == b)
+		throw NxsX_NotANumber();
+	return l;
+	}
+
+/*!
+	Converts the stored string to a double using the standard C function strtod, throwing NxsX_NotANumber if the conversion
+	fails: that is, if the string is empty, does not start with a digit, '-', '.', 'e' or 'E', or contains nothing that
+	strtod could convert. Returns DBL_MAX or -DBL_MAX if the number is out of bounds.
+*/
+double NxsString::ConvertToDouble() const
+	{
+	if (length() == 0)
+		throw NxsX_NotANumber();
+
+	// isdigit and toupper are undefined for negative arguments other than EOF, hence the
+	// unsigned char conversion of the first character
+	//
+	const char ch = at(0);
+	const unsigned char uch = static_cast<unsigned char>(ch);
+	const bool plausibleStart = (isdigit(uch) || ch == '-' || ch == '.'|| toupper(uch) == 'E');
+	if (!plausibleStart)
+		throw NxsX_NotANumber();
+
+	const char *b = c_str();
+	char *endP;
+	double d = strtod(b, &endP);
+
+	// strtod leaves endP at the start of the string (and returns 0) when nothing could be converted
+	//
+	if (d == 0.0 && endP == b)
+		throw NxsX_NotANumber();
+
+	// strtod reports overflow as plus or minus HUGE_VAL
+	//
+	if (d == HUGE_VAL)
+		return DBL_MAX;
+	if (d == -HUGE_VAL)
+		return -DBL_MAX;
+	return d;
+	}
+
+/*!
+	Transforms the vector of NxsString objects by making them all lower case and then capitalizing the first portion of
+	them so that the capitalized portion is enough to uniquely specify each. Returns true if the strings are long enough
+	to uniquely specify each. Horrendously bad algorithm, but shouldn't be called often.
+	Returns false as soon as an empty string is encountered. Note that strings visited before a false return have
+	already been changed to lower case.
+*/
+bool SetToShortestAbbreviation(
+  NxsStringVector	&strVec,		/* vector of NxsString objects */
+  bool 				allowTooShort)	/* if true, a string that runs out of characters while its prefix is being lengthened is accepted instead of causing a false return */
+	{
+	// upperCasePortion[k] is the capitalized prefix chosen so far for strVec[k]
+	//
+	NxsStringVector upperCasePortion;
+	unsigned i;
+	for (i = 0; i < strVec.size(); i++)
+		{
+		// Change the next string to lower case
+		//
+		strVec[i].ToLower();
+
+		unsigned prefLen = 0;
+		NxsString pref;
+
+		// Start with a one-letter prefix (not possible for an empty string)
+		//
+		if (prefLen >= strVec[i].size())
+			return false;
+		pref += (char) toupper(strVec[i][prefLen++]);
+		bool moreChars = true;
+
+		// Keep adding letters from the current string until pref is unique.
+		// Then add this pref to upperCasePortion (vector of previous prefs)
+		//
+		for (;moreChars;)
+			{
+			// Compare pref with each previously chosen prefix; any clash lengthens pref and
+			// breaks out of the inner loop so that the comparison starts over
+			//
+			unsigned prevInd = 0;
+			for (; prevInd < upperCasePortion.size(); prevInd++)
+				{
+				if (pref == upperCasePortion[prevInd])
+					{
+					// 	Conflict  - both abbreviations need to grow
+					//
+					if (prefLen >= strVec[i].size())
+						{
+						// The current string has no more characters to offer
+						//
+						if (allowTooShort)
+							{
+							if (prefLen < strVec[prevInd].size())
+								upperCasePortion[prevInd] += (char) toupper(strVec[prevInd][prefLen]);
+							moreChars = false;
+							break;
+							}
+						else
+							return false;
+						}
+					pref += (char) toupper(strVec[i][prefLen]);
+					if (prefLen >= strVec[prevInd].size())
+						{
+						// The earlier string has no more characters to offer
+						//
+						if (allowTooShort)
+							{
+							prevInd = 0;
+							prefLen++;
+							break;
+							}
+						else
+							return false;
+						}
+					upperCasePortion[prevInd] += (char) toupper(strVec[prevInd][prefLen++]);
+					prevInd = 0;
+					break;
+					}
+				else
+					{
+					// Check whether pref matches the first prefLen characters of the earlier prefix
+					//
+					unsigned j;
+					for (j = 0; j < prefLen; j++)
+						{
+						if (pref[j] != upperCasePortion[prevInd][j])
+							break;
+						}
+					if (j == prefLen)
+						{
+						//	pref agrees with the first part of another abbreviation, lengthen it.
+						//
+						if (prefLen >= strVec[i].size())
+							{
+							if (allowTooShort)
+								{
+								moreChars = false;
+								break;
+								}
+							else
+								return false;
+							}
+						pref += (char) toupper(strVec[i][prefLen++]);
+						break;
+						}
+					}
+				}
+			if (prevInd == upperCasePortion.size() || !moreChars)
+				{
+				// Made it all the way through with no problems, add this
+				// prefix as command i's upper case portion
+				//
+				upperCasePortion.push_back(pref);
+				break;
+				}
+			}
+		}
+
+	// Copy each capitalized prefix over the start of its (lower case) string
+	//
+	for (i = 0; i < strVec.size(); i++)
+		{
+		for (unsigned j = 0; j < upperCasePortion[i].size(); j++)
+			strVec[i][j] = upperCasePortion[i][j];
+		}
+
+	return true;
+	}
+
+/*!
+	Returns, in their original order, the elements of `possMatches' for which testStr.Abbreviates(element) is true.
+*/
+NxsStringVector GetVecOfPossibleAbbrevMatches(
+  const NxsString		&testStr,		/* string to match */
+  const NxsStringVector	&possMatches)	/* vector of possible matches */
+	{
+	NxsStringVector matches;
+	unsigned idx = 0;
+	while (idx < possMatches.size())
+		{
+		const NxsString &candidate = possMatches[idx];
+		if (testStr.Abbreviates(candidate))
+			matches.push_back(candidate);
+		++idx;
+		}
+	return matches;
+	}
+
+/*!
+	Written to make it easy to initialize a vector of strings. Similar to the perl split function. Converts a string like
+	this -- "A|bro|ken strin|g" -- to a vector of strings with four elements:  "A", "bro", "ken strin", and "g".
+	Every pipe separates two elements, so adjacent pipes and a leading or trailing pipe produce empty elements
+	("a||b" gives "a", "" and "b"; "a|" gives "a" and ""). An empty input gives a single empty element.
+*/
+NxsStringVector BreakPipeSeparatedList(
+  const NxsString &strList)	/* the string submitted for splitting */
+  	{
+	NxsStringVector retVec;
+	NxsString ss;
+	for (NxsString::const_iterator p = strList.begin(); p != strList.end(); ++p)
+		{
+		if (*p == '|')
+			{
+			// A separator closes the element collected so far (which may be empty)
+			//
+			retVec.push_back(ss);
+			ss.clear();
+			}
+		else
+			ss += *p;
+		}
+
+	// The text after the last separator (or the whole string if there was none) is the final element.
+	// Handling it here, after the loop, means the iterator is never dereferenced or advanced at end().
+	//
+	retVec.push_back(ss);
+	return retVec;
+	}
diff --git a/src/nxstaxaassociationblock.cpp b/src/nxstaxaassociationblock.cpp
new file mode 100644
index 0000000..898dfa3
--- /dev/null
+++ b/src/nxstaxaassociationblock.cpp
@@ -0,0 +1,237 @@
+ 
+//	Copyright (C) 1999-2003 Paul O. Lewis
+//
+//	This file is part of NCL (Nexus Class Library) version 2.0.
+//
+//	NCL is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	NCL is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with NCL; if not, write to the Free Software Foundation, Inc.,
+//	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+#include <iomanip>
+#include <cassert>
+#include <climits>
+#include <cstdlib>
+
+#include "ncl/nxstaxaassociationblock.h"
+#include "ncl/nxsreader.h"
+
+using namespace std;
+
+
+/*! Sets both taxa block pointers to null, names the block type "TAXAASSOCIATION" and calls Reset(). */
+NxsTaxaAssociationBlock::NxsTaxaAssociationBlock()
+	:firstTaxaBlock(NULL),
+	secondTaxaBlock(NULL)
+	{
+	NCL_BLOCKTYPE_ATTR_NAME = "TAXAASSOCIATION";
+	Reset();
+	}
+
+/*! Asks the reader for the TAXA block whose title is `value`.
+	\returns the block found.
+	\throws NxsException (positioned at `token`) if the reader returns NULL.
+*/
+NxsTaxaBlockAPI * NxsTaxaAssociationBlock::ProcessTaxaBlockName(const NxsString & value,  NxsToken &token) const {
+	assert(this->nexusReader);
+	NxsTaxaBlockAPI * const found = this->nexusReader->GetTaxaBlockByTitle(value.c_str(), NULL);
+	if (found != NULL)
+		return found;
+	errormsg = "Unknown TAXA block (";
+	errormsg += value;
+	errormsg +=") referred to in the TAXA command";
+	throw NxsException(errormsg, token);
+}
+
+/*! Parses "TAXA first-title , second-title ;" and stores the two TAXA blocks that the
+	titles resolve to in firstTaxaBlock and secondTaxaBlock.
+	\throws NxsNCLAPIException if the block has no reader to resolve titles with.
+	\throws NxsException if a title is unknown, the comma is missing, or the command
+	is not terminated by a semicolon.
+*/
+void NxsTaxaAssociationBlock::HandleTaxaCommand(
+  NxsToken &token)
+{
+	// The exception must actually be thrown; merely constructing it lets execution
+	// continue into ProcessTaxaBlockName() with a null reader.
+	if (!this->nexusReader)
+		throw NxsNCLAPIException("No NxsReader when reading TaxaAssociation block.");
+
+	token.GetNextToken();
+	this->firstTaxaBlock = this->ProcessTaxaBlockName(token.GetTokenReference(), token);
+	token.GetNextToken();
+	if (!token.Equals(","))
+		{
+		errormsg << "Expecting comma in the TAXA command, found \"" << token.GetTokenReference() << "\".";
+		throw NxsException(errormsg, token);
+		}
+	token.GetNextToken();
+	this->secondTaxaBlock = this->ProcessTaxaBlockName(token.GetTokenReference(), token);
+	NxsToken::DemandEndSemicolon(token, this->errormsg, "TAXA");
+}
+/*! Parses the ASSOCIATES command: a comma-separated sequence of groups of the form
+	"labels-from-first-block / labels-from-second-block", ended by a semicolon.
+	Each index resolved on the left of the "/" is passed to AddAssociation() together
+	with the whole set of indices resolved on the right.
+	\throws NxsException if the TAXA command has not set both blocks, a label cannot be
+	resolved, either side of a group is empty, or the expected punctuation is missing.
+*/
+void NxsTaxaAssociationBlock::HandleAssociatesCommand(
+  NxsToken &token)
+{
+	if (this->firstTaxaBlock == 0L || this->secondTaxaBlock == 0L)
+		{
+		errormsg << "Expecting TAXA command to precede an ASSOCIATES command.";
+		throw NxsException(errormsg, token);
+		}
+	token.GetNextToken();
+	bool moreGroups = true;
+	while (moreGroups)
+		{
+		// Left-hand side: labels are resolved against the first TAXA block until a
+		// ';', ',' or '/' punctuation token is reached.
+		std::set<unsigned> firstInds;
+		while (!(token.IsPunctuationToken() && (token.Equals(";") || token.Equals(",") || token.Equals("/"))))
+			{
+			try
+				{
+				this->firstTaxaBlock->GetIndicesForLabel(token.GetTokenReference(), &firstInds);
+				}
+			catch(...)
+				{
+				errormsg << "Unrecognized taxon \"" << token.GetTokenReference() << "\" in ASSOCIATES command";
+				throw NxsException(errormsg, token);
+				}
+			token.GetNextToken();
+			}
+		if (!token.Equals("/"))
+			{
+			errormsg << "Expecting / in ASSOCIATES command, found \"" << token.GetTokenReference() << "\"";
+			throw NxsException(errormsg, token);
+			}
+		if (firstInds.empty())
+			{
+			errormsg << "Expecting taxon labels from the first TAXA block before the / in ASSOCIATES command.";
+			throw NxsException(errormsg, token);
+			}
+		token.GetNextToken();
+
+		// Right-hand side: same scan, resolved against the second TAXA block.
+		std::set<unsigned> secondInds;
+		while (!(token.IsPunctuationToken() && (token.Equals(";") || token.Equals(",") || token.Equals("/"))))
+			{
+			try
+				{
+				this->secondTaxaBlock->GetIndicesForLabel(token.GetTokenReference(), &secondInds);
+				}
+			catch(...)
+				{
+				errormsg << "Unrecognized taxon \"" << token.GetTokenReference() << "\" in ASSOCIATES command";
+				throw NxsException(errormsg, token);
+				}
+			token.GetNextToken();
+			}
+		const bool atSemicolon = token.Equals(";");
+		if (!atSemicolon && !token.Equals(","))
+			{
+			errormsg << "Expecting , or ; in ASSOCIATES command, found \"" << token.GetTokenReference() << "\"";
+			throw NxsException(errormsg, token);
+			}
+		if (secondInds.empty())
+			{
+			errormsg << "Expecting taxon labels from the second TAXA block after the / in ASSOCIATES command.";
+			throw NxsException(errormsg, token);
+			}
+
+		std::set<unsigned>::const_iterator leftIt = firstInds.begin();
+		for (; leftIt != firstInds.end(); ++leftIt)
+			this->AddAssociation(*leftIt, secondInds);
+
+		moreGroups = !atSemicolon;
+		if (moreGroups)
+			token.GetNextToken();
+		}
+}
+
+/*! Reads the body of a TAXAASSOCIATION block. Commands not consumed by
+	HandleBasicBlockCommands() are dispatched to HandleTaxaCommand() or
+	HandleAssociatesCommand(); anything else is skipped.
+*/
+void NxsTaxaAssociationBlock::Read(
+  NxsToken &token)	/* the token used to read from `in' */
+	{
+	isEmpty = false;
+
+	DemandEndSemicolon(token, "BEGIN TAXAASSOCIATION");
+
+	for (;;)
+		{
+		token.GetNextToken();
+		const NxsBlock::NxsCommandResult outcome = HandleBasicBlockCommands(token);
+		if (outcome == NxsBlock::NxsCommandResult(STOP_PARSING_BLOCK))
+			return;
+		if (outcome == NxsBlock::NxsCommandResult(HANDLED_COMMAND))
+			continue;
+		if (token.Equals("TAXA"))
+			{
+			HandleTaxaCommand(token);
+			continue;
+			}
+		if (token.Equals("ASSOCIATES"))
+			{
+			HandleAssociatesCommand(token);
+			continue;
+			}
+		SkipCommand(token);
+		}
+	}
+
+/*! Writes a newline and, when both TAXA blocks are set, the sizes of the two
+	association maps together with the titles of the blocks they connect.
+*/
+void NxsTaxaAssociationBlock::Report(std::ostream &out) NCL_COULD_BE_CONST  /*v2.1to2.2 1 */
+{
+	out << '\n';
+	if (!this->firstTaxaBlock || !this->secondTaxaBlock)
+		return;
+	out << NCL_BLOCKTYPE_ATTR_NAME << " block contains the following:\n";
+	out << firstToSecond.size() << " associations between taxa in ";
+	out << this->firstTaxaBlock->GetTitle() << " and " << this->secondTaxaBlock->GetTitle() << '\n';
+	out << secondToFirst.size() << " associations between taxa in ";
+	out << this->secondTaxaBlock->GetTitle() << " and " << this->firstTaxaBlock->GetTitle() << '\n';
+}
+
+/*! Calls NxsBlock::Reset(), empties both association maps and forgets both TAXA blocks. */
+void NxsTaxaAssociationBlock::Reset()
+{
+	NxsBlock::Reset();
+	this->firstTaxaBlock = NULL;
+	this->secondTaxaBlock = NULL;
+	this->firstToSecond.clear();
+	this->secondToFirst.clear();
+}
+
+/*! Writes the block in NEXUS form to `out`: the basic block commands, a TAXA command
+	naming both blocks, and one ASSOCIATES group per entry of firstToSecond.
+	Nothing is written unless both TAXA blocks are set.
+*/
+void NxsTaxaAssociationBlock::WriteAsNexus(std::ostream &out) const
+	{
+	if (!this->firstTaxaBlock || !this->secondTaxaBlock)
+		return;
+
+	out << "BEGIN TAXAASSOCIATION;\n";
+	WriteBasicBlockCommands(out);
+
+	out << "    TAXA ";
+	const std::string firstTitle = this->firstTaxaBlock->GetTitle();
+	out << NxsString::GetEscaped(firstTitle) << " , ";
+	const std::string secondTitle = this->secondTaxaBlock->GetTitle();
+	out << NxsString::GetEscaped(secondTitle) << ";\n";
+
+	out << "    ASSOCIATES\n        ";
+	const AssociationMap::const_iterator groupsBegin = this->firstToSecond.begin();
+	for (AssociationMap::const_iterator groupIt = groupsBegin; groupIt != this->firstToSecond.end(); ++groupIt)
+		{
+		// Every group after the first starts on its own indented line.
+		if (groupIt != groupsBegin)
+			out << ",\n        ";
+		const std::string leftLabel = this->firstTaxaBlock->GetTaxonLabel(groupIt->first);
+		out << NxsString::GetEscaped(leftLabel) << " / ";
+		const std::set<unsigned> & partners = groupIt->second;
+		std::set<unsigned>::const_iterator partnerIt = partners.begin();
+		for (; partnerIt != partners.end(); ++partnerIt)
+			{
+			const std::string rightLabel = this->secondTaxaBlock->GetTaxonLabel(*partnerIt);
+			out << NxsString::GetEscaped(rightLabel) << ' ';
+			}
+		}
+	out << ";\n";
+
+	out << "END;\n";
+	}
+
+/*! \returns a new NxsTaxaAssociationBlock (link API disabled) when `reader` is non-null and
+	`idneeded` is exactly "TAXAASSOCIATION"; NULL otherwise.
+*/
+NxsTaxaAssociationBlock *NxsTaxaAssociationBlockFactory::GetBlockReaderForID(const std::string & idneeded, NxsReader *reader, NxsToken *)
+	{
+	const bool canServe = (reader != NULL && idneeded == "TAXAASSOCIATION");
+	if (!canServe)
+		return NULL;
+	NxsTaxaAssociationBlock * created = new NxsTaxaAssociationBlock();
+	created->SetImplementsLinkAPI(false);
+	return created;
+	}
diff --git a/src/nxstaxablock.cpp b/src/nxstaxablock.cpp
new file mode 100644
index 0000000..1b7ae4b
--- /dev/null
+++ b/src/nxstaxablock.cpp
@@ -0,0 +1,633 @@
+//	Copyright (C) 1999-2003 Paul O. Lewis
+//
+//	This file is part of NCL (Nexus Class Library) version 2.0.
+//
+//	NCL is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	NCL is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with NCL; if not, write to the Free Software Foundation, Inc.,
+//	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+#include <climits>
+#include "ncl/nxstaxablock.h"
+#include "ncl/nxsreader.h"
+
+using namespace std;
+
+/*! \return the largest valid 0-based taxon index (dimNTax - 1), or UINT_MAX when dimNTax is 0.
+*/
+unsigned NxsTaxaBlock::GetMaxIndex() const
+	{
+	return (dimNTax == 0 ? UINT_MAX : dimNTax - 1);
+	}
+/*! \return the number of entries currently held in taxLabels. */
+unsigned NxsTaxaBlock::GetNumLabelsCurrentlyStored() const
+	{
+	const unsigned nStored = (unsigned)taxLabels.size();
+	return nStored;
+	}
+
+
+/*! \return the result of GetTaxonLabel(i) for every i in [0, GetNTaxTotal()), in index order,
+	as plain std::string objects.
+*/
+std::vector<std::string> NxsTaxaBlockAPI::GetAllLabels() const
+	{
+	const unsigned total = GetNTaxTotal();
+	std::vector<std::string> labels;
+	labels.reserve(total);
+	for (unsigned taxInd = 0; taxInd < total; ++taxInd)
+		{
+		const NxsString current = GetTaxonLabel(taxInd);  /*v2.1to2.2 4 */
+		labels.push_back(std::string(current.c_str()));
+		}
+	return labels;
+	}
+
+/* Upper-cases a copy of `label` and forwards it to CapitalizedTaxLabelToNumber(), so the
+	lookup is not case-sensitive.
+
+	\returns the value returned by CapitalizedTaxLabelToNumber(); callers in this file treat
+	0 as "not found" and any other value as a 1-based taxon number.
+
+	\warning{does NOT apply the numeric interpretation of the label.}
+
+	\warning{ 1-based numbering}
+*/
+unsigned NxsTaxaBlock::TaxLabelToNumber(const std::string &label) const
+	{
+	std::string upperLabel(label.c_str());
+	NxsString::to_upper(upperLabel);
+	const unsigned oneBased = CapitalizedTaxLabelToNumber(upperLabel);
+	return oneBased;
+	}
+
+/* Used internally in reading of sets.
+
+	The label is first tried as a taxon label via TaxLabelToNumber(); if that finds a taxon,
+	its 0-based index is inserted into *inds (when inds is non-null) and 1 is returned.
+	Otherwise the label is handed to GetIndicesFromSetOrAsNumber() together with the stored
+	taxon sets and the maximum valid index.
+
+ \returns the number of indices that correspond to the label (and the number
+ of items that would be added to *inds if inds points to an empty set).
+*/
+unsigned NxsTaxaBlock::GetIndicesForLabel(const std::string &label, /*!< label, set name or string with the 1-based numeric representation of the object */
+	NxsUnsignedSet *inds) const /* The set of indices to add the taxa indices to (can be 0L). */
+	{
+	const unsigned numb = TaxLabelToNumber(label);
+	if (numb != 0)
+		{
+		// TaxLabelToNumber() is 1-based; the set stores 0-based indices.
+		if (inds)
+			inds->insert(numb - 1);
+		return 1;
+		}
+	return GetIndicesFromSetOrAsNumber(label, inds, taxSets, GetMaxIndex(), "taxon");
+	}
+
+/* Stores `inds` in taxSets under `label`.
+	\returns true if an entry with that label was already present (and has been overwritten).
+*/
+bool NxsTaxaBlock::AddNewIndexSet(const std::string &label, const NxsUnsignedSet & inds)
+	{
+	const NxsString setName(label.c_str());
+	const bool alreadyThere = (taxSets.count(setName) != 0);
+	taxSets[setName] = inds;
+	return alreadyThere;
+	}
+/* Stores `inds` in taxPartitions under `label`.
+	\returns true if this partition replaces an older definition with the same label.
+*/
+bool NxsTaxaBlock::AddNewPartition(const std::string &label, const NxsPartition & inds)
+	{
+	const NxsString partName(label.c_str());
+	const bool alreadyThere = (taxPartitions.count(partName) != 0);
+	taxPartitions[partName] = inds;
+	return alreadyThere;
+	}
+
+/* Names the block type "TAXA" and starts with a taxon count (dimNTax) of 0.
+*/
+NxsTaxaBlock::NxsTaxaBlock()
+	{
+	NCL_BLOCKTYPE_ATTR_NAME = "TAXA";
+	dimNTax = 0;
+	}
+
+/* Destructor: performs no explicit cleanup of its own. */
+NxsTaxaBlock::~NxsTaxaBlock()
+	{}
+
+/*! Resets the block and then reads the body of a TAXA block.
+
+	Commands not consumed by NxsBlock::HandleBasicBlockCommands() are handled as follows:
+	DIMENSIONS must be "NTAX = <positive integer> ;" and sets dimNTax; TAXLABELS is passed
+	to NxsTaxaBlock::HandleTaxLabels(); all other commands are skipped.
+*/
+void NxsTaxaBlock::Read(
+  NxsToken &token)	/* the token used to read from in */
+	{
+	Reset();
+	isEmpty = false;
+	isUserSupplied = true;
+
+	DemandEndSemicolon(token, "BEGIN TAXA");
+
+	for (;;)
+		{
+		token.GetNextToken();
+		const NxsBlock::NxsCommandResult outcome = HandleBasicBlockCommands(token);
+		if (outcome == NxsBlock::NxsCommandResult(STOP_PARSING_BLOCK))
+			return;
+		if (outcome == NxsBlock::NxsCommandResult(HANDLED_COMMAND))
+			continue;
+
+		if (token.Equals("DIMENSIONS"))
+			{
+			token.GetNextToken();
+			if (!token.Equals("NTAX"))
+				{
+				errormsg = "Expecting NTAX keyword, but found ";
+				errormsg += token.GetToken();
+				errormsg += " instead";
+				throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+				}
+			DemandEquals(token, "after NTAX");
+			dimNTax = DemandPositiveInt(token, "NTAX");
+			// Make room for the labels that a later TAXLABELS command will add.
+			taxLabels.reserve(dimNTax);
+			DemandEndSemicolon(token, "DIMENSIONS");
+			continue;
+			}
+		if (token.Equals("TAXLABELS"))
+			{
+			HandleTaxLabels(token);
+			continue;
+			}
+		SkipCommand(token);
+		}
+	}
+
+/*! Discards the stored labels and reads exactly dimNTax new ones from `token`, followed by
+	the terminating semicolon.
+
+	\throws NxsException if dimNTax is 0, or if AddTaxonLabel() rejects a label; in the latter
+	case the message is re-thrown with the position of `token` attached.
+*/
+void NxsTaxaBlock::HandleTaxLabels(NxsToken &token)
+	{
+	if (dimNTax == 0)
+		{
+		errormsg = "NTAX must be specified before TAXLABELS command";
+		throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+		}
+	taxLabels.clear();
+	labelToIndex.clear();
+	unsigned nRead = 0;
+	while (nRead < dimNTax)
+		{
+		token.GetNextToken();
+		try
+			{
+			NxsString nextLabel = token.GetToken();
+			AddTaxonLabel(nextLabel);
+			}
+		catch (const NxsException & labelErr)
+			{
+			throw NxsException(labelErr.msg, token);
+			}
+		++nRead;
+		}
+	DemandEndSemicolon(token, "TAXLABELS");
+	}
+
+/* Writes a brief report of this taxa block to `out`: the number of taxa followed by one
+	numbered line per taxon label.
+*/
+void NxsTaxaBlock::Report(
+  std::ostream &out) NCL_COULD_BE_CONST /* the output stream to which to write the report */ /*v2.1to2.2 1 */
+	{
+	out << endl << NCL_BLOCKTYPE_ATTR_NAME << " block contains ";
+	if (dimNTax == 0)
+		{
+		out << "no taxa" << endl;
+		return;
+		}
+
+	if (dimNTax != 1)
+		out << dimNTax << " taxa" << endl;
+	else
+		out << "1 taxon" << endl;
+
+	unsigned taxInd = 0;
+	while (taxInd < dimNTax)
+		{
+		out << "    " << (taxInd+1) << "    " << GetTaxonLabel(taxInd) << endl;
+		++taxInd;
+		}
+	}
+
+/* Writes this block to `out' in NEXUS format: BEGIN line, basic block commands, the
+	DIMENSIONS and TAXLABELS commands, any skipped commands, and the END line.
+*/
+void NxsTaxaBlock::WriteAsNexus(std::ostream &out) const
+	{
+	out << "BEGIN TAXA;\n";
+	this->WriteBasicBlockCommands(out);
+	out << "    DIMENSIONS NTax = ";
+	out << dimNTax;
+	out << ";\n";
+	WriteTaxLabelsCommand(out);
+	this->WriteSkippedCommands(out);
+	out << "END;\n";
+	}
+
+/* Writes the NEXUS TAXLABELS command with every stored label escaped; writes nothing when
+	GetNumTaxonLabels() reports no labels.
+*/
+void NxsTaxaBlock::WriteTaxLabelsCommand(std::ostream &out) const
+	{
+	if (this->GetNumTaxonLabels() == 0)
+		return;
+	out << "    TAXLABELS";
+	NxsStringVector::const_iterator labelIt = taxLabels.begin();
+	while (labelIt != taxLabels.end())
+		{
+		out << ' ' << NxsString::GetEscaped(*labelIt);
+		++labelIt;
+		}
+	out << ";\n";
+	}
+
+/* Returns the block to its empty state in preparation for reading a new TAXA block: calls
+	NxsBlock::Reset(), sets dimNTax to 0 and empties the labels, the label index, the
+	inactive-taxa set, the taxon sets and the taxon partitions.
+*/
+void NxsTaxaBlock::Reset()
+	{
+	NxsBlock::Reset();
+	dimNTax = 0;
+	taxLabels.clear();
+	labelToIndex.clear();
+	taxSets.clear();
+	taxPartitions.clear();
+	inactiveTaxa.clear();
+	}
+
+/* Appends taxon label `rs` to the list of taxon labels and records its upper-cased form in
+	labelToIndex. The label is validated with CheckCapitalizedTaxonLabel() first, so any
+	exception from that check leaves the block unchanged. dimNTax is not modified.
+
+	\returns the (0-based) index of taxon label just added.
+*/
+unsigned NxsTaxaBlock::AddTaxonLabel(
+  const std::string & rs)	/* the taxon label to add */
+	{
+	const unsigned newIndex = (unsigned)taxLabels.size();
+	std::string upperLabel(rs.c_str());
+	NxsString::to_upper(upperLabel);
+	CheckCapitalizedTaxonLabel(upperLabel);
+	taxLabels.push_back(NxsString(rs.c_str()));
+	labelToIndex[upperLabel] = newIndex;
+	return newIndex;
+	}
+
+/*  Validates the (already upper-cased) label `s`; returns normally if it could be added to
+	the list of taxon labels.
+
+	\throws a NxsException if more labels are stored than dimNTax allows, or if `s` is a
+	single NEXUS punctuation character, and
+	\throws a DuplicatedLabelNxsException if the label is already in the block
+*/
+void NxsTaxaBlock::CheckCapitalizedTaxonLabel(
+  const std::string &s) const /*!< potential taxon label to check */
+  {
+	const unsigned nStored = (unsigned)taxLabels.size();
+	if (nStored > dimNTax)
+		{
+		NxsString tooMany = "Number of stored labels exceeds the NTax specified.";
+		throw NxsException(tooMany);
+		}
+
+	const bool alreadyKnown = (CapitalizedTaxLabelToNumber(s) != 0);
+	if (alreadyKnown)
+		{
+		NxsString dup = "TaxLabels cannot be repeated. The label ";
+		dup << s << " has already been stored.";
+		throw DuplicatedLabelNxsException(dup);
+		}
+
+	if (s.length() != 1)
+		return;
+	if (NxsString::IsNexusPunctuation(s[0]))
+		{
+		NxsString illegal = "Illegal TaxLabel found:\n";
+		illegal << s << "\n TaxLabels cannot be punctuation.";
+		throw NxsException(illegal);
+		}
+	}
+
+
+/* Replaces the label of taxon `i` (0-based) with `s`.
+
+	The old label is taken out of the block before the new one is validated, so that
+	re-assigning a taxon its current label (in any case) is not reported as a duplicate.
+	If validation fails, the old label and its labelToIndex entry are put back before the
+	exception is re-thrown, so a rejected change leaves the block as it was.
+
+	\throws NxsNCLAPIException if `i` is not the index of a stored label.
+	\throws whatever CheckCapitalizedTaxonLabel() throws for an illegal or duplicated label.
+*/
+void NxsTaxaBlock::ChangeTaxonLabel(
+  unsigned i,	/* the taxon label number to change */
+  NxsString s)	/* the string used to replace label i */ /*v2.1to2.2 4 */
+	{
+	if (i >= (unsigned)taxLabels.size())
+		{
+		NxsString e = "The label for taxon ";
+		e << (i+1) << " cannot be changed, because the only " << (unsigned)taxLabels.size() << " taxlabel(s) have been assigned.";
+		throw NxsNCLAPIException(e);
+		}
+
+	// Remember everything RemoveTaxonLabel() is about to discard.
+	const NxsString oldLabel(taxLabels[i].c_str());
+	std::string oldUpper(oldLabel.c_str());
+	NxsString::to_upper(oldUpper);
+	const bool hadEntry = (labelToIndex.count(oldUpper) > 0);
+	const unsigned oldMappedInd = (hadEntry ? labelToIndex[oldUpper] : 0);
+
+	RemoveTaxonLabel(i);
+	std::string x(s.c_str());
+	NxsString::to_upper(x);
+	try
+		{
+		CheckCapitalizedTaxonLabel(x);
+		}
+	catch (...)
+		{
+		// Roll back the removal so the failed rename has no effect.
+		taxLabels[i] = oldLabel;
+		if (hadEntry)
+			labelToIndex[oldUpper] = oldMappedInd;
+		throw;
+		}
+	taxLabels[i] = NxsString(s.c_str()); /* odd construct for v2.1->v2.2 translation */
+	labelToIndex[x] = i;
+	}
+
+/* Blanks the label stored at index `i` and erases its upper-cased form from labelToIndex.
+	The slot itself stays in taxLabels. `i` is not range-checked here.
+*/
+void NxsTaxaBlock::RemoveTaxonLabel(
+  unsigned i)	/* the taxon label number to remove */
+	{
+	std::string upperOld(taxLabels[i].c_str());
+	NxsString::to_upper(upperOld);
+	labelToIndex.erase(upperOld);
+	taxLabels[i].clear();
+	}
+
+/* Returns the length of the longest taxon label stored (0 if none are stored). Useful for
+	formatting purposes in outputting the data matrix (i.e., you want the left edge of the
+	matrix to line up).
+*/
+unsigned NxsTaxaBlock::GetMaxTaxonLabelLength()
+	{
+	unsigned longest = 0;
+	for (NxsStringVector::const_iterator labelIt = taxLabels.begin(); labelIt != taxLabels.end(); ++labelIt)
+		{
+		const unsigned len = (unsigned)labelIt->size();
+		longest = (len > longest ? len : longest);
+		}
+	return longest;
+	}
+
+/* Returns the label for taxon 'i', where i is in the range [0-dimNTax). A taxon without a
+	stored label gets its 1-based number as a default label.
+
+	\throws NxsNCLAPIException if i >= dimNTax.
+*/
+NxsString NxsTaxaBlock::GetTaxonLabel(unsigned i) const  /*v2.1to2.2 4 */
+	{
+	if (i >= dimNTax)
+		{
+		NxsString e = "The  taxon index ";
+		e << i  << " is out of range.  Only " << dimNTax << " taxa in block.";
+		throw NxsNCLAPIException(e);
+		}
+	const bool hasStoredLabel = (i < (unsigned)taxLabels.size());
+	if (!hasStoredLabel)
+		{
+		NxsString numberAsLabel;
+		numberAsLabel += (i + 1);
+		return numberAsLabel;
+		}
+	return taxLabels[i];
+	}
+
+/*!
+	Returns the result of NxsString::QuotesNeeded() for the label of taxon `i`, i.e. whether
+	that label should be quoted when written in NEXUS format.
+*/
+bool NxsTaxaBlock::NeedsQuotes(
+  unsigned i)	/* the taxon label number in question */
+	{
+	const NxsString label(GetTaxonLabel(i).c_str());
+	const bool quote = label.QuotesNeeded();
+	return quote;
+	}
+
+/*! Returns true if TaxLabelToNumber() finds a taxon for `s`, and false otherwise.
+*/
+bool NxsTaxaBlock::IsAlreadyDefined(
+  const std::string & s)	/* the label to look up */
+	{
+	const unsigned oneBased = TaxLabelToNumber(s);
+	return oneBased != 0;
+	}
+
+/*!
+	Returns the (0-based) index of the taxon named 's'. If TaxLabelToNumber() does not find
+	the name, throws NxsX_NoSuchTaxon.
+*/
+unsigned NxsTaxaBlock::FindTaxon(
+  const NxsString &s) const /* the string to attempt to find in the taxonLabels list */  /*v2.1to2.2 4 */
+	{
+	const unsigned oneBased = TaxLabelToNumber(s);
+	if (oneBased != 0)
+		return oneBased - 1;
+	throw NxsTaxaBlock::NxsX_NoSuchTaxon();
+	}
+
+/*! Returns the number of taxon labels currently stored in taxLabels.
+*/
+unsigned NxsTaxaBlock::GetNumTaxonLabels() const
+	{
+	const unsigned nLabels = (unsigned)taxLabels.size();
+	return nLabels;
+	}
+
+/*! Sets dimNTax to n. Labels stored beyond the new count are removed (together with their
+	labelToIndex entries); otherwise storage for n labels is reserved.
+*/
+void NxsTaxaBlock::SetNtax(
+  unsigned n)	/* the number of taxa */
+	{
+	dimNTax = n;
+	if (taxLabels.size() <= dimNTax)
+		{
+		taxLabels.reserve(dimNTax);
+		return;
+		}
+	for (unsigned extra = dimNTax; extra < taxLabels.size(); ++extra)
+		RemoveTaxonLabel(extra);
+	taxLabels.resize(dimNTax);
+	}
+
+/*! \returns a new NxsTaxaBlock (link API disabled) when `reader` is non-null and `idneeded`
+	is exactly "TAXA"; NULL otherwise.
+*/
+NxsTaxaBlock *NxsTaxaBlockFactory::GetBlockReaderForID(const std::string & idneeded, NxsReader *reader, NxsToken *)
+	{
+	const bool canServe = (reader != NULL && idneeded == "TAXA");
+	if (!canServe)
+		return NULL;
+	NxsTaxaBlock * created = new NxsTaxaBlock();
+	created->SetImplementsLinkAPI(false);
+	return created;
+	}
+
+/*! \returns the taxa block pointer held by this surrogate (may be NULL). If `status` is
+	non-null, the current taxa link status is written to *status.
+*/
+NxsTaxaBlockAPI * NxsTaxaBlockSurrogate::GetTaxaBlockPtr(int *status) const
+	{
+	if (status != NULL)
+		{
+		*status = GetTaxaLinkStatus();
+		}
+	return taxa;
+	}
+
+
+/*! Stores `s` as the taxa link status.
+	\throws NxsNCLAPIException if the current status has the BLOCK_LINK_USED bit set.
+*/
+void NxsTaxaBlockSurrogate::SetTaxaLinkStatus(NxsBlock::NxsBlockLinkStatus s)
+	{
+	const bool linkAlreadyUsed = ((taxaLinkStatus & NxsBlock::BLOCK_LINK_USED) != 0);
+	if (linkAlreadyUsed)
+		throw NxsNCLAPIException("Resetting a used taxaLinkStatus");
+	taxaLinkStatus = s;
+	}
+
+
+/*! Updates the link status via SetTaxaLinkStatus() (which may throw) and, only if that
+	succeeds, points this surrogate at taxa block `c`.
+*/
+void NxsTaxaBlockSurrogate::SetTaxaBlockPtr(NxsTaxaBlockAPI *c, NxsBlock::NxsBlockLinkStatus s)
+	{
+	this->SetTaxaLinkStatus(s);
+	this->taxa = c;
+	}
+
+/* Only used if the link API is enabled.
+
+	Parses the key=value pairs of a LINK command. For the key TAXA (compared after
+	upper-casing), the surrogate is re-pointed at the TAXA block whose title equals the value:
+	a currently linked block with a different ID is dropped first (an error if that link has
+	already been used), and the new block is looked up through the reader. Any other key is
+	reported to the reader as a skipped-content warning.
+
+	\throws NxsException if the link is changed after it has been used, or if no TAXA block
+	with the requested title exists (the entry state of taxa/taxaLinkStatus is restored first).
+	\throws NxsNCLAPIException if a TAXA block has to be looked up but there is no reader.
+*/
+void NxsTaxaBlockSurrogate::HandleLinkTaxaCommand(NxsToken & token)
+	{
+	token.GetNextToken();
+	const std::map<std::string, std::string> kv = token.ProcessAsSimpleKeyValuePairs("LINK");
+	std::map<std::string, std::string>::const_iterator pairIt = kv.begin();
+	for (;pairIt != kv.end(); ++pairIt)
+		{
+		NxsTaxaBlockAPI *entryTaxa = taxa;
+		int entryTaxaLinkStatus = taxaLinkStatus;
+		NxsString key(pairIt->first.c_str());
+		key.ToUpper();
+		NxsString value(pairIt->second.c_str());
+		if (key == "TAXA")
+			{
+			if (taxa && !taxa->GetID().EqualsCaseInsensitive(value))
+				{
+				if (GetTaxaLinkStatus() & NxsBlock::BLOCK_LINK_USED)
+					{
+					NxsString errormsg = "LINK to a Taxa block must occur before commands that use a taxa block";
+					throw NxsException(errormsg, token);
+					}
+				SetTaxaBlockPtr(NULL, NxsBlock::BLOCK_LINK_UNINITIALIZED);
+				}
+			if (!taxa)
+				{
+				if (!nxsReader)
+					{
+					NxsString errormsg =  "API Error: No nxsReader during parse in NxsTaxaBlockSurrogate::HandleLinkTaxaCommand";
+					throw NxsNCLAPIException(errormsg, token);
+					}
+				NxsTaxaBlockAPI * cb = nxsReader->GetTaxaBlockByTitle(value.c_str(), NULL);
+				if (cb == NULL)
+					{
+					NxsString errormsg = "Unknown TAXA block (";
+					errormsg += value;
+					errormsg +=") referred to in the LINK command";
+					// Undo the unlinking done above before reporting the failure.
+					taxa = entryTaxa;
+					taxaLinkStatus = entryTaxaLinkStatus;
+					throw NxsException(errormsg, token);
+					}
+				SetTaxaBlockPtr(cb, NxsBlock::BLOCK_LINK_FROM_LINK_CMD);
+				}
+			}
+		else if (nxsReader)
+			{
+			// The warning is purely informational and is delivered through the reader;
+			// without a reader there is nowhere to send it, so the subcommand is
+			// skipped silently instead of dereferencing a null pointer.
+			NxsString errormsg = "Skipping unknown LINK subcommand: ";
+			errormsg += pairIt->first.c_str();
+			nxsReader->NexusWarnToken(errormsg, NxsReader::SKIPPING_CONTENT_WARNING, token); //this token pos will be off a bit.
+			}
+		}
+	}
+/* Writes "LINK TAXA = <escaped title>;" to `out` when a taxa block is linked and its title
+	is not empty; otherwise writes nothing.
+*/
+void NxsTaxaBlockSurrogate::WriteLinkTaxaCommand(std::ostream &out) const
+	{
+	if (!taxa)
+		return;
+	if (taxa->GetTitle().empty())
+		return;
+	out << "    LINK TAXA = " << NxsString::GetEscaped(taxa->GetTitle()) << ";\n";
+	}
+
+/* This function is called by derived classes right before they start to parse a command
+	that requires a Taxa block. This enables lazy initialization of the taxa field.
+
+	When `allocBlock` is false: nothing happens if a taxa block is already set. Otherwise the
+	reader is asked for a TAXA block (no title given); if there is none a NxsException is
+	thrown, and if more than one has been read a "dangerous content" warning is issued
+	through the block that was returned before it is adopted.
+
+	When `allocBlock` is true: a new block is requested from the reader's taxa block factory
+	(if a reader and a factory exist); should that yield nothing, a NxsTaxaBlock is allocated
+	directly. The new block is marked as owned by this surrogate and as an implied block.
+
+	`cmd` (may be NULL) is the command name inserted into the messages.
+
+	\throws NxsNCLAPIException if `allocBlock` is false, no taxa block is set and there is no reader.
+	\throws NxsException if `allocBlock` is false and the reader knows no TAXA block.
+*/
+void NxsTaxaBlockSurrogate::AssureTaxaBlock(bool allocBlock, NxsToken &token, const char *cmd)
+	{
+	if (!allocBlock)
+		{
+		if (taxa != NULL)
+			return;
+		if (!nxsReader)
+			{
+			NxsString  errormsg =  "API Error: No nxsReader during parse in NxsTaxaBlockSurrogate::AssureTaxaBlock";
+			throw NxsNCLAPIException(errormsg, token);
+			}
+		unsigned nTb;
+		NxsTaxaBlockAPI * cb = nxsReader->GetTaxaBlockByTitle(NULL, &nTb);
+		if (cb == NULL)
+			{
+			NxsString errormsg =  "TAXA Block has been not been read, but a ";
+			if (cmd)
+				errormsg += cmd;
+			errormsg += " command (which requires a TAXA block) has been encountered. Either add a TAXA block or (for blocks other than the TREES block) you may use a \"DIMENSIONS NEWTAXA NTAX= ...\" command to introduces taxa.";
+			throw NxsException(errormsg, token);
+			}
+		if (nTb > 1)
+			{
+			NxsString errormsg =  "Multiple TAXA Blocks have been read (or implied using NEWTAXA in other blocks) and a ";
+			if (cmd)
+				errormsg += cmd;
+			errormsg += " command (which requires a TAXA block) has been encountered";
+			std::string bn = token.GetBlockName();
+			if (!bn.empty())
+				{
+				// No period here: the sentence is closed by the text appended below,
+				// which would otherwise produce "block..".
+				errormsg += " in a ";
+				errormsg += bn;
+				errormsg += " block";
+				}
+			errormsg += ".\nThis can be caused by reading multiple files. It is possible that\neach file is readable separately, but cannot be read unambiguously when read in sequence.\n";
+			errormsg += "One way to correct this is to use the\n    TITLE some-unique-name-here ;\ncommand in the TAXA block and an accompanying\n    LINK TAXA=the-unique-title-goes here;\n";
+			errormsg += "command to specify which TAXA block is needed.";
+			cb->WarnDangerousContent(errormsg, token);
+			}
+		taxa = cb;
+		return;
+		}
+	if (nxsReader != NULL)
+		{
+		NxsTaxaBlockFactory * tbf = nxsReader->GetTaxaBlockFactory();
+		if (tbf)
+			{
+			std::string s("TAXA");
+			taxa = tbf->GetBlockReaderForID(s, nxsReader, &token);
+			ownsTaxaBlock = true;
+			passedRefOfOwnedBlock = false;
+			taxaLinkStatus = NxsBlock::BLOCK_LINK_TO_IMPLIED_BLOCK;
+			}
+		}
+	if (taxa == NULL)
+		{
+		// No reader, no factory, or the factory declined: allocate the block directly.
+		taxa = new NxsTaxaBlock();
+		ownsTaxaBlock = true;
+		passedRefOfOwnedBlock = false;
+		taxaLinkStatus = NxsBlock::BLOCK_LINK_TO_IMPLIED_BLOCK;
+		}
+	}
+
+/* Replaces the taxa block of this surrogate with the equivalent block `tb`. If the surrogate
+	owns its current block and the reader supplies a taxa block factory, the current block is
+	first handed to the factory's BlockError(). The link status becomes
+	BLOCK_LINK_EQUIVALENT_TO_IMPLIED.
+
+	\returns true (SetTaxaBlockPtr() may throw instead).
+*/
+bool NxsTaxaBlockSurrogate::SurrogateSwapEquivalentTaxaBlock(NxsTaxaBlockAPI * tb)
+	{
+		// nxsReader may be null (the other members of this class test for it), so only
+		// ask it for the factory when it exists.
+		NxsTaxaBlockFactory * tbf = (nxsReader != NULL ? nxsReader->GetTaxaBlockFactory() : NULL);
+		if (this->taxa && this->ownsTaxaBlock && tbf)
+			tbf->BlockError(taxa);
+		this->SetTaxaBlockPtr(tb, NxsBlock::BLOCK_LINK_EQUIVALENT_TO_IMPLIED);
+		return true;
+	}
+
+/* Returns the surrogate to its unlinked state. Only a block owned by the surrogate is
+	affected: unless a reference to it has been passed out, the block is handed to the
+	reader's taxa block factory via BlockError() when a factory is available, and deleted
+	otherwise. The pointer is then cleared and the link status reset. The newtaxa and
+	passedRefOfOwnedBlock flags are always cleared.
+*/
+void NxsTaxaBlockSurrogate::ResetSurrogate()
+	{
+	if (ownsTaxaBlock)
+		{
+		if (!passedRefOfOwnedBlock && taxa != NULL)
+			{
+			NxsTaxaBlockFactory * factory = (nxsReader != NULL ? nxsReader->GetTaxaBlockFactory() : NULL);
+			if (factory)
+				factory->BlockError(taxa);
+			else
+				delete taxa;
+			}
+		/* When a reference was passed out the block is not freed here; a reference should be held by the nexus reader. */
+		taxa = NULL;
+		ownsTaxaBlock = false;
+		taxaLinkStatus = NxsBlock::BLOCK_LINK_UNINITIALIZED;
+		}
+	passedRefOfOwnedBlock = false;
+	newtaxa = false;
+	}
+
+/* \returns a vector holding the taxa block when `newtaxa` is set and a block exists (and
+	records that a reference to the block has been passed out); an empty vector otherwise.
+*/
+VecBlockPtr NxsTaxaBlockSurrogate::GetCreatedTaxaBlocks()
+	{
+	VecBlockPtr created;
+	if (!newtaxa || !taxa)
+		return created;
+	created.push_back(taxa);
+	passedRefOfOwnedBlock = true;
+	return created;
+	}
+/*!
+	Called when a TAXLABELS command is found inside a block that uses this surrogate. The
+	command is forwarded to the taxa block's own HandleTaxLabels().
+
+	\throws NxsException unless NEWTAXA was specified (`newtaxa` is set) and a taxa block exists.
+*/
+void NxsTaxaBlockSurrogate::HandleTaxLabels(
+  NxsToken & token)	/* the token used to read from `in' */
+	{
+	const bool mayDefineTaxa = (newtaxa && taxa != NULL);
+	if (mayDefineTaxa)
+		{
+		taxa->HandleTaxLabels(token);
+		return;
+		}
+	NxsString errormsg = "NEWTAXA must have been specified in DIMENSIONS command to use the TAXLABELS command in a ";
+	errormsg << GetBlockName() << " block";
+	throw NxsException(errormsg, token);
+	}
diff --git a/src/nxstoken.cpp b/src/nxstoken.cpp
new file mode 100644
index 0000000..50eda02
--- /dev/null
+++ b/src/nxstoken.cpp
@@ -0,0 +1,1106 @@
+//	Copyright (C) 1999-2003 Paul O. Lewis
+//
+//	This file is part of NCL (Nexus Class Library) version 2.0.
+//
+//	NCL is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	NCL is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with NCL; if not, write to the Free Software Foundation, Inc.,
+//	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+#include <cstdlib>
+#include <cassert>
+#include <sstream>
+#include "ncl/nxstoken.h"
+
+using namespace std;
+
+#define NEW_NXS_TOKEN_READ_CHAR
+
+
+
+
+/*!
+ * Parses a ProcessedNxsCommand assuming that it has the form:
+ *		cmd_name opt_name = opt_val multi_opt_name = (opt_val_1 opt_val2) mat_opt_name = ((v11 v12) (v21 v22)) ;
+ * An NxsException is thrown if an opt_name is repeated, an = or a value is missing, or the tokens end before a closing ).
+ */
+NxsSimpleCommandStrings ProcessedNxsToken::ParseSimpleCmd(
+  const std::vector<ProcessedNxsToken> &pnc,
+	bool convertToLower)
+{
+	NxsSimpleCommandStrings nscs;
+	if (pnc.empty())
+		return nscs;
+
+	// The first token is the command name; every token after it is read as part of a "key = value" group.
+	std::vector<ProcessedNxsToken>::const_iterator wordIt = pnc.begin();
+
+	nscs.cmdName = wordIt->GetToken();
+	if (convertToLower)
+		NxsString::to_lower(nscs.cmdName);
+	nscs.cmdPos = wordIt->GetFilePosInfoConstRef();
+	++wordIt;
+
+	std::string key;
+
+	NxsString errorMsg;
+	NxsTokenPosInfo keyPos = nscs.cmdPos;
+	bool eqRead = false;
+	for (; wordIt != pnc.end(); ++wordIt)
+		{
+		std::string w = wordIt->GetToken();
+		if (convertToLower)
+			NxsString::to_lower(w);
+		if (key.empty())
+			{
+			key = w;
+			if (nscs.HasKey(key))
+				{
+				errorMsg << "Command option (" << key << ") repeated in the " << nscs.cmdName << " command.";
+				throw NxsException(errorMsg, wordIt->GetFilePosInfoConstRef());
+				}
+			keyPos = wordIt->GetFilePosInfoConstRef();
+			}
+		else if (!eqRead)
+			{
+			if (w != "=")
+				{
+				errorMsg << "Expecting an = after the  " << key << " command option of the  " << nscs.cmdName << " command.";
+				throw NxsException(errorMsg, wordIt->GetFilePosInfoConstRef());
+				}
+			eqRead = true;
+			}
+		else {
+			if (w == "(")
+				{
+				++wordIt;
+				w = (wordIt != pnc.end() ? wordIt->GetToken() : std::string());	// "(" may be the last token: never dereference end()
+				std::vector<std::string> vals;
+				NxsSimpleCommandStrings::MatString mat;
+				if (w == "(")
+					{
+					while (w != ")")
+						{
+						if (w != "(")
+							{
+							errorMsg << "Expecting a ( to begin another row of values in the " << key << " command option of the  " << nscs.cmdName << " command.";
+							throw NxsException(errorMsg, keyPos);
+							}
+
+						++wordIt;
+						// w is assigned inside the loop below; reading it here could dereference end()
+						while (wordIt != pnc.end())
+							{
+							w = wordIt->GetToken();
+							if (convertToLower)
+								NxsString::to_lower(w);
+							if (w == ")")
+								break;
+							vals.push_back(w);
+							++wordIt;
+							}
+						if (wordIt == pnc.end())
+							{
+							errorMsg << "Expecting a ) to end the list of values for the " << key << " command option of the  " << nscs.cmdName << " command.";
+							throw NxsException(errorMsg, keyPos);
+							}
+						++wordIt;
+						mat.push_back(vals);
+						vals.clear();
+						if (wordIt == pnc.end())	// checked before the token is read, not after
+							{
+							errorMsg << "Expecting a ) to end the list of values for the " << key << " command option of the  " << nscs.cmdName << " command.";
+							throw NxsException(errorMsg, keyPos);
+							}
+						w = wordIt->GetToken();
+						}
+					nscs.matOpts[key] = NxsSimpleCommandStrings::MatFromFile(wordIt->GetFilePosInfoConstRef(), mat);
+					}
+				else
+					{
+					while (wordIt != pnc.end())
+						{
+						w = wordIt->GetToken();
+						if (convertToLower)
+							NxsString::to_lower(w);
+						if (w == ")")
+							break;
+						vals.push_back(w);
+						++wordIt;
+						}
+					if (wordIt == pnc.end())
+						{
+						errorMsg << "Expecting a ) to end the list of values for the " << key << " command option of the  " << nscs.cmdName << " command.";
+						throw NxsException(errorMsg, keyPos);
+						}
+					nscs.multiOpts[key] = NxsSimpleCommandStrings::MultiValFromFile(wordIt->GetFilePosInfoConstRef(), vals);
+					}
+				}
+			else
+				{
+				std::string val = w;
+				nscs.opts[key] = NxsSimpleCommandStrings::SingleValFromFile( wordIt->GetFilePosInfoConstRef(), val);
+				}
+			eqRead = false;
+			key.clear();
+			}
+		}
+	if (eqRead)
+		{
+		errorMsg << "Expecting a value after the = sign in the  " << key << " command option of the  " << nscs.cmdName << " command.";
+		throw NxsException(errorMsg, keyPos);
+		}
+	if (!key.empty())
+		{
+		errorMsg << "Expecting an = after the  " << key << " command option of the  " << nscs.cmdName << " command.";
+		throw NxsException(errorMsg, keyPos);
+		}
+	return nscs;
+}
+
+
+
+
+NxsX_UnexpectedEOF::NxsX_UnexpectedEOF(NxsToken &token)
+	:NxsException("Unexpected end-of-file", token)
+	{	/* appends the upper-cased name of the block being read, when the token reports one */
+	std::string blockName(token.GetBlockName());
+	NxsString::to_upper(blockName);
+	if (blockName.length() > 0)
+		msg << " while reading " << blockName << " block.";
+	}
+
+/*! Writes the tokens of command `c` (with all embedded comments), each preceded by one space, after a three-space indent, then a terminating ; . Writes nothing and returns false if `c` holds no tokens; returns true otherwise. */
+bool WriteCommandAsNexus(std::ostream & out, const ProcessedNxsCommand &c)
+	{
+	if (c.begin() == c.end())
+		return false;
+	out << "   "; /* command indentation; every token below adds its own leading space */
+	for (ProcessedNxsCommand::const_iterator tokIt = c.begin(); tokIt != c.end(); ++tokIt)
+		{
+		out << ' ';
+		(*tokIt).WriteAsNexus(out);
+		}
+	out << ';';
+	return true;
+	}
+
+
+
+/*!
+	Convenience function: advances `tokIt` by one position.
+ 	If that makes it equal to `endIt`, raises an NxsException whose message is "Unexpected ; " followed by
+		`contextString` (when not NULL); `tokIt` is first moved back so that the exception refers to the last token.
+*/
+void ProcessedNxsToken::IncrementNotLast(std::vector<ProcessedNxsToken>::const_iterator & tokIt, const std::vector<ProcessedNxsToken>::const_iterator &endIt, const char * contextString)
+	{
+	if (++tokIt != endIt)
+		return;
+	/* The command ended where another token was required. */
+	NxsString complaint("Unexpected ; ");
+	if (contextString != NULL)
+		complaint.append(contextString);
+	/* Step back onto the last existing token before it is dereferenced for the exception. */
+	--tokIt;
+	throw NxsException(complaint, *tokIt);
+	}
+/*!
+ 	Advance the stream and store the character read in nextCharInStream.  Deal with the 3 ways of specifying return characters:
+		CR (13), LF (10) and CR followed by LF are each stored as a single \n. Does nothing once nextCharInStream is EOF.
+*/
+inline void NxsToken::AdvanceToNextCharInStream()
+	{
+	if (nextCharInStream == EOF)
+		return;
+	nextCharInStream  = (signed char) (inputStream.rdbuf())->sbumpc();	// NOTE(review): the signed char cast appears to make a 0xFF input byte compare equal to EOF (-1) - confirm
+	posOffBy = -1;	// one sbumpc() call has been made for this character
+	if (nextCharInStream == 13 || nextCharInStream == 10)
+		{
+		if(nextCharInStream == 13)
+			{
+			if ((inputStream.rdbuf())->sgetc() == 10)	//peeks at the next char
+				{
+				(inputStream.rdbuf())->sbumpc();	// swallow the LF of a CR LF pair
+				posOffBy = -2;	// two sbumpc() calls were needed for this line ending
+				}
+			}
+		nextCharInStream = '\n';
+		}
+	}
+
+
+#if defined(NEW_NXS_TOKEN_READ_CHAR)
+/*!
+	Returns the character held in nextCharInStream and calls AdvanceToNextCharInStream() to refill that look-ahead
+	(which also turns every style of line ending into '\n').
+	Does all of the fileposition bookkeeping: a '\n' starts a new line at column 1, a tab moves to the next 4-column stop.
+	At end-of-file atEOF is set; '\0' is returned if eofAllowed is true, otherwise NxsX_UnexpectedEOF is thrown.
+*/
+inline char NxsToken::GetNextChar()
+	{
+	//
+	// 	Why this was changed:  calls to tellg seem slow and unnecessary - filepos is stored in terms of the
+	//	number of times sbumpc() is called.
+	//	If this ever goes back to getting the filepos via in.tellg(), remember to call it
+	//	twice after both sgetc() calls in the case of the \13\10 endline
+
+	const signed char current = nextCharInStream;
+	AdvanceToNextCharInStream();
+	if (current == EOF)
+		{
+		atEOF = true;
+		if (!eofAllowed)
+			throw NxsX_UnexpectedEOF(*this);
+		return '\0';
+		}
+	if (current == '\n')
+		{
+		atEOL = true;
+		fileColumn = 1L;
+		fileLine++;
+		return '\n';
+		}
+	atEOL = false;
+	if (current != '\t')
+		fileColumn++;
+	else
+		fileColumn += 4 - ((fileColumn - 1)%4);	//@assumes that tab will be 4 in the editor we use
+	return current;
+	}
+
+#else //	if  !defined(NEW_NXS_TOKEN_READ_CHAR)
+
+/*!
+	Legacy variant, compiled only when NEW_NXS_TOKEN_READ_CHAR is not defined. Reads next character from in and does all of the following before returning it to the calling function:
+~
+	o if character read is either a carriage return or line feed, fileLine is incremented by one and
+	  fileColumn is reset to 1
+	o if character read is a carriage return, and a peek at the next character to be read reveals that it is a line
+	  feed, then the next (line feed) character is read
+	o if either a carriage return or line feed is read, the character returned to the calling function is '\n' if
+	  character read is neither a carriage return nor a line feed, fileColumn is incremented by one and the character is
+	  returned as is to the calling function; at end-of-file atEOF is set and '\0' is returned
+	o in all cases (except under __DECCXX) tellg is called on the istream after the read.
+~
+*/
+inline char NxsToken::GetNextChar()
+	{
+	int ch = inputStream.get();
+	int failed = inputStream.bad();
+	if (failed)
+		{
+		errormsg = "Unknown error reading data file (check to make sure file exists)";
+		throw NxsException(errormsg);
+		}
+
+	if (ch == 13 || ch == 10)
+		{
+		fileLine++;
+		fileColumn = 1L;
+
+		if (ch == 13 && (int)inputStream.peek() == 10)
+			ch = inputStream.get();	// consume the LF of a CR LF pair
+
+		atEOL = 1;
+		}
+	else if (ch == EOF)
+		atEOF = 1;
+	else
+		{
+		fileColumn++;
+		atEOL = 0;
+		}
+
+#	if defined(__DECCXX)
+		filepos = 0L;
+#	else
+		file_pos filepos = inputStream.tellg();	// NOTE(review): this declares a new local named filepos; a member of that name, if any, is not updated - confirm
+#	endif
+
+	if (atEOF)
+		return '\0';
+	else if (atEOL)
+		return '\n';
+	else
+		return (char)ch;
+	}
+#endif
+
+std::map<std::string, std::string> NxsToken::ParseAsSimpleKeyValuePairs(const ProcessedNxsCommand & tv, const char *cmdName)
+	{
+	/* Reads `tv` as a run of "key = value" token triples and returns the key -> value map (a repeated key keeps its last
+	   value). Throws NxsException if an "=" or a value is missing; `cmdName` is only used in the error messages. */
+	std::map<std::string, std::string> kv;
+	std::string key;
+	ProcessedNxsCommand::const_iterator cur = tv.begin();
+	ProcessedNxsCommand::const_iterator last;
+	const ProcessedNxsCommand::const_iterator stop = tv.end();
+	while (cur != stop)
+		{
+		key = cur->GetToken().c_str();
+		last = cur++;
+		if (cur == stop || cur->GetToken() != "=")
+			{
+			NxsString m("Expecting = after ");
+			m += key.c_str();
+			m += " in ";
+			m += cmdName;
+			m += " command.";
+			const ProcessedNxsCommand::const_iterator where = (cur == stop ? last : cur);	/* report at the wrong token, or at the key when nothing follows it */
+			throw NxsException(m, where->GetFilePosition(), where->GetLineNumber(), where->GetColumnNumber());
+			}
+		last = cur++;
+		if (cur == stop)
+			{
+			NxsString m("Expecting a value after = in the  ");
+			m += key.c_str();
+			m += " subcommand of the in ";
+			m += cmdName;
+			m += " command.";
+			throw NxsException(m, last->GetFilePosition(), last->GetLineNumber(), last->GetColumnNumber());
+			}
+		kv[key] = cur->GetToken();
+		++cur;
+		}
+	return kv;
+	}
+
+
+std::vector<ProcessedNxsToken> NxsToken::Tokenize(const std::string & toTokenize)
+    {
+	/* Splits `toTokenize` into tokens by running a temporary NxsToken over an in-memory stream of its text.
+	   A newline is added after the text before it is read (presumably so that the last token is never cut short by
+	   end-of-file - confirm).
+	   Tokens are collected until the reader reports AtEOF(). */
+	std::string padded(toTokenize);
+	padded.push_back('\n');
+	std::istringstream paddedStream(padded);
+	NxsToken reader(paddedStream);
+	std::vector<ProcessedNxsToken> tokenVec;
+	for (reader.GetNextToken(); !reader.AtEOF(); reader.GetNextToken())
+		tokenVec.push_back(ProcessedNxsToken(reader));
+    return tokenVec;
+    }
+
+
+/*!
+	Reads until ";" and appends each token read to `tokenVec` (tokens are skipped, not stored, when `tokenVec` is NULL). The ";" is not stored, but it can be assumed that the semicolon followed; the NxsToken's file position will reflect the location of the semicolon.
+	Throws NxsX_UnexpectedEOF if the input ends before a ";" is found; without that check the loop below never terminated at end-of-file.
+*/
+void NxsToken::ProcessAsCommand(ProcessedNxsCommand *tokenVec)
+	{
+	while (!this->Equals(";"))
+		{
+		if (this->AtEOF())
+			throw NxsX_UnexpectedEOF(*this);
+		if (tokenVec)
+			tokenVec->push_back(ProcessedNxsToken(*this));
+		this->GetNextToken();
+		}
+	}
+
+
+/*!
+	Returns a copy of `s` quoted according to the NEXUS Standard: the result is wrapped in single quotes and every
+		single quote inside `s` is written as a pair of single quotes.
+*/
+std::string NxsToken::GetQuoted(const std::string &s)
+	{
+	const char apostrophe = '\'';
+	std::string quoted(1, apostrophe);
+	quoted.reserve(s.length() + 4);
+	for (std::string::size_type pos = 0; pos < s.length(); ++pos)
+		{
+		quoted.push_back(s[pos]);
+		if (s[pos] == apostrophe)
+			quoted.push_back(apostrophe);
+		}
+	quoted.push_back(apostrophe);
+	return quoted;
+	}
+
+/*!
+	Advances the token, and returns the positive integer that the new token represents.
+
+ 	On failure (the token is not a number, or is not greater than 0) sets errormsg and raises a NxsException.
+	`contextString` is used in the error message:
+		"${contextString} must be a number greater than 0. Found ${token} instead"
+*/
+unsigned NxsToken::DemandPositiveInt(NxsToken &token, NxsString & errormsg, const char *contextString)
+	{
+	token.GetNextToken();
+	int parsed = -1;	/* stays negative, and so is rejected below, when the conversion fails */
+	try
+		{
+		parsed = token.GetToken().ConvertToInt();
+		}
+	catch (NxsString::NxsX_NotANumber &)
+		{
+		/* not a number: fall through to the error report */
+		}
+	if (parsed > 0)
+		return (unsigned) parsed;
+	/* Zero, negative or unparsable: report what was found instead. */
+	errormsg.assign(contextString);
+	errormsg += " must be a number greater than 0. Found ";
+	errormsg += token.GetToken();
+	errormsg += " instead";
+	throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+	}
+
+
+/*!
+	Advances the token and checks that the new token is ";". Returns nothing.
+
+ 	If it is anything else, sets errormsg and raises a NxsException.
+	`contextString` is used in the error message:
+		"Expecting ';' to terminate the ${contextString} command, but found ${token} instead"
+*/
+void NxsToken::DemandEndSemicolon(NxsToken &token, NxsString & errormsg, const char *contextString)
+	{
+	token.GetNextToken();
+	if (token.Equals(";"))
+		return;
+	/* Anything but a semicolon is an error. */
+	errormsg = "Expecting ';' to terminate the ";
+	errormsg += contextString;
+	errormsg += " command, but found ";
+	errormsg += token.GetToken();
+	errormsg += " instead";
+	throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+	}
+/*!
+	Returns true if `s` contains a character that is not graphical (isgraph fails, e.g. a space), a ' or a [, or any other
+		character of the punctuation list below while `s` is longer than one character. Returns false otherwise (also for an empty `s`).
+*/
+bool NxsToken::NeedsQuotes(const std::string &s)
+	{
+	for (std::string::const_iterator sIt = s.begin(); sIt != s.end(); sIt++)
+		{
+		const char &c = (*sIt);
+		if (!isgraph((unsigned char) c))	// cast first: the <cctype> functions are undefined for negative char values
+			return true;
+		else if (strchr("\'[(){}\"-]/\\,;:=*`+<>", c) != NULL)
+			{
+			// ' and [ always need quotes.  other punctuation needs quotes if it is in a word of length > 1
+			if (c == '\'' || c == '[')
+				return true;
+			return (s.length() > 1);
+			}
+		}
+	return false;
+	}
+
+
+
+/*!
+	Binds the token to the supplied istream `i' and puts it in its initial state: atEOF and atEOL false, comment empty,
+	fileColumn and fileLine 1, labileFlags 0, saved and special the null character, EOF allowed, NEXUS punctuation.
+	When NEW_NXS_TOKEN_READ_CHAR is defined the first character of the stream is read into nextCharInStream right away.
+*/
+NxsToken::NxsToken(
+  istream &i)	/* the istream object to which the token is to be associated */
+  : inputStream(i),
+	eofAllowed(true)
+	{
+	this->isPunctuationFn = &(NxsString::IsNexusPunctuation);
+	labileFlags	= 0;
+	saved		= '\0';
+	special		= '\0';
+	comment.clear();
+	atEOF		= false;
+	atEOL		= false;
+	fileLine	= 1L;
+	fileColumn	= 1L;
+
+	whitespace[0]  = ' ';
+	whitespace[1]  = '\t';
+	whitespace[2]  = '\n';
+	whitespace[3]  = '\0';
+	posOffBy = 0;
+#	if defined(NEW_NXS_TOKEN_READ_CHAR)
+		nextCharInStream = 'a';	//anything other than EOF will work
+		AdvanceToNextCharInStream();
+#	endif
+	}
+
+/*!
+	Destructor. Intentionally empty: nothing is released explicitly here; all objects take care of deleting themselves.
+*/
+NxsToken::~NxsToken()
+	{
+	}
+
+/*!
+	Reads rest of comment (starting '[' already input) up to the matching ']' (nested [] pairs are counted) and acts
+	accordingly. If the comment is an output comment (it starts with '!') its text after the '!' is stored in `comment`
+	and passed to OutputComment(). Apart from that, output comments are handled like regular comments.
+	If the labileFlag bit saveCommandComments is in effect, and we are NOT in the middle of a token then a comment that
+		starts with '&' (without the [] braces) will be stored in token.
+	All other comments are stored as embeddedComments; an empty comment "[]" is not stored at all.
+	Returns true if a command comment was read and stored as the token. Throws NxsX_UnexpectedEOF at end-of-file.
+*/
+bool NxsToken::GetComment()
+	{
+	// The nesting level (set further down) starts at 1.  Every ']' encountered reduces
+	// level by one, so that we know we can stop when level becomes 0.
+	//
+	NxsString currentComment;
+	bool command = false;
+
+	bool formerEOFAllowed = eofAllowed;
+	eofAllowed = false;	// end-of-file inside a comment is always an error, whatever the caller allowed
+	try
+		{
+		char ch = GetNextChar();
+		// See if first character is the output comment symbol ('!')
+		// or command comment symbol (&)
+		//
+		int printing = 0;
+		if (ch == '!')
+			printing = 1;
+		else if (ch == '&' && (labileFlags & saveCommandComments) && token.empty())
+			command = true;
+		currentComment.push_back(ch);
+		if (ch != ']')
+			{
+			int level = 1;
+			for(;;)
+				{
+				ch = GetNextChar();
+				if (ch == ']')
+					{
+					level--;
+					if (level == 0)
+						break;
+					}
+				else if (ch == '[')
+					level++;
+				currentComment.push_back(ch);
+				}
+
+			if (printing)
+				{
+				// Allow output comment to be printed or displayed in most appropriate
+				// manner for target operating system (the leading '!' is dropped first)
+				//
+				NxsString foroutput(currentComment.c_str() + 1);
+				comment = foroutput;
+				OutputComment(foroutput);
+				}
+			if (command)
+				token = currentComment;
+			else
+				embeddedComments.push_back(NxsComment(currentComment, GetFileLine(), GetFileColumn()));
+			}
+		}
+	catch (NxsX_UnexpectedEOF & x)
+		{
+		x.msg << " (end-of-file inside comment)";
+		eofAllowed = formerEOFAllowed;	// restore the caller's setting before the exception leaves
+		throw x;
+		}
+	eofAllowed = formerEOFAllowed ;
+	return command;
+	}
+
+/*!
+	Reads rest of a token surrounded with curly brackets (the starting '{' has already been input) up to and including
+	the matching '}' character, appending every character read to the token. Nested curly-bracketed phrases are included. End-of-file before the matching '}' raises NxsX_UnexpectedEOF.
+*/
+void NxsToken::GetCurlyBracketedToken()
+	{
+	const bool callerAllowedEOF = eofAllowed;
+	eofAllowed = false;
+	try
+		{
+		/* depth counts the braces still open; the '{' consumed by the caller is the first one */
+		for (int depth = 1; depth > 0; )
+			{
+			const char ch = GetNextChar();
+			if (ch == '{')
+				++depth;
+			else if (ch == '}')
+				--depth;
+			AppendToToken(ch);
+			}
+		}
+	catch (NxsX_UnexpectedEOF & x)
+		{
+		x.msg << " (end-of-file inside {} braced statement)";
+		eofAllowed = callerAllowedEOF;
+		throw x;
+		}
+	eofAllowed = callerAllowedEOF;
+	}
+
+/*!
+	Gets remainder of a double-quoted NEXUS word (the first double quote character was read in already by GetNextToken).
+	Characters are appended to the token until the next double quote is encountered; that closing quote is consumed but
+	not stored. Tandem double quotes within a double-quoted NEXUS word are not allowed and will be treated as the end
+	of the first word and the beginning of the next double-quoted NEXUS word. Single quotes get no special treatment
+	here: each one is stored as read, so to embed a single quote simply use the single quote by itself.
+	End-of-file before the closing double quote raises NxsX_UnexpectedEOF.
+*/
+void NxsToken::GetDoubleQuotedToken()
+	{
+	const bool callerAllowedEOF = eofAllowed;
+	eofAllowed = false;
+	try
+		{
+		/* Copy characters up to, but not including, the closing double quote. */
+		char ch = GetNextChar();
+		while (ch != '\"')
+			{
+			AppendToToken(ch);
+			ch = GetNextChar();
+			}
+		/* ch now holds the closing quote, which is dropped. */
+		}
+	catch (NxsX_UnexpectedEOF & x)
+		{
+		x.msg << " (end-of-file inside \" quoted statement)";
+		eofAllowed = callerAllowedEOF;
+		throw x;
+		}
+	eofAllowed = callerAllowedEOF;
+	}
+
+/*!
+	Gets remainder of a single-quoted NEXUS word (the opening quote was read in already by GetNextToken). Characters
+	are appended to the token until an isolated single quote is found. Two single quotes in a row do not end the word;
+	they are stored as one single quote character. The character read after the closing quote is kept in `saved`.
+	End-of-file before that point raises NxsX_UnexpectedEOF, whatever the eofAllowed setting of the caller.
+*/
+void NxsToken::GetQuoted()
+	{
+	const bool callerAllowedEOF = eofAllowed;
+	const long startLine = fileLine;
+	const long startColumn = fileColumn;
+	eofAllowed = false;
+
+	try
+		{
+		for(;;)
+			{
+			const char ch = GetNextChar();
+			if (ch != '\'')
+				{
+				AppendToToken(ch);
+				continue;
+				}
+			/* A quote: it only closes the token if the character after it is not another quote. */
+			const char following = GetNextChar();
+			if (following != '\'')
+				{
+				saved = following;
+				break;
+				}
+			AppendToToken('\'');
+			}
+		}
+	catch (NxsX_UnexpectedEOF & x)
+		{
+		x.msg << " (end-of-file inside \' quoted token that started on line " << startLine<< ", column " <<startColumn << ')';
+		eofAllowed = callerAllowedEOF;
+		throw x;
+		}
+	eofAllowed = callerAllowedEOF;
+	}
+
+/*!
+Like GetQuoted, but appends both single quotes to the token in the event of an internal (doubled) single quote.
+Important for the double parsing of trees by the NxsTreesBlock
+*/
+void NxsToken::GetQuotedWithInternalSingleQuotesDoubled()
+	{
+	const bool callerAllowedEOF = eofAllowed;
+	const long startLine = fileLine;
+	const long startColumn = fileColumn;
+	eofAllowed = false;
+
+	try
+		{
+		for(;;)
+			{
+			const char ch = GetNextChar();
+			if (ch != '\'')
+				{
+				AppendToToken(ch);
+				continue;
+				}
+			/* A quote: it only closes the token if the character after it is not another quote. */
+			const char following = GetNextChar();
+			if (following != '\'')
+				{
+				saved = following;	/* first character after the closing quote */
+				break;
+				}
+			/* Doubled quote inside the token: keep it doubled. */
+			AppendToToken('\'');
+			AppendToToken('\'');
+			}
+		}
+	catch (NxsX_UnexpectedEOF & x)
+		{
+		x.msg << " (end-of-file inside \' quoted token that started on line " << startLine<< ", column " <<startColumn << ')';
+		eofAllowed = callerAllowedEOF;
+		throw x;
+		}
+	/* Normal exit: give the caller its EOF setting back. */
+	eofAllowed = callerAllowedEOF;
+	}
+
+/*!
+	Reads rest of parenthetical token (starting '(' already input) up to and including the matching ')' character.  All
+	nested parenthetical phrases will be included. Single-quoted sections are copied with their quotes (inner quotes stay doubled) and comments are copied with their [] brackets. Stops early if end-of-file is reached.
+*/
+void NxsToken::GetParentheticalToken()
+	{
+	// Set level to 1 initially.  Every ')' encountered reduces
+	// level by one, so that we know we can stop when level becomes 0.
+	// The caller's embeddedComments are set aside while the token is read and put back at the end.
+	int level = 1;
+	std::vector<NxsComment> prevEmbedded = embeddedComments;
+	embeddedComments.clear();
+	char ch;
+	ch = GetNextChar();
+	for(;;)
+		{
+		if (atEOF)
+			break;
+
+		if (ch == '\'')
+			{
+			AppendToToken('\'');
+			GetQuotedWithInternalSingleQuotesDoubled();
+			AppendToToken('\'');
+			ch = saved;	// the character that followed the closing quote is processed next
+			saved = '\0';
+			if (atEOF)
+				{
+				if (ch == ')' && level == 1)
+					{
+					AppendToToken(')');
+					break;
+					}
+				else
+					{
+					NxsX_UnexpectedEOF x(*this);
+					x.msg << "(end-of-file inside () statement)";
+					throw x;	// the exception used to be built and then silently dropped
+					}
+				}
+			continue;
+			}
+		if (ch == '[')
+			{
+			GetComment();
+			AppendToToken('[');
+			if (!embeddedComments.empty())	// GetComment() stores nothing for an empty "[]" comment
+				token.append(embeddedComments[0].GetText());
+			AppendToToken(']');
+			embeddedComments.clear();
+
+			}
+		else
+			{
+			if (ch == ')')
+				level--;
+			else if (ch == '(')
+				level++;
+
+			AppendToToken(ch);
+			}
+
+		if (level == 0)
+			break;
+		ch = GetNextChar();
+		}
+	embeddedComments = prevEmbedded;
+	}
+
+/*!
+	Returns true if token begins with the capitalized portion of `s' and, if token is longer than that portion, the
+	remaining characters match the corresponding characters of `s' (token may not be longer than `s'). The comparison is
+	case insensitive. This function should be used instead of the Begins function if you wish to allow for abbreviations
+	of commands and also want to ensure that user does not type in a word that does not correspond to any command.
+*/
+bool NxsToken::Abbreviation(
+  NxsString s)	/* the comparison string */
+	{
+	int k;
+	int slen = (int)s.size();
+	int tlen = (int)token.size();
+	char tokenChar, otherChar;
+
+	// The variable mlen refers to the "mandatory" portion
+	// that is the upper-case portion of s
+	// (characters are cast to unsigned char because the <cctype> functions are undefined for negative values)
+	int mlen;
+	for (mlen = 0; mlen < slen; mlen++)
+		{
+		if (!isupper((unsigned char) s[mlen]))
+			break;
+		}
+
+	// User must have typed at least mlen characters in
+	// for there to even be a chance at a match
+	//
+	if (tlen < mlen)
+		return false;
+
+	// If user typed in more characters than are contained in s,
+	// then there must be a mismatch
+	//
+	if (tlen > slen)
+		return false;
+
+	// Check the mandatory portion for mismatches
+	//
+	for (k = 0; k < mlen; k++)
+		{
+		tokenChar = (char)toupper((unsigned char) token[k]);
+		otherChar = s[k];
+		if (tokenChar != otherChar)
+			return false;
+		}
+
+	// Check the auxiliary portion for mismatches (if necessary)
+	//
+	for (k = mlen; k < tlen; k++)
+		{
+		tokenChar = (char)toupper((unsigned char) token[k]);
+		otherChar = (char)toupper((unsigned char) s[k]);
+		if (tokenChar != otherChar)
+			return false;
+		}
+
+	return true;
+	}
+
+/*!
+	Returns true if token NxsString begins with the NxsString `s' (false if `s' is longer than token). This function
+	should be used instead of the Equals function if you wish to allow for abbreviations of commands.
+*/
+bool NxsToken::Begins(
+  NxsString s,			/* the comparison string */
+  bool respect_case)	/* determines whether comparison is case sensitive */
+	{
+	unsigned k;
+	char tokenChar, otherChar;
+
+	unsigned slen = (unsigned)s.size();
+	if (slen > token.size())
+		return false;
+
+	for (k = 0; k < slen; k++)
+		{
+		if (respect_case)
+			{
+			tokenChar = token[k];
+			otherChar = s[k];
+			}
+		else
+			{
+			tokenChar = (char)toupper((unsigned char) token[k]);	// cast first: toupper() is undefined for negative char values
+			otherChar = (char)toupper((unsigned char) s[k]);
+			}
+
+		if (tokenChar != otherChar)
+			return false;
+		}
+
+	return true;
+	}
+
+/*!
+	Reads characters from in until a complete token has been read and stored in token. GetNextToken performs a number
+	of useful operations in the process of retrieving tokens:
+~
+	o any underscore characters encountered are stored as blank spaces (unless the labile flag bit preserveUnderscores
+	  is set)
+	o if the first character of the next token is an isolated single quote, then the entire quoted NxsString is saved
+	  as the next token
+	o paired single quotes are automatically converted to single quotes before being stored
+	o comments are handled automatically (normal comments are treated as whitespace and output comments are passed to
+	  the function OutputComment which does nothing in the NxsToken class but can be overridden in a derived class to
+	  handle these in an appropriate fashion)
+	o leading whitespace (including comments) is automatically skipped
+	o if the end of the file is reached on reading this token, the atEOF flag is set and may be queried using the AtEOF
+	  member function
+	o punctuation characters are always returned as individual tokens (see the Maddison, Swofford, and Maddison paper
+	  for the definition of punctuation characters) unless the flag ignorePunctuation is set in labileFlags,
+	  in which case the normal punctuation symbols are treated just like any other darkspace character.
+~
+	The behavior of GetNextToken may be altered by using labile flags. For example, the labile flag saveCommandComments
+	can be set using the member function SetLabileFlagBit. This will cause comments of the form [&X] to be saved as
+	tokens (without the square brackets), but only for the acquisition of the next token. Labile flags are cleared after
+	each application.
+*/
+void NxsToken::GetNextToken()
+	{
+	ResetToken();
+
+	char ch = ' ';
+	if (saved == '\0' || IsWhitespace(saved))
+		{
+		// Skip leading whitespace
+
+		while( IsWhitespace(ch) && !atEOF)
+			ch = GetNextChar();
+		saved = ch;	// the first non-whitespace character is handed to the loop below through `saved`
+		}
+
+	for(;;)
+		{
+		// Break now if singleCharacterToken mode on and token length > 0.
+		//
+		if (labileFlags & singleCharacterToken && !token.empty())
+			break;
+
+		// Get next character either from saved or from input stream.
+		//
+		if (saved != '\0')
+			{
+			ch = saved;
+			saved = '\0';
+			}
+		else
+			ch = GetNextChar();
+		// Break now if we've hit EOF.
+		//
+		if (atEOF)
+			break;
+		if (strchr("\n\r \t", ch) != NULL)//!isgraph(ch))
+			{
+			if (ch == '\n' && labileFlags & newlineIsToken)
+				{
+				if (token.empty())
+					{
+					atEOL = 1;
+					AppendToToken(ch);
+					}
+				else
+					{
+					// Newline came after token, save newline until next time when it will be
+					// reported as a separate token.
+					//
+					atEOL = 0;
+					saved = ch;
+					}
+				break;
+				}
+			else
+				{
+				// Break only if we've begun adding to token (remember, if we hit a comment before a token,
+				// there might be further white space between the comment and the next token).
+				//
+				if (!token.empty()) 
+				    {
+				    if (ch == ' ' && (labileFlags & NxsToken::spaceDoesNotBreakToken))
+				        AppendToToken(ch);
+				    else
+    					break;
+					}
+				}
+			}
+		else if (ch == '_')
+			{
+			// If underscores are discovered in unquoted tokens, they should be
+			// automatically converted to spaces.
+			//
+			if (!(labileFlags & preserveUnderscores))
+				ch = ' ';
+			AppendToToken(ch);
+			}
+
+		else if (ch == '[')
+			{
+			// Get rest of comment and deal with it, but notice that we only break if GetComment() reports that it
+			// stored a command comment as the token (any other comment counts as whitespace and reading goes on).
+			// Command comments are only stored when saveCommandComments is set and the token is still empty.
+			// The `comment` member is cleared before each comment is read.
+			comment.clear();
+			if (GetComment())
+				break;
+			}
+		else if (IsPunctuation(ch))
+			{
+			if (ch == '(' && (labileFlags & parentheticalToken))
+				{
+				AppendToToken(ch);
+				GetParentheticalToken();
+				}
+			else if (ch == '{' && (labileFlags & curlyBracketedToken))
+				{
+				AppendToToken(ch);
+				GetCurlyBracketedToken();
+				}
+			else if (ch == '\"' && (labileFlags & doubleQuotedToken))
+				GetDoubleQuotedToken();
+			else if (ch == '\'' && token.empty())
+				GetQuoted();
+			else
+				{
+				// If a token has already been started, save the punctuation for the next call;
+				// otherwise the punctuation character is the full token.
+				if (token.size() > 0)
+					saved = ch;
+				else
+					AppendToToken(ch);
+				}
+			break;
+			}
+		else
+			AppendToToken(ch);
+		}
+
+	labileFlags = 0;	// labile flags only apply to the token that was just read
+	}
+
+/*!
+	Removes every whitespace character (leading, trailing and embedded) from the currently-stored token.
+*/
+void NxsToken::StripWhitespace()
+	{
+	NxsString kept;	/* collects the characters that survive */
+	for (NxsString::const_iterator cIt = token.begin(); cIt != token.end(); ++cIt)
+		{
+		const char c = *cIt;
+		if (!IsWhitespace(c))
+			kept += c;
+		}
+	token = kept;
+	}
+
+/*!
+	Converts all alphabetical characters in token to upper case (in place, using toupper).
+*/
+void NxsToken::ToUpper()
+	{
+	for (unsigned i = 0; i < token.size(); i++)
+		token[i] = (char)toupper((unsigned char) token[i]);	// cast first: toupper() is undefined for negative char values
+	}
+
+
+void NxsToken::UseNewickTokenization(bool v)
+    {
+    /* Chooses which punctuation test isPunctuationFn points at: NxsString::IsNewickPunctuation when `v` is true,
+       NxsString::IsNexusPunctuation otherwise. */
+    if (!v)
+        {
+        this->isPunctuationFn = &(NxsString::IsNexusPunctuation);
+        return;
+        }
+    this->isPunctuationFn = &(NxsString::IsNewickPunctuation);
+    }
diff --git a/src/nxstreesblock.cpp b/src/nxstreesblock.cpp
new file mode 100644
index 0000000..a239142
--- /dev/null
+++ b/src/nxstreesblock.cpp
@@ -0,0 +1,2146 @@
+//	Copyright (C) 1999-2003 Paul O. Lewis
+//
+//	This file is part of NCL (Nexus Class Library) version 2.0.
+//
+//	NCL is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	NCL is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with NCL; if not, write to the Free Software Foundation, Inc.,
+//	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+#include <climits>
+#include <Rcpp.h>
+#include <sstream>
+#include <stack>
+
+#include "ncl/nxstreesblock.h"
+#include "ncl/nxsreader.h"
+using namespace std;
+#define REGRESSION_TESTING_GET_TRANS_TREE_DESC 0
+#define DEBUGGING_TREES_BLOCK 0
+// Categories of token that can occur in a Newick tree description: the two parentheses, the comma, a clade name,
+// the colon that introduces a branch length, and the branch length itself.
+// NOTE(review): judging by the type name, this records the kind of the previously read token while a tree string
+// is scanned; it is not referenced in this part of the file -- confirm against its users.
+enum PrevTreeTokenDesc
+		{
+		NXS_TREE_OPEN_PARENS_TOKEN,
+		NXS_TREE_CLOSE_PARENS_TOKEN,
+		NXS_TREE_COMMA_TOKEN,
+		NXS_TREE_CLADE_NAME_TOKEN,
+		NXS_TREE_COLON_TOKEN,
+		NXS_TREE_BRLEN_TOKEN
+		};
+
+// Depth-first search of the subtree rooted at this node for a node whose taxIndex equals leafIndex.
+// This node is tested before its descendants; children are visited from lChild along the rSib chain.
+// \returns the first matching node, or NULL if no node in the subtree matches.
+NxsSimpleNode * NxsSimpleNode::FindTaxonIndex(unsigned leafIndex)
+{
+	if (taxIndex == leafIndex)
+		return this;
+	for (NxsSimpleNode *child = lChild; child != NULL; child = child->rSib)
+		{
+		NxsSimpleNode * const found = child->FindTaxonIndex(leafIndex);
+		if (found != NULL)
+			return found;
+		}
+	return NULL;
+}
+
+// Makes the leaf with taxIndex == leafIndex a child of the root of the tree.
+// The leaf is looked up in `leaves' first; if that slot is out of range or NULL the tree is searched with
+// FindTaxonIndex.
+// \returns the node that is the new child of the root.
+// \throws NxsNCLAPIException if no such node exists (or the tree has no root).
+NxsSimpleNode * NxsSimpleTree::RerootAt(unsigned leafIndex)
+{
+	NxsSimpleNode * target = NULL;
+	if (root != NULL)
+		{
+		if (leafIndex < leaves.size())
+			target = leaves[leafIndex];
+		if (target == NULL)
+			target = root->FindTaxonIndex(leafIndex);
+		}
+	if (target != NULL)
+		return RerootAtNode(target);
+	NxsString eMsg;
+	eMsg << "Reroot failed. Leaf number " << (leafIndex + 1) << " was not found in the tree.";
+	throw NxsNCLAPIException(eMsg);
+}
+
+// Re-roots the tree so that `newRoot' becomes a child of the root.
+// Nothing is done when newRoot has no parent or is already a child of the root. Otherwise the ancestors of
+// newRoot (from its parent up to, but excluding, the current root) are collected, and each one is promoted to
+// root with FlipRootsChildToRoot, starting with the ancestor nearest the current root.
+// \returns newRoot.
+NxsSimpleNode * NxsSimpleTree::RerootAtNode(NxsSimpleNode *newRoot)
+{
+	NxsSimpleNode * anc = newRoot->edgeToPar.parent;
+	if (anc == NULL || anc == root)
+		return newRoot;
+	std::stack<NxsSimpleNode *> pathToRoot;
+	for (; anc != root; anc = anc->edgeToPar.parent)
+		pathToRoot.push(anc);
+	// the top of the stack is always a child of the current root
+	while (!pathToRoot.empty())
+		{
+		NxsSimpleNode * const nextRoot = pathToRoot.top();
+		pathToRoot.pop();
+		FlipRootsChildToRoot(nextRoot);
+		}
+	return newRoot;
+}
+// Makes `subRoot' (expected to be a child of the current root) the new root of the tree.
+// What happens to the old root depends on how many children it has:
+//	- fewer than 2: the old root is removed from allNodes and deleted; subRoot simply becomes the root.
+//	- exactly 2: the old root is removed and deleted, and subRoot's former sibling is re-attached as the last child
+//	  of subRoot. The sibling's edge absorbs subRoot's edge length (the two are summed, or copied if the sibling
+//	  had only a default length).
+//	- more than 2: the old root is kept and becomes the last child of subRoot, taking over subRoot's edge.
+void NxsSimpleTree::FlipRootsChildToRoot(NxsSimpleNode *subRoot)
+{
+	std::vector<NxsSimpleNode *> rc = root->GetChildren();
+	if (rc.size() < 2)
+		{
+		NCL_ASSERT(!rc.empty());
+		NCL_ASSERT(rc[0] == subRoot);
+		/* root has degree 1 delete it */
+		std::vector<NxsSimpleNode *> tmp;
+		tmp.swap(allNodes);
+		allNodes.reserve(tmp.size() - 1);
+		for (std::vector<NxsSimpleNode *>::const_iterator nIt = tmp.begin(); nIt != tmp.end(); ++nIt)
+			{
+			if (*nIt != root)
+				allNodes.push_back(*nIt);
+			}
+		delete root;
+		root = subRoot;
+		subRoot->edgeToPar.parent = NULL;
+		return;
+		}
+
+	if (rc.size() == 2)
+		{
+		/* root has degree 2 delete it */
+
+		// the other child of the root: either subRoot's right sibling or (if subRoot is the last child) the first child
+		NxsSimpleNode * formerSib = subRoot->rSib;
+		if (formerSib == NULL)
+			formerSib = root->lChild;
+		NCL_ASSERT(formerSib != subRoot);
+
+		std::vector<NxsSimpleNode *> tmp;
+		tmp.swap(allNodes);
+		allNodes.reserve(tmp.size() - 1);
+		for (std::vector<NxsSimpleNode *>::const_iterator nIt = tmp.begin(); nIt != tmp.end(); ++nIt)
+			{
+			if (*nIt != root)
+				allNodes.push_back(*nIt);
+			}
+		delete root;
+		root = NULL;
+
+		formerSib->edgeToPar.parent = subRoot;
+		if (formerSib->edgeToPar.defaultEdgeLen)
+			{
+			// sibling had no explicit length: it inherits subRoot's length (if subRoot had one)
+			if (!subRoot->edgeToPar.defaultEdgeLen)
+				{
+				formerSib->edgeToPar.defaultEdgeLen = false;
+				formerSib->edgeToPar.hasIntEdgeLens = subRoot->edgeToPar.hasIntEdgeLens;
+				formerSib->edgeToPar.iEdgeLen = subRoot->edgeToPar.iEdgeLen;
+				formerSib->edgeToPar.dEdgeLen = subRoot->edgeToPar.dEdgeLen;
+				}
+			}
+		else
+			{
+			// both edges have explicit lengths: add them, switching to a double length if either one is a double
+			if (!subRoot->edgeToPar.defaultEdgeLen)
+				{
+				if (formerSib->edgeToPar.hasIntEdgeLens)
+					{
+					if (subRoot->edgeToPar.hasIntEdgeLens)
+						formerSib->edgeToPar.iEdgeLen += subRoot->edgeToPar.iEdgeLen;
+					else
+						{
+						formerSib->edgeToPar.hasIntEdgeLens = false;
+						formerSib->edgeToPar.dEdgeLen = subRoot->edgeToPar.dEdgeLen + (double) formerSib->edgeToPar.iEdgeLen;
+						}
+					}
+				else
+					{
+					if (subRoot->edgeToPar.hasIntEdgeLens)
+						formerSib->edgeToPar.dEdgeLen += (double)subRoot->edgeToPar.iEdgeLen;
+					else
+						formerSib->edgeToPar.dEdgeLen += subRoot->edgeToPar.dEdgeLen;
+					}
+				}
+			}
+		NxsSimpleNode * subRootRChild = subRoot->GetLastChild();
+		if (subRootRChild == NULL)
+			subRoot->lChild = formerSib;
+		else
+			subRootRChild->rSib = formerSib;
+		// formerSib is now the last child of subRoot. When subRoot was the root's second child, formerSib->rSib
+		// still points at subRoot; clear it so the sibling chain does not loop back to the new root.
+		formerSib->rSib = NULL;
+		subRoot->rSib = NULL;
+		root = subRoot;
+		subRoot->edgeToPar.parent = NULL;
+		}
+	else
+		{
+		/* root has degree > 2, preserve it */
+		// the old root takes over subRoot's edge, with the direction reversed
+		root->edgeToPar = subRoot->edgeToPar;
+		std::swap(root->edgeToPar.child, root->edgeToPar.parent);
+		NxsSimpleNode * subRootRChild = subRoot->GetLastChild();
+		if (subRootRChild == NULL)
+			subRoot->lChild = root;
+		else
+			subRootRChild->rSib = root;
+		// unlink subRoot from the old root's child list
+		if (root->lChild == subRoot)
+			root->lChild = subRoot->rSib;
+		else
+			{
+			NxsSimpleNode * rOtherChild = root->lChild;
+			while (rOtherChild)
+				{
+				if (rOtherChild->rSib == subRoot)
+					{
+					rOtherChild->rSib = subRoot->rSib;
+					break;
+					}
+				rOtherChild = rOtherChild->rSib;
+				NCL_ASSERT(rOtherChild); // we trip this if subRoot is not in the rsib list
+				}
+			}
+		subRoot->rSib = NULL;
+		root = subRoot;
+		subRoot->edgeToPar.parent = NULL;
+		}
+}
+
+// Writes the edge-related part of a Newick description to `out':
+//	- ":<length>" unless the edge only has a default length (the stored string form of the length is preferred;
+//	  otherwise the int or double length is written),
+//	- every unprocessed comment wrapped in square brackets,
+//	- if `nhx' is true and parsedInfo is not empty, a "[&&NHX:key=value...]" comment.
+void NxsSimpleEdge::WriteAsNewick(std::ostream &out, bool nhx) const
+	{
+	if (!defaultEdgeLen)
+		{
+		out << ':';
+		if (!lenAsString.empty())
+			out << lenAsString;
+		else if (hasIntEdgeLens)
+			out << iEdgeLen;
+		else
+			out << dEdgeLen;
+		}
+	std::vector<NxsComment>::const_iterator cmt = unprocessedComments.begin();
+	for (; cmt != unprocessedComments.end(); ++cmt)
+		out << '[' << cmt->GetText() << ']';
+	if (!nhx || parsedInfo.empty())
+		return;
+	out << "[&&NHX";
+	std::map<std::string, std::string>::const_iterator kv = parsedInfo.begin();
+	for (; kv != parsedInfo.end(); ++kv)
+		out << ':' << kv->first << '=' << kv->second;
+	out << ']';
+	}
+
+// Recursively writes the subtree rooted at this node to `out' in Newick form, followed by this node's edge
+// (see NxsSimpleEdge::WriteAsNewick).
+//	Internal nodes: "(child,child,...)" followed by the node name (escaped if escapeNames, or if escapeInternals
+//	and the node has children) or, when the name is empty and a taxon index is set, the 1-based taxon number.
+//	Leaves: the 1-based taxon number unless useLeafNames is true; then the node name, or the label from `taxa'
+//	when the name is empty and `taxa' is non-NULL (escaped if escapeNames).
+void NxsSimpleNode::WriteAsNewick(std::ostream &out, bool nhx, bool useLeafNames, bool escapeNames, const NxsTaxaBlockAPI *taxa, bool escapeInternals) const
+	{
+	if (lChild == NULL)
+		{
+		NCL_ASSERT (taxIndex != UINT_MAX);
+		if (!useLeafNames)
+			out << (1 + taxIndex);
+		else
+			{
+			// fall back on the taxa block only when the node itself carries no name
+			std::string label;
+			if (name.empty() && taxa)
+				label = taxa->GetTaxonLabel(taxIndex);
+			else
+				label = name;
+			if (escapeNames)
+				out << NxsString::GetEscaped(label);
+			else
+				out << label;
+			}
+		}
+	else
+		{
+		out << '(';
+		const std::vector<NxsSimpleNode *> children = GetChildren();
+		for (std::vector<NxsSimpleNode *>::size_type i = 0; i < children.size(); ++i)
+			{
+			if (i > 0)
+				out << ',';
+			children[i]->WriteAsNewick(out, nhx, useLeafNames, escapeNames, taxa, escapeInternals);
+			}
+		out << ')';
+		if (name.empty())
+			{
+			if (taxIndex != UINT_MAX)
+				out << (1 + taxIndex);
+			}
+		else if (escapeNames || (escapeInternals && (!children.empty())))
+			out << NxsString::GetEscaped(name);
+		else
+			out << name;
+		}
+	edgeToPar.WriteAsNewick(out, nhx);
+	}
+
+// Appends this node and all of its descendants to `p' in preorder (a node before its children, children from
+// lChild along the rSib chain). Implemented iteratively: whenever a node other than this one is reached, its
+// right sibling (if any) is pushed on a stack so that it is visited after the node's own subtree.
+void NxsSimpleNode::AddSelfAndDesToPreorder(std::vector<const NxsSimpleNode *> &p) const
+	{
+	std::stack<const NxsSimpleNode *> pending;
+	const NxsSimpleNode * nd = this;
+	for (;;)
+		{
+		p.push_back(nd);
+		const NxsSimpleNode * next = nd->lChild;
+		if (next == NULL)
+			{
+			// leaf: resume with the most recently deferred sibling
+			if (pending.empty())
+				return;
+			next = pending.top();
+			pending.pop();
+			}
+		if (next->rSib)
+			pending.push(next->rSib);
+		nd = next;
+		}
+	}
+
+// \returns the nodes of the tree in preorder (root first); an empty vector if the tree has no root.
+std::vector<const NxsSimpleNode *> NxsSimpleTree::GetPreorderTraversal() const
+	{
+	std::vector<const NxsSimpleNode *> preorder;
+	if (root != NULL)
+		root->AddSelfAndDesToPreorder(preorder);
+	return preorder;
+	}
+
+/* Integer counterpart of GetDblPathDistances: builds a (maxIndex+1) x (maxIndex+1) matrix of path distances
+ between taxon indices, using the edge lengths reported by GetIntEdgeLen().
+
+ If toMRCA is true the row i col j element is the distance from taxon i to the MRCA of (i and j), so the matrix
+ is generally not symmetric. Otherwise the element is the full path length between i and j and the matrix is
+ symmetric.
+
+ The diagonal is 0; cells for which no distance was computed hold INT_MAX.
+ An empty matrix is returned if the tree has no root, the root has no children, or the largest taxon index
+ encountered is 0.
+*/
+std::vector<std::vector<int> > NxsSimpleTree::GetIntPathDistances(bool toMRCA) const
+	{
+	if (root == NULL || root->lChild == NULL)
+		return std::vector<std::vector<int> >();
+
+	typedef std::map<unsigned, int> TaxonIndToDistMap;
+	typedef std::map<unsigned, TaxonIndToDistMap> PairwiseDistMap;
+	typedef PairwiseDistMap::iterator PairwiseDistRow;
+
+	// for every internal node: taxon index -> distance from that node down to the taxon
+	std::map<const NxsSimpleNode *,  TaxonIndToDistMap > ndToDist;
+	const std::vector<const NxsSimpleNode *> preord = GetPreorderTraversal();
+	unsigned maxIndex = 0;
+	PairwiseDistMap pairwiseDist;
+	// reversed preorder: every node is visited after all of its descendants
+	for (std::vector<const NxsSimpleNode *>::const_reverse_iterator nIt = preord.rbegin(); nIt != preord.rend(); ++nIt)
+		{
+		const NxsSimpleNode *nd = *nIt;
+		if (nd->lChild)
+			{
+			TaxonIndToDistMap nm;
+			ndToDist[nd] = nm;
+			TaxonIndToDistMap & tidm = ndToDist[nd];
+			const NxsSimpleNode * currChild = nd->lChild;
+			if (nd->taxIndex != UINT_MAX)
+				{
+				if (maxIndex < nd->taxIndex)
+					maxIndex = nd->taxIndex;
+				tidm[nd->taxIndex] = 0;
+				}
+			// loop over all of the children of nd
+			while (currChild)
+				{
+				TaxonIndToDistMap currChildEls;
+				TaxonIndToDistMap * currChildElsPtr;
+				int currEdgeLen = currChild->edgeToPar.GetIntEdgeLen();
+				if (currChild->lChild)
+					{
+					NCL_ASSERT(ndToDist.find(currChild) != ndToDist.end());
+					currChildElsPtr = &(ndToDist[currChild]);
+					}
+				else
+					{
+					if (maxIndex < currChild->taxIndex)
+						maxIndex = currChild->taxIndex;
+					currChildEls[currChild->taxIndex] = 0;
+					currChildElsPtr = &currChildEls;
+					}
+				// pair each taxon i already recorded below nd with each taxon j below currChild
+				for (TaxonIndToDistMap::const_iterator i = tidm.begin(); i != tidm.end(); ++i)
+					{
+					const unsigned iIndex = i->first;
+					const int idist = i->second;
+					for (TaxonIndToDistMap::const_iterator j = currChildElsPtr->begin(); j != currChildElsPtr->end(); ++j)
+						{
+						const unsigned jIndex = j->first;
+						const int jdist = j->second;
+						const int ndToJDist = jdist + currEdgeLen;
+						if (toMRCA)
+							{
+							PairwiseDistRow  iRow = pairwiseDist.find(iIndex);
+							PairwiseDistRow  jRow = pairwiseDist.find(jIndex);
+							NCL_ASSERT(iRow == pairwiseDist.end() || (iRow->second.find(jIndex) == iRow->second.end()));
+							NCL_ASSERT(jRow == pairwiseDist.end() || (jRow->second.find(iIndex) == jRow->second.end()));
+							pairwiseDist[iIndex][jIndex] = idist;
+							pairwiseDist[jIndex][iIndex] = ndToJDist;
+							}
+						else
+							{
+							// store each unordered pair once, keyed (smaller index, larger index)
+							const unsigned fIndex = (iIndex < jIndex ? iIndex : jIndex);
+							const unsigned sIndex = (iIndex < jIndex ? jIndex : iIndex);
+							PairwiseDistRow  r = pairwiseDist.find(fIndex);
+							const bool found = (r != pairwiseDist.end() && (r->second.find(sIndex) != r->second.end()));
+							if (!found)
+								pairwiseDist[fIndex][sIndex] = currEdgeLen + idist + jdist;
+							}
+						}
+					}
+				for (TaxonIndToDistMap::const_iterator j = currChildElsPtr->begin(); j != currChildElsPtr->end(); ++j)
+					tidm[j->first] = currEdgeLen + j->second;
+				currChild = currChild->rSib;
+				}
+			}
+		}
+	if (maxIndex == 0)
+		return std::vector<std::vector<int> >();
+	std::vector<int> toTipDistRow(maxIndex+1, INT_MAX);
+	std::vector<std::vector<int> > pathDistMat(maxIndex+1, toTipDistRow);
+	for (unsigned diagInd = 0; diagInd <= maxIndex; ++diagInd)
+		pathDistMat[diagInd][diagInd] = 0;
+
+	for (PairwiseDistMap::const_iterator iit = pairwiseDist.begin(); iit != pairwiseDist.end(); ++iit)
+		{
+		const unsigned iInd = iit->first;
+		const TaxonIndToDistMap & toDistMap = iit->second;
+		for (TaxonIndToDistMap::const_iterator jit = toDistMap.begin(); jit != toDistMap.end(); ++jit)
+			{
+			const unsigned jInd = jit->first;
+			if (jInd != iInd)
+				{
+				const int d = jit->second;
+				pathDistMat[iInd][jInd] = d;
+				// In toMRCA mode [i][j] and [j][i] are distinct values that are both present in pairwiseDist;
+				// mirroring would overwrite one with the other (GetDblPathDistances applies the same guard).
+				if (!toMRCA)
+					pathDistMat[jInd][iInd] = d;
+				}
+			}
+		}
+
+	return pathDistMat;
+	}
+
+/* Builds a (maxIndex+1) x (maxIndex+1) matrix of path distances between taxon indices, using the edge lengths
+ reported by GetDblEdgeLen().
+
+ If toMRCA is true the row i col j element will be the distance from tip i to the MRCA of (i and j); otherwise it
+ is the length of the whole path between i and j, written to both [i][j] and [j][i].
+
+ The diagonal is 0.0; cells for which no distance was computed hold DBL_MAX. An empty matrix is returned if the
+ tree has no root, the root has no children, or the largest taxon index encountered is 0.
+*/
+std::vector<std::vector<double> > NxsSimpleTree::GetDblPathDistances(bool toMRCA) const
+	{
+	typedef std::vector<std::vector<double> > DistMatrix;
+	if (root == NULL || root->lChild == NULL)
+		return DistMatrix();
+
+	typedef std::map<unsigned, double> TaxonIndToDistMap;
+	typedef std::map<unsigned, TaxonIndToDistMap> PairwiseDistMap;
+	typedef PairwiseDistMap::iterator PairwiseDistRow;
+
+	// for every internal node: taxon index -> distance from that node down to the taxon
+	std::map<const NxsSimpleNode *, TaxonIndToDistMap> distBelow;
+	const std::vector<const NxsSimpleNode *> preorder = GetPreorderTraversal();
+	unsigned maxIndex = 0;
+	PairwiseDistMap pairwiseDist;
+	// reversed preorder: every node is visited after all of its descendants
+	std::vector<const NxsSimpleNode *>::const_reverse_iterator rit = preorder.rbegin();
+	for (; rit != preorder.rend(); ++rit)
+		{
+		const NxsSimpleNode * const nd = *rit;
+		if (nd->lChild == NULL)
+			continue;
+		distBelow[nd] = TaxonIndToDistMap();
+		TaxonIndToDistMap & seen = distBelow[nd];
+		if (nd->taxIndex != UINT_MAX)
+			{
+			if (maxIndex < nd->taxIndex)
+				maxIndex = nd->taxIndex;
+			seen[nd->taxIndex] = 0.0;
+			}
+		for (const NxsSimpleNode * child = nd->lChild; child != NULL; child = child->rSib)
+			{
+			TaxonIndToDistMap leafOnly;
+			TaxonIndToDistMap * below;
+			const double edgeLen = child->edgeToPar.GetDblEdgeLen();
+			if (child->lChild)
+				{
+				NCL_ASSERT(distBelow.find(child) != distBelow.end());
+				below = &(distBelow[child]);
+				}
+			else
+				{
+				if (maxIndex < child->taxIndex)
+					maxIndex = child->taxIndex;
+				leafOnly[child->taxIndex] = 0.0;
+				below = &leafOnly;
+				}
+			// pair every taxon already recorded below nd (i) with every taxon below this child (j)
+			for (TaxonIndToDistMap::const_iterator i = seen.begin(); i != seen.end(); ++i)
+				{
+				const unsigned iIndex = i->first;
+				const double idist = i->second;
+				for (TaxonIndToDistMap::const_iterator j = below->begin(); j != below->end(); ++j)
+					{
+					const unsigned jIndex = j->first;
+					const double jdist = j->second;
+					const double ndToJDist = jdist + edgeLen;
+					if (!toMRCA)
+						{
+						// store each unordered pair once, keyed (smaller index, larger index)
+						const bool iFirst = (iIndex < jIndex);
+						const unsigned fIndex = (iFirst ? iIndex : jIndex);
+						const unsigned sIndex = (iFirst ? jIndex : iIndex);
+						PairwiseDistRow r = pairwiseDist.find(fIndex);
+						const bool found = (r != pairwiseDist.end() && (r->second.find(sIndex) != r->second.end()));
+						if (!found)
+							pairwiseDist[fIndex][sIndex] = idist + ndToJDist;
+						}
+					else
+						{
+						PairwiseDistRow  iRow = pairwiseDist.find(iIndex);
+						PairwiseDistRow  jRow = pairwiseDist.find(jIndex);
+						NCL_ASSERT(iRow == pairwiseDist.end() || (iRow->second.find(jIndex) == iRow->second.end()));
+						NCL_ASSERT(jRow == pairwiseDist.end() || (jRow->second.find(iIndex) == jRow->second.end()));
+						pairwiseDist[iIndex][jIndex] = idist;
+						pairwiseDist[jIndex][iIndex] = ndToJDist;
+						}
+					}
+				}
+			// the taxa below this child are now "previously encountered" descendants of nd
+			for (TaxonIndToDistMap::const_iterator j = below->begin(); j != below->end(); ++j)
+				seen[j->first] = edgeLen + j->second;
+			}
+		}
+	if (maxIndex == 0)
+		return DistMatrix();
+	const unsigned dim = maxIndex + 1;
+	DistMatrix pathDistMat(dim, std::vector<double>(dim, DBL_MAX));
+	for (unsigned diagInd = 0; diagInd < dim; ++diagInd)
+		pathDistMat[diagInd][diagInd] = 0.0;
+
+	for (PairwiseDistMap::const_iterator row = pairwiseDist.begin(); row != pairwiseDist.end(); ++row)
+		{
+		const unsigned iInd = row->first;
+		pathDistMat[iInd][iInd] = 0.0;
+		const TaxonIndToDistMap & rowDists = row->second;
+		for (TaxonIndToDistMap::const_iterator cell = rowDists.begin(); cell != rowDists.end(); ++cell)
+			{
+			const unsigned jInd = cell->first;
+			const double d = cell->second;
+			pathDistMat[iInd][jInd] = d;
+			if (!toMRCA)
+				pathDistMat[jInd][iInd] = d;
+			}
+		}
+
+	return pathDistMat;
+	}
+
+// Parses the text of an NHX comment of the form "&&NHX:key=value:key=value...".
+//	Every key/value pair that is found is stored in *infoMap (when infoMap is non-NULL); an empty value is allowed.
+//	\returns the part of the comment that could not be interpreted:
+//		- the whole comment if it is shorter than 6 characters or does not start with "&&NHX",
+//		- everything after "&&NHX" if there is no ':' at all,
+//		- the remainder starting at the ':' of the first field that has no '=' or has an empty key,
+//		- an empty string if the whole comment was consumed.
+std::string parseNHXComment(const std::string comment, std::map<std::string, std::string> *infoMap)
+	{
+	if (comment.length() < 6 || comment.compare(0, 5, "&&NHX") != 0)
+		return comment;
+	std::string::size_type colonPos = comment.find(':', 5);
+	if (colonPos == std::string::npos)
+		return comment.substr(5);
+	for (;;)
+		{
+		const std::string::size_type eqPos = comment.find('=', colonPos);
+		if (eqPos == std::string::npos || eqPos <= colonPos + 1)
+			return comment.substr(colonPos);
+		const std::string key = comment.substr(colonPos + 1, eqPos - colonPos - 1);
+		const std::string::size_type valStart = eqPos + 1;
+		colonPos = comment.find(':', valStart);
+		if (colonPos == std::string::npos)
+			{
+			// last field: the value runs to the end of the comment
+			if (infoMap)
+				(*infoMap)[key] = comment.substr(valStart);
+			return std::string();
+			}
+		// the value is whatever lies between '=' and the next ':' (possibly nothing)
+		if (infoMap)
+			(*infoMap)[key] = comment.substr(valStart, colonPos - valStart);
+		}
+	}
+
+// Stores the comments in `ecs' on this edge.
+//	If NHXComments is false every comment is appended to unprocessedComments unchanged. Otherwise each comment is
+//	run through parseNHXComment: the key/value pairs found are merged into parsedInfo, and any text left over is
+//	appended to unprocessedComments (as the original comment object if nothing was consumed, or as a new NxsComment
+//	carrying the original line and column numbers).
+void NxsSimpleEdge::DealWithNexusComments(const std::vector<NxsComment> & ecs, bool NHXComments)
+	{
+	typedef std::map<std::string, std::string> InfoMap;
+	for (std::vector<NxsComment>::const_iterator cmt = ecs.begin(); cmt != ecs.end(); ++cmt)
+		{
+		if (!NHXComments)
+			{
+			this->unprocessedComments.push_back(*cmt);
+			continue;
+			}
+		const std::string text = cmt->GetText();
+		InfoMap parsed;
+		const std::string leftover = parseNHXComment(text, &parsed);
+		for (InfoMap::const_iterator kv = parsed.begin(); kv != parsed.end(); ++kv)
+			this->parsedInfo[kv->first] = kv->second;
+		if (leftover.empty())
+			continue;
+		if (leftover.length() == text.length())
+			this->unprocessedComments.push_back(*cmt);
+		else
+			this->unprocessedComments.push_back(NxsComment(leftover, cmt->GetLineNumber(), cmt->GetColumnNumber()));
+		}
+	}
+
+// Rebuilds this tree from the Newick string held by `td'.
+//	The tree is cleared first, then the Newick string (with a ';' appended) is tokenized and the node/edge structure
+//	is built as the tokens are read: '(' opens a new child, ',' starts a sibling, ')' returns to the parent, ':'
+//	introduces a branch length, and any other token is a taxon number or a node name.
+//
+//	\param td a tree description that has already been processed.
+//	\param treatInternalNodeLabelsAsStrings if true, labels of internal nodes are never interpreted as taxon
+//		numbers; they are always stored as names.
+//	\throws NxsNCLAPIException if `td' has not been processed, or if the ';' is reached while still inside a clade.
+//	\throws NxsException if a branch length is not a number, or a taxon number smaller than 1 is found where a
+//		taxon number is required.
+void NxsSimpleTree::Initialize(const NxsFullTreeDescription & td, bool treatInternalNodeLabelsAsStrings)
+	{
+	if (!td.IsProcessed())
+		throw NxsNCLAPIException("A tree description must be processed by ProcessTree before calling NxsSimpleTree::NxsSimpleTree");
+	Clear();
+	std::string s;
+	const std::string & n = td.GetNewick();
+	s.reserve(n.length() + 1);
+	s.assign(n.c_str());
+	s.append(1, ';');
+	istringstream newickstream(s);
+	NxsToken token(newickstream);
+	if (td.RequiresNewickNameTokenizing())
+		{
+		token.UseNewickTokenization(true);
+		}
+	token.SetEOFAllowed(false);
+	// lengths are stored as doubles only if some edge has a length and not all of them are integers
+	realEdgeLens = td.SomeEdgesHaveLengths() && (! td.EdgeLengthsAreAllIntegers());
+	const bool NHXComments = td.HasNHXComments();
+	NxsString emsg;
+	double lastFltEdgeLen;
+	long lastIntEdgeLen;
+	long currTaxNumber;
+	token.GetNextToken();
+	NCL_ASSERT(token.Equals("("));
+	root = AllocNewNode(0L);
+	NxsSimpleNode * currNd = root;
+	NxsSimpleEdge * currEdge = &(currNd->edgeToPar);
+	NxsSimpleNode * tmpNode;
+	// true when the previous token was a ')' or a branch length
+	bool prevInternalOrLength;
+	bool currInternalOrLength = false;
+	for (;;)
+		{
+		currEdge->DealWithNexusComments(token.GetEmbeddedComments(), NHXComments);
+		if (token.Equals(";"))
+			{
+			if (currNd != root)
+				throw NxsNCLAPIException("Semicolon found before the end of the tree description.  This means that more \"(\" characters  than \")\"  were found.");
+			break;
+			}
+		// tstr refers to the token's own storage, so it reflects later calls to GetNextToken()
+		const NxsString & tstr = token.GetTokenReference();
+		const char * t = tstr.c_str();
+		bool handled;
+		handled = false;
+		prevInternalOrLength = currInternalOrLength;
+		currInternalOrLength = false;
+
+		if (tstr.length() == 1)
+			{
+			handled = true;
+			if (t[0] == '(')
+				{
+				// descend: new first/next child of the current node
+				tmpNode = AllocNewNode(currNd);
+				currNd->AddChild(tmpNode);
+				currNd = tmpNode;
+				currEdge = &(currNd->edgeToPar);
+				}
+			else if (t[0] == ')')
+				{
+				// clade finished: back to the parent
+				currNd = currNd->GetParent();
+				NCL_ASSERT(currNd);
+				currEdge = &(currNd->edgeToPar);
+				currInternalOrLength = true;
+				}
+			else if (t[0] == ':')
+				{
+				token.SetLabileFlagBit(NxsToken::hyphenNotPunctuation); // this allows us to deal with sci. not. in branchlengths (and negative branch lengths).
+				token.GetNextToken();
+				currEdge->DealWithNexusComments(token.GetEmbeddedComments(), NHXComments);
+				t = token.GetTokenReference().c_str();
+				if (realEdgeLens)
+					{
+					if (!NxsString::to_double(t, &lastFltEdgeLen))
+						{
+						emsg << "Expecting a number as a branch length. Found " << tstr;
+						throw NxsException(emsg, token);
+						}
+					currEdge->SetDblEdgeLen(lastFltEdgeLen, t);
+					}
+				else
+					{
+					if (!NxsString::to_long(t, &lastIntEdgeLen))
+						{
+						emsg << "Expecting a number as a branch length. Found " << tstr;
+						throw NxsException(emsg, token);
+						}
+					currEdge->SetIntEdgeLen((int)lastIntEdgeLen, t);
+					}
+				currInternalOrLength = true;
+				}
+			else if (t[0] == ',')
+				{
+				// sibling: go up to the parent and open a new child there
+				currNd = currNd->GetParent();
+				NCL_ASSERT(currNd);
+				tmpNode = AllocNewNode(currNd);
+				currNd->AddChild(tmpNode);
+				currNd = tmpNode;
+				currEdge = &(currNd->edgeToPar);
+				}
+			else
+				handled = false;
+			}
+		if (!handled)
+			{
+			//std::cerr << "!handled t = " << t << "\n";
+
+			// the token is a label: either a 1-based taxon number or a name
+			bool wasReadAsNumber = false;
+			if (currNd->IsTip() ||  !treatInternalNodeLabelsAsStrings) {
+				wasReadAsNumber = NxsString::to_long(t, &currTaxNumber);
+			}
+			if (wasReadAsNumber)
+				{
+				if (currTaxNumber < 1)
+					{
+					// a number below 1 is only tolerated (as a name) directly after a ')' or a branch length
+					if (!prevInternalOrLength)
+						{
+						emsg << "Expecting a taxon number greater than 0. Found " << tstr;
+						throw NxsException(emsg, token);
+						}
+					wasReadAsNumber = false;
+					}
+				}
+			if (wasReadAsNumber)
+				{
+				currNd->taxIndex = (unsigned)currTaxNumber - 1;
+				if (currNd->lChild == NULL)
+					{
+					// grow `leaves' (padding with NULL) so that the leaf can be stored at its taxon index
+					while (currNd->taxIndex >= leaves.size())
+						leaves.push_back(0L);
+					leaves[currNd->taxIndex] = currNd;
+					}
+				}
+			else
+				currNd->name = t;
+			}
+		token.GetNextToken();
+		}
+	}
+// Looks up a tree by name; the comparison is done on the upper-cased name via capNameToInd.
+// \returns the 1-based number of the tree, or 0 if no tree has that name.
+unsigned NxsTreesBlock::TreeLabelToNumber(const std::string & name) const
+	{
+	NxsString capName(name.c_str());
+	capName.ToUpper();
+	const std::map<std::string, unsigned>::const_iterator hit = capNameToInd.find(capName);
+	if (hit != capNameToInd.end())
+		return hit->second + 1;
+	return 0;
+	}
+// \returns the largest valid 0-based tree index, or UINT_MAX if no trees are stored.
+unsigned NxsTreesBlock::GetMaxIndex() const
+	{
+	const unsigned nTrees = (unsigned)trees.size();
+	return (nTrees == 0 ? UINT_MAX : nTrees - 1);
+	}
+/*!
+ Returns the number of indices that correspond to the label (and the number
+ of items that would be added to *inds if inds points to an empty set).
+
+ The label is first tried as a tree name (see TreeLabelToNumber); if it matches, the 0-based index of that tree is
+ inserted into *inds (when inds is non-NULL) and 1 is returned. Otherwise the lookup is delegated to
+ GetIndicesFromSetOrAsNumber, using the stored tree sets and the maximum tree index.
+*/
+unsigned NxsTreesBlock::GetIndicesForLabel(const std::string &label, NxsUnsignedSet *inds) const
+	{
+	const unsigned numb = TreeLabelToNumber(label);
+	if (numb > 0)
+		{
+		if (inds)
+			inds->insert(numb - 1);
+		return 1;
+		}
+	return GetIndicesFromSetOrAsNumber(label, inds, treeSets, GetMaxIndex(), "tree");
+	}
+// Stores `inds' as the tree set named `label'.
+// \returns true if a set with that label already existed (and has now been replaced).
+bool NxsTreesBlock::AddNewIndexSet(const std::string &label, const NxsUnsignedSet & inds)
+	{
+	const NxsString key(label.c_str());
+	const bool alreadyPresent = (treeSets.find(key) != treeSets.end());
+	treeSets[key] = inds;
+	return alreadyPresent;
+	}
+/*!
+	Stores `inds' as the tree partition named `label'.
+	Returns true if this partition replaces an older definition with the same label.
+*/
+bool NxsTreesBlock::AddNewPartition(const std::string &label, const NxsPartition & inds)
+	{
+	const NxsString key(label.c_str());
+	const bool alreadyPresent = (treePartitions.find(key) != treePartitions.end());
+	treePartitions[key] = inds;
+	return alreadyPresent;
+	}
+/*!
+	Initializes `NCL_BLOCKTYPE_ATTR_NAME' to "TREES", `defaultTreeInd' to UINT_MAX (no default tree chosen), and
+	passes `tb' on to the NxsTaxaBlockSurrogate base. The parsing and writing options are set to their defaults
+	(see the assignments below).
+*/
+NxsTreesBlock::NxsTreesBlock(
+  NxsTaxaBlockAPI *tb)	/* the NxsTaxaBlockAPI object to be queried for taxon names appearing in tree descriptions */
+  :NxsTaxaBlockSurrogate(tb, NULL),
+   constructingTaxaBlock(false),
+  processedTreeValidationFunction(NULL),
+  ptvArg(NULL)
+	{
+	NCL_BLOCKTYPE_ATTR_NAME = "TREES";
+	defaultTreeInd = UINT_MAX;
+
+	// options that are switched on by default
+	writeTranslateTable = true;
+	processAllTreesDuringParse = true;
+	validateInternalNodeLabels = true;
+	treatAsRootedByDefault = true;
+	allowNumericInterpretationOfTaxLabels = true;
+
+	// options that are switched off by default
+	allowImplicitNames = false;
+	useNewickTokenizingDuringParse = false;
+	treatIntegerLabelsAsNumbers = false;
+	writeFromNodeEdgeDataStructure = false;
+	allowUnquotedSpaces = false;
+	disambiguateDuplicateNames = false;
+	}
+/*!
+	Destructor. The body is empty: nothing is released explicitly here.
+*/
+NxsTreesBlock::~NxsTreesBlock()
+	{
+	}
+/*!
+	Redirects data member `taxa' to `tb'. The NxsTaxaBlockAPI object that `taxa' pointed to before is left alone
+	(it is not deleted). `tb' must be non-NULL.
+*/
+void NxsTreesBlock::ReplaceTaxaBlockPtr(
+  NxsTaxaBlockAPI *tb)		/* pointer to new NxsTaxaBlockAPI object (does not attempt to delete the object previously pointed to) */
+	{
+	NCL_ASSERT(tb != NULL);
+	this->taxa = tb;
+	}
+/*! \returns the Newick description of the tree stored at position `i' (0-offset). Assumes that `i' is in the
+	range [0..ntrees).
+
+	in NCL version 2.1 and greater, this newick string is guaranteed to use taxon numbers (1-based)
+	in the newick string.  This makes it easier to parse.
+*/
+NxsString NxsTreesBlock::GetTreeDescription(
+  unsigned i)	/* the index of the tree for which the description is to be returned */
+	{
+	const NxsFullTreeDescription & ftd = GetFullTreeDescription(i);
+	return NxsString(ftd.GetNewick().c_str());
+	}
+/*!
+	Reports whether the `i'th tree (0-offset) is rooted. Assumes that `i' is in the range [0..ntrees).
+*/
+bool NxsTreesBlock::IsRootedTree(
+  unsigned i)	/* the index of the tree in question */
+  	{
+	const NxsFullTreeDescription & ftd = GetFullTreeDescription(i);
+	return ftd.IsRooted();
+	}
+/*!
+	Returns the name of the tree stored at position `i' (0-offset). Assumes that `i' is in the range [0..ntrees).
+*/
+NxsString NxsTreesBlock::GetTreeName(
+  unsigned i)	/* the index of the tree for which the name is to be returned */
+	{
+	const NxsFullTreeDescription & ftd = GetFullTreeDescription(i);
+	return NxsString(ftd.GetName().c_str());
+	}
+/*!
+	Reports whether the `i'th tree (0-offset) is the default tree, i.e. whether `i' equals the index returned by
+	GetNumDefaultTree(). Assumes that `i' is in the range [0..ntrees).
+*/
+bool NxsTreesBlock::IsDefaultTree(
+  unsigned i)	/* the index of the tree in question */
+	{
+	const unsigned defaultInd = GetNumDefaultTree();
+	return (defaultInd == i);
+	}
+// \returns a reference to the full description of the tree at 0-based index `i'. The index must be smaller than
+// the number of stored trees; the access goes through at().
+const NxsFullTreeDescription & NxsTreesBlock::GetFullTreeDescription(unsigned i) const
+	{
+	NCL_ASSERT(i < trees.size());
+	const NxsFullTreeDescription & ftd = trees.at(i);
+	return ftd;
+	}
+/*!
+	Writes a brief report of the contents of this block to `out': a line stating the number of trees, followed by
+	one line per tree giving its 1-based number, its name, whether it is rooted, and whether it is the default tree.
+	Overrides the abstract virtual function in the base class.
+*/
+void NxsTreesBlock::Report(
+  std::ostream &out) NCL_COULD_BE_CONST /* the output stream to which to write the report */ /*v2.1to2.2 1 */
+	{
+	const unsigned ntrees = GetNumTrees();
+	out << '\n' <<  NCL_BLOCKTYPE_ATTR_NAME << " block contains ";
+	switch (ntrees)
+		{
+		case 0:
+			out << "no trees" << endl;
+			return;
+		case 1:
+			out << "one tree" << endl;
+			break;
+		default:
+			out << ntrees << " trees" << endl;
+			break;
+		}
+	for (unsigned k = 0; k < ntrees; k++)
+		{
+		const NxsFullTreeDescription & tree = GetFullTreeDescription(k);
+		const char * const rootedness = (tree.IsRooted() ? "rooted" : "unrooted");
+		out << "    " << (k+1) << "    " << tree.GetName();
+		out << "    (" << rootedness;
+		if (defaultTreeInd != k)
+			out << ')' << endl;
+		else
+			out << ",default tree)" << endl;
+		}
+	}
+/*!
+	Appends a one-line summary of this block's contents to the referenced NxsString. An example of the output of
+	this command is shown below:
+>
+	TREES block contains 102 trees
+>
+*/
+void NxsTreesBlock::BriefReport(
+  NxsString &s) NCL_COULD_BE_CONST /* reference to the string in which to store the contents of the brief report */ /*v2.1to2.2 1 */
+	{
+	const unsigned ntrees = GetNumTrees();
+	s << "\n\n" << NCL_BLOCKTYPE_ATTR_NAME << " block contains ";
+	switch (ntrees)
+		{
+		case 0:
+			s += "no trees\n";
+			break;
+		case 1:
+			s += "one tree\n";
+			break;
+		default:
+			s << ntrees << " trees\n";
+			break;
+		}
+	}
+/*!
+	Returns the block to its empty state in preparation for reading a new TREES block: resets the NxsBlock base and
+	the taxa-block surrogate, discards all stored trees, tree names, tree sets and tree partitions, and sets
+	`defaultTreeInd' back to UINT_MAX (no default tree).
+*/
+void NxsTreesBlock::Reset()
+	{
+	NxsBlock::Reset();
+	ResetSurrogate();
+
+	// stored content
+	trees.clear();
+	capNameToInd.clear();
+	treeSets.clear();
+	treePartitions.clear();
+
+	// parse state
+	defaultTreeInd = UINT_MAX;
+	constructingTaxaBlock = false;
+	newtaxa = false;
+	}
+/*!
+	Returns the 0-offset index of the default tree. If no default tree has been chosen (`defaultTreeInd' is still
+	UINT_MAX, e.g. because the data file did not mark a tree with an asterisk), 0 is returned.
+*/
+unsigned NxsTreesBlock::GetNumDefaultTree()
+	{
+	if (defaultTreeInd == UINT_MAX)
+		return 0;
+	return defaultTreeInd;
+	}
+/*!
+	Returns the number of trees stored in this NxsTreesBlock object (const overload).
+*/
+unsigned NxsTreesBlock::GetNumTrees() const
+	{
+	return static_cast<unsigned>(trees.size());
+	}
+/*!
+	Returns the number of trees stored in this NxsTreesBlock object (non-const overload).
+*/
+unsigned NxsTreesBlock::GetNumTrees()
+	{
+	return static_cast<unsigned>(trees.size());
+	}
+// Writes a TRANSLATE command to `out': one "<1-based number> <escaped taxon label>" entry per taxon in the
+// taxa block, entries separated by ",\n", the command terminated by ";\n". Requires `taxa' to be set.
+void NxsTreesBlock::WriteTranslateCommand(std::ostream & out) const
+	{
+	NCL_ASSERT(taxa);
+	out << "    TRANSLATE" << "\n";
+	const unsigned nt = taxa->GetNTaxTotal();
+	const char * separator = "";
+	for (unsigned taxInd = 0; taxInd < nt; ++taxInd)
+		{
+		out << separator;
+		out << "        " << taxInd + 1 << ' ' << NxsString::GetEscaped(taxa->GetTaxonLabel(taxInd));
+		separator = ",\n";
+		}
+	out << ";\n";
+	}
+
+// Writes one TREE command per stored tree to `out'.
+//	Each tree is processed (ProcessTree) before it is written. The default tree is marked with "* ", unnamed trees
+//	are written as "UnnamedTree", and the rooting is emitted as a [&R] or [&U] comment. The Newick string is either
+//	regenerated from a NxsSimpleTree (when writeFromNodeEdgeDataStructure is set) or taken directly from the tree
+//	description.
+//	\throws NxsNCLAPIException if called while the block is still constructing its taxa block.
+void NxsTreesBlock::WriteTreesCommand(std::ostream & out) const
+	{
+	if (constructingTaxaBlock)
+		{
+		// this check is intended to make sure that ProcessTree really behaves
+		//	as a const function.
+		// If we are constructingTaxaBlock, then the it can modify the contained taxa block
+		throw NxsNCLAPIException("WriteTreesCommand block cannot be called while the Trees Block is still being constructed");
+		}
+	NxsTreesBlock *ncthis = const_cast<NxsTreesBlock *>(this);
+	NxsSimpleTree nst(0, 0.0);
+	const bool useLeafNames = !(this->writeTranslateTable);
+	for (unsigned k = 0; k < trees.size(); k++)
+		{
+		// The macro is always defined (as 0 by default), so its value -- not its mere existence -- must be tested;
+		// otherwise an unused translated description would be built for every tree.
+#		if REGRESSION_TESTING_GET_TRANS_TREE_DESC
+			NxsTreesBlock *nc = const_cast<NxsTreesBlock *>(this);
+			NxsString transTreeDesc = nc->GetTranslatedTreeDescription(k);
+#		endif
+		NxsFullTreeDescription & treeDesc = trees.at(k);
+		ncthis->ProcessTree(treeDesc);
+		const std::string & name = treeDesc.GetName();
+		out << "    TREE ";
+		if (k == defaultTreeInd)
+			out << "* ";
+		if (name.length() == 0)
+			out <<  "UnnamedTree = [&";
+		else
+			out << NxsString::GetEscaped(name) << " = [&";
+		out << (treeDesc.IsRooted() ? 'R' : 'U');
+		out << ']';
+		if (writeFromNodeEdgeDataStructure)
+			{
+			nst.Initialize(treeDesc);
+			nst.WriteAsNewick(out, true, useLeafNames, true, taxa, true);
+			}
+		else
+			out << treeDesc.GetNewick();
+		out << ";\n";
+
+
+		// Debugging aid: dumps the parent index and branch length of every node of the tree to Rcpp::Rcout.
+#		if defined(PHYLOBASE_TESTING)
+			const NxsTreesBlock * treeBlock = this;
+			std::vector<std::string> treeNames;      //vector of tree names
+			const NxsTaxaBlockAPI * taxaB = GetTaxaBlockPtr(0L);
+			unsigned ntax = taxaB->GetNTax();
+
+
+			std::vector<unsigned> parentVector; //Index of the parent. 0 means no parent.
+			std::vector<double> branchLengthVector;
+			parentVector.clear();
+			branchLengthVector.clear();
+			const NxsFullTreeDescription & ftd = treeBlock->GetFullTreeDescription(k);
+			treeNames.push_back(ftd.GetName());
+			NxsSimpleTree simpleTree(ftd, -1, -1.0);
+			std::vector<const NxsSimpleNode *> ndVector =  simpleTree.GetPreorderTraversal();
+			// internal nodes are numbered after the tips
+			unsigned internalNdIndex = ntax;
+			for (std::vector<const NxsSimpleNode *>::const_iterator ndIt = ndVector.begin(); ndIt != ndVector.end(); ++ndIt)
+				{
+				NxsSimpleNode * nd = (NxsSimpleNode *) *ndIt;
+				unsigned nodeIndex;
+				if (nd->IsTip())
+					{
+					nodeIndex = nd->GetTaxonIndex();
+					Rcpp::Rcout << " leaf node # = " <<  nodeIndex << '\n';
+					}
+				else
+					{
+					nodeIndex = internalNdIndex++;
+					nd->SetTaxonIndex(nodeIndex);
+					Rcpp::Rcout << " internal node # = " << nd->GetTaxonIndex()  << '\n';
+					}
+				if (parentVector.size() < nodeIndex + 1)
+					{
+					parentVector.resize(nodeIndex + 1);
+					}
+				if (branchLengthVector.size() < nodeIndex + 1)
+					{
+					branchLengthVector.resize(nodeIndex + 1);
+					}
+				NxsSimpleEdge edge = nd->GetEdgeToParent();
+
+				NxsSimpleNode * par = 0L;
+				par = (NxsSimpleNode *) edge.GetParent();
+				if (par != 0L)
+					{
+					parentVector[nodeIndex] = 1 + par->GetTaxonIndex();
+					branchLengthVector[nodeIndex] = edge.GetDblEdgeLen();
+					}
+				else
+					{
+					parentVector[nodeIndex] = 0;
+					branchLengthVector[nodeIndex] = -1.0;
+					}
+				}
+			Rcpp::Rcout << "Parents = [";
+			for (std::vector<unsigned>::const_iterator nIt = parentVector.begin(); nIt != parentVector.end(); ++nIt)
+				{
+				Rcpp::Rcout << *nIt << ", ";
+				}
+			Rcpp::Rcout << "]\nbranch lengths = [";
+			for (std::vector<double>::const_iterator nIt = branchLengthVector.begin(); nIt != branchLengthVector.end();  ++nIt)
+				{
+				Rcpp::Rcout << *nIt << ", ";
+				}
+			Rcpp::Rcout << "]\n";
+#endif
+		}
+	}
+/*!
+	Serializes this TREES block to `out' in NEXUS syntax.  Nothing at all is written when the block holds no
+	trees; otherwise the output is bracketed by "BEGIN TREES;" and "END;" and contains, in order, the basic
+	block commands, the TRANSLATE command (only when `writeTranslateTable' is set), the TREE commands and
+	any skipped commands.
+*/
+void NxsTreesBlock::WriteAsNexus(std::ostream &out) const
+	{
+	if (GetNumTrees() != 0)
+		{
+		out << "BEGIN TREES;\n";
+		WriteBasicBlockCommands(out);
+		if (this->writeTranslateTable)
+			{
+			WriteTranslateCommand(out);
+			}
+		WriteTreesCommand(out);
+		WriteSkippedCommands(out);
+		out << "END;\n";
+		}
+	}
+/*!
+	Returns a newly allocated NxsTreesBlock when `reader' is non-NULL and `idneeded' is exactly "TREES";
+	returns NULL otherwise.  The new block is configured to create implied blocks and to implement the
+	link API.  The caller receives the raw pointer produced by `new'.
+*/
+NxsTreesBlock *NxsTreesBlockFactory::GetBlockReaderForID(const std::string & idneeded, NxsReader *reader, NxsToken *)
+	{
+	const bool canServeRequest = (reader != NULL && idneeded == "TREES");
+	if (!canServeRequest)
+		{
+		return NULL;
+		}
+	NxsTreesBlock * freshBlock = new NxsTreesBlock(NULL);
+	freshBlock->SetCreateImpliedBlock(true);
+	freshBlock->SetImplementsLinkAPI(true);
+	return freshBlock;
+	}
+/*!
+	Fills `capNameToInd' with the default label-to-index mapping for the current taxa block: for every taxon
+	both its 1-based number (as a string) and its upper-cased label are mapped to its 0-based index.
+
+	If no taxa block is attached yet, one is obtained through AssureTaxaBlock (`cmd' names the command being
+	read).  If the taxa block is empty and `allowImplicitNames' is set, the block switches to
+	"constructing taxa block" mode, a warning is issued through `nexusReader' (when available) and the table
+	is left untouched; if implicit names are not allowed an exception is generated instead.
+*/
+void NxsTreesBlock::ConstructDefaultTranslateTable(NxsToken &token, const char * cmd)
+	{
+	if (taxa == NULL)
+		{
+		if (nxsReader == NULL)
+			GenerateNxsException(token, "A Taxa block must be read before the Trees block can be read.");
+		unsigned numTaxaBlocks;
+		nxsReader->GetTaxaBlockByTitle(NULL, &numTaxaBlocks);
+		// An implied taxa block may only be created when the reader has none at all.
+		const bool mayCreateImplied = (numTaxaBlocks == 0 && allowImplicitNames && createImpliedBlock);
+		AssureTaxaBlock(mayCreateImplied, token, cmd);
+		}
+	const unsigned numTaxa = taxa->GetNTaxTotal();
+	if (numTaxa == 0)
+		{
+		if (allowImplicitNames)
+			{
+			constructingTaxaBlock = true;
+			if (nexusReader)
+				nexusReader->NexusWarnToken("A TAXA block should be read before the TREES block (but no TAXA block was found).  Taxa will be inferred from their usage in the TREES block.", NxsReader::AMBIGUOUS_CONTENT_WARNING , token);
+			newtaxa = true;
+			}
+		else
+			GenerateNxsException(token, "Taxa block must be read before the Trees block can be read.");
+		}
+	// While taxa are being inferred from the trees there is nothing to pre-register.
+	if (constructingTaxaBlock)
+		return;
+	for (unsigned taxInd = 0; taxInd < numTaxa; ++taxInd)
+		{
+		// Key 1: the taxon number as it would appear in a tree ("1", "2", ...).
+		NxsString numberKey;
+		numberKey += (taxInd + 1);
+		capNameToInd[numberKey] = taxInd;
+		// Key 2: the taxon label folded to upper case.
+		NxsString labelKey(taxa->GetTaxonLabel(taxInd).c_str());
+		labelKey.ToUpper();
+		capNameToInd[labelKey] = taxInd;
+		}
+	}
+/*!
+	Reads the body of a TRANSLATE command (comma-separated "key value" pairs terminated by ";") and records
+	the mappings in `capNameToInd'.
+
+	For a value that is a known taxon, the upper-cased key is mapped to that taxon's 0-based index (a warning
+	is issued when the key itself already names a different taxon).  For an unknown value: while the taxa
+	block is being constructed the value is added as a new taxon and registered under its number and its
+	upper-cased label; otherwise a warning is issued through `nexusReader' (if any) and the pair is ignored.
+
+	Throws NxsException when a pair is not followed by "," or ";".  On normal completion
+	`constructingTaxaBlock' is reset to false.
+*/
+void NxsTreesBlock::HandleTranslateCommand(NxsToken &token)
+	{
+	for (unsigned pairNum = 0;; ++pairNum)
+		{
+		token.GetNextToken();
+		if (token.Equals(";"))
+			break;
+		NxsString key(token.GetTokenReference().c_str());
+		const unsigned keyInd = taxa->TaxLabelToNumber(key);
+		token.GetNextToken();
+		NxsString value(token.GetTokenReference().c_str());
+		const unsigned valueInd = taxa->TaxLabelToNumber(value);
+		if (valueInd > 0)
+			{
+			// Known taxon: (re)direct the key to it.
+			const bool keyNamesOtherTaxon = (keyInd != 0 && keyInd != valueInd);
+			if (keyNamesOtherTaxon && nexusReader)
+				{
+				errormsg << "TRANSLATE command overwriting the taxon " << key << " with a redirection to " << value;
+				nexusReader->NexusWarnToken(errormsg, NxsReader::OVERWRITING_CONTENT_WARNING, token);
+				errormsg.clear();
+				}
+			key.ToUpper();
+			capNameToInd[key] = valueInd - 1;
+			}
+		else if (constructingTaxaBlock)
+			{
+			// Unknown taxon while inferring the taxa block: create it.
+			taxa->SetNtax(pairNum + 1);
+			// bug fix March 10, 2009 we had had an erroneous "+ 1" added to the index
+			const unsigned newVal = taxa->AddTaxonLabel(value);
+			NxsString numV;
+			numV += (1 + newVal);
+			if (capNameToInd.find(numV) == capNameToInd.end())
+				capNameToInd[numV] = newVal;
+			// bug fix March 10, 2009.  Without a taxa block, ConstructDefaultTranslateTable
+			//	could not register any labels, so the label itself has to be added here.
+			value.ToUpper();
+			if (capNameToInd.find(value) == capNameToInd.end())
+				capNameToInd[value] = newVal;
+			}
+		else if (nexusReader)
+			{
+			errormsg << "Unknown taxon " << value << " in TRANSLATE command.\nThe translate key "<< key << " has NOT been added to the translation table!";
+			nexusReader->NexusWarnToken(errormsg, NxsReader::PROBABLY_INCORRECT_CONTENT_WARNING, token);
+			errormsg.clear();
+			}
+		token.GetNextToken();
+		if (token.Equals(";"))
+			break;
+		if (!token.Equals(","))
+			{
+			errormsg << "Expecting a , or ; after a translate key-value pair. Found " << token.GetTokenReference();
+			throw NxsException(errormsg, token);
+			}
+		}
+	constructingTaxaBlock = false;
+	}
+
+/*!
+	Parses a tree that is available as a vector of already-read tokens.
+
+	The tokens are escaped, concatenated and terminated with ';', and the resulting string is re-tokenized
+	with a fresh NxsToken (so most of the original file position information is lost) before being handed
+	to ProcessTokenStreamIntoTree.  Newick tokenization is enabled when `td' requires it.  The remaining
+	arguments are forwarded unchanged; unquoted spaces are never allowed on this path.
+
+	If parsing throws an NxsException, the position, line and column of the first token in `tokenVec' are
+	added to the exception before it is rethrown.  The exception is rethrown with `throw;' so that its
+	dynamic type is preserved (a `throw x;' would copy it as a plain NxsException).
+*/
+void NxsTreesBlock::ProcessTokenVecIntoTree(
+  const ProcessedNxsCommand & tokenVec,
+  NxsFullTreeDescription & td,
+  NxsLabelToIndicesMapper *taxa,
+  std::map<std::string, unsigned> &capNameToInd,
+  bool allowNewTaxa,
+  NxsReader * nexusReader,
+  const bool respectCase,
+  const bool validateInternalNodeLabels,
+  const bool treatIntegerLabelsAsNumbers,
+  const bool allowNumericInterpretationOfTaxLabels,
+  const bool autoNumberDuplicateNames)
+	{
+	ProcessedNxsCommand::const_iterator tvIt = tokenVec.begin();
+	ostringstream tokenStream;
+	// Offsets of the first token; used to shift any error location reported by the inner parser.
+	long line = 0;
+	long col = 0;
+	file_pos pos = 0;
+	if (!tokenVec.empty())
+		{
+		line = tvIt->GetLineNumber();
+		col = tvIt->GetColumnNumber();
+		pos = tvIt->GetFilePosition();
+		for (;tvIt != tokenVec.end(); ++tvIt)
+			tokenStream << NxsString::GetEscaped(tvIt->GetToken());
+		tokenStream << ';';
+		}
+	std::string s = tokenStream.str();
+	istringstream newickstream(s);
+	NxsToken token(newickstream);
+	if (td.RequiresNewickNameTokenizing())
+		{
+		token.UseNewickTokenization(true);
+		}
+
+	try
+		{
+		ProcessTokenStreamIntoTree(token,
+		                           td,
+		                           taxa,
+		                           capNameToInd,
+		                           allowNewTaxa,
+		                           nexusReader,
+		                           respectCase,
+		                           validateInternalNodeLabels,
+		                           treatIntegerLabelsAsNumbers,
+		                           allowNumericInterpretationOfTaxLabels,
+		                           false, /* allowUnquotedSpaces */
+		                           autoNumberDuplicateNames);
+		}
+	catch (NxsException & x)
+		{
+		x.pos += pos;
+		x.line += line;
+		x.col += col;
+		// Rethrow the caught object itself (including the adjustments above) instead of a sliced copy.
+		throw;
+		}
+	}
+
+/*!
+	Returns the tokens of this tree's newick string, in order, up to (but not including) the terminating
+	';'.  A ';' is appended to a private copy of the string when the stored newick is empty or does not
+	already end with one, so the tokenizer always finds its terminator.  Newick tokenization is enabled when
+	this description requires it, and hyphens are treated as part of a token rather than as punctuation.
+*/
+std::vector<std::string> NxsFullTreeDescription::GetTreeTokens() const
+	{
+	const std::string & n = this->GetNewick();
+	std::string y;
+	const std::string *p = &n;
+	// Inspect the LAST character via rbegin(); rend() is a past-the-end iterator and must not be dereferenced.
+	if (n.empty() || *n.rbegin() != ';')
+		{
+		y = n;
+		y.append(1, ';');
+		p = &y;
+		}
+	istringstream newickstream(*p);
+	NxsToken tokenizer(newickstream);
+	if (this->RequiresNewickNameTokenizing())
+		{
+		tokenizer.UseNewickTokenization(true);
+		}
+	std::vector<std::string> tokens;
+	// The hyphen flag is "labile": it is set again before every token read.
+	tokenizer.SetLabileFlagBit(NxsToken::hyphenNotPunctuation);
+	tokenizer.GetNextToken();
+	while (!tokenizer.EqualsCaseSensitive(";"))
+		{
+		tokens.push_back(tokenizer.GetTokenReference());
+		tokenizer.SetLabileFlagBit(NxsToken::hyphenNotPunctuation);
+		tokenizer.GetNextToken();
+		}
+	return tokens;
+	}
+
+
+/*!
+	Produces a label for a taxon name that has already been used in the current tree.
+
+	Starting from the lookup key `ucl' and the raw label `t', the suffix "_NCLDuplicate<k>" (k = 2, 3, ...)
+	is appended until the key no longer maps (through `capNameToInd') to an index found in
+	`taxaEncountered'.  The key uses an upper-case suffix ("_NCLDUPLICATE<k>") unless `respectCase' is true.
+
+	`ucl' is updated in place to the final key; the matching raw label is returned.  If the initial key is
+	not a duplicate, `ucl' is left untouched and a copy of `t' is returned.
+*/
+NxsString disambiguateName(const std::map<std::string, unsigned> &  capNameToInd,
+                               const std::set<unsigned> & taxaEncountered,
+                               NxsString & ucl,
+                               const char * t,
+                               bool respectCase)
+    {
+    const std::string baseKey = ucl;
+    const char * const keySuffix = (respectCase ? "_NCLDuplicate" : "_NCLDUPLICATE");
+    NxsString candidateLabel(t);
+    unsigned copyNumber = 1;
+    for (;;)
+        {
+        // An unknown key maps to UINT_MAX, which is never a member of taxaEncountered.
+        std::map<std::string, unsigned>::const_iterator hit = capNameToInd.find(ucl);
+        const unsigned taxonIndex = (hit == capNameToInd.end() ? UINT_MAX : hit->second);
+        if (taxaEncountered.find(taxonIndex) == taxaEncountered.end())
+            break;
+        ++copyNumber; // the first suffix used is 2
+        std::stringstream keyBuilder;
+        keyBuilder << baseKey << keySuffix << copyNumber;
+        std::stringstream labelBuilder;
+        labelBuilder << t << "_NCLDuplicate" << copyNumber;
+        ucl.assign(keyBuilder.str());
+        candidateLabel.assign(labelBuilder.str());
+        }
+    return candidateLabel;
+    }
+
+/*!
+	Parses one newick tree description from `token' (up to the terminating ';') and stores a normalized
+	form in `td'.
+
+	The normalized newick written to `td.newick' has every recognized taxon label replaced by its 1-based
+	taxon number; embedded comments are copied through.  `td.flags' is updated to record that the tree has
+	been processed and whether it has edge lengths (integer or real), missing edge lengths, polytomies,
+	degree-two nodes, internal labels, NHX comments and all taxa.
+
+	Arguments:
+	  token                  tokenizer positioned just before the opening "(" of the tree.
+	  td                     description to fill in; its rooted bit is read to classify polytomies.
+	  taxa                   label-to-index mapper used for taxset lookups and for appending new labels.
+	  capNameToInd           label -> 0-based index table; extended when new taxa are registered.
+	  allowNewTaxa           when true, unrecognized leaf labels are appended to `taxa' instead of failing.
+	  nexusReader            when non-NULL, a missing comma before "(" only produces a warning and the comma
+	                         is implied; when NULL the same condition throws.
+	  respectCase            when false, labels are upper-cased before being looked up.
+	  validateInternalNodeLabels
+	                         when true, labels following ")" are looked up in `capNameToInd'.
+	  treatIntegerLabelsAsNumbers
+	                         when true, new integer labels are interpreted as taxon numbers (mixing integer
+	                         and non-integer new labels is then an error).
+	  allowNumericInterpretationOfTaxLabels
+	                         when true, number-as-index lookup is disabled around the GetIndexSet call and the
+	                         number of each newly added label is registered as an extra key.
+	  allowUnquotedSpaces    when true, the spaceDoesNotBreakToken flag is set before each token is read.
+	  autoNumberDuplicateNames
+	                         when true, a repeated label is passed to disambiguateName instead of throwing.
+
+	Throws NxsException on any syntactically invalid description.
+*/
+void NxsTreesBlock::ProcessTokenStreamIntoTree(
+  NxsToken & token,
+  NxsFullTreeDescription & td,
+  NxsLabelToIndicesMapper *taxa,
+  std::map<std::string, unsigned> &capNameToInd,
+  bool allowNewTaxa,
+  NxsReader * nexusReader,
+  const bool respectCase,
+  const bool validateInternalNodeLabels,
+  const bool treatIntegerLabelsAsNumbers,
+  const bool allowNumericInterpretationOfTaxLabels,
+  const bool allowUnquotedSpaces,
+  const bool autoNumberDuplicateNames)
+	{
+	bool previousNonIntegerLabels=false, previousAllIntegerLabels = false;
+	NxsString errormsg;
+	int & flags = td.flags;
+	bool NHXComments = false;
+	bool someMissingEdgeLens = false;
+	bool someHaveEdgeLens = false;
+	bool someRealEdgeLens = false;
+	bool hasPolytomies = false;
+	bool hasDegTwoNodes = false;
+	bool hasInternalLabels = false;
+	bool hasInternalLabelsInTaxa = false;
+	bool hasInternalLabelsNotInTaxa = false;
+	const bool rooted = (flags & NxsFullTreeDescription::NXS_IS_ROOTED_BIT);
+	// One counter per currently open "(": the number of children seen so far in that clade.
+	std::stack<unsigned> nchildren;
+	// 0-based indices of the taxa already placed in this tree (used to detect duplicates).
+	std::set<unsigned> taxaEncountered;
+	double minDblEdgeLen = DBL_MAX;
+	int minIntEdgeLen = INT_MAX;
+	double lastFltEdgeLen;
+	long lastIntEdgeLen;
+	bool taxsetRead = false;
+	token.GetNextToken();
+	ostringstream newickStream;
+	NxsString nameDisambiguator;
+	const NxsString * taxaLabelPtr;
+	if (!token.Equals("("))
+		{
+		errormsg << "Expecting a ( to start the tree description, but found " << token.GetTokenReference();
+		throw NxsException(errormsg, token);
+		}
+	nchildren.push(0);
+	newickStream << '(';
+	int prevToken = NXS_TREE_OPEN_PARENS_TOKEN;
+	if (allowUnquotedSpaces)
+		token.SetLabileFlagBit(NxsToken::spaceDoesNotBreakToken);
+	token.GetNextToken();
+	for (;;)
+		{
+		//std::cerr << "t = \"" << token.GetToken() << "\"\n";
+		// Copy comments that were embedded before this token, and detect NHX-style ("&&NHX...") comments.
+		const std::vector<NxsComment> & ecs = token.GetEmbeddedComments();
+		for (std::vector<NxsComment>::const_iterator ecsIt = ecs.begin(); ecsIt != ecs.end(); ++ecsIt)
+			{
+			if (!NHXComments)
+				{
+				const std::string & ns = ecsIt->GetText();
+				if (ns.length() > 5 && ns[0] == '&' && ns[1] == '&' && ns[2] == 'N' &&ns[3] == 'H' && ns[4] == 'X')
+					NHXComments = true;
+				}
+			ecsIt->WriteAsNexus(newickStream);
+			}
+		if (token.Equals(";"))
+			{
+			if (!nchildren.empty())
+				throw NxsException("Semicolon found before the end of the tree description.  This means that more \"(\" characters  than \")\"  were found.", token);
+			break;
+			}
+		const NxsString & tstr = token.GetTokenReference();
+		const char * t = tstr.c_str();
+		bool handled;
+		handled = false;
+		// Single-character tokens may be one of the structural symbols ( ) : ,
+		if (tstr.length() == 1)
+			{
+			if (t[0] == '(')
+				{
+				if (nchildren.empty())
+					throw NxsException("End of tree description.  Expected ; but found (", token);
+				if (prevToken == NXS_TREE_CLOSE_PARENS_TOKEN || prevToken == NXS_TREE_CLADE_NAME_TOKEN || prevToken == NXS_TREE_BRLEN_TOKEN)
+					{
+					errormsg << "Expecting a , before a new subtree definition:\n \")(\"\n \"name(\" and\n \"branch-length(\"\n are prohibited.";
+					if (nexusReader)
+						nexusReader->NexusWarnToken(errormsg, NxsReader::PROBABLY_INCORRECT_CONTENT_WARNING, token);
+					else
+						throw NxsException(errormsg, token);
+					/* if we did not throw an exception, then we are in relaxed parsing mode.
+						We'll add the implied ,
+					*/
+					if (!someMissingEdgeLens && (prevToken == NXS_TREE_CLOSE_PARENS_TOKEN || prevToken == NXS_TREE_CLADE_NAME_TOKEN))
+						someMissingEdgeLens = true;
+					newickStream << ',';
+					prevToken = NXS_TREE_COMMA_TOKEN;
+					}
+				else if (prevToken == NXS_TREE_COLON_TOKEN)
+					throw NxsException("Expecting a branch length after a : but found (", token);
+				nchildren.top() += 1;
+				nchildren.push(0);
+				newickStream << '(';
+				prevToken = NXS_TREE_OPEN_PARENS_TOKEN;
+				handled = true;
+				}
+			else if (t[0] == ')')
+				{
+				if (nchildren.empty())
+					throw NxsException("End of tree description.  Expected ; but found )", token);
+				if (prevToken == NXS_TREE_OPEN_PARENS_TOKEN || prevToken == NXS_TREE_COMMA_TOKEN)
+					throw NxsException("Expecting a clade description before the subtree's closing )\n \"()\" and \",)\" are prohibited.", token);
+				if (prevToken == NXS_TREE_COLON_TOKEN)
+					throw NxsException("Expecting a branch length after a : but found )", token);
+				if (!someMissingEdgeLens && (prevToken == NXS_TREE_CLOSE_PARENS_TOKEN || prevToken == NXS_TREE_CLADE_NAME_TOKEN))
+					someMissingEdgeLens = true;
+				if (nchildren.top() == 1)
+					hasDegTwoNodes = true;
+				else if (nchildren.top() > 2)
+					{
+					if (rooted)
+						hasPolytomies = true;
+					else if (nchildren.top() > 3 || nchildren.size() > 1) /* three children are allowed not considered a polytomy */
+						hasPolytomies = true;
+					}
+				nchildren.pop();
+				newickStream << ')';
+				prevToken = NXS_TREE_CLOSE_PARENS_TOKEN;
+				handled = true;
+				}
+			else if (t[0] == ':')
+				{
+				if (prevToken != NXS_TREE_CLOSE_PARENS_TOKEN && prevToken != NXS_TREE_CLADE_NAME_TOKEN)
+					throw NxsException("Found a : separator for a subtree at an inappropriate location. A colon is only permitted after a clade name or )-symbol.", token);
+				if (taxsetRead && prevToken == NXS_TREE_CLADE_NAME_TOKEN)
+					throw NxsException("Found a : separator after a taxset name. Branch lengths cannot be assigned to multi-taxon taxsets.", token);
+				newickStream << ':';
+				prevToken = NXS_TREE_COLON_TOKEN;
+				handled = true;
+				token.SetLabileFlagBit(NxsToken::hyphenNotPunctuation); // this allows us to deal with sci. not. in branchlengths (and negative branch lengths).
+				}
+			else if (t[0] == ',')
+				{
+				// Once the outermost ")" has been read the stack is empty; a following leaf would
+				//	otherwise call nchildren.top() on an empty stack.
+				if (nchildren.empty())
+					throw NxsException("End of tree description.  Expected ; but found ,", token);
+				if (prevToken == NXS_TREE_OPEN_PARENS_TOKEN)
+					throw NxsException("Found a empty subclade found. The combination \"(,\" is prohibited.", token);
+				if (prevToken == NXS_TREE_COMMA_TOKEN)
+					throw NxsException("Found a empty subclade found. The combination \",,\" is prohibited.", token);
+				if (prevToken == NXS_TREE_COLON_TOKEN)
+					throw NxsException("Found a , when a branch length was expected found. The combination \":,\" is prohibited.", token);
+				if (!someMissingEdgeLens && (prevToken == NXS_TREE_CLOSE_PARENS_TOKEN || prevToken == NXS_TREE_CLADE_NAME_TOKEN))
+					someMissingEdgeLens = true;
+				newickStream << ',';
+				prevToken = NXS_TREE_COMMA_TOKEN;
+				handled = true;
+				}
+			}
+		if (!handled)
+			{
+			if (prevToken == NXS_TREE_COLON_TOKEN)
+				{
+				// Branch length.  Lengths are tracked as integers until the first non-integer one is seen.
+				bool handledLength = false;
+				if (!someRealEdgeLens)
+					{
+					if (NxsString::to_long(t, &lastIntEdgeLen))
+						{
+						handledLength = true;
+						if (lastIntEdgeLen < minIntEdgeLen)
+							minIntEdgeLen = (int)lastIntEdgeLen;
+						}
+					}
+				if (!handledLength)
+					{
+					if (!NxsString::to_double(t, &lastFltEdgeLen))
+						{
+						errormsg << "Expecting a number as a branch length. Found " << tstr;
+						throw NxsException(errormsg, token);
+						}
+					someRealEdgeLens = true;
+					if (lastFltEdgeLen < minDblEdgeLen)
+						minDblEdgeLen = lastFltEdgeLen;
+					}
+				newickStream << tstr;
+				someHaveEdgeLens = true;
+				prevToken = NXS_TREE_BRLEN_TOKEN;
+				}
+			else
+				{
+				// Anything else is a name: an internal-node label (after ")") or a leaf/taxset label.
+				if (prevToken == NXS_TREE_BRLEN_TOKEN || prevToken == NXS_TREE_CLADE_NAME_TOKEN)
+					{
+					errormsg << "Found a name " << tstr << " which should be preceded by a ( or a ,";
+					throw NxsException(errormsg, token);
+					}
+				taxsetRead = false;
+				taxaLabelPtr = &tstr;
+				NxsString ucl(t);
+				if (!respectCase)
+					ucl.ToUpper();
+				NxsString toAppend;
+				if (prevToken == NXS_TREE_CLOSE_PARENS_TOKEN)
+					{
+					if (validateInternalNodeLabels)
+						{
+						//std::cerr << "validateInternalNodeLabels = true " << taxaLabelPtr << "\n";
+						std::map<std::string, unsigned>::const_iterator tt = capNameToInd.find(ucl);
+						unsigned ind = (tt == capNameToInd.end() ? UINT_MAX : tt->second);
+						if (taxaEncountered.find(ind) != taxaEncountered.end())
+							{
+							if (!autoNumberDuplicateNames)
+								{
+								errormsg << "Taxon number " << ind + 1 << " (coded by the token " << tstr << ") has already been encountered in this tree. Duplication of taxa in a tree is prohibited.";
+								throw NxsException(errormsg, token);
+								}
+							// NOTE(review): `ind' is not looked up again after disambiguation, so the code
+							//	below still uses the index of the original label - confirm this is intended.
+							nameDisambiguator = disambiguateName(capNameToInd, taxaEncountered, ucl, t, respectCase);
+							taxaLabelPtr = &nameDisambiguator;
+							t = nameDisambiguator.c_str();
+							}
+						hasInternalLabels = true;
+						if (ind == UINT_MAX)
+							{
+							hasInternalLabelsNotInTaxa = true;
+							toAppend += NxsString::GetEscaped(*taxaLabelPtr);
+							}
+						else
+							{
+							hasInternalLabelsInTaxa = true;
+							taxaEncountered.insert(ind);
+							toAppend += (1 + ind);
+							}
+						}
+					else
+						{
+						hasInternalLabels = true;
+						hasInternalLabelsNotInTaxa = true;
+						toAppend += NxsString::GetEscaped(*taxaLabelPtr);
+						//std::cerr << "validateInternalNodeLabels = false " << toAppend << "\n";
+						}
+					}
+				else
+					{
+					std::map<std::string, unsigned>::const_iterator tt = capNameToInd.find(ucl);
+					unsigned ind = (tt == capNameToInd.end() ? UINT_MAX : tt->second);
+					std::set<unsigned>::const_iterator teIt = taxaEncountered.find(ind);
+					if (teIt != taxaEncountered.end())
+						{
+						if (!autoNumberDuplicateNames)
+							{
+							errormsg << "Taxon number " << ind + 1 << " (coded by the token " << tstr << ") has already been encountered in this tree. Duplication of taxa in a tree is prohibited.";
+							throw NxsException(errormsg, token);
+							}
+						// NOTE(review): `ind' is not looked up again after disambiguation, so the code
+						//	below still uses the index of the original label - confirm this is intended.
+						nameDisambiguator = disambiguateName(capNameToInd, taxaEncountered, ucl, t, respectCase);
+						taxaLabelPtr = &nameDisambiguator;
+						t = nameDisambiguator.c_str();
+						}
+					if (ind == UINT_MAX)
+						{
+						// Not a known label or translate key: try it as a taxset / label known to the mapper.
+						std::set<unsigned> csinds;
+						if (allowNumericInterpretationOfTaxLabels) //@TEMPORARY hack
+							NxsLabelToIndicesMapper::allowNumberAsIndexPlusOne = false;
+						unsigned nadded = taxa->GetIndexSet(*taxaLabelPtr, &csinds);
+						if (allowNumericInterpretationOfTaxLabels) //@TEMPORARY hack
+							NxsLabelToIndicesMapper::allowNumberAsIndexPlusOne = true;
+						if (nadded == 0)
+							{
+							if (!allowNewTaxa)
+								{
+								errormsg << "Expecting a Taxon label after a \"" << (prevToken == NXS_TREE_OPEN_PARENS_TOKEN ? '(' : ',') << "\" character. Found \"" << *taxaLabelPtr << "\" but this is not a recognized taxon label.";
+								throw NxsException(errormsg, token);
+								}
+							long dummy;
+							if (treatIntegerLabelsAsNumbers && NxsString::to_long(ucl.c_str(), &dummy))
+								{
+								if (previousNonIntegerLabels)
+									{
+									errormsg << "Trees are being read in a mode that treats integer taxon labels as the number of the taxon. The mixing of integer and non-integer labels is not supported";
+									throw NxsException(errormsg, token);
+									}
+								previousAllIntegerLabels = true;
+								if (dummy < 1)
+									{
+									errormsg << "Trees are being read in a mode that treats integer taxon labels as the number of the taxon. All numbers are expected to be > 0";
+									throw NxsException(errormsg, token);
+									}
+								unsigned currNT = taxa->GetNumLabelsCurrentlyStored();
+								unsigned tn = (unsigned) dummy;
+								//errormsg << "numeric taxon handling -- currNT =  " << currNT << ". dummy= " << dummy << ".\n" ;
+								// Register every taxon number up to and including the one just read.
+								while (currNT < tn)
+									{
+									NxsString tasstring;
+									tasstring << ++currNT;
+									unsigned valueInd = taxa->AppendNewLabel(tasstring);
+									capNameToInd[tasstring] = valueInd;
+									//errormsg << "numeric taxon handling -- registering " << tasstring << " to " << valueInd << " mapping.\n";
+									}
+								std::map<std::string, unsigned>::const_iterator ttWithAdditions = capNameToInd.find(ucl);
+								unsigned indWithAdditions = (ttWithAdditions == capNameToInd.end() ? UINT_MAX : ttWithAdditions->second);
+								if (indWithAdditions == UINT_MAX)
+									{
+									errormsg << "Trees are being read in a mode that treats integer taxon labels as the number of the taxon - only numeric taxon labels were expected (the lookup table for "<< ucl << " failed to yield a hit, indicating that some non-numeric labels have been registered at some point)";
+									throw NxsException(errormsg, token);
+									}
+								//std::cerr << "1 taxaEncountered.insert " << indWithAdditions << "\n";
+								taxaEncountered.insert(indWithAdditions);
+								nchildren.top() += 1;
+								toAppend += (1 + indWithAdditions);
+								}
+							else
+								{
+								if (treatIntegerLabelsAsNumbers)
+									{
+									if (previousAllIntegerLabels)
+										{
+										errormsg << "Trees are being read in a mode that treats integer taxon labels as the number of the taxon. The mixing of integer and non-integer labels (such as \"" << t << "\") is not supported";
+										throw NxsException(errormsg, token);
+										}
+									previousNonIntegerLabels = true;
+									}
+								// Brand-new label: append it to the taxa and register its lookup keys.
+								std::string tasstring(taxaLabelPtr->c_str());
+								unsigned valueInd = taxa->AppendNewLabel(tasstring);
+								if (allowNumericInterpretationOfTaxLabels)
+									{
+									NxsString numV;
+									numV += (valueInd+1);
+									if (capNameToInd.find(numV) == capNameToInd.end())
+										capNameToInd[numV] = valueInd;
+									}
+								if (!respectCase)
+									NxsString::to_upper(tasstring);
+								capNameToInd[tasstring] = valueInd;
+								//std::cerr << "2 taxaEncountered.insert " << valueInd << "for " << tasstring << "\n";
+								taxaEncountered.insert(valueInd);
+								nchildren.top() += 1;
+								toAppend += (1 + valueInd);
+								}
+							}
+						else
+							{
+							// The label expanded to one or more known taxa; write them as a comma-separated list.
+							bool firstTaxonAdded = true;
+							for (std::set<unsigned>::const_iterator cit = csinds.begin(); cit != csinds.end(); ++cit)
+								{
+								if (taxaEncountered.find(*cit) != taxaEncountered.end())
+									{
+									errormsg << "Taxon number " << *cit + 1 << " (one of the members of the taxset " << *taxaLabelPtr << ") has already been encountered in this tree. Duplication of taxa in a tree is prohibited.";
+									throw NxsException(errormsg, token);
+									}
+								//std::cerr << "3 taxaEncountered.insert " << *cit << "\n";
+								taxaEncountered.insert(*cit);
+								nchildren.top() += 1;
+								if (!firstTaxonAdded)
+									toAppend.append(1, ',');
+								toAppend += (1 + *cit);
+								firstTaxonAdded = false;
+								}
+							if (nadded > 1)
+								{
+								taxsetRead = true;
+								someMissingEdgeLens = true;
+								}
+							}
+						}
+					else
+						{
+						//std::cerr << "4 taxaEncountered.insert " << ind << "\n";
+						taxaEncountered.insert(ind);
+						nchildren.top() += 1;
+						toAppend += (1 + ind);
+						}
+					}
+				newickStream << toAppend;
+				prevToken = NXS_TREE_CLADE_NAME_TOKEN;
+				}
+			}
+		// The flag is "labile", so it has to be set again before every token read.
+		if (allowUnquotedSpaces)
+			token.SetLabileFlagBit(NxsToken::spaceDoesNotBreakToken);
+
+		token.GetNextToken();
+		}
+	// Publish the results of the scan in the tree description.
+	td.flags |= NxsFullTreeDescription::NXS_TREE_PROCESSED;
+	if (someHaveEdgeLens)
+		{
+		flags |= NxsFullTreeDescription::NXS_HAS_SOME_EDGE_LENGTHS_BIT;
+		if (someRealEdgeLens)
+			{
+			flags &= ~(NxsFullTreeDescription::NXS_INT_EDGE_LENGTHS_BIT);
+			td.minDblEdgeLen = minDblEdgeLen;
+			}
+		else
+			{
+			flags |= NxsFullTreeDescription::NXS_INT_EDGE_LENGTHS_BIT;
+			td.minIntEdgeLen = minIntEdgeLen;
+			}
+		}
+	td.newick = newickStream.str();
+	if (someMissingEdgeLens)
+		flags |= NxsFullTreeDescription::NXS_MISSING_SOME_EDGE_LENGTHS_BIT;
+	if (hasPolytomies)
+		flags |= NxsFullTreeDescription::NXS_HAS_POLYTOMY_BIT;
+	if (hasDegTwoNodes)
+		flags |= NxsFullTreeDescription::NXS_HAS_DEG_TWO_NODES_BIT;
+	if (hasInternalLabels)
+		{
+		flags |= NxsFullTreeDescription::NXS_HAS_INTERNAL_NAMES_BIT;
+		if (hasInternalLabelsNotInTaxa)
+			flags |= NxsFullTreeDescription::NXS_HAS_NEW_INTERNAL_NAMES_BIT;
+		if (hasInternalLabelsInTaxa)
+			flags |= NxsFullTreeDescription::NXS_KNOWN_INTERNAL_NAMES_BIT;
+		}
+	if (NHXComments)
+		flags |= NxsFullTreeDescription::NXS_HAS_NHX_BIT;
+	if (taxaEncountered.size() == taxa->GetMaxIndex() + 1)
+		flags |= NxsFullTreeDescription::NXS_HAS_ALL_TAXA_BIT;
+	}
+
+/*!
+	Converts the raw newick string stored in `ftd' into its processed form, unless `ftd' is already flagged
+	as processed (in which case nothing happens).
+
+	The stored string is copied, terminated with ';' and cleared from `ftd'; the copy is then tokenized
+	(using newick tokenization when `ftd' requires it) and parsed by ProcessTokenStreamIntoTree with this
+	block's settings.  Labels are looked up without respecting case, and new taxa are accepted only while
+	the taxa block is being constructed.
+*/
+void NxsTreesBlock::ProcessTree(NxsFullTreeDescription & ftd) const
+	{
+	const bool alreadyProcessed = ((ftd.flags & NxsFullTreeDescription::NXS_TREE_PROCESSED) != 0);
+	if (alreadyProcessed)
+		return;
+	// Take a terminated private copy; the parser writes the processed newick back into ftd.newick.
+	ftd.newick.append(1, ';');
+	const std::string rawNewick(ftd.newick);
+	ftd.newick.clear();
+	istringstream rawStream(rawNewick);
+	NxsToken tokenizer(rawStream);
+	if (ftd.RequiresNewickNameTokenizing())
+		tokenizer.UseNewickTokenization(true);
+	const bool respectCase = false;
+	ProcessTokenStreamIntoTree(tokenizer,
+	                           ftd,
+	                           taxa,
+	                           capNameToInd,
+	                           constructingTaxaBlock, /* new taxa allowed only while constructing */
+	                           nexusReader,
+	                           respectCase,
+	                           validateInternalNodeLabels,
+	                           treatIntegerLabelsAsNumbers,
+	                           allowNumericInterpretationOfTaxLabels,
+	                           allowUnquotedSpaces,
+	                           disambiguateDuplicateNames);
+	}
+
+/*!
+	Reads a TREE (or UTREE) command: an optional "*" marking the default tree, the tree name, "=", an
+	optional [&R] / [&U] command comment and finally the tree description.
+
+	`rooted' supplies the default rooting; a [&R] or [&U] command comment overrides it.  A new description
+	is appended to `trees' and filled in by ReadTreeFromOpenParensToken.
+
+	Throws NxsException for an unrecognized command comment, for a token that is neither a command comment
+	nor a tree description, or when the file ends inside the tree description.
+*/
+void NxsTreesBlock::HandleTreeCommand(NxsToken &token, bool rooted)
+	{
+	NCL_ASSERT(taxa);
+	token.GetNextToken();
+	if (token.Equals("*"))
+		{
+		// The tree about to be stored becomes the default tree.
+		defaultTreeInd = (unsigned)trees.size();
+		token.GetNextToken();
+		}
+	NxsString treeName = token.GetToken();
+	DemandEquals(token, "after tree name in TREE command");
+	// Location of the start of the description, reported if the file ends prematurely.
+	const int startLine = (int)token.GetFileLine();
+	const int startCol = (int)token.GetFileColumn();
+	const file_pos startPos = token.GetFilePosition();
+	try
+		{
+		// This should be either a tree description or a command comment specifying
+		// whether this tree is to be rooted ([&R]) or unrooted ([&U]).
+		//
+		token.SetLabileFlagBit(NxsToken::saveCommandComments);
+		token.SetLabileFlagBit(NxsToken::parentheticalToken);
+		token.GetNextToken();
+		NxsString s = token.GetToken();
+		const bool isCommandComment = (!s.empty() && s[0] == '&');
+		if (isCommandComment)
+			{
+			const char rootingCode = s[1];
+			if (rootingCode == 'R' || rootingCode == 'r')
+				rooted = true;
+			else if (rootingCode == 'U' || rootingCode == 'u')
+				rooted = false;
+			else
+				{
+				errormsg << "[" << token.GetToken() << "] is not a valid command comment in a TREE command";
+				throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+				}
+			// now grab the tree description
+			token.SetLabileFlagBit(NxsToken::parentheticalToken);
+			token.GetNextToken();
+			s = token.GetToken();
+			}
+		if (!s.empty() && s[0] != '(')
+			{
+			errormsg << "Expecting command comment or tree description in TREE (or UTREE) command, but found " << token.GetToken() << " instead";
+			throw NxsException(errormsg);
+			}
+		}
+	catch (NxsX_UnexpectedEOF &)
+		{
+		errormsg << "Unexpected end of file in tree description.\n";
+		errormsg << "This probably indicates that the parentheses in the newick description are not balanced, and one or more closing parentheses are needed.";
+		throw NxsException(errormsg, startPos, startLine, startCol);
+		}
+	const std::string emptyNewick;
+	const int initialFlags = (rooted ? NxsFullTreeDescription::NXS_IS_ROOTED_BIT : 0);
+	trees.push_back(NxsFullTreeDescription(emptyNewick, treeName, initialFlags));
+	NxsFullTreeDescription & newDesc = trees[trees.size() - 1];
+	ReadTreeFromOpenParensToken(newDesc, token);
+	}
+
+/*!
+	Reads the remainder of a tree description into `td'.  On entry `token' holds the first token of the
+	description (read by the caller with the parentheticalToken flag set).
+
+	That token is copied verbatim into the raw newick string; every following token up to ";" is appended in
+	escaped form together with any embedded comments.  A bare "(", ")" or "," in that trailing part is
+	reported through GenerateUnexpectedTokenNxsException.  When `processAllTreesDuringParse' is set the tree
+	is processed immediately, and it is removed from `trees' again if the optional validation callback
+	returns false.
+
+	When `useNewickTokenizingDuringParse' is set, newick tokenization is switched on for the duration of the
+	call and switched off again on both the normal and the exceptional exit path.
+
+	An NxsException raised while processing the tree has the starting line/column of the description added
+	to it and is rethrown with `throw;' so that its dynamic type is preserved.
+*/
+void NxsTreesBlock::ReadTreeFromOpenParensToken(NxsFullTreeDescription &td, NxsToken & token)
+	{
+	if (this->useNewickTokenizingDuringParse)
+		{
+		token.UseNewickTokenization(true);
+		td.SetRequiresNewickNameTokenizing(true);
+		}
+	try {
+		// NOTE(review): fp is never assigned after this initialization, so `x.pos += fp' below adds 0.
+		file_pos fp = 0;
+		int fline = (int)token.GetFileLine();
+		int fcol = (int)token.GetFileColumn();
+		ostringstream newickStream;
+		newickStream << token.GetTokenReference();
+		token.GetNextToken();
+		const std::vector<NxsComment> & ecs = token.GetEmbeddedComments();
+		for (std::vector<NxsComment>::const_iterator ecsIt = ecs.begin(); ecsIt != ecs.end(); ++ecsIt)
+			ecsIt->WriteAsNexus(newickStream);
+		while (!token.Equals(";"))
+			{
+			if (token.Equals("(") || token.Equals(")") || token.Equals(","))
+				GenerateUnexpectedTokenNxsException(token, "root taxon information");
+			newickStream << NxsString::GetEscaped(token.GetTokenReference());
+			// The flag is "labile", so it has to be set again before every token read.
+			if (allowUnquotedSpaces)
+				{
+				token.SetLabileFlagBit(NxsToken::spaceDoesNotBreakToken);
+				}
+			token.GetNextToken();
+			const std::vector<NxsComment> & iecs = token.GetEmbeddedComments();
+			for (std::vector<NxsComment>::const_iterator iecsIt = iecs.begin(); iecsIt != iecs.end(); ++iecsIt)
+				iecsIt->WriteAsNexus(newickStream);
+			}
+		td.newick = newickStream.str();
+		if (processAllTreesDuringParse)
+			{
+			try
+				{
+				ProcessTree(td);
+				if (this->processedTreeValidationFunction)
+					{
+					// `td' must not be used after the pop_back below.
+					if (!this->processedTreeValidationFunction(td, this->ptvArg, this))
+						trees.pop_back();
+					}
+				}
+			catch (NxsException &x)
+				{
+				x.pos += fp;
+				x.line += fline - 1; /*both tokenizers start at 1 instead of zero, so we need to decrement the line */
+				x.col += fcol;
+				// Rethrow the caught object itself (including the adjustments above) instead of a sliced copy.
+				throw;
+				}
+			}
+		}
+	catch (...)
+		{
+		// Restore the tokenizer mode before letting the exception propagate.
+		if (this->useNewickTokenizingDuringParse)
+			token.UseNewickTokenization(false);
+		throw;
+		}
+	if (this->useNewickTokenizingDuringParse)
+		token.UseNewickTokenization(false);
+	}
+/*!
+	Reads the body of a TREES block, i.e. everything after the block name (which has already been consumed
+	by the NxsReader object) up to the END or ENDBLOCK command.  Overrides the abstract virtual function in
+	the base class.
+
+	TRANSLATE, TREE and UTREE commands are handled here; commands recognized by HandleBasicBlockCommands are
+	handled there, and anything else is skipped.  The default translate table is built before the first
+	TREE/UTREE command unless a TRANSLATE command has already been read.  If signal catching is enabled and
+	a new interrupt is observed, NxsSignalCanceledParseException is thrown.
+*/
+void NxsTreesBlock::Read(
+  NxsToken &token)	/* the token used to read from `in' */
+	{
+	isEmpty = false;
+	isUserSupplied = true;
+	DemandEndSemicolon(token, "BEGIN TREES");
+	errormsg.clear();
+	constructingTaxaBlock = false;
+	newtaxa = false;
+	capNameToInd.clear();
+	bool sawTranslate = false;
+	bool sawTree = false;
+	const unsigned sigIntsAtStart = NxsReader::getNumSignalIntsCaught();
+	const bool watchSignals = NxsReader::getNCLCatchesSignals();
+
+	for (;;)
+		{
+		token.GetNextToken();
+		if (watchSignals && NxsReader::getNumSignalIntsCaught() != sigIntsAtStart)
+			throw NxsSignalCanceledParseException("Reading TREES Block");
+		const NxsBlock::NxsCommandResult outcome = HandleBasicBlockCommands(token);
+		if (outcome == NxsBlock::NxsCommandResult(STOP_PARSING_BLOCK))
+			{
+			if (constructingTaxaBlock)
+				{
+				if (taxa && taxa->GetNTax() > 0)
+					newtaxa = true;
+				/* we don't allow the construction of taxa blocks over repeated readings or after the block has been read */
+				constructingTaxaBlock = false;
+				}
+			return;
+			}
+		if (outcome == NxsBlock::NxsCommandResult(HANDLED_COMMAND))
+			continue;
+		if (token.Equals("TRANSLATE"))
+			{
+			if (sawTree)
+				WarnDangerousContent("TRANSLATE command must precede any TREE commands in a TREES block", token);
+			if (sawTranslate)
+				{
+				WarnDangerousContent("Only one TRANSLATE command may be read in a TREES block", token);
+				// A repeated TRANSLATE starts again from an empty table.
+				capNameToInd.clear();
+				}
+			sawTranslate = true;
+			ConstructDefaultTranslateTable(token, "TRANSLATE");
+			HandleTranslateCommand(token);
+			continue;
+			}
+		const bool isUtreeCmd = token.Equals("UTREE");
+		const bool isTreeCmd = token.Equals("TREE");
+		if (!isUtreeCmd && !isTreeCmd)
+			{
+			SkipCommand(token);
+			continue;
+			}
+		// UTREE is always read as unrooted; TREE follows the block's default.
+		const bool readAsRooted = (isTreeCmd && this->treatAsRootedByDefault);
+		if (!sawTranslate && !sawTree)
+			ConstructDefaultTranslateTable(token, token.GetTokenReference().c_str());
+		sawTree = true;
+		HandleTreeCommand(token, readAsRooted);
+		}
+	}
+/*! Returns the description of the tree with index `i' where i is in [0..ntrees).
+	Node numbers will be translated to names in the resulting tree description.
+	Use GetTreeDescription if translation is not desired.
+
+	Note that if the names are complex they may complicate simple parses of the tree.
+	For example "A (" is a valid NEXUS taxon name (though one that I hope no one is crazy enough to use).
+
+	Throws NxsException if the stored description does not start with "(" or if a token that should be a taxon index
+	is not an integer in the range [1..ntax].
+*/
+NxsString NxsTreesBlock::GetTranslatedTreeDescription(
+  unsigned i)	/* the index of the tree for which the description is to be returned */
+	{
+	NCL_ASSERT(i < trees.size());
+	NCL_ASSERT(taxa);
+	NxsFullTreeDescription & ftd = trees.at(i);
+	// NOTE(review): presumably leaves ftd.newick in its processed, number-based form -- confirm against ProcessTree.
+	ProcessTree(ftd);
+	std::string incomingNewick = ftd.newick;
+	// Append the ';' that the token loop below uses as its stop condition.
+	incomingNewick.append(1, ';');
+	istringstream newickstream(incomingNewick);
+	NxsToken token(newickstream);
+	if (ftd.RequiresNewickNameTokenizing())
+		{
+		token.UseNewickTokenization(true);
+		}
+
+	token.GetNextToken();
+	if (!token.Equals("("))
+		{
+		errormsg << "Expecting a ( to start the tree description, but found " << token.GetTokenReference();
+		throw NxsException(errormsg, token);
+		}
+	// prevToken records the kind of the previously handled token; it decides how a non-punctuation token is treated.
+	int prevToken = NXS_TREE_OPEN_PARENS_TOKEN;
+	long taxIndLong;
+	const unsigned ntax = taxa->GetNTaxTotal();
+	ostringstream translated;
+	for (;;)
+		{
+		// Comments embedded before the current token are copied to the output first.
+		const std::vector<NxsComment> & ecs = token.GetEmbeddedComments();
+		for (std::vector<NxsComment>::const_iterator ecsIt = ecs.begin(); ecsIt != ecs.end(); ++ecsIt)
+			ecsIt->WriteAsNexus(translated);
+		if (token.Equals(";"))
+			break;
+		const NxsString & t = token.GetTokenReference();
+		bool handled;
+		handled = false;
+		// Single-character structural tokens are echoed unchanged.
+		if (t.length() == 1)
+			{
+			if (t[0] == '(')
+				{
+				translated <<  '(';
+				prevToken = NXS_TREE_OPEN_PARENS_TOKEN;
+				handled = true;
+				}
+			else if (t[0] == ')')
+				{
+				translated << ')';
+				prevToken = NXS_TREE_CLOSE_PARENS_TOKEN;
+				handled = true;
+				}
+			else if (t[0] == ':')
+				{
+				translated << ':';
+				prevToken = NXS_TREE_COLON_TOKEN;
+				handled = true;
+				token.SetLabileFlagBit(NxsToken::hyphenNotPunctuation); // this allows us to deal with sci. not. in branchlengths (and negative branch lengths).
+				}
+			else if (t[0] == ',')
+				{
+				translated << ',';
+				prevToken = NXS_TREE_COMMA_TOKEN;
+				handled = true;
+				}
+			}
+		if (!handled)
+			{
+			if (prevToken == NXS_TREE_COLON_TOKEN)
+				{
+				// The token after ':' is a branch length and is copied verbatim.
+				translated << t;
+				prevToken = NXS_TREE_BRLEN_TOKEN;
+				}
+			else
+				{
+				// An integer in [1..ntax] is a 1-based taxon number: replace it with the escaped taxon label.
+				if (NxsString::to_long(t.c_str(), &taxIndLong) && taxIndLong <= (long) ntax && taxIndLong > 0)
+					translated << NxsString::GetEscaped(taxa->GetTaxonLabel((unsigned) taxIndLong - 1));
+				// Anything else directly after ')' is copied through unchanged (presumably an internal node label).
+				else if (prevToken == NXS_TREE_CLOSE_PARENS_TOKEN)
+					translated << t;
+				else
+					{
+					errormsg << "Expecting a taxon index in a tree description, but found " << t;
+					throw NxsException(errormsg, token);
+					}
+				}
+			}
+		token.GetNextToken();
+		}
+	return NxsString(translated.str().c_str());
+	}
+
+/*!
+	Reads a sequence of newick tree descriptions from `token' until the tokenizer signals end-of-file. Each tree is
+	appended to `trees' and read by ReadTreeFromOpenParensToken with newick tokenization switched on. A tree may be
+	preceded by a command comment starting with &R (rooted) or &U (unrooted); any other command comment starting with
+	& causes a NxsException.
+
+	`allowImplicitNames', `useNewickTokenizingDuringParse' and the token's EOF-allowed setting are changed for the
+	duration of the call and restored on every exit path; `constructingTaxaBlock' is set to false on exit.
+	Running out of input (NxsX_UnexpectedEOF) is how the loop normally ends; it is only reported as an error when it
+	happens before the first tree has been started.
+*/
+void NxsTreesBlock::ReadPhylipTreeFile(NxsToken & token)
+	{
+	// Remember the settings that are overridden below so that they can be restored.
+	bool prevAIN = allowImplicitNames;
+	bool prevUNTDP = useNewickTokenizingDuringParse;
+	allowImplicitNames = true;
+	bool firstTree = true;
+	const bool prevEOFAllowed = token.GetEOFAllowed();
+	// With EOF disallowed, reaching the end of input surfaces as NxsX_UnexpectedEOF, which is caught below.
+	token.SetEOFAllowed(false);
+	try
+		{
+		for (;;)
+			{
+			token.SetLabileFlagBit(NxsToken::saveCommandComments);
+			token.SetLabileFlagBit(NxsToken::parentheticalToken);
+			token.GetNextToken();
+			NxsString s = token.GetToken();
+			bool rooted = false;
+			// A token starting with '&' is a saved command comment giving the rooting of the next tree.
+			if (!s.empty() && s[0] == '&')
+				{
+				if (s[1] == 'R' || s[1] == 'r')
+					rooted = true;
+				else if (s[1] == 'U' || s[1] == 'u')
+					rooted = false;
+				else
+					{
+					errormsg << "[" << token.GetToken() << "] is not a valid command comment in a TREE command";
+					throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+					}
+				// now grab the tree description
+				token.SetLabileFlagBit(NxsToken::parentheticalToken);
+				token.GetNextToken();
+				s = token.GetToken();
+				}
+			// NOTE(review): an empty token passes this check and is handed to ReadTreeFromOpenParensToken -- confirm that is intended.
+			if (!s.empty() && s[0] != '(')
+				{
+				errormsg << "Expecting a tree description, but found \"" << token.GetToken() << "\" instead";
+				throw NxsException(errormsg);
+				}
+			if (firstTree)
+				{
+				ConstructDefaultTranslateTable(token, token.GetTokenReference().c_str());
+				firstTree = false;
+				}
+			int f = (rooted ? NxsFullTreeDescription::NXS_IS_ROOTED_BIT : 0);
+			std::string mt;
+			// Append an empty description first; ReadTreeFromOpenParensToken fills it in through the reference.
+			trees.push_back(NxsFullTreeDescription(mt, mt, f));
+			NxsFullTreeDescription & td = trees[trees.size() -1];
+			this->useNewickTokenizingDuringParse = true;
+			ReadTreeFromOpenParensToken(td, token);
+			this->useNewickTokenizingDuringParse = prevUNTDP;
+			//this->constructingTaxaBlock = false; // we have to signal that we are done constructing the TAXA block
+			}
+		}
+	catch (NxsX_UnexpectedEOF &)
+		{
+		// End of input: restore state; only an error if no tree had been started yet.
+		allowImplicitNames = prevAIN;
+		useNewickTokenizingDuringParse = prevUNTDP;
+		token.SetEOFAllowed(prevEOFAllowed);
+		this->constructingTaxaBlock = false; // we have to signal that we are done constructing the TAXA block
+		if (firstTree)
+			{
+			errormsg << "Unexpected end of file in tree description.\n";
+			errormsg << "This probably indicates that the parentheses in the newick description are not balanced, and one or more closing parentheses are needed.";
+			throw NxsException(errormsg);
+			}
+		}
+	catch (...)
+		{
+		// Any other failure: restore state and propagate the original exception.
+		allowImplicitNames = prevAIN;
+		useNewickTokenizingDuringParse = prevUNTDP;
+		token.SetEOFAllowed(prevEOFAllowed);
+		this->constructingTaxaBlock = false; // we have to signal that we are done constructing the TAXA block
+		throw;
+		}
+	this->constructingTaxaBlock = false; // we have to signal that we are done constructing the TAXA block
+	token.SetEOFAllowed(prevEOFAllowed);
+	allowImplicitNames = prevAIN;
+	useNewickTokenizingDuringParse = prevUNTDP;
+	}
diff --git a/src/nxsunalignedblock.cpp b/src/nxsunalignedblock.cpp
new file mode 100644
index 0000000..6cee5eb
--- /dev/null
+++ b/src/nxsunalignedblock.cpp
@@ -0,0 +1,915 @@
+//	Copyright (C) 2007 Paul O. Lewis
+//
+//	This file is part of NCL (Nexus Class Library) version 2.0.
+//
+//	NCL is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//
+//	NCL is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//	GNU General Public License for more details.
+//
+//	You should have received a copy of the GNU General Public License
+//	along with NCL; if not, write to the Free Software Foundation, Inc.,
+//	59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+
+#include <climits>
+#include "ncl/nxsunalignedblock.h"
+#include "ncl/nxsreader.h"
+using namespace std;
+
+//@POL Note: This file is not yet ready for use (Paul Lewis, 19-May-2007)
+
+/*!
+	Constructs an UNALIGNED block that consults `tb' for taxon labels. The taxa block pointer is handed to the
+	NxsTaxaBlockSurrogate base (with NULL as its second argument), the block type name is set to "UNALIGNED", and
+	Reset is called to put the remaining data members in their initial state.
+*/
+NxsUnalignedBlock::NxsUnalignedBlock(
+  NxsTaxaBlockAPI * tb)			/* the taxa block object to consult for taxon labels */
+  : NxsBlock()
+  , NxsTaxaBlockSurrogate(tb, NULL)
+	{
+	NCL_BLOCKTYPE_ATTR_NAME = "UNALIGNED";
+	this->Reset();
+	}
+
+/*!
+	Destructor. Calls Reset, which empties `uMatrix' and re-initializes the symbols and equates.
+*/
+NxsUnalignedBlock::~NxsUnalignedBlock()
+	{
+	this->Reset();
+	}
+
+/*!
+	Puts the block back in its freshly constructed state: the base-class and surrogate state are reset, the matrix is
+	emptied, all format settings return to their defaults (standard datatype, '?' for missing, no gap symbol, case
+	ignored, labels expected) and the symbols, equates and datatype mapper are rebuilt to match.
+*/
+void NxsUnalignedBlock::Reset()
+	{
+	NxsBlock::Reset();
+	ResetSurrogate();
+
+	// matrix contents and dimensions
+	uMatrix.clear();
+	nChar = 0;
+	nTaxWithData = 0;
+
+	// flags
+	labels = true;
+	newtaxa = false;
+	respectingCase = false;
+
+	// format defaults; these must be in place before ResetSymbols rebuilds the mapper from them
+	datatype = NxsCharactersBlock::standard;
+	originalDatatype = datatype;
+	gap = '\0';
+	missing = '?';
+	// NOTE(review): `matchchar' is read by ResetDatatypeMapper but is not assigned here -- confirm it is initialized elsewhere.
+	ResetSymbols();	// also resets equates
+	}
+
+/*!
+	Returns true if `taxInd' refers to a row of `uMatrix' that exists and holds at least one state; returns false for
+	an index past the end of the matrix or for an empty row.
+*/
+bool NxsUnalignedBlock::TaxonIndHasData(
+  unsigned taxInd) const /* the index of the taxon (row of `uMatrix') in question */
+	{
+	if (taxInd >= uMatrix.size())
+		return false;
+	return !uMatrix[taxInd].empty();
+	}
+
+/*!
+	Returns the states stored for the taxon with index `rowIndex', formatted by WriteStatesForMatrixRow. An empty
+	string is returned when that taxon has no data.
+*/
+std::string NxsUnalignedBlock::GetMatrixRowAsStr(const unsigned rowIndex) const /* the index of the taxon (row) to format */
+	{
+	std::ostringstream rowText;
+	if (this->TaxonIndHasData(rowIndex))
+		WriteStatesForMatrixRow(rowText, rowIndex);
+	return rowText.str();
+	}
+
+
+/*!
+	Rebuilds `mapper' from the current format settings (`datatype', `symbols', `missing', `gap', `matchchar',
+	`respectingCase' and `equates'), then copies the datatype reported by the new mapper back into `datatype'.
+*/
+void NxsUnalignedBlock::ResetDatatypeMapper()
+	{
+	this->mapper = NxsDiscreteDatatypeMapper(this->datatype,
+											 this->symbols,
+											 this->missing,
+											 this->gap,
+											 this->matchchar,
+											 this->respectingCase,
+											 this->equates);
+	this->datatype = this->mapper.GetDatatype();
+	}
+/*!
+	Installs the standard symbol list for the current `datatype' ("ACGT" for DNA and nucleotide data, "ACGU" for RNA,
+	the 20 amino acids plus '*' for protein, and "01" for everything else), replaces the equates with the defaults
+	for that datatype, and rebuilds the datatype mapper.
+*/
+void NxsUnalignedBlock::ResetSymbols()
+	{
+	const bool isDNALike = (datatype == NxsCharactersBlock::nucleotide || datatype == NxsCharactersBlock::dna);
+	if (isDNALike)
+		symbols =  "ACGT";
+	else if (datatype == NxsCharactersBlock::rna)
+		symbols = "ACGU";
+	else if (datatype == NxsCharactersBlock::protein)
+		symbols =  "ACDEFGHIKLMNPQRSTVWY*";
+	else
+		symbols = "01";
+
+	// discard any user-defined equates before installing the defaults
+	equates.clear();
+	this->equates = NxsCharactersBlock::GetDefaultEquates(datatype);
+	ResetDatatypeMapper();
+	}
+/*!
+	Dumps the contents of `uMatrix' to `out'; useful for checking that data was read as expected. Taxa with no row
+	or an empty row are skipped. For every other taxon the output is `marginText' (omitted when NULL), the taxon
+	label, padding that lines the rows up five columns past the longest label, and the states as written by the
+	datatype mapper. Does nothing if there is no taxa block.
+*/
+void NxsUnalignedBlock::DebugShowMatrix(
+  std::ostream & out,		/* is the output stream on which to print */
+  const char * marginText) NCL_COULD_BE_CONST /* is text printed first on each line */ /*v2.1to2.2 1 */
+	{
+	if (!taxa)
+		return;
+	const unsigned labelColWidth = taxa->GetMaxTaxonLabelLength();
+	const unsigned nTotal = GetNTaxTotal();
+	NCL_ASSERT(uMatrix.size() >= nTotal);
+	for (unsigned taxInd = 0; taxInd < nTotal; ++taxInd)
+		{
+		const NxsDiscreteStateRow * stateRow = GetDiscreteMatrixRow(taxInd);
+		if (stateRow == NULL || stateRow->empty())
+			continue;
+		if (marginText != NULL)
+			out << marginText;
+		const NxsString label = taxa->GetTaxonLabel(taxInd); /*v2.1to2.2 4 */
+		const unsigned labelLen = (unsigned)label.size();
+		const unsigned padLen = labelColWidth - labelLen + 5;
+		out << label << std::string(padLen, ' ');
+		mapper.WriteStateCodeRowAsNexus(out, *stateRow);
+		}
+	}
+
+/*!
+	Returns a string containing a formatted representation of the state identified by `d' (taxon index `d.taxInd',
+	character index `d.charInd'). The missing-data symbol is returned when no state is stored at that position,
+	i.e. when the taxon has no row in `uMatrix' or the row is shorter than `d.charInd'.
+
+	Throws NxsNCLAPIException if `d.taxInd' is not a valid taxon index.
+*/
+std::string NxsUnalignedBlock::FormatState(
+  NxsDiscreteDatum d)		/* identifies the element of `uMatrix' to format */
+  const
+	{
+	if (d.taxInd >= GetNTaxTotal())
+		throw NxsNCLAPIException("Taxon out of range in NxsUnalignedBlock::FormatState");
+	// uMatrix is only sized when a MATRIX command is read, so it can be shorter than the taxa block;
+	// treat a missing row like a too-short row instead of indexing past the end of the vector.
+	if (d.taxInd >= uMatrix.size())
+		return std::string(1, missing);
+	const NxsDiscreteStateRow & row = uMatrix[d.taxInd];
+	if (d.charInd >= row.size())
+		return std::string(1, missing);
+	return mapper.StateCodeToNexusString(row[d.charInd]);
+	}
+
+/*!
+	Returns true if `ch' can be found in the `symbols' string. The value of `respectingCase' determines whether the
+	search is case sensitive; when it is false both `ch' and each symbol are upper-cased before being compared.
+*/
+bool NxsUnalignedBlock::IsInSymbols(
+  char ch)	/* the symbol character to search for */
+	{
+	if (respectingCase)
+		return symbols.find(ch) != std::string::npos;
+
+	// toupper is only defined for EOF and values representable as unsigned char, so a plain (possibly negative)
+	// char must be converted to unsigned char before the call.
+	const int char_in_question = toupper(static_cast<unsigned char>(ch));
+	for (std::string::const_iterator sIt = symbols.begin(); sIt != symbols.end(); ++sIt)
+		{
+		if (toupper(static_cast<unsigned char>(*sIt)) == char_in_question)
+			return true;
+		}
+	return false;
+	}
+
+/*!
+	Parses the DIMENSIONS command of an UNALIGNED block, from the token after DIMENSIONS up to and including the
+	terminating semicolon. Recognizes NEWTAXA and NTAX=<positive integer>; other tokens are ignored.
+
+	With NEWTAXA in effect NTAX is mandatory and is used to size the taxa block. Otherwise a non-empty taxa block
+	must already be available, NTAX may not exceed its size, and NTAX defaults to the size of the taxa block.
+	`nTaxWithData' is set to the resulting number of taxa. Throws NxsException when these rules are violated.
+*/
+void NxsUnalignedBlock::HandleDimensions(
+  NxsToken & token)			/* the token used to read from `in' */
+	{
+	unsigned declaredNTax = 0;	// stays 0 when no NTAX subcommand is present
+	bool atCommandEnd = false;
+	while (!atCommandEnd)
+		{
+		token.GetNextToken();
+		if (token.Equals(";"))
+			atCommandEnd = true;
+		else if (token.Equals("NEWTAXA"))
+			newtaxa = true;
+		else if (token.Equals("NTAX"))
+			{
+			DemandEquals(token, "after NTAX in DIMENSIONS command");
+			declaredNTax = DemandPositiveInt(token, "NTAX");
+			}
+		}
+
+	if (!newtaxa)
+		{
+		// Relying on a previously read taxa block.
+		AssureTaxaBlock(false, token, "Dimensions");
+		const unsigned nTaxInTaxaBlock = taxa->GetNTax();
+		if (nTaxInTaxaBlock == 0)
+			{
+			errormsg = "A TAXA block must be read before character data, or the DIMENSIONS command must use the NEWTAXA.";
+			throw NxsException(errormsg, token);
+			}
+		if (declaredNTax > nTaxInTaxaBlock)
+			{
+			errormsg = "NTAX in UNALIGNED block must be less than or equal to NTAX in TAXA block\nNote: one circumstance that can cause this error is \nforgetting to specify NTAX in DIMENSIONS command when \na TAXA block has not been provided";
+			throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+			}
+		if (declaredNTax == 0)
+			nTaxWithData = nTaxInTaxaBlock;
+		else
+			nTaxWithData = declaredNTax;
+		return;
+		}
+
+	// NEWTAXA: the taxa are defined by this block, so their number must be given explicitly.
+	if (declaredNTax == 0)
+		{
+		errormsg = "DIMENSIONS command must have an NTAX subcommand when the NEWTAXA option is in effect.";
+		throw NxsException(errormsg, token);
+		}
+	AssureTaxaBlock(createImpliedBlock, token, "Dimensions");
+	if (!createImpliedBlock)
+		{
+		// Reusing an existing taxa block: wipe it and take it off the reader's list of used blocks.
+		taxa->Reset();
+		if (nexusReader)
+			nexusReader->RemoveBlockFromUsedBlockList(taxa);
+		}
+	taxa->SetNtax(declaredNTax);
+	nTaxWithData = declaredNTax;
+	}
+
+/*!
+	Called when the END or ENDBLOCK command needs to be parsed from within the UNALIGNED block. Its only job is to
+	make sure that the next token in the data file is the semicolon that terminates the command.
+*/
+void NxsUnalignedBlock::HandleEndblock(
+  NxsToken & token)		/* the token used to read from `in' */
+	{
+	this->DemandEndSemicolon(token, "END or ENDBLOCK");
+	}
+
+/*!
+	Called when FORMAT command needs to be parsed from within the UNALIGNED block. Deals with everything after the
+	token FORMAT up to and including the semicolon that terminates the FORMAT command.
+
+	Recognized subcommands are DATATYPE, RESPECTCASE, MISSING, SYMBOLS (or SYMBOL), EQUATE, LABELS and NOLABELS;
+	other tokens are ignored. DATATYPE (when not STANDARD) must come before every other subcommand, and RESPECTCASE
+	must come before MISSING and SYMBOLS. For the standard datatype a SYMBOLS subcommand replaces the default symbols
+	list; for the other datatypes it extends the predefined list. The datatype mapper is rebuilt once the whole
+	command has been read. Throws NxsException on malformed input.
+*/
+void NxsUnalignedBlock::HandleFormat(
+  NxsToken & token)	/* is the token used to read from `in' */
+	{
+	// Set once a subcommand has been read that only makes sense after the datatype / case handling is fixed.
+	bool standardDataTypeAssumed = false;
+	bool ignoreCaseAssumed = false;
+
+	for (;;)
+		{
+		token.GetNextToken();
+
+		if (token.Equals("DATATYPE"))
+			{
+			DemandEquals(token, "after keyword DATATYPE");
+			// This should be one of the following: STANDARD, DNA, RNA, NUCLEOTIDE or PROTEIN
+			token.GetNextToken();
+
+			if (token.Equals("STANDARD"))
+				datatype = NxsCharactersBlock::standard;
+			else if (token.Equals("DNA"))
+				datatype = NxsCharactersBlock::dna;
+			else if (token.Equals("RNA"))
+				datatype = NxsCharactersBlock::rna;
+			else if (token.Equals("NUCLEOTIDE"))
+				datatype = NxsCharactersBlock::nucleotide;
+			else if (token.Equals("PROTEIN"))
+				datatype = NxsCharactersBlock::protein;
+			else
+				{
+				errormsg = token.GetToken();
+				errormsg += " is not a valid DATATYPE within a ";
+				errormsg += NCL_BLOCKTYPE_ATTR_NAME;
+				errormsg += " block";
+				throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+				}
+			if (standardDataTypeAssumed && datatype != NxsCharactersBlock::standard)
+				{
+				errormsg = "DATATYPE must be specified first in FORMAT command";
+				throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+				}
+			originalDatatype = datatype;
+			ResetSymbols();
+			}
+		else if (token.Equals("RESPECTCASE"))
+			{
+			if (ignoreCaseAssumed)
+				{
+				errormsg = "RESPECTCASE must be specified before MISSING and SYMBOLS in FORMAT command";
+				throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+				}
+			standardDataTypeAssumed = true;
+			respectingCase = true;
+			}
+		else if (token.Equals("MISSING"))
+			{
+			DemandEquals(token, "after keyword MISSING");
+			// This should be the missing data symbol (single character)
+			token.GetNextToken();
+
+			if (token.GetTokenLength() != 1)
+				{
+				errormsg = "MISSING symbol should be a single character, but ";
+				errormsg += token.GetToken();
+				errormsg += " was specified";
+				throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+				}
+			else if (token.IsPunctuationToken() && !token.IsPlusMinusToken())
+				{
+				errormsg = "MISSING symbol specified cannot be a punctuation token (";
+				errormsg += token.GetToken();
+				errormsg += " was specified)";
+				throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+				}
+			else if (token.IsWhitespaceToken())
+				{
+				errormsg = "MISSING symbol specified cannot be a whitespace character (";
+				errormsg += token.GetToken();
+				errormsg += " was specified)";
+				throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+				}
+
+			missing = token.GetToken()[0];
+
+			ignoreCaseAssumed = true;
+			standardDataTypeAssumed = true;
+			}
+		else if (token.Equals("SYMBOLS") || token.Equals("SYMBOL"))
+			{
+			unsigned maxNewStates;
+			switch(datatype)
+				{
+				case NxsCharactersBlock::dna:
+				case NxsCharactersBlock::rna:
+				case NxsCharactersBlock::nucleotide:
+					// 4 predefined states
+					maxNewStates = NCL_MAX_STATES-4;
+					break;
+				case NxsCharactersBlock::protein:
+					// 21 predefined states
+					maxNewStates = NCL_MAX_STATES-21;
+					break;
+				default:
+					// Replace the symbols list for the standard datatype. `symbols' is a string object, so it has
+					// to be emptied with clear(); writing '\0' into its first element leaves the old length (and
+					// the remaining default symbol) in place.
+					symbols.clear();
+					maxNewStates = NCL_MAX_STATES;
+				}
+			DemandEquals(token, "after keyword SYMBOLS");
+
+			// This should be the symbols list
+			token.SetLabileFlagBit(NxsToken::doubleQuotedToken);
+			token.GetNextToken();
+
+			token.StripWhitespace();
+			unsigned numNewSymbols = token.GetTokenLength();
+
+			if (numNewSymbols > maxNewStates)
+				{
+				errormsg = "SYMBOLS defines ";
+				errormsg += numNewSymbols;
+				errormsg += " new states but only ";
+				errormsg += maxNewStates;
+				errormsg += " new states allowed for this DATATYPE";
+				throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+				}
+
+			NxsString to = token.GetToken();
+			unsigned tlen = (unsigned)to.size();
+			NxsString processedS;
+			// Check to make sure user has not used any symbols already in the
+			// default symbols list for this data type. Offending symbols are
+			// reported as a warning (when a reader is attached) and skipped.
+			for (unsigned i = 0; i < tlen; i++)
+				{
+				if (IsInSymbols(to[i]))
+					{
+					errormsg = "The character ";
+					errormsg << to[i] << " defined in SYMBOLS is predefined for this DATATYPE and shoud not occur in a SYMBOLS subcommand of a FORMAT command.";
+					if (nexusReader)
+						{
+						nexusReader->NexusWarnToken(errormsg, NxsReader::SKIPPING_CONTENT_WARNING, token);
+						errormsg.clear();
+						}
+					}
+				else
+					processedS += to[i];
+				}
+
+			// If we've made it this far, go ahead and add the user-defined
+			// symbols to the end of the list of predefined symbols
+			symbols.append(processedS);
+
+			ignoreCaseAssumed = true;
+			standardDataTypeAssumed = true;
+			}
+
+		else if (token.Equals("EQUATE"))
+			{
+			DemandEquals(token, "after keyword EQUATE");
+
+			// This should be a double-quote character
+			token.GetNextToken();
+
+			if (!token.Equals("\""))
+				{
+				errormsg = "Expecting '\"' after keyword EQUATE but found ";
+				errormsg += token.GetToken();
+				errormsg += " instead";
+				throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+				}
+
+			// Loop until second double-quote character is encountered
+			for (;;)
+				{
+				token.GetNextToken();
+				if (token.Equals("\""))
+					break;
+
+				// If token is not a double-quote character, then it must be the equate symbol (i.e., the
+				// character to be replaced in the data matrix)
+				if (token.GetTokenLength() != 1)
+					{
+					errormsg = "Expecting single-character EQUATE symbol but found ";
+					errormsg += token.GetToken();
+					errormsg += " instead";
+					throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+					}
+
+				// Check for bad choice of equate symbol
+				NxsString t = token.GetToken();
+				const char ch = t[0];
+				bool badEquateSymbol = false;
+
+				// The character '^' cannot be an equate symbol
+				if (ch == '^')
+					badEquateSymbol = true;
+
+				// Equate symbols cannot be punctuation (except for + and -)
+				if (token.IsPunctuationToken() && !token.IsPlusMinusToken())
+					badEquateSymbol = true;
+
+				// Equate symbols cannot be same as missing or gap
+				// NOTE(review): the error message below also mentions matchchar, which is not checked here -- confirm.
+				if (ch == missing || ch == gap)
+					badEquateSymbol = true;
+
+				// Equate symbols cannot be one of the state symbols currently defined
+				if (IsInSymbols(ch))
+					badEquateSymbol = true;
+
+				if (badEquateSymbol)
+					{
+					errormsg = "EQUATE symbol specified (";
+					errormsg += token.GetToken();
+					errormsg += ") is not valid; must not be same as missing, \nmatchchar, gap, state symbols, or any of the following: ()[]{}/\\,;:=*'\"`<>^";
+					throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+					}
+
+				NxsString k = token.GetToken();
+
+				DemandEquals(token, "in EQUATE definition");
+
+				// This should be the token to be substituted in for the equate symbol
+				token.SetLabileFlagBit(NxsToken::parentheticalToken);
+				token.SetLabileFlagBit(NxsToken::curlyBracketedToken);
+				token.GetNextToken();
+				NxsString v = token.GetToken();
+
+				// Add the new equate association to the equates list
+				equates[ch] = v;
+				}
+
+			standardDataTypeAssumed = true;
+			}
+		else if (token.Equals("LABELS"))
+			{
+			labels = true;
+			standardDataTypeAssumed = true;
+			}
+		else if (token.Equals("NOLABELS"))
+			{
+			labels = false;
+			standardDataTypeAssumed = true;
+			}
+		else if (token.Equals(";"))
+			{
+			break;
+			}
+		}
+	// Make the mapper reflect the symbols, missing symbol, case handling and equates that were just read.
+	ResetDatatypeMapper();
+	}
+
+/*!
+	Called from HandleMatrix function to read in the next state. Returns false if the next token encountered is a
+	comma or a semicolon (which signals the end of data for the current taxon in an UNALIGNED block), and true if a
+	state was read and stored.
+
+	The state is stored at position `charNum' of `row'. If `row' is shorter than `charNum', the gap is filled with
+	NXS_INVALID_STATE_CODE so that the new state still lands at index `charNum'.
+*/
+bool NxsUnalignedBlock::HandleNextState(
+  NxsToken & token,			/* is the token used to read from `in' */
+  unsigned taxNum,				/* is the row in range [0..ntax) (used for error reporting only) */
+  unsigned charNum,				/* is the column at which the state is stored */
+  NxsDiscreteStateRow & row, const NxsString &nameStr)	/* `row' is the container for storing new state; `nameStr' is the taxon name passed on to the mapper */
+	{
+	// A state may be a single character or a parenthesized / curly-bracketed group of characters.
+	token.SetLabileFlagBit(NxsToken::parentheticalToken);
+	token.SetLabileFlagBit(NxsToken::curlyBracketedToken);
+	token.SetLabileFlagBit(NxsToken::singleCharacterToken);
+
+	token.GetNextToken();
+
+	if (token.Equals(",") || token.Equals(";"))
+		return false;
+	const NxsString stateAsNexus = token.GetToken();
+	const NxsDiscreteStateCell stateCode = mapper.EncodeNexusStateString(stateAsNexus, token, taxNum, charNum, NULL, nameStr);
+	if (charNum < row.size())
+		row[charNum] = stateCode;
+	else
+		{
+		// Pad up to (but not including) charNum; the original loop condition was inverted and could never run.
+		while (row.size() < charNum)
+			row.push_back(NXS_INVALID_STATE_CODE);
+		row.push_back(stateCode);
+		}
+	return true;
+	}
+
+/*!
+	Called when MATRIX command needs to be parsed from within the UNALIGNED block. Deals with everything after the
+	token MATRIX up to and including the semicolon that terminates the MATRIX command.
+
+	Reads `nTaxWithData' rows. When `labels' is true each row starts with a taxon name, which is either added to the
+	taxa block (if that block has no labels yet) or looked up in it; otherwise rows are assigned to taxa in order.
+	The states of each row are read by HandleNextState until it reports the end of the row.
+
+	Throws NxsException if no taxa are available, if too few taxon labels are known, if a taxon name is unknown or
+	already defined, or if a second row is supplied for a taxon whose row has already been read.
+*/
+void NxsUnalignedBlock::HandleMatrix(
+  NxsToken & token)	/* is the token used to read from `in' */
+	{
+	if (taxa == NULL)
+		{
+		AssureTaxaBlock(false, token, "Matrix");
+		if (taxa->GetNTax() == 0)
+			{
+			errormsg = "Must precede ";
+			errormsg += NCL_BLOCKTYPE_ATTR_NAME;
+			errormsg += " block with a TAXA block or specify NEWTAXA and NTAX in the DIMENSIONS command";
+			throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+			}
+		}
+	const unsigned ntax = taxa->GetNTax();
+	uMatrix.clear();
+	uMatrix.resize(ntax);
+	unsigned indOfTaxInMemory = 0;
+	// rowRead[i] becomes true once the row of the taxon stored at index i has been parsed. It is indexed by the
+	// taxon's index in memory (not by its position in the command) so that a repeated taxon is actually detected.
+	std::vector<bool> rowRead(ntax, false);
+	const unsigned ntlabels = taxa->GetNumTaxonLabels();
+	errormsg.clear();
+	bool taxaBlockNeedsLabels = (ntlabels == 0);
+	if (!taxaBlockNeedsLabels && ntlabels < nTaxWithData)
+		{
+		errormsg << "Not enough taxlabels are known to read characters for " << nTaxWithData << " taxa in the Matrix command.";
+		throw NxsException(errormsg, token);
+		}
+	for (unsigned indOfTaxInCommand = 0; indOfTaxInCommand < nTaxWithData; indOfTaxInCommand++)
+		{
+		NxsString nameStr;
+		if (labels)
+			{
+			token.GetNextToken();
+			nameStr = token.GetToken();
+			if (taxaBlockNeedsLabels)
+				{
+				// The taxa block is being filled from the names found in this matrix.
+				if (taxa->IsAlreadyDefined(nameStr))
+					{
+					errormsg << "Data for this taxon (" << nameStr << ") has already been saved";
+					throw NxsException(errormsg, token);
+					}
+				try {
+					indOfTaxInMemory = taxa->AddTaxonLabel(nameStr);
+					}
+				catch (NxsException &x)
+					{
+					if (nameStr == ";")
+						{
+						errormsg << "Unexpected ; after only " << indOfTaxInCommand << " taxa were read (expecting characters for " << nTaxWithData << " taxa).";
+						throw NxsException(errormsg, token);
+						}
+					x.addPositionInfo(token);
+					throw x;
+					}
+				}
+			else
+				{
+				// TaxLabelToNumber is 1-based; 0 means that the name is unknown.
+				unsigned numOfTaxInMemory = taxa->TaxLabelToNumber(nameStr);
+				if (numOfTaxInMemory == 0)
+					{
+					if (token.Equals(";"))
+						errormsg << "Unexpected ;";
+					else
+						errormsg << "Could not find taxon named " << nameStr << " among stored taxon labels";
+					throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+					}
+				indOfTaxInMemory = numOfTaxInMemory - 1;
+				}
+			}
+		else
+			{
+			indOfTaxInMemory = indOfTaxInCommand;
+			nameStr << 1+indOfTaxInMemory;
+			}
+		// Guard the accesses to rowRead and uMatrix below, both of which hold exactly ntax elements.
+		if (indOfTaxInMemory >= ntax)
+			{
+			errormsg << "Taxon " << nameStr << " has index " << 1+indOfTaxInMemory << " but only " << ntax << " taxa are expected in the Matrix command.";
+			throw NxsException(errormsg, token);
+			}
+		if (rowRead[indOfTaxInMemory])
+			{
+			errormsg << "Characters for taxon " << indOfTaxInCommand << " (" << taxa->GetTaxonLabel(indOfTaxInMemory) << ") have already been stored";
+			throw NxsException(errormsg, token);
+			}
+		rowRead[indOfTaxInMemory] = true;
+		NxsDiscreteStateRow * new_row = &uMatrix[indOfTaxInMemory];
+		unsigned charInd = 0;
+		while (HandleNextState(token, indOfTaxInMemory, charInd, *new_row, nameStr))
+			charInd++;
+		}
+	}
+
+
+/*!
+	Reads an UNALIGNED block from the token stream: everything following the block name (which is read by the
+	NxsReader object) up to the END or ENDBLOCK statement. Overrides the abstract virtual function in the base class.
+
+	DIMENSIONS, FORMAT, TAXLABELS and MATRIX are dispatched to their handlers; any other command that
+	HandleBasicBlockCommands does not deal with is skipped. Throws NxsException if the block name is not followed by
+	a semicolon.
+*/
+void NxsUnalignedBlock::Read(
+  NxsToken & token)	/* is the token used to read from `in' */
+	{
+	isUserSupplied = true;
+	isEmpty = false;
+
+	// The block name must be followed directly by a semicolon
+	token.GetNextToken();
+	const bool sawSemicolon = token.Equals(";");
+	if (!sawSemicolon)
+		{
+		errormsg = "Expecting ';' after ";
+		errormsg += NCL_BLOCKTYPE_ATTR_NAME;
+		errormsg += " block name, but found ";
+		errormsg += token.GetToken();
+		errormsg += " instead";
+		throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
+		}
+	nTaxWithData = 0;
+
+	while (true)
+		{
+		token.GetNextToken();
+		const NxsBlock::NxsCommandResult outcome = HandleBasicBlockCommands(token);
+		if (outcome == NxsBlock::NxsCommandResult(STOP_PARSING_BLOCK))
+			break;
+		if (outcome == NxsBlock::NxsCommandResult(HANDLED_COMMAND))
+			continue;
+
+		// A command that is specific to this block type (or unknown)
+		if (token.Equals("DIMENSIONS"))
+			HandleDimensions(token);
+		else if (token.Equals("FORMAT"))
+			HandleFormat(token);
+		else if (token.Equals("TAXLABELS"))
+			HandleTaxLabels(token);
+		else if (token.Equals("MATRIX"))
+			HandleMatrix(token);
+		else
+			SkipCommand(token);
+		}
+	}
+
+/*!
+	Writes a brief, human-readable summary of this UNALIGNED block to `out': the number of taxa with data, the
+	datatype name, the case and label settings, the missing-data symbol, the valid symbols, any equate macros, and
+	finally the matrix itself (via DebugShowMatrix). Overrides the abstract virtual function in the base class.
+*/
+void NxsUnalignedBlock::Report(
+  std::ostream & out) NCL_COULD_BE_CONST /* is the output stream to which to write the report */ /*v2.1to2.2 1 */
+	{
+	out << '\n' << NCL_BLOCKTYPE_ATTR_NAME << " block contains ";
+	switch (nTaxWithData)
+		{
+		case 0:
+			out << "no taxa";
+			break;
+		case 1:
+			out << "one taxon";
+			break;
+		default:
+			out << nTaxWithData << " taxa";
+		}
+
+	out << "\n  Data type is \"" << this->GetDatatypeName() << "\"" << endl;
+
+	out << (respectingCase ? "  Respecting case" : "  Ignoring case") << endl;
+
+	out << (labels
+			? "  Taxon labels were provided on left side of matrix"
+			: "  No taxon labels were provided on left side of matrix") << endl;
+
+	out << "  Missing data symbol is '" << missing << '\'' << endl;
+	out << "  Valid symbols are: " << symbols << endl;
+
+	if (equates.empty())
+		out << "  No equate macros have been defined" << endl;
+	else
+		{
+		out << "  Equate macros in effect:" << endl;
+		for (std::map<char, NxsString>::const_iterator eqIt = equates.begin(); eqIt != equates.end(); ++eqIt)
+			out << "    " << eqIt->first << " = " << eqIt->second << endl;
+		}
+
+	out << "  Data matrix:" << endl;
+	DebugShowMatrix(out, "    ");
+	}
+
+/*!
+	Writes this block to `out' as a NEXUS UNALIGNED block: the basic block commands, a DIMENSIONS command (only when
+	the taxa block holds more labels than there are taxa with data), the FORMAT and MATRIX commands, and any commands
+	that were skipped when the block was read.
+*/
+void NxsUnalignedBlock::WriteAsNexus(
+  std::ostream & out)	/* is the output stream on which to write */
+  const
+	{
+	out << "BEGIN UNALIGNED;\n";
+	WriteBasicBlockCommands(out);
+
+	// NTax only needs to be stated when the matrix covers a subset of the taxa
+	const bool coversSubsetOfTaxa = (taxa != NULL && taxa->GetNumTaxonLabels() > nTaxWithData);
+	if (coversSubsetOfTaxa)
+		out << "    DIMENSIONS NTax=" << nTaxWithData << ";\n";
+
+	WriteFormatCommand(out);
+	WriteMatrixCommand(out);
+	WriteSkippedCommands(out);
+	out << "END;\n";
+	}
+
+/*!
+	Writes out the information in the MATRIX command in NEXUS format to the specified std::ostream.
+*/
+void NxsUnalignedBlock::WriteMatrixCommand(
+  std::ostream & out)	/* is the output stream on which to print the matrix */
+  const
+	{
+	NCL_ASSERT(taxa);
+	const unsigned ntax = taxa->GetNTax();
+	unsigned width = taxa->GetMaxTaxonLabelLength();
+	out << "Matrix";
+
+	//std::vector<unsigned> origIndexVec = this->GetOrigMatrixIndicesToWrite();
+	bool first = true;
+	for (unsigned i = 0; i < ntax; ++i)
+		{
+		if (!uMatrix[i].empty())
+			{
+			if (first)
+				out << "\n";
+			else
+				out << ",\n";
+			first = false;
+			// The escaped form of a label may be longer than `width' (presumably escaping can add quoting),
+			// so the padding below is clamped at zero instead of letting the unsigned subtraction wrap around.
+			const std::string currTaxonLabel = NxsString::GetEscaped(taxa->GetTaxonLabel(i)); /*v2.1to2.2 4 */
+			out << currTaxonLabel;
+
+			// Print out enough spaces to even up the left edge of the matrix output
+			const unsigned currTaxonLabelLen = (unsigned)currTaxonLabel.size();
+			const unsigned diff = (width > currTaxonLabelLen ? width - currTaxonLabelLen : 0);
+			for (unsigned k = 0; k < diff + 5; k++)
+				out << ' ';
+
+			WriteStatesForMatrixRow(out, i);
+			}
+		}
+	out << "\n;\n";
+	}
+
+void NxsUnalignedBlock::WriteStatesForMatrixRow(
+  std::ostream &out,				/* the output stream on which to write */
+  unsigned currTaxonIndex) const	/* the taxon, in range [0..`ntax') */
+	{
+	const NxsDiscreteStateRow & row = uMatrix[currTaxonIndex];	// indexed without a range check in this function
+	for (NxsDiscreteStateRow::const_iterator rIt = row.begin(); rIt != row.end(); ++rIt)
+		mapper.WriteStateCodeAsNexusString(out, *rIt);	// each stored state code is handed to the mapper in row order
+	}
+
+
+/*!
+	Writes out the information in the FORMAT command in NEXUS format to the specified std::ostream.
+*/
+void NxsUnalignedBlock::WriteFormatCommand(std::ostream &out) const
+	{
+	mapper.WriteStartOfFormatCommand(out);	// presumably writes the opening of the FORMAT command -- confirm in the mapper
+	if (this->respectingCase)
+		out << " RespectCase";	// the only option appended by this function itself
+	// Output terminating semicolon
+	out << ";\n";
+	}
+
+NxsUnalignedBlock *NxsUnalignedBlockFactory::GetBlockReaderForID(const std::string & idneeded, NxsReader *reader, NxsToken *)
+	{
+	if (reader == NULL || idneeded != "UNALIGNED")	// returns NULL without a reader or for any other block id
+		return NULL;
+	NxsUnalignedBlock * nb  = new NxsUnalignedBlock(NULL);	// NOTE(review): allocated with new; ownership presumably passes to the caller -- confirm
+	nb->SetCreateImpliedBlock(true);
+	nb->SetImplementsLinkAPI(true);
+	return nb;
+	}
+
+/*!
+	Returns the internal representation of the state for taxon `taxInd', character `charInd', as a vector of integer
+	values obtained from the datatype mapper (GetStateVectorForCode) for the stored state code. In an UNALIGNED block,
+	there are a different number of characters for each taxon. Use NumCharsForTaxon before calling this function to
+	make sure you do not ask for a character beyond the end: if `charInd' is not less than the number of characters
+	stored for the taxon, an empty vector is returned (no exception is thrown), which cannot be told apart from any
+	other case in which the mapper yields an empty vector. If `taxInd' is not a valid row index of `uMatrix', a
+	NxsUnalignedBlock::NxsX_NoDataForTaxon exception is thrown, constructed with the offending taxon index.
+	NOTE(review): earlier documentation described the returned vector as either a single positive state index, or
+	(for multiple states) a leading 0 (ambiguity) or 1 (polymorphism) followed by indices into the symbols array, and
+	an empty vector for missing data. That layout depends on the mapper and is not visible here -- confirm before
+	relying on it.
+*/
+NxsDiscreteStateRow NxsUnalignedBlock::GetInternalRepresentation(
+  unsigned taxInd,	/* is the index of the taxon in the TAXA block in range [0..`ntaxTotal') */
+  unsigned charInd)	/* is the character index (greater than or equal to 0) */
+	{
+	if (taxInd >= uMatrix.size())
+		throw NxsUnalignedBlock::NxsX_NoDataForTaxon(taxInd);
+	NxsDiscreteStateRow & row = uMatrix[taxInd];
+	if (charInd >= (unsigned)row.size())
+		return NxsDiscreteStateRow();
+	return mapper.GetStateVectorForCode(row[charInd]);
+	}
+
+/*!
+	Returns number of characters stored for taxon whose index in the TAXA block is `taxInd'. In an UNALIGNED block,
+	each taxon can have a different number of characters, and this function can be used to find out how many characters
+	are stored for any particular taxon. Note that `taxInd' should be the index of the taxon of interest as it appears in
+	the TAXA block. A NxsUnalignedBlock::NxsX_NoDataForTaxon exception is thrown only when `taxInd' is not a valid row
+	index of `uMatrix'; for a valid index whose row is empty the function returns 0 rather than throwing.
+	NOTE(review): presumably an empty row means no data were stored for that taxon -- confirm.
+*/
+unsigned NxsUnalignedBlock::NumCharsForTaxon(
+  unsigned taxInd)	/* is the index of the taxon in range [0..`ntaxTotal') */
+	{
+	if (taxInd >= uMatrix.size())
+		throw NxsUnalignedBlock::NxsX_NoDataForTaxon(taxInd);
+	return (unsigned)uMatrix[taxInd].size();
+	}
+
+
+/*!
+	Returns the number of states for taxon `taxInd', character `charInd', as reported by the datatype mapper for the
+	stored state code. If `charInd' is equal to or greater than the number of characters stored for the taxon, returns
+	UINT_MAX. An alternative is to use the function GetInternalRepresentation to obtain a vector of all states. If
+	`taxInd' is not a valid row index of `uMatrix', a NxsUnalignedBlock::NxsX_NoDataForTaxon exception is thrown.
+	NOTE(review): earlier documentation said the return value is 0 for missing data and positive otherwise; that is
+	decided by the mapper (GetNumStatesInStateCode) and is not visible here -- confirm.
+*/
+unsigned NxsUnalignedBlock::GetNumStates(
+  unsigned taxInd,	/* the taxon in range [0..`ntaxTotal') */
+  unsigned charInd)	/* the character in range [0..`nchar') */
+	{
+	if (taxInd >= uMatrix.size())
+		throw NxsUnalignedBlock::NxsX_NoDataForTaxon(taxInd);
+	NxsDiscreteStateRow & row = uMatrix[taxInd];
+	if (charInd >= (unsigned)row.size())
+		return UINT_MAX;
+	return mapper.GetNumStatesInStateCode(row[charInd]);
+	}
+
+/*!
+	Returns true if the state code stored at taxon `taxInd', character `charInd' equals the value returned by the
+	mapper's GetNumStates(), false otherwise. NOTE(review): presumably that code is how the missing state is encoded
+	here -- confirm against the datatype mapper. Throws NxsNCLAPIException if `taxInd' is not a valid row index of
+	`uMatrix', or if `charInd' is too large (i.e. specifies a character beyond the last character for `uMatrix' row
+	`taxInd'). GetInternalRepresentation is not called, and NxsX_NoDataForTaxon is not thrown by this function. Note
+	that `taxInd' should be the index of the taxon in the TAXA block.
+*/
+bool NxsUnalignedBlock::IsMissingState(
+  unsigned taxInd,	/* the taxon, in range [0..`ntaxTotal') */
+  unsigned charInd)	/* the character, in range [0..infinity) */
+	{
+	if (taxInd >= uMatrix.size())
+		throw NxsNCLAPIException("Taxon index out of range of NxsUnalignedBlock::IsMissingState");
+	NxsDiscreteStateRow & row = uMatrix[taxInd];
+	if (charInd >= (unsigned)row.size())
+		throw NxsNCLAPIException("Character index out of range of NxsUnalignedBlock::IsMissingState");
+	return mapper.GetNumStates()  == (unsigned) row[charInd];
+	}
+
+/*!
+	Returns true if taxon `taxInd' is polymorphic for character `charInd', false otherwise, as reported by the datatype
+	mapper for the stored state code. Throws NxsNCLAPIException if `taxInd' is not a valid row index of `uMatrix', or
+	if `charInd' is too large (i.e. specifies a character beyond the last character for `uMatrix' row `taxInd'); the
+	exception message names this function. GetInternalRepresentation is not called, and NxsX_NoDataForTaxon is not
+	thrown by this function. If you need more than the polymorphism flag, call GetInternalRepresentation and extract
+	the information you need from the returned vector. Note that `taxInd' should be the index of the taxon in the TAXA
+	block.
+*/
+bool NxsUnalignedBlock::IsPolymorphic(
+  unsigned taxInd,	/* the taxon in range [0..`ntaxTotal') */
+  unsigned charInd)	/* the character in range [0..infinity) */
+	{
+	if (taxInd >= uMatrix.size())
+		throw NxsNCLAPIException("Taxon index out of range of NxsUnalignedBlock::IsPolymorphic");
+	NxsDiscreteStateRow & row = uMatrix[taxInd];
+	if (charInd >= (unsigned)row.size())
+		throw NxsNCLAPIException("Character index out of range of NxsUnalignedBlock::IsPolymorphic");
+	return mapper.IsPolymorphic(row[charInd]);
+	}
diff --git a/tests/test-all.R b/tests/test-all.R
new file mode 100644
index 0000000..51c1d2b
--- /dev/null
+++ b/tests/test-all.R
@@ -0,0 +1,3 @@
+
+library(testthat)
+test_check("rncl")
diff --git a/tests/testthat/test.badnex.R b/tests/testthat/test.badnex.R
new file mode 100644
index 0000000..dfacebe
--- /dev/null
+++ b/tests/testthat/test.badnex.R
@@ -0,0 +1,14 @@
+#
+# --- Test badnex.R ---
+#
+
+test_that("Malformed Nexus File should not work.", {
+    if (identical(Sys.getenv("RCMDCHECK"), "FALSE")) {  # explicit string test, no logical-to-character coercion
+        pth <- file.path(getwd(), "..", "inst", "nexusfiles")
+    } else {
+        pth <- system.file("nexusfiles", package = "rncl")
+    }
+    badFile <- file.path(pth, "badnex.nex")
+    stopifnot(file.exists(badFile))
+    expect_error(read_nexus_phylo(file = badFile))
+})
diff --git a/tests/testthat/test.rncl.R b/tests/testthat/test.rncl.R
new file mode 100644
index 0000000..2134a5b
--- /dev/null
+++ b/tests/testthat/test.rncl.R
@@ -0,0 +1,307 @@
+#
+# --- Test readNCL.R ---
+#
+
+### Get all the test files: RCMDCHECK="FALSE" selects the inst/ directories relative to getwd()
+if (identical(Sys.getenv("RCMDCHECK"), "FALSE")) {
+    pth <- file.path(getwd(), "..", "inst", "nexusfiles")
+    pth_nw_good <- file.path(getwd(), "..", "inst", "newick_good")
+
+} else {
+    pth <- system.file("nexusfiles", package = "rncl")
+    pth_nw_good <- system.file("newick_good", package = "rncl")
+}
+
+
+## co1.nex -- typical output from MrBayes. Contains 2 identical trees, the first
+## one having posterior probabilities as node labels
+co1File <- file.path(pth, "co1.nex")
+
+## MultiLineTrees.nex -- 2 identical trees stored on several lines
+multiLinesFile <- file.path(pth, "MultiLineTrees.nex")
+
+## Newick trees
+newick <- file.path(pth, "newick.tre")
+
+## treeWithDiscreteData.nex -- Mesquite file with discrete data
+treeDiscDt <- file.path(pth, "treeWithDiscreteData.nex")
+
+## Nexus files where trees only contain subset of taxa listed in TAXA block
+taxsub <- file.path(pth, "test_subset_taxa.nex")
+
+## NEXUS file to test for underscores
+tr_under <- file.path(pth, "test_underscores.nex")
+
+## NEXUS file with no tree block
+tr_empty <- file.path(pth, "test_empty.nex")
+
+stopifnot(file.exists(co1File))
+stopifnot(file.exists(multiLinesFile))
+stopifnot(file.exists(taxsub))
+stopifnot(file.exists(treeDiscDt))
+stopifnot(file.exists(tr_under))
+stopifnot(file.exists(tr_empty))
+
+## function (file, simplify=TRUE, type=c("all", "tree", "data"),
+##   char.all=FALSE, polymorphic.convert=TRUE, levels.uniform=TRUE,
+##   check.node.labels=c("keep", "drop", "asdata"))
+
+
+
+## ########### CO1 -- MrBayes file -- tree only
+
+## Tree properties
+## Labels
+labCo1 <- c("Cow", "Seal", "Carp", "Loach", "Frog", "Chicken", "Human",
+            "Mouse", "Rat", "Whale") #, NA, NA, NA, NA, NA, NA, NA, NA)
+#names(labCo1) <- 1:18
+## Edge lengths
+eLco1 <- c(0.143336, 0.225087, 0.047441, 0.055934, 0.124549, 0.204809, 0.073060, 0.194575,
+           0.171296, 0.222039, 0.237101, 0.546258, 0.533183, 0.154442, 0.134574, 0.113163,
+           0.145592)
+names(eLco1) <- c("11-1", "11-2", "11-12", "12-13", "13-14", "14-15", "15-16", "16-17", "17-3",
+                  "17-4", "16-5", "15-6", "14-7", "13-18", "18-8", "18-9", "12-10")
+## Node types
+nTco1 <-  c("tip", "tip", "tip", "tip", "tip", "tip", "tip", "tip", "tip",
+            "tip", "internal", "internal", "internal", "internal", "internal",
+            "internal", "internal", "internal")
+names(nTco1) <- 1:18
+## Label values
+lVco1 <- c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 0.93, 0.88, 0.99, 1.00,
+           0.76, 1.00, 1.00)
+
+
+context("rncl can deal with simple NEXUS files (tree only)")
+test_that("file with 2 trees (warning normal)", {
+    ## Read trees
+    co1 <- read_nexus_phylo(file=co1File)
+    ## Check files are named
+    expect_equal(names(co1), c("con 50 majrule", "con 50 majrule"))
+    ## Tree 1
+    co1Tree1 <- co1[[1]]
+    target_edgeLength <- unname(eLco1[paste(co1Tree1$edge[,1], co1Tree1$edge[,2], sep="-")])
+    expect_equal(typeof(co1Tree1$edge), "integer")
+    expect_equal(co1Tree1$tip.label, labCo1)     # check labels
+    expect_equal(co1Tree1$edge.length, target_edgeLength)  # check edge lengths
+    expect_equal(co1Tree1$node.label, c("", "0.93", "0.88", "0.99", "1.00", "0.76", "1.00", "1.00"))
+    ## Tree 2
+    co1Tree2 <- co1[[2]]
+    expect_equal(typeof(co1Tree2$edge), "integer")
+    expect_equal(co1Tree2$tip.label, labCo1)     # check labels
+    expect_equal(co1Tree2$edge.length, target_edgeLength)  # check edge lengths
+    expect_equal(co1Tree2$node.label, NULL)
+})
+
+test_that("test option simplify", {
+    ## Check option simplify
+    co1 <- read_nexus_phylo(file=co1File, simplify=TRUE)
+    target_edgeLength <- unname(eLco1[paste(co1$edge[,1], co1$edge[,2], sep="-")])
+    expect_equal(typeof(co1$edge), "integer")
+    expect_true(inherits(co1, "phylo"))        # make sure there is only one tree
+    expect_equal(co1$tip.label, labCo1)     # check labels
+    expect_equal(co1$edge.length, target_edgeLength)  # check edge lengths
+    expect_equal(co1$node.label, c("", "0.93", "0.88", "0.99", "1.00", "0.76", "1.00", "1.00"))
+})
+
+test_that("readNCL can handle multi line files", {
+    ## ########### Multi Lines -- tree only
+    multiLines <- read_nexus_phylo(file=multiLinesFile)
+    ## load the same file with ape::read.nexus as the reference and make
+    ## sure that the trees read match it
+    ml <- ape::read.nexus(file = multiLinesFile)
+    expect_equal(typeof(multiLines[[1]]$edge), "integer")
+    expect_equal(typeof(multiLines[[2]]$edge), "integer")
+    expect_equal(multiLines[[1]], ml[[1]])
+    expect_equal(multiLines[[2]], ml[[2]])
+    rm(ml)
+})
+
+## ########### Newick files
+context("test with Newick files")
+## Tree representation
+labNew <- c("a", "b", "c")
+eLnew <- c(1, 2, 3, 4)
+
+test_that("read_newick_phylo keeps tip labels, edge lengths and node labels", {
+    newTr <- read_newick_phylo(file=newick)
+    expect_equal(newTr$tip.label, labNew)
+    expect_equal(typeof(newTr$edge), "integer")
+    expect_equal(newTr$edge.length, eLnew)
+    expect_equal(newTr$node.label, c("yy", "xx"))  # node labels are retained, not dropped
+})
+
+## weird files
+test_that("weird files",{
+    tr <- read_newick_phylo(file=file.path(pth_nw_good, "Gudrun.tre"))
+    expect_equal(typeof(tr$edge), "integer")
+    expect_equal(length(tr$tip.label), 68)
+    expect_equal(tr$Nnode, 42)
+})
+
+test_that("simple tree with singletons",  {
+    expect_warning(simple_tree <- read_newick_phylo(file=file.path(pth_nw_good, "simpleTree.tre")),
+                   "fur")
+    expect_equal(typeof(simple_tree$edge), "integer")
+    expect_equal(simple_tree$tip.label, c("A_1", "B__2", "C", "D"))
+    expect_equal(simple_tree$node.label, c("mammals", "cats", "dogs"))
+})
+
+test_that("tree with singletons", {
+    expect_warning(sing_tree <- read_newick_phylo(file=file.path(pth_nw_good, "singleton_tree.tre")),
+                   "cats")
+    expect_equal(typeof(sing_tree$edge), "integer")
+    expect_true(is.null(sing_tree$edge.length))
+    expect_equal(sing_tree$tip.label, c("A", "B", "C", "D", "E"))
+    expect_equal(sing_tree$node.label, c("life", "tetrapods", "dogs", "mammals"))
+})
+
+test_that("tree with singletons and edge lengths", {
+    expect_warning(sing_tree <- read_newick_phylo(file=file.path(pth_nw_good, "singleton_with_edge_length.tre")),
+                   "cats")
+    expect_equal(typeof(sing_tree$edge), "integer")
+    expect_equal(length(sing_tree$edge.length), nrow(sing_tree$edge))  # one length per remaining edge
+    expect_equal(sing_tree$tip.label, c("A", "B", "C", "D", "E"))
+    expect_equal(sing_tree$node.label, c("life", "tetrapods", "dogs", "mammals"))
+})
+
+test_that("tree with tip and node labels", {
+    tr1 <- read_newick_phylo(file=file.path(pth_nw_good, "tree1.tre"))
+    expect_equal(typeof(tr1$edge), "integer")
+    expect_equal(length(tr1$edge.length), nrow(tr1$edge))
+    expect_equal(tr1$tip.label, c("A", "B", "C", "D"))
+    expect_equal(tr1$node.label, c("F", "E"))
+    expect_equal(tr1$edge.length, seq(.1, .5, by=.1))
+})
+
+test_that("tree with tip and node labels 2", {
+    tr2 <- read_newick_phylo(file=file.path(pth_nw_good, "tree2.tre"))
+    expect_equal(typeof(tr2$edge), "integer")
+    expect_true(is.null(tr2$edge.length))
+    expect_equal(tr2$tip.label, LETTERS[1:4])
+    expect_equal(tr2$node.label, "E")
+    expect_equal(tr2$Nnode, 1)
+})
+
+############################################################################
+## missing edge lengths                                                   ##
+############################################################################
+
+test_that("file with missing edge lengths (default behavior)", {
+    expect_warning(tr <- read_newick_phylo(file = file.path(pth_nw_good, "missing_edge_lengths.tre")),
+                   "All removed")
+    expect_true(is.null(tr$edge.length))
+    expect_equal(typeof(tr$edge), "integer")
+})
+
+test_that("file with missing edge lengths specify missing value", {
+    expect_warning(tr <- read_newick_phylo(file = file.path(pth_nw_good, "missing_edge_lengths.tre"),
+                                           missing_edge_length = -99),
+                   "replaced by")
+    expect_true(sum(tr$edge.length == -99) > 0)
+    expect_equal(typeof(tr$edge), "integer")
+})
+
+test_that("missing_edge_length is a single numeric value", {
+    expect_error(tr <- read_newick_phylo(file = file.path(pth_nw_good, "missing_edge_lengths.tre"),
+                                           missing_edge_length = "test"),
+                 "single numerical value")
+    expect_error(tr <- read_newick_phylo(file = file.path(pth_nw_good, "missing_edge_lengths.tre"),
+                                           missing_edge_length = c(0, 1)),
+                 "single numerical value")
+    expect_error(tr <- read_newick_phylo(file = file.path(pth_nw_good, "missing_edge_lengths.tre"),
+                                           missing_edge_length = c(NA, 1)),
+                 "single numerical value")
+    expect_error(tr <- read_newick_phylo(file = file.path(pth_nw_good, "missing_edge_lengths.tre"),
+                                           missing_edge_length = c(TRUE)),
+                 "single numerical value")
+})
+
+############################################################################
+## Files where trees contain a subset of the taxa listed in TAXA block    ##
+############################################################################
+
+context("Tree with subset of taxa listed in TAXA block")
+
+test_that("taxa subset", {
+    tr <- read_nexus_phylo(file = taxsub)
+    ncl <- rncl(file = taxsub, file.format = "nexus")
+    expect_equal(ncl$trees[1], "(2,((3,1),(5,4)))")
+    expect_equal(ncl$trees[2], "(2:6,((3:2,1:1):4,(5:10,4:9):7):3)")
+    expect_equal(ncl$trees[3], "(2,(3,(6,(5,4))))")
+    expect_equal(ncl$trees[4], "(5,(4,(2,(3,(1,6)))))")
+    expect_equal(typeof(tr[[1]]$edge), "integer")
+    expect_equal(tr[[1]]$edge, cbind(c(6, 8, 8, 9, 9, 6, 7, 7),
+                               (1:9)[-6]))
+    expect_equal(tr[[2]]$edge, cbind(c(6, 8, 8, 9, 9, 6, 7, 7),
+                               (1:9)[-6]))
+    expect_equal(tr[[3]]$edge, cbind(c(6, 7, 8, 9, 9, 6, 7, 8),
+                               (1:9)[-6]))
+    expect_equal(tr[[4]]$edge, cbind(c(7, 8, 9, 10, 11, 11, 7, 8, 9, 10),
+                               (1:11)[-7]))
+    expect_equal(tr[[2]]$edge.length,
+                 c(6, 2, 1, 10, 9, 3, 4, 7))
+    expect_equal(tr[[1]]$edge.length, NULL)
+    expect_equal(tr[[1]]$tip.label, c("porifera", "ctenophora", "cnidaria", "deuterostomia", "protostomia"))
+    expect_equal(tr[[2]]$tip.label, c("porifera", "ctenophora", "cnidaria", "deuterostomia", "protostomia"))
+    expect_equal(tr[[3]]$tip.label, c("porifera", "ctenophora", "xeno", "deuterostomia", "protostomia"))
+    expect_equal(tr[[4]]$tip.label, c("deuterostomia", "protostomia", "porifera", "ctenophora", "cnidaria", "xeno"))
+    expect_equal(names(tr), paste0("hyp", 1:4))
+})
+
+############################################################################
+## Test roundtrip with Myrmecus file                                      ##
+############################################################################
+
+context("Compare output from ape read file and phylobase")
+
+test_that("compare read.nexus and read_nexus_phylo", {
+    tr_ape <- ape::read.nexus(file = treeDiscDt)
+    tr_ph4 <- read_nexus_phylo(file = treeDiscDt)
+    expect_equal(typeof(tr_ph4$edge), "integer")
+    expect_equal(tr_ape, tr_ph4)
+})
+
+############################################################################
+## Test spacesAsUnderscores                                               ##
+############################################################################
+
+context("test spacesAsUnderscores")
+
+test_that("spacesAsUnderscores is TRUE",  {
+    ncl <- rncl(file = tr_under, file.format = "nexus", spacesAsUnderscores = TRUE)
+    expect_equal(typeof(ncl$parentVector[[1]]), "integer")
+    expect_true(any(grepl("\\_", ncl$taxaNames)))
+    expect_true(all(sapply(ncl$taxonLabelVector, function(x) any(grepl("_", x)))))
+    expect_true(any(grepl("_", ncl$charLabels)))
+    expect_true(any(grepl("_", ncl$stateLabels)))
+})
+
+
+test_that("spacesAsUnderscores is FALSE",  {
+    ncl <- rncl(file = tr_under, file.format = "nexus", spacesAsUnderscores = FALSE)
+    expect_equal(typeof(ncl$parentVector[[1]]), "integer")
+    expect_false(any(grepl("\\_", ncl$taxaNames)))
+    expect_false(all(sapply(ncl$taxonLabelVector, function(x) any(grepl("_", x)))))
+    expect_false(any(grepl("_", ncl$charLabels)))
+    expect_false(any(grepl("_", ncl$stateLabels)))
+})
+
+############################################################################
+## Test on non - existing file                                            ##
+############################################################################
+
+context("non existing file")
+
+test_that("non existing file",
+          expect_error(rncl(file = "foo"), "doesn't exist")
+          )
+
+############################################################################
+## Test on an empty file                                                  ##
+############################################################################
+
+context("test on empty file")
+
+test_that("empty file (no trees)",
+          expect_equal(read_nexus_phylo(file = tr_empty),
+                       NULL))

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/r-cran-rncl.git



More information about the debian-med-commit mailing list