[med-svn] [phybin] 01/02: Imported Upstream version 0.3

Andreas Tille tille at debian.org
Tue Dec 8 12:53:27 UTC 2015


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository phybin.

commit bff5fc5bc3407ef25f24bcf0d46222d635b746ee
Author: Andreas Tille <tille at debian.org>
Date:   Tue Dec 8 13:36:07 2015 +0100

    Imported Upstream version 0.3
---
 .gitignore                                   |   3 +
 Bio/Phylogeny/PhyBin.hs                      | 560 +++++++++++++++++++
 Bio/Phylogeny/PhyBin/Binning.hs              | 421 ++++++++++++++
 Bio/Phylogeny/PhyBin/CoreTypes.hs            | 359 ++++++++++++
 Bio/Phylogeny/PhyBin/Parser.hs               | 252 +++++++++
 Bio/Phylogeny/PhyBin/PreProcessor.hs         |  95 ++++
 Bio/Phylogeny/PhyBin/RFDistance.hs           | 444 +++++++++++++++
 Bio/Phylogeny/PhyBin/Util.hs                 | 139 +++++
 Bio/Phylogeny/PhyBin/Visualize.hs            | 449 +++++++++++++++
 DEVLOG.md                                    | 140 +++++
 LICENSE                                      |  31 ++
 Main.hs                                      | 617 +++++++++++++++++++++
 Makefile                                     | 121 ++++
 README.md                                    | 166 ++++++
 Setup.hs                                     |  12 +
 TestAll.hs                                   |  41 ++
 old_script/cheztrace.ss                      |  62 +++
 old_script/find_tree_matches.ss              | 420 ++++++++++++++
 old_script/iu-exptime.ss                     |  67 +++
 old_script/iu-match.ss                       | 698 +++++++++++++++++++++++
 old_script/match.ss                          | 800 +++++++++++++++++++++++++++
 old_script/parser2.ss                        |  45 ++
 old_script/parser_fullnames.ss               | 187 +++++++
 phybin.cabal                                 | 149 +++++
 point_check.sh                               |   6 +
 stripTable.hs                                |  50 ++
 tests/t1/112.tr                              |   1 +
 tests/t1/13.tr                               |   1 +
 tests/t2/116.tr                              |   1 +
 tests/t2/233.tr                              |   1 +
 tests/t30_mismatched/mismatched.tr           |   3 +
 tests/t3_consensus/cluster1_284_alltrees.tr  | 284 ++++++++++
 tests/t3_consensus/cluster1_284_consensus.tr |   1 +
 tests/t4_consensus/cluster1_16_alltrees.tr   |  16 +
 tests/t4_consensus/cluster1_16_consensus.tr  |   1 +
 tests/t5_consensus/cluster1_35_alltrees.tr   |  35 ++
 tests/t5_consensus/cluster1_35_consensus.tr  |   1 +
 vary_branchlen.sh                            |  47 ++
 website/trees.jpg                            | Bin 0 -> 40409 bytes
 website/website.css                          |  74 +++
 40 files changed, 6800 insertions(+)

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..7431621
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+Rickettsia
+Rickettsiales
+Wolbachia
\ No newline at end of file
diff --git a/Bio/Phylogeny/PhyBin.hs b/Bio/Phylogeny/PhyBin.hs
new file mode 100644
index 0000000..722d5d3
--- /dev/null
+++ b/Bio/Phylogeny/PhyBin.hs
@@ -0,0 +1,560 @@
+{-# LANGUAGE ScopedTypeVariables, RecordWildCards, TypeSynonymInstances, CPP #-}
+{-# LANGUAGE NamedFieldPuns #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# OPTIONS_GHC -fwarn-incomplete-patterns #-}
+{-# OPTIONS_GHC -fwarn-unused-imports #-}
+
+-- | This module contains the code that does the tree normalization and binning.
+--   It's the heart of the program.
+
+module Bio.Phylogeny.PhyBin
+       ( driver, binthem, normalize, annotateWLabLists, unitTests, acquireTreeFiles,
+         deAnnotate, retrieveHighlights, matchAnyHighlight )
+       where
+
+import qualified Data.Foldable as F
+import           Data.Function       (on)
+import           Data.List           (delete, minimumBy, sortBy, foldl1', foldl', intersperse, isPrefixOf)
+import           Data.Maybe          (fromMaybe, catMaybes)
+import           Data.Either         (partitionEithers)
+import           Data.Time.Clock
+import           Data.Tuple          (swap)
+import qualified Data.ByteString.Char8 as B
+import qualified Data.Map                   as M
+import qualified Data.Set                   as S
+import qualified Data.Text.Lazy.IO           as T
+import qualified Data.Vector                 as V
+import qualified Data.Vector.Unboxed         as U
+import           Control.Monad       (forM, forM_, filterM, when, unless)
+import qualified Control.Concurrent.Async as Async
+import           Control.Exception   (evaluate)
+import           Control.Applicative ((<$>),(<*>))
+import           Control.Concurrent  (Chan)
+import           System.FilePath     (combine)
+import           System.Directory    (doesFileExist, doesDirectoryExist, createDirectoryIfMissing,
+                                      getDirectoryContents, getCurrentDirectory)
+import           System.IO           (openFile, hClose, IOMode(..), stdout, withFile)
+import           System.Info         (os)
+import           System.Process      (system)
+import           System.Exit         (ExitCode(..))
+import           Test.HUnit          ((~:),(~=?),Test,test)
+import qualified Data.Clustering.Hierarchical as C
+import qualified Data.GraphViz        as Gv 
+
+-- For visualization:
+import           Text.PrettyPrint.HughesPJClass hiding (char, Style)
+import           Data.GraphViz.Printing (renderDot)
+import           Bio.Phylogeny.PhyBin.CoreTypes
+import           Bio.Phylogeny.PhyBin.Parser (parseNewick, parseNewicks)
+import           Bio.Phylogeny.PhyBin.PreProcessor (collapseBranchLenThresh,
+                                                    collapseBranchBootStrapThresh, pruneTreeLeaves)
+import           Bio.Phylogeny.PhyBin.Visualize (dotToPDF, dotNewickTree, viewNewickTree, dotDendrogram)
+import           Bio.Phylogeny.PhyBin.RFDistance
+import           Bio.Phylogeny.PhyBin.Binning
+import           Bio.Phylogeny.PhyBin.Util
+
+
+import Debug.Trace
+----------------------------------------------------------------------------------------------------
+
+-- Turn on for extra invariant checking:
+debug :: Bool
+debug = True
+
+#ifdef SEQUENTIALIZE
+#warning "SEQUENTIALIING execution.  Disabling all parallelism."
+type Async t = t
+async a = a
+wait  x = return x 
+#else
+type Async t = Async.Async t
+async = Async.async
+wait  = Async.wait
+#endif
+
+-- | A dendrogram PLUS consensus trees at the intermediate points.
+data DendroPlus a = DPLeaf (FullTree a)
+                  | DPBranch !Double (FullTree ()) (DendroPlus a) (DendroPlus a)
+
+----------------------------------------------------------------------------------------------------
+
+-- | Driver to put all the pieces together (parse, normalize, bin)
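+--
+--   A minimal usage sketch (the input file paths below are placeholders, not
+--   part of the package):
+--
+-- > main = driver default_phybin_config { inputs     = ["trees/one.tr", "trees/two.tr"]
+-- >                                      , output_dir = "phybin_out" }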
+driver :: PhyBinConfig -> IO ()
+driver cfg at PBC{ verbose, num_taxa, name_hack, output_dir, inputs=files,
+                do_graph, branch_collapse_thresh, bootstrap_collapse_thresh, highlights, 
+                dist_thresh, clust_mode, rfmode, preprune_labels, 
+                print_rfmatrix } =
+   -- Unused: do_draw
+ do 
+    --------------------------------------------------------------------------------
+    -- First, find out where we are and open the files:
+    --------------------------------------------------------------------------------
+    cd <- getCurrentDirectory 
+    --putStrLn$ "PHYBIN RUNNING IN DIRECTORY: "++ cd
+
+    bl <- doesDirectoryExist output_dir
+    unless bl $ createDirectoryIfMissing True output_dir
+
+    if isPrefixOf "mingw" os then
+      -- TODO: Need a portable version of this.  'filemanip' would do:
+      putStrLn$ "Not cleaning away previous phybin outputs (TODO: port this to Windows)."
+     else do 
+      putStrLn$ "Cleaning away previous phybin outputs..."
+      system$ "rm -f "++output_dir++"/dendrogram.*"
+      system$ "rm -f "++output_dir++"/cluster*"
+      system$ "rm -f "++output_dir++"/distance_matrix.txt"
+      system$ "rm -f "++output_dir++"/WARNINGS.txt"
+      return ()
+
+    putStrLn$ "Parsing "++show (length files)++" Newick tree files."
+    --putStrLn$ "\nFirst ten \n"++ concat (map (++"\n") $ map show $ take 10 files)
+
+    --------------------------------------------------------------------------------
+    -- Next, parse the files and do error checking and annotation.
+    --------------------------------------------------------------------------------
+
+    (goodFiles,warnings1) <- fmap partitionEithers $
+      forM files $ \ file -> do
+           reg <- is_regular_file file
+  	   if reg
+             then return$ Left file
+             else return$ Right file
+    let num_files = length goodFiles
+    bstrs <- mapM B.readFile goodFiles
+    let (labelTab, fulltrees) = parseNewicks name_hack (zip files bstrs)
+        nameToLabel = M.fromList $ map swap $ M.toList labelTab
+        
+    highlightTrs <- retrieveHighlights name_hack labelTab highlights
+
+    let allTaxaLs = M.elems labelTab
+        totalTaxa = length allTaxaLs
+    putStrLn$ "\nTotal unique taxa ("++ show (M.size labelTab) ++"):\n  "++ (unwords allTaxaLs)
+
+    expected_num_taxa <-
+      case preprune_labels of
+        Nothing -> return totalTaxa
+        Just ls -> return (length ls)
+    case rfmode of
+      TolerantNaive -> return ()
+      HashRF -> putStrLn$ "Note: defaulting to expecting ALL "++show expected_num_taxa++" taxa to be present."
+    --------------------------------------------------------------------------------
+
+    case bootstrap_collapse_thresh of
+      Just thr -> putStrLn$" !+ Collapsing branches of bootstrap value less than "++show thr
+      Nothing  -> return ()      
+
+    case branch_collapse_thresh of 
+      Just thr -> putStrLn$" !+ Collapsing branches of length less than "++show thr
+      Nothing  -> return ()
+
+    let do_one :: FullTree DefDecor -> IO (Int, [FullTree DefDecor], Maybe (Int, String))
+        do_one (FullTree treename lblAcc parsed) = do 
+           let
+               pruned0 = parsed
+               
+               -- Important: MUST collapse bootstrap first if we're doing both:
+               pruned1 = case bootstrap_collapse_thresh of
+                          Nothing  -> pruned0
+                          Just thr -> collapseBranchBootStrapThresh thr pruned0
+               pruned2 = case branch_collapse_thresh of 
+                          Nothing  -> pruned1
+                          Just thr -> collapseBranchLenThresh thr pruned1
+               pruned3 = case preprune_labels of
+                          Nothing  -> Just pruned2
+                          Just lst -> pruneTreeLeaves (S.fromList (map lkup lst)) pruned2
+
+               lkup trnm =
+                 case M.lookup trnm nameToLabel of
+                   Nothing -> error$ "Tree name "++show trnm++" did not occur ANYWHERE in the input set."
+                   Just lab -> lab
+    
+           -- TEMPTOGGLE
+	   -- when False $ do putStrLn$ "DRAWING TREE"
+           --                 viewNewickTree "Annotated"  (FullTree file lblAcc' annot)
+           --                 viewNewickTree "Normalized" (FullTree file lblAcc' normal)
+	   --      	   putStrLn$ "WEIGHTS OF NORMALIZED' CHILDREN: "++
+           --                       show (map get_weight$ get_children normal)
+           when verbose$ putStr "."
+           case pruned3 of
+             Nothing -> do when verbose$ putStrLn$ "\n WARNING: tree empty after preprocessing, discarding: "++ treename
+                           return (0, [], Just (0, treename)) 
+             Just pruned3' ->
+               let numL   = numLeaves pruned3'
+                   looksOK   = return (numL, [FullTree treename lblAcc pruned3'], Nothing)
+                   discardIt = return (0, [], Just (numL, treename)) 
+               in case rfmode of
+                   TolerantNaive -> looksOK
+                   HashRF | numL == expected_num_taxa -> looksOK
+                          | otherwise -> do
+                             when verbose$
+                               putStrLn$ "\n WARNING: tree contained unexpected number of leaves ("
+                                         ++ show numL ++"): "++ treename
+                             discardIt
+                             
+    results <- mapM do_one fulltrees
+    let (counts::[Int], validtreess, pairs:: [Maybe (Int, String)]) = unzip3 results
+    let validtrees = concat validtreess
+        warnings2  = catMaybes pairs
+    
+    putStrLn$ "\nNumber of input tree files: " ++ show num_files
+    -- mapM_ (print . displayStrippedTree) validtrees -- Debugging.
+    case preprune_labels of
+      Nothing  -> return ()
+      Just lst -> putStrLn$ "PRUNING trees to just these taxa: "++show lst
+    when (length warnings2 > 0) $
+      putStrLn$ "Number of bad/unreadable input tree files: " ++ show (length warnings2)
+    putStrLn$ "Number of VALID trees (correct # of leaves/taxa): " ++ show (length validtrees)
+    putStrLn$ "Total tree nodes contained in valid trees: "++ show (sum counts)
+    let all_branches = concatMap (F.foldr' (\x ls -> getBranchLen x:ls) []) validtrees    
+    unless (null all_branches) $ do
+      putStrLn$ "Average branch len over valid trees: "++ show (avg all_branches)
+      putStrLn$ "Max/Min branch lengths: "++ show (foldl1' max all_branches,
+                                                   foldl1' min all_branches)
+
+    --------------------------------------------------------------------------------
+    -- Next, dispatch on the mode and do the actual clustering or binning.
+    --------------------------------------------------------------------------------
+
+    (classes,binlist,asyncs) <- case clust_mode of
+      BinThem         -> do x <- doBins validtrees
+                            -- A list of bins, sorted by size:
+                            let binlist = reverse $ sortBy (compare `on` fst) $
+                                          map (\ (tr,OneCluster ls) -> (length ls, OneCluster ls)) $
+                                          M.toList x
+                            return (x,binlist,[])
+      ClusterThem{linkage} -> do
+        (mat, dendro) <- doCluster (rfmode==HashRF) expected_num_taxa linkage validtrees
+        -------------------- 
+        when print_rfmatrix $ printDistMat stdout mat
+        withFile (combine output_dir ("distance_matrix.txt")) WriteMode $ \ hnd ->
+           printDistMat hnd mat
+        --------------------
+        writeFile (combine output_dir ("dendrogram.txt"))
+                  (show$ fmap treename dendro)
+        putStrLn " [finished] Wrote full dendrogram to file dendrogram.txt"
+        sanityCheck dendro
+        --------------------
+        let plotIt mnameMap = 
+              if True -- do_graph
+              then async (do
+                t0 <- getCurrentTime
+                let dot = dotDendrogram cfg "dendrogram" 1.0 dendro mnameMap highlightTrs
+                -- Be wary of cycles, something appears to be buggy:
+                putStrLn$ " [async] writing dendrogram as a graph to dendrogram.dot"
+                -- writeFile (combine output_dir "dendrogram.dot") 
+                --           (show $ renderDot $ Gv.toDot dot)
+                mtxt <- safePrintDendro dot
+                case mtxt of
+                  Nothing  -> 
+                    putStrLn "WARNING: because we couldn't print it, we're not drawing the dendrogram either."
+                  Just txt -> do
+                    T.writeFile (combine output_dir "dendrogram.dot") txt
+                    putStrLn$ " [async] Next, plot dendrogram.pdf"
+                    _ <- dotToPDF dot (combine output_dir "dendrogram.pdf") 
+                    t1 <- getCurrentTime          
+                    putStrLn$ " [finished] Writing dendrogram diagram ("
+                              ++show(diffUTCTime t1 t0)++")")
+              else async (return ())
+        case dist_thresh of
+          Nothing -> do a <- plotIt Nothing
+                        return (M.empty,[],[a])
+                     -- error "Fully hierarchical cluster output is not finished!  Use --editdist."
+          Just dstThresh -> do            
+            putStrLn$ "Combining all clusters at distance less than or equal to "++show dstThresh
+            let clusts = sliceDendro (fromIntegral dstThresh) dendro
+                classes = clustsToMap clusts
+                -- Cache the lengths:
+                wlens  = [ (length ls, OneCluster ls) | OneCluster ls <- clusts ]
+                sorted0 = reverse$ sortBy (compare `on` fst) wlens -- TODO: Parallel sorting?
+                nameMap = clustsToNameMap (map snd sorted0)
+            gvizAsync <- plotIt (Just nameMap)
+            putStrLn$ "After flattening, cluster sizes are: "++show (map fst sorted0)
+            -- Flatten out the dendrogram:
+            return (classes, sorted0, [gvizAsync])
+
+    unless (null binlist)$ reportClusts clust_mode binlist
+        
+    ----------------------------------------
+    -- TEST, TEMPTOGGLE: print out edge weights :
+    -- forM_ (map snd3 results) $ \parsed -> do 
+    --    let weights = all_edge_weights (head$ S.toList taxa) parsed
+    --    trace ("weights of "++ show parsed ++" "++ show weights) $
+    --      return ()
+    -- exitSuccess
+    ----------------------------------------
+
+    --------------------------------------------------------------------------------
+    -- Finally, produce all the required outputs.
+    --------------------------------------------------------------------------------
+
+--    putStrLn$ "Final number of tree bins: "++ show (M.size classes)
+
+    unless (null warnings1 && null warnings2) $
+	writeFile (combine output_dir "WARNINGS.txt")
+		  ("This file was generated to record all of the files which WERE NOT incorporated successfully into the results.\n" ++
+		   "Each of these files had some kind of problem, likely one of the following:\n"++
+		   "  (1) a mismatched number of taxa (leaves) in the tree relative to the rest of the dataset\n"++
+		   "  (2) a file that could not be read.\n"++
+		   "  (3) a file that could not be parsed.\n\n"++
+		   concat (map (\ file -> "Not a regular/readable file: "++ file++"\n")
+		           warnings1) ++ 
+		   concat (map (\ (n,file) ->
+                                 "Wrong number of taxa ("++ show n ++"): "++ file++"\n")
+		           warnings2))
+
+    async2 <- case clust_mode of
+                BinThem       -> outputBins binlist output_dir do_graph
+                ClusterThem{} -> outputClusters expected_num_taxa binlist output_dir do_graph
+
+    -- Wait on parallel tasks:
+    putStrLn$ "Waiting for "++show (length$ async2:asyncs)++" asynchronous tasks to finish..."
+    mapM_ wait (async2:asyncs)
+    putStrLn$ "Phybin completed."
+    --------------------------------------------------------------------------------
+    -- End driver
+    --------------------------------------------------------------------------------
+
+-- filePrefix = "bin"
+filePrefix = "cluster"    
+
+--------------------------------------------------------------------------------
+-- Driver helpers:
+--------------------------------------------------------------------------------
+
+-- doBins :: [FullTree DefDecor] -> t1 -> t2 -> IO BinResults
+doBins :: [FullTree DefDecor] -> IO (BinResults StandardDecor)
+doBins validtrees = do 
+    putStrLn$ "Creating equivalence classes (bins)..."
+    let classes = --binthem_normed$ zip files $ concat$ map snd3 results
+	          binthem validtrees
+    return (classes)
+
+doCluster :: Bool -> Int -> C.Linkage -> [FullTree DefDecor] -> IO (DistanceMatrix, C.Dendrogram (FullTree DefDecor))
+doCluster use_hashrf expected_num_taxa linkage validtrees = do
+  t0 <- getCurrentTime
+  when use_hashrf$ putStrLn " Using HashRF-style algorithm..."
+  let nwtrees  = map nwtree validtrees
+      numtrees = length validtrees
+      mat = if use_hashrf 
+            then hashRF expected_num_taxa nwtrees 
+            else fst (naiveDistMatrix nwtrees)
+      ixtrees  = zip [0..] validtrees
+      dist (i,t1) (j,t2) | j == i     = 0
+--                         | i == numtrees-1 = 0 
+                         | j < i      = fromIntegral ((mat V.! i) U.! j)
+                         | otherwise  = fromIntegral ((mat V.! j) U.! i)
+      dist1 a b = trace ("Taking distance between "++show (fst a, fst b)) $ dist a b
+      dendro = fmap snd $ C.dendrogram linkage ixtrees dist
+  -- Force the distance matrix:
+  V.mapM_ evaluate mat
+  t1 <- getCurrentTime
+  putStrLn$ "Time to compute distance matrix: "++show(diffUTCTime t1 t0)
+  putStrLn$ "Clustering using method "++show linkage
+  return (mat,dendro)
+
+
+reportClusts :: ClustMode -> [(Int, OneCluster StandardDecor)] -> IO ()
+reportClusts mode binlist = do 
+    let binsizes = map fst binlist
+
+    putStrLn$ " Outcome: "++show (length binlist)++" clusters found, "++show (length$ takeWhile (>1) binsizes)
+             ++" non-singleton, top bin sizes: "++show(take 10 binsizes)
+    putStrLn$"  Up to first 30 bin sizes, excluding singletons:"
+    forM_ (zip [1..30] binlist) $ \ (ind, (len, OneCluster ftrees)) -> do
+       when (len > 1) $ -- Omit that long tail of single element classes...
+          putStrLn$show$
+           hcat [text ("  * cluster#"++show ind++", members "++ show len ++", "), 
+                 case mode of
+                   BinThem -> vcat [text ("avg bootstraps "++show (get_bootstraps$ avg_trees$ map nwtree ftrees)++", "),
+                                    text "all: " <> pPrint (filter (not . null) $ map (get_bootstraps . nwtree) ftrees)]
+                   ClusterThem{} -> hcat [] ]
+
+-- | Convert a flat list of clusters into a map from individual trees to clusters.
+clustsToMap :: [OneCluster StandardDecor] -> BinResults StandardDecor
+clustsToMap clusts = F.foldl' fn M.empty clusts
+  where
+    fn acc theclust@(OneCluster ftrs) =
+      F.foldl' (fn2 theclust) acc ftrs
+    fn2 theclust acc (FullTree{nwtree}) =
+      M.insert (anonymize_annotated nwtree) theclust acc
+
+-- | Map each tree NAME onto the one-based index (in sorted order) of the cluster it
+-- comes from.
+clustsToNameMap :: [OneCluster StandardDecor] -> M.Map TreeName Int
+clustsToNameMap clusts = foldl' fn M.empty (zip [1..] clusts)
+  where
+    fn acc (ix,(OneCluster ftrs)) =
+      foldl' (\acc' t -> M.insert (treename t) ix acc') acc ftrs
+
+flattenDendro :: (C.Dendrogram (FullTree DefDecor)) -> OneCluster StandardDecor
+flattenDendro dendro =
+  case dendro of
+    C.Leaf (FullTree{treename,labelTable,nwtree}) ->
+      OneCluster [FullTree treename labelTable (annotateWLabLists nwtree)]
+    C.Branch _ left right ->
+      -- TODO: fix quadratic append
+      flattenDendro left `appendClusts` flattenDendro right
+ where
+   appendClusts (OneCluster ls1) (OneCluster ls2) = OneCluster (ls1++ls2)
+   
+-- | Turn a hierarchical clustering into a flat clustering.
+sliceDendro :: Double -> (C.Dendrogram (FullTree DefDecor)) -> [OneCluster StandardDecor]
+sliceDendro dstThresh den = loop den
+  where 
+   loop br@(C.Branch dist left right)
+     -- Too far apart to combine:
+     | dist > dstThresh  = loop left ++ loop right
+     | otherwise         = [flattenDendro br]
+   loop br@(C.Leaf _)    = [flattenDendro br]
+
+
+--------------------------------------------------------------------------------
+
+-- outputClusters :: (Num a1, Ord a1, Show a1) => Int -> [(a1, t, OneCluster a)] -> String -> Bool -> IO ()
+outputClusters :: Int -> [(Int, OneCluster a)] -> String -> Bool -> IO (Async ())
+outputClusters num_taxa binlist output_dir do_graph = do
+    let numbins = length binlist
+    let base i size = combine output_dir (filePrefix ++ show i ++"_"++ show size) 
+    let consTrs = [ FullTree "consensus" (labelTable $ head ftrees) nwtr 
+                  | (_, OneCluster ftrees) <- binlist
+                  , let nwtr :: NewickTree StandardDecor
+                        nwtr = annotateWLabLists $ fmap (const (Nothing,0)) $
+                               consensusTree num_taxa $ map nwtree ftrees ]
+    
+    forM_ (zip3 [1::Int ..] binlist consTrs) $ \ (i, (size, OneCluster ftrees), fullConsTr) -> do
+       writeFile (base i size ++".txt") (concat$ map ((++"\n") . treename) ftrees)
+       writeFile (base i size ++"_consensus.tr") (show (displayStrippedTree fullConsTr) ++ "\n")
+       writeFile (base i size ++"_alltrees.tr")
+           (unlines [ show (displayStrippedTree ft) | ft <- ftrees ])
+
+    putStrLn$ " [finished] Wrote contents of each cluster to cluster<N>_<size>.txt"
+    putStrLn$ " [finished] Wrote representative (consensus) trees to cluster<N>_<size>_consensus.tr"
+    if do_graph then do
+      putStrLn$ "Next start the time consuming operation of writing out graphviz visualizations:"
+      asyncs <- forM (zip3 [1::Int ..] binlist consTrs) $
+                \ (i, (size, OneCluster membs), fullConsTr) -> async$ do
+    	 when (size > 1 || numbins < 100) $ do
+           let dot = dotNewickTree ("cluster #"++ show i) 1.0 fullConsTr
+	   _ <- dotToPDF dot (base i size ++ ".pdf")
+	   return ()
+      async $ do
+        mapM_ wait asyncs
+        putStrLn$ " [finished] Wrote visual representations of consensus trees to "++filePrefix++"<N>_<size>.pdf"
+     else do putStrLn$ "NOT creating processes to build per-cluster .pdf visualizations. (Not asked to.)"
+             async (return ())
+
+outputBins :: [(Int, OneCluster StandardDecor)] -> String -> Bool -> IO (Async ())
+outputBins binlist output_dir  do_graph = do
+    let numbins = length binlist
+    let base i size = combine output_dir (filePrefix ++ show i ++"_"++ show size) 
+    let avgs = map (avg_trees . map nwtree . clustMembers . snd) binlist
+    forM_ (zip3 [1::Int ..] binlist avgs) $ \ (i, (size, OneCluster ftrees), avgTree) -> do
+       let FullTree fstName labs _ = head ftrees
+           fullAvgTr = FullTree fstName labs avgTree
+       writeFile (base i size ++".txt") (concat$ map ((++"\n") . treename) ftrees)
+       when debug$ do
+         writeFile (base i size ++".dbg") (show (pPrint avgTree) ++ "\n")
+       -- Printing the average tree instead of the stripped canonical one:
+       writeFile   (base i size ++"_avg.tr")  (show (displayDefaultTree$ deAnnotate fullAvgTr) ++ "\n") -- FIXME
+
+    putStrLn$ " [finished] Wrote contents of each bin to "++filePrefix++"<N>_<binsize>.txt"
+    putStrLn$ "            Wrote representative trees to "++filePrefix++"<N>_<binsize>_avg.tr" 
+    if do_graph then do
+       putStrLn$ "Next start the time consuming operation of writing out graphviz visualizations:"
+       asyncs <- forM (zip3 [1::Int ..] binlist avgs) $ \ (i, (size, OneCluster membs), avgTree) -> async $ do
+          let FullTree fstName labs _ = head membs
+              fullAvgTr = FullTree fstName labs avgTree 
+          when (size > 1 || numbins < 100) $ do 
+            let dot = dotNewickTree ("cluster #"++ show i) (1.0 / avg_branchlen (map nwtree membs))
+                                    --(annotateWLabLists$ fmap (const 0) tr)
+                                    -- TEMP FIXME -- using just ONE representative tree:
+                                    ( --trace ("WEIGHTED: "++ show (head$ trees bentry)) $ 
+                                      --(head$ trees bentry) 
+                                     fullAvgTr)
+            _ <- dotToPDF dot (base i size ++ ".pdf")
+            return ()
+       async $ do
+         mapM_ wait asyncs
+         putStrLn$ " [finished] Wrote visual representations of trees to "++filePrefix++"<N>_<size>.pdf"
+     else async (return ())
+--------------------------------------------------------------------------------
+
+-- Monadic mapAccum
+mapAccumM :: Monad m => (acc -> x -> m (acc,y)) -> acc -> [x] -> m (acc,[y])
+mapAccumM fn acc ls = F.foldrM fn' (acc,[]) ls
+  where
+    fn' x (acc,ls) = do (acc',y) <- fn acc x
+                        return (acc',y:ls)
+
+fst3 :: (t, t1, t2) -> t
+snd3 :: (t, t1, t2) -> t1
+thd3 :: (t, t1, t2) -> t2
+fst3 (a,_,_) = a
+snd3 (_,b,_) = b
+thd3 (_,_,c) = c
+
+
+-- Come up with an average tree from a list of isomorphic trees.
+-- This comes up with some blending of edge lengths.
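+-- For example, if corresponding branches in two input trees have lengths 0.2
+-- and 0.4, the averaged tree carries a branch of length 0.3; bootstrap values
+-- are averaged only over the trees that actually have one.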
+avg_trees :: [AnnotatedTree] -> AnnotatedTree
+avg_trees origls = 
+     fmap unlift $ 
+     foldIsomorphicTrees (foldl1 sumthem) $ 
+     map (fmap lift) origls     
+  where
+    totalCount = fromIntegral$ length origls
+
+    -- Here we do the actual averaging:
+    unlift :: TempDecor -> StandardDecor
+    unlift (bl, (bs, bootcount), wt, ls) =
+      (StandardDecor (bl / totalCount)
+                     (case bootcount of
+                        0 -> Nothing
+                        _ -> Just$ round (fromIntegral bs / fromIntegral bootcount))
+                     wt ls)
+      
+    lift :: StandardDecor -> TempDecor
+    lift (StandardDecor bl bs wt ls) = (bl, (fromMaybe 0 bs, countMayb bs), wt, ls)
+
+    sumthem :: TempDecor -> TempDecor -> TempDecor
+    sumthem (bl1, (bs1, cnt1), wt, ls)
+            (bl2, (bs2, cnt2), _,  _) =
+            (bl1+bl2, (bs1 + bs2, cnt1+cnt2), wt, ls)
+    countMayb Nothing  = 0
+    countMayb (Just _) = 1
+
+-- Used only by avg_trees above...
+type TempDecor = (Double, (Int, Int), Int, [Label])
+
+avg ls = sum ls / fromIntegral (length ls)
+
+-- | Parse extra trees in addition to the main inputs (for --highlight).
+retrieveHighlights :: (String->String) -> LabelTable -> [FilePath] -> IO [[NewickTree ()]]
+retrieveHighlights name_hack labelTab ls =
+  mapM parseHighlight ls
+  where
+    parseHighlight file = do 
+      bs <- B.readFile file
+      let (lt2,htr) = parseNewick labelTab name_hack file bs
+      unless (lt2 == labelTab) $
+        error$"Tree given as --highlight includes taxa not present in main tree set: "++
+              show(M.keys$ M.difference lt2 labelTab)            
+      return (map (fmap (const())) htr)
+
+
+-- | Create a predicate that tests trees for consistency with the set of --highlight
+-- (consensus) trees.
+--
+-- Note, tree consistency is not the same as an exact match.  It's
+-- like (<=) rather than (==).  All trees are consistent with the
+-- "star topology".
+matchAnyHighlight :: [[NewickTree ()]] -> NewickTree () -> Bool      
+-- matchAnyHighlight :: [[NewickTree ()]] -> NewickTree () -> Maybe Int
+-- If there is a match, return the index of the highlight that matched.
+matchAnyHighlight highlightTrs =
+   let matchers = map mkMatcher highlightTrs
+       mkMatcher ls = let fns = map compatibleWith ls -- Multiple predicate functions
+                      in \ tr -> -- Does it match any predicate?
+                          or$ map (\f -> f tr) fns   
+   in \ newtr -> 
+       any (\f -> f newtr) matchers
diff --git a/Bio/Phylogeny/PhyBin/Binning.hs b/Bio/Phylogeny/PhyBin/Binning.hs
new file mode 100644
index 0000000..9c0369b
--- /dev/null
+++ b/Bio/Phylogeny/PhyBin/Binning.hs
@@ -0,0 +1,421 @@
+{-# LANGUAGE ScopedTypeVariables, RecordWildCards, TypeSynonymInstances, CPP #-}
+{-# LANGUAGE NamedFieldPuns #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# OPTIONS_GHC -fwarn-incomplete-patterns #-}
+{-# OPTIONS_GHC -fwarn-unused-imports #-}
+
+-- | This module contains the code that does the tree normalization and binning.
+
+module Bio.Phylogeny.PhyBin.Binning
+       ( -- * Binning and normalization
+         binthem, normalize, normalizeFT, annotateWLabLists, 
+         deAnnotate, OneCluster(..), BinResults, StrippedTree,
+         -- * Utilities and unit tests
+         get_weight, unitTests, anonymize_annotated
+       )
+       where
+
+import           Data.Function       (on)
+import           Data.List           (delete, minimumBy, sortBy, insertBy, intersperse, sort)
+import qualified Data.ByteString.Char8 as B
+import qualified Data.Map                   as M
+import           Control.Applicative ((<$>),(<*>))
+import           Control.Concurrent  (Chan)
+import           Test.HUnit          ((~:),(~=?),Test,test)
+
+-- For visualization:
+import           Text.PrettyPrint.HughesPJClass hiding (char, Style)
+import           Bio.Phylogeny.PhyBin.CoreTypes
+import           Bio.Phylogeny.PhyBin.Parser (parseNewicks, parseNewick)
+import           Bio.Phylogeny.PhyBin.Visualize (dotNewickTree, viewNewickTree)
+
+-- Turn on for extra invariant checking:
+debug :: Bool
+debug = True
+
+----------------------------------------------------------------------------------------------------
+-- Normal form for unordered, unrooted trees
+----------------------------------------------------------------------------------------------------
+
+-- The basic idea is that what we *want* is the following:
+--   ROOT: most balanced point
+--   ORDER: sorted in increasing subtree weight
+
+-- But that's not quite good enough.  There are ties to break.  To do
+-- that we fall back on the (totally ordered) leaf labels.
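+--
+-- For example, the inputs "(A,(C,D,E),B);", "((C,D,E),B,A);", and
+-- "(D,E,C,(B,A));" all normalize to the same tree, which prints as
+-- "(C, D, E, (A, B))" (see the unit tests at the bottom of this file).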
+
+--------------------------------------------------------------------------------
+
+
+-- Our sorting criteria for the children of interior nodes:
+compare_childtrees :: AnnotatedTree -> AnnotatedTree -> Ordering
+compare_childtrees node1 node2 = 
+    case (subtreeWeight $ get_dec node1) `compare` (subtreeWeight $ get_dec node2) of 
+     -- Comparisons on atoms cause problems WRT determinism between runs if parallelism is introduced.
+     -- Can consider it an optimization for the serial case perhaps:
+--     EQ -> case map deAtom (get_label_list node1) `compare` 
+--	        map deAtom (get_label_list node2) of
+     EQ -> case (sortedLabels$ get_dec node1) `compare` (sortedLabels$ get_dec node2) of
+            EQ -> error$ "Internal invariant broken.  These two children have equal ordering priority:\n" 
+		  ++ "Pretty printing:\n  "
+		  ++ show (pPrint node1) ++ "\n  " ++ show (pPrint node2)
+		  ++ "\nFull data structures:\n  "
+		  ++ show (node1) ++ "\n  " ++ show (node2)
+	    x  -> x
+     x -> x
+
+-- | A version lifted to operate over full trees.
+normalizeFT :: FullTree StandardDecor -> FullTree StandardDecor
+normalizeFT (FullTree nm labs tr) = FullTree nm labs (normalize tr)
+
+-- | This is it, here's the routine that transforms a tree into normal form.
+--   This relies HEAVILY on lazy evaluation.
+normalize :: AnnotatedTree -> AnnotatedTree
+normalize tree = snd$ loop tree Nothing
+ where 
+
+  add_context dec Nothing  = dec
+  add_context dec (Just c) = add_weight dec c
+
+  -- loop: Walk over the tree, turning it inside-out in the process.
+  -- Inputs: 
+  --    1. node: the NewickTree node to process ("us")
+  --    2. context: all nodes connected through the parent, "flipped" as though *we* were root
+  --                The "flipped" part has ALREADY been normalized.
+  -- Outputs: 
+  --    1. new node
+  --    2. the best candidate root anywhere under this subtree
+  loop :: AnnotatedTree -> Maybe (AnnotatedTree) -> (AnnotatedTree, AnnotatedTree)
+  loop node ctxt  = case node of
+    NTLeaf (StandardDecor _ _ w sorted) _name -> 
+	(node, 
+	 -- If the leaf becomes the root... we could introduce another node:
+	 NTInterior (add_context (StandardDecor 0 Nothing w sorted) ctxt) $
+	            (verify_sorted "1" id$ maybeInsert compare_childtrees ctxt [node])
+
+	 -- It may be reasonable to not support leaves becoming root.. that changes the number of nodes!
+	            --error "normalize: leaf becoming root not currently supported."
+	)
+    
+    NTInterior dec ls -> 
+     let 
+         -- If this node becomes the root, the parent becomes one of our children:
+         inverted = NTInterior inverted_dec inverted_children
+	 inverted_dec      = add_context dec ctxt
+         inverted_children = verify_sorted "2" id$ maybeInsert compare_childtrees ctxt newchildren
+
+	 newchildren = --trace ("SORTED "++ show (map (get_label_list . fst) sorted)) $
+		       map fst sorted
+         sorted = sortBy (compare_childtrees `on` fst) possibs
+
+         possibs = 
+	  flip map ls $ \ child -> 
+	   let 
+
+	       -- Will this diverge???  Probably depends on how equality (for delete) is defined... 
+
+	       -- Reconstruct the current node missing one child (because it became a parent):
+	       -- Update its metadata appropriately:
+	       newinverted = NTInterior (subtract_weight inverted_dec child) 
+			                (verify_sorted "3" id$ delete newnode inverted_children)
+	       (newnode, _) = result
+
+  	       result = loop child (Just newinverted) 
+	   in
+	       result
+	 
+         -- Either us or a candidate suggested by one of the children:
+         rootcandidates = inverted : map snd sorted
+
+         -- Who wins?  The "most balanced".  Minimize max subtree weight.
+	 -- The compare operator is NOT allowed to return EQ here.  Therefore there will be a unique minimum.
+	 winner = --trace ("Candidates: \n"++ show (nest 6$ vcat (map pPrint (zip (map max_subtree_weight rootcandidates) rootcandidates )))) $ 
+		  minimumBy cmpr_subtree_weight rootcandidates
+
+	 max_subtree_weight = maximum . map get_weight . get_children 
+	 fat_id = map get_label_list . get_children 
+
+         cmpr_subtree_weight tr1 tr2 = 
+           case max_subtree_weight  tr1 `compare` max_subtree_weight tr2 of
+	     EQ -> -- As a fallback we compare the alphabetic order of the "bignames" of the children:
+                   case fat_id tr1 `compare` fat_id tr2 of 
+		     EQ -> error$ "\nInternal invariant broken.  These two were equally good roots:\n" 
+			          ++ show (pPrint tr1) ++ "\n" ++ show (pPrint tr2)
+		     x -> x
+	     x -> x
+
+     in (NTInterior dec newchildren, winner)
+
+
+-- Verify that our invariants are met:
+verify_sorted :: (Show a, Pretty a) => String -> (a -> AnnotatedTree) -> [a] -> [a]
+verify_sorted msg = 
+ if debug 
+ then \ project nodes ->
+  let weights = map (get_weight . project) nodes in 
+    if sort weights == weights
+    then nodes
+--    else error$ "Child list failed verification: "++ show (pPrint nodes)
+    else error$ msg ++ ": Child list failed verification, not sorted: "++ show (weights)
+	        ++"\n  "++ show (sep $ map pPrint nodes) ++ 
+                "\n\nFull output:\n  " ++ (concat$ intersperse "\n  " $ map show nodes)
+ else \ _ nodes -> nodes
+
+
+-- TODO: Salvage any of these tests that are worthwhile and get them into the unit tests:	        	
+tt :: AnnotatedTree
+tt = normalize $ annotateWLabLists $ head$ snd$ parseNewick M.empty id "" "(A,(C,D,E),B);"
+
+norm4 :: FullTree StandardDecor
+norm4 = norm "((C,D,E),B,A);"
+
+norm5 :: AnnotatedTree
+norm5 = normalize$ annotateWLabLists$ head$ snd$ parseNewick M.empty id "" "(D,E,C,(B,A));"
+
+
+----------------------------------------------------------------------------------------------------
+-- Equivalence classes on Trees:
+----------------------------------------------------------------------------------------------------
+
+-- | When binning, the members of a OneCluster are isomorphic trees.  When clustering
+-- based on Robinson-Foulds distance they are merely similar trees.
+newtype OneCluster a = OneCluster { clustMembers :: [FullTree a] }
+  deriving Show 
+
+-- | Ignore metadata (but keep weights) for the purpose of binning
+type StrippedTree = NewickTree Int
+
+-- | Index the results of binning by topology-only stripped trees
+--   that have their decorations removed.
+type BinResults a = M.Map StrippedTree (OneCluster a)
+
+-- | The binning function.
+--   Takes labeled trees, classifies labels into equivalence classes.
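+--
+--   For example (mirroring the unit tests at the bottom of this file), these
+--   three equivalent inputs land in a single bin:
+--
+-- >>> M.size (binthem (snd (parseNewicks id [("one","(A,(C,D,E),B);"), ("two","((C,D,E),B,A);"), ("three","(D,E,C,(B,A));")])))
+-- 1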
+binthem :: [FullTree DefDecor] -> BinResults StandardDecor
+binthem ls = binthem_normed normalized
+ where
+  normalized = map (\ (FullTree n lab tree) ->
+                     FullTree n lab (normalize $ annotateWLabLists tree)) ls
+
+-- | This version accepts trees that are already normalized:
+binthem_normed :: [FullTree StandardDecor] -> BinResults StandardDecor
+binthem_normed normalized = 
+--   foldl (\ acc (lab,tree) -> M.insertWith update tree (OneCluster{ members=[lab] }) acc)
+   foldl (\ acc ft@(FullTree treename _ tree) ->
+           M.insertWith update (anonymize_annotated tree) (OneCluster [ft]) acc)
+	 M.empty normalized
+	 --(map (mapSnd$ fmap (const ())) normalized) -- still need to STRIP them
+ where 
+-- update new old = OneCluster{ members= (members new ++ members old) }
+ update (OneCluster new) (OneCluster old) = OneCluster (new++old)
+ --strip = fmap (const ())
+
+-- | For binning. Remove branch lengths and labels but leave weights.
+anonymize_annotated :: AnnotatedTree -> StrippedTree
+anonymize_annotated = fmap (\ (StandardDecor bl bs w labs) -> w)
+
+
+----------------------------------------------------------------------------------------------------
+-- Other tools and algorithms.
+----------------------------------------------------------------------------------------------------
+
+-- Extract all edges connected to a particular node in every tree.  Return branch lengths.
+all_edge_weights lab trees = 
+     concat$ map (loop []) trees
+  where 
+ loop acc (NTLeaf len name) | lab == name = len:acc
+ loop acc (NTLeaf _ _)                    = acc
+ loop acc (NTInterior _ ls) = foldl loop acc ls
+
+
+----------------------------------------------------------------------------------------------------
+-- Bitvector based normalization.
+----------------------------------------------------------------------------------------------------
+
+-- TODO: This approach is probably faster. Give it a try.
+
+{-
+int NumberOfSetBits(int i)
+{
+    i = i - ((i >> 1) & 0x55555555);
+    i = (i & 0x33333333) + ((i >> 2) & 0x33333333);
+    return ((i + (i >> 4) & 0xF0F0F0F) * 0x1010101) >> 24;
+}
+
+int __builtin_popcount (unsigned int x);
+-}
+   
+
+----------------------------------------------------------------------------------------------------
+
+
+    
+{- 
+ ----------------------------------------
+ PARSING TIMING TEST:
+ ----------------------------------------
+
+ Compiling this with GHC 6.12 on my laptop -O2...
+ It takes 0.043s startup to parse ten files.
+ And 0.316 seconds to parse 2648.. so we can say that's almost all time spent parsing/building/traversing.
+ (All nodes summed to 14966)
+  (The tested version uses Strings for labels... not Atoms)
+
+ Comparing against the original mzscheme version (with Racket 5.0)
+ with default optimization (there's no obvious -O2), well the
+ generated .exe has a ~0.5 second startup time overhead...
+   0.881 seconds total to do the parsing, or about 380ms just for parsing.
+   But that doesn't do the counting!
+   Ok, this mzscheme version is in a messed up state at this point, but hacking
+   it to do a count (and it gets a different one.. 12319), I get 0.882 seconds real time,
+   which is negligibly more.
+   
+ If anything parsec should be at a disadvantage because of the lack of
+ a preprocessing phase to generate the FSM...
+
+ Btw, switching node labels over to Atoms made no difference. (But
+ didn't slow down at least.)  We wouldn't expect this to save anything
+ on the construction side... parsec still allocates/builds the strings
+ before we intern them.
+
+ -}
+
+
+
+
+----------------------------------------------------------------------------------------------------
+-- General helper/utility functions:
+----------------------------------------------------------------------------------------------------
+
+merge :: Ord a => [a] -> [a] -> [a]
+merge [] ls = ls
+merge ls [] = ls
+merge l@(a:b) r@(x:y) = 
+  if a < x
+  then a : merge b r
+  else x : merge y l 
+
+-- Set subtraction for sorted lists:
+demerge :: (Ord a, Show a) => [a] -> [a] -> [a]
+demerge ls [] = ls
+demerge [] ls = error$ "demerge: first list did not contain all of second, remaining: " ++ show ls
+demerge (a:b) r@(x:y) = 
+  case a `compare` x of
+   EQ -> demerge b y
+   LT -> a : demerge b r 
+   GT -> error$ "demerge: element was missing from first list: "++ show x
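+
+-- Examples (these mirror the unit tests at the bottom of this file):
+--
+-- >>> merge [1,3,5] [2,4,6]
+-- [1,2,3,4,5,6]
+--
+-- >>> demerge [1,2,3,4,5,6] [1,3,5]
+-- [2,4,6]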
+
+-- maybeCons :: Maybe a -> [a] -> [a]
+-- maybeCons Nothing  ls = ls
+-- maybeCons (Just x) ls = x : ls
+
+maybeInsert :: (a -> a -> Ordering) -> Maybe a -> [a] -> [a]
+maybeInsert _  Nothing  ls = ls
+maybeInsert fn (Just x) ls = insertBy fn x ls
+
+-- | Add the metadata that is used for binning
+annotateWLabLists :: NewickTree DefDecor -> AnnotatedTree
+annotateWLabLists tr = case tr of 
+  NTLeaf (bs,bl) n      -> NTLeaf (StandardDecor bl bs 1 [n]) n
+  NTInterior (bs,bl) [] -> NTInterior (StandardDecor bl bs 0 []) []
+  -- NTInterior (bs,bl) [] -> error "annotateWLabLists: internal invariant broken.  Shouldn't have NTInterior with null children."
+  NTInterior (bs,bl) ls -> 
+      let children = map annotateWLabLists ls in 
+      NTInterior (StandardDecor bl bs
+                  (sum $ map (subtreeWeight . get_dec) children)
+		  (foldl1 merge $ map (sortedLabels . get_dec) children))
+		 children
+
+----------------------------------------------------------------------------------------------------
+-- Simple Helper Functions
+----------------------------------------------------------------------------------------------------
+
+-- | Take the extra annotations away.  Inverse of `annotateWLabLists`.
+deAnnotate :: FullTree StandardDecor -> FullTree DefDecor
+deAnnotate (FullTree a b tr) = FullTree a b (fmap (\ (StandardDecor bl bs _ _) -> (bs,bl)) tr)
+
+-- Number of LEAVES contained in subtree:
+get_weight :: AnnotatedTree -> Int
+get_weight = subtreeWeight . get_dec
+
+-- Sorted list of leaf labels contained in subtree
+get_label_list :: AnnotatedTree -> [Label]
+get_label_list   = sortedLabels . get_dec
+
+add_weight :: StandardDecor -> AnnotatedTree -> StandardDecor
+add_weight (StandardDecor l1 bs1 w1 sorted1) node  = 
+  let (StandardDecor _ bs2 w2 sorted2) = get_dec node in 
+  StandardDecor l1 ((+) <$> bs1 <*> bs2) (w1+w2) (merge sorted1 sorted2)
+
+-- Remove the influence of one subtree from the metadata of another.
+subtract_weight :: StandardDecor -> AnnotatedTree -> StandardDecor
+subtract_weight (StandardDecor l1 bs1 w1 sorted1) node =  
+  let (StandardDecor _ bs2 w2 sorted2) = get_dec node in 
+  StandardDecor l1 ((-) <$> bs1 <*> bs2) (w1-w2) (demerge sorted1 sorted2)
+	
+
+----------------------------------------------------------------------------------------------------
+-- UNIT TESTING
+----------------------------------------------------------------------------------------------------
+
+tre1 :: (LabelTable, NewickTree DefDecor)
+tre1 = (x,head y)
+ where
+   (x,y) = parseNewick M.empty id "" "(A:0.1,B:0.2,(C:0.3,D:0.4):0.5);"
+
+tre1draw :: IO (Chan (), FullTree StandardDecor)
+tre1draw = viewNewickTree "tre1"$ (FullTree "" (fst tre1) (annotateWLabLists (snd tre1)))
+
+tre1dot :: IO ()
+tre1dot = print $ dotNewickTree "" 1.0 $ (FullTree "" (fst tre1) (annotateWLabLists$ snd tre1))
+
+norm :: String -> FullTree StandardDecor
+norm = norm2 . B.pack
+
+norm2 :: B.ByteString -> FullTree StandardDecor
+norm2 bstr = FullTree "" tbl (normalize $ annotateWLabLists$ head tr)
+  where
+    (tbl,tr) = parseNewick M.empty id "test" bstr
+
+unitTests :: Test
+unitTests = 
+  let ntl s = NTLeaf (Nothing,0.0) s
+      (tbl,tre1_) = tre1
+  in 
+  test 
+   [ 
+     "merge" ~: [1,2,3,4,5,6] ~=? merge [1,3,5] [2,4,6::Int]
+   , "demerge" ~: [2,4,6] ~=? demerge [1,2,3,4,5,6] [1,3,5::Int]
+   , "demerge" ~: [1,3,5] ~=? demerge [1,2,3,4,5,6] [2,4,6::Int]
+   , "annotateWLabLists" ~: 
+     ["A","B","C","D"] -- ORD on atoms is expensive... it must use the whole string.
+      ~=? map (tbl M.!) 
+          (sortedLabels (get_dec (annotateWLabLists tre1_)))
+
+   -- Make sure that all of these normalize to the same thing.
+   , "normalize1" ~: "(C, D, E, (A, B))" ~=?  show (displayDefaultTree $ deAnnotate$ norm "(A,(C,D,E),B);")
+   , "normalize2A" ~: "(C, D, E, (A, B))" ~=?  show (pPrint$ norm "((C,D,E),B,A);")
+   , "normalize2B" ~: "(C, D, E, (A, B))" ~=?  show (pPrint$ norm "(D,E,C,(B,A));")
+
+   -- Here's an example from the rhizobia dataset that caused my branch-sorting to fail.
+   , "normalize3" ~:  "(((BB, BJ)), (MB, ML), (RE, (SD, SM)))" 
+		 ~=? show (pPrint$ norm2 (B.pack "(((ML,MB),(RE,(SD,SM))),(BB,BJ));"))
+
+-- "((BB: 2.691831, BJ: 1.179707): 0.000000, ((ML: 0.952401, MB: 1.020319): 0.000000, (RE: 2.031345, (SD: 0.180786, SM: 0.059988): 0.861187): 0.717913): 0.000000);"
+
+   , "phbin: these 3 trees should fall in the same category" ~: 
+      1 ~=? (length $ M.toList $
+             binthem $ snd $ 
+              parseNewicks id [("one",  "(A,(C,D,E),B);"), 
+                               ("two",  "((C,D,E),B,A);"),
+                               ("three","(D,E,C,(B,A));")]
+             -- [("one",   snd$parseNewick M.empty id "" "(A,(C,D,E),B);"),
+             --  ("two",   snd$parseNewick M.empty id "" "((C,D,E),B,A);"),
+             --  ("three", snd$parseNewick M.empty id "" "(D,E,C,(B,A));")]
+            )
+      
+   , "dotConversion" ~: True ~=? 100 < length (show $ dotNewickTree "" 1.0$ norm "(D,E,C,(B,A));") -- 444     
+   ]
+
+
+--------------------------------------------------------------------------------
diff --git a/Bio/Phylogeny/PhyBin/CoreTypes.hs b/Bio/Phylogeny/PhyBin/CoreTypes.hs
new file mode 100644
index 0000000..8c4d081
--- /dev/null
+++ b/Bio/Phylogeny/PhyBin/CoreTypes.hs
@@ -0,0 +1,359 @@
+{-# LANGUAGE NamedFieldPuns, BangPatterns #-}
+{-# LANGUAGE OverloadedStrings, TypeSynonymInstances, FlexibleInstances #-}
+{-# LANGUAGE CPP #-}
+{-# OPTIONS_GHC -fwarn-incomplete-patterns #-}
+{-# OPTIONS_GHC -fwarn-unused-imports #-}
+
+module Bio.Phylogeny.PhyBin.CoreTypes
+       (
+         -- * Tree and tree decoration types
+         NewickTree(..), 
+         DefDecor, StandardDecor(..), AnnotatedTree, FullTree(..),
+         ClustMode(..), TreeName, NumTaxa(..),
+         
+         -- * Tree operations
+         displayDefaultTree, displayStrippedTree, 
+         treeSize, numLeaves, liftFT,
+         get_dec, set_dec, get_children, 
+         map_labels, all_labels, foldIsomorphicTrees,
+
+         -- * Utilities specific to StandardDecor:
+         avg_branchlen, get_bootstraps,
+
+         -- * Command line config options
+         PhyBinConfig(..), default_phybin_config,
+         WhichRFMode(..),
+
+         -- * General helpers
+         Label, LabelTable,
+         
+         -- * Experimenting with abstracting decoration operations
+         HasBranchLen(..)
+       )
+       where
+
+import qualified Data.Map as M
+import qualified Data.Set as S
+import Data.Foldable (Foldable(..))
+import Data.Maybe (maybeToList)
+import Data.Monoid (mappend, mconcat)
+import Text.PrettyPrint.HughesPJClass hiding (char, Style)
+
+import qualified Data.Clustering.Hierarchical as C
+
+#define NO_ATOMS
+#ifndef NO_ATOMS
+import StringTable.Atom
+#endif
+
+----------------------------------------------------------------------------------------------------
+-- Type definitions
+----------------------------------------------------------------------------------------------------
+
+type BranchLen = Double
+
+-- | Even though the Newick format allows it, here we ignore interior node
+--   labels. (They are not commonly used.)
+--
+--   Note that these trees are rooted.  The `normalize` function ensures that a
+--   single, canonical rooted representation is chosen.
+data NewickTree a = 
+   NTLeaf     a {-# UNPACK #-} !Label
+ | NTInterior a  [NewickTree a]
+ deriving (Show, Eq, Ord)
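+
+-- As a small example, a three-leaf tree over labels 0, 1, and 2 (with unit
+-- decorations) is represented as:
+--   NTInterior () [NTLeaf () 0, NTInterior () [NTLeaf () 1, NTLeaf () 2]]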
+
+-- TODO: Ordering maybe shouldn't need to touch the metadata.  At least on the fast
+-- path.
+
+{-
+-- [2010.09.22] Disabling:
+instance NFData Atom where
+  rnf a = rnf (fromAtom a :: Int)
+
+instance NFData a => NFData (NewickTree a) where
+  rnf (NTLeaf l n)      = rnf (l,n)
+  rnf (NTInterior l ls) = rnf (l,ls)
+-}
+
+instance Functor NewickTree where 
+   fmap fn (NTLeaf dec x)      = NTLeaf (fn dec) x 
+   fmap fn (NTInterior dec ls) = NTInterior (fn dec) (map (fmap fn) ls)
+
+instance Foldable NewickTree where
+  foldMap f (NTLeaf dec x) = f dec
+  foldMap f (NTInterior dec ls) = mappend (f dec) $
+                                  mconcat (map (foldMap f) ls)
+
+instance Foldable FullTree where
+  foldMap f (FullTree _ _ tr) = foldMap f tr
+
+instance Functor FullTree where
+  fmap f (FullTree n l tr) = FullTree n l $ fmap f tr
+
+instance Pretty dec => Pretty (NewickTree dec) where 
+ -- I'm using displayDefaultTree for the "prettiest" printing and
+ -- replacing pPrint with a whitespace-improved version of show:
+ pPrint (NTLeaf dec name)   = "NTLeaf"     <+> pPrint dec <+> text (show name)
+ pPrint (NTInterior dec ls) = "NTInterior" <+> pPrint dec <+> pPrint ls
+
+instance Pretty a => Pretty (FullTree a) where
+  pPrint (FullTree name mp tr) = 
+    "FullTree " <+> text name <+> loop tr
+   where
+    loop (NTLeaf dec ind)    = "NTLeaf"     <+> pPrint dec <+> text (mp M.! ind)
+    loop (NTInterior dec ls) = "NTInterior" <+> pPrint dec <+> pPrint ls
+
+instance (Pretty k, Pretty v) => Pretty (M.Map k v) where
+  pPrint mp = pPrint (M.toList mp)
+
+-- | Display a tree WITH the bootstrap and branch lengths.
+--   This prints in NEWICK format.
+displayDefaultTree :: FullTree DefDecor -> Doc
+displayDefaultTree orig = loop tr <> ";"
+  where
+    (FullTree _ mp tr) = orig -- normalize orig
+    loop (NTLeaf (Nothing,_) name)     = text (mp M.! name)
+    loop (NTLeaf _ _)                  = error "WEIRD -- why did a leaf node have a bootstrap value?"
+    loop (NTInterior (bootstrap,_) ls) = 
+       case bootstrap of
+         Nothing -> base
+         Just val -> base <> text ":[" <> text (show val) <> text "]"
+      where base = parens$ sep$ map_but_last (<>text",") $ map loop ls
+
+-- | The same, except with no bootstrap or branch lengths.  Any tree annotations
+-- are ignored.
+displayStrippedTree :: FullTree a -> Doc
+displayStrippedTree orig = loop tr <> ";"
+  where
+    (FullTree _ mp tr) = orig -- normalize orig
+    loop (NTLeaf _ name) = text (mp M.! name)
+    loop (NTInterior _ ls) = parens$ sep$ map_but_last (<>text",") $ map loop ls
+
+----------------------------------------------------------------------------------------------------
+-- Labels
+----------------------------------------------------------------------------------------------------
+
+-- | Labels are inexpensive unique integers.  The table is necessary for converting them back.
+type Label = Int
+
+-- | Map labels back onto meaningful names.
+type LabelTable = M.Map Label String
+
+----------------------------------------------------------------------------------------------------
+-- Tree metadata (decorators)
+----------------------------------------------------------------------------------------------------
+
+
+-- | The barebones default decorator for NewickTrees contains BOOTSTRAP and
+-- BRANCHLENGTH.  The bootstrap values, if present, will range in [0..100]
+type DefDecor = (Maybe Int, BranchLen)
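+
+-- For example, @(Just 95, 0.0312) :: DefDecor@ describes a branch with
+-- bootstrap value 95 and length 0.0312, while a leaf branch with no bootstrap
+-- would carry @(Nothing, 0.0312)@.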
+
+-- | Additionally includes some scratch data that is used by the binning algorithm.
+type AnnotatedTree = NewickTree StandardDecor
+
+-- | The standard decoration includes everything in `DefDecor` plus
+--   some extra cached data:
+-- 
+--  (1) branch length from parent to "this" node
+--  (2) bootstrap values for the node
+-- 
+--  (3) subtree weights for future use
+--      (defined as number of LEAVES, not counting intermediate nodes)
+--  (4) sorted lists of labels for symmetry breaking
+data StandardDecor = StandardDecor {
+  branchLen     :: BranchLen,
+  bootStrap     :: Maybe Int,
+
+  -- The rest of these are used by the computations below.  These are
+  -- cached (memoized) values that could be recomputed:
+  ----------------------------------------
+  subtreeWeight :: Int,
+  sortedLabels  :: [Label]
+ }
+ deriving (Show,Read,Eq,Ord)
+
+class HasBranchLen a where
+  getBranchLen :: a -> BranchLen
+
+instance HasBranchLen StandardDecor where
+  getBranchLen = branchLen
+
+-- This one is kind of sloppy:
+instance HasBranchLen DefDecor where
+  getBranchLen = snd
+
+-- | A common type of tree contains the standard decorator and also a table for
+-- restoring the human-readable node names.
+data FullTree a =
+  FullTree { treename   :: TreeName
+           , labelTable :: LabelTable
+           , nwtree     :: NewickTree a 
+           }
+ deriving (Show, Ord, Eq)
+
+liftFT :: (NewickTree t -> NewickTree a) -> FullTree t -> FullTree a
+liftFT fn (FullTree nm labs x) = FullTree nm labs (fn x)
+
+type TreeName = String
+
+instance Pretty StandardDecor where 
+ pPrint (StandardDecor bl bs wt ls) = parens$
+    "StandardDecor" <+> hsep [pPrint bl, pPrint bs
+--                             , pPrint wt, pPrint ls
+                             ]
+
+
+----------------------------------------------------------------------------------------------------
+-- * Configuring and running the command line tool.
+----------------------------------------------------------------------------------------------------
+
+-- | Due to the number of configuration options for the driver, we pack them into a record.
+data PhyBinConfig = 
+  PBC { verbose :: Bool
+      , num_taxa :: NumTaxa
+      , name_hack :: String -> String
+      , output_dir :: String
+      , inputs :: [String]
+      , do_graph :: Bool
+      , do_draw :: Bool
+      , clust_mode  :: ClustMode
+      , highlights  :: [FilePath] -- [FullTree ()]
+      , show_trees_in_dendro :: Bool
+      , show_interior_consensus :: Bool
+      , rfmode :: WhichRFMode
+      , preprune_labels :: Maybe [String]
+      , print_rfmatrix :: Bool
+      , dist_thresh :: Maybe Int
+      , branch_collapse_thresh :: Maybe Double -- ^ Branches less than this length are collapsed.
+      , bootstrap_collapse_thresh :: Maybe Int
+        -- ^ BootStrap values less than this result in the intermediate node being collapsed.        
+      }
+
+-- | Supported modes for computing RFDistance.
+data WhichRFMode = HashRF | TolerantNaive  
+  deriving (Show, Eq, Ord)
+
+-- | How many taxa should we expect in the incoming dataset?
+data NumTaxa = Expected Int  -- ^ Supplied by the user; we commit to this count.
+             | Unknown       -- ^ In the future we may automatically pick a behavior.  For now this usually results in an error.
+             | Variable      -- ^ Explicitly ignore this setting in favor of comparing all trees
+                             --   (even if some are missing taxa).  This only works with certain modes.
+  deriving (Show, Read, Eq)
+               
+-- | The default phybin configuration.
+default_phybin_config :: PhyBinConfig
+default_phybin_config = 
+ PBC { verbose = False
+--      , num_taxa = error "must be able to determine the number of taxa expected in the dataset.  (Supply it manually with -n.)"
+      , num_taxa  = Unknown
+      , name_hack = id -- Default, no transformation of leaf-labels
+      , output_dir = "./phybin_out/"
+      , inputs = []
+      , do_graph = False
+      , do_draw = False
+--      , clust_mode = BinThem
+      , clust_mode = ClusterThem C.UPGMA
+      , rfmode = HashRF
+      , preprune_labels = Nothing
+      , highlights     = []
+      , show_trees_in_dendro = False
+      , show_interior_consensus = False
+      , print_rfmatrix = False
+      , dist_thresh = Nothing
+      , branch_collapse_thresh = Nothing
+      , bootstrap_collapse_thresh = Nothing
+     }
+
+data ClustMode = BinThem | ClusterThem { linkage :: C.Linkage }
+
+----------------------------------------------------------------------------------------------------
+-- * Simple utility functions for the core types:
+----------------------------------------------------------------------------------------------------
+
+-- | How many nodes (leaves and interior) are contained in a NewickTree?
+treeSize :: NewickTree a -> Int
+treeSize (NTLeaf _ _) = 1
+treeSize (NTInterior _ ls) = 1 + sum (map treeSize ls)
+
+-- | This counts only leaf nodes, which should include all taxa.
+numLeaves :: NewickTree a -> Int
+numLeaves (NTLeaf _ _) = 1
+numLeaves (NTInterior _ ls) = sum (map numLeaves ls)
+
+
+map_but_last :: (a -> a) -> [a] -> [a]
+map_but_last _ [] = []
+map_but_last _ [h] = [h]
+map_but_last fn (h:t) = fn h : map_but_last fn t
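+-- For illustration (these equations are not in the original code), map_but_last
+-- applies the function to every element except the last one:
+--
+--   map_but_last (+1) [1,2,3]  ==  [2,3,3]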
+
+
+
+get_dec :: NewickTree t -> t
+get_dec (NTLeaf     dec _) = dec
+get_dec (NTInterior dec _) = dec
+
+-- Set all the decorations to a constant:
+set_dec :: b -> NewickTree a -> NewickTree b
+set_dec d = fmap (const d)
+--set_dec d (NTLeaf _ x) = NTLeaf d x
+--set_dec d (NTInterior _ ls) = NTInterior d $ map (set_dec d) ls
+
+get_children :: NewickTree t -> [NewickTree t]
+get_children (NTLeaf _ _) = []
+get_children (NTInterior _ ls) = ls
+
+
+-- | Average branch length across all branches in all trees.
+avg_branchlen :: HasBranchLen a => [NewickTree a] -> Double
+avg_branchlen origls = fst total / snd total
+  where
+   total = sum_ls $ map sum_tree origls
+   sum_ls ls = (sum$ map fst ls, sum$ map snd ls)
+   sum_tree (NTLeaf dec _) | getBranchLen dec == 0  = (0,0)
+                           | otherwise              = (abs (getBranchLen dec),1)
+   sum_tree (NTInterior dec ls) = 
+       let branchLen = getBranchLen dec
+           (x,y)     = sum_ls$ map sum_tree ls in
+       if branchLen == 0 then (x, y) else ((abs branchLen) + x, 1+y)
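+-- A worked example (illustrative only): for a single tree whose nonzero branch
+-- lengths are 0.1, 0.2, and 0.3, the running totals are (0.6, 3) and the result is
+-- 0.2.  Zero-length branches contribute to neither the numerator nor the denominator.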
+
+-- | Retrieve all the bootstrap values actually present in a tree.
+get_bootstraps :: NewickTree StandardDecor -> [Int]
+get_bootstraps (NTLeaf (StandardDecor{bootStrap}) _) = maybeToList bootStrap
+get_bootstraps (NTInterior (StandardDecor{bootStrap}) ls) =
+  maybeToList bootStrap ++ concatMap get_bootstraps ls
+
+-- | Apply a function to all the *labels* (leaf names) in a tree.
+map_labels :: (Label -> Label) -> NewickTree a -> NewickTree a
+map_labels fn (NTLeaf     dec lbl) = NTLeaf dec $ fn lbl
+map_labels fn (NTInterior dec ls)  = NTInterior dec$ map (map_labels fn) ls
+ 
+-- | Return all the labels contained in the tree.
+all_labels :: NewickTree t -> [Label]
+all_labels (NTLeaf     _ lbl) = [lbl]
+all_labels (NTInterior _ ls)  = concat$ map all_labels ls
+
+
+
+-- | This function allows one to collapse multiple trees while looking
+-- only at the "horizontal slice" of all the annotations *at a given
+-- position* in the tree.
+--
+-- "Isomorphic" must apply both to the shape and the name labels or it
+-- is an error to apply this function.
+foldIsomorphicTrees :: ([a] -> b) -> [NewickTree a] -> NewickTree b
+foldIsomorphicTrees _ [] = error "foldIsomorphicTrees: empty list of input trees"
+foldIsomorphicTrees fn ls@(hd:_) = fmap fn horiztrees
+ where
+   -- Preserve the input order:
+   horiztrees = Prelude.foldr consTrees (fmap (const []) hd) ls
+   -- We use the tree datatype itself as the intermediate data
+   -- structure.  This is VERY allocation-expensive, it would be
+   -- possible to trade compute for allocation here:
+   consTrees a b = case (a,b) of
+    (NTLeaf dec nm1, NTLeaf decls nm2) | nm1 /= nm2 -> error$"foldIsomorphicTrees: mismatched names: "++show (nm1,nm2)
+                                       | otherwise ->
+     NTLeaf (dec : decls) nm1
+    (NTInterior dec ls1, NTInterior decls ls2) ->
+     NTInterior (dec:decls) $ zipWith consTrees ls1 ls2
+    _ -> error "foldIsomorphicTrees: difference in tree shapes"
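+
+-- A usage sketch (hypothetical, not part of the original code): given two trees of
+-- identical shape and leaf labels, each decorated with Ints,
+--
+--   foldIsomorphicTrees sum [t1, t2]
+--
+-- yields one tree of the same shape whose decoration at each position is the sum of
+-- the corresponding decorations in t1 and t2.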
+
diff --git a/Bio/Phylogeny/PhyBin/Parser.hs b/Bio/Phylogeny/PhyBin/Parser.hs
new file mode 100644
index 0000000..a6ad401
--- /dev/null
+++ b/Bio/Phylogeny/PhyBin/Parser.hs
@@ -0,0 +1,252 @@
+{-# LANGUAGE ScopedTypeVariables, BangPatterns, ParallelListComp  #-}
+{-# OPTIONS_GHC -fwarn-incomplete-patterns #-}
+{-# OPTIONS_GHC -fwarn-unused-imports #-}
+
+module Bio.Phylogeny.PhyBin.Parser
+       (newick_parser, parseNewick, parseNewicks, parseNewickFiles, unitTests)
+       where
+import           Control.Exception  (evaluate, handle, SomeException)
+import qualified Data.ByteString.Char8 as B
+import           Data.Char          (isSpace)
+import           Data.Map as M
+import           Data.List as L
+import           Text.Parsec
+import           Text.Parsec.ByteString
+import           Test.HUnit          ((~:),(~=?),Test,test,assertFailure)
+import           System.FilePath (takeBaseName)
+
+import           Bio.Phylogeny.PhyBin.CoreTypes 
+import           Prelude as P
+
+type NameHack = (String->String)
+
+-- | Parse a bytestring into a NewickTree with branch lengths.  The
+--   String argument names the file from which the data came and is used
+--   only for better error messages.
+-- 
+-- If the single bytestring contains more than one tree, then a number is appended to
+-- the tree names.
+parseNewick :: LabelTable -> NameHack -> String -> B.ByteString -> (LabelTable, [NewickTree DefDecor])
+parseNewick tbl0 name_hack file input = (lbls,trs)
+  where 
+  (lbls,trs) = 
+     extractLabelTable tbl0 $ 
+     runB file (many1$ newick_parser name_hack) $
+     B.filter (not . isSpace) input
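+
+-- An illustrative GHCi session (not part of the original code; assumes
+-- Data.ByteString.Char8 is imported as B and Data.Map as M):
+--
+--   >>> let (tbl, [tr]) = parseNewick M.empty id "<interactive>" (B.pack "(A:0.1,B:0.2);")
+--   >>> M.size tbl
+--   2
+--   >>> numLeaves tr
+--   2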
+
+-- treeFiles <- acquireTreeFiles files
+                     -- let fn f = do raw <- B.readFile f
+                     --               let ls = map (`B.append` (B.pack ";")) $ 
+                     --                        B.splitWith (== ';') raw
+                     --               return (map (f,) ls)
+                     -- trees0 <- concat <$> mapM fn treeFiles
+                     -- FIXME: no name_hack here:
+                     -- let (lbls, trees) = parseNewicks id trees0 
+                     -- putStrLn$ "Read trees! "++show (length trees)
+                     -- putStrLn$ "Taxa: "++show (pPrint lbls)
+                     -- putStrLn$ "First tree: "++show (displayDefaultTree (head trees))
+
+-- | Parse a list of Newick tree files, starting with an empty label map and accumulating a final map.
+parseNewickFiles :: NameHack -> [String] -> IO (LabelTable, [FullTree DefDecor])
+parseNewickFiles fn nms = do
+  -- WARNING: Lazy-IO here.  We need to be careful about leaking file descriptors.
+  bss <- mapM B.readFile nms
+  return $ parseNewicks fn (zip nms bss)
+
+-- | A version which takes in-memory trees as ByteStrings.
+parseNewicks :: NameHack -> [(String,B.ByteString)] -> (LabelTable, [FullTree DefDecor])
+parseNewicks name_hack pairs = (labtbl, fullTrs)
+ where
+   fullTrs = [ FullTree (tweakName file ind) labtbl tr
+             | (file,tr) <- trs
+             | ind <- [(0::Int)..] ]
+   tweakName file ind = -- Here we do the renaming/numbering business:
+     if addSuffix
+     then (takeBaseName file)++"_"++show ind 
+     else (takeBaseName file) 
+   addSuffix = case trs of
+                 []  -> False -- Should this be an error?
+                 [_] -> False
+                 _   -> True
+   (labtbl, trs) = P.foldr fn (M.empty,[]) pairs
+   fn (file,bstr) (!acc,!ls) =
+     let (acc',trs) = parseNewick acc name_hack file bstr
+         names      = if length trs > 1
+                      then zipWith (\x y -> x++"_"++show y) (repeat file) [0..]
+                      else [file]
+     in (acc', (zip names trs) ++ ls)
+
+runB :: Show a => String -> Parser a -> B.ByteString -> a
+runB file p input = case (parse p "" input) of
+	         Left err -> error ("parse error in file "++ show file ++" at "++ show err)
+		 Right x  -> x
+
+extractLabelTable :: LabelTable -> [TempTree] -> (LabelTable, [NewickTree DefDecor])
+extractLabelTable tbl0 trs = (finMap,finTree)
+ where
+   flipped = M.fromList $ L.map (\(x,y)->(y,x)) $ M.toList tbl0
+   -- (_,finMap,finTree) = loop2 (S.fromList (M.elems tbl0)) tbl0 tr
+   (_,finMap,finTree) = loop1 flipped tbl0 trs
+
+   -- Mere plumbing:
+   loop1 seen acc [] = (seen, acc, [])
+   loop1 seen acc (hd:tl) =
+     let (seen2,acc2,hd') = loop2 seen acc hd 
+         (seen3,acc3,tl') = loop1 seen2 acc2 tl in
+     (seen3,acc3, hd':tl')
+
+   loop2 seen acc (NTLeaf (d,Just nm) _)
+     | M.member nm seen = (seen, acc, NTLeaf d (seen M.! nm))
+     | otherwise = let nxt = M.size acc in
+                   (M.insert nm nxt seen,
+                    M.insert nxt nm acc,  NTLeaf d nxt)
+   loop2 seen1 acc1 (NTInterior (d,Nothing) chlds) =
+     let (seen',acc',ls') = 
+          P.foldr (\ x (seen2,acc2,ls) ->
+                   let (seen3,acc3,x') = loop2 seen2 acc2 x in
+                   (seen3, acc3, x':ls))
+                  (seen1,acc1,[])
+                  chlds
+     in (seen',acc', NTInterior d ls')
+
+----------------------------------------------------------------------------------------------------
+-- Newick file format parser definitions:
+----------------------------------------------------------------------------------------------------
+
+-- | Hack: we store the names in the leaves.
+type TempTree = NewickTree (DefDecor,Maybe String)
+
+-- | This is used to splice metadata into the data structure after the fact.
+tag :: DefDecor -> TempTree -> TempTree
+tag l s =
+  case s of 
+    NTLeaf (_,m) n            -> NTLeaf (l,m) n
+    NTInterior (_,Nothing) ls -> NTInterior (l,Nothing) ls
+
+-- | This parser ASSUMES that whitespace has been prefiltered from the input.
+newick_parser :: NameHack -> Parser TempTree
+newick_parser name_hack = 
+   do x <- subtree name_hack
+      -- Get the top-level metadata:
+      l <- branchMetadat name_hack
+      _ <- char ';'
+      return$ tag l x
+
+subtree :: NameHack -> Parser TempTree
+subtree name_hack = internal name_hack <|> leaf name_hack
+
+defaultMeta :: (Maybe Int, Double)
+defaultMeta = (Nothing,0.0)
+
+leaf :: NameHack -> Parser TempTree
+leaf name_hack =
+  do nm <- name
+     let nm' = name_hack nm
+     return$ NTLeaf (defaultMeta,Just nm') 0 
+
+internal :: NameHack -> Parser TempTree
+internal name_hack =
+   do _   <- char '('       
+      bs  <- branchset name_hack
+      _   <- char ')'       
+      _nm <- name -- IGNORED, internal names.
+      return$ NTInterior (defaultMeta,Nothing) bs
+
+branchset :: NameHack -> Parser [TempTree]
+branchset name_hack =
+    do b <- branch name_hack <?> "at least one branch"
+       rest <- option [] $ try$ do char ','; branchset name_hack
+       return (b:rest)
+
+branch :: NameHack -> Parser TempTree
+branch name_hack =
+         do s <- subtree name_hack
+            l <- branchMetadat name_hack 
+            return$ tag l s
+
+-- If the length is omitted, it is implicitly zero.
+branchMetadat :: NameHack -> Parser DefDecor 
+branchMetadat name_hack = option defaultMeta $ do
+    char ':'
+    n <- (try sciNotation <|> number)
+    -- IF the branch length succeeds then we go for the bracketed bootstrap value also:
+    bootstrap <- option Nothing $ do
+      char '['
+      s <- many1 digit
+      char ']'
+      return (Just (read s))
+    return (bootstrap,n)
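+
+-- Illustrative inputs (not from the original sources) and the DefDecor they yield:
+--
+--   ":0.33[85]"  ->  (Just 85, 0.33)   -- branch length plus bracketed bootstrap
+--   ":0.33"      ->  (Nothing, 0.33)   -- branch length only
+--   ""           ->  (Nothing, 0.0)    -- both omitted; falls back to defaultMeta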
+
+-- | Parse a normal, decimal number.
+number :: Parser Double
+number = 
+  do sign   <- option "" $ string "-"
+     first  <- many1 digit
+     second <- option "0" $ try$ do char '.'; many1 digit
+     return (read (sign ++ first++"."++second) :: Double)
+
+-- | Parse a number in scientific notation.
+sciNotation :: Parser Double
+sciNotation =
+  do coeff <- do first <- many1 digit
+                 second <- option "0" $ try$ do char '.'; many1 digit
+                 return $ first++"."++second
+     char 'e'
+     sign  <- option "" $ string "-"
+     expon <- many1 digit
+     return (read (coeff++"e"++sign++expon))
+
+-- | Names are a mess... they contain all kinds of garbage sometimes it seems.
+--   Thus we are very permissive.  We allow anything that is not something we specifically need to reserve.
+name :: Parser String
+name = option "" $ many1 (noneOf "()[]:;, \t\n\r\f\v")
+-- name = option "" $ many1 (letter <|> digit <|> oneOf "_.-")
+
+
+--------------------------------------------------------------------------------
+-- Unit Tests
+--------------------------------------------------------------------------------
+
+tre1 :: NewickTree DefDecor
+tre1 = head $ snd $ parseNewick M.empty id "" $ B.pack "(A:0.1,B:0.2,(C:0.3,D:0.4):0.5);"
+
+unitTests :: Test
+unitTests = test
+   [ "test name"   ~: "foo" ~=?  run name "foo"
+   , "test number" ~:  3.3  ~=?  run number "3.3"
+   , "test number" ~:  3.0  ~=?  run number "3"
+   , "test number" ~:  -3.0 ~=?  run number "-3"
+
+   , "leaf"     ~: ntl "A" ~=?  run (leaf id)    "A"
+   , "subtree"  ~: ntl "A" ~=?  run (subtree id) "A"
+
+     -- These are not allowed:
+   , "null branchset" ~: errortest$ run (branchset id) ""
+     
+   , "internal" ~: NTInterior nada [ntl "A"] ~=?  run (internal id) "(A);"
+   , "example: no nodes are named"  ~: NTInterior nada
+                                         [ntl "", ntl "", NTInterior nada [ntl "", ntl ""]]
+        			   ~=? run (newick_parser id) "(,,(,));"
+   , "example: leaf nodes are named" ~: 6 ~=?  treeSize (run (newick_parser id) "(A,B,(C,D));")
+   , "example: all nodes are named"  ~: 6 ~=?  treeSize (run (newick_parser id) "(A,B,(C,D)E)F;")
+
+   , "example: all but root node have a distance to parent"  ~: 6 ~=? treeSize (run (newick_parser id) "(:0.1,:0.2,(:0.3,:0.4):0.5);")
+   , "example: all have a distance to parent"              ~: 6 ~=? treeSize (run (newick_parser id) "(:0.1,:0.2,(:0.3,:0.4):0.5):0.6;")
+   , "example: distances and leaf names (popular)"         ~: 6 ~=? treeSize tre1
+   , "example: distances and all names"                    ~: 6 ~=? treeSize (run (newick_parser id) "(A:0.1,B:0.2,(C:0.3,D:0.4)E:0.5)F;")
+   , "example: a tree rooted on a leaf node (rare)"        ~: 6 ~=? treeSize (run (newick_parser id) "((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;")
+   ]
+ where
+  ntl s = NTLeaf ((Nothing,0.0),Just s) 0
+  nada = ((Nothing,0),Nothing)
+
+run :: Show a => Parser a -> String -> a
+run p input = runB "<unknown>" p (B.pack input)
+
+errortest :: t -> IO ()
+errortest x = 
+   --() ~=?
+    handle (\ (e::SomeException) -> return ()) $ 
+      do evaluate x
+         assertFailure "test was expected to throw an error"
+              
diff --git a/Bio/Phylogeny/PhyBin/PreProcessor.hs b/Bio/Phylogeny/PhyBin/PreProcessor.hs
new file mode 100644
index 0000000..dda3075
--- /dev/null
+++ b/Bio/Phylogeny/PhyBin/PreProcessor.hs
@@ -0,0 +1,95 @@
+{-# LANGUAGE ScopedTypeVariables #-}
+
+-- | This is a preprocessor which can (optionally) be invoked to
+-- collapse branches based on a length or bootstrap threshold.
+
+module Bio.Phylogeny.PhyBin.PreProcessor
+       ( collapseBranches,
+         collapseBranchLenThresh, collapseBranchBootStrapThresh,
+         pruneTreeLeaves
+       )
+       where
+
+import qualified Data.Set as S
+import Data.Maybe (catMaybes)
+import  Bio.Phylogeny.PhyBin.CoreTypes 
+
+
+-- | Prune the leaves of the tree to only those leaves in the provided set.
+-- 
+--   If ALL leaves are pruned away, this function returns Nothing.
+pruneTreeLeaves :: S.Set Label -> NewickTree DefDecor -> Maybe (NewickTree DefDecor)
+pruneTreeLeaves set tr = loop tr
+ where
+   loop orig@(NTLeaf _ lab)
+     | S.member lab set = Just orig
+     | otherwise        = Nothing
+   loop (NTInterior dec@(_,blen) ls) =
+     case catMaybes $ map loop ls of
+       []    -> Nothing
+       -- Here we ELIMINATE the intermediate node, but we add in its branch length:
+       [one] -> Just$ fmap (\(bs,bl) -> (bs,blen + bl)) one
+       ls'   -> Just (NTInterior dec ls')
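+
+-- For example (illustrative): pruning ((A:0.1,B:0.2):0.5,C:0.3) to the set {A,C}
+-- drops B; the (A,B) node is then left with a single child, so it is eliminated and
+-- its branch length is folded into A's, giving (A:0.6,C:0.3) at the top level.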
+
+
+-- | Collapses branches whose metadata satisfies a predicate, leaving a shallower,
+--   "bushier" tree.  This does NOT change the set of leaves (taxa); it only removes interior
+--   nodes.
+--
+--   `collapseBranches pred collapser tree` uses `pred` to test the meta-data to see
+--   if collapsing the intermediate node below the branch is necessary, and if it is,
+--   it uses `collapser` to reduce all the metadata for the collapsed branches into a
+--   single piece of metadata.
+collapseBranches :: forall a . (a -> Bool) -> (a -> a -> a) -> NewickTree a -> NewickTree a
+collapseBranches isCollapsable collapse origtr = final
+  where    
+    (_,_, final) = loop origtr
+    
+    -- This loop returns:
+    --  (1) a list of leaf "floaters" that can still move upwards,
+    --  (2) immovable subtrees that can't
+    --  (3) a final node IF the result is the root.
+    loop :: NewickTree a -> ([(a,Label)], [NewickTree a], NewickTree a)
+    loop lf@(NTLeaf dec lb) | isCollapsable dec = ([(dec,lb)], [],  lf)
+                            | otherwise         = ([],        [lf], lf)
+    loop (NTInterior dec children) =
+      let (floats, anchors, _) = unzip3 $ map loop children 
+          thenode = NTInterior dec $ concat anchors ++ 
+                                     map (uncurry NTLeaf) (concat floats)
+      in      
+      if isCollapsable dec then
+        -- If we are collapsable we keep floating upwards.
+        -- We combine our metadata (on the left) into the floatees:
+        (map (\ (a,l) -> (collapse dec a, l))
+             (concat floats),
+         concat anchors,
+         thenode)
+      else
+        -- Otherwise this is the end of the road for these floaters:
+        ([], [thenode], thenode)
+
+
+-- | A common configuration.  Collapse branches based on a length threshold.
+collapseBranchLenThresh :: Double -> NewickTree DefDecor -> NewickTree DefDecor
+-- collapseBranchLenThresh :: HasBranchLen a => Double -> NewickTree a -> NewickTree a    
+collapseBranchLenThresh thr tr =
+  collapseBranches ((< thr) . getBranchLen) collapser tr
+  where
+    -- We REMOVE BootStraps as part of this process, they are not well-defined after this point.
+    collapser _intermediate@(_bt1, len1) _floatee@(_bt2, len2) =
+      (Nothing, len1 + len2)
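+
+-- Illustrative behavior (not from the original sources): with a threshold of 0.01,
+-- the interior node under the 0.001-length branch in ((A:0.3,B:0.2):0.001,C:0.4)
+-- is collapsed and its children re-attach to the node above, yielding the bushier
+-- tree (A:0.3,B:0.2,C:0.4).  The set of leaves is unchanged.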
+
+-- | A common configuration.  Collapse branches based on bootstrap values.
+collapseBranchBootStrapThresh :: Int -> NewickTree DefDecor -> NewickTree DefDecor
+-- collapseBranchLenThresh :: HasBranchLen a => Double -> NewickTree a -> NewickTree a    
+collapseBranchBootStrapThresh thr tr =
+  collapseBranches ((< thr) . getBoot) collapser tr
+  where
+    getBoot (Nothing,_)    = error$"collapseBranchBootStrapThresh: bootstrap value missing on tree node!\n"++
+                                   "They must be present if --minbootstrap is used."
+    getBoot (Just boot,_)  = boot
+    -- This had better happen BEFORE branch-length based collapsing is done:
+    collapser (_,len1) (_,len2)  = (Nothing, len1+len2)
+
+
+
diff --git a/Bio/Phylogeny/PhyBin/RFDistance.hs b/Bio/Phylogeny/PhyBin/RFDistance.hs
new file mode 100644
index 0000000..91e0767
--- /dev/null
+++ b/Bio/Phylogeny/PhyBin/RFDistance.hs
@@ -0,0 +1,444 @@
+{-# LANGUAGE ScopedTypeVariables, CPP, BangPatterns #-}
+
+module Bio.Phylogeny.PhyBin.RFDistance
+       (
+         -- * Types
+         DenseLabelSet, DistanceMatrix,
+
+         -- * Bipartition (Bip) utilities
+         allBips, foldBips, dispBip,
+         consensusTree, bipsToTree, filterCompatible, compatibleWith,
+
+         -- * ADT for dense sets
+         mkSingleDense, mkEmptyDense, bipSize,
+         denseUnions, denseDiff, invertDense, markLabel,
+         
+        -- * Methods for computing distance matrices
+        naiveDistMatrix, hashRF, 
+
+        -- * Output
+        printDistMat)
+       where
+
+import           Control.Monad
+import           Control.Monad.ST
+import           Data.Function       (on)
+import           Data.Word
+import qualified Data.Vector                 as V
+import qualified Data.Vector.Mutable         as MV
+import qualified Data.Vector.Unboxed.Mutable as MU
+import qualified Data.Vector.Unboxed         as U
+import           Text.PrettyPrint.HughesPJClass hiding (char, Style)
+import           System.IO      (hPutStrLn, hPutStr, Handle)
+import           System.IO.Unsafe
+
+-- import           Control.LVish
+-- import qualified Data.LVar.Set   as IS
+-- import qualified Data.LVar.SLSet as SL
+
+-- import           Data.LVar.Map   as IM
+-- import           Data.LVar.NatArray as NA
+
+import           Bio.Phylogeny.PhyBin.CoreTypes
+import           Bio.Phylogeny.PhyBin.PreProcessor (pruneTreeLeaves)
+-- import           Data.BitList
+import qualified Data.Set as S
+import qualified Data.List as L
+import qualified Data.IntSet as SI
+import qualified Data.Map.Strict as M
+import qualified Data.Foldable as F
+import qualified Data.Traversable as T
+import           Data.Monoid
+import           Prelude as P
+import           Debug.Trace
+
+#ifdef BITVEC_BIPS
+import qualified Data.Vector.Unboxed.Bit     as UB
+import qualified Data.Bit                    as B
+#endif
+
+-- I don't understand WHY, but I seem to get the same answers WITHOUT this.
+-- Normalization and symmetric difference do make things somewhat slower (e.g. 1.8
+-- seconds vs. 2.2 seconds for 150 taxa / 100 trees)
+#define NORMALIZATION
+-- define BITVEC_BIPS
+
+--------------------------------------------------------------------------------
+-- A data structure choice
+--------------------------------------------------------------------------------
+
+-- type DenseLabelSet s = BitList
+
+
+-- | Dense sets of taxa, aka Bipartitions or BiPs
+--   We assume that taxa labels have been mapped onto a dense, contiguous range of integers [0,N).
+-- 
+--   NORMALIZATION Rule: Bipartitions are really two disjoint sets.  But as long as
+--   the parent set (the union of the two partitions, aka "all taxa") is known, a
+--   bipartition can be represented by just *one* subset.  Yet we must choose WHICH
+--   subset for consistency.  We use the rule that we always choose the SMALLER.  Thus the
+--   DenseLabelSet should always be half the size or less, compared to the total
+--   number of taxa.
+-- 
+--   A set that is more than a majority of the taxa can be normalized by "flipping",
+--   i.e. taking the taxa that are NOT in that set.
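+-- 
+--   Example (illustrative, not part of the original comment): with 5 taxa {0..4},
+--   the split {0,1,2,3} | {4} is stored as the smaller side {4}, and {0,1} | {2,3,4}
+--   is stored as {0,1}.  When the two sides have equal size, the tie is broken by
+--   taking the Ord-smaller of the two sets (see normBip below).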
+#ifdef BITVEC_BIPS
+
+#  if 1
+type DenseLabelSet = UB.Vector B.Bit
+markLabel lab = UB.modify (\vec -> MU.write vec lab (B.fromBool True)) 
+mkEmptyDense  size = UB.replicate size (B.fromBool False)
+mkSingleDense size ind = markLabel ind (mkEmptyDense size)
+denseUnions        = UB.unions
+bipSize            = UB.countBits
+denseDiff          = UB.difference
+invertDense size bip = UB.invert bip
+dispBip labs bip = show$ map (\(ix,_) -> (labs M.! ix)) $
+                        filter (\(_,bit) -> B.toBool bit) $
+                        zip [0..] (UB.toList bip)
+denseIsSubset a b = UB.or (UB.difference b a)
+traverseDense_ fn bip =
+  U.ifoldr' (\ ix bit acc ->
+              (if B.toBool bit
+               then fn ix
+               else return ()) >> acc)
+        (return ()) bip
+
+#  else
+-- TODO: try tracking the size:
+data DenseLabelSet = DLS {-# UNPACK #-} !Int (UB.Vector B.Bit)
+markLabel lab (DLS _ vec)= DLS (UB.modify (\vec -> return (MU.write vec lab (B.fromBool True))) ) vec
+-- ....
+#  endif
+
+#else
+type DenseLabelSet = SI.IntSet
+markLabel lab set   = SI.insert lab set 
+mkEmptyDense _size  = SI.empty
+mkSingleDense _size = SI.singleton
+denseUnions _size   = SI.unions 
+bipSize             = SI.size
+denseDiff           = SI.difference
+denseIsSubset       = SI.isSubsetOf
+
+dispBip labs bip = "[" ++ unwords strs ++ "]"
+  where strs = map (labs M.!) $ SI.toList bip
+invertDense size bip = loop SI.empty (size-1)
+  where -- There's nothing for it but to iterate and test for membership:
+    loop !acc ix | ix < 0           = acc
+                 | SI.member ix bip = loop acc (ix-1)
+                 | otherwise        = loop (SI.insert ix acc) (ix-1)
+traverseDense_ fn bip =
+  -- FIXME: need guaranteed non-allocating way to do this.
+  SI.foldr' (\ix acc ->  fn ix >> acc) (return ()) bip
+#endif
+
+markLabel    :: Label -> DenseLabelSet -> DenseLabelSet
+mkEmptyDense :: Int -> DenseLabelSet
+mkSingleDense :: Int -> Label -> DenseLabelSet
+denseUnions  :: Int -> [DenseLabelSet] -> DenseLabelSet
+bipSize      :: DenseLabelSet -> Int
+
+-- | Print a BiPartition in a pretty form
+dispBip      :: LabelTable -> DenseLabelSet -> String
+
+-- | Assume that total taxa are 0..N-1, invert membership:
+invertDense  :: Int -> DenseLabelSet -> DenseLabelSet
+
+traverseDense_ :: Monad m => (Int -> m ()) -> DenseLabelSet -> m ()
+
+
+--------------------------------------------------------------------------------
+-- Dirt-simple reference implementation
+--------------------------------------------------------------------------------
+
+type DistanceMatrix = V.Vector (U.Vector Int)
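+-- A descriptive note (derived from the code below): this is a lower-triangular
+-- matrix.  Row i has i entries, and entry j (for j < i) is the Robinson-Foulds
+-- distance between tree i and tree j.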
+
+-- | Returns a triangular distance matrix encoded as a vector.
+--   Also return the set-of-BIPs representation for each tree.
+--
+--   This uses a naive method, directly computing the pairwise
+--   distance between each pair of trees.
+--
+--   This method is TOLERANT of differences in the label/taxa sets between two trees.
+--   It simply prunes to the intersection before doing the distance comparison.
+--   Other scoring methods may be added in the future.  (For example, penalizing for
+--   missing taxa.)
+naiveDistMatrix :: [NewickTree DefDecor] -> (DistanceMatrix, V.Vector (S.Set DenseLabelSet))
+naiveDistMatrix lst = 
+   let sz = P.length lst
+       treeVect  = V.fromList lst
+       labelSets = V.map treeLabels treeVect
+       eachbips  = V.map allBips    treeVect
+       mat = V.generate sz $ \ i ->        
+             U.generate i  $ \ j ->
+             let 
+                 inI = (labelSets V.! i)
+                 inJ = (labelSets V.! j)
+                 inBoth = S.intersection inI inJ
+
+                 -- Match will always succeed due to size==0 test below:
+                 Just prI = pruneTreeLeaves inBoth (treeVect V.! i)
+                 Just prJ = pruneTreeLeaves inBoth (treeVect V.! j)
+                   
+                 -- Memoization: If we are using it at its full size we can use the cached one:
+                 bipsI = if S.size inBoth == S.size inI
+                         then (eachbips V.! i)
+                         else allBips prI
+                 bipsJ = if S.size inBoth == S.size inJ
+                         then (eachbips V.! j)
+                         else allBips prJ
+
+                 diff1 = S.size (S.difference bipsI bipsJ)
+                 diff2 = S.size (S.difference bipsJ bipsI) -- symmetric difference
+             in if S.size inBoth == 0
+                then 0 -- This is weird, but what other answer could we give?
+                else diff1 + diff2
+   in (mat, eachbips)
+
+ where
+   treeLabels :: NewickTree a -> S.Set Label
+   treeLabels (NTLeaf _ lab)  = S.singleton lab
+   treeLabels (NTInterior _ ls) = S.unions (map treeLabels ls)
+
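+-- A tiny worked example (illustrative): the four-taxon trees (A,(B,(C,D))); and
+-- (A,(C,(B,D))); each induce exactly one non-trivial split ({C,D}|{A,B} and
+-- {B,D}|{A,C} respectively).  They share none, so their matrix entry is 1 + 1 = 2.
+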
+-- | The number of bipartitions implied by a tree is one per EDGE in the tree.  Thus
+-- each interior node carries a list of BiPs the same length as its list of children.
+labelBips :: NewickTree a -> NewickTree (a, [DenseLabelSet])
+labelBips tr =
+--    trace ("labelbips "++show allLeaves++" "++show size) $
+#ifdef NORMALIZATION  
+    fmap (\(a,ls) -> (a,map (normBip size) ls)) $
+#endif
+    loop tr
+  where    
+    size = numLeaves tr
+    zero = mkEmptyDense size
+    loop (NTLeaf dec lab) = NTLeaf (dec, [markLabel lab zero]) lab      
+    loop (NTInterior dec chlds) =
+      let chlds' = map loop chlds
+          sets   = map (denseUnions size . snd . get_dec) chlds' in
+      NTInterior (dec, sets) chlds'
+
+    allLeaves = leafSet tr
+    leafSet (NTLeaf _ lab)    = mkSingleDense size lab
+    leafSet (NTInterior _ ls) = denseUnions size $ map leafSet ls
+
+-- normBip :: DenseLabelSet -> DenseLabelSet -> DenseLabelSet
+--    normBip allLeaves bip =
+normBip :: Int -> DenseLabelSet -> DenseLabelSet    
+normBip totsize bip =
+  let -- size     = bipSize allLeaves
+      halfSize = totsize `quot` 2
+--      flipped  = denseDiff allLeaves bip
+      flipped  = invertDense totsize bip 
+  in 
+  case compare (bipSize bip) halfSize of
+    LT -> bip 
+    GT -> flipped -- Flip it
+    EQ -> min bip flipped -- This is a painful case, we need a tie-breaker
+    
+
+foldBips :: Monoid m => (DenseLabelSet -> m) -> NewickTree a -> m
+foldBips f tr = F.foldMap f' (labelBips tr)
+ where f' (_,bips) = F.foldMap f bips
+  
+-- | Get all non-singleton BiPs implied by a tree.
+allBips :: NewickTree a -> S.Set DenseLabelSet
+allBips tr = S.filter ((> 1) . bipSize) $ foldBips S.insert tr S.empty
+
+--------------------------------------------------------------------------------
+-- Optimized, LVish version
+--------------------------------------------------------------------------------
+-- First, necessary types:
+
+-- UNFINISHED:
+#if 0
+-- | A collection of all observed bipartitions (bips) with a mapping of which trees
+-- contain which Bips.
+type BipTable s = IMap DenseLabelSet s (SparseTreeSet s)
+-- type BipTable = IMap BitList (U.Vector Bool)
+-- type BipTable s = IMap BitList s (NA.NatArray s Word8)
+
+-- | Sets of taxa (BiPs) that are expected to be sparse.
+type SparseTreeSet s = IS.ISet s TreeID
+-- TODO: make this a set of numeric tree IDs...
+-- NA.NatArray s Word8
+
+type TreeID = AnnotatedTree
+-- | Trees are identified simply by their order within the list of input trees.
+-- type TreeID = Int
+#endif
+
+--------------------------------------------------------------------------------
+-- Alternate way of slicing the problem: HashRF
+--------------------------------------------------------------------------------
+
+-- The distance matrix is an atomically-bumped matrix of numbers.
+-- type DistanceMat s = NA.NatArray s Word32
+-- Except... bump isn't supported by our idempotent impl.
+
+-- | This version slices the problem a different way.  A single pass over the trees
+-- populates the table of bipartitions.  Then the table can be processed (locally) to
+-- produce (non-localized) increments to a distance matrix.
+hashRF :: Int -> [NewickTree a] -> DistanceMatrix
+hashRF num_taxa trees = build M.empty (zip [0..] trees)
+  where
+    num_trees = length trees
+    -- First build the table:
+    build acc [] = ingest acc
+    build acc ((ix,hd):tl) =
+      let bips = allBips hd
+          acc' = S.foldl' fn acc bips
+          fn acc bip = M.alter fn2 bip acc
+          fn2 (Just membs) = Just (markLabel ix membs)
+          fn2 Nothing      = Just (mkSingleDense num_taxa ix)
+      in      
+      build acc' tl
+
+    -- Second, ingest the table to construct the distance matrix:
+    ingest :: M.Map DenseLabelSet DenseLabelSet -> DistanceMatrix
+    ingest bipTable = runST theST
+      where
+       theST :: forall s0 . ST s0 DistanceMatrix
+       theST = do 
+        -- Triangular matrix, starting narrow and widening:
+        matr <- MV.new num_trees
+        -- Too bad MV.replicateM is insufficient.  It should pass index.  
+        -- Instead we write this C-style:
+        for_ (0,num_trees) $ \ ix -> do 
+          row <- MU.replicate ix (0::Int)
+          MV.write matr ix row
+          return ()
+
+        unsafeIOToST$ putStrLn$" Built matrix for dim "++show num_trees
+
+        let bumpMatr i j | j < i     = incr i j
+                         | otherwise = incr j i
+            incr :: Int -> Int -> ST s0 ()
+            incr i j = do -- Not concurrency safe yet:
+--                          unsafeIOToST$ putStrLn$" Reading at position "++show(i,j)
+                          row <- MV.read matr i
+                          elm <- MU.read row j
+                          MU.write row j (elm+1)
+                          return ()
+            fn bipMembs =
+              -- Here we quadratically consider all pairs of trees and ask whether
+              -- their edit distance is increased based on this particular BiP.
+              -- Actually, as an optimization, it is sufficient to consider only the
+              -- cartesian product of those that have and those that don't.
+              let haveIt   = bipMembs
+                  -- Depending on how invertDense is written, it could be useful to
+                  -- fuse this in and deforest "dontHave".
+                  dontHave = invertDense num_trees bipMembs
+                  fn1 trId = traverseDense_ (fn2 trId) dontHave
+                  fn2 trId1 trId2 = bumpMatr trId1 trId2
+              in
+--                 trace ("Computed donthave "++ show dontHave) $ 
+                 traverseDense_ fn1 haveIt
+        F.traverse_ fn bipTable
+        v1 <- V.unsafeFreeze matr
+        T.traverse (U.unsafeFreeze) v1
+
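+-- A sketch of the counting step above (illustrative, not from the original sources):
+-- if a given bipartition is present in trees {0,2} out of four input trees, then
+-- every pair drawn from {0,2} x {1,3}, namely (0,1), (0,3), (2,1), and (2,3), has its
+-- matrix entry bumped by one, since that bipartition contributes one unit of
+-- Robinson-Foulds distance to each such pair.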
+
+--------------------------------------------------------------------------------
+-- Miscellaneous Helpers
+--------------------------------------------------------------------------------
+
+instance Pretty a => Pretty (S.Set a) where
+ pPrint s = pPrint (S.toList s)
+ 
+
+printDistMat :: Handle -> V.Vector (U.Vector Int) -> IO () 
+printDistMat h mat = do
+  hPutStrLn h "Robinson-Foulds distance (matrix format):"
+  hPutStrLn h "-----------------------------------------"
+  V.forM_ mat $ \row -> do 
+    U.forM_ row $ \elem -> do
+      hPutStr h (show elem)
+      hPutStr h " "
+    hPutStr h "0\n"          
+  hPutStrLn h "-----------------------------------------"
+
+-- My own forM for numeric ranges (not requiring deforestation optimizations).
+-- Inclusive start, exclusive end.
+{-# INLINE for_ #-}
+for_ :: Monad m => (Int, Int) -> (Int -> m ()) -> m ()
+for_ (start, end) _fn | start > end = error "for_: start is greater than end"
+for_ (start, end) fn = loop start
+  where
+   loop !i | i == end  = return ()
+           | otherwise = do fn i; loop (i+1)
+
+-- | Which of a set of trees are compatible with a consensus?
+filterCompatible :: NewickTree a -> [NewickTree b] -> [NewickTree b]
+filterCompatible consensus trees =
+    let cbips = allBips consensus in
+    [ tr | tr <- trees
+         , cbips `S.isSubsetOf` allBips tr ]
+
+-- | `compatibleWith consensus tree` -- Is a tree compatible with a consensus?
+--   This is more efficient if partially applied and then used repeatedly.
+-- 
+-- Note, tree compatibility is not the same as an exact match.  It's
+-- like (<=) rather than (==).  The "star topology" is consistent with
+-- all trees, because it induces the empty set of bipartitions.
+compatibleWith :: NewickTree a -> NewickTree b -> Bool
+compatibleWith consensus =
+  let consBips = allBips consensus in 
+  \ newTr -> S.isSubsetOf consBips (allBips newTr)
+
+-- | Consensus between two trees, which may even have different label maps.
+consensusTreeFull (FullTree n1 l1 t1) (FullTree n2 l2 t2) =
+  error "FINISHME - consensusTreeFull"
+
+-- | Take only the bipartitions that are agreed on by all trees.
+consensusTree :: Int -> [NewickTree a] -> NewickTree ()
+consensusTree _ [] = error "Cannot take the consensusTree of the empty list"
+consensusTree num_taxa (hd:tl) = bipsToTree num_taxa intersection
+  where
+    intersection = L.foldl' S.intersection (allBips hd) (map allBips tl)
+--     intersection = loop (allBips hd) tl
+--     loop :: S.Set DenseLabelSet -> [NewickTree a] -> S.Set DenseLabelSet
+--     loop !remain []      = remain
+--     -- Was attempting to use foldBips here as an optimization:
+-- --     loop !remain (hd:tl) = loop (foldBips S.delete hd remain) tl
+--     loop !remain (hd:tl) = loop (S.difference remain (allBips hd)) tl    
+      
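+-- For instance (illustrative): the consensus of (A,(B,(C,D))); and (A,(C,(B,D)));
+-- keeps only the bipartitions present in both trees; since these two share no
+-- non-trivial split, the result is the star topology (A,B,C,D).
+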
+-- | Convert from bipartitions BACK to a single tree.
+bipsToTree :: Int -> S.Set DenseLabelSet -> NewickTree ()
+bipsToTree num_taxa origbip =
+--  trace ("Doing bips in order: "++show sorted++"\n") $ 
+  loop lvl0 sorted
+  where
+    -- We consider each subset in increasing size order.
+    -- FIXME: If we tweak the order on BIPs, then we can just use S.toAscList here:
+    sorted = L.sortBy (compare `on` bipSize) (S.toList origbip)
+
+    lvl0 = [ (mkSingleDense num_taxa ix, NTLeaf () ix)
+           | ix <- [0..num_taxa-1] ]
+
+    -- VERY expensive!  However, due to normalization issues this is necessary for now:
+    -- TODO: in the future make it possible to definitively denormalize.
+    -- isMatch bip x = denseIsSubset x bip || denseIsSubset x (invertDense num_taxa bip)
+    isMatch bip x = denseIsSubset x bip 
+
+    -- We recursively glom together subtrees until we have a complete tree.
+    -- We only process larger subtrees after we have processed all the smaller ones.
+    loop !subtrees [] =
+      case subtrees of
+        []    -> error "bipsToTree: internal error"
+        [(_,one)] -> one
+        lst   -> NTInterior () (map snd lst)
+    loop !subtrees (bip:tl) =
+--      trace (" -> looping, subtrees "++show subtrees) $ 
+      let (in_,out) = L.partition (isMatch bip. fst) subtrees in
+      case in_ of
+        [] -> error $"bipsToTree: Internal error!  No match for bip: "++show bip
+              ++" out is\n "++show out++"\n and remaining bips "++show (length tl)
+              ++"\n when processing orig bip set:\n  "++show origbip
+          -- loop out tl
+        _ -> 
+         -- Here all subtrees that match the current bip get merged:
+         loop ((denseUnions num_taxa (map fst in_),
+                NTInterior ()        (map snd in_)) : out) tl
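+
+-- A small worked example (illustrative): with num_taxa = 4 and the single input
+-- bipartition {0,1}, leaves 0 and 1 are first glommed into one subtree, and the
+-- final pass wraps that subtree together with leaves 2 and 3 under a new root,
+-- yielding the tree ((0,1),2,3).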
+
diff --git a/Bio/Phylogeny/PhyBin/Util.hs b/Bio/Phylogeny/PhyBin/Util.hs
new file mode 100644
index 0000000..5fb107d
--- /dev/null
+++ b/Bio/Phylogeny/PhyBin/Util.hs
@@ -0,0 +1,139 @@
+{-# LANGUAGE ScopedTypeVariables #-}
+-- RecordWildCards, TypeSynonymInstances, CPP
+-- {-# LANGUAGE NamedFieldPuns #-}
+-- {-# LANGUAGE OverloadedStrings #-}
+-- {-# OPTIONS_GHC -fwarn-incomplete-patterns #-}
+-- {-# OPTIONS_GHC -fwarn-unused-imports #-}
+
+-- | This module contains misc bits used by (multiple) other modules.
+
+module Bio.Phylogeny.PhyBin.Util
+       ( 
+         is_regular_file, acquireTreeFiles,
+         safePrintDendro, sanityCheck
+       )
+       where
+
+import qualified Data.Foldable as F
+import           Data.Function       (on)
+import           Data.List           (delete, minimumBy, sortBy, insertBy, intersperse, sort)
+import           Data.Maybe          (fromMaybe, catMaybes)
+import qualified Data.Map                   as M
+import qualified Data.Set                   as S
+import qualified Data.Text.Lazy             as T
+import           Control.Monad       (forM, forM_, filterM, when, unless)
+import           Control.Exception   (evaluate)
+import           Control.Applicative ((<$>),(<*>))
+import           Control.Concurrent  (Chan)
+import           System.FilePath     (combine)
+import           System.Directory    (doesFileExist, doesDirectoryExist,
+                                      getDirectoryContents, getCurrentDirectory)
+import           System.IO           (openFile, hClose, IOMode(ReadMode), stderr,
+                                      hPutStr, hPutStrLn)
+import           System.Exit         (ExitCode(..))
+import           System.Timeout      (timeout)
+import           Test.HUnit          ((~:),(~=?),Test,test)
+
+-- For visualization:
+import           Text.PrettyPrint.HughesPJClass hiding (char, Style)
+import           Bio.Phylogeny.PhyBin.CoreTypes
+-- import           Bio.Phylogeny.PhyBin.Parser (parseNewick)
+-- import           Bio.Phylogeny.PhyBin.Visualize (dotToPDF, dotNewickTree, viewNewickTree)
+
+import qualified Data.Clustering.Hierarchical as C
+import qualified Data.Graph.Inductive as G
+import qualified Data.GraphViz        as Gv
+import           Data.GraphViz.Printing (renderDot)
+import  Data.GraphViz.Types.Canonical (nodeStmts, graphStatements)
+
+----------------------------------------------------------------------------------------------------
+-- OS specific bits:
+----------------------------------------------------------------------------------------------------
+-- #ifdef WIN32
+-- is_regular_file = undefined
+-- is_directory path = 
+--   getFileAttributes
+-- --getFileInformationByHandle
+-- --    bhfiFileAttributes
+-- file_exists = undefined
+-- #else
+-- is_regular_file :: FilePath -> IO Bool
+-- is_regular_file file = 
+--   do stat <- getFileStatus file; 
+--      -- Hmm, this is probably bad practice... hard to know its exhaustive:
+--      return$ isRegularFile stat || isNamedPipe stat || isSymbolicLink stat
+-- is_directory :: FilePath -> IO Bool
+-- is_directory path = 
+--   do stat <- getFileStatus path
+--      return (isDirectory stat)
+-- file_exists = fileExist
+-- #endif
+
+-- Here we ASSUME the path exists; then these functions are good enough:
+is_regular_file :: FilePath -> IO Bool
+is_regular_file = doesFileExist
+
+is_directory :: FilePath -> IO Bool
+is_directory = doesDirectoryExist 
+
+file_exists :: FilePath -> IO Bool
+file_exists path = 
+  do f <- doesFileExist path
+     d <- doesDirectoryExist path
+     return (f || d)
+
+--------------------------------------------------------------------------------
+
+-- | Expand out directories to find all the tree files.
+acquireTreeFiles :: [String] -> IO [String]
+acquireTreeFiles inputs = do 
+    all :: [[String]] <- forM inputs $ \ path -> do
+      exists <- file_exists path 
+
+      --stat   <- if exists then getFileStatus path else return (error "internal invariant")
+      -- [2010.09.23] This is no longer really necessary:
+      if not exists then do
+        error$ "No file or directory found at this path!: "++path
+	 -- hPutStr stderr$ "Input not a file/directory, assuming wildcard, using 'find' for expansion"
+	 -- entries <- HSH.run$ "find " ++ path	 
+	 -- hPutStrLn stderr$ "("++show (length entries)++" files found):  "++ show path
+	 -- return entries
+       else do
+	 isdir <- is_directory path
+	 reg  <- is_regular_file path
+	 if isdir then do 
+	    hPutStr stderr$ "Input is a directory, reading all regular files contained "
+	    children <- getDirectoryContents path
+	    filtered <- filterM is_regular_file $ map (combine path) children
+	    hPutStrLn stderr$ "("++show (length filtered)++" regular files found):  "++ show path
+	    return$ filtered
+          else if reg then do 
+	    return [path]
+	  else error$ "phybin: Unhandled input path: " ++ path
+
+    return (concat all)
+
+--------------------------------------------------------------------------------
+
+-- | Step carefully in case of cycles (argh).
+safePrintDendro :: Gv.DotGraph G.Node -> IO (Maybe T.Text)
+safePrintDendro dotg= do 
+--  putStrLn$ "Dendrogram graph size: "++ show (F.foldl' (\a _ -> a+1) 0 dotg)
+  mx <- timeout (2 * 1000 * 1000) $ do
+--        putStrLn$ "Dendrogram graph, is directed?: "++ show (Gv.directedGraph dotg)
+        putStrLn$ "Dendrogram graph size: "++ show (length $ nodeStmts $ graphStatements dotg)
+        let str = renderDot $ Gv.toDot dotg
+        evaluate (T.length str)
+        return str
+  case mx of
+    Nothing -> do putStrLn "WARNING: DotGraph appears to be a cyclic structure.  This is probably a bug."
+                  return Nothing
+    _ -> return mx
+
+sanityCheck :: C.Dendrogram (FullTree DefDecor) -> IO ()
+sanityCheck dendro = do 
+  let fn seen elm | S.member (treename elm) seen =
+                       error$"Dendrogram failed sanity check!  Tree name occurs multiple times: "++(treename elm)
+                  | otherwise = S.insert (treename elm) seen
+      sz = S.size $ F.foldl' fn S.empty dendro
+  putStrLn$ "Sanity checked dendrogram of size: "++show sz
diff --git a/Bio/Phylogeny/PhyBin/Visualize.hs b/Bio/Phylogeny/PhyBin/Visualize.hs
new file mode 100644
index 0000000..2f72774
--- /dev/null
+++ b/Bio/Phylogeny/PhyBin/Visualize.hs
@@ -0,0 +1,449 @@
+{-# LANGUAGE NamedFieldPuns #-}
+
+module Bio.Phylogeny.PhyBin.Visualize
+       (dotNewickTree, dotToPDF, viewNewickTree,
+        dotNewickTree_debug,
+
+        -- * Dendrogram visualization
+        dendrogramToGraph, dotDendrogram
+       )
+       where
+import           Text.Printf        (printf)
+import           Data.List          (elemIndex, isPrefixOf)
+import           Data.List.Split    (chunksOf)
+import           Data.Maybe         (fromJust, isJust)
+import qualified Data.Map           as M
+import qualified Data.Set           as S
+import qualified Data.Vector        as V
+import           Data.Text.Lazy     (pack)
+import           Control.Monad      (void)
+import           Control.Concurrent  (Chan, newChan, writeChan, forkIO)
+import qualified Data.Graph.Inductive as G  hiding (run)
+import qualified Data.GraphViz        as Gv hiding (parse, toLabel)
+import qualified Data.GraphViz.Attributes.Complete as GA
+import qualified Data.GraphViz.Attributes.Colors   as GC
+import           Data.GraphViz.Attributes.Colors   (Color(RGB))
+
+-- import           Test.HUnit          ((~:),(~=?),Test,test)
+
+import           System.Timeout (timeout)
+
+import qualified Data.Clustering.Hierarchical as C
+
+import           Bio.Phylogeny.PhyBin.CoreTypes
+import           Bio.Phylogeny.PhyBin.RFDistance (filterCompatible, compatibleWith, consensusTree)
+
+import Debug.Trace
+
+----------------------------------------------------------------------------------------------------
+-- Visualization with GraphViz and FGL:
+----------------------------------------------------------------------------------------------------
+
+-- First we need to be able to convert our trees to FGL graphs:
+toGraph :: FullTree StandardDecor -> G.Gr String Double
+toGraph (FullTree _ tbl tree) = G.run_ G.empty $ loop tree
+  where
+ fromLabel ix = tbl M.! ix
+ loop (NTLeaf _ name) = 
+    do let str = fromLabel name
+       _ <- G.insMapNodeM str
+       return str
+ loop (NTInterior (StandardDecor{sortedLabels}) ls) =
+    do let bigname = concatMap fromLabel sortedLabels
+       names <- mapM loop ls
+       _ <- G.insMapNodeM bigname
+       mapM_ (\x -> G.insMapEdgeM (bigname, x, 0.0)) names
+       return bigname
+
+-- This version uses the tree nodes themselves as graph labels.
+toGraph2 :: FullTree StandardDecor -> G.Gr (NewickTree StandardDecor) Double       
+toGraph2 (FullTree _ tbl tree) = G.run_ G.empty $ loop tree
+  where
+ loop node@(NTLeaf _  _) =  
+    do _ <- G.insMapNodeM node 
+       return ()
+ loop node@(NTInterior _ ls) =
+    do mapM_ loop ls
+       _ <- G.insMapNodeM node
+       -- Edge weights as just branchLen (not bootstrap):
+       mapM_ (\x -> G.insMapEdgeM (node, x, branchLen$ get_dec x)) ls
+       return ()
+
+----------------------------------------------------------------------------------------------------
+-- Dendrograms
+----------------------------------------------------------------------------------------------------
+
+-- | Convert a dendrogram to a DotGraph.  Shares some duplicated code with dotNewickTree.
+dotDendrogram :: PhyBinConfig -> String -> Double -> C.Dendrogram (FullTree a) ->
+                 Maybe (M.Map TreeName Int) -> [[NewickTree ()]] -> Gv.DotGraph G.Node
+dotDendrogram PBC{show_trees_in_dendro, show_interior_consensus}
+              title edge_scale origDendro mNameMap highlightTrs =
+  Gv.graphToDot myparams (G.nmap uid graph)
+ where
+  (charsDropped, dendro) = truncateNames origDendro
+  -- This is ugly, but we modify the name map to match:
+  nameMap' = fmap (M.mapKeys (drop charsDropped)) mNameMap  
+  graph :: DendroGraph
+  graph = dendrogramToGraph num_taxa dendro
+
+  num_taxa = numLeaves $ nwtree fstLeaf
+  FullTree{labelTable} = fstLeaf
+  fstLeaf = getFstLeaf dendro
+  getFstLeaf (C.Leaf ft)      = ft
+  getFstLeaf (C.Branch _ l _) = getFstLeaf l 
+
+  uidsToNames = M.fromList $
+                map (\nd@NdLabel{uid} -> (uid,nd)) $
+--                filter (isJust . tre) $ 
+                map (fromJust . G.lab graph) $  
+                G.nodes graph
+
+  matchers = map mkMatcher highlightTrs
+  mkMatcher ls = let fns = map compatibleWith ls -- Multiple predicate functions
+                 in \ tr -> -- Does it match any predicate?
+                    or$ map (\f -> f tr) fns 
+  
+  wcolors = zip matchers defaultPalette
+  findColor tr = loop wcolors
+    where loop [] = Nothing
+          loop ((f,col):rst) | f tr = Just col
+                             | otherwise = loop rst
+  
+  -- Map uids to highlight color
+  highlightMap = M.map fromJust $
+                 M.filter isJust $
+                 M.map (\ (NdLabel _ _ _ ct) -> findColor ct)
+                 uidsToNames
+
+  myparams :: Gv.GraphvizParams G.Node String Double () String -- (Either String String)
+  myparams = Gv.defaultParams { Gv.globalAttributes= [Gv.GraphAttrs [GA.Label$ GA.StrLabel$ pack title]],
+                                Gv.fmtNode= nodeAttrs,
+                                Gv.fmtEdge= edgeAttrs
+                              }
+  nodeAttrs :: (Int, UniqueNodeName) -> [GA.Attribute]
+  nodeAttrs (_num, uid) =
+    let uid' = if show_trees_in_dendro
+               then printed_tree++"\n"++uid
+               else uid
+        printed_tree =
+          case M.lookup uid uidsToNames of
+            Nothing -> ""
+            Just NdLabel{clumpSize,consensus} ->
+              (if clumpSize>1 then "size "++show clumpSize++"\n" else "")
+              ++ show (displayStrippedTree (FullTree "" labelTable consensus))
+        (tag,shp,styl) = -- case eith of
+          if isPrefixOf "DUMMY_" uid
+          then (if show_trees_in_dendro && show_interior_consensus
+                then printed_tree else "",
+                if show_interior_consensus
+                then GA.BoxShape -- GA.PlainText
+                else GA.PointShape,
+                [ GA.Color [weighted$ GA.X11Color Gv.Transparent] ]
+               )
+          else (uid', GA.Ellipse, [ GA.Style [GA.SItem GA.Filled []]])
+        highlightColor = 
+          case M.lookup uid highlightMap of
+            Nothing  -> []
+            Just col -> [ GA.Color [weighted col ] ]
+        clustColor | not (null highlightTrs) = []
+                   | otherwise = 
+          case (nameMap', M.lookup uid uidsToNames) of
+            (Just nm, Just NdLabel{tre=Just FullTree{treename}}) ->
+              case M.lookup treename nm of
+                Nothing -> []
+                -- Here we color the top TEN clusters:
+                Just ind | ind <= 10 -> [ GA.Color [weighted$ defaultPaletteV V.! (ind-1) ] ]
+                         | otherwise -> []
+            -- TODO: shall we color intermediate nodes?
+            _ -> []
+    in 
+    [ GA.Label$ GA.StrLabel$ pack tag
+    , GA.Shape shp
+    ] ++ styl ++ highlightColor ++ clustColor
+
+  edgeAttrs = getEdgeAttrs edge_scale
+
+
+type UniqueNodeName = String
+-- type DendroGraph = G.Gr (Maybe TreeName,UniqueNodeName) Double
+type DendroGraph = G.Gr NdLabel Double
+
+-- | When we first convert to a graph representation, there is a bunch of information
+-- hanging off of each node.
+data NdLabel =
+  NdLabel
+  { uid       :: UniqueNodeName
+  , tre       :: Maybe (FullTree ())
+  , clumpSize :: !Int
+  , consensus :: NewickTree ()
+  }
+  deriving (Show, Ord, Eq)
+
+-- | Create a graph using TreeNames for node labels and edit-distance for edge labels.
+dendrogramToGraph :: Int -> C.Dendrogram (FullTree a) -> DendroGraph
+dendrogramToGraph num_taxa orig =
+  G.run_ G.empty $ void$
+    loop (fmap (fmap (const())) orig)
+  where
+-- loop node@(C.Leaf ft) = G.insMapNodeM (NdLabel (treename ft) (Just$ fmap (const()) ft) 1 ft)
+ loop node@(C.Leaf ft) =
+   let stripped = fmap (const()) ft in
+   G.insMapNodeM (NdLabel (treename ft) (Just stripped) 1 (nwtree stripped))
+ loop node@(C.Branch 0 left right) = do
+   -- As a preprocessing step we collapse clusters that are separated by zero edit distance.
+   ----------------------------------------
+   let lvs = collapseZeroes left ++ collapseZeroes right
+       nms = map (treename) lvs
+       lens = map length nms
+       total = sum lens
+       avg   = total `quot` length nms
+       -- The goal here is to make an approximately square arrangement:
+       -- goal: avg * perline == total / (avg * perline)
+       perline = ceiling$ sqrt (fromIntegral total / ((fromIntegral avg)^2))
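+       -- Worked example (illustrative): 9 names of average length 4 give total = 36
+       -- and avg = 4, so perline = ceiling (sqrt (36 / 16)) = 2 names per line.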
+       chunked = chunksOf perline nms
+       fatname = unlines (map unwords chunked)
+   G.insMapNodeM (NdLabel fatname (Just (head lvs))
+                  (length lvs) (consensusTree num_taxa (map nwtree lvs)))
+   ----------------------------------------   
+ loop node@(C.Branch dist left right) =
+    do (_,ll@(NdLabel lid _ s1 c1)) <- loop left
+       (_,rr@(NdLabel rid _ s2 c2)) <- loop right
+       -- Interior nodes do NOT have their names drawn:
+       let ndname = "DUMMY_"++(lid++"_"++rid)  -- HACK!
+       (midN,mid) <- G.insMapNodeM (NdLabel ndname Nothing (s1+s2) (consensusTree num_taxa [c1,c2]))
+       G.insMapEdgeM (ll, mid, dist)
+       G.insMapEdgeM (rr, mid, dist)
+       return (midN,mid)
+
+ collapseZeroes (C.Leaf tr)      = [tr]
+ collapseZeroes (C.Branch 0 l r) = collapseZeroes l ++ collapseZeroes r
+ collapseZeroes oth = error "dendrogramToGraph: internal error.  Not expecting non-zero branch length here."
+
+
+-- | The plot looks nicer when the names aren't bloated with repeated stuff. This
+-- replaces all tree names with potentially shorter names by removing the common prefix.
+-- Returns how many characters were dropped.
+truncateNames :: C.Dendrogram (FullTree a) -> (Int, C.Dendrogram (FullTree a))
+truncateNames dendro = (prefChars, fmap chopName dendro)
+ where 
+  chopName ft = ft{ treename= drop prefChars (treename ft) }
+  prefChars = length$ commonPrefix$ S.toList$ allNames dendro
+  allNames (C.Leaf tr)      = S.singleton (treename tr)
+  allNames (C.Branch _ l r) = S.union (allNames l) (allNames r)
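+
+-- For example (hypothetical file names): trees named "cluster1_tree_07.tr" and
+-- "cluster1_tree_12.tr" share the 14-character prefix "cluster1_tree_", so the
+-- plotted labels become "07.tr" and "12.tr".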
+
+----------------------------------------------------------------------------------------------------
+
+
+-- | Open a GUI window to display a tree.
+--
+--   Fork a thread that then runs graphviz.
+--   The channel returned will carry a single message to signal
+--   completion of the subprocess.
+viewNewickTree :: String -> FullTree StandardDecor -> IO (Chan (), FullTree StandardDecor)
+-- TODO: UPDATE THIS TO RETURN AN ASYNC!!
+viewNewickTree title tree@(FullTree{nwtree}) =
+  do chan <- newChan
+     let dot = dotNewickTree title (1.0 / avg_branchlen [nwtree])
+	                     tree
+	 runit = do mx <- -- timeout defaultTimeout $
+                          -- This one is interactive, so we don't need a timeout.
+                          Gv.runGraphvizCanvas default_cmd dot Gv.Xlib
+                    -- case mx of
+                    --   Nothing -> putStrLn$ "WARNING: call to graphviz TIMED OUT."++file
+                    --   _       -> return ()
+		    writeChan chan ()
+     --str <- prettyPrint d
+     --putStrLn$ "Generating the following graphviz tree:\n " ++ str
+     _ <- forkIO runit
+       --do runGraphvizCanvas Dot dot Xlib; return ()
+       
+     return (chan, tree)
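+
+-- An illustrative usage sketch (assumes some annotated tree `ft :: FullTree StandardDecor`
+-- is already in scope; not part of the library API):
+--
+-- > (done, _t) <- viewNewickTree "some title" ft
+-- > readChan done   -- block here until the graphviz canvas process finishes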
+
+
+--default_cmd = TwoPi -- Totally ignores edge lengths.
+default_cmd :: Gv.GraphvizCommand
+default_cmd = Gv.Neato
+
+-- Show a float without scientific notation:
+myShowFloat :: Double -> String
+-- showFloat weight = showEFloat (Just 2) weight ""
+myShowFloat fl =
+  let rnd = round fl in
+  if fl == fromIntegral rnd
+  then show rnd
+  else printf "%.4f" fl
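+-- e.g. myShowFloat 2.0 == "2";  myShowFloat 0.03002 == "0.0300"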
+
+-- | Render a graphviz dot graph to a .pdf file.
+dotToPDF :: Gv.DotGraph G.Node -> FilePath -> IO (Maybe FilePath)
+dotToPDF dot file = do
+  -- If we have any problem with graphviz we want to time that out rather than let
+  -- our whole run hang:
+  x <- timeout defaultTimeout $ 
+       Gv.runGraphvizCommand default_cmd dot Gv.Pdf file
+  case x of
+    Nothing -> do putStrLn$ "WARNING: call to graphviz TIMED OUT.  File not plotted: "++file
+                  return Nothing
+    _       -> return x
+
+-- Arbitrary: 15 second timeout.
+defaultTimeout :: Int
+defaultTimeout = (15 * 1000 * 1000)
+
+-- | Convert a NewickTree to a graphviz Dot graph representation.
+dotNewickTree :: String -> Double -> FullTree StandardDecor -> Gv.DotGraph G.Node
+dotNewickTree title edge_scale atree@(FullTree _ tbl tree) = 
+    --trace ("EDGE SCALE: " ++ show edge_scale) $
+    Gv.graphToDot myparams graph
+ where 
+  graph = toGraph2 atree
+  fromLabel ix = tbl M.! ix  
+  myparams :: Gv.GraphvizParams G.Node (NewickTree StandardDecor) Double () (NewickTree StandardDecor)
+  myparams = Gv.defaultParams { Gv.globalAttributes= [Gv.GraphAttrs [GA.Label$ GA.StrLabel$ pack title]],
+                                Gv.fmtNode= nodeAttrs, Gv.fmtEdge= edgeAttrs }
+  nodeAttrs :: (Int,NewickTree StandardDecor) -> [GA.Attribute]
+  nodeAttrs (_num, node) =
+    let children = get_children node in 
+    [ GA.Label$ GA.StrLabel$ pack$ 
+      concatMap fromLabel $
+      sortedLabels $ get_dec node
+    , GA.Shape (if null children then {-PlainText-} GA.Ellipse else GA.PointShape)
+    , GA.Style [GA.SItem GA.Filled []]
+    ]
+  edgeAttrs = getEdgeAttrs edge_scale
+
+
+-- | Some arbitrarily chosen colors from the X11 set:
+defaultPaletteV :: V.Vector GA.Color
+defaultPaletteV = V.fromList defaultPalette
+
+defaultPalette :: [GA.Color]
+defaultPalette = concat$ replicate 4 $ map GA.X11Color 
+  [ Gv.Aquamarine
+  , Gv.PaleVioletRed
+  , Gv.MediumPurple
+  , Gv.PaleGreen
+  , Gv.PapayaWhip
+  , Gv.SkyBlue
+  , Gv.Yellow
+  , Gv.Crimson
+  , Gv.Gray
+  , Gv.PaleGoldenrod
+  ]
+
+-- Grabbed the first couple palettes off a website:
+altPalette :: V.Vector GA.Color
+altPalette = V.fromList $ concat $ replicate 3 $ 
+  -- http://www.colourlovers.com/palette/2962435/Autumn_Roses
+  [ RGB 159 74 81, RGB 217 183 173, RGB 149 91 116, RGB 185 138 148
+  --  , RGB 101 69 82 -- too dark
+  ] ++
+  -- http://www.colourlovers.com/palette/2962434/Earthy_warm
+  [ RGB 108 74 39, RGB 207 179 83, RGB 180 149 60, RGB 244 242 185
+  -- , RGB 61 63 39
+  ]
+  
+getEdgeAttrs :: Double -> (t, t1, Double) -> [GA.Attribute]
+getEdgeAttrs edge_scale = edgeAttrs
+ where 
+  -- TOGGLE:
+  --  edgeAttrs (_,_,weight) = [ArrowHead noArrow, Len (weight * edge_scale + bump), GA.Label (StrLabel$ show (weight))]
+  edgeAttrs (_,_,weight) = 
+                           let draw_weight = compute_draw_weight weight edge_scale in
+                           --trace ("EDGE WEIGHT "++ show weight ++ " drawn at "++ show draw_weight) $
+			   [GA.ArrowHead Gv.noArrow,
+                            GA.Label$ GA.StrLabel$ pack$ myShowFloat weight] ++ -- TEMPTOGGLE
+			   --[ArrowHead noArrow, GA.Label (StrLabel$ show draw_weight)] ++ -- TEMPTOGGLE
+			    if weight == 0.0
+			    then [GA.Color [weighted$ GA.X11Color Gv.Red],
+                                  GA.LWidth 3.0, GA.Len minlen]
+			    else [GA.Len draw_weight]
+  minlen = 0.7
+  maxlen = 3.0
+  compute_draw_weight w scale = 
+    let scaled = (abs w) * scale + minlen in 
+    -- Don't draw them too big or it gets annoying:
+    (min scaled maxlen)
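+  -- e.g. with edge_scale = 10 (illustrative), a branch of length 0.05 is drawn at
+  -- length min (0.05*10 + 0.7) 3.0 = 1.2.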
+
+weighted c = GC.WC {GC.wColor=c, GC.weighting=Nothing}
+
+-- | This version shows the ordered/rooted structure of the normalized tree.
+dotNewickTree_debug :: String -> FullTree StandardDecor -> Gv.DotGraph G.Node
+dotNewickTree_debug title atree@(FullTree _ tbl tree) = Gv.graphToDot myparams graph
+ where 
+  graph = toGraph2 atree
+  fromLabel ix = tbl M.! ix    
+  myparams :: Gv.GraphvizParams G.Node (NewickTree StandardDecor) Double () (NewickTree StandardDecor)
+  myparams = Gv.defaultParams { Gv.globalAttributes= [Gv.GraphAttrs [GA.Label$ GA.StrLabel$ pack title]],
+			        Gv.fmtNode= nodeAttrs, Gv.fmtEdge= edgeAttrs }
+  nodeAttrs :: (Int,(NewickTree StandardDecor)) -> [GA.Attribute]
+  nodeAttrs (num,node) =
+    let children = get_children node in 
+    [ GA.Label (if null children 
+  	        then GA.StrLabel$ pack$ concatMap fromLabel $ sortedLabels $ get_dec node
+	        else GA.RecordLabel$ take (length children) $ 
+                                  -- This will leave interior nodes unlabeled:
+	                          map (GA.PortName . GA.PN . pack) $ map show [1..]
+		                  -- This version gives some kind of name to interior nodes:
+--	                          map (\ (i,ls) -> LabelledTarget (PN$ show i) (fromLabel$ head ls)) $ 
+--                                       zip [1..] (map (thd3 . get_dec) children)
+               )
+    , GA.Shape GA.Record
+    , GA.Style [GA.SItem GA.Filled []]
+    ]
+
+  edgeAttrs (num1, num2, _weight) = 
+    let node1 = fromJust$ G.lab graph num1 
+	node2 = fromJust$ G.lab graph num2 	
+	ind = fromJust$ elemIndex node2 (get_children node1)
+    in [GA.TailPort$ GA.LabelledPort (GA.PN$ pack$ show$ 1+ind) (Just GA.South)]
+
+
+
+
+--------------------------------------------------------------------------------
+-- Unit Testing
+--------------------------------------------------------------------------------
+
+
+-- tt0 :: IO (Chan (), FullTree)
+-- tt0 = drawNewickTree "tt0" $ annotateWLabLists $ parseNewick "" "(A,(C,D,E),B);"
+
+-- tt3 :: IO (Chan (), FullTree)
+-- tt3 = drawNewickTree "tt3" tt
+
+-- tt4 :: IO (Chan (), FullTree)
+-- tt4 = drawNewickTree "tt4"$ trace ("FINAL: "++ show (pPrint norm4)) $ norm4
+
+-- tt5 :: IO (Chan (), FullTree)
+-- tt5 = drawNewickTree "tt5"$ norm5
+
+
+-- tt5' :: String
+-- tt5' = prettyPrint' $ dotNewickTree "norm5" 1.0 norm5
+
+-- ttall :: IO (Chan (), FullTree)
+-- ttall = do tt3; tt4; tt5
+
+-- tt2 :: G.Gr String Double
+-- tt2 = toGraph tt
+
+
+-- unitTests :: Test
+-- unitTests = test
+--    [ 
+--    ]
+
+
+-- TEMP / HACK:
+prettyPrint' :: Show a => a -> String
+prettyPrint' = show
+
+-- | Common prefix of a list of lists.
+commonPrefix :: Eq a => [[a]] -> [a]
+commonPrefix [] = []
+commonPrefix ls@(hd:tl)
+  | any null ls = []
+  | otherwise =
+    if   all ((== (head hd)) . head) tl
+    then head hd : commonPrefix (map tail ls)
+    else           []  -- Stop at the first mismatch; everything before it is the prefix.
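+-- e.g. commonPrefix ["RAxML_bipartitions.88","RAxML_bipartitions.506"] == "RAxML_bipartitions."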
diff --git a/DEVLOG.md b/DEVLOG.md
new file mode 100644
index 0000000..606d83b
--- /dev/null
+++ b/DEVLOG.md
@@ -0,0 +1,140 @@
+
+
+[2012.11.19]
+------------
+
+Right now the rickettsia dataset is causing problems:
+
+    phybin.exe: Rickettsia/phybin_output/bin1_16.txt: openFile: does not exist (No such file or directory)
+
+[2013.07.22] {Beginning optimization of all-to-all RF distance}
+---------------------------------------------------------------
+
+Making the labels into Ints and switching to IntSet has sped up the
+150-taxa/100-trees example from 3 sec to 1.6 seconds on my macbook
+retina.
+
+
+[2013.07.23] {Debugging}
+------------------------
+
+Ok, clustering on RFDistance with --editdist 0 should give the same
+answers as phybin --bin, but it's not!
+
+On Irene's current Wolbachia dataset, traditional phybin --bin comes
+up with this as its top cluster:
+
+    RAxML_bipartitions.88
+    RAxML_bipartitions.506
+    RAxML_bipartitions.476
+    RAxML_bipartitions.39
+    RAxML_bipartitions.386
+    RAxML_bipartitions.321
+    RAxML_bipartitions.3
+    RAxML_bipartitions.276
+    RAxML_bipartitions.260
+    RAxML_bipartitions.256
+    RAxML_bipartitions.233
+    RAxML_bipartitions.197
+    RAxML_bipartitions.171
+    RAxML_bipartitions.165
+    RAxML_bipartitions.141
+    RAxML_bipartitions.116
+
+By contrast, --UPGMA --editdist 0 comes up with this:
+
+    RAxML_bipartitions.120
+    RAxML_bipartitions.167
+    RAxML_bipartitions.250
+    RAxML_bipartitions.38
+    RAxML_bipartitions.444
+    RAxML_bipartitions.494
+    
+This is with no branch-collapsing...  Ugh, that's no overlap at all.
+If we look at the second bin for --UPGMA though, we see overlap:
+
+    RAxML_bipartitions.233
+    RAxML_bipartitions.3
+    RAxML_bipartitions.321
+    RAxML_bipartitions.39
+    RAxML_bipartitions.476
+    RAxML_bipartitions.88
+
+Ok, at least one is a subset of the other this time... and here's the diff:
+
+    RAxML_bipartitions.116
+    RAxML_bipartitions.141
+    RAxML_bipartitions.165
+    RAxML_bipartitions.171
+    RAxML_bipartitions.197
+    RAxML_bipartitions.256
+    RAxML_bipartitions.260
+    RAxML_bipartitions.276
+    RAxML_bipartitions.386
+    RAxML_bipartitions.506
+
+And here's the tree that phybin --bin produced:
+
+   ((14, 3_), (19, (5_, 13)), ((1_, 2_), (7_, (18, 6_))));
+
+I singled out 116 and 233, and then printed their normalized forms
+using "phybin -p 2 --printnorms tests/t2/*":
+
+    Tree "116"
+    ((1_, 2_), (7_, (18, 6_)), ((14, 3_), (19, (13, 5_))))
+    Tree "233"
+    ((1_, 2_), (7_, (18, 6_)), ((14, 3_), (19, (13, 5_))))
+
+And UNNORMALIZED:
+
+    Tree "116"
+    (3_, ((((6_, 18), 7_), (2_, 1_)), (19, (13, 5_))), 14)
+    Tree "233"
+    (5_, (19, ((3_, 14), ((2_, 1_), (7_, (6_, 18))))), 13)
+
+Wait.... this makes it look like normalization is WRONG!!!
+Tree 233 starts with 13+5 in a three-way split with a big chunk
+containing everything else.
+
+In fact, the RFDistance is three, which is probably right.
+
+[2013.07.23] {Updating set reps, timing}
+----------------------------------------
+
+Ok, went ahead and switched over to unboxed bitvectors for
+DenseLabelSet.  It's 10X slower!
+
+The BITVEC_BIPS option is getting the right answer now though.
+Apparently the normalization bug just fixed was the only bug.
+
+For 100 trees, 150 taxa (from the hashrf suite):
+
+  * IntSet takes 2.2s, 7.2G alloc, 114M copied.
+  * bit vec takes 21s and 69G alloc, 264M copied.
+  
+Productivities are high in both cases.
+
+Oops... there was a bug in bipSize.  I'm surprised it was getting the
+right answer.  Fixing that brought it down to 39G alloc, 12.3s.
+Still, bit vectors are NOT a win for the direct/simple N^2 version.
+
+
+[2013.07.25] {Comparison against HashRF}
+----------------------------------------
+
+Note that HashRF doesn't have very good error messages.  For example,
+if given our messy data with some trees missing taxa:
+
+    ./hashrf Wolbachia/all_trees.tr 928
+
+    *** Collecting the taxon labels ***
+	Number of taxa = 10
+
+    *** Reading tree file and collecting bipartitions ***
+    libc++abi.dylib: terminate called throwing an exception
+    Abort trap: 6
+
+In fact... even if I use phybin to prune out the 503 trees with 10
+taxa, I still get the error.
+
+
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..1a44106
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,31 @@
+NOTE: This License applies to all files in the "Haskell CnC"
+distribution, irrespective of whether the license is appended at the
+top of each file.
+
+          BSD STANDARD THREE CLAUSE LICENSE ("New BSD License")
+
+Copyright (c) 2010, Intel Corporation.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the <organization> nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/Main.hs b/Main.hs
new file mode 100644
index 0000000..deaaf0c
--- /dev/null
+++ b/Main.hs
@@ -0,0 +1,617 @@
+{-# LANGUAGE RecordWildCards, TupleSections, NamedFieldPuns #-}
+{-# OPTIONS_GHC -fwarn-unused-imports -fwarn-incomplete-patterns #-}
+
+-- | The MAIN module, of course.  This is the script that deals with
+-- command line options and calling into the heart of the beast.
+
+module Main where
+import           Data.List (sort, intersperse, foldl')
+import           Data.List.Split (splitOneOf)
+import qualified Data.ByteString.Char8 as B
+import qualified Data.Map as M
+import qualified Data.Set as S
+import           Control.Monad
+import           Control.Concurrent    (readChan)
+import           System.Environment    (getArgs, withArgs)
+import           System.Console.GetOpt (OptDescr(Option), ArgDescr(..), ArgOrder(..), usageInfo, getOpt)
+import           System.Exit           (exitSuccess)
+import           System.IO             (stdout) 
+
+import           Control.Exception (catch, SomeException)
+import qualified Data.Vector                 as V
+import           Test.HUnit                  as HU
+
+import Bio.Phylogeny.PhyBin.CoreTypes 
+import Bio.Phylogeny.PhyBin           (driver, normalize, annotateWLabLists,
+                                       unitTests, acquireTreeFiles, deAnnotate,
+                                       retrieveHighlights, matchAnyHighlight)
+import Bio.Phylogeny.PhyBin.Parser    (parseNewick, parseNewickFiles, unitTests)
+import Bio.Phylogeny.PhyBin.Visualize (viewNewickTree) -- dotNewickTree_debug
+import Bio.Phylogeny.PhyBin.RFDistance 
+import Bio.Phylogeny.PhyBin.PreProcessor
+
+import qualified Data.Clustering.Hierarchical as C
+
+import Data.Version (showVersion)
+import Paths_phybin (version)
+
+----------------------------------------------------------------------------------------------------
+-- MAIN script: Read command line options and call the program.
+----------------------------------------------------------------------------------------------------
+
+-- Note: ORDER is important here; we process options in this order:
+data Flag 
+    = Verbose  
+    | Version 
+    | Output String
+    | SetNumTaxa Int
+    | BranchThresh Double
+    | BootStrapThresh Int
+    | PruneTaxa [String]      
+    | NullOpt
+    | Graph | Draw
+    | Force 
+    | View
+    | TabDelimited Int Int
+
+    | Highlight FilePath
+    | ShowTreesInDendro | ShowInterior
+
+    | RFMode WhichRFMode
+    | SelfTest
+    | RFMatrix | LineSetDiffMode
+    | PrintNorms | PrintReg | PrintConsensus | PrintMatching
+    | Cluster C.Linkage
+    | BinningMode
+    | EditDistThresh Int
+    | DendogramOnly
+
+    | NameCutoff String
+    | NamePrefix Int
+    | NameTable String  -- Must come after Cutoff/Prefix
+
+  deriving (Show, Eq, Ord) -- ORD is really used.
+           
+parseTabDelim :: String -> Flag
+parseTabDelim _str = 
+  TabDelimited 8 9
+    
+options :: [OptDescr Flag]
+options =
+     [ Option ['v']     ["verbose"] (NoArg Verbose)    "print WARNINGS and other information (recommended at first)"
+     , Option ['V']     ["version"] (NoArg Version)    "show version number"
+
+     , Option ['o']     ["output"]  (ReqArg Output "DIR")  "set directory to contain all output files (default \"./phybin_out/\")"
+     , Option []     ["selftest"]   (NoArg SelfTest)   "run internal unit tests"
+       
+{- -- TODO: FIXME: IMPLEMENT THIS:
+     , Option []        []          (NoArg NullOpt)  ""
+     , Option ['t']     ["tabbed"]  (ReqArg parseTabDelim "NUM1:NUM2")$  "assume the input is a tab-delimited file with gene names \n"++
+		                                                        "in column NUM1 and Newick trees in NUM2"
+-}       
+     , Option []        []          (NoArg NullOpt)  ""
+     , Option []        []  (NoArg$ error "internal problem")  "----------------------------- Clustering Options ------------------------------"
+
+     , Option []    ["bin"]      (NoArg BinningMode)$                 "Use simple binning, the cheapest form of 'clustering'"
+     , Option []    ["single"]   (NoArg$ Cluster C.SingleLinkage)  $  "Use single-linkage clustering (nearest neighbor)"
+     , Option []    ["complete"] (NoArg$ Cluster C.CompleteLinkage)$  "Use complete-linkage clustering (furthest neighbor)"
+     , Option []    ["UPGMA"]    (NoArg$ Cluster C.UPGMA)          $  "Use Unweighted Pair Group Method (average linkage) - DEFAULT mode"
+
+     , Option []    ["editdist"]  (ReqArg (EditDistThresh . read) "DIST")$
+                                  "Combine all clusters separated by DIST or less.  Report a flat list of clusters.\n"++
+                                  "Irrespective of whether this is activated, a hierarchical clustering (dendogram.pdf) is produced."  
+--     , Option []    ["dendogram"] (NoArg DendogramOnly)$ "Report a hierarchical clustering (default)"
+
+     , Option []        []          (NoArg NullOpt)  ""
+     , Option []        []     (NoArg$ error "internal problem")  "--- Select Robinson-Foulds (symmetric difference) distance algorithm: ---"
+     , Option []    ["hashrf"]   (NoArg$ RFMode HashRF)
+                   "(default) use a variant of the HashRF algorithm for the distance matrix"       
+     , Option []    ["tolerant"] (NoArg$ RFMode TolerantNaive)
+                    "use a slower, modified RF metric that tolerates missing taxa"
+       
+     , Option []        []          (NoArg NullOpt)  ""
+     , Option []        []  (NoArg$ error "internal problem")  "----------------------------- Visualization --------------------------------"
+     , Option ['g']     ["graphbins"] (NoArg Graph)  "use graphviz to produce .dot and .pdf output files"
+      -- TODO: Produce the consensus tree as well as the individual trees.
+     , Option ['d']     ["drawbins"]  (NoArg Draw)   "like -g, but open GUI windows to show each bin's tree"
+
+     , Option ['w']     ["view"]    (NoArg View)$  "for convenience, \"view mode\" simply displays input Newick files without binning" 
+
+     , Option [] ["showtrees"] (NoArg ShowTreesInDendro)  "Print (textual) tree topology inside the nodes of the dendrogram"
+     , Option [] ["highlight"] (ReqArg Highlight "FILE") $ 
+           "Highlight nodes in the tree-of-trees (dendrogram) consistent with the\n"++
+           "given tree file.  Multiple highlights are permitted and use different colors."
+--         "Highlights may be SUBTREES that use any subset of the taxa."
+     , Option [] ["interior"] (NoArg ShowInterior)
+          "Show the consensus trees for interior nodes in the dendrogram, rather than just points."
+           
+     , Option []        []          (NoArg NullOpt)  ""
+     , Option []        []  (NoArg$ error "internal problem")  "---------------------------- Tree pre-processing -----------------------------"
+     , Option []    ["prune"]  (ReqArg (PruneTaxa . parseTaxa) "TAXA")
+                               ("Prune trees to only TAXA before doing anything else.\n"++
+                                "Space and comma separated lists of taxa are allowed.  Use quotes.")
+     , Option ['b'] ["minbranchlen"] (ReqArg (BranchThresh . read) "LEN") "collapse branches less than LEN"
+
+     , Option []    ["minbootstrap"] (ReqArg (BootStrapThresh . read) "INT")
+                                     "collapse branches with bootstrap values less than INT"
+     -- , Option ['n'] ["numtaxa"] (ReqArg (SetNumTaxa . read) "NUM")
+     --                            "expect NUM taxa for this dataset, demand trees have all of them"
+       
+     , Option []        []          (NoArg NullOpt)  ""
+     , Option []        []  (NoArg$ error "internal problem")  "--------------------------- Extracting taxa names ----------------------------"
+--     , Option ['n']     ["numtaxa"] (ReqArg (SetNumTaxa . read) "NUM") "expect NUM taxa for this dataset (otherwise it will guess)"
+       --  TODO, FIXME: The "guessing" part doesn't actually work yet -- implement it!!
+       --  What's a good algorithm?  Insist they all have the same number?  Take the mode?
+       
+{- -- TODO: FIXME: IMPLEMENT THIS:
+     , Option ['f']     ["force"]   (NoArg Force)    "force phybin to consume and bin trees with different numbers of taxa"
+-}
+     , Option []        []          (NoArg NullOpt)  ""
+     , Option ['p']     ["nameprefix"]  (ReqArg (NamePrefix . read) "NUM") $ 
+		  "Leaf names in the input Newick trees can be gene names, not taxa.\n"++
+    	    	  "Then it is typical to extract taxa names from genes.  This option extracts\n"++
+                  "a prefix of NUM characters to serve as the taxa name."
+
+     , Option []        []          (NoArg NullOpt)  ""
+     , Option ['s']     ["namesep"]   (ReqArg NameCutoff "STR") $  --"\n"++
+		  "An alternative to --nameprefix, STR provides a set of delimiter characters,\n"++
+                  "for example '-' or '0123456789'.  The taxa name is then a variable-length\n"++
+		  "prefix of each gene name up to but not including any character in STR."
+
+     , Option []        []          (NoArg NullOpt)  ""
+     , Option ['m']     ["namemap"]  (ReqArg NameTable "FILE") $ 
+		  "Even once prefixes are extracted it may be necessary to use a lookup table\n"++
+		  "to compute taxa names, e.g. if multiple genes/plasmids map onto one taxa.\n"++
+		  "This option specifies a text file with find/replace entries of the form\n"++
+		  "\"<string> <taxaname>\", which are applied AFTER -s and -p."
+                  
+     , Option []        []          (NoArg NullOpt)  ""
+     , Option []        []  (NoArg$ error "internal problem")  "--------------------------- Utility Modes ----------------------------"
+     , Option [] ["rfdist"]  (NoArg RFMatrix)        "print a Robinson Foulds distance matrix for the input trees"
+     , Option [] ["setdiff"] (NoArg LineSetDiffMode) "for convenience, print the set difference between cluster*.txt files"
+     , Option [] ["print"]      (NoArg PrintReg)     "simply print out a concise form of each input tree"       
+     , Option [] ["printnorms"] (NoArg PrintNorms)   "simply print out a concise and NORMALIZED form of each input tree"
+     , Option [] ["consensus"]  (NoArg PrintConsensus) "print a strict consensus tree for the inputs, then exit"
+     , Option [] ["matching"] (NoArg PrintMatching) "print a list of tree names that match any --highlight argument"
+     ]
+ 
+
+-- | Parse the list of taxa provided on the command line.
+parseTaxa :: String -> [String]
+parseTaxa str = filter (not . null) $
+                splitOneOf ",; " str                
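+-- e.g. parseTaxa "taxA, taxB;taxC" == ["taxA","taxB","taxC"]  (hypothetical taxa names)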
+
+usage :: String
+usage = "\nUsage: phybin [OPTION...] files or directories...\n\n"++
+
+--        "MODE must be one of 'bin' or 'cluster'.\n\n" ++
+
+        "PhyBin takes Newick tree files as input.  Paths of Newick files can\n"++
+        "be passed directly on the command line.  Or, if directories are provided,\n"++
+        "all files in those directories will be read.  Taxa are named based on the files\n"++
+        "all files in those directories will be read.  Trees are named based on the files\n"++
+        "containing them.  If a file contains multiple trees, all are read by phybin, and\n"++
+        "the tree name then includes a suffix indicating the position in the file:\n"++
+        "\n"++          
+
+        -- "In binning mode, Phybin output contains, at minimum, files of the form binXX_YY.tr,\n"++
+        -- "each containing a list of input file paths that fall into that bin.  XX signifies\n"++
+        -- "the rank of the bin (biggest first), and YY is how many trees it contains.\n"++
+        -- "\n"++        
+
+        "When clustering trees, Phybin computes a complete all-to-all Robinson-Foulds distance matrix.\n"++
+        "If a threshold distance (tree edit distance) is given, then a flat set of clusters\n"++
+        "will be produced in files clusterXX_YY.tr.  Otherwise it produces a full dendrogram.\n"++
+        "\n"++  
+
+        "Binning mode provides an especially quick-and-dirty form of clustering.\n"++
+        "When running with the --bin option, only exactly equal trees are put in the same cluster.\n"++
+        "Tree pre-processing still applies, however: for example collapsing short branches.\n"++
+        "\n"++        
+
+	"USAGE NOTES: \n"++
+        " * Currently phybin ignores input trees with the wrong number of taxa.\n"++
+	" * If given a directory as input phybin will assume all contained files are Newick trees.\n\n"++
+
+	"\nOptions include:\n"
+
+defaultErr :: [String] -> t
+defaultErr errs = error $ "ERROR!\n" ++ (concat errs ++ usageInfo usage options)
+
+--------------------------------------------------------------------------------
+
+main :: IO ()
+main = 
+  do argv <- getArgs 
+
+     (opts,files) <- 
+       case getOpt Permute options argv of
+	 (o,n,[]  ) -> return (o,n)
+         (_,_,errs) -> defaultErr errs
+
+     all_inputs <- acquireTreeFiles files
+     let process_opt cfg opt = case opt of 
+	   NullOpt -> return cfg
+	   Verbose -> return cfg { verbose= True } 
+	   Version -> do putStrLn$ "phybin version "++ showVersion version; exitSuccess
+
+	   SelfTest -> do _ <- runTestTT allUnitTests; exitSuccess
+
+     	   RFMatrix -> return cfg { print_rfmatrix= True }
+
+           ShowTreesInDendro -> return cfg { show_trees_in_dendro = True }
+           ShowInterior      -> return cfg { show_interior_consensus = True }
+           Highlight path    -> return cfg { highlights = path : highlights cfg }
+     
+           LineSetDiffMode -> do
+             bss <- mapM B.readFile files
+             case map (S.fromList . B.lines) bss of
+               [set1,set2] -> do let [f1,f2] = files
+                                 let diff = S.difference set1 set2
+                                 putStrLn$" Found "++show(S.size diff)++" lines occurring in "++f1++" but not "++f2
+                                 mapM_ B.putStrLn $ S.toList diff
+               oth -> error $"Line set difference mode expects two files as input, got "++show(length oth)
+             exitSuccess
+
+           PrintNorms     -> return cfg
+           PrintReg       -> return cfg
+           PrintConsensus -> return cfg
+           PrintMatching  -> return cfg
+           
+           Cluster lnk -> return cfg { clust_mode = ClusterThem lnk }
+           RFMode TolerantNaive
+             | Expected _ <- num_taxa cfg -> error "should not set --numtaxa AND --tolerant"
+             | otherwise  -> return cfg { rfmode = TolerantNaive }
+           RFMode HashRF  -> return cfg { rfmode = HashRF }
+           BinningMode -> return cfg { clust_mode = BinThem }
+           EditDistThresh n -> return cfg { dist_thresh = Just n }
+           DendogramOnly    -> return cfg { dist_thresh = Nothing }
+     
+	   Output s -> return cfg { output_dir= s }
+
+	   SetNumTaxa n
+             | rfmode cfg == TolerantNaive -> error "should not set --tolerant AND --numtaxa"
+             | otherwise -> return cfg { num_taxa= Expected n }
+     	   BranchThresh    n -> return cfg { branch_collapse_thresh    = Just n }
+     	   BootStrapThresh n -> return cfg { bootstrap_collapse_thresh = Just n }
+
+     	   PruneTaxa ls -> return cfg { preprune_labels = Just ls }
+     
+	   Graph     -> return cfg { do_graph= True } 
+	   Draw	     -> return cfg { do_draw = True } 
+	   View      -> return cfg -- Handled below
+
+	   TabDelimited _ _ -> error "tabbed option not handled yet"
+	   Force            -> error "force option not handled yet"
+
+
+	   NameCutoff str -> let chopper = takeWhile (not . flip S.member (S.fromList str)) 
+			     in return cfg { name_hack = chopper . name_hack cfg }
+	   NamePrefix n   -> return cfg { name_hack = (take n) . name_hack cfg }
+
+           -- This should always be after cutoff/prefix:
+	   NameTable file -> do reader <- name_table_reader file
+				return cfg { name_hack = reader . name_hack cfg }
+
+     config <- foldM process_opt default_phybin_config{ inputs= all_inputs } 
+	             (sort opts) -- NOTE: options processed in sorted order.
+
+     when (null files) $ do
+	defaultErr ["No file arguments!\n"]
+
+     --------------------------------------------------------------------
+     ----- Handle SPECIAL modes before going into normal processing -----
+     --------------------------------------------------------------------
+     -- This mode kicks in AFTER config options are processed.
+     when (elem PrintReg opts) $ do 
+       (_,fts) <- parseNewickFiles (name_hack config) all_inputs
+       forM_ fts $ \ ft@(FullTree name _ _) -> do
+         putStrLn $ "Tree "++show name
+         putStrLn$ show$ displayDefaultTree ft
+       exitSuccess
+     ------------------------------------------------------------
+     when (elem PrintNorms opts) $ do 
+       (_,fts) <- parseNewickFiles (name_hack config) all_inputs
+       forM_ fts $ \ ft@(FullTree name _ _) -> do
+         putStrLn $ "Tree "++show name++", NORMALIZED:"
+         putStrLn$ show$ displayDefaultTree$ deAnnotate $
+           liftFT (normalize . annotateWLabLists) ft
+       exitSuccess
+     ------------------------------------------------------------
+     when (elem PrintConsensus opts) $
+       case (num_taxa config) of
+         Expected numtax -> do 
+           (_,fts) <- parseNewickFiles (name_hack config) all_inputs
+           putStrLn $ "Strict Consensus Tree of "++show (length fts)++" trees:"
+           when (null fts) $ error "No trees provided!"
+           let ctree = consensusTree numtax (map nwtree fts)
+               FullTree{labelTable} = head fts
+           print$ displayStrippedTree$ FullTree "" labelTable ctree
+           exitSuccess
+         _ -> error "--consensus: cannot compute consensus when number of taxa is unknown or variable"
+     ------------------------------------------------------------
+     when (elem PrintMatching opts) $ do 
+       (labs,fts) <- parseNewickFiles (name_hack config) all_inputs
+       when (null$ highlights config) $ error "No --highlight given, so --matching makes no sense.  Matching what?"
+       htrs <- retrieveHighlights (name_hack config) labs (highlights config)
+       let isMatch = matchAnyHighlight htrs
+       forM_ fts $ \ FullTree{treename,nwtree} ->
+         when (isMatch$ fmap (const()) nwtree) $
+          putStrLn treename
+       exitSuccess       
+     ------------------------------------------------------------
+     when (View `elem` opts) $ do 
+       view_graphs config
+       exitSuccess
+     ------------------------------------------------------------
+     -- Otherwise do the main, normal thing:
+     driver config
+
+-- | Completely optional visualization step.  If this fails, we don't
+-- sweat it.
+view_graphs :: PhyBinConfig -> IO ()
+view_graphs PBC{..} = catch act hndl
+ where
+  hndl :: SomeException -> IO ()
+  hndl exn = putStrLn$ "Warning: Viewing graph failed, ignoring.  Error was: "++show exn
+  act  =   do chans <- forM inputs $ \ file -> do 
+                putStrLn$ "Drawing "++ file ++"...\n"
+		str <- B.readFile file
+		putStrLn$ "Parsed: " ++ (B.unpack str)
+                let (tbl,tr) = parseNewick M.empty name_hack file str
+ 	        (chan, _tr) <- viewNewickTree file (FullTree "" tbl (annotateWLabLists (head tr)))
+	        return chan
+	      forM_ chans readChan 
+	      return ()
+
+
+--------------------------------------------------------------------------------
+-- Every dataset it seems needs a new hack on the names!
+
+name_table_reader :: String -> IO (String -> String)
+name_table_reader file = 
+  do contents <- readFile file
+     let mp = M.fromList $ 
+	      map (\ls -> case ls of 
+		           [a,b] -> (a,b)
+		           _ -> error$ "Each line of "++file++" must contain two whitespace free strings: "++ unwords ls) $ 
+	      filter (not . null) $
+	      map tokenize $ 
+	      lines contents
+     return$ 
+       \ name_to_hack -> 
+
+       case M.lookup name_to_hack mp of -- Could use a trie
+	     Just x -> x
+	     Nothing -> name_to_hack
+  where
+    tokenize :: String -> [String]
+    tokenize line =
+      case words line of
+        []        -> []
+        [one]     -> error$"Name table contained bad line:  "++ show one
+        [one,two] -> [one,two]
+        (one:rest) -> [one, unwords rest]
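+
+-- An illustrative name-table file (hypothetical contents) might look like:
+--
+-- > gene123_plasmid   TaxonA
+-- > gene456           TaxonB
+--
+-- After any -s/-p processing, a name equal to "gene123_plasmid" would be replaced by
+-- "TaxonA"; names with no entry in the table pass through unchanged.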
+
+temp :: IO ()
+temp = driver default_phybin_config{ num_taxa= Expected 7, inputs=["../datasets/test.tr"] }
+
+
+--------------------------------------------------------------------------------
+-- Aggregated Unit Tests
+--------------------------------------------------------------------------------
+
+allUnitTests :: Test
+-- allUnitTests = unitTests ++
+allUnitTests = test 
+  [ Bio.Phylogeny.PhyBin.unitTests
+  , Bio.Phylogeny.PhyBin.Parser.unitTests
+  , "norm/Bip1" ~: (testNorm prob1)
+  , "bipTreeConversion" ~: testBipConversion
+  , "t3" ~: t3_consensusTest,
+    "t4" ~: t4_consensusTest,
+    "t5" ~: t5_consensusTest
+  ]
+
+-- [2013.07.23]      
+-- This was INCORRECTLY normalizing to:
+--     ((1_, 2_), (7_, (18, 6_)), ((14, 3_), (19, (13, 5_))))
+prob1 :: String
+prob1 = "(5_, (19, ((3_, 14), ((2_, 1_), (7_, (6_, 18))))), 13);"
+
+-- | Make sure that the normalized version of a tree yields the same bipartitions as
+-- the unnormalized one.
+testNorm :: String -> IO ()
+testNorm str = do
+  let (labs,parseds) = parseNewick M.empty id "test" (B.pack str)
+      parsed = head parseds
+      normed = normalize $ annotateWLabLists parsed
+      bips1  = allBips parsed
+      bips2  = allBips normed
+      added   = S.difference bips2 bips1
+      removed = S.difference bips1 bips2
+      -- dispBips bip = show$
+      --   map (map (labs M.!)) $ 
+      --   map IS.toList$ S.toList bip
+  unless (bips1 == bips2) $ do
+    putStrLn$ "Normalized this: "++show (displayDefaultTree $ FullTree "" labs parsed)
+    putStrLn$ "To this        : "++show (displayDefaultTree $ deAnnotate $ FullTree "" labs normed)
+    error$ "Normalization added and removed these bipartitions, respectively:\n  "
+           ++concat (intersperse " " (map (dispBip labs) (S.toList added))) ++"\n  "
+           ++concat (intersperse " " (map (dispBip labs) (S.toList removed)))
+
+
+-- 112 and 13
+rftest :: IO ()
+rftest = do 
+  (_mp,[t1,t2]) <- parseNewickFiles (take 2) ["tests/t1/13.tr", "tests/t1/112.tr"]
+  putStrLn$ "Tree 13           : " ++ show (displayDefaultTree t1)
+  putStrLn$ "Tree 112          : "++ show (displayDefaultTree t2)
+
+  putStrLn$ "Tree 13 normed    : "++ show (disp t1)
+  putStrLn$ "Tree 112 normed   : "++ show (disp t2)
+
+  putStrLn$ "13  collapsed 0.02: " ++show (disp$ liftFT (collapseBranchLenThresh 0.02) t1)
+  putStrLn$ "112 collapsed 0.02: " ++show (disp$ liftFT (collapseBranchLenThresh 0.02) t2)
+
+  putStrLn$ "13  collapsed 0.03: " ++show (disp$ liftFT (collapseBranchLenThresh 0.03) t1)
+  putStrLn$ "112 collapsed 0.03: " ++show (disp$ liftFT (collapseBranchLenThresh 0.03) t2)  
+
+  let (mat,_) = naiveDistMatrix [nwtree t1, nwtree t2]
+  printDistMat stdout mat
+  return ()
+ where
+  disp (FullTree nm labs tr) =
+    let collapsed :: AnnotatedTree 
+        collapsed = normalize$ annotateWLabLists tr
+    in displayDefaultTree$ deAnnotate $  FullTree nm labs collapsed
+
+-- | This test was done with --editdist 4 --complete
+t3_consensusTest :: IO ()
+t3_consensusTest = consensusTest "./tests/t3_consensus/cluster1_284_alltrees.tr"
+                                 "./tests/t3_consensus/cluster1_284_consensus.tr"
+
+-- | This test was done with --editdist 0 --complete
+t4_consensusTest :: IO ()
+t4_consensusTest = consensusTest "./tests/t4_consensus/cluster1_16_alltrees.tr"
+                                 "./tests/t4_consensus/cluster1_16_consensus.tr"
+
+-- | This test was done with --editdist 1 --complete
+t5_consensusTest :: IO ()
+t5_consensusTest = consensusTest "./tests/t5_consensus/cluster1_35_alltrees.tr"
+                                 "./tests/t5_consensus/cluster1_35_consensus.tr"
+
+consensusTest :: String -> String -> IO ()
+consensusTest alltrees consensus = do  
+  (_,ctree:ftrees)  <- parseNewickFiles id [consensus,alltrees]
+  let num_taxa      = numLeaves (nwtree ctree)
+      plainTrs      = map nwtree  ftrees 
+      eachbips      = map allBips plainTrs
+      totalBips     = foldl' S.union        S.empty eachbips
+      intersectBips = foldl' S.intersection S.empty eachbips
+      FullTree _ labs _ = ctree
+      linesPrnt x = unlines (map (("  "++) . dispBip labs) $ S.toList x)
+  putStrLn$ "Bips in each: "++     show (map S.size eachbips)
+  putStrLn$ "Total bips in all: "++show (S.size totalBips)  
+  putStrLn$ "Bips in common: "++   show (S.size intersectBips)
+  putStrLn$ "Bips of first member:\n" ++ linesPrnt (head eachbips)
+  putStrLn$ "Some bips in the union that are NOT in the intersection:\n" ++
+     linesPrnt (S.fromList$ take 20$ S.toList$ S.difference totalBips intersectBips)
+  let cbips = allBips $ nwtree ctree
+  putStrLn$ "ConsensusBips ("++show (S.size cbips)++"):\n"++linesPrnt cbips
+  putStrLn$"Things in the consensus that should NOT be:\n"++linesPrnt (S.difference cbips intersectBips)
+  putStrLn$"Things not in the consensus that SHOULD be:\n"++linesPrnt (S.difference intersectBips cbips)
+
+  putStrLn$ "Now recomputing consensus tree for "++show num_taxa++" taxa"
+  let ctree2 = consensusTree num_taxa plainTrs
+      cbips2 = allBips ctree2
+  putStrLn$ "Freshly recomputed consensusBips ("++show (S.size cbips2)++"):\n"++linesPrnt cbips2
+  HU.assertEqual "Consensus tree on disk should match computed one:"
+         cbips cbips2 -- (allBips$ fmap (const ()) $ nwtree ctree)         
+  
+  putStrLn " Partial distance matrix WITHIN this cluster:"
+  let (mat,_) = naiveDistMatrix (map nwtree ftrees)
+  printDistMat stdout (V.take 30 mat)
+  HU.assertBool "Consensus should only include bips in the members" (S.isSubsetOf cbips totalBips)
+  HU.assertEqual "Consensus tree matches intersected bips" cbips intersectBips 
+  return ()
+
+testBipConversion :: IO ()
+testBipConversion = 
+  do (_,trs) <- allTestTrees
+     mapM_ testOne trs
+     putStrLn "All trees passed bip conversion."
+  where
+    testOne (FullTree{nwtree}) = do
+      let sz    = numLeaves nwtree   
+          bips  = allBips nwtree
+          tr2   = bipsToTree sz bips
+          bips2 = allBips tr2
+      assertEqual "round trip bips->tree->bips" bips bips2
+
+-- | Read in all test trees which we happen to have put in the repository for testing
+-- purposes.
+allTestTrees :: IO (LabelTable, [FullTree DefDecor])
+allTestTrees =
+  parseNewickFiles id $
+  [ "./tests/t3_consensus/cluster1_284_alltrees.tr"
+  , "./tests/t3_consensus/cluster1_284_consensus.tr"
+  , "./tests/t4_consensus/cluster1_16_alltrees.tr"
+  , "./tests/t4_consensus/cluster1_16_consensus.tr"
+  , "./tests/t5_consensus/cluster1_35_alltrees.tr"
+  , "./tests/t5_consensus/cluster1_35_consensus.tr"
+  ]
+  
+
+----------------------------------------------------------------------------------------------------
+-- TODO: expose a command line argument for testing.
+-- The below test exposed my normalization bug relating to "deriving Ord".
+-- I need to transform it into one or more proper unit tests.
+
+main_test :: IO ()
+main_test = 
+ withArgs ["-w","~/newton_and_newton_local/datasets/yersinia/yersinia_trees/111.dnd","-m","../datasets/yersinia/name_table_hack_yersinia.txt"]
+	  main 
+
+-- [2013.07.22] Disabling for the new Label representation:
+{-
+pa :: NewickTree DefDecor
+pa = set_dec (Nothing,1) $ 
+    NTInterior () [NTInterior () [NTLeaf () "RE",NTInterior () [NTLeaf () "SD",NTLeaf () "SM"]],NTInterior () [NTLeaf () "BB",NTLeaf () "BJ"],NTInterior () [NTLeaf () "MB",NTLeaf () "ML"]]
+
+pb :: NewickTree DefDecor
+pb = set_dec (Nothing,1) $ 
+    NTInterior () [NTInterior () [NTLeaf () "BB",NTLeaf () "BJ"],NTInterior () [NTLeaf () "MB",NTLeaf () "ML"],NTInterior () [NTLeaf () "RE",NTInterior () [NTLeaf () "SD",NTLeaf () "SM"]]]
+
+ls1 :: [(String, NewickTree DefDecor)]
+ls1 = [("a",pa),("b",pb)]
+
+-- This is one:
+num_binned :: Int
+num_binned = M.size $ binthem ls1
+
+a_ :: (String, NewickTree DefDecor)
+a_ = ("980.dnd",
+      fmap (\x -> (Nothing,x)) $ 
+      NTInterior 0.0 [NTInterior 5.697e-2 [NTLeaf 3.95e-2 "SM",NTLeaf 5.977e-2 "SD"],NTLeaf 0.13143 "RE",NTInterior 0.13899 [NTInterior 9.019e-2 [NTLeaf 0.11856 "BB",NTLeaf 0.13592 "BJ"],NTInterior 0.13194 [NTLeaf 0.19456 "MB",NTLeaf 0.16603 "ML"]]])
+
+b_ :: (String, NewickTree DefDecor)
+b_ = ("999.dnd",
+      fmap (\x -> (Nothing,x)) $ 
+      NTInterior 0.0 [NTInterior 6.527e-2 [NTInterior 0.13734 [NTLeaf 2.975e-2 "SM",NTLeaf 3.002e-2 "SD"],NTLeaf 0.18443 "RE"],NTInterior 6.621e-2 [NTLeaf 0.16184 "MB",NTLeaf 0.15233 "ML"],NTInterior 0.23143 [NTLeaf 9.192e-2 "BB",NTLeaf 0.10125 "BJ"]])
+
+-- But THIS is two:  ack!
+num2 :: Int
+num2 = M.size $ binthem [a_,b_]
+
+-- Here's the test that breaks things:
+a_norm :: NewickTree StandardDecor
+a_norm = normalize (annotateWLabLists$ snd a_)
+
+b_norm_ :: NewickTree (Double, Int, [Label])
+b_norm_ = NTInterior (0.0,7,["BB","BJ","MB","ML","RE","SD","SM"])
+         [NTInterior (0.23143,2,["BB","BJ"]) [NTLeaf (9.192e-2,1,["BB"]) "BB",NTLeaf (0.10125,1,["BJ"]) "BJ"],NTInterior (6.621e-2,2,["MB","ML"]) [NTLeaf (0.16184,1,["MB"]) "MB",NTLeaf (0.15233,1,["ML"]) "ML"],NTInterior (6.527e-2,3,["RE","SD","SM"]) [NTLeaf (0.18443,1,["RE"]) "RE",NTInterior (0.13734,2,["SD","SM"]) [NTLeaf (3.002e-2,1,["SD"]) "SD",NTLeaf (2.975e-2,1,["SM"]) "SM"]]]
+
+b_norm :: NewickTree StandardDecor
+b_norm = fmap (\ (bl,w,ls) -> StandardDecor bl Nothing w ls) b_norm_
+
+d1 :: IO (Chan (), NewickTree StandardDecor)
+d1 = viewNewickTree "" a_norm
+
+d2 :: IO (Chan (), NewickTree StandardDecor)
+d2 = viewNewickTree "" b_norm
+
+d1_ :: IO ThreadId
+d1_ = forkIO $ do runGraphvizCanvas Dot (dotNewickTree_debug "" a_norm) Xlib; return ()
+                  
+d2_ :: IO ThreadId
+d2_ = forkIO $ do runGraphvizCanvas Dot (dotNewickTree_debug "" b_norm) Xlib; return ()
+
+
+-- | An example of a tree with _____ weights attached to it:
+withBootstrap :: String
+withBootstrap = "((((A8F330_:0.01131438136322714984,(G0GWK2_:0.00568050636963043226,(Q92FV4_:0.00284163304504484121,((B0BVQ5_:0.00319487112504297311,A8GU65_:0.00000122123005994819)74:0.00279881991324161267,(C3PM27_:0.00560787769333294297,C4K2Z0_:0.00559642713265556899)15:0.00000122123005994819)4:0.00000122123005994819)56:0.00276851661606284868)60:0.00283144414216590342)76:0.00886304965525876697,(A8GQC0_:0.05449879836105625541,(A8F0B2_:0.04736199885985507840,Q4UJN9_:0.02648399728559588939 [...]
+
+withBootstrap2 :: String
+withBootstrap2 = "((((A8F330_:0.01131438136322714984,(G0GWK2_:0.00568050636963043226,(Q92FV4_:0.00284163304504484121,((B0BVQ5_:0.00319487112504297311,A8GU65_:0.00000122123005994819):0.00279881991324161267[74],(C3PM27_:0.00560787769333294297,C4K2Z0_:0.00559642713265556899):0.00000122123005994819[15]):0.00000122123005994819[4]):0.00276851661606284868[56]):0.00283144414216590342[60]):0.00886304965525876697[76],(A8GQC0_:0.05449879836105625541,(A8F0B2_:0.04736199885985507840,Q4UJN9_:0.0264839 [...]
+-}
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..7f55d5a
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,121 @@
+
+
+
+default: haskell
+
+all: find_tree_matches.exe haskell
+
+# GHC=ghc-7.4.2
+GHC=ghc-7.6.3
+
+release: haskell
+	upx phybin.exe
+	du -h phybin.exe
+
+haskell:
+	cabal configure
+	cabal build
+	cp ./dist/build/phybin/phybin phybin.exe
+	du -h phybin.exe
+	strip phybin.exe
+	du -h phybin.exe
+
+win:
+	$(GHC) -DWIN32 --make Main.hs -o phybin.exe
+
+find_tree_matches.exe: find_tree_matches.ss
+	mzc --exe find_tree_matches.exe find_tree_matches.ss
+
+clean:
+	rm -f phybin *.exe *.hi *.o Bio/Phylogeny/PhyBin/*.o Bio/Phylogeny/PhyBin/*.hi
+
+################################################################################
+
+#launch: clean release doc distro push upload
+launch: doc distro push upload
+
+doc:
+	pandoc -s -S --toc -c website.css README.md -o website/index.html
+
+release_website:
+	cp website/* ~/.hyplan/projects/phybin/
+
+distro:
+	cabal sdist
+	cp -v dist/*.tar.gz website/
+
+# HASKELLSRV=haskell.intel
+HASKELLSRV=community.haskell.org
+
+
+################################################################################
+# Collecting shortcuts for running different datasets:
+
+
+# LOCALSETS=~/newton_and_newton_local/datasets
+LOCALSETS=.
+
+rhizobia:
+	rm -rf $(LOCALSETS)/rhizobia/phybin_outputs
+	mkdir $(LOCALSETS)/rhizobia/phybin_outputs
+	./phybin.exe $(LOCALSETS)/rhizobia/gde/*.dnd -o $(LOCALSETS)/rhizobia/phybin_output/ -p 2 -n 7 -g -v 
+
+#yersinia_test_names:
+#	./phybin.exe -w ~/newton_and_newton_local/datasets/yersinia/yersinia_trees/111.dnd -s .  -m ../datasets/yersinia/name_table_hack_yersinia.txt 
+
+yersinia:
+	rm -rf $(LOCALSETS)/yersinia/phybin_outputs
+	mkdir  $(LOCALSETS)/yersinia/phybin_outputs
+	./phybin.exe $(LOCALSETS)/yersinia/yersinia_trees/ -o $(LOCALSETS)/yersinia/phybin_outputs/ -m ../datasets/yersinia/name_table_hack_yersinia.txt -s . -n 9 -g
+
+legionella:
+	rm -rf $(LOCALSETS)/legionella/phybin_outputs
+	mkdir  $(LOCALSETS)/legionella/phybin_outputs
+	./phybin.exe $(LOCALSETS)/legionella/legionella_orthologs_aa/ -o $(LOCALSETS)/legionella/phybin_outputs/ -m ../datasets/legionella/name_table_hack_legionella.txt -s 0123456789 -n 4 -g
+
+
+
+# Newer ones [2012.11.19]:
+newbatch: rickettsia rickettsiales wolbachia
+
+# BRANCHTHRESH=0.01
+BRANCHTHRESH=0.05
+PHYBIN= ./phybin.exe -b $(BRANCHTHRESH) -g
+
+#------------------------------------------------------------
+rickettsia:    $(LOCALSETS)/Rickettsia/renaming_table.txt
+	rm -rf $(LOCALSETS)/Rickettsia/phybin_outputs
+	mkdir  $(LOCALSETS)/Rickettsia/phybin_outputs
+	$(PHYBIN) -n 15 -m $(LOCALSETS)/Rickettsia/renaming_table.txt -s '_'  -o $(LOCALSETS)/Rickettsia/phybin_output/ $(LOCALSETS)/Rickettsia/final_trees/*BranchLab*.out 
+
+$(LOCALSETS)/Rickettsia/renaming_table.txt: $(LOCALSETS)/Rickettsia/Rickettsia_orthololgs.txt
+	runghc stripTable.hs $^ > $@
+
+#------------------------------------------------------------
+rickettsiales: $(LOCALSETS)/Rickettsiales/renaming_table.txt
+	rm -rf $(LOCALSETS)/Rickettsiales/phybin_outputs
+	mkdir  $(LOCALSETS)/Rickettsiales/phybin_outputs
+	$(PHYBIN) -n 29 -m $(LOCALSETS)/Rickettsiales/renaming_table.txt -s '_'  -o $(LOCALSETS)/Rickettsiales/phybin_output/ $(LOCALSETS)/Rickettsiales/final_trees/*BranchLab*.out 
+
+$(LOCALSETS)/Rickettsiales/renaming_table.txt: $(LOCALSETS)/Rickettsiales/Rickettsiales_orthologs.txt
+	runghc stripTable.hs $^ > $@
+
+#------------------------------------------------------------
+wolbachia:     $(LOCALSETS)/Wolbachia/renaming_table.txt
+	rm -rf $(LOCALSETS)/Wolbachia/phybin_outputs
+	mkdir  $(LOCALSETS)/Wolbachia/phybin_outputs
+	$(PHYBIN) -n 4 -m $(LOCALSETS)/Wolbachia/renaming_table.txt -s '_'  -o $(LOCALSETS)/Wolbachia/phybin_output/ $(LOCALSETS)/Wolbachia/final_trees/*BranchLab*.out 
+
+$(LOCALSETS)/Wolbachia/renaming_table.txt: $(LOCALSETS)/Wolbachia/Wolbachia_orthologs.txt
+	runghc stripTable.hs $^ > $@
+
+
+
+vary_branchlen: $(LOCALSETS)/Rickettsia/renaming_table.txt $(LOCALSETS)/Rickettsiales/renaming_table.txt $(LOCALSETS)/Wolbachia/renaming_table.txt
+	./vary_branchlen.sh 15 Rickettsia
+	./vary_branchlen.sh 29 Rickettsiales
+	./vary_branchlen.sh  4 Wolbachia
+
+temp:
+	./phybin.exe ~/newton_and_newton_local/datasets/rhizobia/gde/*.dnd -o temp -p 2 -n 7 
+
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..35a38e4
--- /dev/null
+++ b/README.md
@@ -0,0 +1,166 @@
+% PhyBin (0.3): Binning/Clustering Newick Trees by Topology
+
+
+PhyBin is a simple command line tool that classifies a set of
+  [Newick tree files](http://en.wikipedia.org/wiki/Newick_format) 
+by their topology.  The purpose of it is to take a large set of tree
+files and browse through the most common tree topologies.
+
+![(Above figure) Trees corresponding to the three largest bins resulting from a
+  phybin run.  Each bin is written to a file `binXX_YYY`, where `XX` is the rank
+  of the bin and `YYY` is the number of trees having that topology.](trees.jpg)
+
+Change Log
+==========
+
+In version 0.2, PhyBin was extended to do clustering as well as binning:
+
+ * Compute full all-to-all Robinson-Foulds distance matrices (quickly)
+ * Hierarchical clustering of all trees into a tree-of-trees dendrogram based on 
+   Robinson Foulds symmetric (tree edit) distance.
+
+In version 0.3, PhyBin gained a number of new features:
+
+ * A `--tolerant` mode for computing RF distance matrices even for trees missing taxa. 
+ * A `--prune` option for "zooming in" on a specific set of taxa.
+ * The `--minbootstrap` option was added.
+
+
+Invoking PhyBin 
+===============
+
+PhyBin is a command-line program that produces output in the form of
+text files and pdfs, but to produce pdfs (to visualize trees) the
+  [GraphViz program](http://www.graphviz.org/),
+including the `dot` command, must be installed on the machine.
+
+The following is a simple invocation of PhyBin:
+
+    phybin --bin *.tree -o output_dir/
+
+The input trees can be specified directly on the command-line, or, if the
+name of a directory is provided instead, all contained files are
+assumed to be trees in Newick format.
+
+PhyBin, at minimum, produces files of the form
+`output_dir/clusterXX_YY.tr`, one for each bin.  If
+requested, it will also produce visual representations of each bin in
+the form `output_dir/clusterXX_YY.pdf`.
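+
+Clustering (the default mode when `--bin` is not given) can also merge similar
+clusters into one.  A hypothetical invocation (the directory names here are just
+placeholders) might look like:
+
+    phybin --complete --editdist=2 my_trees/ -o output_dir/
+
+This uses complete-linkage clustering and combines all clusters within a
+Robinson-Foulds distance of 2 of each other, in addition to the hierarchical
+clustering that is always produced.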
+
+Downloading and Installing PhyBin
+=================================
+
+The source code to PhyBin can be downloaded here:
+
+  * [Download Source from Hackage](http://hackage.haskell.org/package/phybin)
+
+PhyBin is written in Haskell, and if you have the
+  [Haskell Platform](http://hackage.haskell.org/platform/)
+installed, you can install phybin with this one-liner:
+
+    cabal install phybin
+
+Otherwise PhyBin is also available for download as a statically-linked
+executable for Mac-OS, Linux, and Windows:
+
+  * [Download Mac-OS Binary](phybin-0.2.11.mac) 
+  * [Download Linux Binary](phybin-0.2.11.x86_64_linux)
+  * [Download Windows Binary](phybin-0.2.11_windows.exe)
+  
+
+
+Command-line Options
+====================
+
+In addition to input files and directories, `phybin` supports a number
+of command-line options.  Run "phybin --help" to see these options.
+Here is a snapshot of the current help output (version 0.2.11):
+
+    Usage: phybin [OPTION...] files or directories...
+
+    PhyBin takes Newick tree files as input.  Paths of Newick files can
+    be passed directly on the command line.  Or, if directories are provided,
+    all files in those directories will be read.  Trees are named based on the files
+    containing them.  If a file contains multiple trees, all are read by phybin, and
+    the tree name then includes a suffix indicating the position in the file:
+     e.g. FILENAME_0, FILENAME_1, etc.
+
+    When clustering trees, Phybin computes a complete all-to-all Robinson-Foulds distance matrix.
+    If a threshold distance (tree edit distance) is given, then a flat set of clusters
+    will be produced in files clusterXX_YY.tr.  Otherwise it produces a full dendrogram (UNFINISHED).
+
+    Binning mode provides an especially quick-and-dirty form of clustering.
+    When running with the --bin option, only exactly equal trees are put in the same cluster.
+    Tree pre-processing still applies, however: for example collapsing short branches.
+
+    USAGE NOTES:
+     * Currently phybin ignores input trees with the wrong number of taxa.
+     * If given a directory as input phybin will assume all contained files are Newick trees.
+
+
+    Options include:
+
+      -v       --verbose         print WARNINGS and other information (recommended at first)
+      -V       --version         show version number
+      -o DIR   --output=DIR      set directory to contain all output files (default "./phybin_out/")
+	       --selftest        run internal unit tests
+
+				 ----------------------------- Clustering Options ------------------------------
+	       --bin             Use simple binning, the cheapest form of 'clustering'
+	       --single          Use single-linkage clustering (nearest neighbor)
+	       --complete        Use complete-linkage clustering (furthest neighbor)
+	       --UPGMA           Use Unweighted Pair Group Method (average linkage) - DEFAULT mode
+	       --editdist=DIST   Combine all clusters separated by DIST or less.  Report a flat list of clusters.
+				 Irrespective of whether this is activated, a hierarchical clustering (dendogram.pdf) is produced.
+				   Select Robinson-Foulds (symmetric difference) distance algorithm:
+	       --simple          use direct all-to-all comparison
+	       --hashrf          (default) use a variant of the HashRF algorithm for the distance matrix
+
+				 ----------------------------- Visualization --------------------------------
+      -g       --graphbins       use graphviz to produce .dot and .pdf output files
+      -d       --drawbins        like -g, but open GUI windows to show each bin's tree
+      -w       --view            for convenience, "view mode" simply displays input Newick files without binning
+	       --showtrees       Print (textual) tree topology inside the nodes of the dendrogram
+	       --highlight=FILE  Highlight nodes in the tree-of-trees (dendrogram) consistent with the
+				 given tree file.  Multiple highlights are permitted and use different colors.
+	       --interior        Show the consensus trees for interior nodes in the dendrogram, rather than just points.
+
+				 ---------------------------- Tree pre-processing -----------------------------
+      -n NUM   --numtaxa=NUM     expect NUM taxa for this dataset
+      -b LEN   --branchcut=LEN   collapse branches less than LEN
+
+				 --------------------------- Extracting taxa names ----------------------------
+
+      -p NUM   --nameprefix=NUM  Leaf names in the input Newick trees can be gene names, not taxa.
+				 Then it is typical to extract taxa names from genes.  This option extracts
+				 a prefix of NUM characters to serve as the taxa name.
+
+      -s STR   --namesep=STR     An alternative to --nameprefix, STR provides a set of delimiter characters,
+				 for example '-' or '0123456789'.  The taxa name is then a variable-length
+				 prefix of each gene name up to but not including any character in STR.
+
+      -m FILE  --namemap=FILE    Even once prefixes are extracted it may be necessary to use a lookup table
+				 to compute taxa names, e.g. if multiple genes/plasmids map onto one taxa.
+				 This option specifies a text file with find/replace entries of the form
+				 "<string> <taxaname>", which are applied AFTER -s and -p.
+
+				 --------------------------- Utility Modes ----------------------------
+	       --rfdist          print a Robinson Foulds distance matrix for the input trees
+	       --setdiff         for convenience, print the set difference between cluster*.txt files
+	       --print           simply print out a concise form of each input tree
+	       --printnorms      simply print out a concise and NORMALIZED form of each input tree
+	       --consensus       print a strict consensus tree for the inputs, then exit
+	       --matching        print a list of tree names that match any --highlight argument
+
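+
+For reference, the three linkage variants above (`--single`, `--complete`,
+`--UPGMA`) differ only in how the distance between two *clusters* of trees is
+derived from the pairwise tree distances.  The sketch below is illustrative
+only (it is not phybin's implementation), written over an arbitrary pairwise
+distance function:
+
+    -- Illustrative only: inter-cluster distance under each linkage criterion.
+    singleLink, completeLink, avgLink :: (a -> a -> Double) -> [a] -> [a] -> Double
+    singleLink   dist xs ys = minimum [ dist x y | x <- xs, y <- ys ]  -- nearest neighbor
+    completeLink dist xs ys = maximum [ dist x y | x <- xs, y <- ys ]  -- furthest neighbor
+    avgLink      dist xs ys = sum     [ dist x y | x <- xs, y <- ys ]
+                            / fromIntegral (length xs * length ys)     -- average (UPGMA)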
+
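+A rough sketch, in the same spirit, of how the taxa-name extraction options
+combine (`extractTaxon` is a hypothetical helper for illustration, not
+phybin's actual code): `-p` keeps a fixed-length prefix, `-s` keeps the prefix
+up to the first delimiter character, and the `-m` table is applied afterwards,
+as documented above.
+
+    import qualified Data.Map as M
+
+    -- Hypothetical helper illustrating -p / -s / -m; not phybin's implementation.
+    extractTaxon :: Maybe Int -> Maybe String -> M.Map String String -> String -> String
+    extractTaxon pre sep table leaf = M.findWithDefault raw raw table
+      where raw = case (pre, sep) of
+                    (Just n, _)  -> take n leaf                    -- -p: fixed-length prefix
+                    (_, Just ds) -> takeWhile (`notElem` ds) leaf  -- -s: prefix up to a delimiter
+                    _            -> leaf
+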
+- - - - - - - - - - - - - - -
+Authors: Irene and Ryan Newton
+
+Contact email: `irnewton` and `rrnewton` at `indiana` `edu` (with "at" and "dot" inserted).
+
+[Irene's](http://www.bio.indiana.edu/faculty/directory/profile.php?person=irnewton) and 
+[Ryan's](http://www.cs.indiana.edu/~rrnewton/homepage.html) homepages.
+
+
+
diff --git a/Setup.hs b/Setup.hs
new file mode 100644
index 0000000..a3e2dc0
--- /dev/null
+++ b/Setup.hs
@@ -0,0 +1,12 @@
+#!/usr/bin/env runhaskell
+
+import Distribution.Simple
+import Distribution.Simple.PreProcess
+import Distribution.PackageDescription	
+import Distribution.Simple.LocalBuildInfo	
+import System.Cmd(system) 
+import System.Exit
+
+main :: IO () 
+main = do putStrLn$ "Running Setup.hs ..."
+	  defaultMainWithHooks simpleUserHooks 
diff --git a/TestAll.hs b/TestAll.hs
new file mode 100644
index 0000000..9a53665
--- /dev/null
+++ b/TestAll.hs
@@ -0,0 +1,41 @@
+{-# LANGUAGE TemplateHaskell #-}
+{-# LANGUAGE ScopedTypeVariables, BangPatterns, ParallelListComp, OverloadedStrings  #-}
+module Main  where
+
+import Data.Vector         as V
+import Data.Vector.Unboxed as U
+import Data.ByteString.Char8 as B
+import Bio.Phylogeny.PhyBin
+import Bio.Phylogeny.PhyBin.CoreTypes
+import Bio.Phylogeny.PhyBin.RFDistance
+import Bio.Phylogeny.PhyBin.Parser
+import System.IO
+import Prelude as P
+
+import qualified Test.HUnit as HU
+-- import Test.Framework
+import Test.Framework.Providers.HUnit
+import Test.Framework.TH (defaultMainGenerator)
+
+-- Here we compare a tree against the same tree with 18 removed:
+-- Or the same tree with one intermediate node removed.
+(_,t30) = parseNewicks id $
+          [ ("t30_0", "(2_, (((14, 3_), (19, (5_, 13))), (18, (6_, 7_))), 1_);")
+          , ("t30_1", "(2_, (((14, 3_), (19, (5_, 13))), (6_, 7_)), 1_);")
+          , ("t30_2", "(2_, (((14, 3_), (19, (5_, 13))), (18, 6_, 7_)), 1_);")              
+          ]
+
+t30' = naiveDistMatrix$ P.map nwtree t30
+ -- expected lower-triangular distance matrix: "[[],[0],[1,0]]"
+
+case_t30 = HU.assertEqual "3-tree distance matrix"
+              "[[],[0],[1,0]]" (showMat t30')
+
+-- m = printDistMat stdout (fst t30')
+
+
+-- Simple show for a distance matrix:
+showMat (m,_) = show$ V.toList$ V.map U.toList m
+
+main = $(defaultMainGenerator)
+
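+-- Added illustration (not one of the upstream tests): showMat above prints the
+-- distance matrix in lower-triangular form.  Assuming that shape, this helper
+-- expands it into the full symmetric matrix, purely for easier reading:
+--   expandLowerTri [[],[0],[1,0]] == [[0,0,1],[0,0,0],[1,0,0]]
+expandLowerTri :: [[Int]] -> [[Int]]
+expandLowerTri rows = [ [ at i j | j <- [0 .. n-1] ] | i <- [0 .. n-1] ]
+  where
+    n = P.length rows
+    at i j | i == j    = 0
+           | i >  j    = rows !! i !! j
+           | otherwise = rows !! j !! i
+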
diff --git a/old_script/cheztrace.ss b/old_script/cheztrace.ss
new file mode 100644
index 0000000..b11b7b4
--- /dev/null
+++ b/old_script/cheztrace.ss
@@ -0,0 +1,62 @@
+;; [2005.11.13 ]  Having a go at implementing these in PLT:
+;; DOESN'T WORK YET.
+
+(module cheztrace mzscheme
+;  (require (lib "errortrace.ss" "errortrace")
+;           (lib "stacktrace.ss" "errortrace"))
+  (require (lib "trace.ss"))
+  
+  (provide trace-lambda trace-define trace-let inspect break)
+
+  ;; Just stubs.
+  (define (inspect x) (void))
+  (define (break x) x)
+  
+  
+  #;
+  (define-syntax trace-lambda
+    (lambda (x)
+      (syntax-case x ()
+        [(_ params  bods ...) 
+         #`(lambda params 
+             #,(annotate #'(begin bods ...) #f))])))
+
+  (define-syntax trace-lambda
+    (lambda (x)
+      (syntax-case x ()
+        [(_ name (params ...) bods ...)
+         (identifier? #'name)
+         #'(let ((name (lambda (params ...) bods ...)))
+             (trace name)
+             name)])))
+#|      
+  (define-syntax trace-define
+    (lambda (x)
+      (syntax-case x ()
+        [(_ (id v ...) e) #'(begin (define (id v ...) e) (trace id))]
+        [(_ x e) (identifier? #'x) #'(begin (define x e) (trace x))])))
+  
+  (define-syntax trace-let
+    (syntax-rules ()
+      [(_ n ([l* r*] ...) bods ...)
+       (letrec ((n (lambda (l* ...) bods ...)))
+         (trace n)
+         (n r* ...))]))
+|#
+
+;; [2006.03.23] For now killing these ^^^, not sure how much they worked.
+
+;; These simply don't do anything under PLT for the moment.
+(define-syntax trace-define (syntax-rules () [(_ e ...) (define e ...)]))
+(define-syntax trace-let (syntax-rules () [(_ e ...) (let e ...)]))
+  
+
+)
+
+;(require cheztrace)
+;(define f (trace-lambda (x) x (if (odd? x) (error 'foo "") (add1 x))))
+;(f 40)
+;(f 39)
+
+;(trace-let loop ((n 10))
+;           (if (> n 0) (loop (sub1 n))))
diff --git a/old_script/find_tree_matches.ss b/old_script/find_tree_matches.ss
new file mode 100644
index 0000000..969f2e9
--- /dev/null
+++ b/old_script/find_tree_matches.ss
@@ -0,0 +1,420 @@
+#! /bin/bash
+#|
+exec mzscheme -qu "$0" ${1+"$@"}
+|#
+;;====================================================================================================
+;; This is a script to parse .dnd tree files.
+
+;; NOTE: This was the "Version 1" script and has been replaced by the Haskell version, phybin.hs
+
+;;====================================================================================================
+
+;; [2009.12.06] Hacking it to work with wolbachia_A_group/input
+
+(module find_tree_matches mzscheme  
+  ;; Import the parser and lexer generators.
+  (require (lib "yacc.ss" "parser-tools")
+	   (lib "lex.ss" "parser-tools")
+	   (prefix : (lib "lex-sre.ss" "parser-tools"))
+	   (lib "list.ss")
+	   ;(lib "iu-match.ss")
+	   "iu-match.ss"
+	   )
+;  (provide (all-defined))
+
+(require (lib "pretty.ss")
+	 (lib "process.ss")
+	 (prefix srfi: (lib "1.ss" "srfi")))
+  
+;;====================================================================================================
+;; First define the PARSER
+;;====================================================================================================
+
+(define-empty-tokens op-tokens 
+   (LeftParen RightParen Comma SemiColon Colon
+    EOF ))
+
+(define-tokens value-tokens (NAME NUM))
+
+(define-lex-abbrevs (lower-letter (:/ #\a #\z))
+                    (upper-letter (:/ #\A #\Z))
+		    (alpha (:or lower-letter upper-letter))
+		    (digit (:/ #\0 #\9))
+                    (digits (:+ digit))
+		    (letter (:or alpha digit "." "'" "-" "/"))
+		    
+		    ;; This requires that names have an underscore in them:
+		    ;; That's what distinguishes them from numbers:
+		    ;(name (:seq (:+ letter) (:+ (:seq "_" (:* letter)))))
+		    
+		    ;; [2010.08.13] I can't remember why I did that... were there gene names that started with numbers??
+		    (name (:seq alpha (:+ (:or letter "_"))))
+		    
+		    ;; [2009.12.06] Odd, getting negative branch lengths!!
+		    ;; [2009.12.07] Ran into some trees with 0 distance and no decimal point:
+		    (number (:seq (:? "-") digits (:? (:seq "." digits (:? (:seq "E-" digits))))))
+		    ;(number (:seq digit digits))
+		    )
+
+(define lex
+  (lexer-src-pos
+
+   [(eof) 'EOF]
+   
+   ;; Ignore all whitespace:
+   [(:or #\tab #\space #\newline #\return) (return-without-pos (lex input-port))]
+
+   ["(" 'LeftParen]
+   [")" 'RightParen]  
+   ["," 'Comma] 
+   [";" 'SemiColon] 
+   [":" 'Colon]
+
+   ;; Like most of these scripts, this is a hack.  It assumes all node IDs in the tree start with a particular string.
+   
+   ;[(:seq "c" name) (token-NAME lexeme)]
+   [name (token-NAME lexeme)]
+
+   [number (token-NUM (string->number lexeme))]
+   ; [number (token-NUM  lexeme)]
+   
+   ))
+
+(define (format-pos pos)
+  (if (position-line pos)
+      (format "line ~a:~a" (position-line pos) (position-col pos))
+      (format "char ~a" (position-offset pos))))
+
+(define parse
+ (parser        
+   (src-pos)
+   (start wholefile)
+   (end EOF)
+
+  (tokens value-tokens op-tokens)
+  (error (lambda (a b c start end) 
+            (fprintf (current-error-port)
+		    "PARSE ERROR: after ~a token, ~a~a.\n" 
+                    (if a "valid" "invalid") b 
+                    (if c (format " carrying value ~s" c) ""))
+            (fprintf (current-error-port)
+		     "  Located between ~a and ~a.\n"
+                    (format-pos start) (format-pos end))))
+   ;; Precedence:
+  ;(precs  	  )
+   ;(debug "_parser.log")
+
+;    Tree: The full input Newick Format for a single tree
+;    Subtree: an internal node (and its descendants) or a leaf node
+;    Leaf: a leaf node
+;    Internal: an internal node (and its descendants)
+;    BranchSet: a set of one or more Branches
+;    Branch: a tree edge and its descendant subtree.
+;    Name: the name of a node
+;    Length: the length of a tree edge.
+;    The grammar rules:
+
+(grammar
+
+   (wholefile [(Subtree SemiColon) $1]
+	      [(Branch SemiColon)  $1])
+   (Subtree [(Leaf) $1]
+            [(Internal) $1])
+   (Leaf [(Name) $1])
+   (Internal [(LeftParen BranchSet RightParen Name) 
+	      (if $4 (cons $4 $2) $2)])
+
+   (BranchSet [(Branch) (list $1)]
+              ;[(BranchSet Comma Branch) (cons $1 $3)]
+	      [(Branch Comma BranchSet) (cons $1 $3)]
+	      )
+   (Branch [(Subtree Length) (list $2 $1)])
+
+   (Name [() #f]
+         [(NAME) $1])
+   (Length [() 0]
+           [(Colon NUM) $2])
+)
+
+#;
+   (grammar
+
+    (wholefile [(tree SemiColon) $1]
+	       ;; ACK making semicolon and final tag optional!!
+	       [(tree) $1]
+	       [(LeftParen nodes+ RightParen) (cons 0.0 $2)]
+	       [(LeftParen nodes+ RightParen SemiColon) (cons 0.0 $2)]
+	       )
+
+    (numtag [(Colon NUM) $2]
+            ;; [2010.08.13] Making them optional everywhere: 
+            [() 0.0]
+	    )
+
+
+    (tree [(NAME numtag) (list $2 $1)]
+	  [(LeftParen nodes+ RightParen numtag) (cons $4 $2)])
+    
+    (nodes+ [(tree) (list $1)]
+	    [(tree Comma nodes+) (cons $1 $3)])
+)
+
+))
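+
+;; Example of the shapes these grammar actions build (added for illustration;
+;; not part of the original script): a Branch becomes (Length Subtree), a
+;; Leaf is just its name string, and an Internal node is the list of its
+;; branches (with the node's name consed on when one is present).  So a clade
+;; such as
+;;   (CC:0.3,DD:0.4):0.5
+;; should come out roughly as
+;;   (0.5 ((0.3 "CC") (0.4 "DD")))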
+
+;; ================================================================================
+
+(define (parse-file f) 
+    (let ([p (open-input-file f)])    
+      (let ([res (parse-port p)])
+        ;(printf "PARSED: \n")(pretty-print  res)
+	(close-input-port p)
+	res)))
+
+(define (parse-port ip)
+  (port-count-lines! ip)
+  (parse (lambda () 
+            (let ((lexed (lex ip)))
+	      ;(printf "LEXED: ~s\n" (position-token-token lexed))
+	      (flatten lexed))))
+  )
+
+(define (flatten pt)
+  (let loop ((x pt))
+        (if (position-token? (position-token-token x))
+            (begin (error 'flatten "No nested position-tokens!")
+              (loop (position-token-token x)))
+            x)))
+
+(define allargs (vector->list (current-command-line-arguments)))
+(define pretty? #t)
+ ;; End main
+
+(print-graph #t)
+(print-vector-length #f)
+
+
+;;====================================================================================================
+;; Now normalize the trees.
+;;====================================================================================================
+
+;; The basic idea here is to sort branches by some criterion.
+;; If that criterion turns out to be insufficient... then we can add extra criteria.
+(define (normalize-tree tr)
+  ;; This returns a branch-sorted tree
+  (define (loop tr )
+    (match tr
+      [(,dist ,name) (guard (string? name))
+        (values (list dist name) 1 1)]
+      [(,dist ,x)
+       (error 'normalize-tree "was not expecting interior nodes with one sub-branch... ~s" (list dist x))]
+      [(,dist ,[nodes sizes depths] ...)
+       ;; This creates an ugly data type (list overuse) with both the branches and their size/depth measurement:
+       (define sorted 
+	 (sort 
+	  (map list nodes sizes depths)
+	  (lambda (ls1 ls2) 
+	    ;(printf "Comparing ~s and ~s\n" (cdr ls1) (cdr ls2))
+	    (or (< (cadr ls1) (cadr ls2))
+		(and (= (cadr ls1) (cadr ls2))
+		     (< (caddr ls1) (caddr ls2)))))
+	  ))	
+        ;(printf "\n ~a Node sizes ~s and depths ~s\n" dist sizes depths)
+	;; If there are any non-leaf "plateaus" in the ramp then we can't deal with that yet:
+       (let loop ((last (car sorted)) 
+		  (rest (cdr sorted)))
+	 (cond 
+	  [(null? rest) (void)]
+	  ;; If they're equal according to the size/depth metric:
+	  [(and (equal? (cdr last) (cdar rest))
+		;; And non-leaf:
+		(not (equal? (cdr last) '(1 1))))
+	   ;; Ok, given that we have a match, that's still ok if they are the SAME tree. 
+	   ;; The only problem is if they're different in a way that our metric doesn't capture.
+	   (if (tree-shape-equals? (car last) (caar rest))
+	       (void)
+	       (begin
+	         (printf "\nERRORful subbranches:\n")
+		 (pretty-print (car last))
+		 (pretty-print (caar rest))
+		 (error 'normalize-tree "Cannot yet handle same depth/size signatures of sub-branches: ~s ~s" 
+			(cdr last) (cdar rest))))]
+	  [else (loop (car rest) (cdr rest))]))
+
+        (values (cons dist (map car sorted))
+		(apply + 1 sizes)
+		(add1 (apply max depths))
+		)]))
+  (define-values (tree _ __) (loop tr))
+  tree)
+
+;; Assumes that the trees are already in normal form:
+;; Compares tree shape and leaf nodes, IGNORES interior nodes.
+(define (tree-equals? tr1 tr2)
+  ;(printf "EQUALS? ~s ~s\n" tr1 tr2)
+  (match (cons tr1 tr2)
+    [((,dist1 ,name1) . (,dist2 ,name2))
+     (guard (string? name1) (string? name2))
+     (if 
+          #t                    ;; -- This would give you only shape.
+	  (string=? name1 name2) ;; -- This requires that the leaf names be the same.
+	  )
+     ]
+    [((,dist1 ,nodes1 ...) . (,dist2 ,nodes2 ...))
+     (and (equal? (length nodes1) (length nodes2))
+	  (andmap tree-equals? nodes1 nodes2))]
+    [(,s1 . ,s2) (guard (string? s1) (string? s2)) (error 'tree-equals? "implementation assumption violated... oops")]
+    [,else #f]))
+
+(define (tree-shape-equals? tr1 tr2)
+  ;(printf "\nSHAPE EQUALS? ~s ~s\n" tr1 tr2)
+  (match (cons tr1 tr2)
+    [((,dist1 ,name1) . (,dist2 ,name2))
+     (guard (string? name1) (string? name2))
+     #t ;; -- This would give you only shape.
+     ]
+    [((,dist1 ,nodes1 ...) . (,dist2 ,nodes2 ...))
+     (and (equal? (length nodes1) (length nodes2))
+	  (andmap tree-shape-equals? nodes1 nodes2))]
+    [(,s1 . ,s2) (guard (string? s1) (string? s2)) (error 'tree-shape-equals? "implementation assumption violated... oops")]
+    [,else #f]))
+
+
+;;====================================================================================================
+;; TESTS
+;;====================================================================================================
+
+(define tests 
+ '(
+   "(,,(,));"                               ; no nodes are named
+   "(A,B,(C,D));"                           ; leaf nodes are named
+   "(A,B,(C,D)E)F;"                         ; all nodes are named
+   "(:0.1,:0.2,(:0.3,:0.4):0.5);"           ; all but root node have a distance to parent
+   "(:0.1,:0.2,(:0.3,:0.4):0.5):0.0;"       ; all have a distance to parent
+   "(A:0.1,B:0.2,(C:0.3,D:0.4):0.5);"       ; distances and leaf names (popular)
+   "(A:0.1,B:0.2,(C:0.3,D:0.4)E:0.5)F;"     ; distances and all names
+   "((B:0.2,(C:0.3,D:0.4)E:0.5)F:0.1)A;"    ; a tree rooted on a leaf node (rare)
+   ))
+
+(define (test)  (map parse tests))
+
+;;====================================================================================================
+;; Here's our script "main"
+;;====================================================================================================
+
+(define (new-main files)
+  (define __ (begin (printf "\nBeginning processing of ~s files\n" (length files))))
+  ; (define parsed (parse-file file))
+  ; (define normalized (normalize-tree parsed))
+  ;(pretty-print parsed) ;(newline)
+  ;(printf "NORMALIZED ~s\n " file)
+  ;(pretty-print normalized)
+  ;;(exit)
+
+  (define ___ (begin (printf "Parsing and normalizing trees: ")(flush-output)))
+  (define all-trees
+    (map (lambda (file)
+           ;(define __ (begin (printf " ~a " file)(flush-output)))
+	   (define parsed (parse-file file))
+	   (define normalized (normalize-tree parsed))
+	   (printf ".") (flush-output)
+	   (list file normalized))
+      files))
+
+  ;; Type: ((tree file ...) ...)
+  ;; Each equivalence class is represented by (tree file ...):
+  ;; only a single representative tree is kept, followed by the files that matched it.
+  (define classes '())
+
+  (if (null? (cdr all-trees))
+      (begin
+        ;; If we have one input just print it:
+	(printf "\nOnly one file as input.  Not making equivalence classes, just printing it:\n")
+	(pretty-print (cadar all-trees))        
+	)
+      (begin 
+	(printf "\nComputing equivalence classes on ~s trees: " (length all-trees)) (flush-output)
+	(for-each
+	    (lambda (entry)
+	      (define found-hit #f)
+	      (match entry
+		[(,file ,normtree)
+		 (let ([new (map (lambda (class) 
+				   (if (tree-equals? normtree (car class))
+				       (begin 
+					 (set! found-hit #t)
+					 (cons (car class) (cons file (cdr class))))
+				       class))
+			      classes)])
+		   (printf ".") (flush-output)		    
+		   (set! classes
+			 (if found-hit new
+			     (cons `(,normtree ,file) classes))))
+		 ]))
+	  all-trees)
+	(newline)
+
+	;; Now sort them:
+	(set! classes (sort classes (lambda (a b) (< (length a) (length b)))))
+
+	(printf "Finished.  ~s files fell into ~s equivalence classes\n" (length files) (length classes))
+	(printf "Sizes of each equivalence class:  ")
+	(for-each (lambda (class) (printf "  ~s" (length (cdr class)))) classes)
+	(newline)
+	(printf "Dumping lists of files to class<N>.txt, and representative tree to class<N>.tree...\n")
+
+	(system "rm -f class*.txt class*.tree")
+	
+	(for-each (lambda (i class) 
+		;(when (file-exists? (format "class~s.txt" i))  (delete-file (format "class~s.txt" i)))
+		;(when (file-exists? (format "class~s.tree" i)) (delete-file (format "class~s.tree" i)))
+		(with-output-to-file (format "class~s.txt" i)
+		  (lambda ()
+		    (for-each (lambda (file) (printf "~a " file)) (cdr class))
+		    (newline)))
+		(with-output-to-file (format "class~s.tree" i)
+		  (lambda () (pretty-print (car class))))
+		)
+      (srfi:iota (length classes))
+      classes)
+)))
+
+(define (print-usage)
+  (printf "Usage: find_tree_matches <file-to-process> ...\n\n"))
+
+
+;COUNTING: ((0 ((0 "lpg140") (0 "lpl135"))) (0 "lpp135") (0 "LPC_081"))
+
+(define (sumcount stuff) 
+  ;(printf "sumcount ~s\n" stuff)
+  (if (or (string? stuff) (not stuff)) 0
+      (foldl (lambda (x acc) (+ acc (count x))) 0 stuff)))
+
+(define (count tree)
+  ;(define __ (printf "COUNTING: ~s\n" tree))
+     (if (number? (car tree))
+	  (+ 1 (sumcount (cadr tree)))
+	  (sumcount tree)))
+
+;; BENCHMARK TEST:
+
+;(for-each (lambda (file) (parse-file file)) allargs)
+
+(pretty-print (foldl + 0 (map (lambda (file) (count (parse-file file))) allargs)))
+;(pretty-print (map (lambda (file) (pretty-print (count (parse-file file)))) allargs))
+
+;(pretty-print (count '((0 ((0 "lpg292") (0 "lpl285"))) (0 "lpp299") (0 "LPC_323"))))
+
+
+
+#;
+(begin
+  (when (null? allargs) 
+    (print-usage) (flush-output)
+  ;(error 'find_tree_matches "no files given")
+  (printf "Error: no files given!\n")
+  (exit 1))
+  (new-main allargs))
+
+
+) ;; End module.
+
diff --git a/old_script/iu-exptime.ss b/old_script/iu-exptime.ss
new file mode 100644
index 0000000..855c341
--- /dev/null
+++ b/old_script/iu-exptime.ss
@@ -0,0 +1,67 @@
+;;; match.ss 
+;;; (broken out for PLT port only)
+
+;;; This program was originally designed and implemented by Dan
+;;; Friedman.  It was redesigned and implemented by Erik Hilsdale;
+;;; some improvements were suggested by Steve Ganz.  Additional
+;;; modifications were made by Kent Dybvig. Original port to PLT Scheme
+;;; by Matthew Flatt. As of 20020328, Matt Jadud is maintaining
+;;; the PLT Scheme port of this code.
+
+;;; Copyright (c) 1992-2002 Cadence Research Systems
+;;; Permission to copy this software, in whole or in part, to use this
+;;; software for any lawful purpose, and to redistribute this software
+;;; is granted subject to the restriction that all copies made of this
+;;; software must include this copyright notice in full.  This software
+;;; is provided AS IS, with NO WARRANTY, EITHER EXPRESS OR IMPLIED,
+;;; INCLUDING BUT NOT LIMITED TO IMPLIED WARRANTIES OF MERCHANTABILITY
+;;; OR FITNESS FOR ANY PARTICULAR PURPOSE.  IN NO EVENT SHALL THE
+;;; AUTHORS BE LIABLE FOR CONSEQUENTIAL OR INCIDENTAL DAMAGES OF ANY
+;;; NATURE WHATSOEVER.
+
+;;; If you are reading this comment, then you do not have
+;;; the most recent version of 'match'.
+
+
+(module iu-exptime mzscheme
+  (provide syntax-error
+	   let-synvalues*
+	   with-values syntax-lambda
+	   with-temp with-temps)
+
+  (define (syntax-error e m)
+    (raise-syntax-error #f m e))
+
+  (define-syntax let-synvalues*
+    (syntax-rules ()
+      ((_ () B0 B ...) (begin B0 B ...))
+      ((_ (((Formal ...) Exp) Decl ...) B0 B ...)
+       (call-with-values (lambda () Exp)
+	 (lambda (Formal ...)
+	   (with-syntax ((Formal Formal) ...)
+	     (let-synvalues* (Decl ...) B0 B ...)))))))
+
+  (define-syntax with-values
+      (syntax-rules ()
+        ((_ P C) (call-with-values (lambda () P) C))))
+
+  (define-syntax syntax-lambda
+      (lambda (x)
+        (syntax-case x ()
+          ((_ (Pat ...) Body0 Body ...)
+           (with-syntax (((X ...) (generate-temporaries #'(Pat ...))))
+             #'(lambda (X ...)
+                 (with-syntax ((Pat X) ...)
+                   Body0 Body ...)))))))
+
+    (define-syntax with-temp
+      (syntax-rules ()
+        ((_ V Body0 Body ...)
+         (with-syntax (((V) (generate-temporaries '(x))))
+           Body0 Body ...))))
+    (define-syntax with-temps
+      (syntax-rules ()
+        ((_ (V ...) (Exp ...) Body0 Body ...)
+         (with-syntax (((V ...) (generate-temporaries #'(Exp ...))))
+           Body0 Body ...))))
+)
diff --git a/old_script/iu-match.ss b/old_script/iu-match.ss
new file mode 100644
index 0000000..c963cca
--- /dev/null
+++ b/old_script/iu-match.ss
@@ -0,0 +1,698 @@
+;;; match.ss
+
+;;; This program was originally designed and implemented by Dan
+;;; Friedman.  It was redesigned and implemented by Erik Hilsdale;
+;;; some improvements were suggested by Steve Ganz.  Additional
+;;; modifications were made by Kent Dybvig. Original port to PLT Scheme
+;;; by Matthew Flatt. As of 20020328, Matt Jadud is maintaining
+;;; the PLT Scheme port of this code.
+
+;;; Copyright (c) 1992-2002 Cadence Research Systems
+;;; Permission to copy this software, in whole or in part, to use this
+;;; software for any lawful purpose, and to redistribute this software
+;;; is granted subject to the restriction that all copies made of this
+;;; software must include this copyright notice in full.  This software
+;;; is provided AS IS, with NO WARRANTY, EITHER EXPRESS OR IMPLIED,
+;;; INCLUDING BUT NOT LIMITED TO IMPLIED WARRANTIES OF MERCHANTABILITY
+;;; OR FITNESS FOR ANY PARTICULAR PURPOSE.  IN NO EVENT SHALL THE
+;;; AUTHORS BE LIABLE FOR CONSEQUENTIAL OR INCIDENTAL DAMAGES OF ANY
+;;; NATURE WHATSOEVER.
+
+;;; A complete CHANGELOG and some documentation can be found at the 
+;;; end of this file.
+
+;;; If you are reading this comment, then you do not have
+;;; the most recent version of 'match'.
+
+;; =============================================================
+
+;; Exp    ::= (match              Exp Clause)
+;;         || (trace-match        Exp Clause)
+;;         || (match+       (Id*) Exp Clause*)
+;;         || (trace-match+ (Id*) Exp Clause*)
+;;         || OtherSchemeExp
+
+;; Clause ::= (Pat Exp+) || (Pat (guard Exp*) Exp+)
+
+;; Pat    ::= (Pat ... . Pat)
+;;         || (Pat . Pat)
+;;         || ()
+;;         || #(Pat* Pat ... Pat*)
+;;         || #(Pat*)
+;;         || ,Id
+;;         || ,[Id*]
+;;         || ,[Cata -> Id*]
+;;         || Id
+
+;; Cata   ::= Exp
+
+;; YOU'RE NOT ALLOWED TO REFER TO CATA VARS IN GUARDS. (reasonable!)
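+
+;; Usage example (added for illustration; the CHANGELOG at the end of this
+;; file has more):
+;;
+;;   (match '(plus 1 2)
+;;     [(plus ,a ,b) (guard (number? a) (number? b)) (+ a b)]
+;;     [,other 'unknown])
+;;   ;; => 3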
+
+#cs
+(module iu-match mzscheme
+  (provide match+ trace-match+ match trace-match )
+  (require-for-syntax "iu-exptime.ss" (lib "stx.ss" "syntax"))
+  (require (lib "list.ss") (lib "etc.ss") )
+  ;; RRN: [2005.11.13]
+  (require "cheztrace.ss")
+  
+  (define (make-list n v)
+    (vector->list (make-vector n v)))
+
+
+  (define match-equality-test
+  (make-parameter
+   equal?
+   (lambda (x)
+     (unless (procedure? x)
+	     (error 'match-equality-test "~s is not a procedure" x))
+     x)))
+  
+  (define-syntax match+
+    (lambda (x)
+      (syntax-case x ()
+	[(_ (ThreadedId ...) Exp Clause ...)
+	 #'(let f ((ThreadedId ThreadedId) ... (x Exp))
+	     (match-help _ f x (ThreadedId ...) Clause ...))])))
+
+  (define-syntax match
+    (lambda (x)
+      (syntax-case x ()
+	[(_ Exp Clause ...)
+	 #'(let f ((x Exp))
+	     (match-help _ f x () Clause ...))])))
+
+  (define-syntax trace-match+
+    (lambda (x)
+      (syntax-case x ()
+	[(_ (ThreadedId ...) Name Exp Clause ...)
+	 #'(letrec ((f (trace-lambda Name (ThreadedId ... x)
+				     (match-help _ f x (ThreadedId ...) Clause ...))))
+	     (f ThreadedId ... x))])))
+
+  (define-syntax trace-match
+    (lambda (x)
+      (syntax-case x ()
+	[(_ Name Exp Clause ...)
+	 #'(letrec ((f (trace-lambda Name (x)
+				     (match-help _ f x () Clause ...))))
+	     (f Exp))])))
+
+;;; ------------------------------
+
+  (define-syntax let-values**
+    (syntax-rules ()
+      ((_ () B0 B ...) (begin B0 B ...))
+      ((_ ((Formals Exp) Rest ...) B0 B ...)
+       (let-values** (Rest ...) 
+		     (call-with-values (lambda () Exp)
+		       (lambda Formals B0 B ...))))))
+
+  (define-syntax match-help
+    (lambda (x)
+      (syntax-case x ()
+	((_ Template Cata Obj ThreadedIds)
+	 #'(error 'match "Unmatched datum: ~s" Obj))
+	((_ Template Cata Obj ThreadedIds (Pat B0 B ...) Rest ...)
+	 #'(convert-pat Pat
+			(match-help1 Template Cata Obj ThreadedIds 
+				     (B0 B ...)
+				     Rest ...))))))
+
+  (define-syntax match-help1
+    (syntax-rules (guard)
+      ((_ PatLit Vars Cdecls Template Cata Obj ThreadedIds
+	  ((guard G ...) B0 B ...) Rest ...)
+       (let ((ls/false (sexp-dispatch Obj PatLit)))
+	 (if (and ls/false (apply (lambda Vars
+				    (guard-body Cdecls
+						(extend-backquote Template (and G ...))))
+				  ls/false))
+	     (apply (lambda Vars
+		      (clause-body Cata Cdecls ThreadedIds
+				   (extend-backquote Template B0 B ...)))
+		    ls/false)
+	     (match-help Template Cata Obj ThreadedIds Rest ...))))
+      ((_ PatLit Vars Cdecls Template Cata Obj ThreadedIds
+	  (B0 B ...) Rest ...)
+       (let ((ls/false (sexp-dispatch Obj PatLit)))
+	 (if ls/false
+	     (apply (lambda Vars
+		      (clause-body Cata Cdecls ThreadedIds
+				   (extend-backquote Template B0 B ...)))
+		    ls/false)
+	     (match-help Template Cata Obj ThreadedIds Rest ...))))))
+
+  (define-syntax clause-body
+    (lambda (x)
+      (define build-mapper
+	(lambda (vars depth cata tIds)
+	  (if (zero? depth)
+	      cata
+	      (with-syntax ((rest (build-mapper vars (- depth 1) cata tIds))
+			    (vars vars)
+			    (tIds tIds))
+		#'(mapper rest vars tIds)))))
+      (syntax-case x ()
+	((_ Cata ((CVar CDepth CMyCata CFormal ...) ...) (ThreadedId ...) B)
+	 (with-syntax (((Mapper ...)
+			(map (lambda (mycata formals depth)
+			       (build-mapper formals
+					     (syntax-object->datum depth)
+					     (syntax-case mycata ()
+					       [#f #'Cata]
+					       [exp #'exp])
+					     #'(ThreadedId ...)))
+			     (syntax->list #'(CMyCata ...))
+			     (syntax->list #'((CFormal ...) ...))
+			     (syntax->list #'(CDepth ...))))
+		       (((ThreadedId ...) ...)
+			(map (lambda (x) #'(ThreadedId ...))
+			     (syntax->list #'((CFormal ...) ...)))))
+	   #'(let-values** (([ThreadedId ... CFormal ...]
+			     (Mapper ThreadedId ... CVar))
+			    ...)
+			   B))))))
+
+  (define-syntax guard-body
+    (lambda (x)
+      (syntax-case x ()
+	((_ ((Cvar Cdepth MyCata Cformal ...) ...) B)
+	 (with-syntax (((CF ...) (apply append (map syntax->list (syntax->list #'((Cformal ...) ...))))))
+	   #'(let-syntax
+		 ((CF
+		   (lambda (x)
+		     (syntax-case x ()
+		       (Name
+                        (syntax-error #'Name
+				      "guard cannot refer to return-value variable")))))
+		  ...)
+	       B))))))
+
+  (define-syntax convert-pat
+    ;; returns sexp-pat x vars x cdecls
+    (let ()
+      (define ellipsis?
+	(lambda (x)
+	  (and (identifier? x) (eq? (syntax-e x) '...))))
+      (define Var?
+	(lambda (x)
+	  (syntax-case x (->)
+	    [-> #f]
+	    [id (identifier? #'id)])))
+      (define (f syn vars cdecls depth)
+	(syntax-case syn (unquote)
+	  ((unquote . stuff) ; separate for better error detection
+	   (syntax-case syn (unquote ->)
+	     ((unquote [MyCata -> Var ...])
+	      (andmap Var? (syntax->list #'(Var ...)))
+	      (with-syntax (((Temp) (generate-temporaries '(x)))
+			    (Depth depth))
+		(values #'any
+			(cons #'Temp vars)
+			(cons #'[Temp Depth MyCata Var ...] cdecls))))
+	     ((unquote [Var ...])
+	      (andmap Var? (syntax->list #'(Var ...)))
+	      (with-syntax (((Temp) (generate-temporaries '(x)))
+			    (Depth depth))
+		(values #'any
+			(cons #'Temp vars)
+			(cons #'[Temp Depth #f Var ...] cdecls))))
+	     ((unquote Var)
+	      (Var? #'Var)
+	      (values #'any (cons #'Var vars) cdecls))))
+	  (((unquote . stuff) Dots) ; separate for better error detection
+	   (ellipsis? #'Dots)
+	   (syntax-case syn (unquote ->)
+	     (((unquote [MyCata -> Var ...]) Dots)
+	      (andmap Var? (syntax->list #'(Var ...)))
+	      (with-syntax (((Temp) (generate-temporaries '(x)))
+			    (Depth+1 (add1 depth)))
+		(values #'each-any
+			(cons #'Temp vars)
+			(cons #'[Temp Depth+1 MyCata Var ...] cdecls))))
+	     (((unquote [Var ...]) Dots)
+	      (andmap Var? (syntax->list #'(Var ...)))
+	      (with-syntax (((Temp) (generate-temporaries '(x)))
+			    (Depth+1 (add1 depth)))
+		(values #'each-any
+			(cons #'Temp vars)
+			(cons #'[Temp Depth+1 #f Var ...] cdecls))))
+	     (((unquote Var) Dots)
+	      (Var? #'Var)
+	      (values #'each-any (cons #'Var vars) cdecls))
+	     ((expr Dots) (syntax-error #'expr "match-pattern unquote syntax"))))
+	  ((Pat Dots)
+	   (ellipsis? #'Dots)
+	   (let-synvalues* (((Dpat Dvars Dcdecls)
+			     (f #'Pat vars cdecls (add1 depth))))
+;                           (printf "GOT VARS: ~s\n" vars)
+			   (with-syntax ((Size (- (length (syntax->list #'Dvars))
+						  ;; RRN: this is screwy:
+						  (length 
+						   (if (syntax? vars)
+						       (syntax->list vars)
+						       vars))
+						  )))
+			     ;;MCJ wrapped syntax->list around vars 
+			     (values #'#(each Dpat Size) #'Dvars #'Dcdecls))))
+	  ((Pat Dots . Rest)
+	   (ellipsis? #'Dots)
+	   (let-synvalues* (((Rpat Rvars Rcdecls)
+			     (f #'Rest vars cdecls depth))
+			    ((Dpat Dvars Dcdecls)
+			     (f #'(Pat (... ...)) #'Rvars #'Rcdecls
+				depth)))
+			   (with-syntax ((Size (- (length (syntax->list #'Dvars)) (length (syntax->list #'Rvars))))
+					 ((RevRestTl . RevRest) (reverseX (syntax->list #'Rpat) '())))
+			     (values #'#(tail-each Dpat Size RevRest RevRestTl)
+				     #'Dvars #'Dcdecls))))
+	  ((X . Y)
+	   (let-synvalues* (((Ypat Yvars Ycdecls)
+			     (f #'Y vars cdecls depth))
+			    ((Xpat Xvars Xcdecls)
+			     (f #'X #'Yvars #'Ycdecls depth)))
+			   (values #'(Xpat . Ypat) #'Xvars #'Xcdecls)))
+	  (() (values #'() vars cdecls))
+	  (#(X ...)
+	   (let-synvalues* (((Pat Vars CDecls) (f #'(X ...) vars cdecls depth)))
+			   (values #'#(vector Pat) #'Vars #'CDecls)))
+	  (Thing (values #'#(atom Thing) vars cdecls))))
+      (define reverseX
+	(lambda (ls acc)
+	  (if (pair? ls)
+	      (reverseX (cdr ls) (cons (car ls) acc))
+	      (cons ls acc))))
+      (lambda (syn) 
+	(syntax-case syn ()
+	  ((_ syn (kh . kt))
+	   (let-synvalues* (((a b c) (f #'syn '() '() 0)))
+			   #'(kh 'a b c . kt)))))))
+
+  (define-syntax mapper
+    (lambda (x)
+      (syntax-case x ()
+	((_ F (RetId ...) (ThreadId ...))
+	 (with-syntax (((t ...) (generate-temporaries #'(RetId ...)))
+		       ((ts ...) (generate-temporaries #'(RetId ...)))
+		       ((null ...) (map (lambda (x) #'()) (syntax->list #'(RetId ...)))))
+	   #'(let ((fun F))
+	       (rec g
+		    (lambda (ThreadId ... ls)
+		      (if (null? ls)
+			  (values ThreadId ... null ...)
+			  (call-with-values
+			      (lambda () (g ThreadId ... (cdr ls)))
+			    (lambda (ThreadId ... ts ...)
+			      (call-with-values
+				  (lambda () (fun ThreadId ... (car ls)))
+				(lambda (ThreadId ... t ...)
+				  (values ThreadId ... (cons t ts) ...))))))))))))))
+
+;;; ------------------------------
+
+  (define-syntax my-backquote
+    (lambda (x)
+      (define ellipsis?
+	(lambda (x)
+	  (and (identifier? x) (eq? (syntax-e x) '...))))
+      (define destruct
+	(lambda (Orig x depth)
+	  (syntax-case x (quasiquote unquote unquote-splicing)
+	    ;; inner quasiquote
+	    ((quasiquote Exp)
+	     (with-values (destruct Orig #'Exp (add1 depth))
+	       (syntax-lambda (Builder Vars Exps)
+			      (if (null? #'Vars)
+				  (values #''(quasiquote Exp) '() '())
+				  (values #'(list 'quasiquote Builder) #'Vars #'Exps)))))
+	    ;; unquote
+	    ((unquote Exp)
+	     (zero? depth)
+	     (with-temp X
+			(values #'X (list #'X) (list #'Exp))))
+	    ((unquote Exp)
+	     (with-values (destruct Orig #'Exp (sub1 depth))
+	       (syntax-lambda (Builder Vars Exps)
+			      (if (null? #'Vars)
+				  (values #''(unquote Exp) '() '())
+				  (values #'(list 'unquote Builder) #'Vars #'Exps)))))
+	    ;; splicing
+	    (((unquote-splicing Exp))
+	     (zero? depth)
+	     (with-temp X
+			(values #'X (list #'X) (list #'Exp))))
+	    (((unquote-splicing Exp ...))
+	     (zero? depth)
+	     (with-temps (X ...) (Exp ...)
+			 (values #'(append X ...) #'(X ...) #'(Exp ...))))
+	    (((unquote-splicing Exp ...) . Rest)
+	     (zero? depth)
+	     (with-values (destruct Orig #'Rest depth)
+	       (syntax-lambda (Builder Vars Exps)
+			      (with-temps (X ...) (Exp ...)
+					  (if (null? #'Vars)
+					      (values #'(append X ... 'Rest)
+						      #'(X ...) #'(Exp ...))
+					      (values #'(append X ... Builder)
+						      #'(X ... . Vars) #'(Exp ... . Exps)))))))
+	    ((unquote-splicing Exp ...)
+	     (with-values (destruct Orig #'(Exp ...) (sub1 depth))
+	       (syntax-lambda (Builder Vars Exps)
+			      (if (null? #'Vars)
+				  (values #''(unquote-splicing Exp ...) '() '())
+				  (values #'(cons 'unquote-splicing Builder)
+					  #'Vars #'Exps)))))
+	    ;; dots
+	    (((unquote Exp) Dots)
+	     (and (zero? depth) (ellipsis? #'Dots))
+	     (with-temp X
+			(values #'X (list #'X) (list #'Exp))))
+	    (((unquote Exp) Dots . Rest)
+	     (and (zero? depth) (ellipsis? #'Dots))
+	     (with-values (destruct Orig #'Rest depth)
+	       (syntax-lambda (RestBuilder RestVars RestExps)
+			      (with-syntax ((TailExp
+					     (if (stx-null? #'RestVars)
+						 #''Rest
+						 #'RestBuilder)))
+				(with-temp X
+					   (values #'(append X TailExp)
+						   (cons #'X #'RestVars)
+						   (cons #'Exp #'RestExps)))))))
+	    ((Exp Dots . Rest)
+	     (and (zero? depth) (ellipsis? #'Dots))
+	     (with-values (destruct Orig #'Exp depth)
+	       (syntax-lambda (ExpBuilder (ExpVar ...) (ExpExp ...))
+			      (if (null? #'(ExpVar ...))
+				  (syntax-error Orig "Bad ellipsis")
+				  (with-values (destruct Orig #'Rest depth)
+				    (syntax-lambda (RestBuilder RestVars RestExps)
+						   (with-syntax ((TailExp
+								  (if (null? #'RestVars)
+								      #''Rest
+								      #'RestBuilder))
+								 (Orig Orig))
+						     (values #'(let f ((ExpVar ExpVar) ...)
+								 (if (and (pair? ExpVar) ...)
+								     (cons
+								      (let ((ExpVar (car ExpVar)) ...)
+									ExpBuilder)
+								      (f (cdr ExpVar) ...))
+								     (if (and (null? ExpVar) ...)
+									 TailExp
+									 (error 'unquote
+										"Mismatched lists in ~s"
+										Orig))))
+							     (append (syntax->list #'(ExpVar ...)) (syntax->list #'RestVars))
+							     (append (syntax->list #'(ExpExp ...)) (syntax->list #'RestExps))))))))))
+	    ;; Vectors
+	    (#(X ...)
+	     (with-values (destruct Orig #'(X ...) depth)
+	       (syntax-lambda (LsBuilder LsVars LsExps)
+			      (values #'(list->vector LsBuilder) #'LsVars #'LsExps))))
+	    ;; random stuff
+	    ((Hd . Tl)
+	     (with-values (destruct Orig #'Hd depth)
+	       (syntax-lambda (HdBuilder HdVars HdExps)
+			      (with-values (destruct Orig #'Tl depth)
+				(syntax-lambda (TlBuilder TlVars TlExps)
+					       (with-syntax ((Hd (if (null? #'HdVars)
+								     #''Hd
+								     #'HdBuilder))
+							     (Tl (if (null? #'TlVars)
+								     #''Tl
+								     #'TlBuilder)))
+						 (values #'(cons Hd Tl)
+							 (append (syntax->list #'HdVars) (syntax->list #'TlVars))
+							 (append (syntax->list #'HdExps) (syntax->list #'TlExps)))))))))
+	    (OtherThing
+	     (values #''OtherThing '() '())))))
+      ;; macro begins
+      (syntax-case x ()
+	((_ Datum)
+	 (with-values (destruct #'(quasiquote Datum) #'Datum 0)
+	   (syntax-lambda (Builder (Var ...) (Exp ...))
+			  (if (null? #'(Var ...))
+			      #''Datum
+			      #'(let ((Var Exp) ...)
+				  Builder))))))))
+
+  (define-syntax extend-backquote
+    (lambda (x)
+      (syntax-case x ()
+	((_ Template Exp ...)
+	 (with-syntax ((quasiquote
+			(datum->syntax-object #'Template 'quasiquote)))
+	   #'(let-syntax ((quasiquote
+			   (lambda (x)
+			     (syntax-case x ()
+			       ((_ Foo) #'(my-backquote Foo))))))
+	       Exp ...))))))
+
+;;; ------------------------------
+
+  (define-syntax with-values
+    (syntax-rules ()
+      ((_ P C) (call-with-values (lambda () P) C))))
+
+  (define-syntax letcc
+    (syntax-rules ()
+      ((_ V B0 B ...) (call/ec (lambda (V) B0 B ...)))))
+
+  (define classify-list
+    (lambda (ls)
+      (cond
+       ((null? ls) 'proper)
+       ((not (pair? ls)) 'improper)
+       (else
+        (let f ((tortoise ls) (hare (cdr ls)))
+          (cond
+	   ((eq? tortoise hare) 'infinite)
+	   ((null? hare) 'proper)
+	   ((not (pair? hare)) 'improper)
+	   (else
+	    (let ((hare (cdr hare)))
+	      (cond
+	       ((null? hare) 'proper)
+	       ((not (pair? hare)) 'improper)
+	       (else (f (cdr ls) (cdr hare))))))))))))
+
+  (define ilist-copy-flat
+    (lambda (ils)
+      (let f ((tortoise ils) (hare (cdr ils)))
+	(if (eq? tortoise hare)
+	    (list (car tortoise))
+	    (cons (car tortoise) (f (cdr tortoise) (cddr hare)))))))
+
+  (define sexp-dispatch
+    (lambda (obj pat);; #f or list of vars
+      (letcc escape
+	     (let ((fail (lambda () (escape #f))))
+	       (let f ((pat pat) (obj obj) (vals '()))
+		 (cond
+		  ((eq? pat 'any)
+		   (cons obj vals))
+		  ((eq? pat 'each-any)
+		   ;; handle infinities
+		   (case (classify-list obj)
+		     ((proper infinite) (cons obj vals))
+		     ((improper) (fail))))
+		  ((pair? pat)
+		   (if (pair? obj)
+		       (f (car pat) (car obj) (f (cdr pat) (cdr obj) vals))
+		       (fail)))
+		  ((vector? pat)
+		   (case (vector-ref pat 0)
+		     ((atom)
+		      (let ((a (vector-ref pat 1)))
+			(if (eqv? obj a)
+			    vals
+			    (fail))))
+		     ((vector)
+		      (if (vector? obj)
+			  (let ((vec-pat (vector-ref pat 1)))
+			    (f vec-pat (vector->list obj) vals))
+			  (fail)))
+		     ((each)
+		      ;; if infinite, copy the list as flat, then do the matching,
+		      ;; then do some set-cdrs. 
+		      (let ((each-pat (vector-ref pat 1))
+			    (each-size (vector-ref pat 2)))
+			(case (classify-list obj)
+			  ((improper) (fail))
+			  ((infinite)
+			   (error 'iu-match "not handling infinite lists at the moment...")
+			   #;
+			   (let ((each-vals (f pat (ilist-copy-flat obj) '())))
+			     (for-each (lambda (x) (set-cdr! (last-pair x) x))
+				       each-vals)
+			     (append each-vals vals)))
+			  ((proper)
+			   (append
+			    (let g ((obj obj))
+			      (if (null? obj)
+				  (make-list each-size '())
+				  (let ((hd-vals (f each-pat (car obj) '()))
+					(tl-vals (g (cdr obj))))
+				    (map cons hd-vals tl-vals))))
+			    vals)))))
+		     ((tail-each)
+		      (let ((each-pat (vector-ref pat 1))
+			    (each-size (vector-ref pat 2))
+			    (revtail-pat (vector-ref pat 3))
+			    (revtail-tail-pat (vector-ref pat 4)))
+			(when (eq? (classify-list obj) 'infinite) (fail))
+			(with-values
+			    (let g ((obj obj))
+			      ;; in-tail?, vals, revtail-left/ls
+			      (cond
+			       ((pair? obj)
+				(with-values (g (cdr obj))
+				  (lambda (in-tail? vals tail-left/ls)
+				    (if in-tail?
+					(if (null? tail-left/ls)
+					    (values #f vals (list (car obj)))
+					    (values #t (f (car tail-left/ls)
+							  (car obj)
+							  vals)
+						    (cdr tail-left/ls)))
+					(values #f vals
+						(cons (car obj) tail-left/ls))))))
+			       (else
+				(values #t
+					(f revtail-tail-pat obj '())
+					revtail-pat))))
+			  (lambda (in-tail? vals tail-left/ls)
+			    (if in-tail?
+				(if (null? tail-left/ls)
+				    (append (make-list each-size '())
+					    vals)
+				    (fail))
+				(f each-pat tail-left/ls vals))))))))
+		  (else
+		   (if (eqv? obj pat)
+		       vals
+		       (fail)))))))))
+  )
+
+
+
+;;;CHANGELOG
+
+;; [04 Nov 2005]
+;; rrn fixed a bug in the code for handling ellipses.  
+;; (was (length (syntax->list vars)) rather than (length vars))
+
+;; [13 March 2002]
+;; rkd added following change by Friedman and Ganz to the main source
+;; code thread and fixed a couple of minor problems.
+
+;; [9 March 2002]
+;; Dan Friedman and Steve Ganz added the ability to use identical pattern
+;; variables.  The patterns represented by the variables are compared
+;; using the value of the parameter match-equality-test, which defaults
+;; to equal?.
+;;
+;; > (match '(1 2 1 2 1)
+;;     [(,a ,b ,a ,b ,a) (guard (number? a) (number? b)) (+ a b)])
+;; 3
+;; ;;
+;; > (match '((1 2 3) 5 (1 2 3))
+;;     [((,a ...) ,b (,a ...)) `(,a ... ,b)])
+;; (1 2 3 5)
+;; ;;
+;; > (parameterize ([match-equality-test (lambda (x y) (equal? x (reverse y)))])
+;;     (match '((1 2 3) (3 2 1))   
+;;       [(,a ,a) 'yes]
+;;       [,oops 'no]))
+;; yes
+
+;; [10 Jan 2002]
+;; eh fixed bug that caused (match '((1 2 3 4)) (((,a ... ,d) . ,x) a)) to
+;; blow up.  The bug was caused by a bug in the sexp-dispatch procedure
+;; where a base value empty list was passed to an accumulator from inside
+;; the recursion, instead of passing the old value of the accumulator.
+
+;; [14 Jan 2001]
+;; rkd added syntax checks to unquote pattern parsing to weed out invalid
+;; patterns like ,#(a) and ,[(vector-ref d 1)].
+
+;; [14 Jan 2001]
+;; rkd added ,[Cata -> Id* ...] to allow specification of recursion
+;; function.  ,[Id* ...] recurs to match; ,[Cata -> Id* ...] recurs
+;; to Cata.
+
+;; [14 Jan 2001]
+;; rkd tightened up checks for ellipses and nested quasiquote; was comparing
+;; symbolic names, which, as had been noted in the source, is a possible
+;; hygiene bug.  Replaced error call in guard-body with syntax-error to
+;; allow error to include source line/character information.
+
+;; [13 Jan 2001]
+;; rkd fixed match patterns of the form (stuff* ,[x] ... stuff+), which
+;; had been recurring on subforms of each item rather than on the items
+;; themselves.
+
+;; [29 Feb 2000]
+;; Fixed a case sensitivity bug.
+
+;; [24 Feb 2000]
+;; Matcher now handles vector patterns.  Quasiquote also handles
+;; vector patterns, but does NOT do the csv6.2 optimization of
+;; `#(a 1 ,(+ 3 4) x y) ==> (vector 'a 1 (+ 3 4) 'x 'y).
+;; Also fixed bug in (P ... . P) matching code. 
+
+;; [23 Feb 2000]
+;; KSM fixed bug in unquote-splicing inside quasiquote.
+
+;; [10 Feb 2000]
+;; New forms match+ and trace-match+ thread arguments right-to-left.
+;; The pattern (P ... . P) now works the way you might expect.
+;; Infinite lists are now properly matched (and not matched).
+;; Removed the @ pattern.
+;; Internal: No longer converting into syntax-case. 
+
+;; [6 Feb 2000]
+;; Added expansion-time error message for referring to cata variable
+;; in a guard.
+
+;; [4 Feb 2000]
+;; Fixed backquote so it can handle nested backquote (oops).
+;; Double-backquoted ellipses are neutralized just as double-backquoted
+;; unquotes are.  So:
+;;   `(a ,'(1 2 3) ... b)    =eval=> (a 1 2 3 b)
+;;   ``(a ,'(1 2 3) ... b)   =eval=> `(a ,'(1 2 3) ... b)
+;;   ``(a ,(,(1 2 3) ...) b) =eval=> `(a ,(1 2 3) b)
+;; Added support for
+;;   `((unquote-splicing x y z) b) =expand==> (append x y z (list 'b))
+
+;; [1 Feb 2000]
+;; Fixed a bug involving forgetting to quote stuff in the revised backquote.
+;; Recognized unquote-splicing and signalled errors in the appropriate places.
+;; Added support for deep ellipses in backquote.
+;; Rewrote backquote so it does the rebuilding directly instead of
+;; expanding into Chez's backquote. 
+
+;; [31 Jan 2000]
+;; Kent Dybvig fixed template bug.
+
+;; [31 Jan 2000]
+;; Added the trace-match form, and made guards contain
+;; an explicit and expression:
+;;    (guard E ...) ==> (guard (and E ...))
+
+;; [26 Jan 2000]
+;; Inside the clauses of match expressions, the following
+;; transformation is performed inside backquote expressions:
+;;    ,v ...      ==> ,@v
+;;    (,v ,w) ... ==> ,@(map list v w)
+;;    etc.
+
+;(require iu-match)
+
+;; RRN: DEBUGGING
+;(define (f)
+;  (match '((1 2) (3 4))
+;    [((,x ,y) ...) y]))
+;(f)
\ No newline at end of file
diff --git a/old_script/match.ss b/old_script/match.ss
new file mode 100644
index 0000000..a215765
--- /dev/null
+++ b/old_script/match.ss
@@ -0,0 +1,800 @@
+;;; match.ss
+
+;;; Time-stamp: Sun Jan 14 09:04:21 EST 2001 rkd
+
+;; [14 Jan 2001]
+;; rkd added syntax checks to unquote pattern parsing to weed out invalid
+;; patterns like ,#(a) and ,[(vector-ref d 1)].
+
+;; [14 Jan 2001]
+;; rkd added ,[Cata -> Id* ...] to allow specification of recursion
+;; function.  ,[Id* ...] recurs to match; ,[Cata -> Id* ...] recurs
+;; to Cata.
+
+;; [14 Jan 2001]
+;; rkd tightened up checks for ellipses and nested quasiquote; was comparing
+;; symbolic names, which, as had been noted in the source, is a possible
+;; hygiene bug.  Replaced error call in guard-body with syntax-error to
+;; allow error to include source line/character information.
+
+;; [13 Jan 2001]
+;; rkd fixed match patterns of the form (stuff* ,[x] ... stuff+), which
+;; had been recurring on subforms of each item rather than on the items
+;; themselves.
+
+;; Previous changelog listings at end of file.
+
+;; =============================================================
+
+;; Exp    ::= (match              Exp Clause)
+;;         || (trace-match        Exp Clause)
+;;         || (match+       (Id*) Exp Clause*)
+;;         || (trace-match+ (Id*) Exp Clause*)
+;;         || OtherSchemeExp
+
+;; Clause ::= (Pat Exp+) || (Pat (guard Exp*) Exp+)
+
+;; Pat    ::= (Pat ... . Pat)
+;;         || (Pat . Pat)
+;;         || ()
+;;         || #(Pat* Pat ... Pat*)
+;;         || #(Pat*)
+;;         || ,Id
+;;         || ,[Id*]
+;;         || ,[Cata -> Id*]
+;;         || Id
+
+;; Cata   ::= Exp
+
+;; YOU'RE NOT ALLOWED TO REFER TO CATA VARS IN GUARDS. (reasonable!)
+
+(module iu-match 
+       ((match+ match-help match-help1 clause-body let-values**
+          guard-body convert-pat mapper my-backquote extend-backquote
+          sexp-dispatch)
+        (trace-match+ match-help match-help1 clause-body let-values**
+          guard-body convert-pat mapper my-backquote extend-backquote
+          sexp-dispatch)
+        (match match-help match-help1 clause-body let-values**
+          guard-body convert-pat mapper my-backquote extend-backquote
+          sexp-dispatch)
+        (trace-match match-help match-help1 clause-body let-values**
+          guard-body convert-pat mapper my-backquote extend-backquote
+          sexp-dispatch)
+
+	;; TEMP 
+	clause-body extend-backquote
+	)
+
+;(import scheme)
+
+(define-syntax match+
+  (lambda (x)
+    (syntax-case x ()
+      [(_ (ThreadedId ...) Exp Clause ...)
+       #'(let f ((ThreadedId ThreadedId) ... (x Exp))
+           (match-help _ f x (ThreadedId ...) Clause ...))])))
+
+(define-syntax match
+  (lambda (x)
+    (syntax-case x ()
+      [(_ Exp Clause ...)
+       #'(let f ((x Exp))
+           (match-help _ f x () Clause ...))])))
+
+(define-syntax trace-match+
+  (lambda (x)
+    (syntax-case x ()
+      [(_ (ThreadedId ...) Name Exp Clause ...)
+       #'(letrec ((f (trace-lambda Name (ThreadedId ... x)
+                       (match-help _ f x (ThreadedId ...) Clause ...))))
+           (f ThreadedId ... x))])))
+
+(define-syntax trace-match
+  (lambda (x)
+    (syntax-case x ()
+      [(_ Name Exp Clause ...)
+       #'(letrec ((f (trace-lambda Name (x)
+                       (match-help _ f x () Clause ...))))
+           (f Exp))])))
+
+;;; ------------------------------
+
+(define-syntax let-values**
+  (syntax-rules ()
+    ((_ () B0 B ...) (begin B0 B ...))
+    ((_ ((Formals Exp) Rest ...) B0 B ...)
+     (let-values** (Rest ...) 
+       (call-with-values (lambda () Exp)
+         (lambda Formals B0 B ...))))))
+
+(define-syntax match-help
+  (lambda (x)
+    (syntax-case x ()
+      ((_ Template Cata Obj ThreadedIds)
+       ;(inspect `(HRM ,(datum Template) ,(datum Cata) ,(datum Obj) ,(datum ThreadedIds)))
+       ;(inspect #'Template)
+       #'(error 'match "Unmatched datum.\n  Datum: ~s\n  Source-Location: ~s\n" Obj #'Template))
+
+      ((_ Template Cata Obj ThreadedIds (Pat B0 B ...) Rest ...)
+       #'(convert-pat Pat
+           (match-help1 Template Cata Obj ThreadedIds 
+             (B0 B ...)
+             Rest ...)))
+
+      )))
+
+(define-syntax match-help1
+  (syntax-rules (guard)
+    ((_ PatLit Vars Cdecls Template Cata Obj ThreadedIds
+       ((guard G ...) B0 B ...) Rest ...)
+     (let ((ls/false (sexp-dispatch Obj PatLit)))
+       (if (and ls/false (apply (lambda Vars
+                                  (guard-body Cdecls
+                                    (extend-backquote Template (and G ...))))
+                           ls/false))
+           (apply (lambda Vars
+                    (clause-body Cata Cdecls ThreadedIds
+                      (extend-backquote Template B0 B ...)))
+             ls/false)
+           (match-help Template Cata Obj ThreadedIds Rest ...))))
+    ((_ PatLit Vars Cdecls Template Cata Obj ThreadedIds
+       (B0 B ...) Rest ...)
+     (let ((ls/false (sexp-dispatch Obj PatLit)))
+       (if ls/false
+           (apply (lambda Vars
+                    (clause-body Cata Cdecls ThreadedIds
+                      (extend-backquote Template B0 B ...)))
+             ls/false)
+           (match-help Template Cata Obj ThreadedIds Rest ...))))))
+
+;((clause-body f () () (extend-backquote _ (void))))
+(define-syntax clause-body
+  (lambda (x)
+    (define build-mapper
+      (lambda (vars depth cata tIds)
+        (if (zero? depth)
+            cata
+            (with-syntax ((rest (build-mapper vars (- depth 1) cata tIds))
+                          (vars vars)
+                          (tIds tIds))
+              #'(mapper rest vars tIds)))))
+    (syntax-case x ()
+      ((_ Cata ((CVar CDepth CMyCata CFormal ...) ...) (ThreadedId ...) B)
+       ;;; RRN: Hacking this a bit for larceny compat:
+       (with-syntax ((((TIDs ...) ...)
+		      (datum->syntax-object #'_
+		       (map (lambda _ #'(ThreadedId ...))
+			 (syntax-object->datum #'((CFormal ...) ...))))))
+	 (with-syntax (((Mapper ...)
+			(map (lambda (mycata formals depth)
+			       (build-mapper formals
+					     (syntax-object->datum depth)
+					     (syntax-case mycata ()
+							  [#f #'Cata]
+							  [exp #'exp])
+					     #'(ThreadedId ...)))
+			  #'(CMyCata ...)
+			  #'((CFormal ...) ...)
+			  #'(CDepth ...))))
+	   ;; [2007.12.26] RRN: Removing ellipses from references to ThreadedId here:
+	   ;; This appears to have been a bug.  Introducing TIDs instead.	
+	   #'(let-values** (([TIDs ... CFormal ...]
+			     (Mapper TIDs ... CVar))
+			    ...)
+			   B)
+
+	   )
+	 )))))
+
+(define-syntax guard-body
+  (lambda (x)
+    (syntax-case x ()
+      ((_ ((Cvar Cdepth MyCata Cformal ...) ...) B)
+       (with-syntax (((CF ...) (apply append #'((Cformal ...) ...))))
+         #'(let-syntax
+               ((CF
+                  (lambda (x)
+                    (syntax-case x ()
+                      (Name
+                        (syntax-error #'Name
+                          "guard cannot refer to return-value variable")))))
+                ...)
+             B))))))
+
+(define-syntax convert-pat
+  ;; returns sexp-pat x vars x cdecls
+  (let ()
+    (define ellipsis?
+      (lambda (x)
+        (and (identifier? x) (literal-identifier=? x #'(... ...)))))
+    (define Var?
+      (lambda (x)
+        (syntax-case x (->)
+          [-> #f]
+          [id (identifier? #'id)])))
+    (define (f syn vars cdecls depth)
+      (syntax-case syn (unquote)
+        ((unquote . stuff) ; separate for better error detection
+         (syntax-case syn (unquote ->)
+           ((unquote [MyCata -> Var ...])
+            (andmap Var? #'(Var ...))
+            (with-syntax (((Temp) (generate-temporaries '(x)))
+                          (Depth depth))
+              (values #'any
+                      (cons #'Temp vars)
+                      (cons #'[Temp Depth MyCata Var ...] cdecls))))
+           ((unquote [Var ...])
+            (andmap Var? #'(Var ...))
+            (with-syntax (((Temp) (generate-temporaries '(x)))
+                          (Depth depth))
+              (values #'any
+                      (cons #'Temp vars)
+                      (cons #'[Temp Depth #f Var ...] cdecls))))
+           ((unquote Var)
+            (Var? #'Var)
+            (values #'any (cons #'Var vars) cdecls))))
+        (((unquote . stuff) Dots) ; separate for better error detection
+         (ellipsis? #'Dots)
+         (syntax-case syn (unquote ->)
+           (((unquote [MyCata -> Var ...]) Dots)
+            (andmap Var? #'(Var ...))
+            (with-syntax (((Temp) (generate-temporaries '(x)))
+                          (Depth+1 (+ 1 depth)))
+              (values #'each-any
+                      (cons #'Temp vars)
+                      (cons #'[Temp Depth+1 MyCata Var ...] cdecls))))
+           (((unquote [Var ...]) Dots)
+            (andmap Var? #'(Var ...))
+            (with-syntax (((Temp) (generate-temporaries '(x)))
+                          (Depth+1 (+ 1 depth)))
+              (values #'each-any
+                      (cons #'Temp vars)
+                      (cons #'[Temp Depth+1 #f Var ...] cdecls))))
+           (((unquote Var) Dots)
+            (Var? #'Var)
+            (values #'each-any (cons #'Var vars) cdecls))
+           ((expr Dots) (syntax-error #'expr "match-pattern unquote syntax"))))
+        ((Pat Dots)
+         (ellipsis? #'Dots)
+         (let-synvalues* (((Dpat Dvars Dcdecls)
+                           (f #'Pat vars cdecls (+ 1 depth))))
+           (with-syntax ((Size (- (length #'Dvars) (length vars))))
+             (values #'#(each Dpat Size) #'Dvars #'Dcdecls))))
+        ((Pat Dots . Rest)
+         (ellipsis? #'Dots)
+         (let-synvalues* (((Rpat Rvars Rcdecls)
+                           (f #'Rest vars cdecls depth))
+                          ((Dpat Dvars Dcdecls)
+                           (f #'(Pat (... ...)) #'Rvars #'Rcdecls
+			     depth)))
+           (with-syntax ((Size (- (length #'Dvars) (length #'Rvars)))
+                         ((RevRestTl . RevRest) (reverseX #'Rpat '())))
+             (values #'#(tail-each Dpat Size RevRest RevRestTl)
+                     #'Dvars #'Dcdecls))))
+        ((X . Y)
+         (let-synvalues* (((Ypat Yvars Ycdecls)
+                           (f #'Y vars cdecls depth))
+                          ((Xpat Xvars Xcdecls)
+                           (f #'X #'Yvars #'Ycdecls depth)))
+           (values #'(Xpat . Ypat) #'Xvars #'Xcdecls)))
+        (() (values #'() vars cdecls))
+	(#(X ...)
+	 (let-synvalues* (((Pat Vars CDecls) (f #'(X ...) vars cdecls depth)))
+	   (values #'#(vector Pat) #'Vars #'CDecls)))
+        (Thing (values #'#(atom Thing) vars cdecls))))
+    (define reverseX
+      (lambda (ls acc)
+        (if (pair? ls)
+            (reverseX (cdr ls) (cons (car ls) acc))
+            (cons ls acc))))
+    (define-syntax let-synvalues*
+      (syntax-rules ()
+        ((_ () B0 B ...) (begin B0 B ...))
+        ((_ (((Formal ...) Exp) Decl ...) B0 B ...)
+         (call-with-values (lambda () Exp)
+           (lambda (Formal ...)
+             (with-syntax ((Formal Formal) ...)
+               (let-synvalues* (Decl ...) B0 B ...)))))))
+    (lambda (syn) 
+      (syntax-case syn ()
+        ((_ syn (kh . kt))
+         (let-synvalues* (((a b c) (f #'syn '() '() 0)))
+           #'(kh 'a b c . kt)))))))
+
+(define-syntax mapper
+  (lambda (x)
+    (syntax-case x ()
+      ((_ F (RetId ...) (ThreadId ...))
+       (with-syntax (((t ...) (generate-temporaries #'(RetId ...)))
+                     ((ts ...) (generate-temporaries #'(RetId ...)))
+                     ((null ...) (map (lambda (x) #'()) #'(RetId ...))))
+         #'(let ((fun F))
+             (rec g
+               (lambda (ThreadId ... ls)
+                 (if (null? ls)
+                     (values ThreadId ... null ...)
+                     (call-with-values
+                         (lambda () (g ThreadId ... (cdr ls)))
+                       (lambda (ThreadId ... ts ...)
+                         (call-with-values
+                             (lambda () 
+			       (fun ThreadId ... 
+				    (car ls)))
+                           (lambda (ThreadId ... t ...)
+                             (values ThreadId ... (cons t ts) ...))))))))))))))
+
+;;; ------------------------------
+
+(define-syntax my-backquote
+  (lambda (x)
+    (define ellipsis?
+      (lambda (x)
+        (and (identifier? x) (literal-identifier=? x #'(... ...)))))
+    (define-syntax with-values
+      (syntax-rules ()
+        ((_ P C) (call-with-values (lambda () P) C))))
+    (define-syntax syntax-lambda
+      (lambda (x)
+        (syntax-case x ()
+          ((_ (Pat ...) Body0 Body ...)
+           (with-syntax (((X ...) (generate-temporaries #'(Pat ...))))
+             #'(lambda (X ...)
+                 (with-syntax ((Pat X) ...)
+                   Body0 Body ...)))))))
+    (define-syntax with-temp
+      (syntax-rules ()
+        ((_ V Body0 Body ...)
+         (with-syntax (((V) (generate-temporaries '(x))))
+           Body0 Body ...))))
+    (define-syntax with-temps
+      (syntax-rules ()
+        ((_ (V ...) (Exp ...) Body0 Body ...)
+         (with-syntax (((V ...) (generate-temporaries #'(Exp ...))))
+           Body0 Body ...))))
+    (define destruct
+      (lambda (Orig x depth)
+        (syntax-case x (quasiquote unquote unquote-splicing)
+          ;; inner quasiquote
+          ((quasiquote Exp)
+           (with-values (destruct Orig #'Exp (+ 1 depth))
+             (syntax-lambda (Builder Vars Exps)
+               (if (null? #'Vars)
+                   (values #''(quasiquote Exp) '() '())
+                   (values #'(list 'quasiquote Builder) #'Vars #'Exps)))))
+          ;; unquote
+          ((unquote Exp)
+           (zero? depth)
+           (with-temp X
+             (values #'X (list #'X) (list #'Exp))))
+          ((unquote Exp)
+           (with-values (destruct Orig #'Exp (+ -1 depth))
+             (syntax-lambda (Builder Vars Exps)
+               (if (null? #'Vars)
+                   (values #''(unquote Exp) '() '())
+                   (values #'(list 'unquote Builder) #'Vars #'Exps)))))
+          ;; splicing
+          (((unquote-splicing Exp))
+           (zero? depth)
+           (with-temp X
+             (values #'X (list #'X) (list #'Exp))))
+          (((unquote-splicing Exp ...))
+           (zero? depth)
+           (with-temps (X ...) (Exp ...)
+             (values #'(append X ...) #'(X ...) #'(Exp ...))))
+          (((unquote-splicing Exp ...) . Rest)
+           (zero? depth)
+           (with-values (destruct Orig #'Rest depth)
+             (syntax-lambda (Builder Vars Exps)
+               (with-temps (X ...) (Exp ...)
+                 (if (null? #'Vars)
+                     (values #'(append X ... 'Rest)
+                             #'(X ...) #'(Exp ...))
+                     (values #'(append X ... Builder)
+                             #'(X ... . Vars) #'(Exp ... . Exps)))))))
+          ((unquote-splicing Exp ...)
+           (with-values (destruct Orig #'(Exp ...) (+ -1 depth))
+             (syntax-lambda (Builder Vars Exps)
+               (if (null? #'Vars)
+                   (values #''(unquote-splicing Exp ...) '() '())
+                   (values #'(cons 'unquote-splicing Builder)
+                           #'Vars #'Exps)))))
+          ;; dots
+          (((unquote Exp) Dots)
+           (and (zero? depth) (ellipsis? #'Dots))
+           (with-temp X
+             (values #'X (list #'X) (list #'Exp))))
+          (((unquote Exp) Dots . Rest)
+           (and (zero? depth) (ellipsis? #'Dots))
+           (with-values (destruct Orig #'Rest depth)
+             (syntax-lambda (RestBuilder RestVars RestExps)
+               (with-syntax ((TailExp
+                               (if (null? #'RestVars)
+                                   #''Rest
+                                   #'RestBuilder)))
+                 (with-temp X
+                   (values #'(append X TailExp)
+                           (cons #'X #'RestVars)
+                           (cons #'Exp #'RestExps)))))))
+          ((Exp Dots . Rest)
+           (and (zero? depth) (ellipsis? #'Dots))
+           (with-values (destruct Orig #'Exp depth)
+             (syntax-lambda (ExpBuilder (ExpVar ...) (ExpExp ...))
+               (if (null? #'(ExpVar ...))
+                   (syntax-error Orig "Bad ellipsis")
+                   (with-values (destruct Orig #'Rest depth)
+                     (syntax-lambda (RestBuilder RestVars RestExps)
+                       (with-syntax ((TailExp
+                                       (if (null? #'RestVars)
+                                           #''Rest
+                                           #'RestBuilder))
+                                     (Orig Orig))
+                         (values #'(let f ((ExpVar ExpVar) ...)
+                                     (if (and (pair? ExpVar) ...)
+                                         (cons
+                                           (let ((ExpVar (car ExpVar)) ...)
+                                             ExpBuilder)
+                                           (f (cdr ExpVar) ...))
+                                         (if (and (null? ExpVar) ...)
+                                             TailExp
+                                             (error 'unquote
+                                               "Mismatched lists in ~s"
+                                               Orig))))
+                                 (append #'(ExpVar ...) #'RestVars)
+                                 (append #'(ExpExp ...) #'RestExps)))))))))
+	  ;; Vectors
+	  (#(X ...)
+	   (with-values (destruct Orig #'(X ...) depth)
+	     (syntax-lambda (LsBuilder LsVars LsExps)
+	       (values #'(list->vector LsBuilder) #'LsVars #'LsExps))))
+          ;; random stuff
+          ((Hd . Tl)
+           (with-values (destruct Orig #'Hd depth)
+             (syntax-lambda (HdBuilder HdVars HdExps)
+               (with-values (destruct Orig #'Tl depth)
+                 (syntax-lambda (TlBuilder TlVars TlExps)
+                   (with-syntax ((Hd (if (null? #'HdVars)
+                                         #''Hd
+                                         #'HdBuilder))
+                                 (Tl (if (null? #'TlVars)
+                                         #''Tl
+                                         #'TlBuilder)))
+                     (values #'(cons Hd Tl)
+                             (append #'HdVars #'TlVars)
+                             (append #'HdExps #'TlExps))))))))
+          (OtherThing
+            (values #''OtherThing '() '())))))
+    ;; macro begins
+    (syntax-case x ()
+      ((_ Datum)
+       (with-values (destruct #'(quasiquote Datum) #'Datum 0)
+         (syntax-lambda (Builder (Var ...) (Exp ...))
+           (if (null? #'(Var ...))
+               #''Datum
+               #'(let ((Var Exp) ...)
+                   Builder))))))))
+
+(define-syntax extend-backquote
+  (lambda (x)
+    (syntax-case x ()
+      ((_ Template Exp ...)
+       (with-syntax ((quasiquote
+                       (datum->syntax-object #'Template 'quasiquote)))
+         #'(let-syntax ((quasiquote
+                          (lambda (x)
+                            (syntax-case x ()
+                              ((_ Foo) #'(my-backquote Foo))))))
+             Exp ...))))))
+
+;;; ------------------------------
+
+(define-syntax with-values
+  (syntax-rules ()
+    ((_ P C) (call-with-values (lambda () P) C))))
+
+(define-syntax letcc
+  (syntax-rules ()
+    ((_ V B0 B ...) (call/1cc (lambda (V) B0 B ...)))))
+
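+;; Classify a list as 'proper, 'improper (dotted), or 'infinite (cyclic),
+;; using a tortoise/hare walk so that cyclic lists still terminate.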
+(define classify-list
+  (lambda (ls)
+    (cond
+      ((null? ls) 'proper)
+      ((not (pair? ls)) 'improper)
+      (else
+        (let f ((tortoise ls) (hare (cdr ls)))
+          (cond
+            ((eq? tortoise hare) 'infinite)
+            ((null? hare) 'proper)
+            ((not (pair? hare)) 'improper)
+            (else
+              (let ((hare (cdr hare)))
+                (cond
+                  ((null? hare) 'proper)
+                  ((not (pair? hare)) 'improper)
+              (else (f (cdr tortoise) (cdr hare))))))))))))
+
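+;; Copy a cyclic ("infinite") list into a finite, flat list so it can be
+;; traversed safely during matching.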
+(define ilist-copy-flat
+  (lambda (ils)
+    (let f ((tortoise ils) (hare (cdr ils)))
+      (if (eq? tortoise hare)
+          (list (car tortoise))
+          (cons (car tortoise) (f (cdr tortoise) (cddr hare)))))))
+
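+;; Interpret a compiled pattern against an object: returns the list of values
+;; bound by the pattern on success, or #f on failure.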
+(define sexp-dispatch
+  (lambda (obj pat);; #f or list of vars
+    (letcc escape
+      (let ((fail (lambda () (escape #f))))
+        (let f ((pat pat) (obj obj) (vals '()))
+          (cond
+            ((eq? pat 'any)
+             (cons obj vals))
+            ((eq? pat 'each-any)
+             ;; handle infinities
+             (case (classify-list obj)
+               ((proper infinite) (cons obj vals))
+               ((improper) (fail))))
+            ((pair? pat)
+             (if (pair? obj)
+                 (f (car pat) (car obj) (f (cdr pat) (cdr obj) vals))
+                 (fail)))
+            ((vector? pat)
+             (case (vector-ref pat 0)
+               ((atom)
+                (let ((a (vector-ref pat 1)))
+                  (if (eqv? obj a)
+                      vals
+                      (fail))))
+	       ((vector)
+		(if (vector? obj)
+		    (let ((vec-pat (vector-ref pat 1)))
+		      (f vec-pat (vector->list obj) vals))
+		    (fail)))
+               ((each)
+                ;; if infinite, copy the list as flat, then do the matching,
+                ;; then do some set-cdrs. 
+                (let ((each-pat (vector-ref pat 1))
+                      (each-size (vector-ref pat 2)))
+                  (case (classify-list obj)
+                    ((improper) (fail))
+                    ((infinite)
+                     (let ((each-vals (f pat (ilist-copy-flat obj) '())))
+                       (for-each (lambda (x) (set-cdr! (last-pair x) x))
+                         each-vals)
+                       (append each-vals vals)))
+                    ((proper)
+                     (append
+                       (let g ((obj obj))
+                         (if (null? obj)
+                             (make-list each-size '())
+                             (let ((hd-vals (f each-pat (car obj) '()))
+                                   (tl-vals (g (cdr obj))))
+                               (map cons hd-vals tl-vals))))
+                       vals)))))
+               ((tail-each)
+                (let ((each-pat (vector-ref pat 1))
+                      (each-size (vector-ref pat 2))
+                      (revtail-pat (vector-ref pat 3))
+                      (revtail-tail-pat (vector-ref pat 4)))
+                  (when (eq? (classify-list obj) 'infinite) (fail))
+                  (with-values
+                      (let g ((obj obj))
+                        ;; in-tail?, vals, revtail-left/ls
+                        (cond
+                          ((pair? obj)
+                           (with-values (g (cdr obj))
+                             (lambda (in-tail? vals tail-left/ls)
+                               (if in-tail?
+                                   (if (null? tail-left/ls)
+                                       (values #f vals (list (car obj)))
+                                       (values #t (f (car tail-left/ls)
+                                                    (car obj)
+                                                    vals)
+                                               (cdr tail-left/ls)))
+                                   (values #f vals
+                                           (cons (car obj) tail-left/ls))))))
+                          (else
+                            (values #t
+                                    (f revtail-tail-pat obj '())
+                                    revtail-pat))))
+                    (lambda (in-tail? vals tail-left/ls)
+                      (if in-tail?
+                          (if (null? tail-left/ls)
+                              (append (make-list each-size '())
+                                vals)
+                              (fail))
+                          (f each-pat tail-left/ls vals))))))))
+            (else
+              (if (eqv? obj pat)
+                  vals
+                  (fail)))))))))
+
+
+) ;; End Module
+
+
+#|
+
+;;; examples of passing along threaded information.
+
+;;; Try (collect-symbols '(if (x y 'a 'c zz) 'b 'c))
+;;; Note that it commonizes the reference to c. 
+
+(define-syntax with-values
+  (syntax-rules ()
+    ((_ P C) (call-with-values (lambda () P) C))))
+(define collect-symbols
+  (lambda (exp)
+    (with-values (collect-symbols-help exp)
+      (lambda (symbol-decls exp)
+        (match symbol-decls
+          (((,symbol-name . ,symbol-var) ...)
+           `(let ((,symbol-var (quote ,symbol-name)) ...) ,exp)))))))
+(define collect-symbols-help
+  (lambda (exp)
+    (let ((symbol-env '()))
+      (match+ (symbol-env) exp
+        (,x
+          (guard (symbol? x))
+          (values symbol-env x))
+        ((quote ,x)
+         (guard (symbol? x))
+         (let ((pair/false (assq x symbol-env)))
+           (if pair/false
+               (values symbol-env (cdr pair/false))
+               (let ((v (gensym)))
+                 (values (cons (cons x v) symbol-env)
+                         v)))))
+        ((quote ,x)
+         (values symbol-env `(quote ,x)))
+        ((if ,[t] ,[c] ,[a])
+         (values symbol-env `(if ,t ,c ,a)))
+        ((,[op] ,[arg] ...)
+         (values symbol-env `(,op ,arg ...)))))))
+
+;;; the grammar for this one is just if-exprs and everything else
+
+(define collect-leaves
+  (lambda (exp acc)
+    (match+ (acc) exp
+      ((if ,[] ,[] ,[])
+       acc)
+      ((,[] ,[] ...)
+       acc)
+      (,x
+        (cons x acc)))))
+
+;; here's something that takes apart quoted stuff. 
+
+(define destruct
+  (lambda (datum)
+    (match datum
+      (() `'())
+      ((,[X] . ,[Y])`(cons ,X ,Y))
+      (#(,[X] ...) `(vector ,X ...))
+      (,thing
+	(guard (symbol? thing))
+	`',thing)
+      (,thing
+	thing))))
+
+;; examples using explicit Catas
+
+(define sumsquares
+  (lambda (ls)
+    (define square 
+      (lambda (x)
+        (* x x)))
+    (match ls 
+      [(,[a*] ...) (apply + a*)]
+      [,[square -> n] n])))
+
+(define sumsquares
+  (lambda (ls)
+    (define square 
+      (lambda (x)
+        (* x x)))
+    (let ([acc 0])
+      (match+ (acc) ls 
+        [(,[] ...) acc]
+        [,[(lambda (acc x) (+ acc (square x))) ->] acc]))))
+
+;;; The following uses explicit Catas to parse programs in the
+;;; simple language defined by the grammar below
+
+;;; <Prog> -> (program <Stmt>* <Expr>)
+;;; <Stmt> -> (if <Expr> <Stmt> <Stmt>)
+;;;         | (set! <var> <Expr>)
+;;; <Expr> -> <var>
+;;;         | <integer>
+;;;         | (if <Expr> <Expr> <Expr>)
+;;;         | (<Expr> <Expr*>)
+
+
+(define parse
+  (lambda (x)
+    (define Prog
+      (lambda (x)
+        (match x
+          [(program ,[Stmt -> s*] ... ,[Expr -> e])
+           `(begin ,s* ... ,e)]
+          [,other (error 'parse "invalid program ~s" other)])))
+    (define Stmt
+      (lambda (x)
+        (match x
+          [(if ,[Expr -> e] ,[Stmt -> s1] ,[Stmt -> s2])
+           `(if ,e ,s1 ,s2)]
+          [(set! ,v ,[Expr -> e])
+           (guard (symbol? v))
+           `(set! ,v ,e)]
+          [,other (error 'parse "invalid statement ~s" other)])))
+    (define Expr
+      (lambda (x)
+        (match x
+          [,v (guard (symbol? v)) v]
+          [,n (guard (integer? n)) n]
+          [(if ,[e1] ,[e2] ,[e3])
+           `(if ,e1 ,e2 ,e3)]
+          [(,[rator] ,[rand*] ...) `(,rator ,rand* ...)]
+          [,other (error 'parse "invalid expression ~s" other)])))
+    (Prog x)))
+;;; (parse '(program (set! x 3) (+ x 4))) => (begin (set! x 3) (+ x 4))
+
+;; CHANGELOG (most recent changes are listed first)
+
+;; [29 Feb 2000]
+;; Fixed a case sensitivity bug.
+
+;; [24 Feb 2000]
+;; Matcher now handles vector patterns.  Quasiquote also handles
+;; vector patterns, but does NOT do the csv6.2 optimization of
+;; `#(a 1 ,(+ 3 4) x y) ==> (vector 'a 1 (+ 3 4) 'x 'y).
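+;; For example, a clause can now match a vector directly:
+;;   (match '#(1 2 3)
+;;     [#(,x ,y ,z) (list x y z)])  ;; => (1 2 3)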
+;; Also fixed bug in (P ... . P) matching code. 
+
+;; [23 Feb 2000]
+;; KSM fixed bug in unquote-splicing inside quasiquote.
+
+;; [10 Feb 2000]
+;; New forms match+ and trace-match+ thread arguments right-to-left.
+;; The pattern (P ... . P) now works the way you might expect.
+;; Infinite lists are now properly matched (and not matched).
+;; Removed the @ pattern.
+;; Internal: No longer converting into syntax-case. 
+
+;; [6 Feb 2000]
+;; Added expansion-time error message for referring to cata variable
+;; in a guard.
+
+;; [4 Feb 2000]
+;; Fixed backquote so it can handle nested backquote (oops).
+;; Double-backquoted ellipses are neutralized just as double-backquoted
+;; unquotes are.  So:
+;;   `(a ,'(1 2 3) ... b)    =eval=> (a 1 2 3 b)
+;;   ``(a ,'(1 2 3) ... b)   =eval=> `(a ,'(1 2 3) ... b)
+;;   ``(a ,(,(1 2 3) ...) b) =eval=> `(a ,(1 2 3) b)
+;; Added support for
+;;   `((unquote-splicing x y z) b) =expand==> (append x y z (list 'b))
+
+;; [1 Feb 2000]
+;; Fixed a bug involving forgetting to quote stuff in the revised backquote.
+;; Recognized unquote-splicing and signalled errors in the appropriate places.
+;; Added support for deep ellipses in backquote.
+;; Rewrote backquote so it does the rebuilding directly instead of
+;; expanding into Chez's backquote. 
+
+;; [31 Jan 2000]
+;; Kent Dybvig fixed template bug.
+
+;; [31 Jan 2000]
+;; Added the trace-match form, and made guards contain
+;; an explicit and expression:
+;;    (guard E ...) ==> (guard (and E ...))
+
+;; [26 Jan 2000]
+;; Inside the clauses of match expressions, the following
+;; transformation is performed inside backquote expressions:
+;;    ,v ...      ==> ,@v
+;;    (,v ,w) ... ==> ,@(map list v w)
+;;    etc.
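+;; For example:
+;;   (match '(1 2 3) [(,x ...) `(got ,x ...)])  ;; => (got 1 2 3)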
+
+|#
\ No newline at end of file
diff --git a/old_script/parser2.ss b/old_script/parser2.ss
new file mode 100644
index 0000000..dae3a7b
--- /dev/null
+++ b/old_script/parser2.ss
@@ -0,0 +1,45 @@
+#! /bin/bash
+#|
+exec chez --script "$0" ${1+"$@"}
+|#
+
+(printf "Called with ~s\n" (command-line-arguments))
+
+(include "match.ss")
+(import iu-match)
+
+;[2001.07.15]
+(define port->slist
+  (lambda (p)
+    (let porttoslistloop ([exp (read p)] [acc '()])
+        (if (eof-object? exp)
+            (begin (close-input-port p)
+                   (reverse! acc))
+            (porttoslistloop (read p) (cons exp acc))))))
+;; [2008.05.07] Adding a hack to tolerate #! lines at the top of the file.
+(define file->slist
+  (lambda (filename . opts)
+    (let* ([p (apply open-input-file filename opts)]
+	   [line1 (get-line p)])
+      ;; Doesn't allow whitespace!
+      (if (and (>= (string-length line1) 2)
+	       (string=? "#!" (substring line1 0 2)))
+	  ;; Read the rest of it straight away:
+	  (port->slist p)
+	  ;; Oops, we need to "put back" that first line.  We do that by just starting over.
+	  (begin (close-input-port p)
+		 (port->slist (apply open-input-file filename opts))))
+      )))
+
+(define sexps (file->slist (car (command-line-arguments))))
+
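+;; Recursively convert every symbol in the parsed file to a string,
+;; unwrapping any (unquote _) forms that Scheme's reader produces for the
+;; commas in the input.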
+(define wstrings
+  (match sexps
+    [() '()]
+    [(,uq ,[x]) (guard (eq? uq 'unquote)) x]
+    [,s (guard (symbol? s)) (symbol->string s)]
+    [(,[hd] . ,[tl]) `(,hd . ,tl)]
+    ))
+
+(pretty-print wstrings)
+
diff --git a/old_script/parser_fullnames.ss b/old_script/parser_fullnames.ss
new file mode 100644
index 0000000..06d0e2f
--- /dev/null
+++ b/old_script/parser_fullnames.ss
@@ -0,0 +1,187 @@
+#! /bin/bash
+#|
+exec mzscheme -qu "$0" ${1+"$@"}
+|#
+
+;; I'm duplicating the code from parser.ss and hacking it.
+
+(module parser_fullnames mzscheme  
+  ;; Import the parser and lexer generators.
+  (require (lib "yacc.ss" "parser-tools")
+	   (lib "lex.ss" "parser-tools")
+	   (prefix : (lib "lex-sre.ss" "parser-tools"))
+	   (lib "list.ss")
+	   ;(lib "iu-match.ss")
+	   "iu-match.ss"
+	   )
+;  (provide (all-defined))
+
+(require (lib "pretty.ss"))
+  
+(define-empty-tokens op-tokens 
+   (LeftParen RightParen Comma SemiColon Colon
+    EOF ))
+
+(define-tokens value-tokens (NAME NUM))
+
+(define-lex-abbrevs (lower-letter (:/ "a" "z"))
+                    (upper-letter (:/ #\A #\Z))
+		    (digit (:/ "0" "9"))
+                    (digits (:* digit))
+		    (letter (:or lower-letter upper-letter digit "." "'" "-" "/"))
+		    (name (:seq (:+ letter) (:* (:seq "_" (:* letter)))))
+		    (number (:seq digits "." digits (:? (:seq "E-" digits))))
+		    )
+
+(define ws-lex
+  (lexer-src-pos
+   [(eof) 'EOF]
+   
+   ;; Ignore all whitespace:
+   [(:or #\tab #\space #\newline #\return) (return-without-pos (ws-lex input-port))]
+
+   ["(" 'LeftParen]
+   [")" 'RightParen]  
+   ["," 'Comma] [";" 'SemiColon] [":" 'Colon]
+
+   [(:seq "'silva_" name "'") 
+    (begin ;(printf "TEST ~a\n" (substring lexeme 7 (sub1 (string-length lexeme))))
+	   (token-NAME (substring lexeme 7 (sub1 (string-length lexeme))))
+	   )]
+   
+   [(:seq "silva_" name) (token-NAME (substring lexeme 6 (string-length lexeme)))]
+
+
+   ;[number (token-NUM (string->number lexeme))]
+   [number (token-NUM  lexeme)]
+   
+   ))
+
+(define (format-pos pos)
+  (if (position-line pos)
+      (format "line ~a:~a" (position-line pos) (position-col pos))
+      (format "char ~a" (position-offset pos))))
+
+(define ws-parse
+ (parser        
+   (src-pos)
+   (start wholefile)
+   (end EOF)
+
+  (tokens value-tokens op-tokens)
+  (error (lambda (a b c start end) 
+            (printf "PARSE ERROR: after ~a token, ~a~a.\n" 
+                    (if a "valid" "invalid") b 
+                    (if c (format " carrying value ~s" c) ""))
+            (printf "  Located between ~a and ~a.\n"
+                    (format-pos start) (format-pos end))))
+   ;; Precedence:
+  ;(precs  	  )
+   ;(debug "_parser.log")
+
+   (grammar
+
+    (wholefile [(tree SemiColon) $1]
+	       ;; ACK making semicolon and final tag optional!!
+	       [(tree) $1]
+	       [(LeftParen nodes+ RightParen) (cons "0.0" $2)]
+	       )
+
+    (numtag [(Colon NUM) $2])
+
+    (tree [(NAME numtag) (list $2 $1)]
+	  [(LeftParen nodes+ RightParen numtag) (cons $4 $2)])
+    
+    (nodes+ [(tree) (list $1)]
+	    [(tree Comma nodes+) (cons $1 $3)])
+
+)))
+
+;; ================================================================================
+
+(define (ws-parse-file f) 
+    (let ([p (open-input-file f)])    
+      (let ([res (ws-parse-port p)])
+	(close-input-port p)
+	res)))
+
+(define (ws-parse-port ip)
+  (port-count-lines! ip)
+  ;; cdr
+  (ws-parse (lambda () (flatten (ws-lex ip))))
+  ;(position-token-token (ws-lex ip))
+  )
+
+(define (flatten pt)
+  ;(printf " ")
+  (let loop ((x pt))
+        (if (position-token? (position-token-token x))
+            (begin (error 'flatten "No nested position-tokens!")
+              (loop (position-token-token x)))
+            x)))
+
+(define allargs (vector->list (current-command-line-arguments)))
+(define pretty? #t)
+(define (main filename)
+  #;
+  (if pretty?      
+      (pretty-print (ws-parse-file filename))
+      (begin (write (ws-parse-file filename))
+	     (newline)
+	     ))
+
+  (define table '())
+  (define counter 1)
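+  ;; Strip everything up to the final "__" from a taxon name, record the
+  ;; (index, cleaned-name) pair in `table`, and return the cleaned name.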
+  (define (clean-name n)
+    (define matches (regexp-match #rx".*__(.*)" n))
+    (define cleaned (car (reverse matches)))
+    ;(printf "MATCHES ~a\n" matches)
+    (set! table `((,counter ,cleaned) . ,table))
+    (set! counter (+ 1 counter))
+    ;(sub1 counter)
+
+    ;; THIS IS ALL I CHANGED:
+    cleaned
+    )
+
+  (with-output-to-file (string-append filename ".nameonly")
+      (lambda ()	
+	(let loop ((x (ws-parse-file filename)))
+	  (match x
+	    ;; A leaf:
+	    [(,dist ,name) (guard (string? name))       
+	     (printf "~a:~a" (clean-name name) dist)]
+	    [(,dist ,children ...)
+	     (printf "(")
+	     (let inner ((ls children))
+	       (cond 
+		[(null? ls)       (void)]
+		[(null? (cdr ls)) (loop (car ls))]
+		[else (loop (car ls)) (printf ",")
+		      (inner (cdr ls))]))
+	     (printf "):~a" dist)]
+	    ))
+	(printf ";\n")))
+  
+  (with-output-to-file (string-append filename ".table")
+    (lambda ()
+      (for-each 
+	  (lambda (ls) (printf "~a ~a\n" (car ls) (cadr ls)))
+	(reverse table))))
+  )
+
+(print-graph #t)
+(print-vector-length #f)
+
+
+;; Here's our script invocation:
+
+;; When run in --persist mode we run in a loop.
+(if (null? allargs)
+	(error 'wsparse "No filename provided...")
+	(main (car allargs)))
+(printf "Done writing output files.\n")
+;(exit 0)
+
+) ;; End module.
+
diff --git a/phybin.cabal b/phybin.cabal
new file mode 100644
index 0000000..1e8c417
--- /dev/null
+++ b/phybin.cabal
@@ -0,0 +1,149 @@
+Name:           phybin
+Version:        0.3
+License: BSD3
+License-file:   LICENSE
+Stability: Beta
+Author:			Ryan Newton <rrnewton@gmail.com>
+Maintainer:		Ryan Newton <rrnewton@gmail.com>
+
+
+-- Version history:
+-- 0.1.2   -- first significant release
+-- 0.1.2.1 -- 
+-- 0.1.2.4 -- new release, several new features
+-- 0.1.2.5 -- bump for graphviz API changes
+-- 0.2 -- Add Robinson-Foulds distance, use Int labels.
+-- 0.2.2 -- misc changes and expose library
+-- 0.2.4 -- add consensus trees
+-- 0.2.6 -- colorization, hashrf, misc improvements
+-- 0.2.7 -- Add command line opt --showtrees
+-- 0.2.8 -- Add command line opt --highlight
+-- 0.2.9 -- Add command line opt --interior
+-- 0.2.10 -- Add command line opt --matching
+-- 0.2.11 -- Cleanup and Windows compatibility.
+-- 0.3    -- significant improvements and new functionality.
+
+-- homepage: http://code.haskell.org/phybin
+-- homepage: http://people.csail.mit.edu/newton/phybin/
+homepage: http://www.cs.indiana.edu/~rrnewton/projects/phybin/
+
+Copyright: Copyright (c) 2010 Ryan Newton
+Synopsis: Utility for clustering phylogenetic trees in Newick format based on Robinson-Foulds distance.
+Description: 
+   This package provides a library and executable for dealing with Newick tree files.
+   .
+   It can do simple binning of identical trees or more complex clustering based on 
+   an all-to-all Robinson-Foulds distance matrix.
+   .
+   phybin produces output files that characterize the size and contents of each bin or cluster (including
+   generating GraphViz-based visual representations of the tree topologies).
+
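+-- A typical invocation (illustrative only; see README.md and the shell
+-- scripts in this repository for concrete usages):
+--   phybin -b 0.02 -n 10 -g -o phybin_output/ trees/*.tr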
+Category: Bioinformatics
+Cabal-Version: >= 1.8
+
+Extra-source-files: README.md
+
+Build-type: Simple
+
+Source-repository head
+  type:     git
+  location: git://github.com/rrnewton/PhyBin.git
+
+Flag hashrf
+  description: Use the HashRF algorithm by default instead of the naive one.
+  default: True
+
+Flag bitvec
+  description: Use bitvectors rather than IntSets for bipartitions.
+  default: False
+
+Flag sequential
+  description: Don't use any parallelism at all.
+  default: False
+
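+-- These flags can be toggled at build time with cabal-install, for example:
+--   cabal install phybin --flags="-hashrf bitvec"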
+Library
+  Exposed-modules:   Bio.Phylogeny.PhyBin
+                     Bio.Phylogeny.PhyBin.Binning
+                     Bio.Phylogeny.PhyBin.CoreTypes                     
+                     Bio.Phylogeny.PhyBin.Parser
+                     Bio.Phylogeny.PhyBin.PreProcessor
+                     Bio.Phylogeny.PhyBin.RFDistance  
+                     Bio.Phylogeny.PhyBin.Util
+                     Bio.Phylogeny.PhyBin.Visualize
+  -- Data.BitList
+  Build-Depends:     base >= 3 && < 5, directory, process, containers, 
+                     async, time,
+                     filepath, 
+                     graphviz >= 2999.16, 
+                     text >= 0.11 && < 0.12,
+                     prettyclass, fgl,
+                     HUnit, bytestring, 
+                     -- For bytestring.lazy support:
+                     parsec >= 3.1.0, 
+                     vector >= 0.10,
+                     hierarchical-clustering >= 0.4, split >= 0.2
+--                     lattice-par, 
+  GHC-Options: -O2 -funbox-strict-fields -rtsopts
+
+  if flag(hashrf)
+     CPP-Options: -DUSE_HASHRF
+  if flag(bitvec)
+     CPP-Options: -DBITVEC_BIPS
+     Build-Depends:  bitvec >= 0.1
+  if flag(sequential)
+     CPP-Options: -DSEQUENTIALIZE
+
+Executable phybin
+  Main-is:           Main.hs
+  Build-Depends:     phybin
+  -- DUPLICATE:
+  Build-Depends:     base >= 3 && < 5, directory, process, containers, 
+                     async, time,
+                     filepath, 
+                     graphviz >= 2999.16, 
+                     text >= 0.11 && < 0.12,
+                     prettyclass, fgl,
+                     HUnit, bytestring, 
+                     -- For bytestring.lazy support:
+                     parsec >= 3.1.0, 
+                     vector >= 0.10,
+                     hierarchical-clustering >= 0.4, split >= 0.2
+--                     lattice-par, 
+  GHC-Options: -O2 -funbox-strict-fields -rtsopts -threaded
+
+  if flag(hashrf)
+     CPP-Options: -DUSE_HASHRF
+  if flag(bitvec)
+     CPP-Options: -DBITVEC_BIPS
+     Build-Depends:  bitvec >= 0.1
+  if flag(sequential)
+     CPP-Options: -DSEQUENTIALIZE
+
+
+-- DUPLICATED from executable:
+Test-Suite test-phybin
+  Main-is:           TestAll.hs
+  Type:              exitcode-stdio-1.0
+--  Build-Depends:     phybin
+  Build-Depends:     base >= 3 && < 5, directory, process, containers, 
+                     async, time,
+                     filepath, 
+                     graphviz >= 2999.16, 
+                     text >= 0.11 && < 0.12,
+                     prettyclass, fgl,
+                     HUnit, bytestring, 
+                     parsec >= 3.1.0, 
+                     vector >= 0.10,
+                     hierarchical-clustering >= 0.4, split >= 0.2
+  -- Additional depends for test:
+  Build-Depends:  HUnit, test-framework, test-framework-hunit, test-framework-th
+  GHC-Options: -O2 -funbox-strict-fields -rtsopts -threaded
+
+  if flag(hashrf)
+     CPP-Options: -DUSE_HASHRF
+  if flag(bitvec)
+     CPP-Options: -DBITVEC_BIPS
+     Build-Depends:  bitvec >= 0.1
+  if flag(sequential)
+     CPP-Options: -DSEQUENTIALIZE
+
diff --git a/point_check.sh b/point_check.sh
new file mode 100755
index 0000000..3a213c5
--- /dev/null
+++ b/point_check.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+
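+# Run phybin on two sets of Wolbachia codon trees at once (selected by the
+# two filename-suffix arguments) so they are binned/clustered together.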
+arg1=$1
+arg2=$2
+
+phybin -p2 -b0.02 -n10 --complete --editdist=0 --rfdist Wolbachia/trees/*.$arg1.codon Wolbachia/trees/*.$arg2.codon
diff --git a/stripTable.hs b/stripTable.hs
new file mode 100644
index 0000000..bb2a8e8
--- /dev/null
+++ b/stripTable.hs
@@ -0,0 +1,50 @@
+
+
+-- | This is a quick hack to split up a table in a tab-delimited file of
+-- the format Irene produces.  The result is a simple two-column,
+-- white-space delimited table of the kind phybin expects.
+
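+-- Example usage (file names are hypothetical):
+--   runghc stripTable.hs roundup_table.txt > two_column_table.txt
+-- Diagnostics go to stderr; the two-column table is written to stdout.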
+
+import System.Environment (getArgs)
+
+import Control.Monad
+import Data.List
+import Data.List.Split
+
+import System.IO
+
+
+isDataLine :: String -> Bool
+isDataLine l =
+  case words l of
+    ("Roundup":"Orthology":_) -> False
+    (_:"results":"found":_)   -> False
+    ("Gene":"Cluster":_) -> False
+    ("Id":"Genome":_)    -> False
+    []                   -> False
+    _                    -> True
+
+main = do
+  args <- getArgs
+  let file = case args of
+              [f] -> f
+              _   -> error "Expects one argument!! [filename]"
+
+  raw <- readFile file
+  let lns  = lines raw
+      filt = filter isDataLine lns
+      toks = map (splitOn ['\t']) filt
+      put  = hPutStrLn stderr
+  
+  put$"Read "++show (length lns)++" lines from file "++show file
+  put$"     "++show (length filt)++" contain data"
+  put$"  Distinct #toks found on data lines: " ++ show(nub (map length toks))
+  put$"  A sample of ten parsed lines :"
+  mapM_ (put .  ("    "++) . show) $ take 10 $ toks
+
+  put$"  Echoing columns one and two in space-separated form:"
+
+  forM_ toks $ \ (one:two:_) ->
+    putStrLn$ one ++"  "++ two
+
+  return ()
diff --git a/tests/t1/112.tr b/tests/t1/112.tr
new file mode 100644
index 0000000..d5425ac
--- /dev/null
+++ b/tests/t1/112.tr
@@ -0,0 +1 @@
+((6_134_D-alanine--D-alanine_ligase_wRi_WRi_001550:0.00139828386866881348,7_88_D-alanine--D-alanine_ligase_wMel_WD0095:0.00292963228134226648)90:0.01074298822528597409,((1_177_D-alanine--D-alanine_ligase_family_protein_wUni-gwu_gwu_177:0.00000260053623247659,2_1056_D-alanine--D-alanine_ligase_family_protein_wVitA-gwv_gwv_1056:0.00000260053623247659)100:0.04173654858419943714,((3_584_D-alanine--D-alanine_ligase_wBm_Wbm0570:0.12287024242687821785,14_97_D-alanine--D-alanine_ligase_wOo_wOo_0 [...]
diff --git a/tests/t1/13.tr b/tests/t1/13.tr
new file mode 100644
index 0000000..f6526dd
--- /dev/null
+++ b/tests/t1/13.tr
@@ -0,0 +1 @@
+(1_903_uncharacterized_protein_involved_in_outer_membrane_biogenesis_wUni-gwu_gwu_903:0.00040202748005558947,(6_829_hypothetical_protein_wRi_WRi_009630:0.01489729960416468282,(7_929_hypothetical_protein_wMel_WD1011:0.00503560768415985908,(18_792_hypothetical_protein_wHa_wHa_08450:0.00231874015436256235,((14_125_hypothetical_protein_wOo_wOo_01830:0.25811131256411451451,3_124_hypothetical_protein_wBm_Wbm0121:0.13916197071632291360)100:0.06927753636164095397,(19_321_hypothetical_protein_wNo [...]
diff --git a/tests/t2/116.tr b/tests/t2/116.tr
new file mode 100644
index 0000000..d5a3a0c
--- /dev/null
+++ b/tests/t2/116.tr
@@ -0,0 +1 @@
+(3_1_uroporphyrinogen-III_decarboxylase_wBm_Wbm0001:0.24700160468123241730,((((6_846_uroporphyrinogen_decarboxylase_wRi_WRi_009840:0.00000450221771908982,18_807_Uroporphyrinogen_decarboxylase_wHa_wHa_08610:0.00294819211409539598)36:0.00000450221771908982,7_945_uroporphyrinogen_decarboxylase_wMel_WD1028:0.00196333316748527244)98:0.00593311441869561872,(2_624_uroporphyrinogen_decarboxylase_wVitA-gwv_gwv_624:0.00000450221771908982,1_914_uroporphyrinogen_decarboxylase_wUni-gwu_gwu_914:0.0009 [...]
diff --git a/tests/t2/233.tr b/tests/t2/233.tr
new file mode 100644
index 0000000..2656a8d
--- /dev/null
+++ b/tests/t2/233.tr
@@ -0,0 +1 @@
+(5_540_TrbL/VirB6_plasmid_conjugal_transfer_family_protein_wPip-Pel_WPa_0601:0.02084954811541155431,(19_278_Type_IV_secretion_system_protein_VirB6_putative_wNo_wNo_02980:0.05379840855764397162,((3_826_Type_IV_secretory_pathway_VirB6_components_wBm_Wbm0795:0.13408854198340605657,14_87_type_IV_secretory_pathway_VirB6_components_wOo_wOo_01320:0.21280678010849515824)79:0.04287407238705213258,((2_331_trbL/VirB6_plasmid_conjugal_transfer_family_protein_wVitA-gwv_gwv_331:0.00000263655140029384, [...]
diff --git a/tests/t30_mismatched/mismatched.tr b/tests/t30_mismatched/mismatched.tr
new file mode 100644
index 0000000..9b788c9
--- /dev/null
+++ b/tests/t30_mismatched/mismatched.tr
@@ -0,0 +1,3 @@
+(2_, (((14, 3_), (19, (5_, 13))), (18, (6_, 7_))), 1_);
+(2_, (((14, 3_), (19, (5_, 13))), (6_, 7_)), 1_);
+(2_, (((14, 3_), (19, (5_, 13))), (18, 6_, 7_)), 1_);
diff --git a/tests/t3_consensus/cluster1_284_alltrees.tr b/tests/t3_consensus/cluster1_284_alltrees.tr
new file mode 100644
index 0000000..e2f1176
--- /dev/null
+++ b/tests/t3_consensus/cluster1_284_alltrees.tr
@@ -0,0 +1,284 @@
+(3_, (((13, 5_), 19), ((2_, 1_), ((7_, 6_), 18))), 14);
+(2_, (((7_, 6_), 18), (((5_, 13), 19), (3_, 14))), 1_);
+(6_, (18, (((19, (5_, 13)), (3_, 14)), (2_, 1_))), 7_);
+((7_, 6_), (((19, (5_, 13)), (14, 3_)), (1_, 2_)), 18);
+((13, 5_), ((14, 3_), (((6_, 7_), 18), (1_, 2_))), 19);
+(2_, (((14, 3_), (19, (5_, 13))), (18, (6_, 7_))), 1_);
+((5_, 13), ((14, 3_), ((18, (7_, 6_)), (2_, 1_))), 19);
+(14, ((((7_, 6_), 18), (2_, 1_)), ((5_, 13), 19)), 3_);
+(13, (((3_, 14), ((1_, 2_), (18, (6_, 7_)))), 19), 5_);
+((13, 5_), ((((7_, 6_), (1_, 2_)), 18), (14, 3_)), 19);
+(5_, (19, ((18, ((6_, 7_), (2_, 1_))), (14, 3_))), 13);
+(1_, (((((5_, 13), 19), (14, 3_)), 18), (6_, 7_)), 2_);
+(((18, ((6_, 7_), (2_, 1_))), (3_, 14)), (5_, 13), 19);
+(((7_, 6_), (1_, 2_)), ((19, (5_, 13)), (14, 3_)), 18);
+(1_, ((((14, 3_), ((13, 5_), 19)), 18), (6_, 7_)), 2_);
+(6_, ((((19, (13, 5_)), (3_, 14)), 18), (1_, 2_)), 7_);
+(14, (((5_, 13), 19), (((2_, 1_), 18), (7_, 6_))), 3_);
+(5_, (19, ((((2_, 1_), 18), (7_, 6_)), (14, 3_))), 13);
+(2_, (18, (((14, 3_), (19, (5_, 13))), (7_, 6_))), 1_);
+((((14, 3_), (19, (5_, 13))), (6_, 7_)), (1_, 2_), 18);
+(2_, (((((5_, 19), 13), (3_, 14)), 18), (7_, 6_)), 1_);
+(14, ((((1_, 2_), (6_, 7_)), 18), (13, (5_, 19))), 3_);
+(((3_, 14), (13, (5_, 19))), ((7_, 6_), (2_, 1_)), 18);
+(7_, ((1_, 2_), (18, ((3_, 14), (13, (19, 5_))))), 6_);
+(14, ((18, ((6_, 7_), (2_, 1_))), (13, (19, 5_))), 3_);
+(6_, ((((13, (5_, 19)), (3_, 14)), 18), (2_, 1_)), 7_);
+(3_, (((19, 5_), 13), (18, ((6_, 7_), (1_, 2_)))), 14);
+(((1_, 2_), (6_, 7_)), ((3_, 14), (13, (5_, 19))), 18);
+((6_, 7_), ((1_, 2_), ((3_, 14), (13, (19, 5_)))), 18);
+(3_, ((13, (19, 5_)), ((18, (6_, 7_)), (2_, 1_))), 14);
+(19, ((((2_, 1_), (18, (6_, 7_))), (14, 3_)), 13), 5_);
+(7_, (18, ((1_, 2_), ((3_, 14), (13, (19, 5_))))), 6_);
+(14, (((18, (6_, 7_)), (2_, 1_)), (13, (19, 5_))), 3_);
+(6_, (18, ((2_, 1_), (((19, 5_), 13), (14, 3_)))), 7_);
+((7_, 6_), ((1_, 2_), ((14, 3_), (13, (19, 5_)))), 18);
+(7_, (((13, (19, 5_)), (3_, 14)), (18, (2_, 1_))), 6_);
+(3_, (((18, (2_, 1_)), (7_, 6_)), (13, (5_, 19))), 14);
+(14, ((5_, (13, 19)), ((6_, 7_), ((2_, 1_), 18))), 3_);
+(13, (5_, ((3_, 14), ((18, 7_), (6_, (2_, 1_))))), 19);
+(3_, (((6_, (2_, 1_)), (18, 7_)), (5_, (13, 19))), 14);
+((19, 13), (((6_, (1_, 2_)), (7_, 18)), (3_, 14)), 5_);
+(((7_, 18), ((14, 3_), (5_, (19, 13)))), (1_, 2_), 6_);
+(1_, (((18, 7_), 6_), ((5_, (19, 13)), (14, 3_))), 2_);
+(3_, (((13, 19), 5_), ((6_, (18, 7_)), (1_, 2_))), 14);
+(3_, (((1_, 2_), ((7_, 18), 6_)), (5_, (19, 13))), 14);
+(3_, ((5_, (13, 19)), ((1_, 2_), ((18, 7_), 6_))), 14);
+((18, 7_), (((14, 3_), (5_, (13, 19))), (2_, 1_)), 6_);
+(7_, (6_, ((2_, 1_), (((13, 19), 5_), (14, 3_)))), 18);
+(((14, 3_), ((2_, 1_), ((7_, 18), 6_))), (13, 19), 5_);
+(18, ((2_, 1_), ((((19, 13), 5_), (14, 3_)), 6_)), 7_);
+(19, (5_, ((6_, ((1_, 2_), (7_, 18))), (3_, 14))), 13);
+(3_, ((6_, ((2_, 1_), (18, 7_))), (5_, (13, 19))), 14);
+(((2_, 1_), (7_, 18)), ((5_, (19, 13)), (14, 3_)), 6_);
+((19, 13), ((6_, ((2_, 1_), (7_, 18))), (3_, 14)), 5_);
+(2_, ((7_, 18), (((3_, 14), ((19, 13), 5_)), 6_)), 1_);
+(13, (5_, ((6_, ((1_, 2_), (18, 7_))), (3_, 14))), 19);
+(14, ((5_, (13, 19)), (((2_, 1_), (18, 7_)), 6_)), 3_);
+(13, (5_, ((6_, ((1_, 2_), (18, 7_))), (3_, 14))), 19);
+(13, (5_, ((((2_, 1_), (18, 6_)), 7_), (14, 3_))), 19);
+((19, 13), ((14, 3_), (7_, ((1_, 2_), (18, 6_)))), 5_);
+(2_, ((6_, 18), (7_, ((5_, (19, 13)), (3_, 14)))), 1_);
+(((18, 6_), (1_, 2_)), ((5_, (13, 19)), (3_, 14)), 7_);
+(13, (5_, ((14, 3_), (((1_, 2_), (6_, 18)), 7_))), 19);
+(((3_, 14), (7_, ((2_, 1_), (6_, 18)))), (19, 13), 5_);
+((19, 13), ((7_, ((6_, 18), (1_, 2_))), (14, 3_)), 5_);
+(6_, ((((5_, (19, 13)), (14, 3_)), 7_), (2_, 1_)), 18);
+(6_, ((7_, ((3_, 14), (5_, (19, 13)))), (1_, 2_)), 18);
+(1_, (((5_, (13, 19)), (3_, 14)), (7_, (18, 6_))), 2_);
+((19, 13), (((7_, (6_, 18)), (2_, 1_)), (3_, 14)), 5_);
+((13, 19), (((2_, 1_), ((6_, 18), 7_)), (14, 3_)), 5_);
+(1_, (((5_, (13, 19)), (3_, 14)), (7_, (6_, 18))), 2_);
+(6_, (7_, (((14, 3_), (5_, (13, 19))), (2_, 1_))), 18);
+(1_, ((7_, (6_, 18)), ((5_, (19, 13)), (14, 3_))), 2_);
+(14, ((5_, (19, 13)), ((2_, 1_), ((6_, 18), 7_))), 3_);
+(1_, (((3_, 14), (5_, (13, 19))), (7_, (18, 6_))), 2_);
+((13, 19), (((2_, 1_), (7_, (18, 6_))), (14, 3_)), 5_);
+(2_, (((5_, (13, 19)), (14, 3_)), (7_, (18, 6_))), 1_);
+(1_, (((6_, 18), 7_), ((3_, 14), (5_, (13, 19)))), 2_);
+(14, (((7_, (6_, 18)), (2_, 1_)), (5_, (13, 19))), 3_);
+(18, ((7_, (1_, 2_)), ((5_, (13, 19)), (3_, 14))), 6_);
+(((18, 6_), ((5_, (13, 19)), (3_, 14))), (2_, 1_), 7_);
+(1_, (7_, ((18, 6_), ((3_, 14), ((13, 19), 5_)))), 2_);
+(19, (5_, ((14, 3_), (((1_, 2_), 7_), (18, 6_)))), 13);
+(18, (((5_, (13, 19)), (14, 3_)), (7_, (1_, 2_))), 6_);
+((1_, 2_), (((5_, (19, 13)), (14, 3_)), (18, 6_)), 7_);
+(2_, ((7_, 6_), (((3_, 14), ((13, 19), 5_)), 18)), 1_);
+(14, ((5_, (19, 13)), (((7_, 6_), (1_, 2_)), 18)), 3_);
+(19, (((14, 3_), (((2_, 1_), (6_, 7_)), 18)), 5_), 13);
+(2_, ((((5_, (19, 13)), (3_, 14)), 18), (7_, 6_)), 1_);
+((13, 19), ((18, ((6_, 7_), (1_, 2_))), (3_, 14)), 5_);
+(2_, (((((14, 3_), ((19, 13), 5_)), 18), 6_), 7_), 1_);
+(((7_, (1_, 2_)), 6_), ((14, 3_), (5_, (13, 19))), 18);
+(1_, (7_, (6_, (18, ((5_, (13, 19)), (3_, 14))))), 2_);
+(((14, 3_), (18, ((7_, (1_, 2_)), 6_))), (13, 19), 5_);
+((13, 19), ((18, (6_, (7_, (1_, 2_)))), (3_, 14)), 5_);
+(3_, ((((6_, 18), 7_), (2_, 1_)), (19, (13, 5_))), 14);
+(2_, ((((5_, 13), 19), (14, 3_)), (7_, (6_, 18))), 1_);
+(2_, (((3_, 14), ((5_, 13), 19)), ((6_, 18), 7_)), 1_);
+(1_, (((19, (13, 5_)), (3_, 14)), ((18, 6_), 7_)), 2_);
+(((2_, 1_), (((13, 5_), 19), (3_, 14))), (6_, 18), 7_);
+(5_, (19, ((3_, 14), ((2_, 1_), (7_, (6_, 18))))), 13);
+((6_, 18), (((19, (13, 5_)), (14, 3_)), (1_, 2_)), 7_);
+(((2_, 1_), ((19, (13, 5_)), (14, 3_))), (6_, 18), 7_);
+(2_, (((19, (5_, 13)), (3_, 14)), ((6_, 18), 7_)), 1_);
+(5_, (((3_, 14), (((6_, 18), 7_), (1_, 2_))), 19), 13);
+(13, (19, (((2_, 1_), ((18, 6_), 7_)), (3_, 14))), 5_);
+(14, ((19, (5_, 13)), ((7_, (6_, 18)), (2_, 1_))), 3_);
+(5_, (19, ((3_, 14), ((1_, 2_), (7_, (18, 6_))))), 13);
+(5_, (19, ((14, 3_), ((1_, 2_), (7_, (18, 6_))))), 13);
+(18, (7_, ((1_, 2_), ((14, 3_), (19, (5_, 13))))), 6_);
+(5_, (19, (((1_, 2_), ((6_, 18), 7_)), (3_, 14))), 13);
+(14, ((19, (5_, 13)), (((2_, 1_), 7_), (18, 6_))), 3_);
+(14, ((19, (13, 5_)), ((7_, (2_, 1_)), (18, 6_))), 3_);
+(14, (((18, 6_), ((1_, 2_), 7_)), (19, (13, 5_))), 3_);
+(2_, (7_, ((((13, 5_), 19), (14, 3_)), (18, 6_))), 1_);
+(3_, (((5_, 13), 19), ((6_, 18), (7_, (2_, 1_)))), 14);
+(1_, (((18, 6_), ((19, (13, 5_)), (3_, 14))), 7_), 2_);
+(14, ((19, (13, 5_)), ((6_, 18), (7_, (2_, 1_)))), 3_);
+(18, ((7_, (2_, 1_)), ((3_, 14), (19, (13, 5_)))), 6_);
+(((3_, 14), ((18, 6_), (7_, (2_, 1_)))), (5_, 13), 19);
+((5_, 13), ((14, 3_), (7_, ((2_, 1_), (6_, 18)))), 19);
+(3_, ((19, (13, 5_)), (7_, ((2_, 1_), (18, 6_)))), 14);
+(2_, ((7_, ((3_, 14), (19, (13, 5_)))), (6_, 18)), 1_);
+(6_, ((((3_, 14), ((5_, 13), 19)), 7_), (2_, 1_)), 18);
+(18, ((1_, 2_), (7_, ((14, 3_), (19, (5_, 13))))), 6_);
+(3_, ((19, (13, 5_)), (7_, ((2_, 1_), (6_, 18)))), 14);
+(2_, ((18, 6_), (7_, (((13, 5_), 19), (14, 3_)))), 1_);
+(1_, ((((14, 3_), (19, (5_, 13))), 7_), (6_, 18)), 2_);
+(6_, ((7_, ((19, (13, 5_)), (14, 3_))), (1_, 2_)), 18);
+((5_, 13), ((3_, 14), (7_, ((2_, 1_), (18, 6_)))), 19);
+((((((5_, 13), 19), (3_, 14)), 7_), 6_), (2_, 1_), 18);
+(13, (19, ((((18, (1_, 2_)), 6_), 7_), (3_, 14))), 5_);
+((6_, (18, (1_, 2_))), ((3_, 14), (19, (13, 5_))), 7_);
+(1_, ((6_, (7_, ((14, 3_), (19, (5_, 13))))), 18), 2_);
+(1_, (18, (7_, (6_, ((3_, 14), (19, (5_, 13)))))), 2_);
+(((3_, 14), (19, (5_, 13))), (7_, (18, (1_, 2_))), 6_);
+(5_, (19, ((3_, 14), (6_, (7_, (18, (1_, 2_)))))), 13);
+(5_, (19, ((14, 3_), (6_, (7_, ((2_, 1_), 18))))), 13);
+((5_, 13), ((6_, (((2_, 1_), 18), 7_)), (14, 3_)), 19);
+(5_, (19, (((((2_, 1_), 18), 7_), 6_), (14, 3_))), 13);
+((5_, 13), (((7_, (18, (2_, 1_))), 6_), (3_, 14)), 19);
+((5_, 13), ((3_, 14), (6_, ((1_, 2_), (7_, 18)))), 19);
+(1_, ((18, 7_), (((19, (5_, 13)), (14, 3_)), 6_)), 2_);
+(7_, ((6_, ((14, 3_), (19, (13, 5_)))), (1_, 2_)), 18);
+(5_, (19, ((3_, 14), (6_, ((1_, 2_), (18, 7_))))), 13);
+(3_, ((((18, 7_), (2_, 1_)), 6_), (19, (13, 5_))), 14);
+(5_, (19, ((6_, ((2_, 1_), (7_, 18))), (3_, 14))), 13);
+(3_, ((((18, 7_), (2_, 1_)), 6_), (19, (5_, 13))), 14);
+(((2_, 1_), (7_, 18)), ((14, 3_), (19, (5_, 13))), 6_);
+((13, 5_), ((14, 3_), (6_, ((1_, 2_), (7_, 18)))), 19);
+(3_, ((19, (5_, 13)), (((1_, 2_), 6_), (18, 7_))), 14);
+(13, (19, (((7_, 18), (6_, (2_, 1_))), (14, 3_))), 5_);
+(7_, ((6_, (2_, 1_)), (((13, 5_), 19), (3_, 14))), 18);
+(3_, ((19, (5_, 13)), ((6_, (2_, 1_)), (18, 7_))), 14);
+(7_, ((6_, (1_, 2_)), ((3_, 14), (19, (5_, 13)))), 18);
+(1_, ((((19, (5_, 13)), (3_, 14)), (18, 7_)), 6_), 2_);
+(((14, 3_), (18, (7_, ((1_, 2_), 6_)))), (19, 5_), 13);
+(5_, (13, ((14, 3_), (18, ((6_, (2_, 1_)), 7_)))), 19);
+((19, 5_), ((3_, 14), (18, (7_, ((1_, 2_), 6_)))), 13);
+((7_, ((((19, 5_), 13), (14, 3_)), 18)), (1_, 2_), 6_);
+(19, (13, ((18, ((6_, (1_, 2_)), 7_)), (3_, 14))), 5_);
+((1_, 2_), (7_, (18, ((14, 3_), (13, (5_, 19))))), 6_);
+(14, ((18, (7_, ((1_, 2_), 6_))), (5_, (19, 13))), 3_);
+(2_, (6_, ((((14, 3_), (5_, (19, 13))), 18), 7_)), 1_);
+((2_, 1_), (7_, (18, ((5_, (19, 13)), (14, 3_)))), 6_);
+(((2_, 1_), 6_), (18, ((3_, 14), (5_, (13, 19)))), 7_);
+((7_, ((1_, 2_), 6_)), ((3_, 14), (5_, (19, 13))), 18);
+(13, (((14, 3_), ((7_, (6_, (2_, 1_))), 18)), 19), 5_);
+(1_, (6_, (7_, (18, ((14, 3_), (19, (13, 5_)))))), 2_);
+(14, ((19, (5_, 13)), ((((1_, 2_), 6_), 7_), 18)), 3_);
+(2_, ((7_, (((19, (5_, 13)), (14, 3_)), 18)), 6_), 1_);
+((18, ((3_, 14), ((5_, 13), 19))), (6_, (1_, 2_)), 7_);
+((19, 13), ((7_, ((6_, (2_, 1_)), 18)), (3_, 14)), 5_);
+(((6_, (1_, 2_)), 18), ((14, 3_), (5_, (13, 19))), 7_);
+(14, (((((1_, 2_), 6_), 18), 7_), (5_, (13, 19))), 3_);
+(((((3_, 14), ((13, 5_), 19)), 7_), 18), (2_, 1_), 6_);
+((13, 5_), ((7_, (18, (6_, (1_, 2_)))), (14, 3_)), 19);
+(5_, (19, ((7_, (18, ((2_, 1_), 6_))), (3_, 14))), 13);
+(13, (19, ((3_, 14), ((((2_, 1_), 6_), 18), 7_))), 5_);
+(2_, (6_, ((((19, (5_, 13)), (14, 3_)), 7_), 18)), 1_);
+(3_, (((5_, 13), 19), (7_, (18, ((1_, 2_), 6_)))), 14);
+(((6_, (1_, 2_)), 18), (((19, 5_), 13), (3_, 14)), 7_);
+((((14, 3_), ((19, 5_), 13)), 7_), (6_, (1_, 2_)), 18);
+((7_, ((13, (19, 5_)), (3_, 14))), (6_, (2_, 1_)), 18);
+(5_, (13, ((14, 3_), ((18, (6_, (1_, 2_))), 7_))), 19);
+(1_, (6_, (((((19, 5_), 13), (14, 3_)), 7_), 18)), 2_);
+(2_, (7_, (18, (6_, ((14, 3_), ((13, 5_), 19))))), 1_);
+(14, (((((2_, 1_), 7_), 18), 6_), (19, (13, 5_))), 3_);
+(14, ((19, (5_, 13)), (6_, ((7_, (2_, 1_)), 18))), 3_);
+(14, ((19, (5_, 13)), (6_, (18, (7_, (1_, 2_))))), 3_);
+(1_, (7_, (((((13, 5_), 19), (3_, 14)), 6_), 18)), 2_);
+((18, (7_, (1_, 2_))), (((5_, 13), 19), (14, 3_)), 6_);
+((7_, (1_, 2_)), (6_, ((19, (5_, 13)), (3_, 14))), 18);
+(5_, (19, ((6_, (18, (7_, (1_, 2_)))), (14, 3_))), 13);
+((18, (((3_, 14), (5_, (19, 13))), 6_)), (1_, 2_), 7_);
+(1_, (7_, (18, (((5_, (19, 13)), (3_, 14)), 6_))), 2_);
+(1_, (7_, ((6_, ((5_, (13, 19)), (14, 3_))), 18)), 2_);
+(3_, ((6_, (18, ((2_, 1_), 7_))), (5_, (19, 13))), 14);
+(3_, ((5_, (19, 13)), (6_, (18, ((1_, 2_), 7_)))), 14);
+((13, 19), ((6_, (18, (7_, (2_, 1_)))), (14, 3_)), 5_);
+(14, (((19, 13), 5_), ((18, ((2_, 1_), 7_)), 6_)), 3_);
+((6_, (((13, 19), 5_), (3_, 14))), (7_, (1_, 2_)), 18);
+(14, ((6_, (18, ((2_, 1_), 7_))), (13, (5_, 19))), 3_);
+(2_, (7_, (18, (((13, (5_, 19)), (3_, 14)), 6_))), 1_);
+((1_, 2_), (18, (((3_, 14), (13, (19, 5_))), 6_)), 7_);
+(((13, (19, 5_)), (14, 3_)), (18, (7_, (2_, 1_))), 6_);
+(3_, ((((7_, (2_, 1_)), 18), 6_), (13, (5_, 19))), 14);
+(14, ((6_, (((1_, 2_), 7_), 18)), (13, (19, 5_))), 3_);
+(((1_, 2_), 7_), (6_, ((14, 3_), ((19, 5_), 13))), 18);
+((((13, (5_, 19)), (14, 3_)), 6_), (7_, (2_, 1_)), 18);
+((5_, 19), ((6_, (18, ((7_, 2_), 1_))), (3_, 14)), 13);
+(2_, (1_, (18, (((13, (19, 5_)), (14, 3_)), 6_))), 7_);
+((18, (1_, (7_, 2_))), ((14, 3_), (13, (19, 5_))), 6_);
+(7_, ((18, (((14, 3_), (13, (5_, 19))), 6_)), 1_), 2_);
+(1_, (2_, (18, (6_, ((13, (19, 5_)), (14, 3_))))), 7_);
+(18, ((1_, 2_), (6_, (((19, 5_), 13), (3_, 14)))), 7_);
+((7_, ((2_, 1_), 18)), (((5_, 19), 13), (3_, 14)), 6_);
+(19, (13, ((6_, (7_, (18, (2_, 1_)))), (3_, 14))), 5_);
+((19, 5_), ((6_, (7_, (18, (1_, 2_)))), (3_, 14)), 13);
+(5_, (13, ((14, 3_), (6_, (7_, (18, (1_, 2_)))))), 19);
+(19, (13, (((7_, (18, (2_, 1_))), 6_), (3_, 14))), 5_);
+((7_, ((1_, 2_), 18)), ((13, (19, 5_)), (3_, 14)), 6_);
+(((14, 3_), ((18, 6_), (7_, (2_, 1_)))), (19, 5_), 13);
+(6_, ((7_, (2_, 1_)), ((13, (19, 5_)), (3_, 14))), 18);
+(((3_, 14), (((1_, 2_), 7_), (18, 6_))), (5_, 19), 13);
+((5_, 19), (((18, 6_), (7_, (2_, 1_))), (14, 3_)), 13);
+(5_, (((14, 3_), ((6_, 18), (7_, (2_, 1_)))), 13), 19);
+(3_, (((7_, (2_, 1_)), (6_, 18)), (13, (19, 5_))), 14);
+((5_, 19), (((6_, 18), (7_, (1_, 2_))), (3_, 14)), 13);
+(5_, (13, ((7_, (2_, 1_)), ((6_, 18), (14, 3_)))), 19);
+(18, (((13, (5_, 19)), (7_, (2_, 1_))), (3_, 14)), 6_);
+(1_, (7_, ((14, 3_), (6_, (18, (13, (19, 5_)))))), 2_);
+(19, (13, (18, (((14, 3_), 6_), (7_, (2_, 1_))))), 5_);
+((6_, ((14, (3_, (13, (5_, 19)))), 18)), (2_, 1_), 7_);
+(2_, (7_, (6_, ((14, (3_, (13, (19, 5_)))), 18))), 1_);
+((6_, (18, ((14, 3_), (13, (19, 5_))))), (1_, 2_), 7_);
+(5_, (13, (((((1_, 2_), 7_), 6_), 18), (3_, 14))), 19);
+((6_, (7_, (2_, 1_))), ((3_, 14), (13, (19, 5_))), 18);
+((7_, (1_, 2_)), (18, ((14, 3_), ((5_, 19), 13))), 6_);
+(14, ((13, (5_, 19)), (18, (6_, (7_, (1_, 2_))))), 3_);
+((7_, (1_, 2_)), (18, ((13, (19, 5_)), (3_, 14))), 6_);
+(2_, (7_, ((18, ((13, (5_, 19)), (3_, 14))), 6_)), 1_);
+(2_, (((18, (((5_, 19), 13), (14, 3_))), 6_), 7_), 1_);
+(1_, (7_, ((18, (3_, (14, (13, (19, 5_))))), 6_)), 2_);
+((19, 5_), (3_, (14, (18, ((2_, 1_), (6_, 7_))))), 13);
+(18, (6_, (((3_, 14), ((5_, 19), 13)), (1_, 2_))), 7_);
+((((13, (5_, 19)), (3_, 14)), (2_, 1_)), (7_, 18), 6_);
+((((14, 3_), ((19, 5_), 13)), (1_, 2_)), (18, 7_), 6_);
+(7_, (6_, ((1_, 2_), ((3_, 14), ((5_, 19), 13)))), 18);
+(2_, ((6_, (18, 7_)), ((14, 3_), (13, (19, 5_)))), 1_);
+(18, (6_, (((3_, 14), ((19, 5_), 13)), (1_, 2_))), 7_);
+(14, (((19, 5_), 13), ((2_, 1_), ((7_, 18), 6_))), 3_);
+(2_, ((7_, (6_, 18)), ((14, 3_), (13, (19, 5_)))), 1_);
+(19, ((((2_, 1_), (7_, (6_, 18))), (3_, 14)), 13), 5_);
+((18, 6_), (((13, (19, 5_)), (14, 3_)), (2_, 1_)), 7_);
+(2_, (((14, 3_), ((19, 5_), 13)), (7_, (18, 6_))), 1_);
+((5_, 19), ((3_, 14), (((6_, 18), 7_), (2_, 1_))), 13);
+(6_, (7_, (((3_, 14), (13, (5_, 19))), (2_, 1_))), 18);
+((((5_, 19), 13), 14), ((1_, 2_), (18, (7_, 6_))), 3_);
+(((((13, (19, 5_)), 14), 3_), (2_, 1_)), (7_, 6_), 18);
+((6_, 7_), ((3_, (14, (13, (19, 5_)))), (1_, 2_)), 18);
+(((3_, ((13, (5_, 19)), 14)), (1_, 2_)), (6_, 18), 7_);
+(2_, (((14, (13, (19, 5_))), 3_), ((6_, 18), 7_)), 1_);
+((18, 7_), ((3_, (14, (13, (5_, 19)))), (1_, 2_)), 6_);
+((5_, 19), (3_, (((1_, 2_), ((7_, 18), 6_)), 14)), 13);
+((6_, 18), ((2_, 1_), (14, (3_, (13, (19, 5_))))), 7_);
+((13, (5_, 19)), (14, (7_, (18, ((2_, 1_), 6_)))), 3_);
+((((3_, 14), 7_), (18, ((1_, 2_), 6_))), (5_, 19), 13);
+(14, (18, (((19, 5_), 13), (7_, (6_, (2_, 1_))))), 3_);
+(6_, (2_, ((7_, ((13, (19, 5_)), (14, 3_))), 18)), 1_);
+(19, (13, ((18, (7_, ((6_, 1_), 2_))), (14, 3_))), 5_);
+(1_, (((18, ((13, (5_, 19)), (3_, 14))), 2_), 6_), 7_);
+(7_, ((((5_, 19), 13), (3_, 14)), (18, (6_, 2_))), 1_);
+(19, (13, ((((6_, (18, 1_)), 7_), 2_), (14, 3_))), 5_);
+(((((14, 3_), (13, (19, 5_))), 2_), 6_), (18, 1_), 7_);
+(19, (((14, 3_), ((1_, ((18, 2_), 6_)), 7_)), 13), 5_);
+(3_, ((1_, (7_, ((18, 2_), 6_))), (13, (19, 5_))), 14);
+(14, ((7_, ((1_, 2_), (6_, 18))), (13, (19, 5_))), 3_);
+(19, (((((6_, 18), (2_, 1_)), 7_), (3_, 14)), 13), 5_);
+(((14, 3_), (13, (19, 5_))), ((2_, 1_), (6_, 18)), 7_);
+(14, ((13, (5_, 19)), (7_, ((6_, 18), (1_, 2_)))), 3_);
+(5_, (((7_, ((6_, 18), (1_, 2_))), (3_, 14)), 13), 19);
+(5_, (13, ((7_, ((2_, 1_), (18, 6_))), (14, 3_))), 19);
+(3_, (((2_, 1_), (6_, 18)), ((13, (19, 5_)), 7_)), 14);
diff --git a/tests/t3_consensus/cluster1_284_consensus.tr b/tests/t3_consensus/cluster1_284_consensus.tr
new file mode 100644
index 0000000..985b0c6
--- /dev/null
+++ b/tests/t3_consensus/cluster1_284_consensus.tr
@@ -0,0 +1 @@
+((19, 5_, 13), 14, 3_, 18, 6_, 7_, 1_, 2_);
diff --git a/tests/t4_consensus/cluster1_16_alltrees.tr b/tests/t4_consensus/cluster1_16_alltrees.tr
new file mode 100644
index 0000000..3cffbb8
--- /dev/null
+++ b/tests/t4_consensus/cluster1_16_alltrees.tr
@@ -0,0 +1,16 @@
+(3_, ((((6_, 18), 7_), (2_, 1_)), (19, (13, 5_))), 14);
+(2_, ((((5_, 13), 19), (14, 3_)), (7_, (6_, 18))), 1_);
+(2_, (((3_, 14), ((5_, 13), 19)), ((6_, 18), 7_)), 1_);
+(1_, (((19, (13, 5_)), (3_, 14)), ((18, 6_), 7_)), 2_);
+(((2_, 1_), (((13, 5_), 19), (3_, 14))), (6_, 18), 7_);
+(5_, (19, ((3_, 14), ((2_, 1_), (7_, (6_, 18))))), 13);
+((6_, 18), (((19, (13, 5_)), (14, 3_)), (1_, 2_)), 7_);
+(((2_, 1_), ((19, (13, 5_)), (14, 3_))), (6_, 18), 7_);
+(2_, (((19, (5_, 13)), (3_, 14)), ((6_, 18), 7_)), 1_);
+(5_, (((3_, 14), (((6_, 18), 7_), (1_, 2_))), 19), 13);
+(13, (19, (((2_, 1_), ((18, 6_), 7_)), (3_, 14))), 5_);
+(14, ((19, (5_, 13)), ((7_, (6_, 18)), (2_, 1_))), 3_);
+(5_, (19, ((3_, 14), ((1_, 2_), (7_, (18, 6_))))), 13);
+(5_, (19, ((14, 3_), ((1_, 2_), (7_, (18, 6_))))), 13);
+(18, (7_, ((1_, 2_), ((14, 3_), (19, (5_, 13))))), 6_);
+(5_, (19, (((1_, 2_), ((6_, 18), 7_)), (3_, 14))), 13);
diff --git a/tests/t4_consensus/cluster1_16_consensus.tr b/tests/t4_consensus/cluster1_16_consensus.tr
new file mode 100644
index 0000000..cd2ab06
--- /dev/null
+++ b/tests/t4_consensus/cluster1_16_consensus.tr
@@ -0,0 +1 @@
+((((5_, 13), 19), (14, 3_)), ((18, 6_), 7_), (1_, 2_));
diff --git a/tests/t5_consensus/cluster1_35_alltrees.tr b/tests/t5_consensus/cluster1_35_alltrees.tr
new file mode 100644
index 0000000..c40bb39
--- /dev/null
+++ b/tests/t5_consensus/cluster1_35_alltrees.tr
@@ -0,0 +1,35 @@
+(3_, ((((6_, 18), 7_), (2_, 1_)), (19, (13, 5_))), 14);
+(2_, ((((5_, 13), 19), (14, 3_)), (7_, (6_, 18))), 1_);
+(2_, (((3_, 14), ((5_, 13), 19)), ((6_, 18), 7_)), 1_);
+(1_, (((19, (13, 5_)), (3_, 14)), ((18, 6_), 7_)), 2_);
+(((2_, 1_), (((13, 5_), 19), (3_, 14))), (6_, 18), 7_);
+(5_, (19, ((3_, 14), ((2_, 1_), (7_, (6_, 18))))), 13);
+((6_, 18), (((19, (13, 5_)), (14, 3_)), (1_, 2_)), 7_);
+(((2_, 1_), ((19, (13, 5_)), (14, 3_))), (6_, 18), 7_);
+(2_, (((19, (5_, 13)), (3_, 14)), ((6_, 18), 7_)), 1_);
+(5_, (((3_, 14), (((6_, 18), 7_), (1_, 2_))), 19), 13);
+(13, (19, (((2_, 1_), ((18, 6_), 7_)), (3_, 14))), 5_);
+(14, ((19, (5_, 13)), ((7_, (6_, 18)), (2_, 1_))), 3_);
+(5_, (19, ((3_, 14), ((1_, 2_), (7_, (18, 6_))))), 13);
+(5_, (19, ((14, 3_), ((1_, 2_), (7_, (18, 6_))))), 13);
+(18, (7_, ((1_, 2_), ((14, 3_), (19, (5_, 13))))), 6_);
+(5_, (19, (((1_, 2_), ((6_, 18), 7_)), (3_, 14))), 13);
+(14, ((19, (5_, 13)), (((2_, 1_), 7_), (18, 6_))), 3_);
+(14, ((19, (13, 5_)), ((7_, (2_, 1_)), (18, 6_))), 3_);
+(14, (((18, 6_), ((1_, 2_), 7_)), (19, (13, 5_))), 3_);
+(2_, (7_, ((((13, 5_), 19), (14, 3_)), (18, 6_))), 1_);
+(3_, (((5_, 13), 19), ((6_, 18), (7_, (2_, 1_)))), 14);
+(1_, (((18, 6_), ((19, (13, 5_)), (3_, 14))), 7_), 2_);
+(14, ((19, (13, 5_)), ((6_, 18), (7_, (2_, 1_)))), 3_);
+(18, ((7_, (2_, 1_)), ((3_, 14), (19, (13, 5_)))), 6_);
+(((3_, 14), ((18, 6_), (7_, (2_, 1_)))), (5_, 13), 19);
+((5_, 13), ((14, 3_), (7_, ((2_, 1_), (6_, 18)))), 19);
+(3_, ((19, (13, 5_)), (7_, ((2_, 1_), (18, 6_)))), 14);
+(2_, ((7_, ((3_, 14), (19, (13, 5_)))), (6_, 18)), 1_);
+(6_, ((((3_, 14), ((5_, 13), 19)), 7_), (2_, 1_)), 18);
+(18, ((1_, 2_), (7_, ((14, 3_), (19, (5_, 13))))), 6_);
+(3_, ((19, (13, 5_)), (7_, ((2_, 1_), (6_, 18)))), 14);
+(2_, ((18, 6_), (7_, (((13, 5_), 19), (14, 3_)))), 1_);
+(1_, ((((14, 3_), (19, (5_, 13))), 7_), (6_, 18)), 2_);
+(6_, ((7_, ((19, (13, 5_)), (14, 3_))), (1_, 2_)), 18);
+((5_, 13), ((3_, 14), (7_, ((2_, 1_), (18, 6_)))), 19);
diff --git a/tests/t5_consensus/cluster1_35_consensus.tr b/tests/t5_consensus/cluster1_35_consensus.tr
new file mode 100644
index 0000000..84e16db
--- /dev/null
+++ b/tests/t5_consensus/cluster1_35_consensus.tr
@@ -0,0 +1 @@
+((((5_, 13), 19), (14, 3_)), (1_, 2_), (18, 6_), 7_);
diff --git a/vary_branchlen.sh b/vary_branchlen.sh
new file mode 100755
index 0000000..b358463
--- /dev/null
+++ b/vary_branchlen.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+set -e 
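+# Sweep phybin's branch-length threshold (-b) over the settings below for the
+# given taxa count and dataset directory, one background phybin job per setting.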
+if [ "$1" == "" ]; then echo "ERR: Two args required!"; exit 1; fi
+if [ "$2" == "" ]; then echo "ERR: Two args required!"; exit 1; fi
+
+taxa=$1
+dataset=$2
+# 12 settings:
+branchlens="0.0001 0.001 0.01 0.02 0.03 0.05 0.1 0.2 0.3 0.5 1.0 5.0"
+
+DOGRAPH="-g"
+
+echo Wiping "$dataset/phybin_output/" ...
+rm -rf "$dataset/phybin_output/"
+
+echo "[parallel] Starting jobs..."
+
+for bl in $branchlens; do
+    echo "Running with branch len $bl"
+    echo "=============================="
+    PHYBIN="./phybin.exe -b $bl $DOGRAPH -n $taxa"
+    outdir="$dataset/phybin_output/brlen$bl/"
+    LOG="$dataset/phybin_output/brlen$bl.log"
+
+    mkdir -p $outdir
+    
+    # ASSUMPTION 1: the renaming table lives at $dataset/renaming_table.txt.
+    # ASSUMPTION 2: the input trees live under $dataset/final_trees/.
+    RENAMER="$dataset/renaming_table.txt"
+    echo "  RUNNING:  $PHYBIN -m $RENAMER -s '_'  -o $outdir"
+    ($PHYBIN -m $RENAMER -s '_'  -o $outdir $dataset/final_trees/*BranchLab*.out | tee $LOG) &
+done
+
+echo "[parallel] Waiting for outstanding jobs"
+FAIL=0
+for job in `jobs -p`
+do
+echo $job
+    wait $job || let "FAIL+=1"
+done
+if [ "$FAIL" == "0" ];
+then
+echo "All jobs completed successfully"
+else
+echo "ERROR some jobs ($FAIL) failed!"
+exit 1 
+fi
diff --git a/website/trees.jpg b/website/trees.jpg
new file mode 100644
index 0000000..b5ad92c
Binary files /dev/null and b/website/trees.jpg differ
diff --git a/website/website.css b/website/website.css
new file mode 100644
index 0000000..521234b
--- /dev/null
+++ b/website/website.css
@@ -0,0 +1,74 @@
+body {
+    margin: auto;
+    padding-right: 1em;
+    padding-left: 1em;
+    max-width: 44em; 
+    border-left: 1px solid black;
+    border-right: 1px solid black;
+    color: black;
+    font-family: Verdana, sans-serif;
+    font-size: 100%;
+    line-height: 140%;
+    color: #333; 
+}
+pre {
+    border: 1px dotted gray;
+    background-color: #ececec;
+    color: #111111;
+    padding: 0.5em;
+}
+code {
+    font-family: monospace;
+}
+h1 a, h2 a, h3 a, h4 a, h5 a { 
+    text-decoration: none;
+    color: #7a5ada; 
+}
+h1, h2, h3, h4, h5 { font-family: verdana;
+                     font-weight: bold;
+                     border-bottom: 1px dotted black;
+                     color: #7a5ada; }
+h1 {
+        font-size: 130%;
+}
+
+h2 {
+        font-size: 110%;
+}
+
+h3 {
+        font-size: 95%;
+}
+
+h4 {
+        font-size: 90%;
+        font-style: italic;
+}
+
+h5 {
+        font-size: 90%;
+        font-style: italic;
+}
+
+h1.title {
+        font-size: 200%;
+        font-weight: bold;
+        padding-top: 0.2em;
+        padding-bottom: 0.2em;
+        text-align: left;
+        border: none;
+}
+
+dt code {
+        font-weight: bold;
+}
+dd p {
+        margin-top: 0;
+}
+
+#footer {
+        padding-top: 1em;
+        font-size: 70%;
+        color: gray;
+        text-align: center;
+}

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/phybin.git


