[med-svn] [inspect] 01/02: Imported Upstream version 0.0.20120109

Andreas Tille tille at debian.org
Wed Sep 30 15:18:22 UTC 2015


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository inspect.

commit 9f3a74855a2ee51615c80b105ff9164fd76db028
Author: Andreas Tille <tille at debian.org>
Date:   Wed Sep 30 17:12:25 2015 +0200

    Imported Upstream version 0.0.20120109
---
 AdjustPTM.py                         | 1869 +++++++++++++++
 AminoAcidMasses.txt                  |   22 +
 BN.c                                 |  204 ++
 BN.h                                 |   96 +
 BasicStats.py                        |  120 +
 BuildConsensusSpectrum.py            |  273 +++
 BuildInspect.py                      |  123 +
 BuildMGF.py                          |  126 +
 BuildMS2DB.c                         | 2101 +++++++++++++++++
 BuildMS2DB.h                         |   40 +
 BuildMS2DB.jar                       |  Bin 0 -> 182770 bytes
 CCSVM1.model                         |   44 +
 CCSVM1.range                         |   12 +
 CCSVM2.model                         |  118 +
 CCSVM2.range                         |   24 +
 CCSVM2Phos.model                     |  320 +++
 CCSVM2Phos.range                     |   25 +
 CMemLeak.c                           |  421 ++++
 CMemLeak.h                           |   43 +
 Ch2BNPEP.dat                         |  Bin 0 -> 3376 bytes
 Ch2BNPEPQ.dat                        |  Bin 0 -> 3376 bytes
 Ch3BNPEP.dat                         |  Bin 0 -> 3376 bytes
 Ch3BNPEPQ.dat                        |  Bin 0 -> 3376 bytes
 ChargeState.c                        |  899 ++++++++
 ChargeState.h                        |   65 +
 CombinePTMFeatures.py                |  627 +++++
 CompareHEKPTM.py                     |  808 +++++++
 ComputeFDR.jar                       |  Bin 0 -> 4364231 bytes
 ComputeFScore.py                     |  328 +++
 ComputePTMFeatures.py                |  943 ++++++++
 Database/CommonContaminants.fasta    |   20 +
 Database/TestDatabase.index          |  Bin 0 -> 3404 bytes
 Database/TestDatabase.trie           |    1 +
 Errors.c                             |  261 +++
 Errors.h                             |   88 +
 ExonGraphAlign.c                     | 1195 ++++++++++
 ExonGraphAlign.h                     |   40 +
 ExplainPTMs.py                       |  148 ++
 FDRUtils.py                          | 1109 +++++++++
 FreeMod.c                            | 2720 ++++++++++++++++++++++
 FreeMod.h                            |   91 +
 GetByteOffset.py                     |  169 ++
 Global.py                            |   64 +
 InVitroModifications.txt             |    7 +
 InVivoModifications.txt              |    9 +
 Inspect.exe                          |  Bin 0 -> 920576 bytes
 Inspect.h                            |  190 ++
 Inspect.sln                          |   19 +
 Inspect.vcproj                       |  566 +++++
 InspectToPepXML.py                   |  859 +++++++
 IonScoring.c                         | 1873 +++++++++++++++
 IonScoring.h                         |  195 ++
 IsotopePatterns.txt                  | 1750 ++++++++++++++
 LDA.c                                |  280 +++
 LDA.h                                |   59 +
 LDA.py                               |  469 ++++
 Label.py                             |  576 +++++
 Learning.py                          | 1276 ++++++++++
 MQScoreLDA2.model                    |  Bin 0 -> 636 bytes
 MQScoreLDA3.model                    |  Bin 0 -> 636 bytes
 MQScoreSVM2.model                    |  269 +++
 MQScoreSVM2.range                    |    9 +
 MQScoreSVM3.model                    |  282 +++
 MQScoreSVM3.range                    |    9 +
 MS2DB.c                              |  688 ++++++
 MS2DB.h                              |   45 +
 MS2DBShuffler.jar                    |  Bin 0 -> 178348 bytes
 MSSpectrum.py                        |  663 ++++++
 MakeImage.py                         |  623 +++++
 Makefile                             |   36 +
 Mods.c                               |  340 +++
 Mods.h                               |  110 +
 PLSUtils.py                          |  265 +++
 PMCLDA1.model                        |  Bin 0 -> 956 bytes
 PMCLDA2.model                        |  Bin 0 -> 2580 bytes
 PMCLDA2Phos.model                    |  Bin 0 -> 3188 bytes
 PMCLDA3.model                        |  Bin 0 -> 2580 bytes
 PMCLDA3Phos.model                    |  Bin 0 -> 3188 bytes
 PRM2.bn                              |  Bin 0 -> 10964 bytes
 PRM2.dat                             |  Bin 0 -> 2736 bytes
 PRM3.bn                              |  Bin 0 -> 10964 bytes
 PRM3.dat                             |  Bin 0 -> 2736 bytes
 PRMQ2.dat                            |  Bin 0 -> 2736 bytes
 PRMQ3.dat                            |  Bin 0 -> 2736 bytes
 PTMAnalysis.py                       |  523 +++++
 PTMChooserLM.py                      | 1294 +++++++++++
 PTMDatabase.txt                      |  563 +++++
 PTMSearchBigDB.py                    |  171 ++
 PTMods.txt                           |  105 +
 PValue.c                             |  662 ++++++
 PValue.h                             |   42 +
 ParentMass.c                         |  710 ++++++
 ParentMass.h                         |  105 +
 ParseInput.c                         | 1653 +++++++++++++
 ParseInput.h                         |   44 +
 ParseXML.c                           | 1239 ++++++++++
 ParseXML.h                           |   46 +
 ParseXML.py                          |  281 +++
 PhosCut2.bn                          |  Bin 0 -> 19740 bytes
 PhosCut3.bn                          |  Bin 0 -> 22240 bytes
 PhosphateLocalization.py             |  324 +++
 PrepDB.py                            |  283 +++
 ProteinGrouper.py                    |  471 ++++
 PyInspect.pyd                        |  Bin 0 -> 315392 bytes
 PyInspect/PyInspect.c                |  661 ++++++
 PyInspect/PySpectrum.c               | 1265 ++++++++++
 PyInspect/PySpectrum.h               |  145 ++
 PyInspect/PyUtils.c                  |   49 +
 PyInspect/PyUtils.h                  |   39 +
 PySVM.pyd                            |  Bin 0 -> 57344 bytes
 PySVM/PySVM.c                        |  327 +++
 PySVM/PySVM.sln                      |   21 +
 PySVM/PySVM.vcproj                   |  198 ++
 PySVM/svm-predict.c                  |  202 ++
 PySVM/svm.cpp                        | 3087 +++++++++++++++++++++++++
 PySVM/svm.h                          |   72 +
 ReleaseFiles.txt                     |  234 ++
 ReleasePyInspect.py                  |   67 +
 ReleasePySVM.py                      |   48 +
 ResultsParser.py                     |  152 ++
 Run.c                                | 1492 ++++++++++++
 Run.h                                |   41 +
 RunPySVM.py                          |   67 +
 SNP.c                                |  244 ++
 SNP.h                                |   63 +
 SVM.c                                |  644 ++++++
 SVM.h                                |   81 +
 Score.c                              |  862 +++++++
 Score.h                              |   85 +
 Score.py                             |   61 +
 ScoringModel.dat                     |  Bin 0 -> 1680 bytes
 Scorpion.c                           | 1304 +++++++++++
 Scorpion.h                           |  108 +
 SelectProteins.py                    |  397 ++++
 ShuffleDB.py                         |  285 +++
 SpectralSimilarity.py                |  502 ++++
 Spectrum.c                           | 1487 ++++++++++++
 Spectrum.h                           |  160 ++
 SpliceDB.c                           | 4212 ++++++++++++++++++++++++++++++++++
 SpliceDB.h                           |  150 ++
 SpliceScan.c                         | 1003 ++++++++
 SpliceScan.h                         |   39 +
 Spliced.c                            | 2113 +++++++++++++++++
 Spliced.h                            |  120 +
 StripPTM.py                          |  117 +
 Summary.py                           |  471 ++++
 SystemTest.py                        |  251 ++
 SystemTest/BuildSimpleChromosome.txt |    3 +
 SystemTest/Shew_Short.fasta          |   20 +
 SystemTest/Shew_dta.txt              | 1451 ++++++++++++
 SystemTest/SimpleChromosome.trie     |    1 +
 SystemTest/SimpleGenes.gff           |    5 +
 SystemTest/TestCDTA.txt              |    5 +
 SystemTest/TestInput.txt             |   26 +
 SystemTest/TestInputMod.txt          |    8 +
 SystemTest/TestInputTag1.txt         |    9 +
 SystemTest/TestInputTag3.txt         |    9 +
 SystemTest/TestMS2.txt               |    8 +
 SystemTest/TestPMC.txt               |    7 +
 SystemTest/TestSpectra.pkl           | 1773 ++++++++++++++
 SystemTest/TestSpectrum.dta          |  131 ++
 SystemTest/Yeast.ms2                 | 1149 ++++++++++
 SystemTest/YeastSmall.fasta          |   62 +
 TAG2.bn                              |  Bin 0 -> 15372 bytes
 TAG3.bn                              |  Bin 0 -> 15372 bytes
 TagFile.c                            |  493 ++++
 TagFile.h                            |   67 +
 TagSkewScores.dat                    |  Bin 0 -> 252 bytes
 Tagger.c                             | 2148 +++++++++++++++++
 Tagger.h                             |  199 ++
 TrainPTMFeatures.py                  |  762 ++++++
 Trie.c                               | 2659 +++++++++++++++++++++
 Trie.h                               |  309 +++
 TrieUtils.py                         |  256 +++
 Utils.c                              |  683 ++++++
 Utils.h                              |  345 +++
 Utils.py                             | 1074 +++++++++
 base64.c                             |  217 ++
 base64.h                             |    6 +
 docs/Analysis.html                   |   79 +
 docs/Copyright.html                  |   47 +
 docs/Database.html                   |   78 +
 docs/InspectTutorial.pdf             |  Bin 0 -> 120117 bytes
 docs/Installation.html               |   42 +
 docs/MS2DB.html                      |   51 +
 docs/PLSTutorial.pdf                 |  Bin 0 -> 61551 bytes
 docs/RunningInspectOnTheFWGrid.pdf   |  Bin 0 -> 27939 bytes
 docs/Searching.html                  |  128 ++
 docs/UnrestrictedSearchTutorial.pdf  |  Bin 0 -> 90009 bytes
 docs/index.html                      |   42 +
 main.c                               |  863 +++++++
 191 files changed, 73681 insertions(+)

diff --git a/AdjustPTM.py b/AdjustPTM.py
new file mode 100644
index 0000000..3cb9338
--- /dev/null
+++ b/AdjustPTM.py
@@ -0,0 +1,1869 @@
+#Title:          AdjustPTM.py
+#Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+#Created:        2005
+# Copyright 2007,2008,2009 The Regents of the University of California
+# All Rights Reserved
+#
+# Permission to use, copy, modify and distribute any part of this
+# program for educational, research and non-profit purposes, by non-profit
+# institutions only, without fee, and without a written agreement is hereby
+# granted, provided that the above copyright notice, this paragraph and
+# the following three paragraphs appear in all copies.
+#
+# Those desiring to incorporate this work into commercial
+# products or use for commercial purposes should contact the Technology
+# Transfer & Intellectual Property Services, University of California,
+# San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+# Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+#
+# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+# INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+# IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+# OF SUCH DAMAGE.
+#
+# THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+# OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+# ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+# REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+# EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+# THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+
+"""
+Merge and reconcile peptide species, after running ComputePTMFeatures
+and TrainPTMFeatures.  Iterate over peptide species from "best" to
+"worst".  For each species, consider whether there's another species
+which (1) is better, and either (2a) is the same after mod-shifting
+(including charge-state), or (2b) is compatible after mod-shifting.
+Case (2a): Try merging both species into one big cluster, determine
+whether the MQScore / PValue improves.  If so, keep the merge.
+Case (2b): Try shifting the inferior species to match the superior.
+If the MQScore / PValue improves (or at least stays CLOSE), then
+keep the shift.
+
+A note on directories:
+Consensus spectra and clusters can take quite a bit of disk space.  Running
+AdjustPTM changes spectra and clusters, but we'd like the liberty to
+re-run AdjustPTM.  Therefore, AdjustPTM uses a set of "adjusted" directories.
+It wipes these when it starts a merge/reconcile run; it writes to them; it
+reads clusters and spectra from these directories first, if possible.
+"""
+import os
+import struct
+import shutil
+import math
+import sys
+import MSSpectrum
+import string
+import traceback
+import getopt
+import cPickle
+import BuildConsensusSpectrum
+import Learning
+import PyInspect
+import ResultsParser
+import SpectralSimilarity
+from Utils import *
+Initialize()
+import TrainPTMFeatures
+from TrainPTMFeatures import FormatBits
+from TrainPTMFeatures import FeatureBits
+
+PROFILING_RUN = 0
+
+class AnticipatedChemistry:
+    """
+    Represents a chemical adduct which we expect to see often, with relatively low
+    site specificity.  Examples: M+16, *.Q-17.  We want to flag the adducts
+    to highlight the remaining NON-adducts!
+    """
+    def __init__(self):
+        # For the "allowed" members, None means "no restriction".
+        self.AllowedResidues = None
+        self.AllowedPrefix = None
+        self.AllowedSuffix = None
+        self.Terminus = None
+        self.Mass = 0
+        self.Name = ""
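+    # Minimal illustrative instantiation (hypothetical values, mirroring the
+    # "M+16" example in the class docstring; real instances are populated by
+    # PTMAdjuster.LoadKnownModifications from the known-chemistry file):
+    #
+    #   Oxidation = AnticipatedChemistry()
+    #   Oxidation.Mass = 16
+    #   Oxidation.Name = "oxidation"
+    #   Oxidation.AllowedResidues = "M"   # None would mean "no restriction"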
+
+class SiteClass:
+    """
+    Wrapper for one or more Species instances which represent the same modification-mass
+    at the same database-position.
+    """
+    def __init__(self):
+        self.ModDBPos = None
+        self.ModMass = None
+        self.SpeciesList = []
+    def __str__(self):
+        return "%+d on dbpos %s"%(self.ModMass, self.ModDBPos)
+    
+class PeptideSpeciesClass:
+    """
+    Represents one (modified) peptide species; different charge states are different
+    peptide species.
+    """
+    def __init__(self):
+        self.MergedFlag = 0
+        self.ConsensusSpectrum = None
+        self.ConsensusMSSpectrum = None
+        self.BestSpectrum = None
+        self.BestMSSpectrum = None
+        self.Bits = None
+        self.ConsensusModlessMSSpectrum = None
+    def __str__(self):
+        return "<peptide species: %s>"%self.Bits
+    def FreeCachedSpectra(self):
+        """
+        Discard our PySpectrum and MSSpectrum objects, because we can't hold ALL
+        of them in memory at once:
+        """
+        self.BestSpectrum = None
+        self.BestMSSpectrum = None
+        self.ConsensusSpectrum = None
+        self.ConsensusMSSpectrum = None
+        self.ConsensusModlessMSSpectrum = None
+    def GetBestSpectrum(self, Master):
+        if self.BestSpectrum:
+            return self.BestSpectrum
+        FilePath = self.Bits[FormatBits.BestSpectrumPath]
+        ColonBits = FilePath.split(":")
+        try:
+            FilePos = int(ColonBits[-1])
+            FilePath = string.join(ColonBits[:-1], ":")
+        except:
+            FilePos = 0
+        FilePath = Master.FixSpectrumPath(FilePath)
+        self.BestSpectrum = PyInspect.Spectrum(FilePath, FilePos)
+        return self.BestSpectrum
+    def GetMemberListStr(self, Master):
+        Path = os.path.join(Master.ClusterScanListDirAdjusted, self.Annotation[2], "%s.%s.txt"%(self.Annotation, self.Charge))
+        if not os.path.exists(Path):
+            Path = os.path.join(Master.ClusterScanListDir, self.Annotation[2], "%s.%s.txt"%(self.Annotation, self.Charge))
+        if not os.path.exists(Path):
+            # Punt!
+            return ""
+        File = open(Path, "rb")
+        Text = File.read()
+        File.close()
+        return Text
+    def GetConsensusSpectrumPath(self, Master):
+        Path = os.path.join(Master.ConsensusSpectraDirAdjusted, self.Annotation[2], "%s.%s.dta"%(self.Annotation, self.Charge))
+        if not os.path.exists(Path):
+            Path = os.path.join(Master.ConsensusSpectraDir, self.Annotation[2], "%s.%s.dta"%(self.Annotation, self.Charge))
+        return Path
+    def GetConsensusSpectrum(self, Master):
+        if self.ConsensusSpectrum:
+            return self.ConsensusSpectrum
+        Path = self.GetConsensusSpectrumPath(Master)
+        self.ConsensusSpectrum = PyInspect.Spectrum(Path, 0)
+        return self.ConsensusSpectrum
+    def GetConsensusMSSpectrum(self, Master):
+        if self.ConsensusMSSpectrum:
+            return self.ConsensusMSSpectrum
+        Path = self.GetConsensusSpectrumPath(Master)
+        self.ConsensusMSSpectrum = MSSpectrum.SpectrumClass()
+        self.ConsensusMSSpectrum.ReadPeaks(Path)
+        self.ConsensusMSSpectrum.FilterPeaks()
+        self.ConsensusMSSpectrum.RankPeaksByIntensity()
+        return self.ConsensusMSSpectrum
+    def GetConsensusModlessMSSpectrum(self, Master):
+        if self.ConsensusModlessMSSpectrum:
+            return self.ConsensusModlessMSSpectrum
+        Path = os.path.join(Master.ConsensusSpectraDirAdjusted, self.ModlessAnnotation[2], "%s.%s.dta"%(self.ModlessAnnotation, self.Charge))
+        if not os.path.exists(Path):
+            Path = os.path.join(Master.ConsensusSpectraDir, self.ModlessAnnotation[2], "%s.%s.dta"%(self.ModlessAnnotation, self.Charge))
+        self.ConsensusModlessMSSpectrum = MSSpectrum.SpectrumClass()
+        self.ConsensusModlessMSSpectrum.ReadPeaks(Path)
+        self.ConsensusModlessMSSpectrum.FilterPeaks()
+        self.ConsensusModlessMSSpectrum.RankPeaksByIntensity()
+        return self.ConsensusModlessMSSpectrum
+    def ParseBits(self, Bits):
+        self.Bits = Bits
+        self.Annotation = Bits[FormatBits.Peptide]
+        self.Peptide = GetPeptideFromModdedName(self.Annotation)
+        self.ModlessAnnotation = "%s.%s.%s"%(self.Peptide.Prefix, self.Peptide.Aminos, self.Peptide.Suffix)
+        self.Charge = int(Bits[FormatBits.Charge])
+        self.ModDBPos = int(Bits[FormatBits.DBPos])
+        ModIndex = self.Peptide.Modifications.keys()[0]
+        self.DBPos = self.ModDBPos - ModIndex
+        self.ModMass = self.Peptide.Modifications[ModIndex][0].Mass
+        self.ModAA = self.Peptide.Aminos[ModIndex]
+        self.DBEnd = self.DBPos + len(self.Peptide.Aminos)
+        self.ConsensusMQScore = float(Bits[FormatBits.ConsensusMQScore])
+        try:
+            self.ModelScore = float(Bits[FormatBits.ModelScore])
+            self.PValue = float(Bits[FormatBits.ModelPValue])
+        except:
+            self.ModelScore = None
+            self.PValue = None
+        # Parse old features:
+        self.Features = []
+        for FeatureIndex in range(FormatBits.FirstFeature, FormatBits.LastFeature + 1):
+            try:
+                self.Features.append(float(Bits[FeatureIndex]))
+            except:
+                self.Features.append(0)
+        self.ComputePrefixes()
+    def ComputePrefixes(self):
+        # self.Prefixes[DBPos] is the mass that this species accumulates
+        # *before* the specified residue.  Examples:
+        # ~ Species.Prefixes[Species.DBPos] = 0 always,
+        # ~ Species.Prefixes[Species.DBPos + 1] is equal to the mass (with modification, if any)
+        #    of the first residue
+        self.Prefixes = {}
+        self.Suffixes = {}
+        ParentMass = self.Peptide.Masses[-1] + 19
+        AccumulatedMass = 0
+        for Pos in range(len(self.Peptide.Aminos)):
+            self.Prefixes[self.DBPos + Pos] = AccumulatedMass
+            self.Suffixes[self.DBPos + Pos] = ParentMass - AccumulatedMass
+            AccumulatedMass += GetMass(self.Peptide.Aminos[Pos])
+            for Mod in self.Peptide.Modifications.get(Pos, []):
+                AccumulatedMass += Mod.Mass
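+        # Worked example (illustrative, symbolic residue masses): for a species
+        # R.A+16GK.L starting at DBPos = 100,
+        #   Prefixes[100] = 0
+        #   Prefixes[101] = mass(A) + 16                 (first residue plus its mod)
+        #   Prefixes[102] = mass(A) + 16 + mass(G)
+        #   Suffixes[101] = ParentMass - (mass(A) + 16)  and so on.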
+
+class PTMAdjuster(ResultsParser.SpectrumOracleMixin):
+    def __init__(self):
+        self.HeaderLines = []
+        self.CompatibilityTolerance = 3
+        self.CachedClusterPath = None
+        self.ConsensusClusterDir = "PTMScore\\Lens-99-10\\Cluster" # default
+        self.ConsensusSpectraDir = "PTMScore\\Lens-99-10\\Spectra" # default
+        self.SortByModel = 1
+        self.PeptideDict = {} # keys: (Annotation, Charge)
+        self.KnownChemistryFileName = None
+        self.OutputModelFileName2 = None
+        self.OutputModelFileName3 = None
+        self.DBStart = None
+        self.DBEnd = None
+        self.SpectrumRoot = None
+        self.CheckDirectoriesFlag = 0
+        self.MergeBlockRunsFlag = 0
+        self.KnownPTMVerboseOutputFileName = None
+        self.MaxPeptideWindowWidth = 2500
+        ResultsParser.SpectrumOracleMixin.__init__(self)
+    def PerformMergeReconcileOnWindow(self, PerformMergeFlag):
+        ###############################################################
+        # Consider merging/reconciling these peptides:
+        SortedList = []
+        self.PeptideDict = {}
+        for Species in self.WindowPeptides:
+            if self.SortByModel:
+                SortedList.append((Species.ModelScore, Species))
+            else:
+                SortedList.append((Species.ConsensusMQScore, Species))
+            Key = (Species.Annotation, Species.Charge)
+            self.PeptideDict[Key] = Species
+        # SortedList lists species from BEST to WORST.
+        SortedList.sort()
+        SortedList.reverse()
+        # Dual iteration over the peptides from the window: Species A has the
+        # lower score, species B has the higher score.
+        # Consider reconciling species A to species B:
+        for IndexA in range(len(SortedList)):
+            (ScoreA, SpeciesA) = SortedList[IndexA]
+            Str = "(%s/%s) %s %s"%(IndexA, len(SortedList), SpeciesA.Charge, SpeciesA.Annotation)
+            if PerformMergeFlag:
+                print "M", Str
+            else:
+                print "C", Str
+            if SpeciesA.MergedFlag:
+                # A has already been merged into another species.
+                continue
+            for IndexB in range(IndexA):
+                (ScoreB, SpeciesB) = SortedList[IndexB]
+                if SpeciesB.MergedFlag:
+                    # B has already been merged into another species.
+                    continue
+                # Compatibility checks.
+                # Charge must be the same in order to MERGE (but not to RECONCILE):
+                if SpeciesA.Charge != SpeciesB.Charge and PerformMergeFlag:
+                    continue
+                # Peptides must overlap:
+                if SpeciesA.DBEnd <= SpeciesB.DBPos or SpeciesB.DBEnd <= SpeciesA.DBPos:
+                    continue
+                # To reconcile, Peptide A must cover the DB position which is modified in B:
+                if SpeciesB.ModDBPos >= SpeciesA.DBEnd or SpeciesB.ModDBPos < SpeciesA.DBPos:
+                    if not PerformMergeFlag:
+                        continue
+                # First, look for a MERGE:
+                # Prefix and suffix must be the same at some point:
+                SamePrefixSuffix = 0
+                for DBPos in SpeciesA.Prefixes.keys():
+                    PMassA = SpeciesA.Prefixes[DBPos]
+                    PMassB = SpeciesB.Prefixes.get(DBPos, -9999)
+                    if abs(PMassA - PMassB) >= self.CompatibilityTolerance:
+                        continue
+                    SMassA = SpeciesA.Suffixes[DBPos]
+                    SMassB = SpeciesB.Suffixes.get(DBPos, -9999)
+                    if abs(SMassA - SMassB) >= self.CompatibilityTolerance:
+                        continue
+                    SamePrefixSuffix = 1
+                    break
+                if SamePrefixSuffix and SpeciesA.Charge == SpeciesB.Charge:
+                    # Merge is possible.  If this is first-cycle, then do a merge;
+                    # if not, then continue.
+                    if PerformMergeFlag:
+                        MergeFlag = self.AttemptMerge(SpeciesA, SpeciesB) # A into B
+                        if MergeFlag:
+                            SpeciesB.FreeCachedSpectra()
+                            break
+                        # We didn't merge A into B.  But perhaps we can merge B into A!
+                        MergeFlag = self.AttemptMerge(SpeciesB, SpeciesA, 1) # B into A
+                        if MergeFlag:
+                            SpeciesB.FreeCachedSpectra()
+                            break
+                    continue
+                # Merge is impossible.  If this is first-cycle, bail out:
+                if PerformMergeFlag:
+                    continue
+                # If species A and B are already compatible, then there's nothing to do:
+                if SpeciesA.ModDBPos == SpeciesB.ModDBPos and SpeciesA.ModMass == SpeciesB.ModMass:
+                    print "(Already reconciled to %s)"%Species.Annotation
+                    continue
+                # Perhaps A could be conformed to B
+                # if the modification-masses are similar (possibly after
+                # an endpoint shift):
+                if SpeciesA.DBPos < SpeciesB.DBPos:
+                    ExtraPrefixA = GetMass(self.DB[SpeciesA.DBPos:SpeciesB.DBPos])
+                else:
+                    ExtraPrefixA = 0
+                if SpeciesB.DBPos < SpeciesA.DBPos:
+                    ExtraPrefixB = GetMass(self.DB[SpeciesB.DBPos:SpeciesA.DBPos])
+                else:
+                    ExtraPrefixB = 0
+                if SpeciesA.DBEnd > SpeciesB.DBEnd:
+                    ExtraSuffixA = GetMass(self.DB[SpeciesB.DBEnd:SpeciesA.DBEnd])
+                else:
+                    ExtraSuffixA = 0
+                if SpeciesB.DBEnd > SpeciesA.DBEnd:
+                    ExtraSuffixB = GetMass(self.DB[SpeciesA.DBEnd:SpeciesB.DBEnd])
+                else:
+                    ExtraSuffixB = 0
+                # VERBOSE:
+                for DBPos in SpeciesA.Prefixes.keys():
+                    PMassA = SpeciesA.Prefixes[DBPos] + ExtraPrefixA
+                    PMassB = SpeciesB.Prefixes.get(DBPos, -9999) - ExtraPrefixB
+                    SMassA = SpeciesA.Suffixes[DBPos] + ExtraSuffixA
+                    SMassB = SpeciesB.Suffixes.get(DBPos, -9999) - ExtraSuffixB
+                    #print "DBPos %s: PreA %s PreB %s (%s)"%(DBPos, PMassA, PMassB, abs(PMassA-PMassB))
+                    #print "   PostA %s PostB %s (%s)"%(SMassA, SMassB, abs(SMassA-SMassB))
+                    if abs(PMassA - PMassB) >= self.CompatibilityTolerance:
+                        continue
+                    if abs(SMassA - SMassB) >= self.CompatibilityTolerance:
+                        continue
+                    SamePrefixSuffix = 1
+                    break
+                if not SamePrefixSuffix:
+                    # irreconcilable, move on to try the next species:
+                    continue
+                self.ReconcileDetailOutput.write("%s\t%s\t%s\t%s\t\n"%(SpeciesA.ModDBPos, SpeciesB.ModDBPos, SpeciesA.ModMass, SpeciesB.ModMass))
+                self.ReconcileDetailOutput.write("%s\t%s\t%s\t\t%s\t%s\t%s\t\n"%(SpeciesA.Annotation, SpeciesA.DBPos, SpeciesA.DBEnd, SpeciesB.Annotation, SpeciesB.DBPos, SpeciesB.DBEnd))                    
+                # Species A *could* be reconciled with B.
+                Result = self.AttemptReconcile(SpeciesA, SpeciesB)
+                SpeciesB.FreeCachedSpectra()
+                if Result:
+                    # We reconciled to B.  Stop now, don't re-reconcile to
+                    # another (INFERIOR) species:
+                    break                    
+            SpeciesA.FreeCachedSpectra()        
+    def PerformAllMerges(self, PerformMergeFlag = 1):
+        """
+        Workhorse for the merge and reconcile procedure.  Double-loop over sites,
+        from high to low scoring.  Consider either MERGING or RECONCILING the
+        low-scoring site to match the high-scoring site.
+        """
+        self.HeaderLines = []
+        self.ModTypeSpectrumCount = {}
+        self.ModTypeSiteCount = {}
+        # A list of peptides within our 'window'.  At each iteration,
+        # we read peptides until we hit EOF, hit a new protein-name, or the window grows past
+        # MaxPeptideWindowWidth (2500) database positions.  Then we attempt reconciliation/merging for peptides
+        # which are "covered" by the window.  Then we advance the window.
+        self.WindowPeptides = []
+        EOFFlag = 0
+        WroteHeaderFlag = 0
+        NextProteinFirstPeptide = None
+        while 1:
+            if NextProteinFirstPeptide:
+                self.WindowPeptides = [NextProteinFirstPeptide]
+                NextProteinFirstPeptide = None
+            if not len(self.WindowPeptides):
+                WindowStart = 0
+                WindowEnd = 0
+                CurrentProteinName = None
+                if EOFFlag:
+                    break
+            else:
+                WindowStart = self.WindowPeptides[0].DBPos
+                WindowEnd = self.WindowPeptides[-1].DBEnd
+                CurrentProteinName = self.WindowPeptides[0].ProteinName
+            ###############################################################
+            # Parse some more peptides:
+            while (not EOFFlag) and (WindowEnd < WindowStart + self.MaxPeptideWindowWidth):
+                FileLine = self.InputFile.readline()
+                if not FileLine:
+                    EOFFlag = 1
+                    break
+                if FileLine[0] == "#":
+                    self.HeaderLines.append(FileLine)
+                    continue # skip comment line
+                FileLine = FileLine.replace("\r", "").replace("\n", "")
+                if not FileLine.strip():
+                    continue # skip blank line
+                Bits = FileLine.split("\t")
+                try:
+                    Species = PeptideSpeciesClass()
+                    Species.ParseBits(Bits)
+                    Species.ProteinName = Bits[FormatBits.ProteinName]
+                except:
+                    traceback.print_exc()
+                    print Bits
+                    continue
+                # SKIP the species if it falls outside our block:
+                if self.DBStart != None and Species.DBPos < self.DBStart:
+                    continue
+                if self.DBEnd != None and Species.DBPos >= self.DBEnd:
+                    continue
+                # If the species comes from a NEW protein, finish the window and save the new species
+                # for next iteration:
+                #print "CurrentProtein '%s', new species protein '%s'"%(CurrentProteinName, Species.ProteinName)
+                if CurrentProteinName == None:
+                    # We have no current-protein.  Start the list:
+                    CurrentProteinName = Species.ProteinName
+                    WindowStart = Species.DBPos
+                else:
+                    # Check whether this species matches the current protein:
+                    if Species.ProteinName != CurrentProteinName:
+                        NextProteinFirstPeptide = Species
+                        break
+                self.WindowPeptides.append(Species)
+                WindowStart = min(WindowStart, Species.DBPos)
+                WindowEnd = max(WindowEnd, Species.DBEnd)
+            ###############################################################
+            print "->Handling %s peptides in range %s...%s\n  %s"%(len(self.WindowPeptides), WindowStart, WindowEnd, CurrentProteinName)
+            self.PerformMergeReconcileOnWindow(PerformMergeFlag)
+            ###############################################################
+            # Re-sort the window peptides, so that sites fall together:
+            SortedList = []
+            for Peptide in self.WindowPeptides:
+                SortedList.append((Peptide.ModDBPos, Peptide.ModMass, Peptide))
+            SortedList.sort()
+            self.WindowPeptides = []
+            for Tuple in SortedList:
+                self.WindowPeptides.append(Tuple[-1])
+            ###############################################################
+            # Write file header now, if we haven't:
+            if not WroteHeaderFlag:
+                for HeaderLine in self.HeaderLines:
+                    self.OutputFile.write(HeaderLine)
+                WroteHeaderFlag = 1
+            ###############################################################                
+            # Write out, and free, peptides in (the early portion of) the window:
+            PeptideIndex = 0
+            while PeptideIndex < len(self.WindowPeptides):
+                Species = self.WindowPeptides[PeptideIndex]
+                ###print "  Species %s of %s: %s-%s (window %s-%s)"%(PeptideIndex, len(self.WindowPeptides), Species.DBPos, Species.DBEnd, WindowStart, WindowEnd)
+                if Species.DBEnd > WindowEnd - 100 and (not EOFFlag and not NextProteinFirstPeptide):
+                    ###print "  ->Leave it in the window"
+                    PeptideIndex += 1
+                    continue
+                # This peptide can be dropped from the list.
+                if Species.MergedFlag:
+                    pass
+                else:
+                    # Update Species.Bits to reflect changes to Species.Features:
+                    for Index in range(FormatBits.FirstFeature, FormatBits.LastFeature + 1):
+                        Species.Bits[Index] = str(Species.Features[Index - FormatBits.FirstFeature])
+                    String = string.join(Species.Bits, "\t")
+                    self.OutputFile.write(String + "\n")
+                    ModTypeKey = (Species.ModAA, Species.ModMass)
+                    # Note NOW the number of sites and spectra for each modification-type.
+                    self.ModTypeSiteCount[ModTypeKey] = self.ModTypeSiteCount.get(ModTypeKey, 0) + 1
+                    self.ModTypeSpectrumCount[ModTypeKey] = self.ModTypeSpectrumCount.get(ModTypeKey, 0) + Species.Features[FeatureBits.SpectrumCount]
+                ###print "  ->Drop it from the window"
+                del self.WindowPeptides[PeptideIndex]
+    def GetSiteScore(self, Site):
+        Site.PValue = 1.0
+        SortedSpeciesScores = []
+        for Species in Site.SpeciesList:
+            SortedSpeciesScores.append(Species.ModelScore)
+            if Species.Charge > 2:
+                Species.PValue = self.Model3.GetPValue(Species.ModelScore)
+            else:
+                Species.PValue = self.Model2.GetPValue(Species.ModelScore)
+            Site.PValue *= Species.PValue
+        SortedSpeciesScores.sort()
+        SortedSpeciesScores.reverse()
+        SiteScore = [-math.log(Site.PValue)]
+        SiteScore.extend(SortedSpeciesScores)
+        return SiteScore
+    def LoadKnownModifications(self):
+        """
+        Parse the KnownPTM file, to get a list of chemical events which we
+        already have a (hypothetical) annotation for.
+        """
+        if not self.KnownChemistryFileName:
+            return
+        self.KnownPTMs = []
+        self.KnownPTMByMass = {}
+        File = open(self.KnownChemistryFileName, "rb")
+        # Load one PTM from each line of the file.
+        # Line format (tab-delimited fields):
+        # Mass, name, AllowedResidues, Terminus, AllowedPrefix, AllowedSuffix
+        # Example: -17, pyroglutamate formation, Q, N, "", ""
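+        # An equivalent tab-separated line would look like this (illustrative;
+        # <TAB> marks the separator, and missing trailing fields are padded
+        # with "" below):
+        #   -17<TAB>pyroglutamate formation<TAB>Q<TAB>N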
+        LineNumber = 0
+        for FileLine in File.xreadlines():
+            LineNumber += 1
+            Bits = list(FileLine.strip().split("\t"))
+            if len(Bits) == 1:
+                continue
+            if FileLine[0] == "#":
+                continue
+            while len(Bits)<6:
+                Bits.append("")
+            try:
+                PTM = AnticipatedChemistry()
+                PTM.Mass = int(Bits[0])
+            except:
+                print "** Skipping invalid line %s of %s"%(LineNumber, self.KnownChemistryFileName)
+                print Bits
+                continue
+            PTM.Name = Bits[1]
+            if len(Bits[2]) == 0 or Bits[2][0] == "*":
+                PTM.AllowedResidues = None
+            else:
+                PTM.AllowedResidues = Bits[2]
+            if len(Bits[3].strip()):
+                PTM.Terminus = Bits[3].strip()
+            if len(Bits[4].strip()):
+                PTM.AllowedPrefix = Bits[4].strip()
+            if len(Bits[5].strip()):
+                PTM.AllowedSuffix = Bits[5].strip()
+            self.KnownPTMs.append(PTM)
+            if not self.KnownPTMByMass.has_key(PTM.Mass):
+                self.KnownPTMByMass[PTM.Mass] = []
+            self.KnownPTMByMass[PTM.Mass].append(PTM)
+        File.close()
+        print "Loaded %s known PTMs from '%s'."%(len(self.KnownPTMs), self.KnownChemistryFileName)
+    def AttemptKnownPTM(self, Site):
+        InitialScore = self.GetSiteScore(Site)
+        # Initialize known-ptm information for each species:
+        for Species in Site.SpeciesList:
+            Species.BestKnownPTMAnnotation = ""
+            Species.BestKnownPTMAnnotationName = ""
+            Species.BestKnownPTMAnnotationPValue = ""
+            Species.BestKnownPTMAnnotationScore = ""
+            Species.BestKnownPTMAnnotationSitePValue = ""
+        # Loop over species to find the allowed database residues.
+        ResidueCounts = {}
+        ResidueInitialCounts = {}
+        for Species in Site.SpeciesList:
+            for Pos in range(Species.DBPos, Species.DBPos + len(Species.Peptide.Aminos)):
+                ResidueCounts[Pos] = ResidueCounts.get(Pos, 0) + 1
+            ResidueInitialCounts[Species.DBPos] = ResidueInitialCounts.get(Species.DBPos, 0) + 1
+        BestEditedSiteScore = None
+        self.KPTMVerbose.write("\n===============================\n")
+        self.KPTMVerbose.write("Site %s initial score (%.3f, %.3f)\n"%(Site, InitialScore[0], InitialScore[1]))
+        Residues = ResidueCounts.items()
+        FirstCoveredResidue = min(ResidueCounts.keys())
+        LastCoveredResidue = max(ResidueCounts.keys())
+        ###############################################################################
+        # Consider shifting the modification to any (legal) residue, with any legal mass:
+        ModMass = Site.SpeciesList[0].ModMass
+        # Decide which endpoint-shifts we'll consider.
+        # If the peptides don't all share the same N-terminus, then shifting the
+        # N-terminus isn't allowed.  (That would perforce SPLIT this site!)
+        Shifts = [None]
+        if ResidueCounts[FirstCoveredResidue] == len(Site.SpeciesList):
+            Shifts.extend(["N+1", "N+2"])
+            if FirstCoveredResidue > 0:
+                Shifts.append("N-1")
+            if FirstCoveredResidue > 1:
+                Shifts.append("N-2")
+        if ResidueCounts[LastCoveredResidue] == len(Site.SpeciesList):
+            Shifts.extend(["C-2", "C-1"])
+            if LastCoveredResidue < len(self.DB) - 1:
+                Shifts.append("C+1")
+            if LastCoveredResidue < len(self.DB) - 2:
+                Shifts.append("C+2")
+        for Shift in Shifts:
+            # CoreMass is equal to the modified mass, possibly modified due to
+            # endpoint shifts:
+            CoreMass = ModMass
+            # AllowedDBList is a list of the database positions where the ptm could
+            # be attached.  We've already selected the range the peptide will cover,
+            # but the PTM could fall on various residues:
+            AllowedDBList = list(range(FirstCoveredResidue, LastCoveredResidue + 1))
+            if Shift in ("N-1", "N-2"):
+                AllowedDBList.append(FirstCoveredResidue - 1)
+                DropMass = Global.AminoMass.get(self.DB[FirstCoveredResidue - 1], None)
+                if not DropMass:
+                    continue
+                CoreMass -= DropMass
+            if Shift == "N-2":
+                AllowedDBList.append(FirstCoveredResidue - 2)
+                DropMass = Global.AminoMass.get(self.DB[FirstCoveredResidue - 2], None)
+                if not DropMass:
+                    continue
+                CoreMass -= DropMass
+            if Shift in ("N+1", "N+2"):
+                AllowedDBList.remove(FirstCoveredResidue)
+                CoreMass += Global.AminoMass.get(self.DB[FirstCoveredResidue], None)
+            if Shift == "N+2":
+                AllowedDBList.remove(FirstCoveredResidue + 1)
+                CoreMass += Global.AminoMass.get(self.DB[FirstCoveredResidue + 1], None)
+            if Shift in ("C+1", "C+2"):
+                AllowedDBList.append(LastCoveredResidue + 1)
+                DropMass = Global.AminoMass.get(self.DB[LastCoveredResidue + 1], None)
+                if not DropMass:
+                    continue
+                CoreMass -= DropMass
+            if Shift == "C+2":
+                AllowedDBList.append(LastCoveredResidue + 2)
+                DropMass = Global.AminoMass.get(self.DB[LastCoveredResidue + 2], None)
+                if not DropMass:
+                    continue
+                CoreMass -= DropMass
+            if Shift in ("C-1", "C-2"):
+                AllowedDBList.remove(LastCoveredResidue)
+                CoreMass += Global.AminoMass.get(self.DB[LastCoveredResidue], None)
+            if Shift == "C-2":
+                AllowedDBList.remove(LastCoveredResidue - 1)
+                CoreMass += Global.AminoMass.get(self.DB[LastCoveredResidue - 1], None)
+            if CoreMass < -250 or CoreMass > 250:
+                continue
+            CoreMass = int(round(CoreMass))
+            ShiftablePeptides = []
+            for DBPos in AllowedDBList:
+                TryMassList = (CoreMass - 2, CoreMass - 1, CoreMass, CoreMass + 1, CoreMass + 2)
+                for NearMass in TryMassList:
+                    KnownPTMList = self.KnownPTMByMass.get(NearMass, [])
+                    for KnownPTM in KnownPTMList:
+                        # Determine whether this is a legal PTM placement.
+                        # The amino acid type must be valid:
+                        if KnownPTM.AllowedResidues != None and self.DB[DBPos] not in KnownPTM.AllowedResidues:
+                            continue
+                        # The terminus must be valid (for at least one peptide species):
+                        if KnownPTM.Terminus in ("N", "^"):
+                            if DBPos > FirstCoveredResidue:
+                                continue
+                        # The prefix and suffix residues must be valid:
+                        if DBPos:
+                            PrefixAA = self.DB[DBPos - 1]
+                        else:
+                            PrefixAA = "-"
+                        if DBPos < len(self.DB) - 1:
+                            SuffixAA = self.DB[DBPos + 1]
+                        else:
+                            SuffixAA = "-"
+                        if KnownPTM.AllowedPrefix != None and PrefixAA not in KnownPTM.AllowedPrefix:
+                            continue
+                        if KnownPTM.AllowedSuffix != None and SuffixAA not in KnownPTM.AllowedSuffix:
+                            continue
+                        ############################################################
+                        # Okay, this is a LEGAL placement.  Determine its score:
+                        Score = self.TryShiftedSite(Site, NearMass, DBPos, KnownPTM, Shift)
+                        if Score > BestEditedSiteScore:
+                            BestEditedSiteScore = Score
+                            self.RememberOptimalKnownPTM(Site)
+            # Now, for this shift, let's try no modification at all...if
+            # our mass is not too large.  Many spectra, especially from LTQ
+            # data-sets, have spurious +6 PTMs:
+            if abs(CoreMass) < 10.0:
+                Score = self.TryShiftedSite(Site, 0, DBPos, None, Shift)
+                if Score > BestEditedSiteScore:
+                    BestEditedSiteScore = Score
+                    self.RememberOptimalKnownPTM(Site)
+        ###############################################
+        # Loop over modification SHIFTS, POSITIONS and MASSES is now complete.
+        # Clean up memory usage:
+        for Species in Site.SpeciesList:
+            Species.FreeCachedSpectra()
+        # Edit the species bits, for output:
+        for Species in Site.SpeciesList:
+            while len(Species.Bits) <= FormatBits.KnownPTMSitePValue:
+                Species.Bits.append("")
+            Species.Bits[FormatBits.KnownPTMName] = Species.BestKnownPTMAnnotationName
+            Species.Bits[FormatBits.KnownPTMAnnotation] = Species.BestKnownPTMAnnotation
+            Species.Bits[FormatBits.KnownPTMScore] = str(Species.BestKnownPTMAnnotationScore)
+            Species.Bits[FormatBits.KnownPTMSitePValue] = str(Species.BestKnownPTMAnnotationSitePValue)
+        # Verbose output:
+        if BestEditedSiteScore:
+            # Let stdout know what we're up to:
+            Species = Site.SpeciesList[0]
+            self.KPTMVerbose.write("Result: PValue %.3f (versus %.3f)\n"%(Species.BestKnownPTMAnnotationSitePValue, Site.PValue))
+            ScoreDiff = -math.log(Species.BestKnownPTMAnnotationSitePValue) + math.log(Site.PValue)
+            self.KPTMVerbose.write("==>Score change: %s\n"%ScoreDiff)
+            for Species in Site.SpeciesList:
+                self.KPTMVerbose.write("  %s (original)\n"%Species.Annotation)
+                self.KPTMVerbose.write("  %s (%s)\n"%(Species.BestKnownPTMAnnotation, Species.BestKnownPTMAnnotationName))
+                self.KPTMVerbose.write("Score %s (vs %s)\n"%(Species.BestKnownPTMAnnotationScore, Species.ModelScore))
+        ###############################################
+        # And now, we can output the site:
+        for Species in Site.SpeciesList:
+            Str = string.join(Species.Bits, "\t")
+            self.OutputFile.write(Str + "\n")
+    def TryShiftedSite(self, Site, ModMass, ModDBPos, KnownPTM, Shift):
+        """
+        Try shifting this modification site to the specified database position
+        and mass.  Consider the effects on the peptide-score of each peptide.
+        Return the resulting site-score.
+        The value of Shift can be None, N-1, N-2, N+1, N+2, C-1, C-2, C+1, C+2.
+        """
+        SitePValue = 1.0
+        SortedSpeciesScores = []
+        #print "try shift to %+d on %s%s shift %s (%s)"%(ModMass, self.DB[ModDBPos], ModDBPos, Shift, KnownPTM.Name)
+        for Species in Site.SpeciesList:
+            if Species.Charge > 2:
+                Model = self.Model3
+            else:
+                Model = self.Model2
+            # Default "null" values:
+            Species.KnownPTMAnnotation = ""
+            Species.KnownPTMAnnotationScore = ""
+            Species.KnownPTMAnnotationPValue = ""
+            Species.KnownPTMAnnotationName = ""
+            DBStart = Species.DBPos
+            DBEnd = Species.DBEnd
+            if Shift == "N-1":
+                DBStart -= 1
+            elif Shift == "N-2":
+                DBStart -= 2
+            elif Shift == "N+1":
+                DBStart += 1
+            elif Shift == "N+2":
+                DBStart += 2
+            elif Shift == "C-1":
+                DBEnd -= 1
+            elif Shift == "C-2":
+                DBEnd -= 2
+            elif Shift == "C+1":
+                DBEnd += 1
+            elif Shift == "C+2":
+                DBEnd += 2
+            if DBEnd <= ModDBPos or DBStart > ModDBPos:
+                continue
+            if KnownPTM and KnownPTM.Terminus in ("N", "^") and ModDBPos != DBStart:
+                continue
+            EditedFeatures = Species.Features[:]
+            BestSpectrum = Species.GetBestSpectrum(self)
+            NewAminos = self.DB[DBStart:DBEnd]
+            NewPrefix = self.DB[DBStart - 1]
+            NewSuffix = self.DB[DBEnd]
+            ModPos = ModDBPos - DBStart
+            if ModMass == 0:
+                NewAnnotation = NewAminos
+            else:
+                NewAnnotation = "%s%+d%s"%(NewAminos[:ModPos + 1], ModMass, NewAminos[ModPos + 1:])
+            NewAnnotation = "%s.%s.%s"%(NewPrefix, NewAnnotation, NewSuffix)
+            if NewAnnotation == Species.Annotation:
+                # Shortcut - the annotation hasn't changed, so neither will the score!
+                if Species.Charge > 2:
+                    PValue = self.Model3.GetPValue(Species.ModelScore)
+                else:
+                    PValue = self.Model2.GetPValue(Species.ModelScore)
+                SortedSpeciesScores.append(Species.ModelScore)
+                SitePValue *= PValue
+                # Store these temp values; if the site is GOOD then we'll edit the species:
+                Species.KnownPTMAnnotation = NewAnnotation
+                if KnownPTM:
+                    Species.KnownPTMAnnotationName = KnownPTM.Name
+                else:
+                    Species.KnownPTMAnnotationName = "unmodified"
+                Species.KnownPTMAnnotationScore = Species.ModelScore
+                Species.KnownPTMAnnotationPValue = PValue
+                continue
+            # Best spectrum score:
+            Tuple = BestSpectrum.ScorePeptideDetailed(NewAnnotation)
+            EditedFeatures[FeatureBits.BestMQScore] = Tuple[0]
+            #EditedFeatures[FeatureBits.BestMQScore] = BestSpectrum.ScorePeptide(NewAnnotation)
+            # Delta-score:
+            ScoreDiff = EditedFeatures[FeatureBits.BestMQScore] - Species.Features[FeatureBits.BestMQScore]
+            EditedFeatures[FeatureBits.BestDeltaScore] += ScoreDiff
+            # Consensus spectrum score:
+            #print Species
+            ConsensusSpectrum = Species.GetConsensusSpectrum(self)
+            ScoreInfo = ConsensusSpectrum.ScorePeptideDetailed(NewAnnotation)
+            EditedFeatures[FeatureBits.ConsensusMQScore] = ScoreInfo[0]
+            ScoreDiff = EditedFeatures[FeatureBits.ConsensusMQScore] - Species.Features[FeatureBits.ConsensusMQScore]
+            EditedFeatures[FeatureBits.DeltaVsBigDB] += ScoreDiff
+            EditedFeatures[FeatureBits.PeptideLength] = ScoreInfo[1]
+            EditedFeatures[FeatureBits.TotalCutScore] = ScoreInfo[2]
+            EditedFeatures[FeatureBits.MedianCutScore] = ScoreInfo[3]
+            EditedFeatures[FeatureBits.YPresent] = ScoreInfo[4]
+            EditedFeatures[FeatureBits.BPresent] = ScoreInfo[5]
+            EditedFeatures[FeatureBits.BYIntensity] = ScoreInfo[6]
+            EditedFeatures[FeatureBits.NTT] = ScoreInfo[7]
+##            EditedFeatures[FeatureBits.PRMScore] = ScoreInfo[1]
+##            EditedFeatures[FeatureBits.BYPresence] = ScoreInfo[2]
+##            EditedFeatures[FeatureBits.TopPeakExplanation] = ScoreInfo[3]
+##            EditedFeatures[FeatureBits.NTT] = ScoreInfo[4]
+            ModTypeKey = (self.DB[ModDBPos], ModMass)
+            EditedFeatures[FeatureBits.SpectraThisModType] = self.ModTypeSpectrumCount.get(ModTypeKey, 1)
+            EditedFeatures[FeatureBits.SitesThisModType] = self.ModTypeSiteCount.get(ModTypeKey, 1)
+            EditedFeatures[FeatureBits.LogSpecThisType] = math.log(EditedFeatures[FeatureBits.SpectraThisModType])
+            EditedFeatures[FeatureBits.LogSitesThisType] = math.log(EditedFeatures[FeatureBits.SitesThisModType])
+            # Spectral similarity:
+            try:
+                SisterAnnotationFlag = int(Species.Bits[FormatBits.SisterAnnotationFlag])
+            except:
+                SisterAnnotationFlag = 0
+            if SisterAnnotationFlag:
+                try:
+                    ConsensusMSSpectrum = Species.GetConsensusMSSpectrum(self)
+                    ModlessMSSpectrum = Species.GetConsensusModlessMSSpectrum(self)
+                    Comparator = SpectralSimilarity.SpectralSimilarity(ConsensusMSSpectrum,
+                        ModlessMSSpectrum, NewAnnotation, Species.ModlessAnnotation)
+                    # COPIED from ComputePTMFeatures:
+                    Comparator.LabelPeaks(0.5)
+                    Similarity = Comparator.DotProduct(0.5)
+                    EditedFeatures[FeatureBits.Dot] = Similarity
+                    Similarity = Comparator.GetSharedPeakCount(0, 1)
+                    EditedFeatures[FeatureBits.Shared01] = Similarity
+                    Similarity = Comparator.GetSharedPeakCount(1, 1)
+                    EditedFeatures[FeatureBits.Shared11] = Similarity
+                    CorrelationCoefficient = Comparator.ComputeCorrelationCoefficient(1.0)
+                    EditedFeatures[FeatureBits.Correlation] = CorrelationCoefficient
+                except:
+                    traceback.print_exc()
+                    print "*** Unable to generate spectral-similarity features; continuing."
+                    print Site, ModMass, ModDBPos, KnownPTM, Shift
+                    print Species.Annotation, NewAnnotation
+                    print "BITS:", Species.Bits
+                    print "SisterAnnotationFlag:", SisterAnnotationFlag
+            # Features are complete - score the altered peptide!
+            ModelScore = self.ScoreInstance(Model, EditedFeatures)
+##            # TEMP: Verbose output
+##            self.KPTMVerbose.write("%s -> %s\n"%(Species.Annotation, NewAnnotation))
+##            for Index in range(len(EditedFeatures)):
+##                self.KPTMVerbose.write("%s: %.2f %.2f (%.4f)\n"%(Index, Species.Features[Index], EditedFeatures[Index], EditedFeatures[Index] - Species.Features[Index]))
+##            self.KPTMVerbose.write("Score: %s versus old %s\n"%(ModelScore, Species.ModelScore))
+            PValue = Model.GetPValue(ModelScore)
+            SortedSpeciesScores.append(ModelScore)
+            SitePValue *= PValue
+            # Store these temp values; if the site is GOOD then we'll edit the species:
+            Species.KnownPTMAnnotation = NewAnnotation
+            if KnownPTM:
+                Species.KnownPTMAnnotationName = KnownPTM.Name
+            else:
+                Species.KnownPTMAnnotationName = "Unmodified"
+            Species.KnownPTMAnnotationScore = ModelScore
+            Species.KnownPTMAnnotationPValue = PValue
+        SortedSpeciesScores.sort()
+        SortedSpeciesScores.reverse()
+        for Species in Site.SpeciesList:
+            Species.KnownPTMAnnotationSitePValue = SitePValue
+        SiteScore = [-math.log(SitePValue)]
+        SiteScore.extend(SortedSpeciesScores)
+        if len(SortedSpeciesScores):
+            self.KPTMVerbose.write("%s%+d on %s (%s): score (%.3f, %.3f)\n"%(self.DB[ModDBPos], ModMass, ModDBPos, Shift, SiteScore[0], SiteScore[1]))
+        return SiteScore
+    def ScoreInstance(self, Model, Features):
+        NiceFeatures = []
+        #for FeatureIndex in TrainPTMFeatures.ValidFeatureIndices:
+        #    NiceFeatures.append(Features[FeatureIndex])
+        return Model.ScoreInstance(Features)
+    def RememberOptimalKnownPTM(self, Site):
+        for Species in Site.SpeciesList:
+            Species.BestKnownPTMAnnotation = Species.KnownPTMAnnotation
+            Species.BestKnownPTMAnnotationScore = Species.KnownPTMAnnotationScore
+            Species.BestKnownPTMAnnotationPValue = Species.KnownPTMAnnotationPValue
+            Species.BestKnownPTMAnnotationSitePValue = Species.KnownPTMAnnotationSitePValue
+            Species.BestKnownPTMAnnotationName = Species.KnownPTMAnnotationName
+    def LoadCoverageLevels(self):
+        "Load peptide coverage levels, written by ComputePTMFeatures"
+        CoveragePath = os.path.join(self.TempFileDir, "Coverage.dat")
+        try:
+            CoverageFile = open(CoveragePath, "rb")
+        except:
+            traceback.print_exc()
+            print "** WARNING: Coverage levels not found at '%s'"%CoveragePath
+            self.Coverage = [1] * len(self.DB)
+            self.ModCoverage = [1] * len(self.DB)
+            return
+        self.Coverage = []
+        self.ModCoverage = []
+        BlokSize = struct.calcsize("<II")
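+        # Coverage.dat (written by ComputePTMFeatures) stores one little-endian pair
+        # of unsigned ints per database position: (unmodified coverage, modified coverage).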
+        for DBPos in range(len(self.DB)):
+            Blok = CoverageFile.read(BlokSize)
+            Tuple = struct.unpack("<II", Blok)
+            self.Coverage.append(Tuple[0])
+            self.ModCoverage.append(Tuple[1])
+        CoverageFile.close()
+    def SaveCoverageLevels(self):
+        "Save peptide coverage levels, which may have changed during merge+reconcile"
+        Dir = self.TempFileDir #os.path.split(self.OutputFileName)[0]
+        if self.DBEnd != None:
+            CoveragePath = os.path.join(Dir, "AdjustedCoverage.%s.%s.dat"%(self.DBStart, self.DBEnd))
+        else:
+            CoveragePath = os.path.join(Dir, "AdjustedCoverage.dat")
+        CoverageFile = open(CoveragePath, "wb")
+        for DBPos in range(len(self.DB)):
+            if self.Coverage[DBPos] < 0 or self.Coverage[DBPos] >= 65535:
+                print "* Coverage of %s is %s"%(DBPos, self.Coverage[DBPos])
+            if self.ModCoverage[DBPos] < 0 or self.ModCoverage[DBPos] >= 65535:
+                print "* ModCoverage of %s is %s"%(DBPos, self.ModCoverage[DBPos])
+            Str = struct.pack("<II", self.Coverage[DBPos], self.ModCoverage[DBPos])
+            CoverageFile.write(Str)
+        CoverageFile.close()
+    def MergeAndReconcile(self):
+        """
+        Iterate over peptide species, from best to worst.  Two iterations: In the first, we
+        consider MERGING a peptide with a superior one.  In the second iteration, we consider
+        RECONCILING each species to a superior one.
+        """
+        self.LoadCoverageLevels()
+        #self.ParseOriginalSpectraForModType(self.InputFileName)
+        if self.DBEnd != None:
+            MergeFileName = "MergeDetails.%s.%s.txt"%(self.DBStart, self.DBEnd)
+            ReconcileFileName = "ReconcileDetails.%s.%s.txt"%(self.DBStart, self.DBEnd)
+        else:
+            MergeFileName = "MergeDetails.txt"
+            ReconcileFileName = "ReconcileDetails.txt"
+        OutputDir = os.path.split(self.OutputFileName)[0]
+        self.MergeDetailOutput = open(os.path.join(OutputDir, MergeFileName), "wb")
+        Header = "Flag\tCharge\tMaster\tServant\tMasterMQScore\tServantMQScore\tServantRescore\tScoreChange\tNewConsScore\tOldModelScore\tNewModelScore\t"
+        self.MergeDetailOutput.write(Header + "\n")
+        self.ReconcileDetailOutput = open(os.path.join(OutputDir, ReconcileFileName), "wb")
+        # Flow for file lines:
+        # Initial input file -> MergeTemp -> ReconcileTemp -> output file
+        ########################################################
+        # FIRST cycle through points: Consider merging.
+        self.InputFile = open(self.InputFileName, "rb")
+        MergeTempPath = "%s.merge"%self.OutputFileName
+        self.OutputFile = open(MergeTempPath, "wb")
+        print ">>>PerformAllMerges 1: Read from %s, write to %s"%(self.InputFileName, MergeTempPath)
+        self.PerformAllMerges(1)
+        self.InputFile.close()
+        self.OutputFile.close()
+        ########################################################
+        # SECOND cycle through points: Consider conforming.
+        print "\n\n-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
+        print "Reconcile:"
+        self.InputFile = open(MergeTempPath, "rb")
+        ConformTempPath = "%s.conform"%self.OutputFileName
+        self.OutputFile = open(ConformTempPath, "wb")
+        print ">>>PerformAllMerges 0: Read from %s, write to %s"%(MergeTempPath, ConformTempPath)
+        self.PerformAllMerges(0)
+        self.InputFile.close()
+        self.OutputFile.close()
+        self.SaveCoverageLevels()
+        if self.DBStart != None:
+            # We're handling just one block.  Therefore, we shouldn't re-score the peptides:
+            return 
+        ########################################################
+        # At this point, we know the number of sites and spectra for each
+        # modification type.  We need that information for when we
+        # consider changing to known PTMs.  Let's pickle it.
+        self.SaveSitesByType()
+        ########################################################
+        # THIRD cycle through points: Update sites-per-mod and spectra-per-mod,
+        # and accumulate feature vectors for the model.
+        #self.InputFile = open(ConformTempPath, "rb")
+        self.ParseFeatureVectors(ConformTempPath)
+        #self.InputFile.close()
+        ########################################################
+        # FOURTH cycle through points: Write the revised score!
+        self.OutputFile = open(self.OutputFileName, "wb")
+        for HeaderLine in self.HeaderLines:
+            self.OutputFile.write(HeaderLine)
+        self.ProcessSites(ConformTempPath, "rescore")
+    def MergeBlockRuns(self):
+        """
+        Combine the output of several AdjustPTM runs for sub-blocks of the database.  
+        """
+        Directory = os.path.split(self.OutputFileName)[0]
+        self.CombineBlockCoverage(self.TempFileDir)
+        self.SaveCoverageLevels()
+        # Populate self.ModTypeSpectrumCount and self.ModTypeSiteCount:
+        self.LoadModSitesByTypeBlocks(Directory)
+        self.SaveSitesByType()
+        # Concatenate block files into one large file:
+        ConcatenatedFileName = os.path.join(Directory, "ConcatenatedFeatures.txt")
+        self.ConcatenateBlockOutputFiles(Directory, ConcatenatedFileName)
+        # Parse feature-vectors, train model, output model:
+        self.ParseFeatureVectors(ConcatenatedFileName)
+        # Rescore:
+        self.OutputFile = open(self.OutputFileName, "wb")
+        for HeaderLine in self.HeaderLines:
+            self.OutputFile.write(HeaderLine)
+        self.ProcessSites(ConcatenatedFileName, "rescore")
+        self.OutputFile.close()
+    def ConcatenateBlockOutputFiles(self, Directory, OutputFileName):
+        """
+        Concatenate the merge-and-reconcile output files from various blocks of the database.
+        """
+        OutputFile = open(OutputFileName, "wb")
+        FirstFileFlag = 1
+        for FileName in os.listdir(Directory):
+            (Stub, Extension) = os.path.splitext(FileName)
+            if Extension != ".conform":
+                continue
+            FilePath = os.path.join(Directory, FileName)
+            File = open(FilePath, "rb")
+            for FileLine in File.xreadlines():
+                if FileLine[0] == "#":
+                    # Header line.  Write it out iff this is the first file
+                    if FirstFileFlag:
+                        OutputFile.write(FileLine)
+                else:
+                    OutputFile.write(FileLine)
+            File.close()
+            FirstFileFlag = 0
+        print "Concatenated results from '%s' into '%s'"%(Directory, OutputFileName)
+    def LoadModSitesByTypeBlocks(self, Directory):
+        """
+        Iterate over block output files from this directory.  Populate
+        self.ModTypeSiteCount and self.ModTypeSpectrumCount based on the contents.
+        """
+        self.ModTypeSpectrumCount = {}
+        self.ModTypeSiteCount = {}        
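+        # Both dictionaries are keyed by (modified amino acid, integer modification mass).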
+        for FileName in os.listdir(Directory):
+            (Stub, Extension) = os.path.splitext(FileName)
+            if Extension != ".conform":
+                continue
+            Path = os.path.join(Directory, FileName)
+            File = open(Path, "rb")
+            print "Read SitesByType from %s..."%Path
+            for FileLine in File.xreadlines():
+                Bits = FileLine.split("\t")
+                if FileLine[0] == "#":
+                    continue
+                AA = Bits[FormatBits.ModifiedAA]
+                Mass = int(Bits[FormatBits.ModificationMass])
+                Spectra = int(float(Bits[FormatBits.SpectrumCount]))
+                Key = (AA, Mass)
+                self.ModTypeSiteCount[Key] = self.ModTypeSiteCount.get(Key, 0) + 1
+                self.ModTypeSpectrumCount[Key] = self.ModTypeSpectrumCount.get(Key, 0) + Spectra
+    def CombineBlockCoverage(self, Directory):
+        # Load the original coverage, just for reference:
+        self.LoadCoverageLevels()
+        # Iterate over coverage output files from the individual blocks:
+        for FileName in os.listdir(Directory):
+            (Stub, Extension) = os.path.splitext(FileName)
+            if Extension != ".dat":
+                continue
+            Bits = FileName.split(".")
+            # Names have the form AdjustedCoverage.START.END.dat
+            if len(Bits) < 4:
+                continue
+            DBStart = int(Bits[1])
+            DBEnd = int(Bits[2])
+            Path = os.path.join(Directory, FileName)
+            print "Read block coverage from %s..."%Path
+            CoverageFile = open(Path, "rb")
+            BlokSize = struct.calcsize("<II")
+            for DBPos in range(len(self.DB)):
+                Blok = CoverageFile.read(BlokSize)
+                Tuple = struct.unpack("<II", Blok)
+                if DBPos < DBStart or DBPos >= DBEnd:
+                    continue
+                self.Coverage[DBPos] = Tuple[0]
+                self.ModCoverage[DBPos] = Tuple[1]
+            CoverageFile.close()
+    def ProcessSites(self, InputFileName, Command):
+        """
+        Parse modification-sites from the input file.  Once all the peptides
+        for a site have been read, execute the command.
+        """
+        if Command == "knownptm":
+            if self.KnownPTMVerboseOutputFileName:
+                self.KPTMVerbose = open(self.KnownPTMVerboseOutputFileName, "wb")
+            else:
+                self.KPTMVerbose = sys.stdout
+        CurrentSite = None
+        InputFile = open(InputFileName, "rb")
+        for FileLine in InputFile.xreadlines():
+            FileLine = FileLine.replace("\r", "").replace("\n", "")
+            if not FileLine:
+                continue  # skip blank lines
+            Bits = FileLine.split("\t")
+            if FileLine[0] == "#" or len(Bits) < 2:
+                continue
+            try:
+                Species = PeptideSpeciesClass()
+                Species.ParseBits(Bits)
+            except:
+                traceback.print_exc()
+                print Bits
+                continue
+            if CurrentSite == None or Species.ModDBPos != CurrentSite.ModDBPos or Species.ModMass != CurrentSite.ModMass:
+                # Finish the previous (if any) site:
+                if CurrentSite:
+                    if Command == "rescore":
+                        self.RescoreAndWriteSite(CurrentSite)
+                    elif Command == "knownptm":
+                        self.AttemptKnownPTM(CurrentSite)
+                CurrentSite = SiteClass()
+                CurrentSite.ModDBPos = Species.ModDBPos
+                CurrentSite.ModMass = Species.ModMass
+            # Add a species to the current site:
+            CurrentSite.SpeciesList.append(Species)
+        InputFile.close()
+        # Finish the last site:
+        if CurrentSite:
+            if Command == "rescore":
+                self.RescoreAndWriteSite(CurrentSite)
+            elif Command == "knownptm":
+                self.AttemptKnownPTM(CurrentSite)
+    def ParseFeatureVectors(self, FileName):
+        """
+        Called after merge and reconcile.  Read feature-vectors, updating
+        the spectrum/site counts for modification type, and obtain scores!
+        """
+        FeatureSet2 = Learning.FeatureSetClass()
+        FeatureSet3 = Learning.FeatureSetClass()
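+        # Vectors from charge 1-2 spectra are collected in FeatureSet2, charge 3 and
+        # above in FeatureSet3, matching the two scoring models (Model2 / Model3).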
+        File = open(FileName, "rb")
+        for FileLine in File.xreadlines():
+            Bits = FileLine.split("\t")
+            if FileLine[0] == "#":
+                continue
+            if len(Bits) < 2:
+                continue
+            Charge = int(Bits[FormatBits.Charge])
+            Vector = Learning.FeatureVector()
+            Vector.Features = [] 
+            for BitIndex in range(FormatBits.FirstFeature, FormatBits.LastFeature + 1):
+                try:
+                    Vector.Features.append(float(Bits[BitIndex]))
+                except:
+                    Vector.Features.append(0)
+            # Tweak spectra-by-type and sites-by-type:
+            ModTypeKey = (Bits[FormatBits.ModifiedAA], int(Bits[FormatBits.ModificationMass]))
+            TotalSpectra = self.ModTypeSpectrumCount.get(ModTypeKey, 0)
+            TotalSites = self.ModTypeSiteCount.get(ModTypeKey, 0)
+            Vector.Features[FeatureBits.SpectraThisModType] = TotalSpectra
+            Vector.Features[FeatureBits.SitesThisModType] = TotalSites
+            print "Total Spectra: %d"%TotalSpectra
+            Vector.Features[FeatureBits.LogSpecThisType] = math.log(TotalSpectra)
+            Vector.Features[FeatureBits.LogSitesThisType] = math.log(TotalSites)
+            if Charge > 2:
+                FeatureSet = FeatureSet3
+            else:
+                FeatureSet = FeatureSet2
+            if int(Bits[FormatBits.TrueProteinFlag]):
+                Vector.TrueFlag = 1
+                FeatureSet.TrueVectors.append(Vector)
+            else:
+                Vector.TrueFlag = 0
+                FeatureSet.FalseVectors.append(Vector)
+            FeatureSet.AllVectors.append(Vector)
+        File.close()
+        FeatureSet2.SetCounts()
+        FeatureSet3.SetCounts()
+        self.Model2.Test(FeatureSet2)
+        self.Model3.Test(FeatureSet3)
+        if self.OutputModelFileName2:
+            self.Model2.SaveModel(self.OutputModelFileName2)
+            self.Model3.SaveModel(self.OutputModelFileName3)
+    def LoadCluster(self, Path):
+        Builder = BuildConsensusSpectrum.ConsensusBuilder()
+        Builder.UnpickleCluster(Path)
+        return Builder
+    def AttemptMerge(self, SpeciesA, SpeciesB, BWeak = 0):
+        """
+        Attempt a merge of SpeciesA into SpeciesB.  If the merge is valid,
+        perform the merge, set SpeciesA.MergedFlag, and RETURN TRUE.
+        """
+        print "AttemptMerge chg%s %s into %s"%(SpeciesA.Charge, SpeciesA.Annotation, SpeciesB.Annotation)
+        ################################################
+        # Condition 1: Consensus A seems to match annotation B reasonably well
+        if BWeak:
+            # Species A has the stronger score, so don't screw it up!
+            ScoreLossLimit = 0.1
+        else:
+            ScoreLossLimit = 3
+        SpectrumA = SpeciesA.GetConsensusSpectrum(self)
+        Score = SpectrumA.ScorePeptide(SpeciesB.Annotation)
+        DetailStr = "%s\t%s\t%s\t%s\t%s\t"%(SpeciesA.Charge, SpeciesB.Annotation, SpeciesA.Annotation, SpeciesB.Features[FeatureBits.ConsensusMQScore], SpeciesA.Features[FeatureBits.ConsensusMQScore])
+        DetailStr += "%s\t%s\t"%(Score, SpeciesA.Features[FeatureBits.ConsensusMQScore] - Score)
+        if (SpeciesA.Features[FeatureBits.ConsensusMQScore] - Score) > ScoreLossLimit:
+            DetailStr = "FailAScore\t"+DetailStr
+            self.MergeDetailOutput.write(DetailStr + "\n")
+            return 0
+        ################################################
+        # Condition 2: The merged consensus spectrum is not significantly WORSE.
+        # Load in ClusterA and ClusterB (we cache the species-A cluster)
+        ClusterPathA = os.path.join(self.ConsensusClusterDir, SpeciesA.Annotation[2], "%s.%s.cls"%(SpeciesA.Annotation, SpeciesA.Charge))
+        if ClusterPathA == self.CachedClusterPath:
+            ClusterA = self.CachedCluster
+        else:
+            self.CachedCluster = self.LoadCluster(ClusterPathA)
+            self.CachedClusterPath = ClusterPathA
+            ClusterA = self.CachedCluster
+        ClusterPathB = os.path.join(self.ConsensusClusterDir, SpeciesB.Annotation[2], "%s.%s.cls"%(SpeciesB.Annotation, SpeciesB.Charge))
+        ClusterB = self.LoadCluster(ClusterPathB)
+        # If we combine these two clusters into a single consensus
+        # spectrum, what sort of MQScore do we end up with?
+        TempConsensusPath = os.path.join(self.ConsensusSpectraDir, "Consensus.dta")
+        ClusterB.AssimilateCluster(ClusterA)
+        NewConsensusSpectrum = ClusterB.ProduceConsensusSpectrum()
+        NewConsensusSpectrum.WritePeaks(TempConsensusPath)
+        # Set the file members of the spectrum, since Label.py reads them:
+        NewConsensusSpectrum.FilePath = TempConsensusPath
+        NewConsensusSpectrum.FilePos = 0
+        NewConsensusSpectrum.RankPeaksByIntensity()
+        PySpectrum = PyInspect.Spectrum(TempConsensusPath, 0)
+        ScoreInfo = PySpectrum.ScorePeptideDetailed(SpeciesB.Annotation)
+        DetailStr += "%s\t"%ScoreInfo[0]
+        if SpeciesB.Features[FeatureBits.ConsensusMQScore] - ScoreInfo[0] > 2:
+            DetailStr = "FailBConsensus\t"+DetailStr
+            self.MergeDetailOutput.write(DetailStr + "\n")
+            return 0
+        NewFeatures = SpeciesB.Features[:]
+        # Update feature values for the merged guy:
+        # Spectra:
+        NewFeatures[FeatureBits.SpectrumCount] = SpeciesA.Features[FeatureBits.SpectrumCount] + SpeciesB.Features[FeatureBits.SpectrumCount]
+        NewFeatures[FeatureBits.LogSpectrumCount] = math.log(NewFeatures[FeatureBits.SpectrumCount])
+        NewFeatures[FeatureBits.ModlessSpectrumCount] = SpeciesB.Features[FeatureBits.ModlessSpectrumCount] # unchanged
+        # BestMQ, BestDelta, PeptideCount:
+        BestSpectrumA = SpeciesA.GetBestSpectrum(self)
+        NewBestMQA = BestSpectrumA.ScorePeptide(SpeciesB.Annotation)
+        NewBestDeltaA = SpeciesA.Features[3] + (NewBestMQA - SpeciesA.Features[2])
+        print "A best MQ %.4f (was %.4f) delta %.4f (was %.4f)"%(NewBestMQA, SpeciesA.Features[2],
+            NewBestDeltaA, SpeciesA.Features[3])
+        # Best MQScore:
+        NewFeatures[FeatureBits.BestMQScore] = max(NewBestMQA, SpeciesB.Features[FeatureBits.BestMQScore])
+        # BestDelta:
+        NewFeatures[FeatureBits.BestDeltaScore] = max(NewBestDeltaA, SpeciesB.Features[FeatureBits.BestDeltaScore])
+        # Peptide length:
+        NewFeatures[FeatureBits.PeptideLength] = SpeciesB.Features[FeatureBits.PeptideLength]
+        # Peptide count:
+        NewFeatures[FeatureBits.PeptideCount] = SpeciesB.Features[FeatureBits.PeptideCount]
+        # Consensus scoring (Score, and score components):
+        NewFeatures[FeatureBits.ConsensusMQScore] = ScoreInfo[0]
+        NewFeatures[FeatureBits.PeptideLength] = ScoreInfo[1]
+        NewFeatures[FeatureBits.TotalCutScore] = ScoreInfo[2]
+        NewFeatures[FeatureBits.MedianCutScore] = ScoreInfo[3]
+        NewFeatures[FeatureBits.YPresent] = ScoreInfo[4]
+        NewFeatures[FeatureBits.BPresent] = ScoreInfo[5]
+        NewFeatures[FeatureBits.BYIntensity] = ScoreInfo[6]
+        NewFeatures[FeatureBits.NTT] = ScoreInfo[7]
+        # Adjust delta-score by the difference in consensus-mq-score:
+        NewFeatures[FeatureBits.DeltaVsBigDB] = SpeciesB.Features[FeatureBits.DeltaVsBigDB] + (ScoreInfo[0] - SpeciesB.Features[FeatureBits.ConsensusMQScore])
+        # Similarity scores for the new consensus spectrum:
+        if SpeciesB.Bits[FormatBits.SisterAnnotationFlag]:
+            ModlessSpectrum = SpeciesB.GetConsensusModlessMSSpectrum(self)
+            #print "Build comparator:"
+            Comparator = SpectralSimilarity.SpectralSimilarity(NewConsensusSpectrum,
+                ModlessSpectrum, SpeciesB.Annotation, SpeciesB.ModlessAnnotation)
+            #print "Label peaks:"
+            # COPIED from ComputePTMFeatures:
+            Comparator.LabelPeaks(0.5)
+            #print "Compute:"
+            Similarity = Comparator.DotProduct(0.5)
+            NewFeatures[FeatureBits.Dot] = Similarity
+            Similarity = Comparator.GetSharedPeakCount(0, 1)
+            NewFeatures[FeatureBits.Shared01] = Similarity
+            Similarity = Comparator.GetSharedPeakCount(1, 1)
+            NewFeatures[FeatureBits.Shared11] = Similarity
+            CorrelationCoefficient = Comparator.ComputeCorrelationCoefficient(1.0)
+            NewFeatures[FeatureBits.Correlation] = CorrelationCoefficient
+        # Ask the trained model what it thinks of this new feature-vector:
+        if SpeciesA.Charge > 2:
+            Model = self.Model3
+        else:
+            Model = self.Model2
+        NewModelScore = self.ScoreInstance(Model, NewFeatures)
+        PValue = Model.GetPValue(NewModelScore)
+        OldPValue = Model.GetPValue(SpeciesB.ModelScore)
+        print "Score of the NEW FEATURES: %.3f (%.1f%%) versus %.3f (%.1f%%) old)"%(NewModelScore, 100 * PValue, SpeciesB.ModelScore, 100 * OldPValue)
+        DetailStr += "%s\t%s\t"%(SpeciesB.ModelScore, NewModelScore)
+        ################################################
+        # Condition 3: Model score should not be dramatically worse!        
+        if NewModelScore < SpeciesB.ModelScore - 0.5:
+            DetailStr = "FailModelScore\t%s"%DetailStr
+            self.MergeDetailOutput.write(DetailStr + "\n")
+            return 0
+        DetailStr = "MERGE\t" + DetailStr
+        print "MERGE %s and %s"%(SpeciesA.Annotation, SpeciesB.Annotation)
+        self.MergeDetailOutput.write(DetailStr + "\n")
+        #######################################################
+        # ALL CONDITIONS PASSED, NOW LET'S MERGE:
+        SpeciesA.MergedFlag = 1 # this species won't be written out.
+        # Remember the new "best spectrum", if it belonged to A:
+        if (NewBestMQA > SpeciesB.Features[2]):
+            SpeciesB.ConsensusMQScore = NewBestMQA
+            SpeciesB.Bits[FormatBits.BestSpectrumPath] = SpeciesA.Bits[FormatBits.BestSpectrumPath]
+        SpeciesB.Features = NewFeatures
+        ############################################
+        # Update our COVERAGE and MODDED-FRACTION:
+        for DBPos in range(SpeciesA.DBPos, SpeciesA.DBEnd):
+            if SpeciesA.Peptide.Modifications.keys():
+                self.ModCoverage[DBPos] -= int(SpeciesA.Features[FeatureBits.SpectrumCount])
+            else:
+                self.Coverage[DBPos] -= int(SpeciesA.Features[FeatureBits.SpectrumCount])
+        for DBPos in range(SpeciesB.DBPos, SpeciesB.DBEnd):
+            if SpeciesB.Peptide.Modifications.keys():
+                self.ModCoverage[DBPos] += int(SpeciesB.Features[FeatureBits.SpectrumCount])
+            else:
+                self.Coverage[DBPos] += int(SpeciesB.Features[FeatureBits.SpectrumCount])
+        SpeciesB.Features[FeatureBits.ModdedFraction] = self.ModCoverage[SpeciesB.ModDBPos] / float(self.ModCoverage[SpeciesB.ModDBPos] + self.Coverage[SpeciesB.ModDBPos])
+        ############################################    
+        # Write the adjusted consensus cluster:
+        ClusterPathB = os.path.join(self.ConsensusClusterDirAdjusted, SpeciesB.Annotation[2], "%s.%s.cls"%(SpeciesB.Annotation, SpeciesB.Charge))
+        ClusterB.PickleCluster(ClusterPathB)
+        SpeciesB.ConsensusMSSpectrum = NewConsensusSpectrum
+        SpeciesB.ConsensusSpectrum = PySpectrum
+        SpeciesB.ModelScore = NewModelScore
+        # Write the adjusted consensus spectrum:
+        ConsensusPath = os.path.join(self.ConsensusSpectraDirAdjusted, SpeciesB.Annotation[2], "%s.%s.dta"%(SpeciesB.Annotation, SpeciesB.Charge))
+        NewConsensusSpectrum.WritePeaks(ConsensusPath)
+        # Write the merged list of members:
+        MemberListStr = ""
+        try:
+            MemberListStr += SpeciesA.GetMemberListStr(self)
+            MemberListStr += SpeciesB.GetMemberListStr(self)
+        except:
+            print "* ERROR!"
+            print SpeciesA
+            print SpeciesB
+            raise
+        Path = os.path.join(self.ClusterScanListDirAdjusted, SpeciesB.Annotation[2], "%s.%s.txt"%(SpeciesB.Annotation, SpeciesB.Charge))
+        ClusterMemberFile = open(Path, "wb")
+        ClusterMemberFile.write(MemberListStr)
+        ClusterMemberFile.close()
+        # Remove peptide A from self.PeptideDict:
+        Key = (SpeciesA.Annotation, SpeciesA.Charge)
+        try:
+            del self.PeptideDict[Key]
+        except:
+            pass
+        return 1
+    def AttemptReconcileFixEndpoints(self, SpeciesA, SpeciesB, OldDBPos, OldDBEnd):
+        # Adjust endpoints, if necessary for reconciliation:
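+        # If the two modification masses differ by roughly the mass of one or two
+        # flanking residues, shift A's endpoints so that the masses agree.
+        # Illustrative example (hypothetical masses): if A carries +185, B carries +57,
+        # and the residue just before A's N-terminus is Q (~128 Da), extending A by
+        # that residue leaves +57 on both species.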
+        if SpeciesA.ModMass > SpeciesB.ModMass:
+            # Species A has a larger modification.  Maybe we can ADD 1-2 residues
+            # and make the modification mass equal?
+            # Try shifting N-terminus:
+            ExtraMass = Global.AminoMass.get(self.DB[SpeciesA.DBPos - 1], 999999)
+            FullMass = int(round(SpeciesA.ModMass - ExtraMass))
+            if FullMass == SpeciesB.ModMass:
+                return (OldDBPos - 1, OldDBEnd)
+            if FullMass < SpeciesB.ModMass:
+                ExtraMass += Global.AminoMass.get(self.DB[SpeciesA.DBPos - 2], 999999)
+                FullMass = int(round(SpeciesA.ModMass - ExtraMass))
+                if FullMass == SpeciesB.ModMass:
+                    return (OldDBPos - 2, OldDBEnd)
+            # Try shifting C-terminus:
+            ExtraMass = Global.AminoMass.get(self.DB[SpeciesA.DBEnd], 999999)
+            FullMass = int(round(SpeciesA.ModMass - ExtraMass))
+            if abs(FullMass - SpeciesB.ModMass) < 2:
+                return (OldDBPos, OldDBEnd + 1)
+            if FullMass < SpeciesB.ModMass:
+                ExtraMass += Global.AminoMass.get(self.DB[SpeciesA.DBEnd + 1], 999999)
+                FullMass = int(round(SpeciesA.ModMass - ExtraMass))
+                if abs(FullMass - SpeciesB.ModMass) < 2:
+                    return (OldDBPos, OldDBEnd + 2)
+        if SpeciesA.ModMass < SpeciesB.ModMass:
+            # Species A has a smaller modification.  Maybe we can REMOVE 1-2 residues
+            # and make the modification mass equal?
+            # Try shifting N-terminus:
+            ExtraMass = Global.AminoMass[self.DB[SpeciesA.DBPos]]
+            FullMass = int(round(SpeciesA.ModMass + ExtraMass))
+            if abs(FullMass - SpeciesB.ModMass) < 2:
+                return (OldDBPos + 1, OldDBEnd)
+            if FullMass > SpeciesB.ModMass:
+                ExtraMass += Global.AminoMass[self.DB[SpeciesA.DBPos + 1]]
+                FullMass = int(round(SpeciesA.ModMass + ExtraMass))
+                if abs(FullMass - SpeciesB.ModMass) < 2:
+                    return (OldDBPos + 2, OldDBEnd)
+            # Try shifting C-terminus:
+            ExtraMass = Global.AminoMass[self.DB[SpeciesA.DBEnd - 1]]
+            FullMass = int(round(SpeciesA.ModMass + ExtraMass))
+            if abs(FullMass - SpeciesB.ModMass) < 2:
+                return (OldDBPos, OldDBEnd - 1)
+            if FullMass < SpeciesB.ModMass:
+                ExtraMass += Global.AminoMass[self.DB[SpeciesA.DBEnd - 2]]
+                FullMass = int(round(SpeciesA.ModMass + ExtraMass))
+                if abs(FullMass - SpeciesB.ModMass) < 2:
+                    return (OldDBPos, OldDBEnd - 2)
+        return (OldDBPos, OldDBEnd)
+    def AttemptReconcile(self, SpeciesA, SpeciesB):
+        """
+        Attempt to reconcile species A with species B.  In other words,
+        edit annotation A so that it carries the same modification as B, and
+        on the same database position.  If the effects on match quality score
+        (and/or model score) are an IMPROVEMENT (or at least, not a big
+        disappointment), then perform the reconciliation, and return TRUE.
+        """
+        OldDBPos = SpeciesA.DBPos
+        OldDBEnd = SpeciesA.DBEnd
+        (NewDBPos, NewDBEnd) = self.AttemptReconcileFixEndpoints(SpeciesA, SpeciesB, SpeciesA.DBPos, SpeciesA.DBEnd)
+        NewPrefix = self.DB[NewDBPos - 1]
+        NewSuffix = self.DB[NewDBEnd]
+        ConsensusSpectrum = SpeciesA.GetConsensusSpectrum(self)
+        ModIndex = SpeciesB.ModDBPos - NewDBPos
+        ModdedAnnotation = "%s%+d%s"%(self.DB[NewDBPos:SpeciesB.ModDBPos + 1], SpeciesB.ModMass, self.DB[SpeciesB.ModDBPos + 1:NewDBEnd])
+        NewAnnotation = "%s.%s.%s"%(NewPrefix, ModdedAnnotation, NewSuffix)
+        NewAnnotation = NewAnnotation.replace("*", "-")
+        NewConsensusScore = ConsensusSpectrum.ScorePeptide(NewAnnotation)
+        ScoreDiff = NewConsensusScore - SpeciesA.ConsensusMQScore
+        self.ReconcileDetailOutput.write("%s\t%s\t%s\t%s\t%s\t\n"%(SpeciesA.Annotation, NewAnnotation, SpeciesA.ConsensusMQScore, NewConsensusScore, ScoreDiff))
+        if ScoreDiff < -0.5:
+            return 0
+        OldAnnotation = SpeciesA.Annotation
+        NewPeptide = GetPeptideFromModdedName(NewAnnotation)
+        NewModlessAnnotation = "%s.%s.%s"%(NewPeptide.Prefix, self.DB[NewDBPos:NewDBEnd], NewPeptide.Suffix)
+        # Compute new features of the 'reconciled peptide':
+        NewFeatures = SpeciesA.Features[:]
+        # Best spectrum MQScore and Delta-score:
+        PySpectrum = SpeciesA.GetBestSpectrum(self)
+        NewBestMQ = PySpectrum.ScorePeptide(NewAnnotation)
+        NewFeatures[FeatureBits.BestDeltaScore] += (NewBestMQ - NewFeatures[FeatureBits.BestMQScore])
+        NewFeatures[FeatureBits.BestMQScore] = NewBestMQ
+        PeptideLength = NewDBEnd - NewDBPos
+        NewFeatures[FeatureBits.PeptideLength] = PeptideLength
+        NewFeatures[FeatureBits.LogPeptideLength] = math.log(PeptideLength)
+        # Consensus score:
+        PySpectrum = SpeciesA.GetConsensusSpectrum(self)
+        ScoreInfo = PySpectrum.ScorePeptideDetailed(NewAnnotation)
+        NewFeatures[FeatureBits.ConsensusMQScore] = ScoreInfo[0]
+        NewFeatures[FeatureBits.PeptideLength] = ScoreInfo[1]
+        NewFeatures[FeatureBits.TotalCutScore] = ScoreInfo[2]
+        NewFeatures[FeatureBits.MedianCutScore] = ScoreInfo[3]
+        NewFeatures[FeatureBits.YPresent] = ScoreInfo[4]
+        NewFeatures[FeatureBits.BPresent] = ScoreInfo[5]
+        NewFeatures[FeatureBits.BYIntensity] = ScoreInfo[6]
+        NewFeatures[FeatureBits.NTT] = ScoreInfo[7]
+        # Adjust delta-score by the difference in consensus-mq-score:
+        NewFeatures[FeatureBits.DeltaVsBigDB] = SpeciesA.Features[FeatureBits.DeltaVsBigDB] + (ScoreInfo[0] - SpeciesA.Features[FeatureBits.ConsensusMQScore])
+        # Adjust spectra, sites for this modification type:
+        NewFeatures[FeatureBits.SpectraThisModType] = SpeciesB.Features[FeatureBits.SpectraThisModType]
+        NewFeatures[FeatureBits.SitesThisModType] = SpeciesB.Features[FeatureBits.SitesThisModType]
+        NewFeatures[FeatureBits.LogSpecThisType] = SpeciesB.Features[FeatureBits.LogSpecThisType]
+        NewFeatures[FeatureBits.LogSitesThisType] = SpeciesB.Features[FeatureBits.LogSitesThisType]
+        # Modless spectra:
+        # - If our endpoint didn't change, then we keep our old modless spectrum
+        # - If our endpoints changed and we're assimilating a target (ExistingSpecies),
+        #   then we inherit *its* modless spectra
+        # - Otherwise, we LOSE our modless spectra!
+        Key = (NewAnnotation, SpeciesA.Charge)
+        ExistingSpecies = self.PeptideDict.get(Key, None)
+        # Initialize:
+        ModlessSpectrumFlag = 0
+        BestModlessSpectrumPath = ""
+        BestModlessMQScore = ""
+        if OldDBPos == NewDBPos and OldDBEnd == NewDBEnd:
+            try:
+                ModlessSpectrumFlag = int(SpeciesA.Bits[FormatBits.SisterAnnotationFlag])
+                BestModlessSpectrumPath = SpeciesA.Bits[FormatBits.BestModlessSpectrumPath]
+                BestModlessMQScore = float(SpeciesA.Bits[FormatBits.BestModlessMQScore])
+                ModlessMSSpectrum = SpeciesA.GetConsensusModlessMSSpectrum(self)
+            except:
+                pass # the modless-bits weren't set; that's fine
+        elif ExistingSpecies:
+            try:
+                ModlessSpectrumFlag = int(ExistingSpecies.Bits[FormatBits.SisterAnnotationFlag])
+                BestModlessSpectrumPath = ExistingSpecies.Bits[FormatBits.BestModlessSpectrumPath]
+                BestModlessMQScore = float(ExistingSpecies.Bits[FormatBits.BestModlessMQScore])
+                ModlessMSSpectrum = ExistingSpecies.GetConsensusModlessMSSpectrum(self)
+            except:
+                pass # the modless-bits weren't set; that's fine
+        else:
+            ModlessSpectrumFlag = ""
+            BestModlessSpectrumPath = ""
+            BestModlessMQScore = ""
+        if ModlessSpectrumFlag:
+            MSSpectrum = SpeciesA.GetConsensusMSSpectrum(self)
+            Comparator = SpectralSimilarity.SpectralSimilarity(MSSpectrum,
+                ModlessMSSpectrum, NewAnnotation, NewModlessAnnotation)
+            # COPIED from ComputePTMFeatures:
+            Comparator.LabelPeaks(0.5)
+            Similarity = Comparator.DotProduct(0.5)
+            NewFeatures[FeatureBits.Dot] = Similarity
+            Similarity = Comparator.GetSharedPeakCount(0, 1)
+            NewFeatures[FeatureBits.Shared01] = Similarity
+            Similarity = Comparator.GetSharedPeakCount(1, 1)
+            NewFeatures[FeatureBits.Shared11] = Similarity
+            CorrelationCoefficient = Comparator.ComputeCorrelationCoefficient(1.0)
+            NewFeatures[FeatureBits.Correlation] = CorrelationCoefficient
+        if SpeciesA.Charge > 2:
+            Model = self.Model3
+        else:
+            Model = self.Model2
+        NewModelScore = self.ScoreInstance(Model, NewFeatures)
+        # Finalize:
+        # If self.PeptideDict already has an entry, then this peptide
+        # is the same as another after reconciliation.  But, we already
+        # skipped the opportunity to merge with that other.
+        if ExistingSpecies:
+            self.ReconcileDetailOutput.write("# Existing species has score %s vs model %s\n"%(ExistingSpecies.ModelScore, SpeciesA.ModelScore))
+            if ExistingSpecies.ModelScore < SpeciesA.ModelScore:
+                ExistingSpecies.MergedFlag = 1
+            else:
+                # We want to reconcile to the master...but that would make us the same as another
+                # peptide species, which we refused to merge with!
+                self.ReconcileDetailOutput.write("# *-> We'd like to reconcile %s to %s, but...\n"%(SpeciesA.Annotation, NewAnnotation))
+                self.ReconcileDetailOutput.write("# ...there's ALREADY a superior peptide at %s\n"%str(Key))
+                return 0
+        ################################################################################
+        # All tests passed.  RECONCILE!
+        self.ReconcileDetailOutput.write("> Reconcile %s to %s\n"%(SpeciesA.Annotation, NewAnnotation))
+        # Copy over our consensus spectrum.  
+        NewSpectrumPath = os.path.join(self.ConsensusSpectraDirAdjusted, NewAnnotation[2], "%s.%s.dta"%(NewAnnotation, SpeciesA.Charge))
+        OldSpectrumPath = os.path.join(self.ConsensusSpectraDirAdjusted, SpeciesA.Annotation[2], "%s.%s.dta"%(SpeciesA.Annotation, SpeciesA.Charge))
+        if os.path.exists(OldSpectrumPath):
+            # If we've already adjusted once, move the *old* adjusted to the *new* adjusted:
+            if sys.platform == "win32":
+                Command = "move \"%s\" \"%s\""%(OldSpectrumPath, NewSpectrumPath)
+            else:
+                Command = "mv \"%s\" \"%s\""%(OldSpectrumPath, NewSpectrumPath)
+        else:
+            OldSpectrumPath = os.path.join(self.ConsensusSpectraDir, SpeciesA.Annotation[2], "%s.%s.dta"%(SpeciesA.Annotation, SpeciesA.Charge))
+            if sys.platform == "win32":
+                Command = "copy \"%s\" \"%s\""%(OldSpectrumPath, NewSpectrumPath)
+            else:
+                Command = "cp \"%s\" \"%s\""%(OldSpectrumPath,NewSpectrumPath)
+        print Command
+        os.system(Command)
+        # Copy over the cluster member list:
+        MemberListStr = SpeciesA.GetMemberListStr(self)
+        Path = os.path.join(self.ClusterScanListDirAdjusted, NewAnnotation[2], "%s.%s.txt"%(NewAnnotation, SpeciesA.Charge))
+        ClusterMemberFile = open(Path, "wb")
+        ClusterMemberFile.write(MemberListStr)
+        ClusterMemberFile.close()
+        # Update features and such:
+        SpeciesA.Features = NewFeatures
+        SpeciesA.ConsensusMQScore = NewConsensusScore
+        SpeciesA.ModelScore = NewModelScore
+        SpeciesA.Peptide = NewPeptide
+        SpeciesA.ComputePrefixes()
+        SpeciesA.Annotation = NewAnnotation
+        SpeciesA.ModDBPos = SpeciesB.ModDBPos
+        SpeciesA.ModMass = SpeciesB.ModMass
+        SpeciesA.ModAA = SpeciesB.ModAA
+        SpeciesA.ModlessAnnotation = NewModlessAnnotation
+        # Revise file bits:
+        SpeciesA.Bits[FormatBits.DBPos] = str(SpeciesA.ModDBPos)
+        SpeciesA.Bits[FormatBits.ModificationMass] = str(SpeciesA.ModMass)
+        SpeciesA.Bits[FormatBits.ModifiedAA] = SpeciesB.Bits[FormatBits.ModifiedAA]
+        SpeciesA.Bits[FormatBits.ModifiedResidueNumber] = SpeciesB.Bits[FormatBits.ModifiedResidueNumber]
+        SpeciesA.Bits[FormatBits.Peptide] = NewAnnotation
+        for FeatureIndex in range(len(SpeciesA.Features)):
+            SpeciesA.Bits[FormatBits.FirstFeature + FeatureIndex] = str(SpeciesA.Features[FeatureIndex])
+        SpeciesA.Bits[FormatBits.ModelScore] = str(NewModelScore)
+        SpeciesA.Bits[FormatBits.ConsensusMQScore] = str(NewConsensusScore)
+        # Bits for modless spectra:
+        SpeciesA.Bits[FormatBits.SisterAnnotationFlag] = str(ModlessSpectrumFlag)
+        SpeciesA.Bits[FormatBits.BestModlessSpectrumPath] = BestModlessSpectrumPath
+        SpeciesA.Bits[FormatBits.BestModlessMQScore] = str(BestModlessMQScore)
+        # Remove our old PeptideDict entry:
+        try:
+            del self.PeptideDict[(OldAnnotation, SpeciesA.Charge)]
+        except:
+            pass
+        # Add a new PeptideDict entry:
+        self.PeptideDict[Key] = SpeciesA
+        ############################################
+        # Update our COVERAGE and MODDED-FRACTION:
+        for DBPos in range(OldDBPos, OldDBEnd):
+            self.ModCoverage[DBPos] -= int(SpeciesA.Features[FeatureBits.SpectrumCount])
+        for DBPos in range(SpeciesA.DBPos, SpeciesA.DBEnd):
+            self.ModCoverage[DBPos] += int(SpeciesA.Features[FeatureBits.SpectrumCount])
+        SpeciesA.Features[FeatureBits.ModdedFraction] = self.ModCoverage[SpeciesA.ModDBPos] / float(self.ModCoverage[SpeciesA.ModDBPos] + self.Coverage[SpeciesA.ModDBPos])
+        ############################################    
+        return 1
+    def LoadModel(self):
+        print "load %s"%self.SavedModelFileName2
+        self.Model2 = Learning.LoadGeneralModel(self.SavedModelFileName2)
+        print "load %s"%self.SavedModelFileName3
+        self.Model3 = Learning.LoadGeneralModel(self.SavedModelFileName3)
+        #print "Model.MixtureModel", self.Model.MixtureModel
+    def GroupPeptidesBySite(self):
+        self.Sites = {} # (ModDBPos, ModMass) -> site instance
+        for Species in self.SpeciesList:
+            if Species.MergedFlag:
+                continue
+            Key = (Species.ModDBPos, Species.ModMass)
+            Site = self.Sites.get(Key, None)
+            if not Site:
+                Site = SiteClass()
+                Site.ModDBPos = Species.ModDBPos
+                Site.ModMass = Species.ModMass
+                Site.ModAA = Species.ModAA
+                self.Sites[Key] = Site
+            Site.SpeciesList.append(Species)
+    def ScorePTMSites(self):
+        """
+        Group peptide species by site, and compute the p-value (odds FALSE) for
+        each site. 
+        """
+        self.GroupPeptidesBySite()
+        for Site in self.Sites.values():
+            Site.PValue = 1.0
+            for Species in Site.SpeciesList:
+                while len(Species.Bits) <= FormatBits.SitePValue:
+                    Species.Bits.append("")
+                if Species.Charge > 2:
+                    Model = self.Model3
+                else:
+                    Model = self.Model2
+                PeptidePValue = Model.GetPValue(Species.ModelScore)
+                Species.Bits[FormatBits.ModelPValue] = str(PeptidePValue)
+                Site.PValue *= PeptidePValue
+            for Species in Site.SpeciesList:
+                Species.Bits[FormatBits.SitePValue] = str(Site.PValue)
+    def RescoreAndWriteSite(self, Site):
+        Site.PValue = 1.0
+        for Species in Site.SpeciesList:
+            ModTypeKey = (Species.Bits[FormatBits.ModifiedAA], Species.ModMass)
+            TotalSpectra = self.ModTypeSpectrumCount.get(ModTypeKey, 0)
+            TotalSites = self.ModTypeSiteCount.get(ModTypeKey, 0)
+            Species.Features[FeatureBits.SpectraThisModType] = TotalSpectra
+            Species.Features[FeatureBits.SitesThisModType] = TotalSites
+            Species.Features[FeatureBits.LogSpecThisType] = math.log(TotalSpectra)
+            Species.Features[FeatureBits.LogSitesThisType] = math.log(TotalSites)
+            Species.Bits[FormatBits.SpectraWithThisModType] = str(TotalSpectra)
+            Species.Bits[FormatBits.SitesWithThisModType] = str(TotalSites)
+            Species.Bits[FormatBits.LogSpectraThisModType] = str(math.log(TotalSpectra))
+            Species.Bits[FormatBits.LogSitesThisModType] = str(math.log(TotalSites))
+            DBPosition = int(Species.Bits[FormatBits.DBPos])
+            # Update modded%:
+            ModdedSpectra = self.ModCoverage[DBPosition]
+            ModlessSpectra = self.Coverage[DBPosition]
+            TotalSpectra = ModdedSpectra + ModlessSpectra
+            if TotalSpectra <= 0:
+                print "*** Warning: Site %s has no coverage at DB position %s"%(Species.Annotation, DBPosition)
+            Species.Bits[FormatBits.ModdedFraction] = str(ModdedSpectra / float(max(1, TotalSpectra)))
+            # Pad with empty bits if necessary:
+            while len(Species.Bits) <= FormatBits.SitePValue:
+                Species.Bits.append("")
+            if Species.Charge > 2:
+                Model = self.Model3
+            else:
+                Model = self.Model2
+            PeptidePValue = Model.GetPValue(Species.ModelScore)
+            Species.Bits[FormatBits.ModelPValue] = str(PeptidePValue)
+            Site.PValue *= PeptidePValue
+        for Species in Site.SpeciesList:
+            Species.Bits[FormatBits.SitePValue] = str(Site.PValue)
+            Str = string.join(Species.Bits, "\t")
+            self.OutputFile.write(Str + "\n")
+    def OutputPTMs(self):
+        File = open(self.OutputFileName, "wb")
+        for Line in self.HeaderLines:
+            File.write(Line)
+        # Sort the sites:
+        SortedSites = []
+        for Site in self.Sites.values():
+            SortedSites.append((Site.PValue, Site))
+        SortedSites.sort()
+        CumulativeSiteCount = 0
+        CumulativeTrueSiteCount = 0
+        CumulativeSpeciesCount = 0
+        # Report peptides, grouped by site, from best site to worst:
+        for (PValue, Site) in SortedSites:
+            BestSpecies = None
+            for Species in Site.SpeciesList:
+                if Species.MergedFlag:
+                    continue
+                CumulativeSpeciesCount += 1
+                if (BestSpecies == None) or (Species.PValue < BestSpecies.PValue):
+                    BestSpecies = Species
+            if not BestSpecies:
+                continue
+            CumulativeSiteCount += 1
+            if int(Species.Bits[FormatBits.TrueProteinFlag]):
+                CumulativeTrueSiteCount += 1
+            # The LENS way:
+            FalseProteinCount = CumulativeSiteCount - CumulativeTrueSiteCount
+            #FalseWithinTrue = FalseProteinCount * 0.01
+            FalseWithinTrue = FalseProteinCount
+            TrueCount = max(0, CumulativeTrueSiteCount - FalseWithinTrue)
+            SiteCount = CumulativeTrueSiteCount
+            # FDR:
+            if CumulativeTrueSiteCount <= 0:
+                FDR = 1.0
+            else:
+                # False discovery rate:
+                # The number of spurious sites which come from valid proteins
+                # divided by the number of sites that come from valid proteins
+                FDR = FalseWithinTrue / float(CumulativeTrueSiteCount)
+                FDR = min(1.0, FDR)
+            print "pvalue %.6f sites%d species%d T%d F%d FWT %.3f SC %.3f FDR %.3f"%(\
+                Site.PValue, CumulativeSiteCount, CumulativeSpeciesCount, 
+                CumulativeTrueSiteCount, FalseProteinCount, FalseWithinTrue, SiteCount, FDR)
+            for Species in Site.SpeciesList:
+                if Species.MergedFlag:
+                    continue
+                try:
+                    Str = string.join(Species.Bits, "\t")
+                    Str += "\t%s\t"%FDR
+                except:
+                    traceback.print_exc()
+                    print Species.Bits
+                    print map(type, Species.Bits)
+                File.write(Str + "\n")
+        File.close()
+    def ParseDB(self):
+        DBFile = open(self.DBFileName, "rb")
+        self.DB = DBFile.read()
+        DBFile.close()
+    def CheckSpectrumDirectories(self):
+        """
+        Create our adjusted-spectrum and adjusted-cluster directories, wiping out
+        old ones beforehand if we must.  We do this for merge-and-reconcile;
+        we DON'T do it for biochem tweaking.
+        """
+        print "Prepare spectrum directories... (-z option, set for "
+        print "single-block runs and the first block of multi-block runs...)"
+        try:
+            shutil.rmtree(self.ConsensusClusterDirAdjusted)
+        except:
+            pass
+        try:
+            shutil.rmtree(self.ConsensusSpectraDirAdjusted)
+        except:
+            pass
+        try:
+            shutil.rmtree(self.ClusterScanListDirAdjusted)
+        except:
+            pass
+        MakeDirectory(self.ConsensusClusterDirAdjusted)
+        MakeDirectory(self.ConsensusSpectraDirAdjusted)
+        MakeDirectory(self.ClusterScanListDirAdjusted)
+        Aminos = "ACDEFGHIKLMNOPQRSTUVWY"
+        for Amino in Aminos:
+            Dir = os.path.join(self.ConsensusClusterDirAdjusted, Amino)
+            MakeDirectory(Dir)
+            Dir = os.path.join(self.ConsensusSpectraDirAdjusted, Amino)
+            MakeDirectory(Dir)
+            Dir = os.path.join(self.ClusterScanListDirAdjusted, Amino)
+            MakeDirectory(Dir)
+            
+    def ParseCommandLine(self, Arguments):
+        (Options, Args) = getopt.getopt(Arguments, "r:w:d:c:m:k:M:x:y:X:zev:")
+        OptionsSeen = {}
+        for (Option, Value) in Options:
+            OptionsSeen[Option] = 1
+            if Option == "-r":
+                # -r results file(s)
+                if not os.path.exists(Value):
+                    print "** Error: couldn't find results file '%s'\n\n"%Value
+                    print UsageInfo
+                    sys.exit(1)
+                self.InputFileName = Value
+            elif Option == "-m":
+                self.SavedModelFileName2 = "%s.2"%Value
+                self.SavedModelFileName3 = "%s.3"%Value
+            elif Option == "-M":
+                self.OutputModelFileName2 = "%s.2"%Value
+                self.OutputModelFileName3 = "%s.3"%Value
+            elif Option == "-w":
+                self.OutputFileName = Value
+            elif Option == "-d":
+                self.DBFileName = Value
+            elif Option == "-k":
+                self.KnownChemistryFileName = Value
+            elif Option == "-c":
+                self.TempFileDir = Value
+                self.ConsensusClusterDir = os.path.join(Value, "Clusters")
+                self.ConsensusSpectraDir = os.path.join(Value, "Spectra")
+                self.ClusterScanListDir = os.path.join(Value, "ClusterMembers")
+                self.ConsensusClusterDirAdjusted = os.path.join(Value, "ClustersAdjusted")
+                self.ConsensusSpectraDirAdjusted = os.path.join(Value, "SpectraAdjusted")
+                self.ClusterScanListDirAdjusted = os.path.join(Value, "ClusterMembersAdjusted")
+            elif Option == "-x":
+                self.DBStart = int(Value)
+            elif Option == "-y":
+                self.DBEnd = int(Value)
+            elif Option == "-X":
+                self.SpectrumRoot = Value
+            elif Option == "-z":
+                self.CheckDirectoriesFlag = 1
+            elif Option == "-e":
+                # mErge block runs:
+                self.MergeBlockRunsFlag = 1
+            elif Option == "-v":
+                self.KnownPTMVerboseOutputFileName = Value
+    def SaveSitesByType(self):
+        "Save sites/spectra by modification type"
+        PicklePath = os.path.join(self.TempFileDir, "SitesByModType.dat")
+        SitesFile = open(PicklePath, "wb")
+        cPickle.dump(self.ModTypeSiteCount, SitesFile)
+        cPickle.dump(self.ModTypeSpectrumCount, SitesFile)
+        SitesFile.close()
+    def LoadSitesByType(self):
+        "Load sites/spectra by modification type"
+        PicklePath = os.path.join(self.TempFileDir, "SitesByModType.dat")
+        SitesFile = open(PicklePath, "rb")
+        self.ModTypeSiteCount = cPickle.load(SitesFile)
+        self.ModTypeSpectrumCount = cPickle.load(SitesFile)
+        SitesFile.close()
+    def LoadHeaderLines(self, FileName):
+        File = open(FileName, "rb")
+        LineNumber = 0
+        for FileLine in File.xreadlines():
+            LineNumber += 1
+            if LineNumber > 10:
+                break
+            if FileLine[0] == "#":
+                self.HeaderLines.append(FileLine)
+        File.close()
+    def Main(self):
+        if self.SpectrumRoot:
+            self.PopulateSpectrumOracle(self.SpectrumRoot)
+        print "Load model..."
+        self.LoadModel()
+        print "Parse database..."
+        self.ParseDB()
+        if self.MergeBlockRunsFlag:
+            self.MergeBlockRuns()
+        elif self.KnownChemistryFileName:
+            print "Tweak sites to match KNOWN CHEMISTRY..."
+            self.LoadKnownModifications()
+            self.LoadSitesByType()
+            self.LoadHeaderLines(self.InputFileName)
+            self.OutputFile = open(self.OutputFileName, "wb")
+            for HeaderLine in self.HeaderLines:
+                self.OutputFile.write(HeaderLine)
+            self.ProcessSites(self.InputFileName, "knownptm")
+        else:
+            print "MERGE and RECONCILE..."
+            if self.CheckDirectoriesFlag:
+                self.CheckSpectrumDirectories()
+            self.MergeAndReconcile()
+            # Re-compute spectra for mod type, since some peptides
+            # have now been re-annotated:
+            #self.ComputeTotalSpectraForModType()
+            #self.RescorePeptides()
+            #self.ScorePTMSites()
+        
+        #print "Write output to %s"%(self.OutputFileName)
+        #self.OutputPTMs()        
+
+UsageInfo = """
+AdjustPTM: Merge, reconcile, and tweak PTM annotations.
+
+Arguments:
+-r [FILENAME]: Feature file (from TrainPTMFeatures) to read in
+-w [FILENAME]: Output modded-peptide filename
+-d [DBFILE]: Database searched
+-c [DIR]: Cluster directory
+-k [FILENAME]: Known chemistry filename.  If specified, consider altering sites
+   to match known chemical adducts; report the best site-score attainable by using
+   known chemical adducts.
+-m [FILENAME]: Peptide scoring model INPUT filename
+-M [FILENAME]: Peptide scoring model OUTPUT filename
+-x [POS]: Database start position
+-y [POS]: Database end position
+"""
+   
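+# Example invocation for a merge-and-reconcile run (hypothetical file names;
+# the -m argument is a model prefix, ".2" and ".3" are appended for the two
+# charge models):
+#   python AdjustPTM.py -r PTMFeatures.txt -w AdjustedPTMs.txt -d Database.trie \
+#       -c PTMTempFiles -m PTMModel -z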
+if __name__ == "__main__":
+    if PROFILING_RUN:
+        import profile
+        profile.run("Main()")
+    else:
+        try:
+            import psyco
+            psyco.full()
+        except:
+            print "(Psyco not found - no optimization)"
+        Adjutant = PTMAdjuster()
+        Adjutant.ParseCommandLine(sys.argv[1:])
+        Adjutant.Main()
diff --git a/AminoAcidMasses.txt b/AminoAcidMasses.txt
new file mode 100644
index 0000000..46b3d7c
--- /dev/null
+++ b/AminoAcidMasses.txt
@@ -0,0 +1,22 @@
+Glycine Gly G 57.02146 57.0520
+Alanine Ala A 71.03711 71.0788
+Serine Ser S 87.03203 87.0782
+Proline Pro P 97.05276 97.1167
+Valine Val V 99.06841 99.1326
+Threonine Thr T 101.04768 101.1051
+Cysteine Cys C 103.00919 103.1448
+Leucine Leu L 113.08406 113.1595
+Isoleucine Ile I 113.08406 113.1595
+Asparagine Asn N 114.04293 114.1039
+AsparticAcid Asp D 115.02694 115.0886
+Glutamine Gln Q 128.05858 128.1308
+Lysine Lys K 128.09496 128.1742
+GlutamicAcid Glu E 129.04259 129.1155
+Methionine Met M 131.04049 131.1986
+Histidine His H 137.05891 137.1412
+Phenylalanine Phe F 147.06841 147.1766
+Arginine Arg R 156.10111 156.1876
+Tyrosine Tyr Y 163.06333 163.1760
+Tryptophan Trp W 186.07931 186.2133
+#Selenocysteine Sel U 150.8 151.0
+#Pyrrolysine Pyr O 237.1 237.3
\ No newline at end of file
diff --git a/BN.c b/BN.c
new file mode 100644
index 0000000..8f559bc
--- /dev/null
+++ b/BN.c
@@ -0,0 +1,204 @@
+//Title:          BN.c
+//Authors:         Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+
+// Bayesian network support functions.
+// We employ a BN for scoring PRMs (prefix residue masses).  
+#include "CMemLeak.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+//#include <malloc.h>
+#include "Errors.h"
+#include "BN.h"
+#include "Utils.h"
+#include "Inspect.h"
+#include "Spectrum.h"
+#include "Trie.h"
+#include <math.h>
+
+BayesianModel* BNCharge2ScoringBN = NULL;
+BayesianModel* BNCharge3ScoringBN = NULL;
+BayesianModel* BNCharge2TaggingBN = NULL;
+BayesianModel* BNCharge3TaggingBN = NULL;
+
+void FreeBayesianModel(BayesianModel* Model);
+
+void OldInitBayesianModels()
+{
+    char FilePath[2048];
+    if (GlobalOptions->InstrumentType == INSTRUMENT_TYPE_QTOF)
+    {
+        sprintf(FilePath, "%s%s", GlobalOptions->ResourceDir, "Ch2BNPEPQ.dat");
+        BNCharge2ScoringBN = LoadBayesianModel(FilePath);
+        sprintf(FilePath, "%s%s", GlobalOptions->ResourceDir, "PRMQ2.dat");
+        BNCharge2TaggingBN = LoadBayesianModel(FilePath);
+        sprintf(FilePath, "%s%s", GlobalOptions->ResourceDir, "Ch3BNPEPQ.dat");
+        BNCharge3ScoringBN = LoadBayesianModel(FilePath);
+        sprintf(FilePath, "%s%s", GlobalOptions->ResourceDir, "PRMQ3.dat");
+        BNCharge3TaggingBN = LoadBayesianModel(FilePath);
+    }
+    else
+    {
+        sprintf(FilePath, "%s%s", GlobalOptions->ResourceDir, "Ch2BNPEP.dat");
+        BNCharge2ScoringBN = LoadBayesianModel(FilePath);
+        sprintf(FilePath, "%s%s", GlobalOptions->ResourceDir, "Ch3BNPEP.dat");
+        BNCharge3ScoringBN = LoadBayesianModel(FilePath);
+        if (GlobalOptions->DigestType == DIGEST_TYPE_TRYPSIN)
+        {
+            sprintf(FilePath, "%s%s", GlobalOptions->ResourceDir, "PRM2.dat");
+            BNCharge2TaggingBN = LoadBayesianModel(FilePath);
+            sprintf(FilePath, "%s%s", GlobalOptions->ResourceDir, "PRM3.dat");
+            BNCharge3TaggingBN = LoadBayesianModel(FilePath);
+        }
+        else
+        {
+            sprintf(FilePath, "%s%s", GlobalOptions->ResourceDir, "PRM2.dat");
+            BNCharge2TaggingBN = LoadBayesianModel(FilePath);
+            sprintf(FilePath, "%s%s", GlobalOptions->ResourceDir, "PRM3.dat");
+            BNCharge3TaggingBN = LoadBayesianModel(FilePath);
+        }
+    }
+}
+
+void OldFreeBayesianModels()
+{
+    if (BNCharge2ScoringBN)
+    {
+        FreeBayesianModel(BNCharge2ScoringBN);
+        BNCharge2ScoringBN = NULL;
+    }
+    if (BNCharge2TaggingBN)
+    {
+        FreeBayesianModel(BNCharge2TaggingBN);
+        BNCharge2TaggingBN = NULL;
+    }
+    if (BNCharge3ScoringBN)
+    {
+        FreeBayesianModel(BNCharge3ScoringBN);
+        BNCharge3ScoringBN = NULL;
+    }
+    if (BNCharge3TaggingBN)
+    {
+        FreeBayesianModel(BNCharge3TaggingBN);
+        BNCharge3TaggingBN = NULL;
+    }
+
+}
+
+// Compute the probability of a Bayesian network node:
+// return ProbTable[ParentValue1*ParentBlock1 + ... + ParentValueN*ParentBlockN + FeatureValue]
+float ComputeBNProbability(BayesianNode* BN, int NodeIndex, int* FeatureValues)
+{
+    int ProbTableIndex;
+    int Parent;
+    int ParentIndex;
+    //
+    ProbTableIndex = 0;
+    for (ParentIndex = 0; ParentIndex < 4; ParentIndex++)
+    {
+        Parent = BN->Parents[ParentIndex];
+        if (Parent >= 0)
+        {
+            //ProbTableIndex += BN->ParentBlocks[ParentIndex] * Model->Nodes[Parent].Value;
+            ProbTableIndex += BN->ParentBlocks[ParentIndex] * FeatureValues[Parent];
+        }
+        else
+        {
+            break;
+        }
+    }
+    ProbTableIndex += FeatureValues[NodeIndex];
+    return BN->ProbTable[ProbTableIndex];
+}
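+
+// Worked example of the lookup above, with hypothetical numbers (not taken from any
+// shipped model file): for a node with two parents, ParentBlocks {8, 4}, parent
+// values (2, 1) and a node value of 3, the function reads
+// ProbTable[2*8 + 1*4 + 3] = ProbTable[23].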
+
+void FreeBayesianModel(BayesianModel* Model)
+{
+    int NodeIndex;
+    BayesianNode* BN;
+    if (Model)
+    {
+        for (NodeIndex = 0; NodeIndex < Model->NodeCount; NodeIndex++)
+        {
+            BN = Model->Nodes + NodeIndex;
+            SafeFree(BN->ProbTable);
+        }
+        SafeFree(Model->Nodes);
+        SafeFree(Model);
+    }
+}
+
+BayesianModel* LoadBayesianModel(char* FileName)
+{
+    int FeatureCount;
+    int FeatureIndex;
+    FILE* File;
+    BayesianNode* BN;
+    BayesianModel* Model;
+    //
+    File = fopen(FileName, "rb");
+    if (!File)
+    {
+        REPORT_ERROR_S(3, FileName);
+        return NULL;
+    }
+    ReadBinary(&FeatureCount, sizeof(int), 1, File);
+    if (FeatureCount < 1 || FeatureCount > 100)
+    {
+        REPORT_ERROR_I(6, FeatureCount);
+        fclose(File);
+        return NULL;
+    }
+    Model = (BayesianModel*)calloc(1, sizeof(BayesianModel));
+    Model->NodeCount = FeatureCount;
+    Model->Nodes = (BayesianNode*)calloc(FeatureCount, sizeof(BayesianNode));
+    for (FeatureIndex = 0; FeatureIndex < FeatureCount; FeatureIndex++)
+    {
+        BN = Model->Nodes + FeatureIndex;
+        ReadBinary(&BN->Flags, sizeof(int), 1, File);
+        ReadBinary(&BN->ValueCount, sizeof(int), 1, File);
+        ReadBinary(&BN->Name, sizeof(char), 64, File);
+        if (BN->Flags & BNODE_HAS_PARENTS)
+        {
+            ReadBinary(BN->Parents, sizeof(int), 4, File);
+            ReadBinary(BN->ParentBlocks, sizeof(int), 4, File);
+            ReadBinary(&BN->ProbTableSize, sizeof(int), 1, File);
+            if (BN->ProbTableSize <= 0 || BN->ProbTableSize > 1000)
+            {
+                REPORT_ERROR_II(7, BN->ProbTableSize, FeatureIndex);
+            }
+            BN->ProbTable = (float*)calloc(BN->ProbTableSize, sizeof(float));
+            ReadBinary(BN->ProbTable, sizeof(float), BN->ProbTableSize, File);
+
+        }
+    }
+    fclose(File);
+    return Model;
+}
diff --git a/BN.h b/BN.h
new file mode 100644
index 0000000..8126b60
--- /dev/null
+++ b/BN.h
@@ -0,0 +1,96 @@
+//Title:          BN.h
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+#ifndef BN_H
+#define BN_H
+
+// Structs to support the use of Bayesian networks.  We use Bayesian 
+// networks to score PRMs, both for tag generation and for final 
+// scoring of matches.  Most of the nodes in the network correspond to
+// fragment types.  The edges between nodes help capture the co-occurrence
+// relations between peaks (e.g. b-h2o is more likely in the presence of b),
+// as well as other factors that predict peak strength (e.g. which spectrum
+// sector the PRM lies in).
+//
+// The Bayesian network file has the following format:
+// There's one NodeRecord per Bayesian network node.  The NodeRecord 
+// has Flags (int), ValueCount (int; the number of possible values for the node), 
+// and a Name (char[64]).  If the node has parents, it then has:
+// Parent indices (4 ints)
+// Parent block-sizes (4 ints, used in computing positions in the probability table)
+// ProbTableSize (an int; equals the first parent block size * the ValueCount)
+// Probability table (float array of size ProbTableSize)
+// Note that the values in the probability table are log-probabilities, so that we can
+// add them up.
+#include "Utils.h"
+#include "Inspect.h"
+#include "Spectrum.h"
+#include "Trie.h"
+
+// Flags for BayesianNode.Flags:
+// A node has the BNODE_USE_PROB flag set if it's a leaf node, whose
+// probability is to be used.  A node has the BNODE_HAS_PARENTS flag
+// set if it has one or more parent nodes.
+#define BNODE_HAS_PARENTS 1
+#define BNODE_USE_PROB 2
+
+typedef struct BayesianNode
+{
+    int Flags;
+    int Value;
+    int ValueCount;
+    char Name[64];
+    int Parents[4];
+    int ParentBlocks[4];
+    int ProbTableSize; // redundant, but useful to keep around for sanity-checks
+    float* ProbTable;
+} BayesianNode;
+
+typedef struct BayesianModel
+{
+    BayesianNode* Nodes;
+    int NodeCount;
+} BayesianModel;
+
+extern BayesianModel* BNCharge2ScoringBN;
+extern BayesianModel* BNCharge3ScoringBN;
+extern BayesianModel* BNCharge2TaggingBN;
+extern BayesianModel* BNCharge3TaggingBN;
+
+BayesianModel* LoadBayesianModel(char* FileName);
+float ComputeBNProbability(BayesianNode* BN, int NodeIndex, int* FeatureValues);
+void OldFreeBayesianModels();
+void OldInitBayesianModels();
+
+#endif // BN_H
+
diff --git a/BasicStats.py b/BasicStats.py
new file mode 100644
index 0000000..4a5b56f
--- /dev/null
+++ b/BasicStats.py
@@ -0,0 +1,120 @@
+#Title:          BasicStats.py
+#Author:         Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+#Created:        2005
+# Copyright 2007,2008,2009 The Regents of the University of California
+# All Rights Reserved
+#
+# Permission to use, copy, modify and distribute any part of this
+# program for educational, research and non-profit purposes, by non-profit
+# institutions only, without fee, and without a written agreement is hereby
+# granted, provided that the above copyright notice, this paragraph and
+# the following three paragraphs appear in all copies.
+#
+# Those desiring to incorporate this work into commercial
+# products or use for commercial purposes should contact the Technology
+# Transfer & Intellectual Property Services, University of California,
+# San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+# Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+#
+# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+# INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+# IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+# OF SUCH DAMAGE.
+#
+# THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+# OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+# ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+# REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+# EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+# THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+import math
+
+def ComputeROCCurve(List):
+    """
+    Compute the area under the ROC curve for a list of (score, truth-flag) tuples.
+    """
+    List.sort()
+    List.reverse()
+    AllPositive = 0 
+    AllNegative = 0
+    for (Score, Truth) in List:
+        if (Truth):
+            AllPositive += 1
+        else:
+            AllNegative += 1
+    Area = 0
+    TPCount = 0
+    FPCount = 0
+    for (Score, Truth) in List:
+        if (Truth):
+            TPCount += 1
+        else:
+            FPCount += 1
+            TPRate = TPCount / float(AllPositive)
+            Area += TPRate
+    Area /= float(AllNegative)
+    return Area
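+
+# Illustrative sanity checks for ComputeROCCurve (scores chosen by hand, not from any
+# dataset): perfectly separated scores give an area of 1.0, and a single inversion
+# lowers it to 0.75.
+#   ComputeROCCurve([(0.9, 1), (0.8, 1), (0.2, 0), (0.1, 0)])  # -> 1.0
+#   ComputeROCCurve([(0.9, 1), (0.8, 0), (0.7, 1), (0.6, 0)])  # -> 0.75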
+
+def Sum(List):
+    Total = 0
+    for Entry in List:
+        Total += Entry
+    return Total
+
+def GetMedian(List):
+    SortedList = List[:]
+    SortedList.sort()
+    Len = len(SortedList)
+    if Len % 2 == 1:
+        return SortedList[Len / 2]
+    Score = (SortedList[Len / 2] + SortedList[(Len / 2) - 1]) / 2.0
+    return Score
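+
+# Illustrative values (hand-checked): GetMedian([3, 1, 2]) returns 2, and
+# GetMedian([4, 1, 3, 2]) returns 2.5, the mean of the two middle entries.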
+
+def GetMean(List):
+    if not len(List):
+        return None
+    Mean = 0
+    for Entry in List:
+        Mean += Entry
+    Mean /= float(len(List))
+    return Mean
+
+def GetMeanStdDev(List):
+    "Computes mean, standard deviation for a list of numbers"
+    if not len(List):
+        return (0, 0)
+    Mean = 0
+    for Entry in List:
+        Mean += Entry
+    Mean /= float(len(List))
+    StdDev = 0
+    for Entry in List:
+        StdDev += (Entry - Mean) ** 2
+    StdDev = math.sqrt(StdDev / float(len(List)))
+    return (Mean, StdDev)
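+
+# Illustrative values (hand-checked): GetMeanStdDev([2, 4, 4, 4, 5, 5, 7, 9])
+# returns (5.0, 2.0); note that this is the population standard deviation
+# (dividing by N, not N - 1).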
+
+
+def GetStdDev(List):
+    "Computes standard deviation for a list of numbers"
+    if not len(List):
+        return 0.0
+    Mean = 0
+    for Entry in List:
+        Mean += Entry
+    Mean /= float(len(List))
+    StdDev = 0
+    for Entry in List:
+        StdDev += (Entry - Mean) ** 2
+    StdDev = math.sqrt(StdDev / float(len(List)))
+    return StdDev
+
diff --git a/BuildConsensusSpectrum.py b/BuildConsensusSpectrum.py
new file mode 100644
index 0000000..ad7abdf
--- /dev/null
+++ b/BuildConsensusSpectrum.py
@@ -0,0 +1,273 @@
+#Title:          BuildConsensusSpectrum.py
+#Author:         Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+#Created:        2005
+# Copyright 2007,2008,2009 The Regents of the University of California
+# All Rights Reserved
+#
+# Permission to use, copy, modify and distribute any part of this
+# program for educational, research and non-profit purposes, by non-profit
+# institutions only, without fee, and without a written agreement is hereby
+# granted, provided that the above copyright notice, this paragraph and
+# the following three paragraphs appear in all copies.
+#
+# Those desiring to incorporate this work into commercial
+# products or use for commercial purposes should contact the Technology
+# Transfer & Intellectual Property Services, University of California,
+# San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+# Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+#
+# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+# INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+# IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+# OF SUCH DAMAGE.
+#
+# THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+# OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+# ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+# REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+# EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+# THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+"""
+BuildConsensusSpectrum:
+- Given many similar spectra which we consider to be the "same" (either due to
+Inspect annotations or due to clustering), we'd like to generate a CONSENSUS
+SPECTRUM.  The consensus spectrum should contain less noise than the individual
+spectra, and mass errors should be decreased.  Also, the computation shouldn't
+require too much time; for efficiency, it should require just one I/O pass through
+the spectra.
+
+Current pseudocode:
+
+# Accumulate intensity and peak count:
+For spectrum S:
+  Read in peaks from disk
+  Intensity1 = max(max peak intensity, grass peak intensity * 20)
+  For peak P in spectrum S:
+    ScaledIntensity = (Intensity(P) / Intensity1)
+    MassBin = mass of P, rounded to nearest 0.1Da
+    TotalIntensity[MassBin] += ScaledIntensity
+    PeakCount[MassBin]++
+    PeakCount[MassBin-1]++
+    PeakCount[MassBin+1]++
+    
+# Process intensity into a peak list:
+Iterate over peaks P from high to low peak-count:
+  if Assimilated[P], continue
+  Peak P assimilates neighboring peaks if their total intensity is lower
+  new spectrum receives this assimilated peak
+"""
+import PyInspect
+import MSSpectrum
+import os
+import sys
+import traceback
+import cPickle
+
+USE_COUNT_FLAG = 1
+
+# Scaling factors, for levels of peak presence from 0% to 100%.
+ScalingFactors = {}
+for X in range(0, 101):
+    #ScalingFactors[X] = 0.95 + 0.05 * (1.0 + X / 100.0)**5
+    ScalingFactors[X] = 0.95 + 0.05 * (1.0 + X / 100.0)**5
+    #print X, ScalingFactors[X]
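+# For reference, the extremes of the table above: X = 0 gives 0.95 + 0.05 * 1**5 = 1.0,
+# and X = 100 gives 0.95 + 0.05 * 2**5 = 2.55, so a peak present in every member
+# spectrum is boosted roughly 2.5-fold relative to one present in none.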
+
+
+class ConsensusBuilder:
+    def __init__(self, Charge = None):
+        self.SpectrumCount = 0
+        self.TotalMZ = 0
+        self.Intensity = {}
+        self.PeakCount = {}
+        self.Charge = Charge
+        self.SignalPeakCount = 0
+    def DebugPrint(self):
+        Bins = self.Intensity.keys()
+        Bins.sort()
+        MinBin = min(Bins)
+        MaxBin = max(Bins)
+        for Bin in range(MinBin, MaxBin + 1):
+            Str = "%s\t%s\t%s\t"%(Bin, self.Intensity.get(Bin, 0), self.PeakCount.get(Bin, 0))
+            print Str
+        
+    def AddSpectrum(self, Spectrum):
+        """
+        Add one more spectrum to the binned intensity and peak counts
+        """
+        self.TotalMZ += Spectrum.PrecursorMZ
+        self.SpectrumCount += 1
+        if not self.Charge:
+            self.Charge = Spectrum.Charge # ASSUMED: all spectra have same charge!
+        if not self.SignalPeakCount:
+            # Expect to see this many "signal" peaks:
+            self.SignalPeakCount = int(round((Spectrum.ParentMass / 100.0) * 4))
+        MaxIntensity = 0
+        IntensityList = []
+        for Peak in Spectrum.Peaks:
+            MaxIntensity = max(MaxIntensity, Peak.Intensity)
+            IntensityList.append(Peak.Intensity)
+        IntensityList.sort()
+        # Spectra with high signal-to-noise are weighted a bit more heavily:
+        if len(IntensityList) > self.SignalPeakCount:
+            MinimumPresenceIntensity = IntensityList[-self.SignalPeakCount]
+            ScalingFactor = min(20.0 / MaxIntensity, 1.0 / MinimumPresenceIntensity)
+        else:
+            ScalingFactor = 20.0 / MaxIntensity
+            MinimumPresenceIntensity = 0
+        #print "%s peaks; Spectrum has scaling factor %s (grass %s, max peak %s)"%(len(IntensityList), ScalingFactor, MinimumPresenceIntensity, MaxIntensity)
+        for Peak in Spectrum.Peaks:
+            MZBin = int(round(Peak.Mass * 10))
+            ScaledIntensity = Peak.Intensity * ScalingFactor
+            self.Intensity[MZBin] = self.Intensity.get(MZBin, 0) + ScaledIntensity
+            if Peak.Intensity > MinimumPresenceIntensity:
+                self.PeakCount[MZBin] = self.PeakCount.get(MZBin, 0) + 1
+                self.PeakCount[MZBin-1] = self.PeakCount.get(MZBin-1, 0) + 1
+                self.PeakCount[MZBin-2] = self.PeakCount.get(MZBin-2, 0) + 1
+                self.PeakCount[MZBin+1] = self.PeakCount.get(MZBin+1, 0) + 1
+                self.PeakCount[MZBin+2] = self.PeakCount.get(MZBin+2, 0) + 1
+            #self.Intensity[MZBin] = self.Intensity.get(MZBin, 0) + ScaledIntensity
+    def PickleCluster(self, PicklePath):
+        """
+        Serialize the information we've read from many spectra.  This method is used
+        if we want to reserve the option to add to the cluster later.
+        """
+        File = open(PicklePath, "wb")
+        cPickle.dump(self.Charge, File)
+        cPickle.dump(self.TotalMZ, File)
+        cPickle.dump(self.SpectrumCount, File)
+        cPickle.dump(self.Intensity, File)
+        cPickle.dump(self.PeakCount, File)
+        File.close()
+    def UnpickleCluster(self, PicklePath):
+        """
+        Sister method to PickleCluster - load a cluster from disk.
+        """
+        File = open(PicklePath, "rb")
+        self.Charge = cPickle.load(File)
+        self.TotalMZ = cPickle.load(File)
+        self.SpectrumCount = cPickle.load(File)
+        self.Intensity = cPickle.load(File)
+        self.PeakCount = cPickle.load(File)
+        File.close()
+        
+    def ProduceConsensusSpectrum(self):
+        Spectrum = MSSpectrum.SpectrumClass()
+        Spectrum.Charge = self.Charge
+        Spectrum.PrecursorMZ = self.TotalMZ / float(max(self.SpectrumCount, 1))
+        Spectrum.ParentMass = (Spectrum.PrecursorMZ * Spectrum.Charge) - (Spectrum.Charge - 1)*1.0078
+        # Iterate over intensity entries:
+        self.AssimilationFlag = {}
+        SortedList = []
+        for (Bin, Score) in self.Intensity.items():
+            SortedList.append((Score, Bin))
+        SortedList.sort()
+        SortedList.reverse()
+        Spectrum.Peaks = []
+        for (Intensity, Bin) in SortedList:
+            if self.AssimilationFlag.get(Bin, 0):
+                continue
+            Peak = MSSpectrum.PeakClass(Bin / 10.0, Intensity)
+            for NearBin in (Bin-1, Bin-2, Bin-3, Bin+1, Bin+2, Bin+3):
+                if self.AssimilationFlag.get(NearBin, 0):
+                    continue
+                self.AssimilationFlag[NearBin] = 1
+                Peak.Intensity += self.Intensity.get(NearBin, 0)
+            # Scale the intensity by the peak count, IF the cluster is large:
+            if USE_COUNT_FLAG and self.SpectrumCount > 4:
+                FractionPresent = self.PeakCount.get(Bin,0) / float(self.SpectrumCount)
+                Bin = min(100, int(round(FractionPresent * 100)))
+                #print FractionPresent, Bin, ScalingFactors[Bin]
+                Peak.Intensity *= ScalingFactors[Bin]
+            Spectrum.Peaks.append(Peak)
+        Spectrum.Peaks.sort()
+        return Spectrum
+    def AssimilateCluster(self, OtherCluster):
+        """
+        Assimilate the other consensus information into our consensus spectrum.
+        """
+        self.SpectrumCount += OtherCluster.SpectrumCount
+        self.TotalMZ += OtherCluster.TotalMZ
+        for (Key, Value) in OtherCluster.Intensity.items():
+            self.Intensity[Key] = self.Intensity.get(Key, 0) + Value
+        for (Key, Value) in OtherCluster.PeakCount.items():
+            self.PeakCount[Key] = self.PeakCount.get(Key, 0) + Value
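+
+# Minimal usage sketch for ConsensusBuilder (the spectrum source and output path are
+# hypothetical; the flow mirrors TestConsensus below):
+#   Builder = ConsensusBuilder(Charge = 2)
+#   for Spectrum in SomeSpectrumList:  # MSSpectrum.SpectrumClass instances
+#       Builder.AddSpectrum(Spectrum)
+#   Consensus = Builder.ProduceConsensusSpectrum()
+#   Consensus.WritePeaks("Consensus.dta")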
+def TestConsensus(AnnotationsFile, Annotation, Charge = 2):
+    """
+    Build a consensus spectrum for a collection of spectra, and verify
+    that it looks good.
+    """
+    #AnnotationsFile = "ConsensusTest.txt"
+    #Annotation = "M.D+173VTIQHPWFK.R"
+    SpectrumDir = "e:\\ms\\lens\\spectra"
+    TestOutputFile = "ConsensusTest.dta"
+    Builder = ConsensusBuilder()
+    InputFile = open(AnnotationsFile, "rb")
+    BestScores = []
+    #MGFFile = open("TestTest.mgf", "wb") #%TEMP
+    #MGFFile.close() #%TEMP
+    for FileLine in InputFile.xreadlines():
+        Bits = FileLine.split("\t")
+        try:
+            FilePath = Bits[0]
+            FilePos = int(Bits[15])
+            SpectrumCharge = int(Bits[4])
+            MQScore = float(Bits[5])
+        except:
+            continue
+        if SpectrumCharge != Charge:
+            continue
+        FilePath = os.path.join(SpectrumDir, FilePath.replace("/", "\\").split("\\")[-1])
+        BestScores.append((MQScore, FilePath, FilePos))
+        Spectrum = MSSpectrum.SpectrumClass()
+        Spectrum.ReadPeaks("%s:%s"%(FilePath, FilePos))
+        Spectrum.SetCharge(Charge)
+        Builder.AddSpectrum(Spectrum)
+        # Try ari's thingy:
+        #Spectrum.WriteMGFPeaks("TestTest.mgf") #%TEMP
+    InputFile.close()
+    print "Build consensus spectrum for %s members."%Builder.SpectrumCount
+    Consensus = Builder.ProduceConsensusSpectrum()
+    Consensus.WritePeaks(TestOutputFile)
+    #Command = "MakeConsensus -mgf TestTest.mgf > %s"%(TestOutputFile) #%TEMP
+    #print Command #%TEMP
+    #os.system(Command) #%TEMP
+    PySpectrum = PyInspect.Spectrum(TestOutputFile, 0)
+    Results = PySpectrum.ScorePeptideDetailed(Annotation)
+    ConsensusScore = Results[0]
+    print "Consensus spectrum score: %.2f (%.2f, %.2f, %.2f, %d)"%(ConsensusScore, Results[1], Results[2], Results[3], Results[4])
+    # Compare the CONSENSUS score to the average for the BEST FIVE:
+    BestScores.sort()
+    TopHits = 0
+    for (Score, Path, Pos) in BestScores[-5:]:
+        PySpectrum = PyInspect.Spectrum(Path, Pos)
+        Score = PySpectrum.ScorePeptide(Annotation)
+        TopHits += Score
+    BestFiveAverage = TopHits / 5.0
+    ScoreGain = ConsensusScore - BestFiveAverage
+    print "Consensus %s vs top-5 average %s (%s)"%(ConsensusScore, BestFiveAverage, ScoreGain)
+    return ScoreGain
+
+def TestMain():
+    TestCases = [("Consensus.GNTIEIQGDDAPSLWVYGFSDR.txt", "K.GNTIEIQGDDAPSLWVYGFSDR.V", 2),
+                 ("ConsensusTest.txt", "-.M+42DVTIQHPWFK.R", 1),
+                 ("ConsensusTest.txt", "-.M+42DVTIQHPWFK.R", 2),
+                 ("Consensus.R.QD-17DHGYISR.E.txt", "R.Q-17DDHGYISR.E", 1),
+                 ("Consensus.R.QD-17DHGYISR.E.txt", "R.Q-17DDHGYISR.E", 2),
+                 ]
+    ResultTotal = 0
+    ResultCount = 0
+    #TestCases = TestCases[0:1] # TEMP%!
+    for (AnnotationFile, Annotation, Charge) in TestCases:
+        ResultTotal += TestConsensus(AnnotationFile, Annotation, Charge)
+        ResultCount += 1
+    print "OVERALL RESULTS: Average MQGain is %s"%(ResultTotal / float(max(1, ResultCount)))
+    
+if __name__ == "__main__":
+    # Given the filename of a cluster, print verbose info:
+    FileName = sys.argv[1]
+    Bob = ConsensusBuilder()
+    Bob.UnpickleCluster(FileName)
+    Bob.DebugPrint()
diff --git a/BuildInspect.py b/BuildInspect.py
new file mode 100644
index 0000000..be2e65c
--- /dev/null
+++ b/BuildInspect.py
@@ -0,0 +1,123 @@
+#Title:          BuildInspect.py
+#Author:         Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+#Created:        2005
+# Copyright 2007,2008,2009 The Regents of the University of California
+# All Rights Reserved
+#
+# Permission to use, copy, modify and distribute any part of this
+# program for educational, research and non-profit purposes, by non-profit
+# institutions only, without fee, and without a written agreement is hereby
+# granted, provided that the above copyright notice, this paragraph and
+# the following three paragraphs appear in all copies.
+#
+# Those desiring to incorporate this work into commercial
+# products or use for commercial purposes should contact the Technology
+# Transfer & Intellectual Property Services, University of California,
+# San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+# Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+#
+# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+# INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+# IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+# OF SUCH DAMAGE.
+#
+# THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+# OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+# ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+# REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+# EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+# THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+"""
+Python script to build Inspect.  An alternative to makefiles.
+"""
+import sys
+import distutils
+import distutils.command.build
+import distutils.ccompiler
+
+def BuildInspect(BuildNow = 0):
+    InspectSourceFiles = [
+        "base64.c", "BN.c", "BuildMS2DB.c", "ChargeState.c", "CMemLeak.c",
+        "Errors.c", "ExonGraphAlign.c", 
+        "FreeMod.c", "IonScoring.c", "LDA.c", "main.c", "Mods.c",
+        "MS2DB.c", "ParentMass.c", "ParseInput.c", 
+        "ParseXML.c", "PValue.c", 
+        "Run.c", "Score.c", "Scorpion.c", "SNP.c", "Spectrum.c", "Spliced.c", 
+        "SpliceDB.c", "SpliceScan.c", "SVM.c", "Tagger.c", "Trie.c", "Utils.c", "TagFile.c"
+        ]
+    ExtraIncludeDirectories = ["expat\\lib",]
+    class MyBuildClass(distutils.command.build.build):
+        def build_opt(self):
+            CC = distutils.ccompiler.new_compiler()
+            #if sys.platform != 'win32':
+            #    CC.add_library('m')
+            #import os.path
+            print dir(CC)
+            CC.library_dirs.append("expat/lib/release")
+            if sys.platform == "win32":
+                CC.add_library("libexpat")
+            else:
+                CC.add_library("expat") # not "libexpat", that won't work on Linux.
+                CC.add_library("m")
+            CC.set_include_dirs(ExtraIncludeDirectories)
+            opt_obj = CC.compile(InspectSourceFiles)
+            CC.link_executable(opt_obj, "inspect")
+        def run(self):
+            self.build_opt()
+            distutils.command.build.build.run(self)
+    if BuildNow:
+        Dist = distutils.dist.Distribution()
+        Dist.parse_config_files()
+        Dist.cmdclass["build"] = MyBuildClass
+        Dist.commands = ["build"]
+        Dist.run_commands()
+    else:
+        distutils.core.setup(cmdclass = {"build":MyBuildClass,})
+
+def BuildInspectOnConvey(BuildNow = 0):
+    InspectSourceFiles = [
+        "base64.c", "BN.c", "BuildMS2DB.c", "ChargeState.c", "CMemLeak.c",
+        "Errors.c", "ExonGraphAlign.c", 
+        "FreeMod.c", "IonScoring.c", "LDA.c", "main.c", "Mods.c",
+        "MS2DB.c", "ParentMass.c", "ParseInput.c", 
+        "ParseXML.c", "PValue.c", 
+        "Run.c", "Score.c", "Scorpion.c", "SNP.c", "Spectrum.c", "Spliced.c", 
+        "SpliceDB.c", "SpliceScan.c", "SVM.c", "Tagger.c", "Trie.c", "Utils.c", "TagFile.c",
+        "cny_kernel_wrapper.c", "pdk_kernel.c", "kernel.c", "cny_util.c"]
+    ExtraIncludeDirectories = ["expat\\lib",]
+    class MyBuildClass(distutils.command.build.build):
+        def build_opt(self):
+            CC = distutils.ccompiler.new_compiler()
+            #if sys.platform != 'win32':
+            #    CC.add_library('m')
+            #import os.path
+            print dir(CC)
+            CC.library_dirs.append("expat/lib/release")
+            if sys.platform == "win32":
+                CC.add_library("libexpat")
+            else:
+                CC.add_library("expat") # not "libexpat", that won't work on Linux.
+                CC.add_library("m")
+            CC.set_include_dirs(ExtraIncludeDirectories)
+            opt_obj = CC.compile(InspectSourceFiles)
+            CC.link_executable(opt_obj, "inspect")
+        def run(self):
+            self.build_opt()
+            distutils.command.build.build.run(self)
+    if BuildNow:
+        Dist = distutils.dist.Distribution()
+        Dist.parse_config_files()
+        Dist.cmdclass["build"] = MyBuildClass
+        Dist.commands = ["build"]
+        Dist.run_commands()
+    else:
+        distutils.core.setup(cmdclass = {"build":MyBuildClass,})
+    
+
+if __name__ == "__main__":
+    #sys.argv = ["", "build"]
+    BuildInspect()
+    
diff --git a/BuildMGF.py b/BuildMGF.py
new file mode 100644
index 0000000..25980e4
--- /dev/null
+++ b/BuildMGF.py
@@ -0,0 +1,126 @@
+#Title:          BuildMGF.py
+#Author:         Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+#Created:        2005
+# Copyright 2007,2008,2009 The Regents of the University of California
+# All Rights Reserved
+#
+# Permission to use, copy, modify and distribute any part of this
+# program for educational, research and non-profit purposes, by non-profit
+# institutions only, without fee, and without a written agreement is hereby
+# granted, provided that the above copyright notice, this paragraph and
+# the following three paragraphs appear in all copies.
+#
+# Those desiring to incorporate this work into commercial
+# products or use for commercial purposes should contact the Technology
+# Transfer & Intellectual Property Services, University of California,
+# San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+# Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+#
+# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+# INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+# IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+# OF SUCH DAMAGE.
+#
+# THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+# OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+# ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+# REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+# EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+# THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+"""
+BuildMGF.py
+This is a part of the PTMAnalysis pipeline.  It creates an
+.mgf file of all the consensus spectra created by ComputePTMFeatures.py
+"""
+
+import os
+import sys
+import string
+import getopt
+import MSSpectrum
+from Utils import *
+Initialize()
+from TrainPTMFeatures import FormatBits
+
+class PeptideFeatureBag:
+    pass
+
+class MGFBuilder:
+    def __init__(self):
+        self.ConsensusSpectrumDir = "ptmscore\\LensLTQ-99-5\\spectra"
+        self.PeptideFeatureFileName = "PTMScore\\LensLTQ-99-5.txt"
+        self.MGFPath = "PTMScore\\LensLTQ-99-5.mgf"
+        self.ModifiedPeptides = []
+    def ParsePeptideFeatureFile(self):
+        """
+        Parse the contents of the peptide feature-file.  We need to know the
+        path to the consensus spectrum file, the consensus annotation MQScore,
+        and the index.
+        """
+        File = open(self.PeptideFeatureFileName, "rb")
+        LineNumber = 0
+        for FileLine in File.xreadlines():
+            LineNumber +=1
+            if FileLine[0] == "#":
+                continue
+            Bits = list(FileLine.replace("\r", "").replace("\n", "").split("\t"))
+            try:
+                ConsensusMQScore = float(Bits[FormatBits.ConsensusMQScore])
+            except:
+                print "** Error: Can't parse consensus MQScore from line %s!"%LineNumber
+                print Bits
+                continue
+            PeptideFeatures = PeptideFeatureBag()
+            PeptideFeatures.Bits = Bits
+            PeptideFeatures.ConsensusMQScore = ConsensusMQScore
+            NiceAnnotation = Bits[FormatBits.Peptide].replace("*", "-")
+            PeptideFeatures.Bits[FormatBits.Peptide] = NiceAnnotation
+            FirstResidue = NiceAnnotation[2]
+            Charge = Bits[FormatBits.Charge]
+            PeptideFeatures.SpectrumPath = os.path.join(self.ConsensusSpectrumDir, FirstResidue, "%s.%s.dta"%(NiceAnnotation, Charge))
+            self.ModifiedPeptides.append(PeptideFeatures)
+        File.close()
+        print "Parsed %s modified peptides from %s file lines."%(len(self.ModifiedPeptides), LineNumber)
+    def PrepareSearchMGF(self):
+        """
+        Concatenate our consensus spectra into an MGF file, for searching.
+        """
+        MGFFile = open(self.MGFPath, "wb")
+        ScanNumber = 1
+        for PeptideIndex in range(len(self.ModifiedPeptides)):
+            if PeptideIndex % 100 == 0:
+                print "Peptide species %s/%s..."%(PeptideIndex, len(self.ModifiedPeptides))
+            PeptideFeatures = self.ModifiedPeptides[PeptideIndex]
+            Spectrum = MSSpectrum.SpectrumClass()
+            Spectrum.ReadPeaks(PeptideFeatures.SpectrumPath)
+            Spectrum.WriteMGFPeaks(MGFFile, PeptideFeatures.Bits[FormatBits.Peptide], ScanNumber)
+            ScanNumber += 1
+        MGFFile.close()
+    def Main(self):
+        self.ParsePeptideFeatureFile()
+        self.PrepareSearchMGF()
+    def ParseCommandLine(self, Arguments):
+        (Options, Args) = getopt.getopt(Arguments, "d:m:")
+        OptionsSeen = {}
+        for (Option, Value) in Options:
+            OptionsSeen[Option] = 1
+            if Option == "-d":
+                self.PeptideFeatureDir = Value
+            elif Option == "-m":
+                self.MGFPath = Value
+        self.ConsensusSpectrumDir = os.path.join(self.PeptideFeatureDir, "Spectra")
+        self.PeptideFeatureFileName = os.path.join(self.PeptideFeatureDir, "PTMFeatures.txt")
+
+UsageInfo = """
+BuildMGF arguments:
+  -d [DIR]: Peptide feature directory
+  -m [FILE]: Output .mgf file name
+"""
+  
+if __name__ == "__main__":
+    Builder = MGFBuilder()
+    Builder.ParseCommandLine(sys.argv[1:])
+    Builder.Main()
diff --git a/BuildMS2DB.c b/BuildMS2DB.c
new file mode 100644
index 0000000..7e8d6e2
--- /dev/null
+++ b/BuildMS2DB.c
@@ -0,0 +1,2101 @@
+//Title:          BuildMS2DB.c
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+
+// BuildMS2DB.c is responsible for building a splice-tolerant database
+// from an input file in .gff format.  Our overall plan is as follows:
+
+//- Parse all lines of the .gff file.  Discard those lines which don't come from
+//  the correct chromosome and strand.  Build up a linked list of exons, 
+//  which is indexed by a hash based on (start, end, reading-frame).  
+//  NOTE: Reading frame is defined as "the first base pair in a codon, modulo 3".  
+//  NOTE: Records of type EST give rise to three exons (one for each reading frame),
+//        which will be pruned later.
+//  NOTE: We may parse SEVERAL gff files!
+//- Merge and split exons, to produce a minimal disjoint list.  
+//- Prune exons with short ORFs.
+//- ITERATE, until all exons are covered:
+//  - Take the first uncovered exon.  Grab all the exons which it links to.  Build 
+//    a corresponding gene record.  Flag these exons as covered.
+//- Write cross-reference records for the GFFGenes.
+//  
+//  IMPORTANT NOTE: The data structures built in this file aren't used during a search.
+//  When it comes to searching, look in MS2DB.c, which uses different (simpler) data
+//  structures to track this stuff.
+
+#include "Utils.h"
+#include <stdio.h>
+#include <math.h>
+#include <stdlib.h>
+#include "Trie.h"
+#include "Inspect.h"
+#include "Spliced.h"
+#include "SpliceDB.h"
+#include "SNP.h"
+#include "Errors.h"
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Macros:
+#define MAX_NAME 256
+#define EXON_HASH_SIZE 10000
+
+#define MS2_EXON_NONE 0
+#define MS2_EXON_CUSTOMAA 1
+
+#define CODON_LENGTH 3
+
+//#define GFF_QUICKPARSE
+
+// Use this flag for a single-base exon which covers the middle
+// base of an exon, and which has only one link forward to an
+// adjacent exon.  
+// The flag reflects this scenario (where B is the one-base exon):
+//  AAAA---(X)---BCCCC
+// Rather than these scenarios:
+//  AAAAB---(X)---CCC
+//  AAAA---B---(X)---CCC
+#define MS2_EXON_CUSTOMAA_HEAD 2
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Structs:
+typedef struct MS2Exon
+{
+    int Start;
+    int End;
+    int ReadingFrame;
+    struct MS2Exon* Next;
+    struct MS2Exon* Prev;
+    char Prefix[CODON_LENGTH];
+    char Suffix[CODON_LENGTH];
+    struct MS2Edge* FirstForward;
+    struct MS2Edge* LastForward;
+    struct MS2Edge* FirstBackward;
+    struct MS2Edge* LastBackward;
+    struct MS2Gene* Gene;
+    char SeqName[MAX_NAME + 1];
+
+    int Index; // Assigned to a completed gene
+    char* Sequence;
+    // Flags for special bookkeeping.  "bookkeeping" and its derivatives are perhaps the 
+    // only English words with three consecutive double-letters.
+    int Flags; 
+    // CustomAA is set only for length-1 exons that are part of special bridges.
+    char CustomAA;
+} MS2Exon;
+
+typedef struct MS2Edge
+{
+    struct MS2Exon* LinkTo;
+    struct MS2Exon* LinkFrom;
+    struct MS2Edge* Next;
+    // The amino acid for this edge can be set specially, up-front:
+    char SpecialAA;
+} MS2Edge;
+
+// A wrapper for an exon.  This lets us stick an exon into multiple linked lists.
+typedef struct MS2ExonNode
+{
+    struct MS2ExonNode* Next;
+    struct MS2ExonNode* Prev;
+    MS2Exon* Exon;
+} MS2ExonNode;
+
+typedef struct IntNode
+{
+    struct IntNode* Next;
+    int Value;
+} IntNode;
+
+typedef struct MS2CrossReference
+{
+    struct MS2Gene* Gene;
+    struct GFFGeneClass* GFFGene;
+    IntNode* FirstExonID;
+    IntNode* LastExonID;
+    struct MS2CrossReference* Next;
+} MS2CrossReference;
+
+typedef struct MS2Gene
+{
+    MS2ExonNode* FirstExon;
+    MS2ExonNode* LastExon;
+    struct MS2Gene* Next;
+    int Index; // for debugging, mostly!
+    MS2CrossReference* FirstCrossReference;
+    MS2CrossReference* LastCrossReference;
+} MS2Gene;
+
+// Singleton class tracking high-level data like the exon hashes.
+typedef struct MS2Builder
+{
+    FILE* GenomeFile;
+    int ForwardFlag;
+    MS2ExonNode** ExonHash;
+    MS2Exon* FirstExon;
+    MS2Exon* LastExon;
+    struct GFFGeneClass* FirstGFFGene;
+    struct GFFGeneClass* LastGFFGene;
+    MS2Gene* FirstGene;
+    MS2Gene* LastGene;
+    char ChromosomeName[MAX_NAME];
+    int ExonCount;
+    int GeneCount;
+    int VerboseFlag;
+} MS2Builder;
+
+typedef struct GFFExonClass
+{
+    int Start;
+    int End;
+    int ReadingFrame;
+    struct GFFExonClass* Next;
+} GFFExonClass;
+
+typedef struct GFFGeneClass
+{
+    char Name[MAX_NAME + 1];
+    char DatabaseName[MAX_NAME + 1];
+  
+    GFFExonClass* FirstExon;
+    GFFExonClass* LastExon;
+    struct GFFGeneClass* Next;
+    struct MS2CrossReference* CrossReference;
+} GFFGeneClass;
+
+typedef struct GFFParser
+{
+    // Link to our builder, where the REAL data (not transient parse-state) lives:
+    MS2Builder* Builder;
+    // Keep a link to the current gene, so we can add exons to it:
+    struct GFFGeneClass* CurrentGene;
+    // Remember our filename (mostly for error reporting)
+    char* CurrentFileName;
+    // Keep a link to the last exon of the current gene, so that we 
+    // can add edges between exons as needed:
+    MS2Exon* PrevExon;
+} GFFParser;
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Forward declarations:
+void DebugPrintMS2Builder(MS2Builder* Builder, char* Notes);
+void ExonInheritOneForwardEdge(MS2Exon* Exon, MS2Edge* OldEdge);
+void ExonInheritOneBackwardEdge(MS2Exon* Exon, MS2Edge* OldEdge);
+void FreeMS2CrossReference(MS2CrossReference* CR);
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Constructor functions:
+MS2Exon* NewExon(int Start, int End, int ReadingFrame)
+{
+    MS2Exon* Exon;
+    Exon = (MS2Exon*)calloc(1, sizeof(MS2Exon));
+    Exon->Start = Start;
+    Exon->End = End;
+    Exon->ReadingFrame = ReadingFrame;
+    return Exon;
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Destructor functions:
+
+void FreeGFFGene(GFFGeneClass* Gene)
+{
+    GFFExonClass* Exon;
+    GFFExonClass* Prev;
+    if (!Gene)
+    {
+        return;
+    }
+    FreeMS2CrossReference(Gene->CrossReference);
+    Prev = NULL;
+    for (Exon = Gene->FirstExon; Exon; Exon = Exon->Next)
+    {
+        SafeFree(Prev);
+        Prev = Exon;
+    }
+    SafeFree(Prev);
+    SafeFree(Gene);
+}
+
+// Free an MS2Exon, and its associated edges.
+void FreeMS2Exon(MS2Exon* Exon)
+{
+    MS2Edge* Edge;
+    MS2Edge* Prev;
+    //
+    if (!Exon)
+    {
+        return;
+    }
+    // Free the sequence:
+    SafeFree(Exon->Sequence);
+    Exon->Sequence = NULL;
+    // Free the list of FORWARD edges:
+    Prev = NULL;
+    for (Edge = Exon->FirstForward; Edge; Edge = Edge->Next)
+    {
+        SafeFree(Prev);
+        Prev = Edge;
+    }
+    SafeFree(Prev);
+    // Free the list of BACKWARD edges:
+    Prev = NULL;
+    for (Edge = Exon->FirstBackward; Edge; Edge = Edge->Next)
+    {
+        SafeFree(Prev);
+        Prev = Edge;
+    }
+    SafeFree(Prev);
+    SafeFree(Exon);
+}
+
+void FreeExonHash(MS2Builder* Builder)
+{
+    int HashIndex;
+    MS2ExonNode* Node;
+    MS2ExonNode* Prev;
+    //
+    if (!Builder->ExonHash)
+    {
+        return;
+    }
+    for (HashIndex = 0; HashIndex < EXON_HASH_SIZE; HashIndex++)
+    {
+        Prev = NULL;
+        for (Node = Builder->ExonHash[HashIndex]; Node; Node = Node->Next)
+        {
+            SafeFree(Prev);
+            Prev = Node;
+        }
+        SafeFree(Prev);
+    }
+    SafeFree(Builder->ExonHash);
+    Builder->ExonHash = NULL;
+}
+
+// Free an MS2CrossReference, and its associated list of integers.
+void FreeMS2CrossReference(MS2CrossReference* CR)
+{
+    IntNode* Node;
+    IntNode* Prev;
+    if (!CR)
+    {
+        return;
+    }
+    Prev = NULL;
+    for (Node = CR->FirstExonID; Node; Node = Node->Next)
+    {
+        SafeFree(Prev);
+        Prev = Node;
+    }
+    SafeFree(Prev);
+    SafeFree(CR);
+}
+
+// Free an MS2Gene, and its associated list of MS2ExonNodes
+void FreeMS2Gene(MS2Gene* Gene)
+{
+    MS2ExonNode* Node;
+    MS2ExonNode* Prev = NULL; 
+    //
+    if (!Gene)
+    {
+        return;
+    }
+    for (Node = Gene->FirstExon; Node; Node = Node->Next)
+    {
+        SafeFree(Prev);
+        Prev = Node;
+    }
+    SafeFree(Prev);
+    // Free the gene struct itself, to match the other destructors above:
+    SafeFree(Gene);
+}
+
+void FreeGFFGenes(MS2Builder* Builder)
+{
+    GFFGeneClass* Gene;
+    GFFGeneClass* Prev;
+    //
+    Prev = NULL;
+    for (Gene = Builder->FirstGFFGene; Gene; Gene = Gene->Next)
+    {
+        FreeGFFGene(Prev);
+        Prev = Gene;
+    }
+    FreeGFFGene(Prev);
+    Builder->FirstGFFGene = NULL;
+    Builder->LastGFFGene = NULL;
+}
+
+void FreeMS2Genes(MS2Builder* Builder)
+{
+    MS2Gene* Gene;
+    MS2Gene* Prev = NULL;
+    for (Gene = Builder->FirstGene; Gene; Gene = Gene->Next)
+    {
+        FreeMS2Gene(Prev);
+        Prev = Gene;
+    }
+    FreeMS2Gene(Prev);
+    Builder->FirstGene = NULL;
+    Builder->LastGene = NULL;
+}
+
+void FreeMS2Exons(MS2Builder* Builder)
+{
+    MS2Exon* Exon;
+    MS2Exon* Prev = NULL;
+    //
+    for (Exon = Builder->FirstExon; Exon; Exon = Exon->Next)
+    {
+        FreeMS2Exon(Prev);
+        Prev = Exon;
+    }
+    FreeMS2Exon(Prev);
+    Builder->FirstExon = NULL;
+    Builder->LastExon = NULL;
+
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// Other functions:
+
+void MS2CrossReferenceAddID(MS2CrossReference* CR, int ID)
+{
+    IntNode* Node;
+    //
+    Node = (IntNode*)calloc(1, sizeof(IntNode));
+    Node->Value = ID;
+    if (CR->LastExonID)
+    {
+        CR->LastExonID->Next = Node;
+    }
+    else
+    {
+        CR->FirstExonID = Node;
+    }
+    CR->LastExonID = Node;
+}
+
+// Return true if we successfully cover everything from start...end
+int BuildGFFCrossReference(MS2Builder* Builder, MS2CrossReference* CR, 
+                           MS2Gene* Gene, int Start, int End, int ReadingFrame)
+{
+    MS2Exon* Exon;
+    MS2ExonNode* Node;
+    int Result = 0;
+    //
+    for (Node = Gene->FirstExon; Node; Node = Node->Next)
+    {
+        Exon = Node->Exon;
+        // Ignore exons with wrong reading frame:
+        if (Exon->ReadingFrame != ReadingFrame)
+        {
+            continue;
+        }
+        // Handle perfect-overlap:
+        if (Exon->Start == Start && Exon->End == End)
+        {
+            MS2CrossReferenceAddID(CR, Exon->Index);
+            return 1;
+        }
+        // Handle partial-overlap:
+        if (Builder->ForwardFlag && Exon->Start == Start && Exon->End <= End)
+        {
+            MS2CrossReferenceAddID(CR, Exon->Index);
+            return BuildGFFCrossReference(Builder, CR, Gene, Exon->End, End, ReadingFrame);
+        }
+        if (!Builder->ForwardFlag && Exon->End == End && Exon->Start >= Start)
+        {
+            MS2CrossReferenceAddID(CR, Exon->Index);
+            return BuildGFFCrossReference(Builder, CR, Gene, Start, Exon->Start, ReadingFrame);
+        }
+    }
+    return Result;
+}
+
+void BuildGFFCrossReferences(MS2Builder* Builder)
+{
+    // Iterate over GFFGenes.  Inner loop over MS2Genes.  When you find an MS2Gene which covers
+    // any of the GFFGene, build a cross-reference.
+    GFFGeneClass* GFFGene;
+    GFFExonClass* GFFExon;
+    MS2Gene* Gene;
+    MS2Exon* Exon;
+    MS2ExonNode* Node;
+    int OverlapFlag;
+    int CoveredFlag;
+    int CoverageComplete;
+    int Result;
+    //
+    for (GFFGene = Builder->FirstGFFGene; GFFGene; GFFGene = GFFGene->Next)
+    {
+        CoveredFlag = 0;
+        for (Gene = Builder->FirstGene; Gene; Gene = Gene->Next)
+        {
+            // First, check whether the gene overlaps this GFF gene's first exon:
+            OverlapFlag = 0;
+            GFFExon = GFFGene->FirstExon;
+            for (Node = Gene->FirstExon; Node; Node = Node->Next)
+            {
+                Exon = Node->Exon;
+                if (Exon->Start < GFFExon->End && Exon->End > GFFExon->Start && Exon->ReadingFrame == GFFExon->ReadingFrame)
+                {
+                    OverlapFlag = 1;
+                }
+            }
+            if (!OverlapFlag)
+            {
+                continue;
+            }
+            // They overlap - create a cross-reference and start adding exon IDs to the list!
+            GFFGene->CrossReference = (MS2CrossReference*)calloc(1, sizeof(MS2CrossReference));
+            GFFGene->CrossReference->GFFGene = GFFGene;
+            GFFGene->CrossReference->Gene = Gene;
+            // the MS2Gene keeps a list of its cross references:
+            if (Gene->LastCrossReference)
+            {
+                Gene->LastCrossReference->Next = GFFGene->CrossReference;
+            }
+            else
+            {
+                Gene->FirstCrossReference = GFFGene->CrossReference;
+            }
+            Gene->LastCrossReference = GFFGene->CrossReference;
+            CoverageComplete = 1;
+            for (GFFExon = GFFGene->FirstExon; GFFExon; GFFExon = GFFExon->Next)
+            {
+                Result = BuildGFFCrossReference(Builder, GFFGene->CrossReference, Gene, GFFExon->Start, GFFExon->End, GFFExon->ReadingFrame);
+                if (!Result)
+                {
+                    CoverageComplete = 0;
+                }
+            }
+            if (!CoverageComplete)
+            {
+                REPORT_ERROR_S(23, GFFGene->Name);
+            }
+            CoveredFlag = 1;
+            break;
+        }
+        // Sanity check: The GFF gene MUST be covered (since we did, after all, create exons for all these 
+        // GFF exons!)
+        if (!CoveredFlag)
+        {
+            REPORT_ERROR_S(22, GFFGene->Name);
+        }
+    }
+}
+
+void AddMS2Exon(MS2Builder* Builder, MS2Exon* Exon)
+{
+    if (!Builder->FirstExon)
+    {
+        Builder->FirstExon = Exon;
+    }
+    else
+    {
+        Builder->LastExon->Next = Exon;
+        Exon->Prev = Builder->LastExon;
+    }
+    Builder->LastExon = Exon;
+    Builder->ExonCount++;
+}
+
+void LinkExonForward(MS2Exon* FromExon, MS2Exon* ToExon)
+{
+    // Add a link forward from FromExon to ToExon, as well as a reciprocal link back.
+    MS2Edge* Edge;
+
+    // Sanity checking: ToExon starts after FromExon
+    INSPECT_ASSERT(ToExon->Start >= FromExon->End);
+    
+    // If the two exons are ALREADY linked, then return immediately:
+    for (Edge = FromExon->FirstForward; Edge; Edge = Edge->Next)
+    {
+        if (Edge->LinkTo == ToExon)
+        {
+            return;
+        }
+    }
+    // Sanity check: There's no forward link, therefore there must be no reciprocal link:
+    for (Edge = ToExon->FirstBackward; Edge; Edge = Edge->Next)
+    {
+        INSPECT_ASSERT(Edge->LinkTo != FromExon);
+    }
+    // Add an edge linking FORWARD:
+    Edge = (MS2Edge*)calloc(1, sizeof(MS2Edge));
+    Edge->LinkFrom = FromExon;
+    Edge->LinkTo = ToExon;
+    if (!FromExon->FirstForward)
+    {
+        FromExon->FirstForward = Edge;
+    }
+    else
+    {
+        FromExon->LastForward->Next = Edge;
+    }
+    FromExon->LastForward = Edge;
+
+    // Add a reciprocal edge linking BACKWARD:
+    Edge = (MS2Edge*)calloc(1, sizeof(MS2Edge));
+    Edge->LinkFrom = ToExon;
+    Edge->LinkTo = FromExon;
+    if (!ToExon->FirstBackward)
+    {
+        ToExon->FirstBackward = Edge;
+    }
+    else
+    {
+        ToExon->LastBackward->Next = Edge;
+    }
+    ToExon->LastBackward = Edge;
+}
+void RemoveBackwardEdge(MS2Exon* Exon, MS2Exon* LinkedExon)
+{
+    MS2Edge* Prev = NULL;
+    MS2Edge* Edge;
+    for (Edge = Exon->FirstBackward; Edge; Edge = Edge->Next)
+    {
+        if (Edge->LinkTo == LinkedExon)
+        {
+            // Remove this edge!
+            if (Prev)
+            {
+                Prev->Next = Edge->Next;
+            }
+            else
+            {
+                Exon->FirstBackward = Edge->Next;
+            }
+            if (Exon->LastBackward == Edge)
+            {
+                Exon->LastBackward = Prev;
+            }
+            SafeFree(Edge);
+            break;
+        }
+        Prev = Edge;
+    }
+}
+
+void RemoveForwardEdge(MS2Exon* Exon, MS2Exon* LinkedExon)
+{
+    MS2Edge* Prev = NULL;
+    MS2Edge* Edge;
+    for (Edge = Exon->FirstForward; Edge; Edge = Edge->Next)
+    {
+        if (Edge->LinkTo == LinkedExon)
+        {
+            // Remove this edge!
+            if (Prev)
+            {
+                Prev->Next = Edge->Next;
+            }
+            else
+            {
+                Exon->FirstForward = Edge->Next;
+            }
+            if (Exon->LastForward == Edge)
+            {
+                Exon->LastForward = Prev;
+            }
+            SafeFree(Edge);
+            break;
+        }
+        Prev = Edge;
+    }
+}
+
+void ExonInheritForwardEdges(MS2Exon* Exon, MS2Exon* DeadExon)
+{
+    MS2Edge* Edge;
+    MS2Edge* Prev = NULL;
+    //
+    // Sanity checking: Exon and DeadExon share their right endpoint.
+    INSPECT_ASSERT(Exon->End == DeadExon->End);
+    for (Edge = DeadExon->FirstForward; Edge; Edge = Edge->Next)
+    {
+        // Add a link forward from Exon->LinkToExon:
+        LinkExonForward(Exon, Edge->LinkTo);
+        // Remove reciprocal exon link from LinkTo back to DeadExon:
+        RemoveBackwardEdge(Edge->LinkTo, DeadExon);
+        SafeFree(Prev);
+        Prev = Edge;
+    }
+    SafeFree(Prev);
+    DeadExon->FirstForward = NULL;
+    DeadExon->LastForward = NULL;
+}
+
+void ExonInheritBackwardEdges(MS2Exon* Exon, MS2Exon* DeadExon)
+{
+    MS2Edge* Edge;
+    MS2Edge* Prev = NULL;
+    //
+    // Sanity checking: Exon and DeadExon share their right endpoint.
+    INSPECT_ASSERT(Exon->Start == DeadExon->Start);
+    for (Edge = DeadExon->FirstBackward; Edge; Edge = Edge->Next)
+    {
+        // Add a link forward from LinkToExon->Exon:
+        LinkExonForward(Edge->LinkTo, Exon);
+        // Remove reciprocal exon link from LinkTo to DeadExon:
+        RemoveForwardEdge(Edge->LinkTo, DeadExon);
+        SafeFree(Prev);
+        Prev = Edge;
+    }
+    SafeFree(Prev);
+    DeadExon->FirstBackward = NULL;
+    DeadExon->LastBackward = NULL;
+}
+
+// Given this start, end, and reading frame, look up the corresponding 
+// exon in Builder->ExonHash.  If the exon doesn't exist yet, then create it.
+// Return the exon.
+MS2Exon* HashExon(MS2Builder* Builder, int Start, int End, int ReadingFrame)
+{
+    int HashValue;
+    MS2ExonNode* Node;
+    MS2ExonNode* Prev = NULL;
+    MS2Exon* Exon;
+    //
+    HashValue = (Start + CODON_LENGTH * End + ReadingFrame) % EXON_HASH_SIZE;
+    for (Node = Builder->ExonHash[HashValue]; Node; Node = Node->Next)
+    {
+        if (Node->Exon->Start == Start && Node->Exon->End == End && Node->Exon->ReadingFrame == ReadingFrame)
+        {
+            return (Node->Exon);
+        }
+        Prev = Node;
+    }
+    // There's no node for this exon yet.  Add one:
+    Exon = (MS2Exon*)calloc(1, sizeof(MS2Exon));
+    Exon->Start = Start;
+    Exon->End = End;
+    Exon->ReadingFrame = ReadingFrame;
+    Node = (MS2ExonNode*)calloc(1, sizeof(MS2ExonNode));
+    Node->Exon = Exon;
+    if (Prev)
+    {
+        Prev->Next = Node;
+        Node->Prev = Prev;
+    }
+    else
+    {
+        Builder->ExonHash[HashValue] = Node;
+    }
+    AddMS2Exon(Builder, Exon);
+    return Exon;
+}
+
+// Extract the gene name from one GFF attribute token of the form "Parent=<name>".
+// Returns NULL if the token is not a Parent attribute.
+char* GetGeneNameFromGFF(char* GFFToken)
+{
+    char* Temp = strtok(GFFToken, "=");
+    if (Temp && (!strcmp(Temp, "Parent") || !strcmp(Temp, "parent")))
+    {
+        return strtok(NULL, "=");
+    }
+    return NULL;
+}
+
+// Parse one line of a .gff file.  Callback for ParseFileByLines
+int HandleGFFFileLine(int LineNumber, int FilePos, char* LineBuffer, void* ParseData)
+{
+    GFFParser* Parser;
+    MS2Builder* Builder;
+    GFFGeneClass* GFFGene;
+    GFFExonClass* GFFExon;
+
+    char* SeqName;
+    char* TokTemp;
+    char* GeneNameTemp;
+    char* GeneName;
+    char* DatabaseName;
+    char* DummyStr;
+    char* IntervalType;
+    int SyntaxErrorFlag = 0;
+    int Start;
+    int End;
+    int ReadingFrame;
+    MS2Exon* Exon;
+    Parser = (GFFParser*)ParseData;
+    Builder = Parser->Builder;
+    // Break the line up by tabs.
+    // Bit 0: Seq name
+    SeqName = strtok(LineBuffer, "\t");
+    // Bit 1: Source (used to populate the Database field)
+    DatabaseName = strtok(NULL, "\t");
+    if (!DatabaseName)
+    {
+        SyntaxErrorFlag = 1;
+        goto cleanup;
+    }
+    // Debugging option:
+#ifdef GFF_QUICKPARSE
+    if (LineNumber > 1000)
+    {
+        return 0;
+    }
+#endif
+    // Bit 2: interval type (est or exon)
+    IntervalType = strtok(NULL, "\t");
+    if (!IntervalType)
+    {
+        SyntaxErrorFlag = 1;
+        goto cleanup;
+    }
+    // Bit 3: start
+    DummyStr = strtok(NULL, "\t");
+    if (!DummyStr)
+    {
+        SyntaxErrorFlag = 1;
+        goto cleanup;
+    }
+    Start = atoi(DummyStr) - 1; // fix one-based numbering!
+
+    // Bit 4: end
+    DummyStr = strtok(NULL, "\t");
+    if (!DummyStr)
+    {
+        SyntaxErrorFlag = 1;
+        goto cleanup;
+    }
+    End = atoi(DummyStr); 
+
+    // Error checking:
+    if (Start < 0 || End < 0 || Start >= End)
+    {
+        REPORT_ERROR_IIIS(20, Start, End, LineNumber, Parser->CurrentFileName);
+    }
+
+    // Bit 5: score (ignored)
+    DummyStr = strtok(NULL, "\t");
+    if (!DummyStr)
+    {
+        SyntaxErrorFlag = 1;
+        goto cleanup;
+    }
+    // Bit 6: forward flag
+    DummyStr = strtok(NULL, "\t");
+    if (!DummyStr || !*DummyStr)
+    {
+        SyntaxErrorFlag = 1;
+        goto cleanup;
+    }
+    // Skip over this exon, if it comes from the wrong strand:
+    if (*DummyStr == '+')
+    {
+        if (!Builder->ForwardFlag)
+        {
+            goto cleanup;
+        }
+    }
+    else if (*DummyStr == '-')
+    {
+        if (Builder->ForwardFlag)
+        {
+            goto cleanup;
+        }
+    }
+    else
+    {
+        SyntaxErrorFlag = 1;
+        goto cleanup;
+    }
+    // Bit 7: reading frame
+    DummyStr = strtok(NULL, "\t");
+    if (!DummyStr)
+    {
+        SyntaxErrorFlag = 1;
+        goto cleanup;
+    }
+    ReadingFrame = atoi(DummyStr);
+    if (ReadingFrame < 0 || ReadingFrame > 2)
+    {
+        SyntaxErrorFlag = 1;
+        goto cleanup;
+    }
+
+    // Bit 8: attributes; we assume it contains the parent (gene) name
+    DummyStr = strtok(NULL,"\t");
+    
+    GeneName = SeqName;
+    
+    if (DummyStr)
+    {
+        TokTemp = strtok(DummyStr, ";");
+        while (TokTemp)
+        {
+            GeneNameTemp = GetGeneNameFromGFF(TokTemp);
+            if (GeneNameTemp)
+            {
+                GeneName = GeneNameTemp;
+                break;
+            }
+            TokTemp = strtok(NULL, ";");
+        }
+    }
+
+    /*printf("CurrGFFLine:\n");
+    printf(" name:%s\n",GeneName);
+    printf(" Strand:%d\n",Builder->ForwardFlag);
+    printf(" frame: %d\n",ReadingFrame);
+    */
+    /////////////////////////////////////////////////////////////////////////////////
+    // We've parsed a valid gff file line.  Create a new GFFGene (if necessary), and 
+    // add a new GFFExon to our current GFFGene.
+    // Fix up the reading frame.  As always, reading frame is the modulus of the first base 
+    // pair of a codon.  In GFF format, reading frame is the number of bases to be skipped over
+    // before the first base pair of a codon.
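+    // Worked example (illustrative): on the forward strand, an exon starting at
+    // genomic position 10 with GFF frame 2 skips bases 10 and 11, so its first
+    // complete codon begins at base 12 and ReadingFrame = (10 + 2) % 3 = 0.
+    // On the reverse strand the frame is anchored at End - 1 instead.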
+    if (Builder->ForwardFlag)
+    {
+        ReadingFrame = (Start + ReadingFrame) % CODON_LENGTH;
+    }
+    else
+    {
+        ReadingFrame = (End - 1 - ReadingFrame) % CODON_LENGTH;
+    }
+    // Create a new GFFGene, if necessary:
+    if (!Parser->CurrentGene || CompareStrings(GeneName, Parser->CurrentGene->Name))
+    {
+        GFFGene = (GFFGeneClass*)calloc(1, sizeof(GFFGeneClass));
+        strncpy(GFFGene->Name, GeneName, MAX_NAME);
+        strncpy(GFFGene->DatabaseName, DatabaseName, MAX_NAME);
+	//strncpy(GFFGene->SeqName,SeqName,MAX_NAME);
+        Parser->CurrentGene = GFFGene;
+        if (!Builder->FirstGFFGene)
+        {
+            Builder->FirstGFFGene = GFFGene;
+        }
+        else
+        {
+            Builder->LastGFFGene->Next = GFFGene;
+        }
+        Builder->LastGFFGene = GFFGene;
+        Parser->PrevExon = NULL;
+    }
+    else
+    {
+        // We're continuing along the same GFFGene, so we can link the previous exon to this one.
+        // (ASSUMPTION: Exons for the same gene are linked by introns, and come IN ORDER!)
+    }
+    // Append a new GFFExon to the current GFFGene:
+    GFFExon = (GFFExonClass*)calloc(1, sizeof(GFFExonClass));
+    GFFExon->Start = Start;
+    GFFExon->End = End;
+    GFFExon->ReadingFrame = ReadingFrame;
+    if (!Parser->CurrentGene->FirstExon)
+    {
+        Parser->CurrentGene->FirstExon = GFFExon;
+    }
+    else
+    {
+        Parser->CurrentGene->LastExon->Next = GFFExon;
+    }
+    Parser->CurrentGene->LastExon = GFFExon;
+
+    // Construct an MS2Exon:
+    Exon = HashExon(Builder, Start, End, ReadingFrame);
+    
+    // Add a link, if necessary, between this exon and the previous:
+    if (Parser->PrevExon)
+    {
+        // Report an error if the exons overlap:
+        if (Parser->PrevExon->End > Exon->Start && Parser->PrevExon->Start < Exon->End)
+        {
+            REPORT_ERROR_IIII(17, Parser->PrevExon->Start, Parser->PrevExon->End, Exon->Start, Exon->End);
+        }
+        else if (Parser->PrevExon->Start < Exon->Start)
+        {
+            // Exons listed from low genome coords to high.  Typical for forward strand.
+            LinkExonForward(Parser->PrevExon, Exon);
+        }
+        else
+        {
+            // Exons listed from high genome coords to low.  Typical for reverse strand.
+            LinkExonForward(Exon, Parser->PrevExon);
+        }
+        
+    }
+    Parser->PrevExon = Exon;
+
+    // Report syntax errors:
+cleanup:
+    if (SyntaxErrorFlag)
+    {
+        REPORT_ERROR_IS(14, LineNumber, Parser->CurrentFileName);
+        return 0;
+    }
+    return 1;
+
+}
+
+// Iterate over all GFF files.  Parse each one, using the HandleGFFFileLine callback to do the real work.
+void ParseGFFFiles(MS2Builder* Builder)
+{
+    StringNode* GFFNode;
+    FILE* GFFFile;
+    GFFParser* Parser;
+
+    for (GFFNode = GlobalOptions->FirstGFFFileName; GFFNode; GFFNode = GFFNode->Next)
+    {
+        GFFFile = fopen(GFFNode->String, "rb");
+        if (!GFFFile)
+        {
+            REPORT_ERROR_S(8, GFFNode->String);
+            continue;
+        }
+        Parser = (GFFParser*)calloc(1, sizeof(GFFParser));
+        Parser->Builder = Builder;
+        Parser->CurrentFileName = GFFNode->String;
+        ParseFileByLines(GFFFile, HandleGFFFileLine, Parser, 0);
+        free(Parser);
+        fclose(GFFFile);
+    }
+}
+
+// Remove an exon from the master linked-list:
+void DeleteMS2Exon(MS2Builder* Builder, MS2Exon* Exon)
+{
+    if (Exon == Builder->FirstExon)
+    {
+        Builder->FirstExon = Builder->FirstExon->Next;
+    }
+    if (Exon == Builder->LastExon)
+    {
+        Builder->LastExon = Builder->LastExon->Prev;
+    }
+    if (Exon->Next)
+    {
+        Exon->Next->Prev = Exon->Prev;
+    }
+    if (Exon->Prev)
+    {
+        Exon->Prev->Next = Exon->Next;
+    }
+    FreeMS2Exon(Exon);
+    Builder->ExonCount--;
+}
+
+// When we're making our double-iteration over the exon linked list, it's important to keep 
+// track of where we are.  (Note that ExonA itself may be deleted, so we can't simply finish 
+// the loop and then go to ExonA->Next!)  The variable NextExonA is what ExonA should be set to 
+// for the next A-loop.  And normally NextExonA is simply equal to ExonA->Next.  However, 
+// if we delete exons A and B, *and* exon B happens to be ExonA->Next, then on the next time through
+// the loop, ExonA must be shifted two positions forward.  Trust me.
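+// Illustrative scenario: with the exon list A -> B -> C, if ExonB happens to be
+// ExonA->Next and both A and B are removed while comparing them, the saved
+// NextExonA (originally B) would dangle; advancing it to ExonB->Next (i.e. C)
+// keeps the outer loop pointed at a live exon.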
+#define DELETE_EXON_B()\
+{\
+    if (NextExonA == ExonB)\
+    {\
+        NextExonA = ExonB->Next;\
+    }\
+    DeleteMS2Exon(Builder, ExonB);\
+}
+
+// Iterate over all pairs of exons in the builder.  If the exons overlap, split them!
+// We handle overlap in many different special cases, each of which is straightforward.
+void SplitOverlappingExons(MS2Builder* Builder)
+{
+    MS2Exon* Exon1;
+    MS2Exon* Exon2;
+    MS2Exon* Exon3;
+    MS2Exon* ExonA;
+    MS2Exon* NextExonA;
+    MS2Exon* ExonB;
+    MS2Exon* NextExonB;
+    int ReadingFrame;
+    int OverlapFlag = 0;
+    //
+    ExonA = Builder->FirstExon;
+    while (ExonA)
+    {
+        if (OverlapFlag)
+        {
+            //DebugPrintMS2Builder(Builder, "A");
+            OverlapFlag = 0;
+        }
+
+        //printf("ExonA[%d]: %d-%d\n", ExonA->ReadingFrame, ExonA->Start, ExonA->End);
+        ReadingFrame = ExonA->ReadingFrame;
+        NextExonA = ExonA->Next;
+        // Loop B:
+        ExonB = ExonA->Next;
+        while (ExonB)
+        {
+            if (OverlapFlag)
+            {
+                //DebugPrintMS2Builder(Builder, "B");
+                OverlapFlag = 0;
+            }
+            // Compare exon A to exon B:
+            if (ExonA->ReadingFrame != ExonB->ReadingFrame)
+            {
+                ExonB = ExonB->Next;
+                continue;
+            }
+            if (ExonA->End <= ExonB->Start)
+            {
+                ExonB = ExonB->Next;
+                continue;
+            }
+            if (ExonA->Start >= ExonB->End)
+            {
+                ExonB = ExonB->Next;
+                continue;
+            }
+            NextExonB = ExonB->Next;
+            //printf("  %d-%d Overlaps with %d-%d\n", ExonA->Start, ExonA->End, ExonB->Start, ExonB->End);
+            OverlapFlag = 1;
+            ////////////////////////////////////////////////////////////////////////////////////
+            // There's overlap.  Handle each case in turn.
+            if (ExonA->Start == ExonB->Start && ExonA->End == ExonB->End)
+            {
+                ExonInheritBackwardEdges(ExonA, ExonB);
+                ExonInheritForwardEdges(ExonA, ExonB);
+                DELETE_EXON_B();
+                ExonB = NextExonB;
+                continue;
+            }
+            if (ExonA->Start == ExonB->Start)
+            {
+                if (ExonA->End > ExonB->End)
+                {
+                    // A-----
+                    // B---
+                    //     11
+                    Exon1 = NewExon(ExonB->End, ExonA->End, ReadingFrame);
+                    AddMS2Exon(Builder, Exon1);
+                    ExonInheritForwardEdges(Exon1, ExonA);
+                    ExonInheritBackwardEdges(ExonB, ExonA);
+                    DeleteMS2Exon(Builder, ExonA);
+                    break;
+                }
+                else
+                {
+                    // A---  
+                    // B-----
+                    //     11
+                    Exon1 = NewExon(ExonA->End, ExonB->End, ReadingFrame);
+                    AddMS2Exon(Builder, Exon1);
+                    ExonInheritForwardEdges(Exon1, ExonB);
+                    ExonInheritBackwardEdges(ExonA, ExonB);
+                    DELETE_EXON_B();
+                    ExonB = NextExonB;
+                    continue;
+                }
+            } // end: ExonA->Start == ExonB->Start
+            if (ExonA->End == ExonB->End)
+            {
+                if (ExonA->Start < ExonB->Start)
+                {
+                    // A-----
+                    // B  ---
+                    //  11
+                    Exon1 = NewExon(ExonA->Start, ExonB->Start, ReadingFrame);
+                    AddMS2Exon(Builder, Exon1);
+                    ExonInheritForwardEdges(ExonB, ExonA);
+                    ExonInheritBackwardEdges(Exon1, ExonA);
+                    DeleteMS2Exon(Builder, ExonA);
+                    break;
+                }
+                else
+                {
+                    // A  ---  
+                    // B-----
+                    //  11
+                    Exon1 = NewExon(ExonB->Start, ExonA->Start, ReadingFrame);
+                    AddMS2Exon(Builder, Exon1);
+                    ExonInheritForwardEdges(ExonA, ExonB);
+                    ExonInheritBackwardEdges(Exon1, ExonB);
+                    DELETE_EXON_B();
+                    ExonB = NextExonB;
+                    continue;
+                }
+            } // end: ExonA->End == ExonB->End
+            if (ExonA->Start < ExonB->Start && ExonA->End < ExonB->End)
+            {
+                // A------
+                // B   ------
+                //  111222333
+                Exon1 = NewExon(ExonA->Start, ExonB->Start, ReadingFrame);
+                AddMS2Exon(Builder, Exon1);
+                Exon2 = NewExon(ExonB->Start, ExonA->End, ReadingFrame);
+                AddMS2Exon(Builder, Exon2);
+                Exon3 = NewExon(ExonA->End, ExonB->End, ReadingFrame);
+                AddMS2Exon(Builder, Exon3);
+                ExonInheritBackwardEdges(Exon1, ExonA);
+                ExonInheritBackwardEdges(Exon2, ExonB);
+                ExonInheritForwardEdges(Exon2, ExonA);
+                ExonInheritForwardEdges(Exon3, ExonB);
+                DELETE_EXON_B();
+                ExonB = NextExonB;
+                DeleteMS2Exon(Builder, ExonA);
+                break;
+            }
+            if (ExonA->Start < ExonB->Start && ExonA->End > ExonB->End)
+            {
+                // A---------
+                // B   ---
+                //  111   222
+                Exon1 = NewExon(ExonA->Start, ExonB->Start, ReadingFrame);
+                AddMS2Exon(Builder, Exon1);
+                Exon2 = NewExon(ExonB->End, ExonA->End, ReadingFrame);
+                AddMS2Exon(Builder, Exon2);
+                ExonInheritBackwardEdges(Exon1, ExonA);
+                ExonInheritForwardEdges(Exon2, ExonA);
+                DeleteMS2Exon(Builder, ExonA);
+                break;
+            }
+            if (ExonA->Start > ExonB->Start && ExonA->End > ExonB->End)
+            {
+                // A   ------
+                // B------
+                //  111222333
+                Exon1 = NewExon(ExonB->Start, ExonA->Start, ReadingFrame);
+                AddMS2Exon(Builder, Exon1);
+                Exon2 = NewExon(ExonA->Start, ExonB->End, ReadingFrame);
+                AddMS2Exon(Builder, Exon2);
+                Exon3 = NewExon(ExonB->End, ExonA->End, ReadingFrame);
+                AddMS2Exon(Builder, Exon3);
+                ExonInheritBackwardEdges(Exon1, ExonB);
+                ExonInheritBackwardEdges(Exon2, ExonA);
+                ExonInheritForwardEdges(Exon3, ExonA);
+                ExonInheritForwardEdges(Exon2, ExonB);
+                DELETE_EXON_B();
+                ExonB = NextExonB;
+                DeleteMS2Exon(Builder, ExonA);
+                break;
+            }
+            if (ExonA->Start > ExonB->Start && ExonA->End < ExonB->End)
+            {
+                // A   ---   
+                // B---------
+                //  111   222
+                Exon1 = NewExon(ExonB->Start, ExonA->Start, ReadingFrame);
+                AddMS2Exon(Builder, Exon1);
+                Exon2 = NewExon(ExonA->End, ExonB->End, ReadingFrame);
+                AddMS2Exon(Builder, Exon2);
+                ExonInheritBackwardEdges(Exon1, ExonB);
+                ExonInheritForwardEdges(Exon2, ExonB);
+                DELETE_EXON_B();
+                ExonB = NextExonB;
+                continue;
+            }
+            INSPECT_ASSERT(0); // we'd better not reach this point!
+        }
+        ExonA = NextExonA;
+    }
+}
+
+// If two exons are adjacent (one begins just after the other ends) and have 
+// compatible reading frames, then add a link between them if necessary.
+void AddAdjacentExonLinks(MS2Builder* Builder)
+{
+    MS2Exon* ExonA;
+    MS2Exon* ExonB;
+    MS2Edge* TestEdge;
+    int LinkFound;
+    //
+    for (ExonA = Builder->FirstExon; ExonA; ExonA = ExonA->Next)
+    {
+        //printf("AAEL: %d-%d\n", ExonA->Start, ExonA->End);
+        for (ExonB = Builder->FirstExon; ExonB; ExonB = ExonB->Next)
+        {
+            if (ExonA->End == ExonB->Start && ExonA->ReadingFrame == ExonB->ReadingFrame)
+            {
+                LinkFound = 0;
+                for (TestEdge = ExonA->FirstForward; TestEdge; TestEdge = TestEdge->Next)
+                {
+                    if (TestEdge->LinkTo == ExonB)
+                    {
+                        LinkFound = 1;
+                        break;
+                    }
+                }
+                if (!LinkFound)
+                {
+                    LinkExonForward(ExonA, ExonB);
+                }
+            }
+        }
+    }
+}
+
+// Add an MS2Exon to an MS2Gene.  Also, add to the gene all the exons
+// which are (recursively) linked to by MS2Exon.  
+void AddExonToGene(MS2Gene* Gene, MS2Exon* Exon)
+{
+    MS2ExonNode* Node;
+    MS2Edge* Edge;
+    //
+    // We follow edges forward as well as edges back, so we'll re-visit the
+    // same exons, which stops the recursion:
+    if (Exon->Gene == Gene)
+    {
+        return;
+    }
+    //printf("[[Add exon %d-%d R%d to gene %d\n", Exon->Start, Exon->End, Exon->ReadingFrame, Gene->Index);
+    //if(Exon->Gene)
+    //  {
+    //	printf("But exon already belongs to Gene: %d\n",Exon->Gene->Index);
+    //	getchar();
+    // }
+    INSPECT_ASSERT(!Exon->Gene);
+    Exon->Gene = Gene;
+    Node = (MS2ExonNode*)calloc(1, sizeof(MS2ExonNode));
+    Node->Exon = Exon;
+    if (!Gene->FirstExon)
+    {
+        Gene->FirstExon = Node;
+    }
+    else
+    {
+        Gene->LastExon->Next = Node;
+        Node->Prev = Gene->LastExon;
+    }
+    Gene->LastExon = Node;
+    // Follow edges:
+    for (Edge = Exon->FirstForward; Edge; Edge = Edge->Next)
+    {
+        //printf("Following forward edge\n");
+        AddExonToGene(Gene, Edge->LinkTo);
+        //printf("Finished forward edge\n");
+    }
+    for (Edge = Exon->FirstBackward; Edge; Edge = Edge->Next)
+    {
+        //printf("Following reverse edge\n");
+        AddExonToGene(Gene, Edge->LinkTo);
+        //printf("Finished reverse edge\n");
+    }
+}
+
+// Assimilate all MS2Exons from the master list into MS2Genes.  Iteratively:
+// Take the first exon that's not in a gene.  Build a new gene, and add this exon, 
+// and (recursively) add in everything the exon links to.
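+// In graph terms this is a connected-component sweep: AddExonToGene follows edges
+// in both directions, so each MS2Gene ends up holding exactly one connected
+// component of the exon graph.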
+void GroupExonsIntoGenes(MS2Builder* Builder)
+{
+    MS2Exon* Exon;
+    MS2Gene* Gene;
+    //
+    // Iterate over exons:
+    for (Exon = Builder->FirstExon; Exon; Exon = Exon->Next)
+    {
+        if (Exon->Gene)
+        {
+            continue;
+        }
+        // This exon doesn't have a gene yet.  Create a gene to contain it:
+        Gene = (MS2Gene*)calloc(1, sizeof(MS2Gene));
+        Gene->Index = Builder->GeneCount;
+        AddExonToGene(Gene, Exon);
+        
+        if (!Builder->FirstGene)
+        {
+            Builder->FirstGene = Gene;
+        }
+        else
+        {
+            Builder->LastGene->Next = Gene;
+        }
+        Builder->LastGene = Gene;
+        Builder->GeneCount++;
+    }
+    // All exons are now assigned to genes.
+}
+
+// Temp-struct for sorting exons by genome-position
+typedef struct MS2SortedExonNode
+{
+    MS2Exon* Exon;
+} MS2SortedExonNode;
+
+// Callback for qsort, to sort exons by genome-position, FORWARD strand
+int CompareMS2ExonNodesForward(const MS2SortedExonNode* NodeA, const MS2SortedExonNode* NodeB)
+{
+    if (NodeA->Exon->Start < NodeB->Exon->Start)
+    {
+        return -1;
+    }
+    if (NodeA->Exon->Start > NodeB->Exon->Start)
+    {
+        return 1;
+    }
+    if (NodeA->Exon->End < NodeB->Exon->End)
+    {
+        return -1;
+    }
+    if (NodeA->Exon->End > NodeB->Exon->End)
+    {
+        return 1;
+    }
+    return 0;
+}
+
+// Callback for qsort, to sort exons by genome-position, REVERSE strand
+int CompareMS2ExonNodesBackward(const MS2SortedExonNode* NodeA, const MS2SortedExonNode* NodeB)
+{
+    if (NodeA->Exon->Start < NodeB->Exon->Start)
+    {
+        return 1;
+    }
+    if (NodeA->Exon->Start > NodeB->Exon->Start)
+    {
+        return -1;
+    }
+    if (NodeA->Exon->End < NodeB->Exon->End)
+    {
+        return 1;
+    }
+    if (NodeA->Exon->End > NodeB->Exon->End)
+    {
+        return -1;
+    }
+    return 0;
+}
+
+// Read (and translate) the protein sequence for an exon.
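+// Worked example (illustrative, forward strand): an exon covering genomic bases
+// 100..110 (Start=100, End=111) in reading frame 2 starts one base into a codon,
+// so Prefix holds base 100 (completing a codon begun in a predecessor exon),
+// Sequence holds the three residues translated from bases 101..109, and Suffix
+// holds base 110, to be completed by a successor exon.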
+void ReadExonSequence(MS2Builder* Builder, MS2Exon* Exon)
+{
+    FILE* File;
+    int DNALength;
+    int DNABufferSize = 0;
+    char* DNABuffer = NULL;
+    char* RCBuffer = NULL;
+    int Modulo;
+    char* TranslationStart;
+    int AAIndex;
+    char* TranslateMe;
+    int SuffixPos;
+    int AALength;
+    int LengthPrefix;
+    int LengthBody;
+    int LengthSuffix;
+    int LengthFull;
+    //
+    File = GlobalOptions->OutputFile;
+
+    // Allocate a buffer to store the DNA sequence (and reverse complement):
+    DNALength = Exon->End - Exon->Start;
+    if (DNALength + 1 > DNABufferSize)
+    {
+        SafeFree(DNABuffer);
+        SafeFree(RCBuffer);
+        DNABufferSize = max(1024, DNALength + 5);
+        DNABuffer = (char*)calloc(DNABufferSize, sizeof(char));
+        RCBuffer = (char*)calloc(DNABufferSize, sizeof(char));
+    }
+
+    // Retrieve the DNA:
+    fseek(Builder->GenomeFile, Exon->Start, 0);
+    ReadBinary(DNABuffer, sizeof(char), DNALength, Builder->GenomeFile);
+    DNABuffer[DNALength] = '\0';
+    if (Builder->ForwardFlag)
+    {
+        Modulo = Exon->Start % CODON_LENGTH;
+        if (Modulo == Exon->ReadingFrame)
+        {
+            TranslationStart = DNABuffer;
+            Exon->Prefix[0] = '\0';
+        }
+        else if ((Exon->ReadingFrame + 1) % CODON_LENGTH == Modulo % CODON_LENGTH)
+        {
+            TranslationStart = DNABuffer + 2;
+            Exon->Prefix[0] = DNABuffer[0];
+            Exon->Prefix[1] = DNABuffer[1];
+            Exon->Prefix[2] = '\0';
+        }
+        else
+        {
+            TranslationStart = DNABuffer + 1;
+            Exon->Prefix[0] = DNABuffer[0];
+            Exon->Prefix[1] = '\0';
+        }
+        TranslateMe = DNABuffer + strlen(Exon->Prefix);
+    }
+    else
+    {
+        WriteReverseComplement(DNABuffer, RCBuffer);
+        Modulo = (Exon->End - 1) % 3;
+        if (Modulo == Exon->ReadingFrame)
+        {
+            TranslationStart = RCBuffer;
+            Exon->Prefix[0] = '\0';
+        }
+        else if ((Exon->ReadingFrame + 1) % CODON_LENGTH == Modulo % CODON_LENGTH)
+        {
+            TranslationStart = RCBuffer + 1;
+            Exon->Prefix[0] = RCBuffer[0];
+            Exon->Prefix[1] = '\0';
+        }
+        else
+        {
+            TranslationStart = RCBuffer + 2;
+            Exon->Prefix[0] = RCBuffer[0];
+            Exon->Prefix[1] = RCBuffer[1];
+            Exon->Prefix[2] = '\0';
+        }
+        TranslateMe = RCBuffer + strlen(Exon->Prefix);
+    }
+    AALength = (DNALength - strlen(Exon->Prefix)) / CODON_LENGTH;
+    Exon->Sequence = (char*)calloc(AALength + 1, sizeof(char));
+    for (AAIndex = 0; AAIndex < AALength; AAIndex++)
+    {
+        Exon->Sequence[AAIndex] = TranslateCodon(TranslateMe);
+        if (Exon->Sequence[AAIndex] < 'A' || Exon->Sequence[AAIndex] >= 'Z')
+        {
+            printf("ExonSequence: Contains a rogue character %c in exon %d-%d\n", Exon->Sequence[AAIndex], Exon->Start, Exon->End);
+            getchar();
+        }
+        TranslateMe += CODON_LENGTH;
+    }
+    // Set the suffix:
+    for (SuffixPos = 0; SuffixPos < CODON_LENGTH; SuffixPos++)
+    {
+        Exon->Suffix[SuffixPos] = *TranslateMe;
+        if (!*TranslateMe)
+        {
+            break;
+        }
+        TranslateMe++;
+    }
+    // Double-check lengths:
+    LengthPrefix = strlen(Exon->Prefix);
+    LengthSuffix = strlen(Exon->Suffix);
+    LengthBody = strlen(Exon->Sequence) * CODON_LENGTH;
+    LengthFull = LengthPrefix + LengthSuffix + LengthBody;
+    if (LengthFull != Exon->End - Exon->Start)
+    {
+        printf("** Error: Length %d != genomic span %d\n", LengthFull, Exon->End - Exon->Start);
+    }
+    SafeFree(DNABuffer);
+    SafeFree(RCBuffer);
+}
+
+// Output the <Exon> tag for this MS2Exon, along with child tags (edges, and sequence)
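+// Schematic of the emitted XML (values are illustrative only):
+//   <Exon Index="3" Start="1200" End="1320">
+//     <ExonSequence Length="40">MKT...</ExonSequence>
+//     <ExtendsExon Index="2" />
+//     <LinkFrom Index="0" AA="G" />
+//   </Exon>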
+void OutputMS2Exon(MS2Builder* Builder, MS2Gene* Gene, MS2Exon* Exon)
+{
+    FILE* File;
+    char SpanningCodon[4];
+    int LengthA;
+    int LengthB;
+    char AA;
+    MS2Edge* Edge;
+    MS2Exon* LinkExon;
+    //
+    File = GlobalOptions->OutputFile;
+    // Start the exon tag:
+    fprintf(File, "  <Exon Index=\"%d\" Start=\"%d\" End=\"%d\"", 
+        Exon->Index, Exon->Start, Exon->End);
+    fprintf(File, ">\n");
+
+    if (Exon->Sequence)
+    {
+        fprintf(File, "    <ExonSequence Length=\"%d\">%s</ExonSequence>\n", strlen(Exon->Sequence), Exon->Sequence);
+    }
+    //fprintf(File, "    <ExonSequence>%s</ExonSequence>\n", Exon->Sequence);
+    // Write out all the edges linking back from this exon to lower-numbered exons:
+    if (Builder->ForwardFlag)
+    {
+        Edge = Exon->FirstBackward;
+    }
+    else
+    {
+        Edge = Exon->FirstForward;
+    }
+    for (; Edge; Edge = Edge->Next)
+    {
+        // Start an <ExtendsExon> or a <LinkFrom> tag:
+        LinkExon = Edge->LinkTo;
+        if (LinkExon->Start == Exon->End || LinkExon->End == Exon->Start)
+        {
+            fprintf(File, "    <ExtendsExon");
+        }
+        else
+        {
+            fprintf(File, "    <LinkFrom");
+        }
+
+        // Indicate the exon index:
+        fprintf(File, " Index=\"%d\"", LinkExon->Index);
+
+        // Get the amino acid!
+        if (Exon->Flags & MS2_EXON_CUSTOMAA_HEAD)
+        {
+            AA = Exon->CustomAA;
+        }
+        else if (Exon->Flags & MS2_EXON_CUSTOMAA)
+        {
+            AA = '\0';
+        }
+        else if (LinkExon->Flags & MS2_EXON_CUSTOMAA)
+        {
+            AA = LinkExon->CustomAA;
+        }
+        else
+        {
+            // The spanning codon consists of 1 or 2 bases from this exon,
+            // and 2 or 1 bases from the linked exon.
+            AA = '\0';
+            memset(SpanningCodon, 0, sizeof(char) * 4);
+            LengthA = strlen(LinkExon->Suffix);
+            LengthB = strlen(Exon->Prefix);
+            if (LengthA + LengthB == CODON_LENGTH)
+            {
+                strcpy(SpanningCodon, LinkExon->Suffix);
+                strcat(SpanningCodon, Exon->Prefix);
+            }
+            else if (LengthA + LengthB != 0)
+            {
+                // Report an error now, if the exons have incompatible reading frames!
+                REPORT_ERROR_IIII(16, Exon->Start, Exon->End, LinkExon->Start, LinkExon->End);
+            }
+
+            if (SpanningCodon[0])
+            {
+                AA = TranslateCodon(SpanningCodon);
+            }
+        }
+        if (AA)
+        {
+            fprintf(File, " AA=\"%c\"", AA);
+        }
+
+        // End the tag:
+        fprintf(File, " />\n");
+    }
+    
+    // End the exon tag:
+    fprintf(File, "  </Exon>\n");
+    
+}
+
+// Assign exon indexes for this gene, by first sorting the exons:
+void SortMS2GeneExons(MS2Builder* Builder, MS2Gene* Gene)
+{
+    MS2SortedExonNode* SortedExonBlock;
+    int ExonIndex;
+    int ExonCount;
+    MS2ExonNode* Node;
+
+    //   
+    ExonCount = 0;
+    for (ExonIndex = 0, Node = Gene->FirstExon; Node; ExonIndex++, Node = Node->Next)
+    {
+        ExonCount++;
+    }
+
+    SortedExonBlock = (MS2SortedExonNode*)calloc(ExonCount, sizeof(MS2SortedExonNode));
+    for (ExonIndex = 0, Node = Gene->FirstExon; Node; ExonIndex++, Node = Node->Next)
+    {
+        SortedExonBlock[ExonIndex].Exon = Node->Exon;
+    }
+    if (Builder->ForwardFlag)
+    {
+        qsort(SortedExonBlock, ExonCount, sizeof(MS2SortedExonNode), (QSortCompare)CompareMS2ExonNodesForward);
+    }
+    else
+    {
+        qsort(SortedExonBlock, ExonCount, sizeof(MS2SortedExonNode), (QSortCompare)CompareMS2ExonNodesBackward);
+    }
+    for (ExonIndex = 0; ExonIndex < ExonCount; ExonIndex++)
+    {
+        SortedExonBlock[ExonIndex].Exon->Index = ExonIndex;
+        //ReadExonSequence(Builder, Gene, SortedExonBlock[ExonIndex].Exon);
+    }
+    DebugPrintMS2Builder(Builder, "Exons sorted");
+    SafeFree(SortedExonBlock);
+}
+
+// Generate XML for an MS2CrossReference.  We need the GFF gene's database and accession number,
+// and we need the list of exon indices.
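+// Schematic of the emitted XML (values are illustrative only):
+//   <CrossReference Database="EST" ID="gene00042">
+//     <CRExons Index="0, 1, 4"/>
+//   </CrossReference>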
+void OutputMS2CrossReference(MS2Builder* Builder, FILE* File, MS2CrossReference* CR)
+{
+    IntNode* Node;
+    //
+    fprintf(File, "  <CrossReference Database=\"%s\" ID=\"%s\">\n", CR->GFFGene->DatabaseName, CR->GFFGene->Name);
+    fprintf(File, "    <CRExons Index=\"");
+    for (Node = CR->FirstExonID; Node; Node = Node->Next)
+    {
+        if (Node->Next)
+        {
+            fprintf(File, "%d, ", Node->Value);
+        }
+        else
+        {
+            fprintf(File, "%d", Node->Value);
+        }
+    }
+    fprintf(File, "\"/>\n");
+    fprintf(File, "  </CrossReference>\n");
+}
+
+// Output the XML for this MS2Gene.
+void OutputMS2Gene(MS2Builder* Builder, MS2Gene* Gene)
+{
+    FILE* File;
+    int ExonCount;
+    int ExonIndex;
+    MS2ExonNode* Node;
+    MS2CrossReference* CR;
+    
+    File = GlobalOptions->OutputFile;
+
+    // Count exons in the gene:
+    ExonCount = 0;
+    for (ExonIndex = 0, Node = Gene->FirstExon; Node; ExonIndex++, Node = Node->Next)
+    {
+        ExonCount++;
+    }
+
+    // Start the Gene tag:
+    fprintf(File, "<Gene ExonCount=\"%d\" Chromosome=\"%s\" ForwardFlag=\"%d\">\n", ExonCount, Builder->ChromosomeName, Builder->ForwardFlag);
+
+    // Loop over exons, and output an <Exon> tag for each one:
+    for (ExonIndex = 0; ExonIndex < ExonCount; ExonIndex++)
+    {
+        for (Node = Gene->FirstExon; Node; Node = Node->Next)
+        {
+            if (Node->Exon->Index != ExonIndex)
+            {
+                continue;
+            }
+            OutputMS2Exon(Builder, Gene, Node->Exon);
+            break;
+        }
+    }
+
+    // Output all cross-references for the gene:
+    for (CR = Gene->FirstCrossReference; CR; CR = CR->Next)
+    {
+        OutputMS2CrossReference(Builder, File, CR);
+    }
+
+    // Complete the Gene tag:
+    fprintf(File, "</Gene>\n\n");
+    fflush(File);
+    
+}
+
+// Convert a codon into a number from 0 to 63.  (We probably could just TRANSLATE the
+// codon and use the amino acid value...)
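+// Worked example (illustrative): "ACG" encodes as 0*1 + 1*4 + 2*16 = 36, using
+// A=0, C=1, G=2, T=3 with position multipliers 1, 4, 16.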
+int GetCodonHashValue(char* EncodeCodon)
+{
+    int Pos;
+    int Multiplier[] = {1, 4, 16};
+    int Value = 0;
+    for (Pos = 0; Pos < CODON_LENGTH; Pos++)
+    {
+        switch (EncodeCodon[Pos])
+        {
+        case 'a':
+        case 'A':
+            Value += 0 * Multiplier[Pos];
+            break;
+        case 'c':
+        case 'C':
+            Value += 1 * Multiplier[Pos];
+            break;
+        case 'g':
+        case 'G':
+            Value += 2 * Multiplier[Pos];
+            break;
+        case 't':
+        case 'T':
+            Value += 3 * Multiplier[Pos];
+            break;
+        default:
+            //printf("* Error in GetCodonHashValue('%c')\n", EncodeCodon[Pos]);
+            REPORT_ERROR_I(24, EncodeCodon[Pos]);
+            return 0;
+        }
+    }
+    return Value;
+}
+
+// Scenario: What if exon X consists of a single base pair!?
+// That's tricky if the base pair is the middle of a codon, because the
+// linked exons must look PAST this central exon to get their prefix / suffix.
+// We'll produce special degree-1 "customAA" exons, one for each codon, to
+// get from each predecessor of exon X to each successor of exon X.  
+// We produce one CustomAA exon for each possible codon.  
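+// Sketch of the repair (illustrative): if predecessors P1, P2 and successors S1, S2
+// all pass through a 1-bp exon X holding the middle base of a codon, then each
+// (Pi, Sj) pairing spells the codon Pi.Suffix[0] + X's base + Sj.Prefix[0].  One
+// CustomAA exon is created per distinct codon, the P/S edges are re-routed through
+// it, and X itself is deleted once it has been bridged.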
+void RepairPromiscuousSingletonExons(MS2Builder* Builder)
+{
+    MS2Exon* Exon;
+    MS2Exon* NextExon;
+    int Modulo;
+    MS2Exon* CodonExons[64];
+    MS2Edge* BackwardEdge;
+    MS2Edge* ForwardEdge;
+    MS2Edge* Edge;
+    char Codon[4];
+    char RCCodon[4];
+    char* EncodeCodon;
+    int CodonValue;
+    int BridgedFlag;
+    //
+    memset(Codon, 0, sizeof(char) * 4);
+    memset(RCCodon, 0, sizeof(char) * 4);
+    Exon = Builder->FirstExon;
+    while (1)
+    {
+        if (!Exon)
+        {
+            break;
+        }
+        // Skip this exon if its length is > 1:
+        if (Exon->End > Exon->Start + 1)
+        {
+            Exon = Exon->Next;
+            continue;
+        }
+        // Skip this exon, unless its base pair is the middle of a codon:
+        if (Builder->ForwardFlag)
+        {
+            Modulo = Exon->Start % CODON_LENGTH;
+            if ((Exon->ReadingFrame + 1) % CODON_LENGTH != Modulo)
+            {
+                Exon = Exon->Next;
+                continue;
+            }
+        }
+        else
+        {
+            Modulo = (Exon->End - 1) % CODON_LENGTH;
+            if ((Exon->ReadingFrame + CODON_LENGTH - 1) % CODON_LENGTH != Modulo)
+            {
+                Exon = Exon->Next;
+                continue;
+            }
+        }
+        // Skip customAA exons built by previous passes through this loop:
+        if (Exon->CustomAA)
+        {
+            Exon = Exon->Next;
+            continue;
+        }
+        // This is the tricky case: A length-1 exon in the middle of a codon.
+        // Consider every pairing of a predecessor and a successor.  For each distinct codon,
+        // build one CustomAA exon:
+        memset(CodonExons, 0, sizeof(MS2Exon*) * 64);
+        BridgedFlag = 0;
+        for (BackwardEdge = Exon->FirstBackward; BackwardEdge; BackwardEdge = BackwardEdge->Next)
+        {
+            for (ForwardEdge = Exon->FirstForward; ForwardEdge; ForwardEdge = ForwardEdge->Next)
+            {
+                if (Builder->ForwardFlag)
+                {
+                    Codon[0] = BackwardEdge->LinkTo->Suffix[0];
+                    Codon[1] = Exon->Prefix[0];
+                    Codon[2] = ForwardEdge->LinkTo->Prefix[0];
+                    EncodeCodon = Codon;
+                }
+                else
+                {
+                    Codon[0] = ForwardEdge->LinkTo->Suffix[0];
+                    Codon[1] = Exon->Prefix[0];
+                    Codon[2] = BackwardEdge->LinkTo->Prefix[0];
+                    WriteReverseComplement(Codon, RCCodon);
+                    EncodeCodon = RCCodon;
+                }
+                CodonValue = GetCodonHashValue(EncodeCodon);
+                INSPECT_ASSERT(CodonValue >= 0 && CodonValue < 64);
+                // CodonExons[CodonValue] will hold the custom-aa exon:
+                if (!CodonExons[CodonValue])
+                {
+                    CodonExons[CodonValue] = (MS2Exon*)calloc(1, sizeof(MS2Exon));
+                    CodonExons[CodonValue]->CustomAA = TranslateCodon(EncodeCodon);
+                    CodonExons[CodonValue]->Start = Exon->Start;
+                    CodonExons[CodonValue]->End = Exon->End;
+                    CodonExons[CodonValue]->ReadingFrame = Exon->ReadingFrame;
+                    AddMS2Exon(Builder, CodonExons[CodonValue]);
+                }
+                ExonInheritOneForwardEdge(CodonExons[CodonValue], ForwardEdge);
+                ExonInheritOneBackwardEdge(CodonExons[CodonValue], BackwardEdge);
+                BridgedFlag = 1;
+            }
+        }
+
+        // Assign flags to these CustomAA exons.  All this work to handle 
+        // the case of a single-base-pair exon with out-degree 1 whose 
+        // outgoing edge is to an adjacent exon; in that case, we want the 
+        // amino acid to be placed on our incoming edges rather than 
+        // on the outgoing edges.
+        for (CodonValue = 0; CodonValue < 64; CodonValue++)
+        {
+            if (CodonExons[CodonValue])
+            {
+                CodonExons[CodonValue]->Flags = MS2_EXON_CUSTOMAA;
+                if (Builder->ForwardFlag)
+                {
+                    Edge = CodonExons[CodonValue]->FirstForward;
+                }
+                else
+                {
+                    Edge = CodonExons[CodonValue]->FirstBackward;
+                }
+                if (!Edge || !Edge->Next)
+                {
+                    continue;
+                }
+                if (Edge->LinkFrom->Start == Edge->LinkTo->End || Edge->LinkFrom->End == Edge->LinkTo->Start)
+                {
+                    CodonExons[CodonValue]->Flags = MS2_EXON_CUSTOMAA_HEAD;
+                }
+            }
+        }
+        NextExon = Exon->Next;
+        if (BridgedFlag)
+        {
+            DeleteMS2Exon(Builder, Exon);
+        }
+        else
+        {
+            REPORT_WARNING_I(21, Exon->Start);
+        }
+        Exon = NextExon;
+    }
+}
+
+void ExonInheritOneForwardEdge(MS2Exon* Exon, MS2Edge* OldEdge)
+{
+    MS2Exon* DisplacedExon;
+    MS2Exon* LinkedExon;
+    //
+    DisplacedExon = OldEdge->LinkFrom;
+    LinkedExon = OldEdge->LinkTo;
+    RemoveBackwardEdge(LinkedExon, DisplacedExon);
+    RemoveForwardEdge(DisplacedExon, LinkedExon);
+    LinkExonForward(Exon, LinkedExon);
+}
+
+void ExonInheritOneBackwardEdge(MS2Exon* Exon, MS2Edge* OldEdge)
+{
+    MS2Exon* DisplacedExon;
+    MS2Exon* LinkedExon;
+    //
+    DisplacedExon = OldEdge->LinkFrom;
+    LinkedExon = OldEdge->LinkTo;
+    RemoveBackwardEdge(DisplacedExon, LinkedExon);
+    RemoveForwardEdge(LinkedExon, DisplacedExon);
+    LinkExonForward(LinkedExon, Exon);
+}
+
+// Main entry point for building MS2 database.
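+// Pipeline overview: parse GFF exons -> split overlapping exons -> link adjacent
+// exons -> translate exon sequences -> repair single-base mid-codon exons ->
+// group exons into genes -> sort and index exons -> attach GFF cross-references
+// -> emit XML.  Both strands are processed in turn, forward strand first.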
+void BuildMS2DB()
+{
+    MS2Builder* Builder;
+    int ForwardFlag;
+    MS2Gene* Gene;
+    MS2Exon* Exon;
+    //
+    Builder = (MS2Builder*)calloc(1, sizeof(MS2Builder));
+    // Builder->VerboseFlag = 1; // spewy!
+    // Open the genome file:
+    Builder->GenomeFile = fopen(GlobalOptions->GenomeFileName, "rb");
+    if (!Builder->GenomeFile)
+    {
+        REPORT_ERROR_S(8, GlobalOptions->GenomeFileName);
+        goto cleanup;
+    }
+    // At least one GFF file must be specified!
+    if (!GlobalOptions->FirstGFFFileName)
+    {
+        REPORT_ERROR(12);
+        goto cleanup;
+    }
+    fprintf(GlobalOptions->OutputFile, "<Database CreatedBy=\"BuildMS2DB.c\">\n");
+    // Loop: First the forward strand, then the reverse strand:
+    for (ForwardFlag = 1; ForwardFlag >= 0; ForwardFlag--)
+    {
+        Builder->ForwardFlag = ForwardFlag;
+        Builder->ExonHash = (MS2ExonNode**)calloc(EXON_HASH_SIZE, sizeof(MS2ExonNode*));
+        strncpy(Builder->ChromosomeName, GlobalOptions->ChromosomeName, 256);
+
+        // Parse exons from GFF files:
+        ParseGFFFiles(Builder);
+
+        // Bail out, if we have no exons at all:
+        if (!Builder->FirstExon)
+        {
+            //REPORT_ERROR(15);
+            continue;
+        }
+
+        printf("Parsed GFF files.  We now have %d exons.\n", Builder->ExonCount);
+        DebugPrintMS2Builder(Builder, "After GFF parse");
+
+        // Merge and split any overlapping exons as needed.  Note that if we merge exons, then we
+        // can't report a cross-reference ("record FOO covers exons 1, 2, 3, 4, 5").  Therefore, 
+        // most exons are NOT permitted to be merged.  Only exons produced from EST alignments
+        // should be considered merge-able.
+        SplitOverlappingExons(Builder);
+        
+        DebugPrintMS2Builder(Builder, "After exon split");
+
+        // Add edges between adjacent exons:
+        AddAdjacentExonLinks(Builder);
+
+        // Read all exon sequences.  We *could* read just the exons for one gene at a time.
+        for (Exon = Builder->FirstExon; Exon; Exon = Exon->Next)
+        {
+            ReadExonSequence(Builder, Exon);
+        }
+
+        // Replace single-base, mid-codon exons with CustomAA bridge exons:
+        RepairPromiscuousSingletonExons(Builder);
+
+        // Group exons into genes:
+        GroupExonsIntoGenes(Builder);
+
+        DebugPrintMS2Builder(Builder, "After gene grouping");
+
+        // Sort exons within genes, assigning exons index numbers:
+        for (Gene = Builder->FirstGene; Gene; Gene = Gene->Next)
+        {
+            SortMS2GeneExons(Builder, Gene);
+        }
+
+        // Add cross-references to genes:
+        BuildGFFCrossReferences(Builder);
+
+        // Output XML:
+        for (Gene = Builder->FirstGene; Gene; Gene = Gene->Next)
+        {
+            OutputMS2Gene(Builder, Gene);
+        }
+        // Free our exon hash, exon lists, gene lists, etc:
+        FreeExonHash(Builder);
+        FreeMS2Genes(Builder);
+        FreeGFFGenes(Builder);
+        FreeMS2Exons(Builder);
+    }
+    fprintf(GlobalOptions->OutputFile, "\n</Database>\n");
+cleanup:
+    FreeExonHash(Builder);
+    if (Builder->GenomeFile)
+    {
+        fclose(Builder->GenomeFile);
+    }
+    SafeFree(Builder);
+}
+
+
+// Handy debugging function: Spew out *all* the exons and genes parsed so far!
+void DebugPrintMS2Builder(MS2Builder* Builder, char* Notes)
+{
+    MS2Exon* Exon;
+    MS2ExonNode* Node;
+    MS2Gene* Gene;
+    int GeneExonCount;
+    int GeneStart;
+    int GeneEnd;
+    int GeneIndex;
+    MS2Edge* Edge;
+    int ExonIndex;
+    int ExonCount = 0;
+    int ForwardEdgeCount = 0;
+    int BackwardEdgeCount = 0;
+    MS2Edge* PrevEdge;
+    //
+    if (!Builder->VerboseFlag)
+    {
+        return;
+    }
+    printf("\n=-=-{O}=-=-{O}=-=-{O}=-=-{O}=-=-{O}=-=-{O}=-=-{O}=-=-{O}=-=-{O}=-=-\n");
+    if (Notes)
+    {
+        printf("*-*-> %s\n", Notes);
+    }
+    else
+    {
+        printf("*-*-> MS2Builder state:\n");
+    }
+    
+    for (Exon = Builder->FirstExon, ExonIndex = 0; Exon; Exon = Exon->Next, ExonIndex++)
+    {
+        printf("  Exon %d: %d-%d R %d", ExonIndex, Exon->Start, Exon->End, Exon->ReadingFrame);
+        if (Exon->Gene)
+        {
+            printf(" Gene %d", Exon->Gene->Index);
+        }
+        printf("\n");
+        if (Exon->Sequence)
+        {
+            printf("    Prefix '%s' Suffix '%s'\n", Exon->Prefix, Exon->Suffix);
+            INSPECT_ASSERT(strlen(Exon->Sequence) * 3 + strlen(Exon->Prefix) + strlen(Exon->Suffix) == (Exon->End - Exon->Start));
+        }
+        PrevEdge = NULL;
+        for (Edge = Exon->FirstForward; Edge; Edge = Edge->Next)
+        {
+            printf("    >>>Link to %d-%d R%d\n", Edge->LinkTo->Start, Edge->LinkTo->End, Edge->LinkTo->ReadingFrame);
+            ForwardEdgeCount++;
+            PrevEdge = Edge;
+        }
+        if (PrevEdge != Exon->LastForward)
+        {
+            printf("   *** Error: LastForward link is corrupt!\n");
+        }
+        PrevEdge = NULL;
+        for (Edge = Exon->FirstBackward; Edge; Edge = Edge->Next)
+        {
+            printf("    <<<Link from %d-%d R%d\n", Edge->LinkTo->Start, Edge->LinkTo->End, Edge->LinkTo->ReadingFrame);
+            BackwardEdgeCount++;
+            PrevEdge = Edge;
+        }
+        if (PrevEdge != Exon->LastBackward)
+        {
+            printf("   *** Error: LastForward link is corrupt!\n");
+        }
+
+        ExonCount++;
+    }
+    printf("\n");
+    for (Gene = Builder->FirstGene, GeneIndex = 0; Gene; Gene = Gene->Next, GeneIndex++)
+    {
+        GeneExonCount = 0;
+        GeneStart = Gene->FirstExon->Exon->Start;
+        GeneEnd = Gene->FirstExon->Exon->End;
+        for (Node = Gene->FirstExon; Node; Node = Node->Next)
+        {
+            GeneExonCount++;
+            GeneStart = min(GeneStart, Node->Exon->Start);
+            GeneEnd = max(GeneEnd, Node->Exon->End);
+        }
+        printf("Gene %d/%d (%d...%d) has %d exons\n", GeneIndex, Gene->Index, GeneStart, GeneEnd, GeneExonCount);
+        for (Node = Gene->FirstExon; Node; Node = Node->Next)
+        {
+            if (Node->Exon->Gene != Gene)
+            {
+                printf("** ERROR: Exon %d-%d doesn't link up!\n", Node->Exon->Start, Node->Exon->End);
+            }
+        }
+    }
+    printf("\n...total of %d exons, %d/%d edges\n", ExonCount, ForwardEdgeCount, BackwardEdgeCount);
+}
diff --git a/BuildMS2DB.h b/BuildMS2DB.h
new file mode 100644
index 0000000..e93f170
--- /dev/null
+++ b/BuildMS2DB.h
@@ -0,0 +1,40 @@
+//Title:          BuildMS2DB.h
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+#ifndef BUILD_MS2DB_H
+#define BUILD_MS2DB_H
+
+
+
+void BuildMS2DB();
+
+#endif //BUILD_MS2DB_H
diff --git a/BuildMS2DB.jar b/BuildMS2DB.jar
new file mode 100644
index 0000000..31ab930
Binary files /dev/null and b/BuildMS2DB.jar differ
diff --git a/CCSVM1.model b/CCSVM1.model
new file mode 100644
index 0000000..ebad6ff
--- /dev/null
+++ b/CCSVM1.model
@@ -0,0 +1,44 @@
+svm_type c_svc
+kernel_type rbf
+gamma 0.1
+nr_class 2
+total_sv 35
+rho -0.312301
+label 1 -1
+nr_sv 19 16
+SV
+1 1:-0.75179 2:0.607494 3:-1 4:0.320215 5:-1 6:0.72313 7:0.0811355 8:-0.114332 9:-1 10:0.357271 
+0.1660259920160678 1:-0.851741 2:-0.0927604 3:0.666942 4:0.35998 5:0.595754 6:0.738728 7:-0.521993 8:-0.172721 9:-0.336393 10:0.317712 
+1 1:-0.936704 2:0.47541 3:-0.526493 4:0.320844 5:-0.881809 6:0.725531 7:-0.930563 8:-0.117135 9:-0.963797 10:0.356968 
+1 1:-0.644116 2:0.404764 3:-0.253063 4:0.282541 5:-0.51243 6:0.698208 7:0.0279429 8:-0.11326 9:-0.218628 10:0.348546 
+0.7418988259128604 1:-0.790143 2:0.0876919 3:0.81691 4:0.418834 5:0.849416 6:0.790491 7:-0.237949 8:-0.122427 9:-1 10:0.340648 
+0.5082811007489776 1:-0.620509 2:0.466751 3:-1 4:0.313391 5:0.829375 6:0.766265 7:0.538866 8:-0.0931119 9:-1 10:0.35749 
+0.2762882244591353 1:-0.985363 2:0.100306 3:0.971198 4:0.325929 5:0.996571 6:0.729376 7:-1 8:-0.115009 9:0.197818 10:0.358083 
+0.2358071042810414 1:-0.986348 2:0.233124 3:0.84868 4:0.326947 5:0.386482 6:0.727203 7:-1 8:-0.115112 9:0.600604 10:0.359261 
+1 1:-0.637898 2:0.51292 3:-1 4:0.313375 5:-0.58639 6:0.714613 7:0.428808 8:-0.100835 9:-1 10:0.351135 
+1 1:-0.569543 2:0.672606 3:-1 4:0.314482 5:-0.935458 6:0.716067 7:-0.102802 8:-0.119006 9:-0.196241 10:0.351188 
+1 1:-0.618451 2:0.484406 3:-0.289553 4:0.288088 5:-0.142375 6:0.710582 7:-0.536415 8:-0.149913 9:-0.600327 10:0.314834 
+1 1:-0.953112 2:0.6566 3:-1 4:0.223062 5:-0.999028 6:0.484834 7:-0.980275 8:-0.162242 9:-0.997067 10:0.306999 
+1 1:-0.80244 2:0.307676 3:-0.994303 4:-1 5:-0.98863 6:-0.508631 7:-0.980039 8:-0.583873 9:-1 10:-0.278361 
+1 1:-0.802308 2:0.51663 3:0.16622 4:0.323858 5:-0.45996 6:0.724424 7:-0.0455461 8:-0.115047 9:-1 10:0.356722 
+0.4426695076697531 1:-0.960351 2:-0.0444583 3:0.735875 4:0.324235 5:-0.259143 6:0.724855 7:-0.412562 8:-0.116235 9:-0.380176 10:0.357039 
+1 1:-0.487744 2:0.408039 3:-0.386503 4:0.248649 5:-0.64458 6:0.654016 7:-0.693115 8:-0.157841 9:-1 10:0.304833 
+1 1:-0.747522 2:0.454089 3:-0.124586 4:0.311553 5:0.284901 6:0.750294 7:-0.440615 8:-0.134631 9:-0.420396 10:0.332584 
+1 1:-0.696164 2:0.104826 3:-0.978551 4:-0.145873 5:-0.994744 6:-0.633391 7:-0.933211 8:-0.860357 9:-0.987323 10:-0.519298 
+1 1:-0.748748 2:-0.128167 3:-0.470328 4:0.305497 5:-1 6:0.715018 7:0.291475 8:-0.0984399 9:0.343179 10:0.364289 
+-1 1:-0.1627 2:0.0819672 3:0.478414 4:0.332095 5:-0.642318 6:0.677763 7:-0.770054 8:-0.330038 9:-1 10:0.338713 
+-1 1:-0.137889 2:0.324858 3:-1 4:-0.0920287 5:-0.960472 6:0.435168 7:-0.570267 8:-0.167445 9:-0.898061 10:0.292412 
+-1 1:-0.196224 2:0.495265 3:-0.675143 4:0.317098 5:-1 6:0.720882 7:-0.720932 8:-0.123296 9:-0.61846 10:0.353556 
+-1 1:-0.327768 2:0.953552 3:-0.591787 4:0.320595 5:0.818117 6:0.735123 7:-0.343423 8:-0.115808 9:-0.23334 10:0.357507 
+-1 1:-0.327305 2:0.496896 3:-0.732837 4:0.0975269 5:-0.26576 6:0.601986 7:-0.355449 8:-0.181283 9:-0.70581 10:0.246242 
+-1 1:0.0231203 2:0.471475 3:-0.187915 4:0.299803 5:-0.984348 6:-0.10867 7:-0.143927 8:-0.121665 9:-0.857203 10:0.24761 
+-1 1:-0.373026 2:0.43907 3:-0.853121 4:0.314152 5:-0.924011 6:0.719738 7:-0.945645 8:-0.125024 9:-0.940812 10:0.344711 
+-1 1:-0.606214 2:0.28052 3:-0.788078 4:0.321041 5:-0.940023 6:0.723269 7:-0.974707 8:-0.119677 9:-0.963612 10:0.348768 
+-0.9665150629728126 1:-0.253084 2:0.441708 3:-0.970703 4:0.2657 5:-0.998178 6:0.622214 7:-0.866261 8:-0.12946 9:-0.883559 10:0.344661 
+-1 1:-0.32251 2:0.436116 3:-0.893977 4:0.252243 5:-0.96782 6:0.624908 7:-0.428191 8:-0.122013 9:-0.249862 10:0.354316 
+-1 1:-0.0601224 2:-0.246926 3:1 4:0.323794 5:1 6:0.728849 7:-0.584148 8:-0.123886 9:-0.449 10:0.350944 
+-0.9199085377320684 1:0.24048 2:0.480049 3:-0.202535 4:0.316312 5:0.0612596 6:0.730524 7:-0.902298 8:-0.710501 9:-0.968861 10:-0.483095 
+-0.4845471543829543 1:-0.00856172 2:0.345378 3:-0.999429 4:-0.640491 5:-0.999609 6:-0.262666 7:-0.999091 8:-0.455881 9:-0.999846 10:-0.311333 
+-1 1:-0.312826 2:0.409405 3:-0.921582 4:0.299281 5:-0.994074 6:0.640002 7:-0.763224 8:-0.117524 9:-0.94537 10:0.352891 
+-1 1:-0.285649 2:0.36626 3:-0.898888 4:0.273682 5:-0.963255 6:0.685241 7:-0.91936 8:-0.143351 9:-0.98431 10:0.290957 
+-1 1:0.0781984 2:0.452641 3:0.0661623 4:0.324274 5:-0.995084 6:-0.645437 7:-0.136239 8:-0.117996 9:-0.934717 10:0.192604 
diff --git a/CCSVM1.range b/CCSVM1.range
new file mode 100644
index 0000000..d65d8ad
--- /dev/null
+++ b/CCSVM1.range
@@ -0,0 +1,12 @@
+x
+-1 1
+1 0 0.82135439
+2 0 0.73939395
+3 0 1
+4 -0.5379861 0.2760399
+5 0 1
+6 -0.86319005 0.13671456
+7 0 1
+8 -0.78792346 0.99241817
+9 0 1
+10 -0.74330956 0.35138521
diff --git a/CCSVM2.model b/CCSVM2.model
new file mode 100644
index 0000000..a94184b
--- /dev/null
+++ b/CCSVM2.model
@@ -0,0 +1,118 @@
+svm_type c_svc
+kernel_type rbf
+gamma 0.0454545
+nr_class 2
+total_sv 109
+rho 1.88691
+label 1 -1
+nr_sv 56 53
+SV
+1 1:-0.586343 2:0.447929 3:-0.586343 4:0.447929 5:0.586343 6:-0.447929 7:0.270654 8:-0.35777 9:-0.614851 10:-0.771449 11:-0.286179 12:0.973306 13:-0.361175 14:0.958467 15:-0.397918 16:-0.948572 17:0.484338 18:-0.934396 19:0.236794 20:-0.724109 21:0.547246 22:-0.125524 
+0.5596070646832195 1:0.0682849 2:0.464911 3:0.0682849 4:0.464911 5:-0.0682842 6:-0.464911 7:-0.883563 8:0.896384 9:-0.191985 10:0.907317 11:-0.0719692 12:0.497519 13:-0.301422 14:0.688257 15:-0.268794 16:-0.839139 17:0.362837 18:-0.460607 19:0.169239 20:-1 21:0.540128 22:-0.30313 
+1 1:-0.341176 2:0.368027 3:-0.341176 4:0.368027 5:0.341174 6:-0.368027 7:-0.161618 8:0.987183 9:-0.391364 10:0.999116 11:-0.0518543 12:0.994744 13:-0.117239 14:0.998121 15:0.0820365 16:-0.429618 17:0.470359 18:0.542289 19:0.306701 20:-0.937824 21:0.411759 22:0.0496908 
+1 1:-0.301444 2:0.519534 3:-0.301444 4:0.519534 5:0.301443 6:-0.519534 7:-0.231671 8:0.915459 9:-0.558913 10:0.996772 11:-0.0455303 12:0.979313 13:-0.378779 14:0.960248 15:-0.411043 16:-0.817763 17:0.499885 18:-0.892501 19:0.238828 20:0.587939 21:0.556435 22:-0.739683 
+0.4744165168332187 1:-0.100362 2:0.497448 3:-0.100362 4:0.497448 5:0.100362 6:-0.497448 7:-0.586211 8:0.961407 9:-0.449057 10:0.965527 11:0.711384 12:0.999177 13:-0.192454 14:0.999381 15:-0.307237 16:-0.970007 17:0.376268 18:-0.976209 19:0.192955 20:-0.990705 21:0.537001 22:-0.92019 
+1 1:-0.0162669 2:0.481616 3:-0.0162669 4:0.481616 5:0.0162667 6:-0.481616 7:-0.734485 8:0.999507 9:0.668068 10:0.998986 11:-0.00859887 12:0.977547 13:-0.340345 14:0.95151 15:-0.416897 16:-0.988217 17:-0.263232 18:-1 19:0.225853 20:-0.549695 21:0.542771 22:-0.900102 
+1 1:-0.0998906 2:0.149826 3:-0.0998906 4:0.149826 5:0.0998906 6:-0.149826 7:-0.587042 8:1 9:-0.529908 10:1 11:-0.264798 12:0.96059 13:0.00532483 14:1 15:-0.403677 16:-0.0983453 17:0.497203 18:-1 19:0.236032 20:-0.660344 21:0.470228 22:-0.131685 
+0.1989762850809156 1:0.36994 2:0.416452 3:0.36994 4:0.416452 5:-0.369941 6:-0.416452 7:-0.591375 8:0.895658 9:-0.288629 10:0.956849 11:-0.133629 12:0.934105 13:0.125194 14:0.954301 15:0.170537 16:-0.949086 17:0.196624 18:-0.900376 19:-0.0101391 20:0.025301 21:0.551641 22:-0.30438 
+1 1:0.00835184 2:0.448781 3:0.00835184 4:0.448781 5:-0.00835176 6:-0.44878 7:-0.777892 8:0.607312 9:-0.540934 10:0.110858 11:-0.264211 12:0.338182 13:-0.326846 14:0.199405 15:-0.350711 16:-0.207805 17:0.483608 18:-0.245967 19:0.219276 20:-0.384979 21:0.514448 22:-0.109859 
+1 1:0.281903 2:0.444653 3:0.281903 4:0.444653 5:-0.281904 6:-0.444653 7:-0.746599 8:0.903924 9:-0.207278 10:0.780967 11:-0.0903158 12:-0.0501338 13:-0.449562 14:-0.395518 15:-0.57681 16:-0.801324 17:0.347715 18:-0.556582 19:0.154491 20:-0.577215 21:0.536102 22:-0.683604 
+1 1:0.671585 2:0.477308 3:0.671585 4:0.477308 5:-0.671585 6:-0.477308 7:-0.0595257 8:0.970662 9:-0.530234 10:0.959048 11:-0.263206 12:0.844518 13:-0.0837123 14:0.925784 15:-0.17477 16:0.0445118 17:0.51009 18:-0.680938 19:0.199413 20:-0.562869 21:0.539659 22:-0.277483 
+1 1:0.657934 2:0.316153 3:0.657934 4:0.316153 5:-0.657934 6:-0.316153 7:-0.0835945 8:1 9:-0.610388 10:0.93972 11:-0.280656 12:1 13:-0.381819 14:0.987189 15:-0.414015 16:0.426698 17:0.527226 18:-0.0722581 19:0.23855 20:-0.515298 21:0.546444 22:-0.116434 
+1 1:-0.440958 2:0.384736 3:-0.440958 4:0.384736 5:0.440958 6:-0.384736 7:0.0143158 8:0.754663 9:-0.585509 10:0.831596 11:-0.279427 12:0.245362 13:-0.418379 14:0.202683 15:-0.438459 16:-0.0630584 17:0.505839 18:0.389734 19:0.26914 20:0.237103 21:0.562722 22:-0.388008 
+1 1:-0.486414 2:0.430894 3:-0.486414 4:0.430894 5:0.486415 6:-0.430894 7:0.0944636 8:0.999547 9:-0.550971 10:0.998508 11:-0.270172 12:0.969579 13:-0.289701 14:0.948459 15:-0.373008 16:-0.931439 17:-0.0476394 18:-0.48044 19:0.217399 20:-0.965434 21:0.40285 22:-0.749482 
+1 1:-0.3299 2:0.396217 3:-0.3299 4:0.396217 5:0.3299 6:-0.396217 7:-0.181497 8:0.953396 9:-0.570408 10:0.495372 11:-0.282587 12:0.998933 13:-0.395738 14:0.976972 15:-0.435797 16:-0.910217 17:0.439613 18:-0.454396 19:0.233376 20:0.227735 21:0.551649 22:0.045953 
+1 1:-0.351795 2:0.256739 3:-0.351795 4:0.256739 5:0.351795 6:-0.256739 7:-0.142893 8:0.999503 9:-0.364336 10:0.992364 11:-0.194011 12:0.999445 13:-0.164377 14:0.992241 15:-0.274452 16:-0.550078 17:0.454044 18:0.0301268 19:0.240288 20:0.861797 21:0.565931 22:0.146761 
+1 1:-0.12363 2:0.089939 3:-0.12363 4:0.089939 5:0.12363 6:-0.089939 7:-0.545186 8:1 9:-0.436512 10:1 11:-0.177595 12:1 13:-0.244387 14:1 15:-0.275533 16:-0.656413 17:0.380233 18:-0.791762 19:0.114865 20:0.803011 21:0.571513 22:0.367263 
+1 1:0.193279 2:0.48267 3:0.193279 4:0.48267 5:-0.193281 6:-0.48267 7:-0.902856 8:0.992069 9:-0.420573 10:0.986801 11:-0.219053 12:0.984208 13:0.327109 14:0.989893 15:0.766696 16:-0.974373 17:-0.0870789 18:-1 19:-0.147046 20:-0.873146 21:0.487336 22:-0.162558 
+0.1097681833559589 1:0.633849 2:0.509146 3:0.633849 4:0.509146 5:-0.63385 6:-0.509146 7:-0.126059 8:0.134206 9:-0.614563 10:0.72621 11:-0.285493 12:0.999878 13:0.103765 14:0.999389 15:0.0290541 16:0.534943 17:0.516041 18:0.859695 19:0.254284 20:-1 21:0.548412 22:-0.886915 
+1 1:-0.308093 2:0.480976 3:-0.308093 4:0.480976 5:0.308094 6:-0.480976 7:-0.219946 8:0.258287 9:-0.61395 10:0.0982527 11:-0.28583 12:0.0574076 13:-0.42947 14:-0.180807 15:-0.447977 16:-0.167755 17:0.491927 18:0.931486 19:0.246749 20:0.0827929 21:0.548853 22:-0.687232 
+1 1:-0.25603 2:0.0995601 3:-0.25603 4:0.0995601 5:0.25603 6:-0.0995601 7:-0.311743 8:1 9:-0.614317 10:-1 11:-0.28595 12:0.998647 13:-0.364239 14:1 15:-0.416991 16:0.800016 17:0.534311 18:0.0886501 19:0.240971 20:0.538389 21:0.548926 22:-0.870887 
+1 1:0.381401 2:0.467457 3:0.381401 4:0.467457 5:-0.381403 6:-0.467457 7:-0.571166 8:0.966931 9:-0.283778 10:0.987197 11:-0.136138 12:0.996575 13:0.390136 14:0.983524 15:0.36958 16:-0.938605 17:-0.111525 18:-0.889852 19:-0.101351 20:0.409386 21:0.562073 22:-0.238093 
+1 1:-0.407687 2:0.43241 3:-0.409286 4:0.15957 5:0.407687 6:-0.43241 7:-0.0443467 8:0.632951 9:-0.614584 10:1 11:-0.281641 12:0.995113 13:-0.412482 14:0.990009 15:-0.433111 16:0.975196 17:0.616761 18:-0.0532698 19:0.23941 20:-0.798875 21:0.548329 22:-0.812192 
+1 1:-0.381384 2:0.48489 3:-0.38323 4:0.333027 5:0.381384 6:-0.48489 7:-0.0907228 8:0.90056 9:-0.602526 10:0.994116 11:-0.27993 12:0.919902 13:-0.387762 14:0.997017 15:-0.410666 16:0.212216 17:0.509798 18:0.615852 19:0.243923 20:-0.555275 21:0.54677 22:-0.44556 
+1 1:0.657865 2:0.418559 3:0.657865 4:0.418559 5:-0.657865 6:-0.418559 7:-0.0837164 8:0.982051 9:-0.60982 10:0.974959 11:-0.283128 12:0.984598 13:-0.38622 14:0.99638 15:-0.406722 16:-0.677503 17:0.482468 18:-0.680025 19:0.226878 20:-0.807329 21:0.537338 22:-0.115688 
+1 1:0.410844 2:0.793817 3:0.384586 4:0.0450936 5:-0.410844 6:-0.793817 7:-0.519256 8:0.734904 9:-0.355755 10:0.867622 11:-0.151593 12:0.32882 13:-0.0649979 14:0.364197 15:-0.176056 16:-0.713813 17:0.428602 18:-0.506334 19:0.169325 20:-0.318047 21:0.543925 22:-0.639797 
+1 1:0.422156 2:0.533101 3:0.422058 4:0.51777 5:-0.422156 6:-0.533101 7:-0.49931 8:0.975474 9:-0.441186 10:0.953309 11:-0.188735 12:0.875588 13:-0.153936 14:0.923043 15:-0.138697 16:-0.764521 17:0.263615 18:-0.775987 19:0.0585216 20:-0.841292 21:0.457284 22:-0.347796 
+1 1:-0.51645 2:0.230827 3:-0.51645 4:0.230827 5:0.51645 6:-0.230827 7:0.14742 8:0.987989 9:-0.565042 10:0.984247 11:-0.25387 12:0.990325 13:-0.305422 14:0.990213 15:-0.342887 16:0.378168 17:0.540062 18:0.393266 19:0.263358 20:0.259464 21:0.553371 22:0.135901 
+1 1:0.238222 2:0.37266 3:0.238222 4:0.37266 5:-0.238222 6:-0.37266 7:-0.823616 8:0.915236 9:0.134308 10:0.0502518 11:-0.284304 12:0.652288 13:-0.0167093 14:-0.153565 15:-0.462807 16:-0.808788 17:0.434321 18:-0.0034517 19:0.239546 20:-0.704591 21:0.53369 22:-0.629419 
+0.07285356465522599 1:-0.315793 2:0.48801 3:-0.315793 4:0.48801 5:0.315793 6:-0.48801 7:-0.206371 8:0.998677 9:-0.541924 10:0.995599 11:-0.0371308 12:0.975513 13:-0.378187 14:0.886955 15:-0.427352 16:-0.950521 17:0.490977 18:-0.348778 19:0.237224 20:0.662718 21:0.554518 22:-0.739705 
+1 1:-0.36855 2:0.308094 3:-0.36855 4:0.308094 5:0.368549 6:-0.308094 7:-0.113352 8:0.87392 9:-0.543669 10:0.667273 11:-0.256033 12:0.87228 13:-0.328798 14:0.743686 15:-0.328382 16:-0.291816 17:0.45961 18:-0.296628 19:0.219137 20:-0.839547 21:0.495189 22:0.0884328 
+1 1:-0.492285 2:0.431992 3:-0.492285 4:0.431992 5:0.492285 6:-0.431992 7:0.104814 8:-0.101568 9:-0.614847 10:0.863995 11:-0.274842 12:0.977854 13:-0.389846 14:0.97568 15:-0.372854 16:-0.191448 17:0.49417 18:0.344803 19:0.255009 20:-0.619481 21:0.539645 22:-0.343214 
+1 1:-0.335623 2:0.169481 3:-0.335623 4:0.169481 5:0.335624 6:-0.169481 7:-0.171407 8:0.996622 9:-0.383064 10:0.991566 11:0.0107828 12:0.991459 13:-0.063775 14:0.996811 15:0.102334 16:-0.562855 17:0.380578 18:-0.276057 19:0.189847 20:-0.675418 21:0.424216 22:0.222194 
+1 1:-0.60664 2:0.39407 3:-0.60664 4:0.39407 5:0.60664 6:-0.39407 7:0.306442 8:0.998864 9:-0.581221 10:1 11:-0.0263783 12:0.62573 13:-0.428906 14:1 15:-0.439548 16:0.551483 17:0.514559 18:0.114448 19:0.240437 20:-0.954679 21:0.546405 22:-0.823663 
+0.5358741563675483 1:-0.0664034 2:0.383825 3:-0.0664034 4:0.383825 5:0.066403 6:-0.383825 7:-0.646086 8:1 9:-0.503616 10:1 11:-0.257686 12:0.771434 13:-0.253061 14:0.858967 15:-0.331146 16:-0.872614 17:0.290232 18:-0.93065 19:0.118993 20:-0.589183 21:0.515917 22:-0.913288 
+1 1:-0.486864 2:0.422274 3:-0.486864 4:0.422274 5:0.486864 6:-0.422274 7:0.0952555 8:0.996401 9:-0.455773 10:0.999615 11:-0.085654 12:0.999806 13:0.188512 14:0.999491 15:-0.0387637 16:-0.704547 17:0.496536 18:0.175756 19:0.243346 20:0.172858 21:0.551543 22:-0.31052 
+1 1:0.0364816 2:0.493327 3:0.0364816 4:0.493327 5:-0.0364819 6:-0.493327 7:-0.827489 8:0.99666 9:-0.413362 10:0.998596 11:-0.0503499 12:0.969016 13:0.133667 14:0.953747 15:-0.0522417 16:-0.954976 17:-0.545767 18:-0.960684 19:-0.276192 20:-0.714058 21:0.517738 22:-0.801364 
+0.5148494730248897 1:-0.0124078 2:0.498572 3:-0.0124078 4:0.498572 5:0.0124078 6:-0.498572 7:-0.741289 8:0.967992 9:-0.529865 10:1 11:1 12:1 13:-0.42631 14:1 15:-0.441308 16:-0.933978 17:0.472606 18:-1 19:0.232945 20:0.0889109 21:0.548763 22:-0.235028 
+1 1:0.302517 2:0.463415 3:0.302517 4:0.463415 5:-0.302516 6:-0.463415 7:-0.710254 8:0.51784 9:-0.392845 10:0.521332 11:-0.113785 12:0.611404 13:-0.235453 14:0.714318 15:-0.168268 16:-0.949915 17:0.251864 18:-0.670698 19:-0.0427911 20:-0.474422 21:0.499639 22:-0.464822 
+1 1:0.743347 2:0.394416 3:0.743347 4:0.394416 5:-0.743347 6:-0.394416 7:0.0670029 8:0.910294 9:-0.610358 10:0.601571 11:-0.284704 12:1 13:-0.37368 14:1 15:-0.390897 16:0.0657905 17:0.511036 18:-0.10271 19:0.238667 20:-0.147246 21:0.547753 22:-0.11628 
+0.917171674952913 1:-0.28129 2:0.341463 3:-0.28129 4:0.341463 5:0.28129 6:-0.341463 7:-0.267205 8:0.626618 9:-0.605168 10:0.429773 11:-0.285289 12:1 13:-0.322021 14:1 15:-0.404777 16:-0.442832 17:0.494445 18:1 19:0.25183 20:-1 21:0.536631 22:-0.840349 
+1 1:-0.609026 2:0.447929 3:-0.609026 4:0.447929 5:0.609026 6:-0.447929 7:0.310648 8:0.51238 9:-0.613696 10:0.482398 11:-0.285306 12:0.57918 13:-0.426866 14:0.366864 15:-0.440886 16:-0.266691 17:0.503673 18:0.315527 19:0.269261 20:0.931967 21:0.549317 22:-0.0887005 
+0.348164886379205 1:0.0692804 2:0.92874 3:0.0534675 4:-0.0479937 5:-0.0692809 6:-0.92874 7:-0.885319 8:0.983 9:-0.398756 10:0.952999 11:-0.00435766 12:0.999962 13:-0.11588 14:0.902806 15:-0.227063 16:-0.892101 17:0.328324 18:-0.828931 19:0.207083 20:-0.91052 21:0.526309 22:-0.690825 
+1 1:0.578146 2:0.403377 3:0.578146 4:0.403377 5:-0.578146 6:-0.403377 7:-0.224275 8:0.588429 9:-0.6146 10:-1 11:-0.285959 12:0.930657 13:0.65313 14:0.976113 15:0.0436324 16:0.514677 17:0.521094 18:0.794236 19:0.242741 20:-1 21:0.548515 22:-1 
+1 1:-0.00719075 2:0.414634 3:-0.00737814 4:0.398374 5:0.00719096 6:-0.414634 7:-0.750487 8:0.989496 9:-0.259347 10:0.999136 11:-0.111278 12:0.991868 13:-0.207436 14:0.999302 15:-0.232533 16:-0.998136 17:0.318187 18:-0.727043 19:0.0858924 20:-0.919277 21:0.53584 22:-0.303007 
+1 1:0.737565 2:0.616221 3:0.737565 4:0.616221 5:-0.737565 6:-0.616221 7:0.0568082 8:0.749228 9:-0.612368 10:1 11:-0.285055 12:0.270022 13:-0.415722 14:-0.55593 15:-0.450961 16:-1 17:0.508329 18:-0.630999 19:0.238094 20:-0.999998 21:0.529147 22:-0.692216 
+0.9807978899181346 1:0.126935 2:0.503618 3:0.126935 4:0.503618 5:-0.126935 6:-0.503618 7:-0.986974 8:-0.355252 9:-0.798183 10:-0.53595 11:-0.530642 12:0.965781 13:0.188843 14:0.929469 15:0.294725 16:-0.49626 17:0.191486 18:0.303603 19:0.389976 20:-0.644883 21:0.488484 22:-0.316129 
+1 1:-0.303797 2:0.341463 3:-0.303797 4:0.341463 5:0.303798 6:-0.341463 7:-0.227521 8:0.996809 9:-0.556053 10:0.946744 11:-0.265229 12:0.971874 13:-0.388131 14:0.97552 15:-0.406586 16:-0.938298 17:0.495425 18:-0.336644 19:0.237716 20:-0.099517 21:0.54721 22:0.0454306 
+1 1:-0.299369 2:0.304542 3:-0.299369 4:0.304542 5:0.299368 6:-0.304542 7:-0.23533 8:-0.287076 9:-0.61563 10:0.992172 11:-0.277707 12:0.3129 13:-0.423048 14:0.399398 15:-0.440061 16:-1 17:0.431147 18:-0.654444 19:0.225119 20:0.236155 21:0.552155 22:-0.649479 
+1 1:0.0723979 2:0.490515 3:0.0723979 4:0.490515 5:-0.0723979 6:-0.490515 7:-0.890816 8:0.271433 9:-0.550909 10:-0.29294 11:-0.304718 12:0.822387 13:-0.189803 14:0.617052 15:-0.338845 16:-0.99735 17:0.246394 18:-0.241855 19:0.208874 20:-0.1698 21:0.543135 22:-0.580233 
+1 1:0.203415 2:0.52439 3:0.200207 4:0.463415 5:-0.203415 6:-0.52439 7:-0.884987 8:0.201086 9:-0.56697 10:0.684083 11:-0.235972 12:0.867594 13:-0.0074243 14:0.774399 15:-0.10621 16:-0.900716 17:0.234968 18:-0.459144 19:0.121838 20:-0.792795 21:0.479832 22:-0.64185 
+1 1:-0.471464 2:0.367427 3:-0.471464 4:0.367427 5:0.471464 6:-0.367427 7:0.0681036 8:0.684579 9:-0.61453 10:0.987199 11:-0.264995 12:1 13:-0.402389 14:0.989835 15:-0.394969 16:-0.833764 17:0.507274 18:-0.679781 19:0.238515 20:-0.948217 21:0.548065 22:-0.129608 
+1 1:-0.0869371 2:0.428571 3:-0.0869371 4:0.428571 5:0.0869361 6:-0.428571 7:-0.609882 8:0.995544 9:-0.320484 10:0.965462 11:-0.181339 12:1 13:-0.227088 14:0.975314 15:-0.31391 16:-0.947786 17:-0.328383 18:-0.821197 19:0.00299785 20:-0.884176 21:0.506818 22:-0.390821 
+1 1:-0.732964 2:0.371274 3:-0.732964 4:0.371274 5:0.732964 6:-0.371274 7:0.529172 8:0.999058 9:-0.614047 10:1 11:-0.284923 12:0.996677 13:-0.42765 14:0.997889 15:-0.440344 16:0.979815 17:0.512833 18:0.636184 19:0.239863 20:-0.990803 21:0.548183 22:-0.559533 
+1 1:-0.431661 2:0.940564 3:-0.495001 4:-0.0362301 5:0.431661 6:-0.940564 7:-0.00207637 8:0.528697 9:-0.61112 10:0.647861 11:-0.284245 12:0.54907 13:-0.421421 14:-0.803597 15:-0.447746 16:-0.894568 17:0.506539 18:-0.606406 19:0.237431 20:0.472298 21:0.550599 22:-0.681023 
+1 1:0.0866925 2:0.380346 3:0.0866925 4:0.380346 5:-0.0866931 6:-0.380346 7:-0.91602 8:0.0924395 9:-0.613444 10:0.565248 11:-0.25041 12:0.923577 13:-0.224146 14:0.850363 15:-0.3956 16:-0.934678 17:0.282952 18:-0.411323 19:0.222608 20:-0.972609 21:0.484729 22:-0.494112 
+-1 1:0.314173 2:0.57364 3:0.248588 4:0.186679 5:-0.314173 6:-0.57364 7:-0.689702 8:-0.753608 9:-0.635444 10:-0.682679 11:-0.290888 12:0.700462 13:-0.290094 14:0.131287 15:-0.44125 16:-0.952021 17:0.0991982 18:-0.506693 19:0.233323 20:-0.920479 21:0.438822 22:-0.856797 
+-0.9072397084667069 1:-0.819382 2:-0.0838786 3:-0.819382 4:-0.0838786 5:0.819382 6:0.0838787 7:0.68154 8:1 9:-0.614073 10:0.925916 11:-0.285324 12:1 13:-0.424977 14:1 15:-0.440096 16:-0.87566 17:0.501737 18:-0.0224184 19:0.239469 20:-0.548757 21:0.548182 22:0.234697 
+-1 1:0.290014 2:0.103913 3:0.290014 4:0.103913 5:-0.290014 6:-0.103913 7:-0.732298 8:1 9:-0.594535 10:1 11:-0.27302 12:0.94954 13:-0.401871 14:0.886464 15:-0.425788 16:-0.983362 17:-0.885167 18:-0.973772 19:-0.311776 20:0.316398 21:0.552948 22:0.266855 
+-1 1:0.71737 2:0.386179 3:0.71737 4:0.386179 5:-0.71737 6:-0.386179 7:0.0212014 8:1 9:-0.567279 10:1 11:-0.28248 12:0.980334 13:-0.402101 14:1 15:-0.439201 16:-0.27883 17:0.500433 18:-0.562343 19:0.230617 20:-0.981629 21:0.533765 22:-0.520955 
+-1 1:0.0531524 2:0.499283 3:0.0531524 4:0.499283 5:-0.0531525 6:-0.499283 7:-0.856883 8:0.719106 9:-0.45871 10:0.75029 11:-0.180035 12:0.89694 13:-0.100122 14:0.799798 15:-0.31753 16:-0.983592 17:-0.568076 18:-0.966201 19:-0.107333 20:-0.995902 21:0.123553 22:-0.621968 
+-1 1:0.746402 2:0.280488 3:0.746402 4:0.280488 5:-0.746402 6:-0.280488 7:0.0723898 8:1 9:-0.58158 10:1 11:-0.283182 12:0.842823 13:-0.425851 14:0.929143 15:-0.439283 16:-0.936503 17:0.344804 18:-0.859598 19:0.119085 20:-0.988712 21:0.527753 22:-0.225431 
+-1 1:-0.76884 2:0.442267 3:-0.76884 4:0.442267 5:0.768839 6:-0.442267 7:0.592426 8:0.921646 9:-0.609914 10:0.935718 11:-0.278114 12:0.958903 13:-0.41508 14:0.962155 15:-0.436201 16:0.602626 17:0.56163 18:0.818495 19:0.279524 20:0.502347 21:0.55442 22:-0.225346 
+-1 1:-0.918483 2:0.555072 3:-0.918483 4:0.555072 5:0.918483 6:-0.555072 7:0.856272 8:0.977948 9:-0.613634 10:0.963833 11:-0.285524 12:0.994831 13:-0.42505 14:0.986542 15:-0.441408 16:-0.979398 17:0.506681 18:0.867977 19:0.243613 20:-0.241779 21:0.548501 22:-0.508623 
+-1 1:0.362966 2:0.571217 3:0.362629 4:0.556394 5:-0.362966 6:-0.571217 7:-0.603672 8:0.761436 9:-0.545098 10:0.365721 11:-0.27836 12:0.873096 13:-0.308675 14:0.856215 15:-0.319712 16:-0.962038 17:-0.322795 18:-0.625463 19:0.114079 20:-0.545571 21:0.479135 22:-0.400456 
+-1 1:0.247296 2:0.307645 3:0.247296 4:0.307645 5:-0.247296 6:-0.307645 7:-0.807618 8:1 9:-0.611993 10:1 11:-0.284683 12:0.206361 13:-0.333169 14:-0.836851 15:-0.706334 16:-0.998879 17:-0.254898 18:-0.947889 19:-0.0790606 20:-0.59284 21:0.510645 22:-0.0997706 
+-1 1:0.551959 2:0.553273 3:0.551959 4:0.553273 5:-0.551958 6:-0.553273 7:-0.270447 8:0.656892 9:-0.592363 10:0.912889 11:-0.18659 12:0.702479 13:-0.330583 14:0.45293 15:-0.398743 16:-0.975299 17:0.0483964 18:-0.995985 19:-0.404355 20:-1 21:0.52356 22:-0.249263 
+-1 1:-0.714479 2:0.52439 3:-0.714479 4:0.52439 5:0.714479 6:-0.52439 7:0.496578 8:0.994425 9:-0.55654 10:0.907499 11:-0.252391 12:0.999472 13:-0.41595 14:0.995312 15:-0.4274 16:0.615873 17:0.706353 18:0.967788 19:0.677751 20:-0.0829828 21:0.548365 22:-0.318398 
+-1 1:0.475181 2:0.527205 3:0.475181 4:0.527205 5:-0.475181 6:-0.527204 7:-0.405818 8:0.545675 9:-0.606504 10:0.952094 11:-0.279339 12:0.546147 13:-0.353924 14:0.869819 15:-0.410809 16:-0.910452 17:0.231864 18:-0.940585 19:-0.371012 20:-0.580494 21:0.534986 22:-0.250645 
+-0.9375540531464036 1:0.273824 2:0.701368 3:0.249197 4:0.325104 5:-0.273824 6:-0.701368 7:-0.760845 8:-0.732894 9:-0.621524 10:-0.481251 11:-0.288462 12:0.953364 13:-0.355107 14:0.919803 15:-0.38208 16:-0.889975 17:0.504062 18:-1 19:0.23929 20:-0.998936 21:-1 22:-0.565464 
+-1 1:0.152462 2:0.767426 3:0.0863133 4:0.00450816 5:-0.152461 6:-0.767426 7:-0.974826 8:-0.580813 9:-0.668134 10:0.898186 11:0.208259 12:0.521068 13:-0.2819 14:0.386054 15:-0.34436 16:-0.944409 17:0.25892 18:-0.438837 19:0.19293 20:-0.994983 21:0.0227629 22:-0.504649 
+-1 1:-0.821065 2:0.341463 3:-0.821065 4:0.341463 5:0.821065 6:-0.341463 7:0.684508 8:0.976281 9:-0.611809 10:0.73847 11:-0.284926 12:0.969001 13:-0.413961 14:0.992339 15:-0.43337 16:0.800543 17:0.624509 18:0.0670394 19:0.244416 20:-1 21:0.544043 22:-0.316082 
+-1 1:0.0874382 2:0.533101 3:0.0874382 4:0.533101 5:-0.087438 6:-0.533101 7:-0.917334 8:0.684992 9:-0.478737 10:0.21614 11:-0.264174 12:0.399124 13:-0.37267 14:-0.425209 15:-0.517443 16:-0.973893 17:-0.577042 18:-0.869095 19:0.0488588 20:-0.887083 21:0.486157 22:-0.355497 
+-1 1:0.335699 2:0.588772 3:0.335699 4:0.588772 5:-0.335699 6:-0.588772 7:-0.651748 8:0.763722 9:-0.486027 10:0.873241 11:-0.187559 12:0.878274 13:-0.291976 14:0.797142 15:-0.326437 16:-0.913324 17:-0.477834 18:-0.812836 19:0.1443 20:-0.930905 21:0.430101 22:-0.398904 
+-1 1:0.426947 2:0.482176 3:0.426947 4:0.482176 5:-0.426947 6:-0.482176 7:-0.490862 8:1 9:-0.545892 10:1 11:-0.280435 12:0.992943 13:-0.251762 14:1 15:-0.395434 16:-0.691613 17:0.44945 18:-0.619779 19:0.140397 20:-0.785487 21:0.44963 22:0.244653 
+-1 1:-0.880866 2:0.425305 3:-0.880866 4:0.425305 5:0.880866 6:-0.425305 7:0.789947 8:0.987777 9:-0.601968 10:0.997644 11:-0.276129 12:0.996272 13:-0.396802 14:0.997961 15:-0.408024 16:0.757128 17:0.530727 18:0.580053 19:0.299234 20:-0.111389 21:0.546603 22:-0.374854 
+-1 1:0.838314 2:0.401274 3:0.83142 4:0.367096 5:-0.838314 6:-0.401274 7:0.234446 8:0.944777 9:-0.601175 10:0.915073 11:-0.281699 12:0.865605 13:-0.405837 14:1 15:-0.432593 16:-0.99649 17:0.471513 18:-0.975827 19:0.208841 20:-0.81984 21:0.537619 22:-0.200444 
+-1 1:0.28387 2:0.494773 3:0.28387 4:0.494773 5:-0.28387 6:-0.494773 7:-0.743131 8:1 9:-0.598043 10:1 11:-0.273415 12:1 13:-0.313937 14:1 15:-0.37725 16:-1 17:0.344304 18:-0.554773 19:0.142136 20:-1 21:0.507637 22:0.165558 
+-0.2184773448480964 1:-0.931151 2:0.664408 3:-0.937486 4:0.440831 5:0.931151 6:-0.664408 7:0.878607 8:0.976887 9:-0.530169 10:0.980579 11:-0.224123 12:0.915827 13:-0.428529 14:0.725425 15:-0.442071 16:0.91644 17:0.602366 18:0.881946 19:0.428949 20:-0.398664 21:0.525115 22:-0.251924 
+-1 1:0.447162 2:0.788618 3:0.266108 4:0.229675 5:-0.447162 6:-0.788618 7:-0.45522 8:0.758634 9:-0.48651 10:0.161047 11:-0.276599 12:0.503117 13:-0.360738 14:-0.560266 15:-0.507808 16:-0.697934 17:0.0567238 18:-0.807127 19:-0.00422272 20:-0.869989 21:0.518937 22:-0.411518 
+-1 1:0.421399 2:0.77575 3:0.354302 4:0.447622 5:-0.421399 6:-0.77575 7:-0.500644 8:0.822446 9:-0.238664 10:0.299605 11:-0.224904 12:0.719876 13:-0.195063 14:0.726083 15:-0.278783 16:-0.891679 17:-0.198613 18:-0.907721 19:-0.0262203 20:-0.91968 21:0.209938 22:-0.728423 
+-1 1:-0.542993 2:0.308745 3:-0.542993 4:0.308745 5:0.542993 6:-0.308745 7:0.19422 8:0.892032 9:-0.565442 10:0.98268 11:-0.0576717 12:0.974617 13:-0.27553 14:0.97146 15:-0.333374 16:-0.501866 17:0.390441 18:0.32146 19:0.278349 20:-0.00317282 21:0.548403 22:0.0561746 
+-1 1:-0.767483 2:0.0622756 3:-0.767483 4:0.0622756 5:0.767483 6:-0.0622757 7:0.590033 8:1 9:-0.609413 10:1 11:-0.27798 12:1 13:-0.41259 14:1 15:-0.430814 16:0.106161 17:0.516786 18:-0.272557 19:0.230707 20:0.704961 21:0.564232 22:0.356582 
+-1 1:0.291942 2:0.615526 3:0.290535 4:0.586677 5:-0.291942 6:-0.615526 7:-0.728899 8:-0.750747 9:-0.688631 10:-0.864176 11:-0.415871 12:0.684524 13:0.0113795 14:0.290082 15:-0.359071 16:-0.90887 17:0.202634 18:-0.980114 19:-0.301483 20:-0.904303 21:0.485313 22:-0.897096 
+-0.756695633922057 1:0.432115 2:0.56883 3:0.432115 4:0.56883 5:-0.432115 6:-0.56883 7:-0.48175 8:1 9:-0.556104 10:1 11:-0.196911 12:0.568382 13:-0.240856 14:-0.277788 15:-0.476479 16:-0.971105 17:-0.28234 18:-0.966243 19:-0.281655 20:-0.944121 21:0.495331 22:-0.249043 
+-1 1:-0.758062 2:0.334584 3:-0.758062 4:0.334584 5:0.758062 6:-0.334584 7:0.573424 8:0.991925 9:-0.60947 10:0.928439 11:-0.283941 12:0.968487 13:-0.421123 14:0.986012 15:-0.431774 16:-0.471938 17:0.497059 18:0.0125694 19:0.239731 20:-0.059729 21:0.548216 22:0.0804318 
+-1 1:0.599813 2:0.617948 3:0.599813 4:0.617948 5:-0.599813 6:-0.617948 7:-0.186072 8:-0.499586 9:-0.619897 10:-0.186008 11:-0.28841 12:0.100056 13:-0.412439 14:1 15:-0.390079 16:0.00793922 17:0.510547 18:0.111645 19:0.248091 20:-1 21:0.521258 22:-0.267001 
+-1 1:-0.832127 2:0.498842 3:-0.832127 4:0.498842 5:0.832127 6:-0.498842 7:0.704011 8:0.97619 9:-0.595345 10:0.979067 11:-0.271839 12:0.869747 13:-0.427941 14:0.956343 15:-0.439005 16:0.76817 17:0.682224 18:0.885431 19:0.454549 20:0.453709 21:0.554543 22:-0.314007 
+-0.3058345899969018 1:0.454476 2:0.948652 3:0.174883 4:-0.0397946 5:-0.454475 6:-0.948652 7:-0.442325 8:0.904787 9:-0.558912 10:-0.783207 11:-0.323264 12:-0.10814 13:-0.454124 14:-0.714833 15:-0.788736 16:-0.862256 17:0.440911 18:-0.830246 19:0.132482 20:-0.749265 21:0.50943 22:-0.788272 
+-1 1:-0.792715 2:0.307067 3:-0.798203 4:0.261205 5:0.792714 6:-0.307067 7:0.634521 8:0.977711 9:-0.608349 10:0.83682 11:-0.282645 12:0.932819 13:-0.419286 14:0.976412 15:-0.434809 16:0.0773 17:0.509631 18:-0.859197 19:0.22891 20:0.00155986 21:0.548643 22:-0.502924 
+-1 1:0.795872 2:0.350347 3:0.795872 4:0.350347 5:-0.795872 6:-0.350347 7:0.159614 8:0.975269 9:-0.59939 10:0.986503 11:-0.274122 12:0.993335 13:-0.421467 14:0.901366 15:-0.438505 16:-0.905651 17:0.456635 18:-0.911964 19:0.191985 20:-0.938038 21:0.528195 22:-0.225585 
+-0.8561146207073825 1:0.396423 2:0.637374 3:0.396423 4:0.637374 5:-0.396422 6:-0.637374 7:-0.544682 8:0.90862 9:-0.606329 10:0.997799 11:0.0645829 12:0.460201 13:-0.424133 14:0.360053 15:-0.440772 16:-0.983808 17:-0.647309 18:-0.982008 19:-0.437347 20:-0.986235 21:0.486437 22:-0.544801 
+-1 1:0.380801 2:0.389033 3:0.380801 4:0.389033 5:-0.380801 6:-0.389033 7:-0.572226 8:0.940192 9:-0.595803 10:1 11:-0.242985 12:0.980484 13:-0.123342 14:1 15:-0.385484 16:-0.743772 17:0.45919 18:-0.84692 19:0.215335 20:-0.875739 21:0.489558 22:0.156744 
+-1 1:-0.767483 2:0.0622756 3:-0.767483 4:0.0622756 5:0.767483 6:-0.0622757 7:0.590033 8:1 9:-0.609413 10:1 11:-0.27798 12:1 13:-0.41259 14:1 15:-0.430814 16:0.106161 17:0.516786 18:-0.272557 19:0.230707 20:0.704961 21:0.564232 22:0.356582 
+-0.730563744163681 1:-0.668374 2:-0.26158 3:-0.668374 4:-0.26158 5:0.668374 6:0.26158 7:0.415288 8:-1 9:-0.614618 10:-1 11:-0.28595 12:1 13:-0.419665 14:1 15:-0.433852 16:-0.137892 17:0.489976 18:1 19:0.453025 20:-0.299368 21:0.546516 22:0.470314 
+-1 1:-0.297354 2:-0.0707181 3:-0.297354 4:-0.0707181 5:0.297355 6:0.070718 7:-0.238881 8:1 9:-0.598231 10:1 11:-0.27587 12:1 13:-0.409261 14:1 15:-0.420867 16:-0.78332 17:0.454446 18:-0.354751 19:0.218161 20:0.0967076 21:0.549721 22:0.345704 
+-1 1:-0.824537 2:0.492614 3:-0.824537 4:0.492614 5:0.824537 6:-0.492614 7:0.690629 8:0.73043 9:-0.612161 10:0.670696 11:-0.285496 12:0.965013 13:-0.417322 14:0.920844 15:-0.438107 16:-0.965634 17:0.477821 18:0.826109 19:0.3156 20:0.108412 21:0.549347 22:-0.564079 
+-1 1:0.322354 2:0.47561 3:0.320656 4:0.445799 5:-0.322354 6:-0.47561 7:-0.675277 8:0.953242 9:-0.50744 10:0.10698 11:-0.283228 12:0.851789 13:-0.343993 14:0.817963 15:-0.398817 16:-0.963488 17:-0.435988 18:-0.92677 19:-0.295056 20:-0.781032 21:0.44056 22:-0.399254 
+-1 1:-0.820881 2:-0.059829 3:-0.820881 4:-0.059829 5:0.820881 6:0.0598291 7:0.684183 8:0.979115 9:-0.604718 10:1 11:-0.283004 12:0.980862 13:-0.417066 14:1 15:-0.443738 16:-0.705948 17:0.500993 18:0.922388 19:0.423673 20:-0.805536 21:0.544161 22:-0.304094 
+-1 1:0.700548 2:0.732181 3:0.669869 4:0.146105 5:-0.700548 6:-0.732181 7:-0.00845817 8:0.316352 9:-0.596465 10:0.490465 11:-0.257142 12:0.556675 13:-0.397558 14:0.236811 15:-0.439143 16:-0.832081 17:0.385703 18:-0.194489 19:0.227845 20:-0.949427 21:0.394388 22:-0.414531 
+-1 1:0.541867 2:0.561704 3:0.541867 4:0.561704 5:-0.541867 6:-0.561704 7:-0.28824 8:0.911582 9:-0.441525 10:0.79721 11:-0.203541 12:0.486591 13:-0.161126 14:0.409746 15:-0.0484364 16:-0.875679 17:-0.148297 18:-0.936686 19:-0.484127 20:-0.917694 21:0.5258 22:-0.551231 
+-1 1:-0.656029 2:0.272186 3:-0.656029 4:0.272186 5:0.656029 6:-0.272186 7:0.393521 8:0.92188 9:-0.578817 10:0.880935 11:-0.265524 12:0.969755 13:-0.320846 14:0.993371 15:-0.285081 16:0.179222 17:0.535012 18:-0.134057 19:0.225315 20:0.557611 21:0.577787 22:0.135846 
+-1 1:0.157111 2:0.543121 3:0.157111 4:0.543121 5:-0.157111 6:-0.543121 7:-0.966629 8:-0.0528439 9:-0.619864 10:0.094418 11:-0.276867 12:0.342407 13:-0.350232 14:-0.346832 15:-0.476239 16:-0.937902 17:-0.314103 18:-0.824794 19:-0.0263368 20:-0.975459 21:0.0839253 22:-0.534444 
+-1 1:-0.67354 2:0.55248 3:-0.67354 4:0.55248 5:0.673541 6:-0.55248 7:0.424397 8:0.827255 9:-0.606568 10:0.962555 11:-0.277078 12:0.738011 13:-0.418606 14:0.634877 15:-0.438127 16:0.957779 17:0.80045 18:-0.662054 19:0.220401 20:0.941966 21:0.828716 22:-0.317922 
+-1 1:0.67119 2:0.743902 3:0.634946 4:0.231707 5:-0.67119 6:-0.743902 7:-0.0602221 8:0.154936 9:-0.604384 10:0.286247 11:-0.277055 12:0.661604 13:-0.364537 14:0.789637 15:-0.41548 16:-0.935762 17:0.418935 18:-0.701344 19:0.168631 20:-0.984544 21:0.436366 22:-0.412 
+-1 1:-0.920398 2:-0.000478218 3:-0.920398 4:-0.000478218 5:0.920397 6:0.000478299 7:0.859647 8:1 9:-0.614377 10:0.97164 11:-0.285754 12:0.998428 13:-0.422282 14:1 15:-0.440975 16:-0.990831 17:0.494214 18:-0.794761 19:0.233376 20:0.995874 21:0.564844 22:-0.529095 
+-1 1:-0.874856 2:0.285569 3:-0.874856 4:0.285569 5:0.874856 6:-0.285569 7:0.77935 8:0.996218 9:-0.579749 10:0.988024 11:-0.275913 12:0.999882 13:-0.413516 14:1 15:-0.432648 16:-0.306094 17:0.500914 18:0.930494 19:0.284092 20:-1 21:0.548445 22:-0.362289 
+-1 1:-0.808501 2:0.369411 3:-0.808501 4:0.369411 5:0.8085 6:-0.369411 7:0.662354 8:1 9:-0.611906 10:0.930421 11:-0.284141 12:0.972686 13:-0.427859 14:0.984724 15:-0.442367 16:-0.0655697 17:0.509323 18:0.479945 19:0.244883 20:0.580084 21:0.554692 22:0.0803294 
+-1 1:0.150323 2:0.35984 3:0.133919 4:0.286335 5:-0.150322 6:-0.35984 7:-0.978598 8:0.989614 9:-0.112132 10:0.375646 11:-0.243991 12:0.903139 13:-0.215751 14:0.790831 15:-0.266959 16:-0.939576 17:-0.0371604 18:-0.932835 19:-0.0659647 20:-0.591267 21:0.491697 22:-0.339613 
diff --git a/CCSVM2.range b/CCSVM2.range
new file mode 100644
index 0000000..35a3bea
--- /dev/null
+++ b/CCSVM2.range
@@ -0,0 +1,24 @@
+x
+-1 1
+1 0 0.88008529
+2 0 0.7454545500000001
+3 0 0.88008529
+4 0 0.7454545500000001
+5 0.1199147 1
+6 0.25454545 1
+7 0.00169894 1
+8 0 1
+9 -0.17033307 0.71363628
+10 0 1
+11 -0.48724923 0.87749803
+12 0 1
+13 -0.32095683 0.80732721
+14 0 1
+15 -0.25240511 0.65639925
+16 0 1
+17 -0.37407586 0.12153328
+18 0 1
+19 -0.1868066 0.11459596
+20 0 1
+21 -0.71157825 0.20744637
+22 0.84306002 3.57725191
diff --git a/CCSVM2Phos.model b/CCSVM2Phos.model
new file mode 100644
index 0000000..7e3ad09
--- /dev/null
+++ b/CCSVM2Phos.model
@@ -0,0 +1,320 @@
+svm_type c_svc
+kernel_type rbf
+gamma 0.0434783
+nr_class 2
+total_sv 311
+rho 0.920224
+label 1 -1
+nr_sv 154 157
+SV
+1 1:-0.306225 2:0.532802 3:-0.306097 4:0.532802 5:0.306225 6:-0.532802 7:-0.272234 8:0.883795 9:-0.796615 10:-0.55418 11:-0.403206 12:0.986492 13:-0.00369742 14:0.910231 15:-0.11334 16:-0.150704 17:0.289522 18:-1 19:0.190512 20:-0.99268 21:-0.177757 22:-0.417847 23:-0.0632531 
+1 1:-0.0349255 2:0.687495 3:-0.034747 4:0.687495 5:0.0349255 6:-0.687495 7:-0.769739 8:0.999893 9:-0.506393 10:0.998949 11:-0.184983 12:0.988026 13:-0.188819 14:0.988558 15:0.0156268 16:-0.995519 17:-0.171474 18:-0.992497 19:-0.227256 20:-0.936611 21:0.0848795 22:-0.108089 23:-0.0632531 
+1 1:0.162597 2:0.710048 3:0.162812 4:0.710048 5:-0.162597 6:-0.710048 7:-0.868046 8:1 9:-0.779149 10:0.809524 11:-0.393677 12:0.844498 13:-0.24917 14:0.924883 15:-0.201451 16:-0.943969 17:0.191648 18:-0.882353 19:0.170514 20:-0.792148 21:0.194007 22:-0.860499 23:-0.0632531 
+1 1:-0.148112 2:0.781048 3:-0.147954 4:0.781048 5:0.148112 6:-0.781048 7:-0.56218 8:0.925492 9:-0.803581 10:0.859259 11:-0.365957 12:0.722106 13:-0.370044 14:0.500133 15:-0.199406 16:-0.912046 17:0.183318 18:-0.897784 19:0.160519 20:-0.134615 21:0.216893 22:-0.37673 23:0.294318 
+1 1:-0.516125 2:0.71301 3:-0.516036 4:0.71301 5:0.516125 6:-0.71301 7:0.112677 8:0.569825 9:-0.822344 10:-0.279817 11:-0.403771 12:0.878779 13:-0.406922 14:0.996107 15:-0.202869 16:-0.989414 17:0.287979 18:-0.884527 19:0.191749 20:-0.988495 21:0.208402 22:0.0415306 23:-0.0269604 
+1 1:0.26407 2:0.718679 3:0.264304 4:0.718679 5:-0.26407 6:-0.718679 7:-0.681967 8:0.922756 9:-0.817504 10:0.986233 11:-0.400206 12:-0.168108 13:-0.572699 14:0.305759 15:-0.12954 16:-0.130582 17:0.240107 18:-0.606051 19:0.185066 20:-1 21:0.217473 22:-0.74586 23:-0.0632531 
+1 1:0.247391 2:0.820807 3:0.247622 4:0.820807 5:-0.247391 6:-0.820807 7:-0.712553 8:0.870915 9:-0.762923 10:0.369565 11:-0.399138 12:0.973918 13:-0.222038 14:-0.298146 15:-0.244729 16:-1 17:0.256591 18:0.390625 19:0.198507 20:0.915294 21:0.230678 22:-0.761314 23:-0.0632531 
+1 1:0.21859 2:0.814726 3:0.218208 4:0.790403 5:-0.21859 6:-0.814726 7:-0.765367 8:0.989133 9:-0.638863 10:0.97679 11:-0.326505 12:0.904967 13:-0.00390097 14:-0.920201 15:-0.248326 16:-0.970755 17:0.283935 18:-0.820225 19:0.190674 20:0.875278 21:0.218898 22:-0.604254 23:-0.0632531 
+1 1:0.266839 2:0.707854 3:0.267073 4:0.707854 5:-0.266839 6:-0.707854 7:-0.676889 8:0.340359 9:-0.800598 10:-0.116088 11:-0.425256 12:0.98492 13:-0.0988582 14:1 15:0.19386 16:-0.74859 17:0.271083 18:-0.409987 19:0.190266 20:-0.941317 21:0.185219 22:-0.0567437 23:-0.0632531 
+1 1:-0.0863616 2:0.746904 3:-0.086408 4:0.720273 5:0.0863615 6:-0.746904 7:-0.675416 8:0.982624 9:-0.622616 10:0.998664 11:-0.0607351 12:0.999703 13:-0.151633 14:0.998817 15:-0.1833 16:-0.98361 17:0.135573 18:-0.805345 19:0.16883 20:-0.474114 21:0.210039 22:-0.648321 23:-0.0632531 
+1 1:0.658459 2:0.77898 3:0.657894 4:0.718507 5:-0.658459 6:-0.77898 7:0.0412571 8:-1 9:-0.834675 10:0.69236 11:-0.394934 12:0.996904 13:-0.406991 14:1 15:-0.20548 16:-1 17:0.254501 18:-1 19:0.191028 20:-0.855359 21:0.210253 22:-0.318576 23:-0.0632531 
+1 1:-0.240495 2:0.854875 3:-0.240707 4:0.810917 5:0.240495 6:-0.854875 7:-0.392769 8:-0.993031 9:-0.824338 10:-0.914949 11:-0.402612 12:0.984478 13:-0.39279 14:0.977262 15:-0.205614 16:-0.997399 17:0.286937 18:-1 19:0.190992 20:-0.928168 21:0.214553 22:-0.440547 23:-0.907866 
+1 1:0.188961 2:0.728386 3:0.189181 4:0.728386 5:-0.188961 6:-0.728386 7:-0.819701 8:0.985612 9:-0.519506 10:0.958438 11:0.0323777 12:0.975721 13:0.230589 14:0.934846 15:0.237687 16:-0.892632 17:0.253123 18:-0.924783 19:0.179293 20:-1 21:-0.104714 22:-0.438406 23:-0.0632531 
+1 1:-0.216965 2:0.660397 3:-0.21682 4:0.660397 5:0.216964 6:-0.660397 7:-0.435919 8:-0.0357246 9:-0.824068 10:-0.511265 11:-0.406554 12:0.503939 13:-0.375255 14:0.147809 15:-0.204928 16:-0.126638 17:0.290443 18:-0.841281 19:0.187779 20:1 21:0.217734 22:-0.603584 23:0.35836 
+1 1:0.126543 2:0.7712 3:0.126751 4:0.7712 5:-0.126543 6:-0.7712 7:-0.934163 8:0.470885 9:-0.809999 10:0.386412 11:-0.395955 12:0.98846 13:-0.317418 14:0.971743 15:-0.208852 16:-1 17:0.258772 18:-1 19:0.191955 20:0.144852 21:0.218132 22:-0.718059 23:-0.0632531 
+1 1:0.482737 2:0.702424 3:0.482927 4:0.663609 5:-0.482737 6:-0.702424 7:-0.28098 8:-0.0253395 9:-0.824065 10:-0.96895 11:-0.428305 12:0.999925 13:-0.0203751 14:0.995765 15:-0.115595 16:-0.288267 17:0.290332 18:-0.00046284 19:0.192487 20:0.931399 21:0.219189 22:-0.87511 23:-0.0632531 
+1 1:-0.120308 2:0.702424 3:-0.120145 4:0.702424 5:0.120308 6:-0.702424 7:-0.613166 8:0.996779 9:0.865883 10:0.955841 11:-0.376045 12:-0.824248 13:-0.432964 14:-0.20134 15:-0.211618 16:-1 17:0.290468 18:-1 19:0.192433 20:-0.985736 21:0.211291 22:-0.865218 23:-0.0632531 
+1 1:0.336888 2:0.559248 3:0.337135 4:0.559248 5:-0.336888 6:-0.559248 7:-0.548435 8:0.577939 9:-0.447842 10:-0.910517 11:-0.536692 12:0.575363 13:0.487363 14:0.722909 15:-0.204055 16:-0.711574 17:0.288609 18:-0.0335134 19:0.192136 20:-0.946893 21:0.212278 22:-0.252141 23:-0.0632531 
+1 1:-0.219208 2:0.762337 3:-0.219063 4:0.762337 5:0.219208 6:-0.762337 7:-0.431805 8:0.969195 9:-0.78788 10:0.985164 11:-0.344994 12:0.985881 13:-0.329152 14:0.931624 15:-0.161195 16:-0.988262 17:0.213492 18:-0.688027 19:0.186252 20:-0.977571 21:0.149049 22:-0.0547176 23:-0.0632531 
+1 1:0.489092 2:0.684477 3:0.489367 4:0.684477 5:-0.489092 6:-0.684477 7:-0.269326 8:-0.724159 9:-0.835749 10:0.439437 11:-0.392025 12:0.995355 13:0.153908 14:0.99791 15:0.145176 16:-0.995236 17:0.273034 18:-0.99882 19:0.165674 20:0.899849 21:0.22736 22:-0.849311 23:-0.0632531 
+1 1:-0.0501292 2:0.709536 3:-0.0499537 4:0.709536 5:0.0501292 6:-0.709536 7:-0.741859 8:1 9:-0.618351 10:0.999221 11:-0.242838 12:0.998154 13:-0.314072 14:0.992543 15:-0.158431 16:-0.869337 17:0.287157 18:-1 19:0.190409 20:0.215341 21:0.21862 22:-0.212347 23:-0.0632531 
+1 1:0.508016 2:0.596471 3:0.508294 4:0.596471 5:-0.508016 6:-0.596471 7:-0.234624 8:0.42156 9:-0.815146 10:-1 11:-0.402245 12:0.907751 13:0.0251185 14:-0.892501 15:-0.209672 16:-0.351823 17:0.290131 18:0.94012 19:0.19969 20:1 21:0.218011 22:-0.989382 23:-0.0632531 
+0.4341541218136421 1:-0.186337 2:0.775201 3:-0.186531 4:0.757659 5:0.186337 6:-0.775201 7:-0.492083 8:0.856329 9:-0.818178 10:0.485855 11:-0.399262 12:0.977313 13:-0.340842 14:0.870646 15:-0.171987 16:-0.600967 17:0.287615 18:-1 19:0.189426 20:-0.632363 21:0.214209 22:-0.295078 23:0.40084 
+1 1:-0.428794 2:0.675359 3:-0.428689 4:0.675359 5:0.428794 6:-0.675359 7:-0.0474687 8:-0.855238 9:-0.825026 10:0.564168 11:-0.394448 12:-0.340595 13:-0.418474 14:0.112532 15:-0.204492 16:-0.948902 17:0.275856 18:-0.831832 19:0.186012 20:1 21:0.218246 22:-0.418392 23:0.363924 
+0.8345004290608965 1:-0.219606 2:0.654477 3:-0.219462 4:0.654477 5:0.219606 6:-0.654477 7:-0.431075 8:1 9:-0.802097 10:0.961755 11:-0.400785 12:1 13:-0.382889 14:0.0393486 15:-0.209308 16:-1 17:0.284435 18:-0.284909 19:0.192418 20:1 21:0.287478 22:-0.778663 23:-0.0632531 
+1 1:-0.217722 2:0.739195 3:-0.217577 4:0.739195 5:0.217722 6:-0.739195 7:-0.43453 8:0.998529 9:-0.702731 10:0.999669 11:-0.17032 12:0.99722 13:0.265464 14:0.998297 15:0.121132 16:-0.986812 17:-0.3273 18:-0.998055 19:-0.108091 20:-0.862809 21:0.209738 22:-0.545599 23:-0.0632531 
+1 1:0.541535 2:0.633127 3:0.54182 4:0.633127 5:-0.541535 6:-0.633127 7:-0.173157 8:1 9:-0.559817 10:0.999773 11:-0.298788 12:0.99998 13:-0.236374 14:0.999971 15:-0.0653444 16:-0.759404 17:0.289709 18:0.410828 19:0.192591 20:0.910987 21:0.218508 22:-0.207797 23:-0.0632531 
+1 1:0.119575 2:0.703533 3:0.119782 4:0.703533 5:-0.119575 6:-0.703533 7:-0.94694 8:1 9:-0.792408 10:0.259849 11:-0.206731 12:0.978056 13:0.0987599 14:0.996327 15:0.0999099 16:-0.97546 17:0.281481 18:-0.743737 19:0.170398 20:-0.18297 21:0.20432 22:-0.62244 23:-0.0632531 
+1 1:0.144758 2:0.873902 3:0.14497 4:0.873902 5:-0.144758 6:-0.873902 7:-0.90076 8:0.969201 9:-0.758642 10:-0.996746 11:-0.49423 12:0.998751 13:0.0300521 14:0.785322 15:-0.201455 16:-1 17:0.290428 18:-1 19:0.192488 20:-0.771229 21:0.194938 22:-0.456922 23:-0.0632531 
+1 1:-0.386043 2:0.614732 3:-0.385929 4:0.614732 5:0.386043 6:-0.614732 7:-0.125866 8:1 9:-0.805941 10:-1 11:-0.404572 12:0.889398 13:-0.342019 14:-1 15:-0.21345 16:-0.871646 17:0.239677 18:-0.941601 19:0.157741 20:-0.884363 21:0.211568 22:-0.652159 23:-0.617591 
+1 1:0.0557555 2:0.637733 3:0.0559506 4:0.637733 5:-0.0557554 6:-0.637733 7:-0.936028 8:-0.0164683 9:-0.82549 10:0.144006 11:-0.390894 12:0.982952 13:-0.280561 14:0.392394 15:-0.199116 16:-0.574871 17:0.272508 18:-0.791519 19:0.174658 20:-1 21:0.200462 22:-0.556245 23:-0.0632531 
+1 1:0.240387 2:0.557835 3:0.240616 4:0.557835 5:-0.240387 6:-0.557835 7:-0.725397 8:0.980389 9:-0.734141 10:1 11:-0.213811 12:0.995571 13:-0.234204 14:0.993438 15:-0.0535768 16:-0.726678 17:0.272805 18:-0.877184 19:0.189955 20:-0.0170749 21:0.217419 22:-0.0876851 23:-0.0632531 
+1 1:0.00349736 2:0.826392 3:0.00101542 4:0.807777 5:-0.0034973 6:-0.826392 7:-0.840198 8:0.113419 9:-0.821843 10:-0.720149 11:-0.409123 12:-0.59719 13:-0.43442 14:-0.464864 15:-0.214859 16:-0.97627 17:-0.229407 18:-1 19:0.0784554 20:-0.849469 21:0.178391 22:-0.151814 23:-0.0632531 
+1 1:-0.171103 2:0.572861 3:-0.17095 4:0.572861 5:0.171103 6:-0.572861 7:-0.520019 8:0.109346 9:-0.819378 10:0.260822 11:-0.381463 12:0.315848 13:-0.382266 14:0.194383 15:-0.195365 16:-0.949534 17:0.263764 18:-0.838685 19:0.167333 20:-0.084619 21:0.215628 22:-0.530801 23:0.339372 
+1 1:-0.523278 2:0.658209 3:-0.52319 4:0.658209 5:0.523278 6:-0.658209 7:0.125795 8:0.966822 9:-0.823164 10:-1 11:-0.402194 12:0.98707 13:-0.398026 14:1 15:-0.208576 16:-0.998123 17:0.287543 18:-1 19:0.184289 20:1 21:0.218344 22:-0.745667 23:-0.0632531 
+1 1:-0.28954 2:0.777822 3:-0.289408 4:0.777822 5:0.28954 6:-0.777822 7:-0.302832 8:0.996123 9:-0.743915 10:0.690819 11:-0.389177 12:0.910347 13:-0.388554 14:0.793317 15:-0.184427 16:-0.982833 17:0.0106509 18:-1 19:0.167176 20:-0.393081 21:0.162874 22:-0.148599 23:-0.0207082 
+1 1:0.418861 2:0.744699 3:0.418827 4:0.72146 5:-0.418861 6:-0.744699 7:-0.398115 8:-0.276899 9:-0.832891 10:-0.462298 11:-0.413044 12:0.170994 13:-0.359996 14:-0.0541652 15:-0.227293 16:-0.984623 17:0.28776 18:-0.990695 19:0.188786 20:-0.323687 21:0.215724 22:-0.535869 23:-0.0632531 
+1 1:0.107347 2:0.785115 3:0.106387 4:0.758676 5:-0.107348 6:-0.785115 7:-0.969363 8:-0.763671 9:-0.830769 10:1 11:-0.401404 12:0.745063 13:-0.259053 14:0.808729 15:-0.182904 16:-0.639518 17:0.284613 18:1 19:0.194282 20:-0.847581 21:0.217239 22:-0.398374 23:0.0552768 
+1 1:0.0671629 2:0.789327 3:0.0673602 4:0.789327 5:-0.0671628 6:-0.789327 7:-0.956947 8:-0.271343 9:-0.846312 10:-0.657465 11:-0.464495 12:-0.214314 13:-0.689886 14:-0.517092 15:-1 16:-0.493849 17:0.290292 18:-0.195974 19:0.192513 20:0.945581 21:0.228781 22:-0.817859 23:-0.0632531 
+1 1:0.15233 2:0.634037 3:0.152543 4:0.634037 5:-0.15233 6:-0.634037 7:-0.886875 8:0.985611 9:-0.671378 10:0.970543 11:-0.198614 12:0.903288 13:-0.239296 14:0.587315 15:-0.16984 16:-0.543036 17:0.27746 18:0.19393 19:0.203289 20:-0.301432 21:0.216377 22:-0.270404 23:-0.0632531 
+1 1:-0.0453449 2:0.731436 3:-0.0451684 4:0.731436 5:0.0453447 6:-0.731437 7:-0.750632 8:-0.107448 9:-0.825215 10:0.70577 11:-0.334646 12:0.889888 13:-0.354492 14:0.847748 15:-0.173777 16:-0.787967 17:0.27012 18:-0.414971 19:0.187407 20:-0.773894 21:0.187805 22:0.0158976 23:-0.0632531 
+1 1:0.0601876 2:0.632558 3:0.0603836 4:0.632558 5:-0.0601876 6:-0.632558 7:-0.944156 8:0.383126 9:-0.80594 10:0.0295566 11:-0.400374 12:0.925 13:-0.218222 14:0.946588 15:-0.145421 16:-0.986207 17:0.225916 18:-0.930769 19:0.143359 20:-0.419476 21:0.191983 22:0.0253624 23:-0.0632531 
+1 1:-0.161997 2:0.797194 3:-0.161842 4:0.797194 5:0.161997 6:-0.797194 7:-0.536717 8:0.942067 9:-0.785248 10:0.989778 11:-0.374849 12:0.972677 13:-0.346326 14:0.974429 15:-0.153014 16:-0.717805 17:0.262764 18:-1 19:0.192361 20:-0.746421 21:0.212703 22:-0.0957457 23:0.403285 
+1 1:-0.100918 2:0.606783 3:-0.100752 4:0.606783 5:0.100918 6:-0.606783 7:-0.648723 8:0.972445 9:-0.759378 10:0.838443 11:-0.375796 12:1 13:-0.207455 14:0.950135 15:-0.114143 16:-0.556339 17:0.246156 18:-1 19:0.181752 20:-0.719222 21:0.1549 22:0.0582286 23:-0.0632531 
+1 1:0.178708 2:0.749864 3:0.178926 4:0.749864 5:-0.178708 6:-0.749864 7:-0.838503 8:0.516482 9:-0.78854 10:0.883464 11:-0.330934 12:0.818683 13:-0.152922 14:0.731571 15:-0.151949 16:-1 17:0.17631 18:-0.640448 19:0.181283 20:0.437181 21:0.229119 22:-0.294616 23:-0.0632531 
+1 1:0.10186 2:0.605388 3:0.102064 4:0.605388 5:-0.10186 6:-0.605388 7:-0.979425 8:0.99038 9:-0.783032 10:0.65055 11:-0.373562 12:0.996776 13:-0.191403 14:0.975602 15:-0.179632 16:-0.74613 17:0.284487 18:-0.463557 19:0.187998 20:0.712418 21:0.2279 22:-0.304853 23:-0.0632531 
+1 1:0.686141 2:0.693947 3:0.686453 4:0.693947 5:-0.686141 6:-0.693947 7:0.0920201 8:0.990604 9:-0.821792 10:0.976436 11:-0.400575 12:0.982894 13:0.0729861 14:0.0770402 15:-0.207491 16:-0.923776 17:0.290511 18:-0.623337 19:0.192383 20:-0.986807 21:0.217521 22:-0.935983 23:-0.0632531 
+1 1:0.0635538 2:0.575232 3:0.0637503 4:0.575232 5:-0.0635539 6:-0.575232 7:-0.950329 8:0.957018 9:-0.802388 10:0.918807 11:-0.391858 12:0.999203 13:-0.255428 14:1 15:-0.113911 16:-0.0239476 17:0.289595 18:-0.57802 19:0.17813 20:-0.880769 21:0.205445 22:0.117657 23:-0.0632531 
+1 1:0.207202 2:0.722577 3:0.207425 4:0.722577 5:-0.207202 6:-0.722577 7:-0.786252 8:0.615963 9:-0.744839 10:0.302986 11:-0.363134 12:0.431554 13:-0.142151 14:0.179229 15:-0.160378 16:-0.727251 17:0.0231157 18:-0.869577 19:0.0309888 20:-0.91894 21:0.203818 22:-0.77315 23:-0.0632531 
+1 1:0.116612 2:0.751932 3:0.115789 4:0.733317 5:-0.116612 6:-0.751932 7:-0.952374 8:0.874878 9:-0.613366 10:0.717092 11:-0.284599 12:0.998957 13:-0.161959 14:0.917417 15:-0.135463 16:-0.915587 17:0.13432 18:-0.819163 19:0.00232518 20:-0.258316 21:0.206694 22:-0.232752 23:-0.0632531 
+1 1:-0.387804 2:0.695552 3:-0.38769 4:0.695552 5:0.387804 6:-0.695552 7:-0.122637 8:0.973853 9:-0.811555 10:0.969349 11:-0.381948 12:0.987446 13:-0.311156 14:0.986849 15:-0.13086 16:-0.936836 17:0.257818 18:-0.965243 19:0.158503 20:-1 21:0.213802 22:-0.23223 23:-0.0632531 
+1 1:0.532303 2:0.729594 3:0.532586 4:0.729594 5:-0.532303 6:-0.729594 7:-0.190086 8:1 9:-0.783825 10:1 11:-0.393876 12:1 13:-0.172228 14:0.99315 15:-0.0120141 16:-0.401428 17:0.27125 18:-0.782135 19:0.191253 20:-0.340801 21:0.217167 22:-0.690986 23:-0.0632531 
+1 1:0.245824 2:0.706206 3:0.246054 4:0.706206 5:-0.245824 6:-0.706206 7:-0.715427 8:0.993616 9:-0.328227 10:0.986997 11:0.216223 12:0.991176 13:0.0550569 14:0.971326 15:0.0431528 16:-0.912875 17:0.243873 18:-0.944168 19:0.161733 20:-0.579504 21:0.20411 22:-0.107436 23:-0.0632531 
+1 1:-0.00832371 2:0.672979 3:-0.00814046 4:0.672979 5:0.0083239 6:-0.672979 7:-0.818521 8:0.297334 9:-0.811032 10:0.298776 11:-0.388401 12:1 13:-0.273576 14:0.990446 15:-0.0677674 16:-0.53247 17:0.283952 18:-1 19:0.184584 20:1 21:0.223036 22:-0.688598 23:-0.0632531 
+1 1:0.132734 2:0.547167 3:0.132944 4:0.547167 5:-0.132734 6:-0.547167 7:-0.922809 8:0.947276 9:-0.812591 10:0.768566 11:-0.25118 12:0.0494058 13:-0.393631 14:-0.222756 15:-0.282622 16:-0.702472 17:0.194362 18:-0.984063 19:0.0937532 20:-0.85342 21:0.214302 22:-0.563088 23:-0.0632531 
+1 1:0.0314578 2:0.845684 3:0.0316484 4:0.845684 5:-0.0314578 6:-0.845684 7:-0.891472 8:0.965987 9:-0.806981 10:-0.876213 11:-0.495916 12:0.977565 13:-0.313275 14:1 15:-0.206214 16:-1 17:0.289785 18:-1 19:0.192404 20:0.977372 21:0.224864 22:-0.725741 23:-0.0632531 
+1 1:0.455671 2:0.731008 3:0.45594 4:0.731008 5:-0.455671 6:-0.731008 7:-0.330613 8:0.359655 9:-0.821171 10:0.103563 11:-0.400356 12:0.519318 13:-0.268662 14:-0.12904 15:-0.253595 16:0.388154 17:0.303146 18:0.198657 19:0.209778 20:0.842271 21:0.222274 22:-0.641625 23:-0.0632531 
+1 1:0.140156 2:0.687495 3:0.140367 4:0.687495 5:-0.140156 6:-0.687495 7:-0.909199 8:0.997556 9:-0.652371 10:0.503889 11:0.0201712 12:0.635657 13:0.0892422 14:0.470433 15:0.0133549 16:-0.950448 17:0.172844 18:-0.846154 19:0.156472 20:-0.757293 21:0.188762 22:-0.457396 23:-0.0632531 
+1 1:0.0734864 2:0.66634 3:0.0736848 4:0.66634 5:-0.0734864 6:-0.66634 7:-0.968543 8:0.953281 9:-0.54209 10:0.947652 11:0.0939237 12:0.899415 13:0.211877 14:0.815257 15:0.0789465 16:-0.989869 17:0.201174 18:-0.892012 19:0.03788 20:-0.126484 21:0.21601 22:0.0384862 23:-0.0632531 
+0.6440765177238077 1:-0.206812 2:0.711586 3:-0.206666 4:0.711586 5:0.206812 6:-0.711586 7:-0.454536 8:1 9:-0.781289 10:1 11:-0.38774 12:0.833333 13:-0.359038 14:0.977579 15:-0.185706 16:0.116635 17:0.292787 18:-0.128571 19:0.18952 20:-0.927978 21:0.202014 22:-0.00174955 23:-0.0632531 
+0.566826615180688 1:-0.16809 2:0.720273 3:-0.167937 4:0.720273 5:0.16809 6:-0.720273 7:-0.525543 8:0.998939 9:-0.654107 10:0.999699 11:-0.300221 12:-0.0231844 13:-0.416526 14:-0.358835 15:-0.24606 16:-0.261239 17:0.268799 18:-0.793803 19:0.14198 20:0.766706 21:0.222821 22:-0.536773 23:0.180826 
+1 1:-0.129706 2:0.698046 3:-0.129545 4:0.698046 5:0.129706 6:-0.698046 7:-0.595932 8:0.513905 9:-0.817739 10:-0.726664 11:-0.42855 12:0.695017 13:-0.343012 14:0.0655372 15:-0.207771 16:-0.60691 17:0.266154 18:-0.93658 19:0.139886 20:-0.89167 21:0.204805 22:-0.52815 23:0.0720403 
+1 1:-0.0729949 2:0.577403 3:-0.0728235 4:0.577403 5:0.0729949 6:-0.577403 7:-0.699928 8:0.699864 9:-0.776629 10:0.937796 11:-0.352453 12:0.883473 13:-0.329476 14:0.663417 15:-0.184604 16:-0.861255 17:0.265568 18:-0.863081 19:0.175518 20:-0.693931 21:0.210424 22:-0.412549 23:-0.0632531 
+1 1:-0.378084 2:0.483157 3:-0.377969 4:0.483157 5:0.378084 6:-0.483157 7:-0.14046 8:0.855937 9:-0.819031 10:-1 11:-0.402191 12:0.787256 13:-0.387222 14:1 15:-0.207637 16:-0.827411 17:0.276106 18:-1 19:0.162785 20:0.710157 21:0.226291 22:-0.278221 23:0.23432 
+1 1:0.101276 2:0.687495 3:0.10148 4:0.687495 5:-0.101276 6:-0.687495 7:-0.980496 8:0.994648 9:-0.512788 10:0.982353 11:-0.330881 12:0.999286 13:0.0130847 14:0.998448 15:-0.0320905 16:-0.989002 17:0.0672119 18:-0.984202 19:0.0431935 20:-0.637362 21:0.215716 22:-0.508034 23:-0.0632531 
+1 1:-0.237115 2:0.890075 3:-0.237079 4:0.862642 5:0.237115 6:-0.890075 7:-0.398967 8:0.998298 9:0.835756 10:-1 11:-0.402212 12:-0.293894 13:-0.415261 14:0.983201 15:-0.207746 16:-1 17:0.289603 18:-1 19:0.190196 20:1 21:0.218281 22:-0.674865 23:-0.0632531 
+1 1:-0.170443 2:0.657252 3:-0.17029 4:0.657252 5:0.170443 6:-0.657252 7:-0.521229 8:0.97105 9:-0.790875 10:0.77564 11:-0.37038 12:0.998583 13:-0.240406 14:0.853846 15:-0.208412 16:-0.99897 17:0.008621 18:-0.897032 19:0.156762 20:-0.885098 21:0.204586 22:-0.0829299 23:-0.0632531 
+1 1:-0.331121 2:0.761787 3:-0.330997 4:0.761787 5:0.331121 6:-0.761787 7:-0.22658 8:0.22236 9:-0.8228 10:0.693405 11:-0.396567 12:0.59197 13:-0.388919 14:0.304452 15:-0.199639 16:-1 17:0.279122 18:-0.0214689 19:0.19212 20:-0.272727 21:0.216024 22:-0.457318 23:-0.0632531 
+1 1:-0.0598278 2:0.468824 3:-0.0623656 4:0.424056 5:0.0598277 6:-0.468824 7:-0.724074 8:0.994709 9:-0.647303 10:0.944828 11:-0.320249 12:0.989095 13:-0.11863 14:1 15:-0.126456 16:-0.489051 17:0.284568 18:-0.871743 19:0.163422 20:-0.995342 21:-0.199482 22:-0.574823 23:-0.0632531 
+1 1:0.0483076 2:0.599891 3:0.0485014 4:0.599891 5:-0.0483076 6:-0.599891 7:-0.922371 8:-0.346535 9:-0.831795 10:0.314286 11:-0.400262 12:0.987571 13:-0.289406 14:-0.435407 15:-0.223449 16:-0.724687 17:0.223867 18:-0.263158 19:0.186611 20:-0.866667 21:0.207226 22:-0.508339 23:-0.0632531 
+1 1:0.168241 2:0.857417 3:0.168457 4:0.857417 5:-0.168242 6:-0.857417 7:-0.857696 8:0.894682 9:-0.781288 10:0.983026 11:-0.280529 12:0.984492 13:-0.107579 14:0.954739 15:-0.114122 16:-0.962573 17:0.130208 18:-1 19:0.0573685 20:-0.173176 21:0.204548 22:-0.327724 23:-0.0632531 
+1 1:-0.236923 2:0.660539 3:-0.236782 4:0.660539 5:0.236923 6:-0.66054 7:-0.399319 8:0.890501 9:-0.444824 10:0.84965 11:-0.21866 12:0.78618 13:-0.230125 14:0.628217 15:-0.142747 16:-0.988246 17:0.231274 18:-0.914408 19:0.120629 20:0.745898 21:0.220422 22:-0.182255 23:-0.0434058 
+1 1:-0.223964 2:0.699189 3:-0.22382 4:0.699189 5:0.223964 6:-0.699189 7:-0.423083 8:0.781026 9:-0.81465 10:0.925069 11:-0.36878 12:0.699332 13:-0.238442 14:0.575473 15:-0.157387 16:-1 17:0.272486 18:-0.751666 19:0.189249 20:-0.759764 21:0.217128 22:-0.864747 23:-0.0632531 
+1 1:0.136638 2:0.74585 3:0.136848 4:0.74585 5:-0.136638 6:-0.74585 7:-0.915651 8:-0.220876 9:-0.824551 10:-1 11:-0.402217 12:-0.520886 13:-0.890013 14:1 15:-0.208168 16:-0.873023 17:-0.751058 18:-0.995182 19:0.180066 20:-0.527121 21:0.217419 22:-0.853771 23:-0.0632531 
+1 1:-0.312793 2:0.678192 3:-0.312666 4:0.678192 5:0.312793 6:-0.678192 7:-0.260189 8:0.929336 9:-0.71202 10:0.960239 11:-0.179472 12:0.999853 13:-0.170301 14:0.999872 15:-0.0646316 16:-0.952787 17:0.0383451 18:-0.998824 19:-0.071344 20:-0.355805 21:0.217478 22:-0.229235 23:0.0244024 
+1 1:-0.0192814 2:0.739195 3:-0.019323 4:0.675186 5:0.0192814 6:-0.739195 7:-0.798427 8:0.968521 9:-0.822993 10:-1 11:-0.402897 12:0.989899 13:-0.383999 14:-0.985027 15:-0.276541 16:-0.965286 17:0.273968 18:-0.136267 19:0.19238 20:0.742633 21:0.221264 22:-0.809248 23:-0.0632531 
+1 1:-0.235987 2:0.646672 3:-0.235846 4:0.646672 5:0.235988 6:-0.646672 7:-0.401035 8:-0.114108 9:-0.824104 10:0.669285 11:-0.400872 12:1 13:-0.184714 14:0.995031 15:-0.0526796 16:-0.872314 17:0.285868 18:-1 19:0.191453 20:-1 21:0.210279 22:-0.0429919 23:0.404365 
+1 1:-0.0297984 2:0.656623 3:-0.029619 4:0.656623 5:0.0297985 6:-0.656623 7:-0.779141 8:0.993438 9:-0.700997 10:0.977482 11:-0.373951 12:0.990358 13:0.299084 14:0.972454 15:0.0492867 16:-0.974579 17:0.0640217 18:-1 19:0.159524 20:0.376872 21:0.219493 22:0.0257981 23:-0.0632531 
+1 1:0.27701 2:0.757659 3:0.277246 4:0.757659 5:-0.27701 6:-0.75766 7:-0.658238 8:0.289403 9:-0.81095 10:0.885804 11:-0.289817 12:0.981413 13:0.0860793 14:0.996706 15:0.0800199 16:-0.97378 17:0.256406 18:-1 19:0.192488 20:-0.469311 21:0.151961 22:-0.317927 23:-0.0632531 
+1 1:-0.342565 2:0.503732 3:-0.342443 4:0.503732 5:0.342565 6:-0.503732 7:-0.205595 8:0.90533 9:-0.823638 10:0.840146 11:-0.402121 12:0.827948 13:-0.407506 14:-0.96634 15:-0.210014 16:-1 17:0.161712 18:-1 19:-0.0629136 20:-0.95995 21:0.213501 22:-0.880661 23:0.254755 
+1 1:0.576811 2:0.758398 3:0.577103 4:0.758398 5:-0.576811 6:-0.758398 7:-0.108467 8:0.987384 9:-0.789352 10:0.858425 11:-0.387642 12:0.981544 13:0.0196823 14:1 15:-0.136626 16:0.954226 17:0.308156 18:-1 19:0.192429 20:-0.999398 21:0.0830696 22:-0.753143 23:-0.0632531 
+1 1:-0.139193 2:0.727341 3:-0.139587 4:0.70482 5:0.139193 6:-0.727342 7:-0.578535 8:-0.782106 9:-0.840057 10:-0.274142 11:-0.40687 12:0.946582 13:0.0948028 14:0.984362 15:-0.151443 16:-0.414223 17:0.278427 18:-0.817138 19:0.179467 20:-1 21:0.197958 22:-0.355559 23:-0.00919147 
+1 1:0.261415 2:0.773316 3:0.261648 4:0.773316 5:-0.261414 6:-0.773316 7:-0.686837 8:-1 9:-0.824609 10:0.980584 11:-0.360146 12:0.874048 13:-0.274443 14:0.981657 15:-0.156209 16:-1 17:0.273757 18:-0.468715 19:0.182453 20:0.437444 21:0.227839 22:-0.556424 23:-0.0632531 
+0.7428170623900131 1:0.152541 2:0.563311 3:0.152754 4:0.563311 5:-0.152541 6:-0.563311 7:-0.886488 8:0.899953 9:-0.640214 10:0.91024 11:-0.362241 12:0.939472 13:0.327847 14:0.938259 15:-0.0187221 16:-0.894873 17:0.094033 18:-1 19:0.171848 20:-0.650007 21:0.197296 22:-0.708569 23:-0.0632531 
+1 1:-0.248337 2:0.699189 3:-0.248198 4:0.699189 5:0.248337 6:-0.699189 7:-0.378388 8:1 9:-0.823464 10:0.797255 11:-0.400856 12:0.956463 13:-0.377113 14:0.884177 15:-0.192558 16:-1 17:0.289144 18:-0.326411 19:0.192036 20:-0.721111 21:0.214353 22:-0.871277 23:-0.0632531 
+1 1:0.000682715 2:0.707854 3:0.000867698 4:0.707854 5:-0.000682792 6:-0.707854 7:-0.835037 8:0.843388 9:-0.737043 10:0.693171 11:-0.255865 12:0.870157 13:-0.255591 14:0.888411 15:-0.00403872 16:-0.9059 17:0.243196 18:-0.783729 19:0.137626 20:-0.144475 21:0.212808 22:-0.213076 23:-0.0632531 
+1 1:-0.0189278 2:0.71301 3:-0.0187465 4:0.71301 5:0.0189278 6:-0.71301 7:-0.799075 8:1 9:-0.771747 10:1 11:0.16858 12:0.985637 13:-0.140125 14:0.978262 15:0.010199 16:-0.895109 17:0.273922 18:-0.869634 19:0.16182 20:-0.468155 21:0.211244 22:-0.0973053 23:0.148949 
+1 1:0.314176 2:0.654477 3:0.314419 4:0.654477 5:-0.314176 6:-0.654477 7:-0.590083 8:0.987678 9:-0.4149 10:0.989072 11:0.0394218 12:0.937717 13:-0.00426253 14:0.91183 15:0.0941794 16:-1 17:0.290548 18:-1 19:0.188761 20:0.265623 21:0.218638 22:-0.0956312 23:-0.0632531 
+1 1:0.0374852 2:0.193204 3:0.037677 4:0.193204 5:-0.0374852 6:-0.193204 7:-0.902525 8:1 9:-0.750877 10:-0.757549 11:-0.525479 12:0.915461 13:0.281952 14:0.688736 15:-0.0798519 16:0.0488372 17:0.293726 18:-1 19:0.192488 20:-1 21:0.217526 22:-0.652002 23:-0.0632531 
+1 1:-0.0401896 2:0.802376 3:-0.0400122 4:0.802376 5:0.0401897 6:-0.802376 7:-0.760086 8:0.619918 9:-0.780856 10:0.298471 11:-0.391417 12:0.782492 13:-0.16892 14:0.380992 15:-0.197757 16:-0.401451 17:0.266574 18:-0.943662 19:0.161187 20:0.355089 21:0.227519 22:-0.37085 23:-0.0632531 
+1 1:0.593364 2:0.721662 3:0.593659 4:0.721662 5:-0.593364 6:-0.721662 7:-0.0781126 8:0.327007 9:-0.817028 10:0.435712 11:-0.395637 12:0.334403 13:-0.263916 14:0.333923 15:-0.133776 16:0.200581 17:0.292005 18:0.996591 19:0.192922 20:-0.292007 21:0.217495 22:-0.662365 23:-0.0632531 
+1 1:-0.252656 2:0.57582 3:-0.252518 4:0.57582 5:0.252656 6:-0.57582 7:-0.370469 8:-0.870265 9:-0.838649 10:-0.993001 11:-0.424336 12:0.855285 13:-0.301459 14:0.0717378 15:-0.206151 16:-0.915042 17:0.231242 18:-1 19:0.182447 20:0.458941 21:0.221055 22:-0.182083 23:0.138912 
+1 1:-0.0849132 2:0.791886 3:-0.0847441 4:0.791886 5:0.0849133 6:-0.791886 7:-0.678072 8:1 9:-0.683607 10:0.332315 11:-0.401688 12:0.855131 13:-0.330215 14:-1 15:-0.210893 16:-0.684838 17:0.267898 18:-0.792907 19:0.182893 20:-0.847203 21:0.199504 22:-0.2918 23:0.146189 
+0.655182527845297 1:0.137335 2:0.862642 3:0.137101 4:0.835209 5:-0.137335 6:-0.862642 7:-0.914373 8:0.837256 9:-0.817288 10:0.987549 11:-0.387045 12:0.285412 13:-0.412263 14:-0.91026 15:-0.215793 16:0.965595 17:0.915413 18:0.897666 19:0.361581 20:-0.956581 21:0.214418 22:-0.551314 23:-0.0632531 
+1 1:0.0316125 2:0.770547 3:0.0310414 4:0.751932 5:-0.0316125 6:-0.770547 7:-0.891755 8:-0.0106301 9:-0.824543 10:-0.224223 11:-0.405134 12:0.475421 13:-0.325222 14:0.344676 15:-0.146767 16:-0.875327 17:0.26912 18:0.952775 19:0.21573 20:-0.211055 21:0.216167 22:-0.279366 23:-0.053392 
+1 1:-0.170342 2:0.310862 3:-0.170189 4:0.310862 5:0.170342 6:-0.310862 7:-0.521414 8:1 9:-0.79365 10:0.990465 11:-0.297924 12:1 13:-0.369465 14:1 15:-0.188219 16:0.0898204 17:0.291671 18:-0.207547 19:0.186937 20:-0.945018 21:-0.448259 22:0.336798 23:0.0575001 
+1 1:0.0297766 2:0.76474 3:0.0299669 4:0.76474 5:-0.0297767 6:-0.76474 7:-0.888389 8:0.146848 9:-0.807663 10:-1 11:-0.615893 12:0.0493344 13:-0.395803 14:-1 15:-0.681691 16:0.574712 17:0.495383 18:-0.405405 19:0.191986 20:0.270015 21:0.472228 22:-0.848925 23:-0.0632531 
+1 1:0.212054 2:0.858627 3:0.212278 4:0.858627 5:-0.212054 6:-0.858628 7:-0.777353 8:1 9:-0.666995 10:-0.324512 11:-0.467039 12:1 13:-0.139155 14:1 15:-0.133725 16:-0.98472 17:-0.149672 18:-0.993251 19:-0.0659815 20:0.737368 21:0.220036 22:-0.208711 23:-0.0632531 
+1 1:-0.0887151 2:0.787202 3:-0.0890203 4:0.739195 5:0.0887151 6:-0.787202 7:-0.671101 8:0.663843 9:-0.731821 10:-0.201834 11:-0.406241 12:-0.21059 13:-0.453487 14:-0.43447 15:-0.25778 16:-0.441863 17:0.276802 18:1 19:0.208065 20:-0.883309 21:0.207454 22:-0.492605 23:0.369715 
+1 1:-0.241389 2:0.851212 3:-0.241624 4:0.783646 5:0.241389 6:-0.851212 7:-0.39113 8:-0.924655 9:-0.895289 10:0.0273721 11:-0.400054 12:0.982632 13:-0.317019 14:0.994482 15:-0.193302 16:0.997846 17:0.337517 18:1 19:0.195872 20:-0.992619 21:-0.825363 22:-0.820041 23:-0.0632531 
+1 1:0.319385 2:0.732686 3:0.319629 4:0.732686 5:-0.319385 6:-0.732686 7:-0.580531 8:1 9:-0.736953 10:-0.68749 11:-0.462725 12:-0.047668 13:-0.455591 14:-0.559951 15:-0.260023 16:-0.666956 17:0.202791 18:-0.959572 19:0.176329 20:-0.752905 21:0.140323 22:-0.73553 23:-0.0632531 
+1 1:-0.130918 2:0.653341 3:-0.130758 4:0.653341 5:0.130918 6:-0.653341 7:-0.593709 8:0.703526 9:-0.817849 10:0.688669 11:-0.379211 12:0.795469 13:-0.371665 14:0.696061 15:-0.173147 16:-0.988898 17:0.236125 18:-0.789147 19:0.146776 20:-0.311967 21:0.209821 22:0.0130777 23:-0.0632531 
+1 1:0.391936 2:0.783904 3:0.392077 4:0.756053 5:-0.391936 6:-0.783904 7:-0.447489 8:0.34343 9:-0.7257 10:0.479118 11:-0.332733 12:0.99375 13:-0.0345791 14:0.998113 15:-0.00424696 16:-0.980366 17:0.160478 18:-0.845197 19:0.121106 20:-0.910691 21:0.156709 22:-0.629092 23:-0.0632531 
+1 1:0.267094 2:0.676631 3:0.267329 4:0.676631 5:-0.267094 6:-0.676631 7:-0.676421 8:1 9:-0.673153 10:0.173562 11:-0.392116 12:-0.0018655 13:-0.415182 14:0.89756 15:-0.182894 16:-0.471607 17:0.25765 18:-0.854545 19:0.18995 20:-0.356629 21:0.212264 22:-0.507379 23:-0.0632531 
+1 1:0.0959064 2:0.849586 3:0.0944512 4:0.791053 5:-0.0959064 6:-0.849586 7:-0.990343 8:0.974251 9:-0.748831 10:0.919141 11:-0.310543 12:0.893121 13:-0.269683 14:0.778715 15:-0.132585 16:-0.930518 17:0.210268 18:-0.652816 19:0.187104 20:-0.22755 21:0.216872 22:-0.221957 23:0.197357 
+1 1:-0.196263 2:0.746178 3:-0.196215 4:0.71301 5:0.196263 6:-0.746178 7:-0.473881 8:0.964844 9:-0.657281 10:0.352147 11:-0.265404 12:0.978448 13:-0.006587 14:0.928788 15:-0.13768 16:-0.945645 17:0.126833 18:-0.894325 19:0.175177 20:-0.983234 21:0.00336787 22:-0.722465 23:0.0397413 
+1 1:0.197841 2:0.745457 3:0.198062 4:0.745457 5:-0.197841 6:-0.745457 7:-0.803418 8:0.954179 9:-0.793629 10:-0.489741 11:-0.438176 12:0.999942 13:-0.171646 14:0.999781 15:-0.0567855 16:-0.94005 17:0.271378 18:-0.287684 19:0.176573 20:1 21:0.220368 22:-0.952607 23:-0.0632531 
+1 1:-0.316112 2:0.737195 3:-0.315986 4:0.737195 5:0.316112 6:-0.737195 7:-0.254104 8:-0.0503979 9:-0.823972 10:0.981812 11:-0.391155 12:0.975472 13:-0.354869 14:0.929745 15:-0.18953 16:-0.937282 17:0.26277 18:-0.906634 19:0.178873 20:0.693796 21:0.253616 22:-0.247211 23:0.195339 
+1 1:0.203057 2:0.604775 3:0.203279 4:0.604775 5:-0.203057 6:-0.604775 7:-0.793852 8:1 9:-0.765451 10:-0.25 11:-0.404695 12:0.789996 13:0.214549 14:-0.89545 15:-0.464871 16:-0.261905 17:0.286437 18:-0.97351 19:0.161646 20:0.408696 21:0.223065 22:-0.957098 23:-0.0632531 
+1 1:0.163407 2:0.746178 3:0.163622 4:0.746178 5:-0.163407 6:-0.746178 7:-0.866562 8:0.865534 9:-0.814725 10:-0.111186 11:-0.45802 12:0.141573 13:-0.314879 14:-0.0813665 15:-0.211221 16:-0.860756 17:-0.314961 18:-0.849228 19:0.109748 20:-0.978765 21:0.203833 22:-0.665168 23:-0.0632531 
+1 1:0.132525 2:0.606654 3:0.131936 4:0.586825 5:-0.132525 6:-0.606654 7:-0.923193 8:0.882583 9:-0.806111 10:0.688871 11:-0.322639 12:0.919744 13:-0.109111 14:1 15:-0.000878276 16:-0.542225 17:0.239351 18:-0.365741 19:0.153461 20:-0.353748 21:0.19068 22:-0.414726 23:-0.0632531 
+1 1:0.562699 2:0.801716 3:0.562988 4:0.801716 5:-0.562699 6:-0.801716 7:-0.134346 8:0.458081 9:-0.817054 10:0.437927 11:-0.386207 12:0.956342 13:0.130385 14:0.991192 15:0.141364 16:-1 17:0.246428 18:-1 19:0.157161 20:1 21:0.217602 22:-0.849358 23:-0.0632531 
+0.2988193557278515 1:0.347432 2:0.878852 3:0.346528 4:0.812515 5:-0.347432 6:-0.878852 7:-0.5291 8:0.844587 9:-0.816957 10:0.976674 11:-0.394422 12:0.972622 13:-0.347815 14:0.960071 15:-0.198813 16:-0.772482 17:0.0792672 18:0.58651 19:0.193074 20:0.743247 21:0.225391 22:-0.83757 23:-0.0632531 
+1 1:0.0795594 2:0.747248 3:0.0797589 4:0.747248 5:-0.0795594 6:-0.747248 7:-0.97968 8:-0.319205 9:-0.897828 10:-0.569646 11:-0.690431 12:0.953629 13:-0.104135 14:1 15:-0.0846447 16:0.410645 17:0.30153 18:-0.0269685 19:0.190532 20:-0.763702 21:0.109011 22:-0.355503 23:-0.0632531 
+1 1:0.059314 2:0.752031 3:0.0595098 4:0.752031 5:-0.059314 6:-0.752031 7:-0.942554 8:0.873817 9:-0.801572 10:1 11:-0.309524 12:0.921663 13:-0.280381 14:0.792936 15:-0.155293 16:-0.829146 17:0.253466 18:-0.598662 19:0.169901 20:-0.462898 21:0.208241 22:-0.372094 23:-0.0632531 
+1 1:-0.273901 2:0.493512 3:-0.273766 4:0.493512 5:0.273901 6:-0.493512 7:-0.33151 8:1 9:-0.822017 10:1 11:-0.401936 12:1 13:-0.395406 14:-0.346396 15:-0.21447 16:-0.522133 17:0.273192 18:-0.0192103 19:0.192306 20:-0.498588 21:0.209083 22:-0.939724 23:-0.71051 
+1 1:-0.187247 2:0.895924 3:-0.188202 4:0.734959 5:0.187247 6:-0.895924 7:-0.490414 8:0.999854 9:0.343868 10:0.768621 11:-0.401621 12:-0.807611 13:-0.416157 14:-0.191769 15:-0.210551 16:-0.980761 17:0.285657 18:-1 19:0.189957 20:-0.325897 21:0.217286 22:-0.710228 23:-0.0632531 
+1 1:0.157469 2:0.778087 3:0.157683 4:0.778087 5:-0.157469 6:-0.778087 7:-0.877451 8:0.748084 9:-0.742003 10:0.743259 11:-0.358747 12:0.734411 13:-0.144515 14:0.705709 15:-0.166213 16:-1 17:0.245888 18:-0.508607 19:0.165835 20:-1 21:0.192053 22:-0.254484 23:-0.0632531 
+1 1:-0.320087 2:0.641759 3:-0.319961 4:0.641759 5:0.320087 6:-0.641759 7:-0.246815 8:-1 9:-0.823911 10:0.762277 11:-0.389191 12:0.795418 13:-0.376996 14:0.940983 15:-0.200583 16:-1 17:0.282901 18:-0.685962 19:0.190397 20:-0.804593 21:0.21726 22:-0.574855 23:0.506934 
+1 1:-0.379339 2:0.731008 3:-0.379224 4:0.731008 5:0.379339 6:-0.731008 7:-0.138159 8:1 9:-0.823674 10:-0.968733 11:-0.459563 12:0.991014 13:-0.387902 14:-1 15:-0.209367 16:-1 17:0.190889 18:-0.988973 19:0.162569 20:0.857366 21:0.217784 22:-0.59747 23:0.825435 
+1 1:-0.0079766 2:0.665626 3:-0.00779315 4:0.665626 5:0.00797652 6:-0.665626 7:-0.819158 8:0.987081 9:-0.791062 10:0.994786 11:-0.356565 12:0.997747 13:-0.109997 14:0.991928 15:0.0140392 16:-0.846008 17:0.26754 18:-0.51638 19:0.188775 20:-1 21:0.209277 22:-0.242523 23:-0.0632531 
+1 1:-0.714753 2:0.558786 3:-0.7147 4:0.558786 5:0.714752 6:-0.558786 7:0.476917 8:0.85285 9:-0.823786 10:0.531205 11:-0.40192 12:1 13:-0.409548 14:1 15:-0.208435 16:-1 17:0.28923 18:-0.635248 19:0.192387 20:-0.17749 21:0.217473 22:-0.157493 23:-0.0632531 
+1 1:0.132004 2:0.631041 3:0.132213 4:0.631041 5:-0.132004 6:-0.631041 7:-0.924148 8:0.815261 9:-0.814086 10:0.526578 11:-0.384288 12:0.989916 13:-0.154592 14:0.997116 15:-0.0198444 16:-0.933528 17:0.263562 18:0.380783 19:0.196539 20:-0.347594 21:0.214762 22:-0.4581 23:-0.0632531 
+1 1:0.269008 2:0.540177 3:0.269243 4:0.540177 5:-0.269008 6:-0.540177 7:-0.672911 8:-0.442786 9:-0.829197 10:0.765933 11:-0.310489 12:0.746992 13:-0.288874 14:0.764322 15:-0.144849 16:-0.863128 17:0.26496 18:0.534545 19:0.206158 20:-0.918548 21:0.109146 22:-0.0168542 23:-0.0632531 
+1 1:0.38941 2:0.668785 3:0.389667 4:0.668785 5:-0.38941 6:-0.668785 7:-0.452121 8:1 9:-0.822391 10:0.991691 11:-0.400312 12:0.999356 13:0.00416786 14:-0.895105 15:-0.209663 16:-0.522689 17:0.290426 18:-0.450382 19:0.192281 20:-1 21:0.217432 22:-0.950087 23:-0.0632531 
+1 1:0.206941 2:0.681587 3:0.207164 4:0.681587 5:-0.206941 6:-0.681587 7:-0.786729 8:0.997779 9:0.307293 10:1 11:-0.394126 12:-0.311236 13:-0.417953 14:1 15:-0.200268 16:-0.944453 17:0.280156 18:-1 19:0.140211 20:-0.507052 21:0.215701 22:-0.863883 23:-0.0632531 
+1 1:-0.0884673 2:0.652413 3:-0.0882989 4:0.652413 5:0.0884673 6:-0.652413 7:-0.671555 8:1 9:-0.822668 10:0.767666 11:-0.401181 12:1 13:-0.351758 14:1 15:-0.180568 16:-1 17:0.288085 18:-0.918796 19:0.191001 20:0.974217 21:0.236137 22:-0.767877 23:-0.0632531 
+1 1:-0.302318 2:0.69508 3:-0.302189 4:0.69508 5:0.302318 6:-0.695081 7:-0.279399 8:0.593539 9:-0.743154 10:0.397243 11:-0.340935 12:0.438294 13:-0.264157 14:0.0909128 15:-0.181745 16:-0.140605 17:0.280876 18:-0.474279 19:0.1536 20:-1 21:0.217124 22:-0.460489 23:-0.0632531 
+1 1:-0.154169 2:0.760393 3:-0.154013 4:0.760393 5:0.154169 6:-0.760393 7:-0.551072 8:1 9:-0.818338 10:0.991443 11:-0.384922 12:0.968997 13:-0.366901 14:0.517135 15:-0.203058 16:-0.808561 17:0.281905 18:-1 19:0.1868 20:-1 21:0.216772 22:-0.506543 23:0.039733 
+1 1:-0.169449 2:0.637254 3:-0.169725 4:0.614732 5:0.169449 6:-0.637254 7:-0.523053 8:-0.384934 9:-0.83839 10:0.0177925 11:-0.40189 12:0.988658 13:-0.369346 14:0.0151736 15:-0.208545 16:-0.683296 17:0.288366 18:-0.406408 19:0.191617 20:-0.19827 21:0.217386 22:-0.450126 23:0.567944 
+1 1:0.306101 2:0.76861 3:0.306342 4:0.76861 5:-0.306101 6:-0.76861 7:-0.604892 8:0.826528 9:-0.598333 10:0.691547 11:-0.342657 12:0.999468 13:-0.0976256 14:0.994571 15:-0.103882 16:-0.455285 17:0.273478 18:-0.65247 19:0.170639 20:-0.714286 21:0.213681 22:-0.410574 23:-0.0632531 
+1 1:-0.0162082 2:0.660539 3:-0.0160263 4:0.660539 5:0.0162083 6:-0.66054 7:-0.804063 8:1 9:-0.821463 10:0.95889 11:-0.377688 12:0.979258 13:-0.0665365 14:0.988576 15:-0.0461302 16:-0.919031 17:0.268748 18:-0.380981 19:0.189391 20:-0.767838 21:0.213538 22:-0.39165 23:-0.0632531 
+0.2856827907018439 1:-0.684526 2:0.110102 3:-0.684467 4:0.110102 5:0.684526 6:-0.110102 7:0.421488 8:1 9:-0.78992 10:0.964946 11:-0.382698 12:1 13:-0.40381 14:-0.580247 15:-0.209582 16:-0.930906 17:0.288998 18:-0.196429 19:0.192411 20:-0.75 21:0.217479 22:0.255658 23:0.590964 
+1 1:-0.773007 2:-0.730704 3:-0.772965 4:-0.730704 5:0.773007 6:0.730704 7:0.583744 8:1 9:-0.821356 10:1 11:-0.397049 12:1 13:-0.324341 14:1 15:-0.150586 16:-0.261146 17:0.288246 18:0.687861 19:0.202492 20:0.0676692 21:0.217951 22:1 23:-0.0632531 
+1 1:-0.251386 2:0.627887 3:-0.251247 4:0.627887 5:0.251386 6:-0.627887 7:-0.372798 8:1 9:-0.812278 10:1 11:-0.388124 12:0.401094 13:-0.391384 14:0.258262 15:-0.191485 16:-0.979474 17:0.257881 18:-0.745725 19:0.145144 20:-0.272054 21:0.214466 22:0.0910053 23:-0.0632531 
+1 1:0.0827524 2:0.676631 3:0.0829525 4:0.676631 5:-0.0827523 6:-0.676631 7:-0.985535 8:0.969728 9:-0.358383 10:0.973743 11:0.0186958 12:0.881494 13:-0.106832 14:0.892554 15:-0.0345385 16:-1 17:0.284043 18:-0.946093 19:0.0840182 20:-0.978778 21:0.163304 22:-0.301146 23:-0.0632531 
+1 1:-0.481895 2:0.692766 3:-0.481799 4:0.692766 5:0.481895 6:-0.692766 7:0.0499072 8:0.984959 9:-0.822857 10:0.997152 11:-0.39756 12:0.995699 13:-0.405762 14:0.992475 15:-0.202504 16:-0.663942 17:0.289733 18:-0.864198 19:0.191971 20:0.466052 21:0.217757 22:-0.0428539 23:-0.0632531 
+1 1:-0.231255 2:0.655637 3:-0.231112 4:0.655637 5:0.231254 6:-0.655637 7:-0.409714 8:0.924196 9:-0.782716 10:0.89549 11:-0.38072 12:0.900021 13:-0.338303 14:0.859268 15:-0.165078 16:-0.890131 17:0.280767 18:-0.389578 19:0.192359 20:-0.728437 21:0.217395 22:-0.241824 23:-0.0632531 
+1 1:0.0834001 2:0.47814 3:0.0836004 4:0.47814 5:-0.0834 6:-0.47814 7:-0.986723 8:0.859823 9:-0.612383 10:0.747824 11:-0.363183 12:0.716861 13:0.417414 14:-0.0356569 15:-0.212337 16:-1 17:0.268306 18:-0.97399 19:-0.0447194 20:-0.986662 21:0.163091 22:0.130959 23:-0.0632531 
+1 1:0.310102 2:0.537514 3:0.310344 4:0.537514 5:-0.310102 6:-0.537515 7:-0.597555 8:0.0974247 9:-0.757168 10:0.359379 11:-0.175093 12:0.656508 13:0.178032 14:0.536458 15:0.347688 16:-0.834029 17:0.248047 18:-0.028302 19:0.187214 20:-0.852782 21:0.203985 22:0.0348462 23:-0.0632531 
+1 1:0.115184 2:0.77285 3:0.11539 4:0.77285 5:-0.115184 6:-0.77285 7:-0.954992 8:1 9:-0.457656 10:1 11:0.0449038 12:1 13:-0.115993 14:0.995237 15:0.0488367 16:-0.943355 17:0.273089 18:-0.686157 19:0.177235 20:0.0650121 21:0.21785 22:-0.162361 23:-0.0632531 
+0.429568614147608 1:-0.143074 2:0.634037 3:-0.143555 4:0.60508 5:0.143074 6:-0.634037 7:-0.571418 8:0.940388 9:-0.72765 10:0.773155 11:-0.395897 12:0.997778 13:-0.00774144 14:0.991556 15:-0.0250029 16:-0.830451 17:0.150096 18:-0.682776 19:0.16134 20:-0.962036 21:-0.12765 22:-0.582292 23:0.0432485 
+1 1:-0.304142 2:0.710722 3:-0.304013 4:0.710722 5:0.304142 6:-0.710722 7:-0.276054 8:0.507304 9:-0.823754 10:0.992404 11:-0.389524 12:0.998026 13:-0.394616 14:0.988827 15:-0.201448 16:-1 17:0.279404 18:0.026296 19:0.192523 20:-1 21:0.216954 22:-0.247635 23:-0.0632531 
+1 1:-0.586005 2:0.759998 3:-0.585929 4:0.759998 5:0.586005 6:-0.759998 7:0.240823 8:0.922656 9:-0.817053 10:0.976227 11:-0.388324 12:0.853804 13:-0.405845 14:0.595331 15:-0.207949 16:-0.660706 17:0.283402 18:-0.696888 19:0.187447 20:0.943403 21:0.234708 22:0.00929982 23:-0.0632531 
+1 1:0.152267 2:0.727024 3:0.152296 4:0.70133 5:-0.152267 6:-0.727024 7:-0.886991 8:-0.940299 9:-0.825879 10:-0.33424 11:-0.402362 12:0.923168 13:-0.287365 14:0.840684 15:-0.199739 16:-0.594021 17:0.278554 18:1 19:0.192719 20:1 21:0.217588 22:-0.482103 23:-0.0632531 
+1 1:-0.074882 2:0.738079 3:-0.0764778 4:0.695654 5:0.0748819 6:-0.738079 7:-0.696468 8:0.987462 9:-0.802749 10:0.994541 11:-0.333623 12:0.951638 13:-0.146965 14:0.957186 15:-0.0109602 16:-0.777402 17:0.279085 18:-0.546869 19:0.179025 20:-0.429812 21:0.211034 22:-0.382605 23:-0.0632531 
+1 1:-0.201124 2:0.738524 3:-0.200976 4:0.738524 5:0.201124 6:-0.738524 7:-0.464966 8:0.999836 9:-0.806376 10:0.998834 11:-0.398746 12:0.977462 13:-0.401739 14:0.977772 15:-0.204993 16:-0.895868 17:0.221069 18:-0.929028 19:0.16461 20:1 21:0.217616 22:-0.510797 23:-0.0632531 
+1 1:0.832015 2:0.665626 3:0.832237 4:0.618242 5:-0.832015 6:-0.665626 7:0.359521 8:-0.847997 9:-0.828167 10:-0.867057 11:-0.403878 12:1 13:-0.265435 14:1 15:-0.154832 16:-0.142708 17:0.290228 18:0.8437 19:0.194211 20:-1 21:0.217489 22:-0.973201 23:-0.0632531 
+1 1:-0.276997 2:0.283769 3:-0.276863 4:0.283769 5:0.276997 6:-0.283769 7:-0.325833 8:1 9:-0.801578 10:1 11:-0.368446 12:1 13:-0.269996 14:1 15:-0.0355276 16:-0.632275 17:0.246484 18:-0.874972 19:0.154454 20:-0.344346 21:0.214895 22:0.350405 23:-0.0632531 
+1 1:-0.217976 2:0.674441 3:-0.217832 4:0.674441 5:0.217976 6:-0.674441 7:-0.434063 8:0.992651 9:-0.796831 10:0.995306 11:-0.39314 12:1 13:-0.206171 14:1 15:-0.130954 16:-1 17:0.271701 18:-0.927443 19:0.169175 20:-0.572928 21:0.204214 22:-0.23169 23:0.128867 
+1 1:0.0540225 2:0.737428 3:0.0535875 4:0.715044 5:-0.0540226 6:-0.737428 7:-0.932851 8:0.91236 9:-0.805296 10:-0.350698 11:-0.431183 12:0.482463 13:-0.306 14:-0.0888146 15:-0.220719 16:-0.316805 17:0.276154 18:-0.701077 19:0.179071 20:0.656294 21:0.21973 22:-0.366619 23:0.286769 
+0.9688165698611798 1:0.330968 2:0.63838 3:0.331214 4:0.63838 5:-0.330968 6:-0.63838 7:-0.559291 8:1 9:-0.624198 10:0.999681 11:0.0854053 12:0.999812 13:-0.113073 14:0.844538 15:-0.167792 16:-0.659771 17:0.285185 18:-0.661952 19:0.191112 20:0.661482 21:0.218541 22:-0.337786 23:-0.0632531 
+1 1:-0.231053 2:0.716866 3:-0.230911 4:0.716866 5:0.231053 6:-0.716866 7:-0.410083 8:0.454954 9:-0.823416 10:-0.51565 11:-0.402684 12:0.880681 13:-0.409661 14:0.706652 15:-0.207796 16:-0.989361 17:0.271186 18:-1 19:0.173258 20:-0.953465 21:0.203734 22:-0.330396 23:0.424297 
+1 1:-0.472107 2:0.772638 3:-0.472009 4:0.772638 5:0.472107 6:-0.772638 7:0.0319576 8:0.875774 9:-0.823402 10:0.041742 11:-0.402138 12:0.973815 13:-0.397331 14:-0.502318 15:-0.210202 16:0.525919 17:0.290796 18:-0.984924 19:0.190403 20:0.56232 21:0.217879 22:-0.449878 23:-0.0632531 
+-1 1:0.153388 2:0.864943 3:0.152959 4:0.817865 5:-0.153388 6:-0.864943 7:-0.884934 8:0.909895 9:-0.809953 10:0.996062 11:-0.395676 12:1 13:-0.273465 14:0.977478 15:-0.17772 16:-0.929831 17:0.253402 18:-1 19:0.180805 20:-1 21:0.0433982 22:-0.469804 23:-0.344711 
+-1 1:-0.229871 2:0.652703 3:-0.229729 4:0.652703 5:0.229871 6:-0.652703 7:-0.412251 8:-1 9:-0.825485 10:-0.531027 11:-0.40271 12:0.922207 13:-0.115007 14:0.942864 15:-0.0128998 16:-0.614655 17:-0.0594098 18:-1 19:-0.310416 20:-0.997564 21:0.0501324 22:-0.617139 23:-0.188432 
+-0.9092126510934362 1:0.00498815 2:0.886191 3:0.00517386 4:0.886191 5:-0.00498823 6:-0.886191 7:-0.842932 8:1 9:-0.821017 10:1 11:-0.398403 12:1 13:-0.413173 14:-1 15:-0.209448 16:-0.9417 17:-0.0294413 18:-0.997541 19:0.0582388 20:-0.999751 21:0.128743 22:-0.840465 23:-0.0632531 
+-1 1:-0.0337745 2:0.52836 3:-0.0335958 4:0.52836 5:0.0337745 6:-0.52836 7:-0.77185 8:1 9:-0.812554 10:-1 11:-0.402191 12:0.911357 13:-0.328257 14:1 15:-0.183513 16:-1 17:0.177294 18:-0.627119 19:0.157888 20:-0.649485 21:0.118253 22:-0.741959 23:-0.0632531 
+-1 1:-0.00537357 2:0.881188 3:-0.00586151 4:0.855494 5:0.00537356 6:-0.881188 7:-0.823931 8:-0.37528 9:-0.824032 10:1 11:-0.395612 12:0.907762 13:-0.407531 14:0.00699448 15:-0.209361 16:-0.97121 17:0.256732 18:0.377578 19:0.193629 20:-0.915104 21:0.214264 22:-0.502858 23:-0.0632531 
+-1 1:0.0780578 2:0.53345 3:0.0782571 4:0.53345 5:-0.0780578 6:-0.53345 7:-0.976926 8:0.784173 9:-0.815473 10:-0.387755 11:-0.417639 12:0.149492 13:-0.399445 14:-1 15:-0.349829 16:-0.927641 17:0.152291 18:-0.238494 19:0.178682 20:-0.303754 21:0.205418 22:-0.699619 23:-0.0632531 
+-1 1:-0.085083 2:0.803705 3:-0.0864272 4:0.775201 5:0.0850831 6:-0.803705 7:-0.677761 8:1 9:-0.82374 10:-0.48134 11:-0.40503 12:0.356594 13:-0.374107 14:-0.882419 15:-0.301936 16:-0.547432 17:0.0712362 18:-0.0220246 19:0.190936 20:0.906275 21:0.220828 22:-0.587056 23:-0.408774 
+-1 1:-0.0236976 2:0.752031 3:-0.0238529 4:0.73252 5:0.0236976 6:-0.752031 7:-0.790329 8:0.782326 9:-0.820108 10:-0.292881 11:-0.40571 12:0.976323 13:-0.238472 14:0.660114 15:-0.168139 16:-0.965555 17:0.267388 18:-0.442611 19:0.17919 20:0.160667 21:0.218524 22:-0.303291 23:-0.645235 
+-1 1:0.0197718 2:0.767938 3:0.0199603 4:0.767938 5:-0.0197718 6:-0.767938 7:-0.870042 8:0.607312 9:-0.81809 10:0.567405 11:-0.400052 12:0.960324 13:-0.384508 14:0.865721 15:-0.195749 16:-1 17:0.238694 18:-0.907564 19:0.186359 20:0.918589 21:0.254532 22:0.099845 23:-0.452855 
+-1 1:-0.403047 2:0.757659 3:-0.403584 4:0.710883 5:0.403047 6:-0.75766 7:-0.0946841 8:1 9:-0.823866 10:0.954239 11:-0.397411 12:0.769493 13:-0.407051 14:0.599646 15:-0.206965 16:-1 17:0.270754 18:0.924588 19:0.193075 20:-0.0342843 21:0.217489 22:-0.42378 23:-0.69769 
+-1 1:-0.0867144 2:0.395144 3:-0.0865456 4:0.395144 5:0.0867143 6:-0.395144 7:-0.674769 8:1 9:-0.821072 10:1 11:-0.399359 12:0.999328 13:-0.204281 14:0.992732 15:-0.197819 16:-0.960199 17:0.240766 18:-0.993491 19:0.111337 20:0.784738 21:0.235846 22:0.306655 23:-0.360083 
+-1 1:0.0523725 2:0.478585 3:0.0517859 4:0.451152 5:-0.0523725 6:-0.478585 7:-0.929825 8:1 9:-0.823849 10:-0.00025892 11:-0.402196 12:0.964959 13:-0.389871 14:-0.0909091 15:-0.209389 16:-0.91389 17:0.268671 18:-0.997382 19:0.165898 20:1 21:0.217611 22:-0.75917 23:-0.541129 
+-1 1:0.258657 2:0.867918 3:0.258314 4:0.847871 5:-0.258657 6:-0.867918 7:-0.691893 8:0.885222 9:-0.802932 10:0.215716 11:-0.398374 12:0.999682 13:-0.352974 14:0.97779 15:-0.203124 16:-1 17:0.260109 18:-0.199233 19:0.190683 20:-0.981555 21:0.210049 22:-0.365063 23:-0.0632531 
+-1 1:0.342084 2:0.590949 3:0.342332 4:0.590949 5:-0.342084 6:-0.590949 7:-0.538907 8:1 9:-0.823672 10:1 11:-0.400829 12:0.900663 13:-0.404841 14:1 15:-0.194006 16:-0.994144 17:0.250974 18:-0.925076 19:0.179531 20:-0.938791 21:0.214408 22:-0.312872 23:-0.0632531 
+-1 1:-0.0344139 2:0.609006 3:-0.0342354 4:0.609006 5:0.0344139 6:-0.609006 7:-0.770677 8:0.342466 9:-0.822419 10:1 11:-0.343021 12:0.795565 13:-0.368433 14:1 15:-0.185619 16:-0.95828 17:0.167882 18:-1 19:0.169118 20:-0.796211 21:0.15032 22:-0.0213129 23:-0.0632531 
+-1 1:-0.126653 2:0.798213 3:-0.128532 4:0.76474 5:0.126653 6:-0.798213 7:-0.60153 8:0.226016 9:-0.817603 10:-0.798561 11:-0.425702 12:-0.089934 13:-0.464511 14:-0.372917 15:-0.242824 16:-0.785819 17:0.0187224 18:-0.787583 19:-0.0254431 20:0.529881 21:0.249344 22:-0.881634 23:-0.0632531 
+-0.02052404560933848 1:0.375048 2:0.942112 3:0.375302 4:0.942112 5:-0.375048 6:-0.942112 7:-0.478458 8:0.929369 9:-0.808008 10:0.893641 11:-0.384385 12:0.953351 13:-0.346808 14:0.777578 15:-0.190904 16:-0.987618 17:0.0772213 18:-0.839594 19:0.162965 20:-0.940857 21:0.203114 22:-0.325972 23:-0.0632531 
+-1 1:0.648348 2:0.822776 3:0.645804 4:0.770279 5:-0.648348 6:-0.822776 7:0.0227154 8:-1 9:-0.824471 10:-1 11:-0.40279 12:-0.291834 13:-0.413771 14:0.322316 15:-0.209101 16:0.0244751 17:0.2906 18:-0.257695 19:0.192285 20:0.98378 21:0.219241 22:-0.608739 23:-0.0632531 
+-1 1:-0.108019 2:0.835209 3:-0.107854 4:0.835209 5:0.108019 6:-0.83521 7:-0.635701 8:0.988792 9:-0.815144 10:0.997823 11:-0.333819 12:0.239429 13:-0.380854 14:0.392573 15:-0.164401 16:-0.608519 17:0.274312 18:-0.687312 19:0.190433 20:0.586141 21:0.219758 22:-0.110262 23:-0.0632531 
+-1 1:-0.551248 2:0.87173 3:-0.552142 4:0.826963 5:0.551248 6:-0.87173 7:0.177086 8:0.993059 9:-0.797783 10:0.988618 11:-0.397822 12:0.995299 13:-0.355542 14:0.991062 15:-0.185584 16:-0.688027 17:0.268461 18:-0.981231 19:0.189402 20:-0.625931 21:0.217482 22:-0.269898 23:-1 
+-1 1:0.0193923 2:0.490412 3:0.0195807 4:0.490412 5:-0.0193923 6:-0.490412 7:-0.869346 8:0.859425 9:-0.817704 10:1 11:-0.400899 12:0.629893 13:-0.390289 14:0.953846 15:-0.194636 16:-0.979548 17:0.0504075 18:-0.830585 19:0.0197857 20:0.723785 21:0.223263 22:0.127673 23:-0.0632531 
+-1 1:0.0855421 2:0.83905 3:0.0824222 4:0.766079 5:-0.0855421 6:-0.83905 7:-0.990651 8:0.942696 9:-0.788742 10:1 11:-0.317722 12:0.744387 13:-0.352152 14:1 15:-0.173751 16:-0.90917 17:-0.00871449 18:-0.981575 19:-0.140839 20:-1 21:-0.106647 22:-0.556389 23:-0.0632531 
+-1 1:0.0552481 2:0.900251 3:0.0548969 4:0.81198 5:-0.0552481 6:-0.900251 7:-0.935098 8:0.998928 9:-0.72895 10:0.922501 11:-0.385457 12:0.999896 13:0.0061359 14:0.99302 15:-0.110662 16:-0.944823 17:0.236488 18:-1 19:0.181487 20:-1 21:0.217526 22:-0.78978 23:-0.661682 
+-1 1:0.0462187 2:0.79664 3:0.0464121 4:0.79664 5:-0.0462187 6:-0.79664 7:-0.91854 8:0.885498 9:-0.800006 10:0.725025 11:-0.368631 12:0.973132 13:-0.195143 14:0.833977 15:-0.104642 16:-0.999457 17:-0.112182 18:-0.989849 19:-0.0804385 20:-0.991786 21:0.144259 22:-0.229752 23:-0.0632531 
+-1 1:0.342533 2:0.832209 3:0.340995 4:0.718192 5:-0.342532 6:-0.832209 7:-0.538084 8:1 9:-0.823105 10:-0.273597 11:-0.403707 12:0.3105 13:-0.401382 14:0.0589086 15:-0.208795 16:-0.993637 17:0.139746 18:-1 19:0.186969 20:1 21:0.217608 22:-0.696378 23:-0.0632531 
+-1 1:-0.0888127 2:0.603587 3:-0.0886443 4:0.603587 5:0.0888126 6:-0.603588 7:-0.670922 8:0.572243 9:-0.774784 10:0.5 11:-0.397619 12:0.601307 13:-0.335356 14:1 15:-0.0733615 16:-0.599815 17:-0.151237 18:-0.813187 19:0.15469 20:-0.975051 21:-0.26461 22:-0.868273 23:-0.0632531 
+-1 1:0.313109 2:0.854875 3:0.311792 4:0.810917 5:-0.313109 6:-0.854875 7:-0.592041 8:-0.815789 9:-0.825142 10:-0.454545 11:-0.402685 12:1 13:-0.402861 14:-0.118049 15:-0.209987 16:-0.959718 17:0.275681 18:-0.23356 19:0.190884 20:0.911877 21:0.218567 22:-0.55481 23:-0.0632531 
+-1 1:0.033984 2:0.730673 3:0.0341752 4:0.730673 5:-0.033984 6:-0.730673 7:-0.896104 8:1 9:-0.395802 10:1 11:-0.0811909 12:0.995344 13:0.428458 14:0.989116 15:0.0627502 16:-0.991659 17:-0.0504616 18:-0.99805 19:-0.0977683 20:-1 21:-0.514037 22:-0.0957368 23:-0.0632531 
+-1 1:-0.361138 2:0.917507 3:-0.361622 4:0.862642 5:0.361138 6:-0.917508 7:-0.171535 8:0.981529 9:-0.819808 10:0.529376 11:-0.401356 12:0.93581 13:-0.408929 14:0.817766 15:-0.209052 16:-0.994106 17:0.253021 18:-0.941933 19:0.180326 20:0.982615 21:0.218816 22:-0.630748 23:-0.797713 
+-1 1:0.135156 2:0.784085 3:0.135366 4:0.784085 5:-0.135156 6:-0.784085 7:-0.918368 8:0.336424 9:-0.821525 10:0.199394 11:-0.401443 12:0.999935 13:-0.280398 14:-0.757128 15:-0.209719 16:-0.982419 17:0.276259 18:-0.999396 19:0.163582 20:0.0787265 21:0.217554 22:-0.539808 23:-0.0632531 
+-1 1:-0.10388 2:0.851212 3:-0.10594 4:0.783646 5:0.10388 6:-0.851212 7:-0.643291 8:0.998237 9:-0.801309 10:-0.895803 11:-0.420002 12:0.552857 13:-0.412898 14:0.315766 15:-0.209339 16:-0.987181 17:0.222402 18:-0.957838 19:0.148202 20:-0.686478 21:0.2164 22:-0.861619 23:-0.0632531 
+-1 1:-0.00348877 2:0.800168 3:-0.00363322 4:0.746904 5:0.00348876 6:-0.800168 7:-0.827387 8:0.13687 9:-0.797967 10:-0.905165 11:-0.43382 12:0.185193 13:-0.211988 14:-0.0684615 15:-0.231554 16:-0.346309 17:-0.22145 18:-0.985785 19:-0.146139 20:-0.00896782 21:0.217199 22:-0.677666 23:-0.0632531 
+-1 1:0.0712267 2:0.681844 3:0.0714247 4:0.681844 5:-0.0712266 6:-0.681844 7:-0.964399 8:0.999667 9:-0.314068 10:0.999166 11:-0.217965 12:0.999816 13:-0.353627 14:0.999017 15:-0.18525 16:-0.993618 17:0.26337 18:-1 19:0.192416 20:0.872518 21:0.220366 22:-0.33261 23:-0.0632531 
+-1 1:0.147248 2:0.869097 3:0.146243 4:0.797557 5:-0.147248 6:-0.869097 7:-0.896194 8:-0.972856 9:-0.829263 10:0.90389 11:-0.400813 12:0.562395 13:-0.410629 14:0.426222 15:-0.191814 16:-1 17:0.275088 18:0.383562 19:0.19275 20:0.855339 21:0.22144 22:-0.911164 23:-0.0632531 
+-1 1:0.381945 2:0.296777 3:0.382201 4:0.296777 5:-0.381945 6:-0.296776 7:-0.46581 8:1 9:-0.823856 10:0.97304 11:-0.392751 12:0.915577 13:-0.388546 14:0.964644 15:-0.154441 16:-0.470963 17:0.281571 18:-0.956612 19:0.171068 20:-0.800439 21:0.210296 22:0.294887 23:-0.0632531 
+-1 1:-0.0178604 2:0.769639 3:-0.0180861 4:0.725144 5:0.0178604 6:-0.769639 7:-0.801033 8:-0.163941 9:-0.82426 10:0.97235 11:-0.382032 12:0.981337 13:-0.309288 14:-1 15:-0.209367 16:-0.808771 17:0.267596 18:-0.427496 19:0.192014 20:0.384354 21:0.217746 22:-0.366491 23:-0.0632531 
+-1 1:0.167215 2:0.835209 3:0.167431 4:0.835209 5:-0.167215 6:-0.83521 7:-0.859579 8:0.149024 9:-0.823013 10:0.0966996 11:-0.401668 12:0.944595 13:-0.381536 14:0.955372 15:-0.19679 16:-0.90311 17:0.231706 18:0.162234 19:0.192583 20:-0.916235 21:0.193319 22:-0.12 23:-0.202858 
+-1 1:-0.275409 2:0.618012 3:-0.275275 4:0.618012 5:0.275409 6:-0.618012 7:-0.328745 8:1 9:-0.820519 10:0.991694 11:-0.398747 12:0.858996 13:-0.401208 14:0.613702 15:-0.206214 16:-0.813039 17:0.269345 18:-0.71684 19:0.167234 20:-0.551912 21:0.214211 22:0.0868445 23:-0.691359 
+-1 1:-0.209506 2:0.847363 3:-0.209572 4:0.824271 5:0.209506 6:-0.847363 7:-0.449595 8:-0.224368 9:-0.823901 10:-1 11:-0.407708 12:1 13:-0.376183 14:0.823611 15:-0.17924 16:0.0400426 17:0.296546 18:0.975716 19:0.25026 20:-0.103527 21:0.206033 22:-0.393136 23:-0.0632531 
+-1 1:-0.012066 2:0.577073 3:-0.0118834 4:0.577073 5:0.012066 6:-0.577073 7:-0.811658 8:1 9:-0.818731 10:0.710532 11:-0.397432 12:0.950979 13:-0.0907803 14:0.956142 15:0.100392 16:-0.986929 17:-0.352961 18:-1 19:-0.246803 20:-0.403321 21:0.212539 22:-0.21481 23:-0.0632531 
+-1 1:0.326064 2:0.254149 3:0.326309 4:0.254149 5:-0.326064 6:-0.254149 7:-0.568283 8:1 9:-0.80161 10:1 11:-0.390607 12:1 13:-0.291889 14:1 15:-0.180519 16:-0.99189 17:0.115624 18:-0.977232 19:0.104935 20:-0.199186 21:0.209227 22:0.385579 23:-0.0632531 
+-1 1:-0.193829 2:0.807777 3:-0.19368 4:0.807777 5:0.193829 6:-0.807777 7:-0.478345 8:0.768621 9:-0.819398 10:-1 11:-0.403507 12:0.81809 13:-0.408361 14:0.795341 15:-0.194895 16:-0.996252 17:0.0626062 18:-0.984568 19:-0.133564 20:0.951867 21:0.255737 22:-0.207421 23:-0.378022 
+-1 1:-0.0240427 2:0.0162941 3:-0.0238623 4:0.0162941 5:0.0240427 6:-0.0162942 7:-0.789696 8:1 9:-0.702393 10:1 11:-0.392816 12:1 13:-0.411915 14:1 15:-0.206728 16:-0.963723 17:-0.399376 18:-0.967799 19:-0.358363 20:0.327333 21:0.38334 22:0.50718 23:-0.0632531 
+-1 1:0.696306 2:0.58341 3:0.692496 4:0.535085 5:-0.696306 6:-0.583411 7:0.11066 8:0.0571429 9:-0.823902 10:0.723369 11:-0.399226 12:0.926226 13:-0.281108 14:0.885418 15:-0.189214 16:0.0577496 17:0.291174 18:-0.609756 19:0.191694 20:0.122807 21:0.217535 22:-0.636985 23:-0.0632531 
+-0.9480320012272011 1:-0.000557745 2:0.857681 3:-0.000372991 4:0.857681 5:0.000557668 6:-0.857681 7:-0.832762 8:0.735403 9:-0.794027 10:-0.53456 11:-0.412047 12:0.990961 13:-0.113838 14:0.790546 15:-0.130677 16:-0.329547 17:0.288198 18:-0.939431 19:0.189682 20:-1 21:0.2155 22:-0.303118 23:-0.681514 
+-0.3473251243236888 1:-0.98992 2:-1 3:-0.989918 4:-1 5:0.98992 6:1 7:0.981515 8:-1 9:-0.823908 10:-1 11:-0.402191 12:1 13:-0.411874 14:-1 15:-0.209367 16:-1 17:0.290276 18:-1 19:0.192488 20:-1 21:0.217526 22:-0.536444 23:-0.0632531 
+-0.9064384811467762 1:0.351976 2:0.760153 3:0.351916 4:0.741632 5:-0.351976 6:-0.760153 7:-0.520767 8:1 9:-0.818395 10:0.750875 11:-0.400518 12:0.995159 13:-0.390436 14:0.971864 15:-0.205374 16:-0.920107 17:0.236348 18:-0.967476 19:0.174579 20:-0.999799 21:-0.271949 22:-0.34571 23:-0.0632531 
+-1 1:0.218363 2:0.794326 3:0.218167 4:0.723709 5:-0.218363 6:-0.794326 7:-0.765784 8:0.549546 9:-0.802558 10:-0.200297 11:-0.40299 12:0.991949 13:-0.29318 14:0.960852 15:-0.184961 16:-0.991562 17:0.172543 18:-0.953716 19:0.152186 20:1 21:0.25274 22:-0.469927 23:-0.316876 
+-1 1:-0.225408 2:0.612319 3:-0.225264 4:0.612319 5:0.225408 6:-0.612319 7:-0.420436 8:1 9:-0.816309 10:-1 11:-0.403732 12:0.90824 13:-0.2106 14:-0.0286512 15:-0.209512 16:-0.845293 17:0.245158 18:-0.603243 19:0.145275 20:0.497328 21:0.22435 22:-0.0144846 23:-0.0632531 
+-1 1:-0.0398959 2:0.733317 3:-0.041024 4:0.714702 5:0.0398958 6:-0.733317 7:-0.760624 8:0.443038 9:-0.819607 10:-0.479365 11:-0.413021 12:0.99693 13:-0.292486 14:0.782504 15:-0.147659 16:-0.91201 17:0.159206 18:-0.91008 19:0.0718231 20:-0.913023 21:0.00437004 22:-0.382951 23:-0.200227 
+-1 1:-1 2:-0.867574 3:-1 4:-0.867574 5:1 6:0.867574 7:1 8:-1 9:-0.823908 10:-1 11:-0.402191 12:1 13:-0.41298 14:-1 15:-0.209367 16:0.98134 17:0.298093 18:1 19:0.198779 20:1 21:0.221968 22:-0.114762 23:-0.0632531 
+-1 1:-0.0223285 2:0.785115 3:-0.027151 4:0.705799 5:0.0223284 6:-0.785115 7:-0.792839 8:0.93758 9:-0.783243 10:0.166381 11:-0.399063 12:0.839782 13:-0.36917 14:0.756488 15:-0.164988 16:-0.898406 17:0.197269 18:-1 19:0.183237 20:-0.589237 21:0.191472 22:-0.471667 23:0.0141191 
+-1 1:0.150399 2:0.736701 3:0.149616 4:0.71301 5:-0.150399 6:-0.736701 7:-0.890415 8:0.968425 9:-0.665504 10:0.968647 11:-0.170424 12:0.942418 13:-0.363409 14:0.915455 15:-0.117065 16:-0.969606 17:-0.334322 18:-0.908973 19:0.0319943 20:-0.548387 21:0.206564 22:-0.674281 23:-0.0632531 
+-1 1:0.0954275 2:0.755655 3:0.09563 4:0.755655 5:-0.0954275 6:-0.755655 7:-0.991221 8:0.978941 9:-0.816184 10:0.293615 11:-0.397506 12:0.962041 13:-0.384628 14:0.994525 15:-0.207541 16:-0.86759 17:0.22537 18:-0.733511 19:0.158628 20:-0.765866 21:0.211447 22:-0.101168 23:-0.195762 
+-1 1:-0.128208 2:0.894647 3:-0.128913 4:0.829494 5:0.128208 6:-0.894647 7:-0.598679 8:0.769483 9:-0.818848 10:-0.972411 11:-0.402871 12:0.967387 13:-0.357719 14:0.847833 15:-0.196591 16:-0.991484 17:0.0285746 18:-0.997772 19:0.0383957 20:0.904275 21:0.218387 22:-0.378058 23:-0.236408 
+-1 1:-0.0363361 2:0.879589 3:-0.037044 4:0.830942 5:0.0363362 6:-0.87959 7:-0.767152 8:0.289119 9:-0.820824 10:-0.640816 11:-0.402698 12:0.947489 13:-0.39705 14:0.894577 15:-0.19262 16:-0.997303 17:-0.0284193 18:-0.755049 19:0.136666 20:-0.326687 21:0.211052 22:-0.259009 23:-0.0632531 
+-1 1:0.094692 2:0.582589 3:0.0948944 4:0.582589 5:-0.0946921 6:-0.582589 7:-0.99257 8:1 9:-0.809069 10:1 11:-0.371011 12:0.933014 13:-0.263154 14:0.777778 15:-0.123765 16:-0.967655 17:0.14207 18:-0.78 19:0.120036 20:-0.692308 21:0.179961 22:0.178131 23:-0.0632531 
+-1 1:0.129588 2:0.746904 3:0.129592 4:0.693641 5:-0.129588 6:-0.746904 7:-0.928578 8:0.944133 9:-0.809609 10:0.988536 11:-0.178479 12:0.988988 13:-0.0111928 14:0.927463 15:-0.105927 16:-0.936029 17:0.113488 18:-0.875022 19:0.0748007 20:-0.94452 21:0.212426 22:-0.689334 23:-0.0919843 
+-1 1:0.0635339 2:0.8531 3:0.0569969 4:0.807777 5:-0.0635339 6:-0.8531 7:-0.950292 8:0.790927 9:-0.642983 10:0.81263 11:-0.308299 12:-0.267973 13:-0.423345 14:0.264667 15:-0.20219 16:-0.604766 17:0.236302 18:-0.966775 19:0.0477112 20:-1 21:0.183016 22:-0.508933 23:-0.0632531 
+-1 1:-0.382994 2:0.814726 3:-0.38288 4:0.814726 5:0.382993 6:-0.814726 7:-0.131457 8:0.650417 9:-0.822263 10:0.881298 11:-0.401473 12:0.698895 13:-0.411846 14:0.717273 15:-0.206561 16:-0.920976 17:0.27478 18:-0.913992 19:0.18121 20:0.519407 21:0.21842 22:-0.424612 23:-0.0632531 
+-1 1:-0.170153 2:0.620137 3:-0.17 4:0.620137 5:0.170153 6:-0.620138 7:-0.521761 8:0.870006 9:-0.818018 10:0.484056 11:-0.399677 12:0.720288 13:-0.409255 14:-0.344813 15:-0.210546 16:0.212459 17:0.290953 18:-0.061839 19:0.192153 20:-0.515394 21:0.216216 22:0.029868 23:-0.79278 
+-1 1:-0.286641 2:0.708811 3:-0.286509 4:0.708811 5:0.286641 6:-0.708811 7:-0.308147 8:0.240422 9:-0.821538 10:0.73181 11:-0.391961 12:-0.929155 13:-0.416731 14:0.279665 15:-0.207323 16:-0.683158 17:0.268174 18:-0.995641 19:0.0734558 20:-0.818569 21:0.210497 22:-0.572549 23:0.175766 
+-1 1:0.503506 2:0.794525 3:0.503269 4:0.732686 5:-0.503506 6:-0.794526 7:-0.242893 8:0.985274 9:-0.822114 10:0.405848 11:-0.402081 12:-0.960827 13:-0.413275 14:0.953794 15:-0.206365 16:-0.90302 17:0.287831 18:-1 19:0.19185 20:0.107061 21:0.217602 22:-0.770369 23:-0.0632531 
+-1 1:-0.661394 2:0.780604 3:-0.661331 4:0.780604 5:0.661394 6:-0.780604 7:0.379069 8:0.723245 9:-0.823805 10:0.904463 11:-0.401785 12:1 13:-0.403157 14:0.963663 15:-0.20909 16:-0.979149 17:0.274565 18:-0.852209 19:0.176246 20:0.940363 21:0.218543 22:-0.0678971 23:0.570645 
+-1 1:-0.446707 2:0.537198 3:-0.446605 4:0.537198 5:0.446708 6:-0.537198 7:-0.0146194 8:0.916334 9:-0.823086 10:0.358706 11:-0.402064 12:0.986049 13:-0.406812 14:0.671463 15:-0.208537 16:-0.998896 17:0.261824 18:-0.954256 19:0.152662 20:0.604809 21:0.218125 22:0.128106 23:-0.746096 
+-1 1:0.0519501 2:0.758012 3:0.0521446 4:0.758012 5:-0.0519501 6:-0.758012 7:-0.92905 8:1 9:-0.822623 10:0.997103 11:-0.361349 12:0.0779024 13:-0.412796 14:0.614035 15:-0.207279 16:-0.952126 17:0.282809 18:-0.0932031 19:0.192258 20:-0.866836 21:0.214434 22:-0.230974 23:-0.0632531 
+-1 1:0.517986 2:0.804832 3:0.518267 4:0.804832 5:-0.517986 6:-0.804832 7:-0.21634 8:0.951089 9:-0.814541 10:0.942482 11:-0.396756 12:0.929303 13:-0.385458 14:0.953605 15:-0.20378 16:-0.9796 17:0.20036 18:-0.843682 19:0.169585 20:-0.920765 21:0.209399 22:-0.404306 23:-0.0632531 
+-0.3573081161357508 1:0.679311 2:0.483381 3:0.678243 4:0.432352 5:-0.679311 6:-0.483381 7:0.0794949 8:0.233645 9:-0.823889 10:0.931034 11:-0.402167 12:0.997575 13:-0.306732 14:0.627839 15:-0.208695 16:-0.99017 17:0.280732 18:-0.841754 19:0.190388 20:-0.923397 21:0.21337 22:-0.636754 23:-0.0632531 
+-1 1:0.474366 2:0.727024 3:0.474639 4:0.727024 5:-0.474366 6:-0.727024 7:-0.296329 8:0.974026 9:-0.805736 10:0.755506 11:-0.387084 12:0.960438 13:-0.337978 14:-0.791045 15:-0.209933 16:-0.992962 17:-0.00886453 18:-0.177905 19:0.183085 20:-0.965841 21:0.214085 22:-0.765011 23:-0.0632531 
+-1 1:0.61692 2:0.863956 3:0.617219 4:0.863956 5:-0.61692 6:-0.863956 7:-0.0349166 8:0.710145 9:-0.82381 10:1 11:-0.401015 12:0.952613 13:-0.406584 14:0.955437 15:-0.206636 16:-0.982253 17:0.150251 18:0.404378 19:0.196999 20:0.445954 21:0.217598 22:-0.437351 23:-0.0632531 
+-1 1:0.0368548 2:0.018128 3:0.0370464 4:0.018128 5:-0.0368549 6:-0.0181281 7:-0.901369 8:1 9:-0.820096 10:1 11:-0.393283 12:1 13:-0.366482 14:1 15:-0.164141 16:-0.65652 17:0.266107 18:-0.918696 19:0.156205 20:-0.827835 21:0.207235 22:0.508216 23:-0.0632531 
+-0.4116942610135136 1:0.279881 2:0.805048 3:0.279738 4:0.785946 5:-0.279881 6:-0.805048 7:-0.652974 8:0.825518 9:-0.767275 10:0.65194 11:-0.382155 12:0.384136 13:-0.401216 14:-0.494098 15:-0.212531 16:-0.887641 17:0.217726 18:-0.814091 19:0.152935 20:-0.15851 21:0.21384 22:-0.36499 23:-0.0632531 
+-1 1:0.0273445 2:0.835209 3:0.0275344 4:0.835209 5:-0.0273444 6:-0.83521 7:-0.883929 8:0.938154 9:-0.81812 10:0.775796 11:-0.393122 12:0.860386 13:-0.365621 14:0.829981 15:-0.154479 16:-0.998607 17:0.170531 18:-1 19:0.151494 20:-0.997003 21:0.135819 22:-0.126156 23:-0.0632531 
+-1 1:-0.277523 2:0.634966 3:-0.277389 4:0.634966 5:0.277522 6:-0.634966 7:-0.324868 8:0.888411 9:-0.799556 10:0.724761 11:-0.401218 12:0.9853 13:-0.400949 14:0.786581 15:-0.205439 16:-0.971709 17:0.275214 18:-0.94317 19:0.171021 20:-0.495763 21:0.216087 22:0.0870176 23:-0.131772 
+-1 1:0.31084 2:0.912021 3:0.310528 4:0.843611 5:-0.31084 6:-0.912021 7:-0.596201 8:0.882796 9:-0.823558 10:0.97698 11:-0.398053 12:0.911214 13:-0.394062 14:0.91792 15:-0.1975 16:-0.963424 17:0.287422 18:-1 19:0.171708 20:0.701014 21:0.217614 22:-0.494263 23:-0.0632531 
+-1 1:-0.919117 2:-0.929624 3:-0.919102 4:-0.929624 5:0.919117 6:0.929624 7:0.851678 8:-1 9:-0.823908 10:-1 11:-0.402191 12:1 13:-0.396738 14:-1 15:-0.209367 16:0.97561 17:0.355642 18:1 19:0.233301 20:1 21:0.233607 22:-0.0728064 23:-0.0632531 
+-1 1:0.213109 2:0.777414 3:0.209993 4:0.706569 5:-0.213109 6:-0.777415 7:-0.775419 8:0.756532 9:-0.820469 10:-0.0334003 11:-0.402435 12:0.967249 13:-0.143469 14:0.906262 15:-0.19797 16:-0.998807 17:0.0840736 18:-0.981802 19:0.15625 20:-0.990668 21:-0.0407439 22:-0.837851 23:-0.319777 
+-1 1:-0.146724 2:0.813633 3:-0.146933 4:0.752141 5:0.146724 6:-0.813633 7:-0.564725 8:-0.411255 9:-0.823954 10:-0.431116 11:-0.40222 12:0.995842 13:-0.234316 14:0.996485 15:-0.150104 16:-0.993632 17:0.288718 18:-0.854596 19:0.191739 20:0.719703 21:0.217784 22:-0.396565 23:-0.0632531 
+-1 1:-0.295783 2:0.522403 3:-0.295653 4:0.522403 5:0.295783 6:-0.522403 7:-0.291383 8:0.76378 9:-0.822076 10:0.567251 11:-0.397914 12:0.923311 13:-0.388461 14:0.380952 15:-0.209055 16:-0.920177 17:0.164339 18:-0.936306 19:0.0230552 20:-0.996535 21:0.0647395 22:-0.0234873 23:-0.0632531 
+-1 1:0.0584036 2:0.547167 3:0.0585992 4:0.547167 5:-0.0584036 6:-0.547167 7:-0.940885 8:0.948938 9:-0.758493 10:1 11:-0.40049 12:0.822275 13:-0.395917 14:0.327824 15:-0.207598 16:-0.956338 17:0.124544 18:-0.307847 19:0.189034 20:-0.125 21:0.217475 22:0.126217 23:-0.0632531 
+-1 1:-0.556487 2:0.781716 3:-0.556405 4:0.781716 5:0.556487 6:-0.781716 7:0.186693 8:0.429736 9:-0.823521 10:0.883682 11:-0.400342 12:0.544856 13:-0.410607 14:0.0704514 15:-0.209201 16:-0.982438 17:0.273594 18:-0.471961 19:0.186299 20:-0.204325 21:0.215713 22:-0.58228 23:-0.914899 
+-1 1:0.713066 2:0.861696 3:0.713187 4:0.836534 5:-0.713065 6:-0.861696 7:0.141394 8:0.493438 9:-0.823085 10:0.82376 11:-0.401542 12:0.31939 13:-0.412609 14:0.397795 15:-0.209211 16:-0.973828 17:0.288667 18:-1 19:0.191727 20:0.590842 21:0.218244 22:-0.712347 23:-0.0632531 
+-1 1:-0.0937216 2:0.641526 3:-0.0935541 4:0.641526 5:0.0937216 6:-0.641526 7:-0.66192 8:0.598899 9:-0.800762 10:-0.671085 11:-0.40505 12:0.716961 13:-0.310011 14:-0.0319012 15:-0.21029 16:-1 17:0.238332 18:-1 19:0.136438 20:-0.989249 21:0.167325 22:-0.666973 23:-0.0632531 
+-1 1:0.365165 2:0.359097 3:0.365417 4:0.359097 5:-0.365165 6:-0.359098 7:-0.496581 8:0.977762 9:-0.807266 10:1 11:-0.381422 12:0.742119 13:-0.410219 14:0.576283 15:-0.207372 16:-0.996755 17:0.255217 18:-0.971229 19:0.188201 20:0.277338 21:0.218902 22:0.266064 23:-0.0632531 
+-1 1:0.259165 2:0.749864 3:0.254559 4:0.722837 5:-0.259165 6:-0.749864 7:-0.690962 8:-0.307815 9:-0.833329 10:1 11:-0.401564 12:0.994051 13:-0.244029 14:1 15:-0.201723 16:-0.99228 17:-0.426441 18:-1 19:0.188669 20:-0.692308 21:0.212892 22:-0.674236 23:-0.0632531 
+-1 1:0.641037 2:0.376141 3:0.637402 4:0.319133 5:-0.641037 6:-0.376141 7:0.00930961 8:1 9:-0.823793 10:0.98895 11:-0.401883 12:0.970389 13:-0.354825 14:0.970241 15:-0.202621 16:-0.840984 17:0.2496 18:-0.448276 19:0.191767 20:-0.838235 21:0.216638 22:-0.636696 23:-0.0632531 
+-1 1:-0.39915 2:0.329767 3:-0.399039 4:0.329767 5:0.39915 6:-0.329766 7:-0.10183 8:0.97283 9:-0.822909 10:1 11:-0.400963 12:0.99663 13:-0.381523 14:0.991763 15:-0.20029 16:0.934361 17:0.347993 18:-0.721017 19:0.174614 20:-0.99105 21:0.216918 22:0.299444 23:-0.0632531 
+-1 1:0.00633151 2:0.879589 3:0.00651754 4:0.879589 5:-0.00633145 6:-0.87959 7:-0.845396 8:0.809524 9:-0.810094 10:0.453061 11:-0.396927 12:0.982011 13:-0.349961 14:0.560976 15:-0.205276 16:-0.991308 17:-0.180848 18:-0.626768 19:0.0924479 20:-0.739316 21:0.180458 22:-0.188782 23:-0.063689 
+-1 1:0.281704 2:0.861696 3:0.275865 4:0.735884 5:-0.281705 6:-0.861696 7:-0.64963 8:-0.966548 9:-0.883004 10:-0.277031 11:-0.409766 12:-0.635979 13:-0.473714 14:0.591998 15:-0.142561 16:0.233749 17:0.293906 18:-0.0959157 19:0.191795 20:-0.954895 21:-1 22:-1 23:-0.0632531 
+-0.1511414311021485 1:-0.240321 2:0.858627 3:-0.240181 4:0.858627 5:0.240321 6:-0.858628 7:-0.393087 8:0.784334 9:-0.820757 10:0.325235 11:-0.401721 12:0.93089 13:-0.376623 14:0.965426 15:-0.180344 16:-0.948306 17:0.246906 18:-0.978884 19:0.132794 20:-0.99653 21:-0.206467 22:-0.0830693 23:-0.370444 
+-0.5004311581539996 1:-0.0605389 2:0.817429 3:-0.0622114 4:0.716081 5:0.0605389 6:-0.817429 7:-0.72277 8:-0.0442478 9:-0.823917 10:-0.0940555 11:-0.404756 12:-0.744706 13:-0.615789 14:-0.915004 15:-0.288785 16:-0.813664 17:0.135426 18:-0.812378 19:0.150974 20:0.994245 21:0.223209 22:-0.937407 23:-0.0632531 
+-1 1:-0.533776 2:0.653092 3:-0.53369 4:0.653092 5:0.533776 6:-0.653092 7:0.145045 8:0.974003 9:-0.806577 10:-0.330966 11:-0.402547 12:0.986313 13:-0.290658 14:0.816032 15:-0.20553 16:-0.951008 17:0.245202 18:-0.523339 19:0.191183 20:0.367724 21:0.220256 22:-0.263425 23:-0.837749 
+-0.6859808826851237 1:-0.232813 2:0.887697 3:-0.233846 4:0.83905 5:0.232813 6:-0.887697 7:-0.406856 8:0.796904 9:-0.823236 10:-0.87761 11:-0.404877 12:0.99683 13:-0.357176 14:-0.483146 15:-0.241495 16:-0.959888 17:0.211122 18:-0.985625 19:0.0853136 20:-0.99263 21:0.106287 22:-0.519176 23:-0.0632531 
+-1 1:0.306641 2:0.664862 3:0.306882 4:0.664862 5:-0.306641 6:-0.664862 7:-0.603902 8:1 9:-0.823309 10:0.0503376 11:-0.402124 12:0.856849 13:-0.39858 14:0.898892 15:-0.196458 16:-1 17:0.278287 18:-1 19:0.14965 20:-0.468098 21:0.213663 22:-0.569228 23:-0.0632531 
+-1 1:-0.160808 2:0.775201 3:-0.161609 4:0.752397 5:0.160808 6:-0.775201 7:-0.538898 8:0.00342238 9:-0.823901 10:0.0702281 11:-0.397649 12:0.887742 13:-0.332376 14:0.118779 15:-0.194083 16:-0.763914 17:0.167297 18:-0.939527 19:0.0694662 20:-0.991067 21:0.0535393 22:-0.415857 23:-0.0632531 
+-1 1:0.106071 2:0.735884 3:0.101146 4:0.672979 5:-0.106071 6:-0.735884 7:-0.971703 8:-0.0763105 9:-0.824035 10:0.421857 11:-0.380452 12:1 13:-0.207691 14:0.959368 15:-0.0753366 16:-0.976237 17:0.0892575 18:-0.94274 19:0.0512007 20:-0.977612 21:0.127895 22:-0.2819 23:-0.0632531 
+-1 1:0.21386 2:0.693947 3:0.214084 4:0.693947 5:-0.21386 6:-0.693947 7:-0.774042 8:0.515973 9:-0.821507 10:0.158231 11:-0.401364 12:0.918312 13:-0.376853 14:0.562752 15:-0.199306 16:-0.955794 17:0.0404244 18:-0.941021 19:0.0918136 20:-0.659956 21:0.21699 22:-0.868363 23:-0.0632531 
+-1 1:0.0651686 2:0.812671 3:0.0653654 4:0.812671 5:-0.0651686 6:-0.812671 7:-0.95329 8:0.985904 9:-0.698878 10:0.935025 11:-0.343781 12:0.873191 13:-0.227068 14:0.0992506 15:-0.202353 16:-0.964597 17:0.21277 18:-0.12834 19:0.191035 20:-0.979699 21:0.187554 22:-0.0858848 23:-0.0632531 
+-1 1:0.269199 2:0.841404 3:0.269106 4:0.753133 5:-0.269199 6:-0.841404 7:-0.672562 8:0.974028 9:-0.820404 10:0.784946 11:-0.40114 12:0.992415 13:-0.384281 14:-0.484752 15:-0.209542 16:-0.991495 17:0.27579 18:-0.999537 19:0.17478 20:0.581229 21:0.217803 22:-0.789245 23:-0.0632531 
+-1 1:-0.323326 2:0.897988 3:-0.324246 4:0.827824 5:0.323326 6:-0.897988 7:-0.240875 8:-0.152944 9:-0.823951 10:0.912276 11:-0.401247 12:0.954302 13:-0.409486 14:0.350817 15:-0.209338 16:-0.9624 17:0.288568 18:-0.822504 19:0.192045 20:0.0529275 21:0.217534 22:-0.533007 23:-0.0632531 
+-1 1:0.241414 2:0.855494 3:0.240733 4:0.752718 5:-0.241414 6:-0.855494 7:-0.723514 8:-1 9:-0.823908 10:-0.9207 11:-0.402694 12:0.850181 13:-0.393426 14:0.981003 15:-0.202785 16:-0.897406 17:0.267298 18:0.966648 19:0.210524 20:-1 21:0.217526 22:-0.601006 23:-0.0632531 
+-1 1:0.377947 2:0.371222 3:0.378202 4:0.371222 5:-0.377947 6:-0.371222 7:-0.473141 8:1 9:-0.809623 10:1 11:-0.395751 12:0.983827 13:-0.288896 14:1 15:-0.180748 16:-0.984733 17:0.219705 18:-0.333333 19:0.190021 20:-0.501831 21:0.193799 22:0.294949 23:-0.0632531 
+-0.8410738584030928 1:0.468711 2:0.375066 3:0.468982 4:0.375066 5:-0.468711 6:-0.375066 7:-0.3067 8:1 9:-0.821851 10:-0.050505 11:-0.402225 12:-0.109299 13:-0.41424 14:0.459854 15:-0.208987 16:-0.719298 17:0.289546 18:-0.868647 19:0.172172 20:0.522843 21:0.218584 22:0.0494153 23:-0.0632531 
+-1 1:-0.438328 2:0.652703 3:-0.438225 4:0.652703 5:0.438328 6:-0.652703 7:-0.029985 8:1 9:-0.823513 10:-0.372634 11:-0.402443 12:0.993308 13:-0.360648 14:0.482745 15:-0.209084 16:-0.60106 17:0.289627 18:-0.140836 19:0.19217 20:-0.461578 21:0.216949 22:-0.309265 23:-0.920594 
+-1 1:-0.297602 2:0.912021 3:-0.297971 4:0.826173 5:0.297602 6:-0.912021 7:-0.288048 8:0.987341 9:-0.583361 10:0.963 11:-0.361736 12:0.770474 13:-0.388493 14:0.68266 15:-0.198635 16:-0.996345 17:0.168891 18:-0.634396 19:0.167586 20:-0.969169 21:0.205248 22:-0.427928 23:-0.706329 
+-1 1:-0.0275995 2:0.312763 3:-0.0274198 4:0.312763 5:0.0275995 6:-0.312763 7:-0.783173 8:1 9:-0.818125 10:0.879781 11:-0.382609 12:0.998868 13:-0.373507 14:0.550562 15:-0.206737 16:-0.941832 17:0.235299 18:-0.788144 19:0.12637 20:0.625 21:0.2189 22:-0.296457 23:-0.0632531 
+-1 1:0.0728748 2:0.688705 3:0.073073 4:0.688705 5:-0.0728748 6:-0.688705 7:-0.967422 8:0.972503 9:-0.811783 10:-0.823529 11:-0.402425 12:-0.255474 13:-0.413729 14:0.808763 15:-0.19968 16:-0.992517 17:0.279107 18:-0.720613 19:0.190912 20:-0.249428 21:0.21707 22:-0.0363074 23:0.455066 
+-1 1:-0.0641361 2:0.781048 3:-0.0639631 4:0.781048 5:0.0641361 6:-0.781048 7:-0.716173 8:-0.926594 9:-0.827831 10:0.0238465 11:-0.40202 12:0.845508 13:-0.288778 14:0.611825 15:-0.172847 16:-0.47166 17:0.23767 18:-0.538906 19:0.173928 20:-1 21:0.20239 22:-0.0137481 23:-0.0632531 
+-1 1:0.367861 2:0.875273 3:0.368114 4:0.875273 5:-0.367861 6:-0.875273 7:-0.491637 8:0.517816 9:-0.822899 10:-0.885977 11:-0.408103 12:0.999348 13:-0.39164 14:0.815635 15:-0.20753 16:-1 17:0.219743 18:-0.574671 19:0.190677 20:-0.411333 21:0.217329 22:-0.597081 23:-0.0632531 
+-1 1:-0.0848077 2:0.718679 3:-0.0846386 4:0.718679 5:0.0848076 6:-0.718679 7:-0.678266 8:1 9:0.729065 10:0.819417 11:-0.401671 12:0.844973 13:-0.41233 14:0.959073 15:-0.200754 16:-1 17:0.290173 18:-1 19:0.192235 20:1 21:0.220755 22:-0.70531 23:-0.0632531 
+-1 1:0.452558 2:0.884995 3:0.451583 4:0.803916 5:-0.452558 6:-0.884995 7:-0.336321 8:0.247717 9:-0.822655 10:0.373297 11:-0.400187 12:0.664069 13:-0.40934 14:0.449313 15:-0.208031 16:-1 17:0.270082 18:-0.271328 19:0.188877 20:0.447938 21:0.221133 22:-0.696443 23:-0.0632531 
+-0.9891994540451646 1:0.878711 2:0.826799 3:0.878053 4:0.746904 5:-0.878711 6:-0.8268 7:0.445152 8:0.600688 9:-0.823813 10:0.0527023 11:-0.40219 12:0.80072 13:-0.412321 14:0.875692 15:-0.209252 16:-0.295667 17:0.286642 18:-0.892248 19:0.190151 20:-0.825476 21:0.217112 22:-0.666952 23:-0.0632531 
+-1 1:-0.131505 2:0.288937 3:-0.131345 4:0.288937 5:0.131505 6:-0.288936 7:-0.592632 8:1 9:-0.79884 10:0.992843 11:-0.34782 12:0.996525 13:-0.362013 14:0.955728 15:-0.201709 16:-1 17:0.203913 18:-0.720892 19:0.183667 20:-0.145455 21:0.217163 22:0.265733 23:-0.508746 
+-1 1:0.187559 2:0.886191 3:0.184302 4:0.805471 5:-0.187559 6:-0.886191 7:-0.822272 8:-0.234824 9:-0.8318 10:-0.0192313 11:-0.402769 12:0.0850332 13:-0.411078 14:0.28921 15:-0.203213 16:0.553724 17:0.385322 18:0.00305998 19:0.192704 20:-0.996758 21:-0.632059 22:-0.138765 23:-0.0632531 
+-1 1:0.173072 2:0.777601 3:0.173288 4:0.777601 5:-0.173072 6:-0.777601 7:-0.848839 8:0.826087 9:-0.813724 10:1 11:-0.357769 12:0.888502 13:-0.295086 14:1 15:-0.153602 16:-1 17:0.0324854 18:-0.716129 19:0.0993861 20:-0.45098 21:0.20669 22:0.177911 23:-0.0632531 
+-1 1:0.176732 2:0.796384 3:0.176657 4:0.776447 5:-0.176732 6:-0.796384 7:-0.842127 8:0.977011 9:-0.657994 10:0.981818 11:0.0471881 12:0.613188 13:-0.321404 14:0.2688 15:-0.191853 16:-1 17:0.0929446 18:-0.998646 19:0.0602873 20:-0.704811 21:0.159738 22:-0.350252 23:-0.0632531 
+-1 1:0.58645 2:0.82128 3:0.586493 4:0.783471 5:-0.58645 6:-0.82128 7:-0.0907926 8:1 9:-0.808325 10:0.944571 11:-0.371069 12:0.684063 13:-0.396912 14:0.885453 15:-0.193172 16:-0.718137 17:0.25961 18:-0.158898 19:0.190456 20:0.339703 21:0.219271 22:-0.205989 23:-0.0632531 
+-1 1:0.62724 2:0.791053 3:0.626737 4:0.752031 5:-0.62724 6:-0.791053 7:-0.0159923 8:0.40898 9:-0.823603 10:0.835046 11:-0.382693 12:0.904791 13:-0.40702 14:0.836845 15:-0.207687 16:-0.791806 17:0.283239 18:-0.132346 19:0.192312 20:-0.818653 21:0.216848 22:-0.419349 23:-0.0632531 
+-1 1:-0.434857 2:0.804832 3:-0.434753 4:0.804832 5:0.434857 6:-0.804832 7:-0.0363498 8:0.885103 9:-0.82166 10:0.531992 11:-0.401279 12:0.910059 13:-0.400526 14:0.909134 15:-0.201394 16:-0.592385 17:0.278836 18:-0.627505 19:0.185883 20:-0.638811 21:0.214886 22:-0.358917 23:-0.776836 
+-1 1:0.108886 2:0.946226 3:0.108605 4:0.860713 5:-0.108886 6:-0.946226 7:-0.966541 8:-0.783151 9:-0.830185 10:-0.638918 11:-0.403942 12:0.73989 13:-0.383866 14:0.717483 15:-0.192053 16:-0.995379 17:0.178553 18:-0.99175 19:0.142419 20:0.7409 21:0.223893 22:-0.709939 23:-0.0632531 
+-0.9409472333492426 1:0.232942 2:0.813206 3:0.232788 4:0.794203 5:-0.232942 6:-0.813206 7:-0.739049 8:0.946444 9:-0.822963 10:1 11:-0.340441 12:0.87505 13:-0.377963 14:0.674252 15:-0.208335 16:-0.965814 17:0.0602979 18:-0.993555 19:0.117857 20:-0.817141 21:0.216226 22:-0.178362 23:-0.0632531 
+-1 1:-0.266014 2:0.802172 3:-0.266261 4:0.684477 5:0.266014 6:-0.802172 7:-0.345973 8:0.339928 9:-0.770165 10:-0.905991 11:-0.414525 12:0.433412 13:-0.341727 14:-0.76482 15:-0.217774 16:-0.26747 17:0.215641 18:-0.996655 19:0.177917 20:0.496289 21:0.297168 22:-0.93215 23:-0.0632531 
+-1 1:-0.297395 2:0.822057 3:-0.298088 4:0.797067 5:0.297395 6:-0.822057 7:-0.288426 8:-0.703012 9:-0.877516 10:-0.623832 11:-0.435427 12:0.716817 13:-0.0661462 14:0.703454 15:-0.110821 16:0.708766 17:0.343498 18:-0.860971 19:0.150806 20:-0.999175 21:-0.313029 22:-0.511407 23:-0.0632531 
+-1 1:-0.121629 2:0.824456 3:-0.121467 4:0.824456 5:0.12163 6:-0.824456 7:-0.610743 8:0.194964 9:-0.821282 10:0.987906 11:-0.338792 12:0.925433 13:-0.10628 14:0.642163 15:-0.153755 16:-0.895073 17:0.142365 18:0.00328768 19:0.192496 20:-0.819913 21:-0.0492687 22:-0.6288 23:-0.375604 
+-0.7962775312281117 1:0.261626 2:0.857417 3:0.261211 4:0.832597 5:-0.261626 6:-0.857417 7:-0.68645 8:-0.722041 9:-0.828794 10:0.499267 11:-0.399149 12:0.919395 13:-0.409012 14:0.981404 15:0.0671003 16:-0.344997 17:-0.247673 18:-0.524203 19:-0.275102 20:-1 21:0.0559075 22:-0.426953 23:-0.0632531 
+-1 1:-0.176905 2:0.105501 3:-0.176752 4:0.105501 5:0.176904 6:-0.105501 7:-0.50938 8:1 9:-0.767196 10:1 11:-0.371838 12:1 13:-0.290631 14:1 15:-0.166767 16:-0.60237 17:0.236005 18:-0.578695 19:0.156663 20:-0.904036 21:0.097972 22:0.509861 23:-0.0632531 
+-1 1:-0.977259 2:0.640378 3:-0.977255 4:0.640378 5:0.977259 6:-0.640378 7:0.958298 8:0.993924 9:-0.823038 10:1 11:-0.402052 12:0.981684 13:-0.411577 14:0.980987 15:-0.209289 16:-0.244231 17:0.290266 18:-1 19:0.191084 20:0.999959 21:1 22:-0.471949 23:-0.0632531 
+-1 1:-0.0279924 2:0.655637 3:-0.0285908 4:0.635915 5:0.0279923 6:-0.655637 7:-0.782453 8:0.702624 9:-0.821827 10:0.709459 11:-0.400074 12:0.993758 13:-0.370443 14:0.69662 15:-0.205608 16:-0.978292 17:-0.241537 18:-0.976013 19:0.0352147 20:-0.746725 21:0.213632 22:-0.359522 23:-0.0632531 
+-1 1:-0.000776896 2:0.34447 3:-0.000592183 4:0.34447 5:0.000777016 6:-0.34447 7:-0.83236 8:0.973593 9:-0.823447 10:1 11:-0.394724 12:0.997378 13:-0.371369 14:1 15:-0.17595 16:-0.691095 17:0.27506 18:0.611193 19:0.193724 20:-0.803678 21:0.199201 22:0.314628 23:-0.592274 
+-1 1:0.125949 2:0.718192 3:0.12303 4:0.689688 5:-0.125949 6:-0.718192 7:-0.935252 8:-0.110605 9:-0.824609 10:0.498955 11:-0.396616 12:-0.482728 13:-0.443084 14:0.933535 15:-0.185436 16:-0.951793 17:0.174703 18:-0.903254 19:0.151946 20:-0.62203 21:0.199187 22:-0.552538 23:-0.0632531 
+-1 1:0.0700696 2:0.504742 3:0.0702675 4:0.504742 5:-0.0700697 6:-0.504742 7:-0.962278 8:1 9:-0.790355 10:1 11:-0.392373 12:0.828283 13:-0.400422 14:1 15:-0.185462 16:-0.982449 17:0.0131345 18:-0.989503 19:-0.0206808 20:-0.552782 21:0.196429 22:0.126078 23:-0.0632531 
+-1 1:0.0852691 2:0.616948 3:0.0854697 4:0.616948 5:-0.0852692 6:-0.616948 7:-0.99015 8:0.676902 9:-0.823441 10:0.744851 11:-0.396701 12:0.991963 13:-0.37864 14:0.905337 15:-0.198768 16:-0.996765 17:0.161128 18:-0.985585 19:0.149218 20:-0.924563 21:0.209675 22:-0.101227 23:-0.0632531 
+-1 1:0.446439 2:0.504742 3:0.446706 4:0.504742 5:-0.446439 6:-0.504742 7:-0.347542 8:0.994143 9:-0.819983 10:1 11:-0.401827 12:0.995507 13:-0.192002 14:0.989292 15:-0.10923 16:-0.938863 17:0.230642 18:-0.975636 19:0.118727 20:-0.207251 21:0.214127 22:0.189972 23:-0.0632531 
+-1 1:0.661109 2:0.875535 3:0.660975 4:0.83905 5:-0.661109 6:-0.875536 7:0.046117 8:-0.139148 9:-0.824391 10:-0.0240497 11:-0.402233 12:0.969953 13:-0.362246 14:0.805675 15:-0.206018 16:-0.950235 17:0.202486 18:-0.657143 19:0.191031 20:0.738729 21:0.22094 22:-0.348807 23:-0.0632531 
+-1 1:0.00808634 2:0.837992 3:0.00671262 4:0.785115 5:-0.00808628 6:-0.837993 7:-0.848613 8:0.729338 9:-0.814357 10:0.307672 11:-0.401016 12:0.918671 13:-0.351137 14:0.080597 15:-0.209343 16:-0.827623 17:0.261981 18:-0.820619 19:0.148952 20:-0.77224 21:0.209775 22:-0.278504 23:-0.0632531 
+-1 1:-0.116108 2:0.664517 3:-0.115944 4:0.664517 5:0.116108 6:-0.664518 7:-0.620868 8:0.905396 9:-0.818193 10:1 11:-0.402109 12:0.770438 13:-0.40849 14:-0.176046 15:-0.209406 16:-0.996834 17:0.0609481 18:-0.982965 19:-0.0160832 20:-0.99919 21:0.0859699 22:-0.207543 23:0.547938 
+-1 1:0.281982 2:0.598315 3:0.282219 4:0.598315 5:-0.281982 6:-0.598314 7:-0.64912 8:1 9:-0.823004 10:-1 11:-0.40293 12:0.602564 13:-0.383852 14:-0.609375 15:-0.22389 16:-0.582609 17:0.239917 18:-0.940828 19:0.147513 20:-0.307692 21:0.216255 22:-0.613927 23:-0.0632531 
+-1 1:0.18694 2:0.647717 3:0.180025 4:0.618988 5:-0.18694 6:-0.647717 7:-0.823407 8:1 9:-0.77409 10:-0.411043 11:-0.431933 12:0.137534 13:-0.367124 14:0.509269 15:-0.117898 16:0.0853549 17:0.303134 18:-0.818499 19:0.0529374 20:-0.960159 21:0.177248 22:-0.753842 23:-0.0632531 
+-1 1:-0.023637 2:0.787574 3:-0.0234566 4:0.787574 5:0.023637 6:-0.787575 7:-0.79044 8:0.512479 9:-0.80905 10:0.450269 11:-0.396266 12:0.965669 13:-0.241235 14:0.374418 15:-0.197423 16:-0.999046 17:-0.298634 18:-0.964298 19:-0.0910548 20:-0.994376 21:-0.239514 22:-0.667281 23:-0.0632531 
+-1 1:0.221918 2:0.919467 3:0.22068 4:0.882237 5:-0.221918 6:-0.919467 7:-0.759266 8:0.993083 9:-0.780626 10:0.96873 11:-0.318146 12:0.933471 13:-0.407664 14:0.851567 15:-0.190869 16:-0.984477 17:-0.0652056 18:-1 19:0.159315 20:-0.64597 21:0.199063 22:-0.371971 23:-0.0632531 
+-1 1:-0.040382 2:0.906882 3:-0.0428088 4:0.881188 5:0.040382 6:-0.906882 7:-0.759733 8:1 9:-0.79969 10:0.557428 11:-0.37568 12:0.879031 13:-0.360098 14:-0.201698 15:-0.210911 16:-0.971737 17:0.0864258 18:-1 19:-0.152495 20:-0.55247 21:0.177363 22:-0.232054 23:-0.0632531 
+-1 1:-0.0207953 2:0.870323 3:-0.0213622 4:0.828626 5:0.0207954 6:-0.870323 7:-0.795651 8:-0.132254 9:-0.824032 10:0.709544 11:-0.401474 12:0.752628 13:-0.407249 14:-0.963434 15:-0.21033 16:-0.0318351 17:0.290262 18:1 19:0.193116 20:-1 21:0.207366 22:-0.388177 23:-0.665424 
+-0.1597538653661974 1:0.00473722 2:0.89531 3:0.00426237 4:0.811756 5:-0.00473724 6:-0.89531 7:-0.842472 8:-0.721188 9:-0.849335 10:-0.976985 11:-0.42231 12:0.677469 13:-0.351941 14:0.675752 15:-0.163691 16:-1 17:0.245742 18:-0.312889 19:0.186495 20:-0.924925 21:0.0795114 22:-0.69623 23:-0.0632531 
+-0.08658350832329205 1:-0.0657449 2:0.8298 3:-0.0655722 4:0.8298 5:0.0657449 6:-0.8298 7:-0.713223 8:-1 9:-0.826666 10:0.416818 11:-0.396244 12:0.970449 13:-0.339628 14:0.963774 15:-0.164238 16:-0.979115 17:0.262996 18:-0.99907 19:0.0572506 20:-0.813874 21:0.203497 22:-0.100046 23:-0.0632531 
+-1 1:0.0838574 2:0.807777 3:0.0840577 4:0.807777 5:-0.0838573 6:-0.807777 7:-0.987561 8:-0.508542 9:-0.824348 10:1 11:-0.401911 12:0.0635285 13:-0.41039 14:0.299216 15:-0.194277 16:-1 17:0.151461 18:-0.941342 19:0.177537 20:0.143331 21:0.218279 22:-0.965422 23:-0.0632531 
+-1 1:0.139116 2:0.960209 3:0.139132 4:0.925789 5:-0.139116 6:-0.960209 7:-0.911107 8:1 9:-0.821201 10:0.776334 11:-0.39801 12:0.994801 13:-0.4056 14:-0.818117 15:-0.216809 16:-1 17:0.290544 18:-0.187024 19:0.186951 20:1 21:0.217714 22:-0.908665 23:-0.709439 
+-1 1:0.199169 2:0.0985754 3:0.19939 4:0.0985754 5:-0.199169 6:-0.0985756 7:-0.800982 8:1 9:-0.786225 10:1 11:-0.374231 12:1 13:-0.40467 14:1 15:-0.203051 16:0.407612 17:1 18:0.397065 19:1 20:-0.998647 21:-0.438171 22:0.485602 23:-0.0632531 
+-1 1:-0.212196 2:0.615364 3:-0.21205 4:0.615364 5:0.212196 6:-0.615364 7:-0.444663 8:0.108143 9:-0.822724 10:-0.878641 11:-0.420135 12:0.844799 13:-0.383102 14:0.956768 15:-0.189253 16:-0.969814 17:0.194752 18:-0.992026 19:0.118083 20:-0.466941 21:0.202933 22:0.0190595 23:-0.0632531 
+-1 1:0.0669913 2:0.600508 3:0.0652963 4:0.579172 5:-0.0669914 6:-0.600508 7:-0.956633 8:0.82415 9:-0.816564 10:1 11:-0.399722 12:0.936592 13:-0.333198 14:-1 15:-0.209474 16:-0.470947 17:0.266949 18:-0.952452 19:0.17153 20:-0.947903 21:0.156711 22:-0.439143 23:-0.0632531 
+-1 1:0.28422 2:0.818342 3:0.284345 4:0.79369 5:-0.284219 6:-0.818342 7:-0.645018 8:0.950029 9:-0.822843 10:0.900699 11:-0.400095 12:0.994106 13:-0.350851 14:0.984813 15:-0.19887 16:-0.979002 17:0.139485 18:-0.97954 19:0.120082 20:-0.930789 21:0.165772 22:-0.538836 23:-0.0632531 
+-1 1:0.130179 2:0.775201 3:0.128901 4:0.718192 5:-0.130179 6:-0.775201 7:-0.927494 8:0.28365 9:-0.821738 10:-0.652963 11:-0.425953 12:-0.965808 13:-0.45138 14:-0.924065 15:-0.236101 16:-0.031014 17:0.270134 18:-0.350278 19:0.0335525 20:-0.0541865 21:0.213531 22:-0.732648 23:-0.0632531 
+-0.808521001246749 1:0.1077 2:0.355669 3:0.107904 4:0.355669 5:-0.1077 6:-0.355669 7:-0.968717 8:1 9:-0.823301 10:1 11:-0.392813 12:1 13:-0.183414 14:1 15:-0.110712 16:-0.905227 17:0.190292 18:-0.989051 19:0.0248452 20:-0.968899 21:0.100771 22:0.357464 23:-0.237935 
+-1 1:0.0586908 2:0.959099 3:0.0588015 4:0.93556 5:-0.0586909 6:-0.959099 7:-0.941411 8:0.759281 9:-0.746375 10:0.792796 11:-0.344139 12:0.864541 13:-0.317874 14:0.668812 15:-0.176746 16:-0.997848 17:-0.138442 18:-0.982069 19:-0.0135535 20:-0.969809 21:-0.100417 22:-0.453775 23:-0.0632531 
+-1 1:-0.204918 2:0.615577 3:-0.204771 4:0.615577 5:0.204918 6:-0.615577 7:-0.45801 8:0.882485 9:-0.82115 10:0.944516 11:-0.398654 12:0.949687 13:-0.403939 14:1 15:-0.207032 16:-1 17:0.167804 18:-0.980302 19:0.15708 20:-0.980018 21:0.204044 22:-0.23681 23:-0.367254 
+-1 1:-0.12548 2:0.66755 3:-0.125318 4:0.66755 5:0.12548 6:-0.66755 7:-0.603682 8:0.816161 9:-0.819215 10:1 11:-0.399237 12:0.846097 13:-0.393288 14:0.704457 15:-0.196879 16:-0.69609 17:0.274961 18:0.559999 19:0.196981 20:-0.77267 21:0.19395 22:0.0248107 23:-0.270327 
+-1 1:0.21919 2:0.865353 3:0.217149 4:0.801716 5:-0.21919 6:-0.865354 7:-0.764267 8:0.731461 9:-0.773152 10:0.939203 11:-0.367811 12:0.978364 13:-0.0897448 14:0.988306 15:-0.0599378 16:-0.987179 17:-0.175706 18:-0.99627 19:-0.286132 20:-0.999573 21:-0.0379434 22:-0.291702 23:-0.0632531 
+-1 1:-0.113137 2:0.749864 3:-0.112973 4:0.749864 5:0.113137 6:-0.749864 7:-0.626317 8:-1 9:-0.825229 10:0.381174 11:-0.401328 12:0.922936 13:-0.374256 14:-0.0395434 15:-0.20953 16:-0.995443 17:0.259146 18:-0.182804 19:0.191322 20:-0.0085764 21:0.217497 22:-0.583994 23:-0.0632531 
diff --git a/CCSVM2Phos.range b/CCSVM2Phos.range
new file mode 100644
index 0000000..c19b090
--- /dev/null
+++ b/CCSVM2Phos.range
@@ -0,0 +1,25 @@
+x
+-1 1
+1 0.00089166 0.91614902
+2 0.07594935999999999 0.6241135
+3 0.00089166 0.91597986
+4 0.07594935999999999 0.6241135
+5 0.08385096 0.99910837
+6 0.37588653 0.92405063
+7 0 0.99821669
+8 0 1
+9 -0.09029388000000001 0.93523729
+10 0 1
+11 -0.26260048 0.6159429
+12 0 1
+13 -0.34809536 0.83837408
+14 0 1
+15 -0.3935675 0.6020075700000001
+16 0 1
+17 -0.4748702 0.26105013
+18 0 1
+19 -0.19537343 0.13230015
+20 0 1
+21 -0.35513827 0.22823867
+22 0.80000597 3.5263319
+23 0.16302781 0.88247979
diff --git a/CMemLeak.c b/CMemLeak.c
new file mode 100644
index 0000000..b7e33cf
--- /dev/null
+++ b/CMemLeak.c
@@ -0,0 +1,421 @@
+// CMemLeak.c and CMemLeak.h are taken from the public domain.  If the
+// build flag DEBUG_MEMORY_LEAKS is set, then malloc is redefined to
+// assist in tracking down memory leaks.  Using Purify or Valgrind is
+// better, though.
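+//
+// Illustrative sketch (an assumption, not code from this project): when
+// DEBUG_MEMORY_LEAKS is defined, CMemLeak.h is expected to remap the
+// allocator calls onto tracking wrappers, roughly along the lines of:
+//
+//   #define malloc(size)  XWBMalloc((size), __FILE__, __LINE__)
+//   #define free(ptr)     XWBFree((ptr), __FILE__, __LINE__)
+//
+// (XWBMalloc/XWBFree are hypothetical names used only for illustration.)
+// The #undef block below strips any such remapping so that this file can
+// call the real C library allocator while it does the bookkeeping.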
+
+//#include "CMemLeak.h"
+#undef malloc
+#undef realloc
+#undef free
+#undef strdup
+#undef calloc
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+//#include <malloc.h>
+
+// Guards for checking illegal memory writes 
+static const char xwbProtect[] = "DeAd";
+static const unsigned int xwbProtSize = sizeof(xwbProtect);
+
+// Filename of report file 
+static const char xwbReportFilename[] = "MemLeak.txt";
+
+// Uninitialized memory - pick a value that will cause the most problems 
+static const unsigned char xwbUninit = 0x55;
+
+// Freed memory - pick a value that will cause the most problems
+static const unsigned char xwbFreed = 0xAA;
+
+static const char xwbIMW[] = "IMW";    // Illegal memory write 
+static const char xwbMLK[] = "MLK";     // Memory leak 
+static const char xwbFNH[] = "FNH";     // Free Non Heap memory 
+static const char xwbFMW[] = "FMW";     // Free Memory Write 
+
+// Node for storing the allocation details 
+struct XWBNode
+{
+    struct XWBNode* mPrev;
+    struct XWBNode* mNext;
+    void* mPtr;
+    unsigned int mSize;
+    const char* mFile;
+    unsigned int mLine;
+    const char* mName;
+};
+
+struct XWBList
+{
+    // Doubly linked list 
+    struct XWBNode* mHead;
+    struct XWBNode* mTail;
+
+    FILE* mReport;
+    unsigned long mAllocUsed;           // Max in the life of the program 
+    unsigned long mAllocTotal;          // Number of allocations 
+    unsigned long mAllocCurrent;        // Current allocation 
+
+    unsigned int mFree;                 // 1 if memory to be freed 
+    unsigned int mAllocMax;             // Not yet - preallocate nodes 
+    struct XWBNode* mNode;              // Not yet - contiguous node storage 
+    struct XWBNode* mUnused;            // Not yet - chain of free nodes 
+};
+
+
+// Link for storing allocation details 
+static struct XWBList xwbMem = 
+{
+    (struct XWBNode*) 0,
+    (struct XWBNode*) 0
+};
+
+// Forward declarations
+
+static struct XWBNode* XWBNodeNew(void);
+static void XWBNodeDelete(struct XWBNode* that);
+static void XWBNodeFree(
+    struct XWBNode* that,
+    const char* iName,
+    const char* iFile,
+    const unsigned int iLine);
+static void XWBNodeLink(
+    struct XWBNode*,
+    struct XWBNode*,
+    struct XWBNode*);
+static void XWBNodeSet(
+    struct XWBNode* that,
+    void* iPtr,
+    const unsigned int iSize,
+    const char* iFile,
+    const unsigned int iLine);
+static void XWBNodeIMWCheck(struct XWBNode* that);
+static void XWBMemNew(void);
+static struct XWBNode* XWBMemFind(
+    void* iPtr,
+    unsigned int* oSize,
+    const char** oFile,
+    unsigned int* oLine);
+static void XWBMemDump(void);
+static void XWBMemInsert(
+    void* iPtr,
+    const unsigned int iSize,
+    const char* iFile,
+    const unsigned int iLine);
+void XWBReport(const char* iTag);
+
+//  Final Report
+void XWBReportFinal(void)
+{
+    XWBReport("Final Report");
+    fclose(xwbMem.mReport);
+    xwbMem.mReport = 0;
+}
+
+
+static struct XWBNode* XWBNodeNew(void)
+{
+    struct XWBNode* that = (struct XWBNode*)malloc(sizeof(struct XWBNode));
+    that->mPrev = 0;
+    that->mNext = 0;
+    that->mName = 0;
+
+    return that;
+}
+
+static void XWBNodeDelete(struct XWBNode* that)
+{
+    // Unlink 
+    if (that->mPrev)
+    {
+        that->mPrev->mNext = that->mNext;
+    }
+        
+    if (that->mNext)
+    {
+        that->mNext->mPrev = that->mPrev;
+    }
+        
+    free(that);
+}
+
+static void XWBNodeFree(struct XWBNode* that, const char* iName, const char* iFile, const unsigned int iLine)
+{
+    that->mFile = iFile;
+    that->mLine = iLine;
+    that->mName = iName;
+}
+
+static void XWBNodeLink(struct XWBNode* that, struct XWBNode* iPrev, struct XWBNode* iNext)
+{
+    that->mPrev = iPrev;
+    if (iPrev != 0)
+        iPrev->mNext = that;
+        
+    that->mNext = iNext;
+    if (iNext != 0)
+        iNext->mPrev = that;
+}
+
+static void XWBNodeSet(
+    struct XWBNode* that,
+    void* iPtr,
+    const unsigned int iSize,
+    const char* iFile,
+    const unsigned int iLine
+)
+{
+    that->mPtr  = iPtr;
+    that->mSize = iSize;
+    that->mFile = iFile;
+    that->mLine = iLine;
+}
+
+static void XWBMemNew(void)
+{
+    // Set up the doubly linked list 
+    xwbMem.mHead = XWBNodeNew();
+    xwbMem.mTail = XWBNodeNew();
+    XWBNodeLink(xwbMem.mHead, 0, xwbMem.mTail);
+    XWBNodeLink(xwbMem.mTail, xwbMem.mHead, 0);
+
+    // Initialize statistics 
+    xwbMem.mAllocUsed = 0L;
+    xwbMem.mAllocTotal = 0L;
+    xwbMem.mAllocCurrent = 0L;
+
+    xwbMem.mFree = 1;
+
+    xwbMem.mReport = fopen(xwbReportFilename, "w");
+
+    atexit(XWBReportFinal);
+}
+
+// Dump List - used for debugging only
+static void XWBMemDump(void)
+{
+    int count;
+    struct XWBNode* iter = xwbMem.mHead;
+    
+    for (count = 0; iter != 0; count++, iter = iter->mNext)
+    {
+        fprintf(xwbMem.mReport, "%d node %p prev %p next %p\n", count, iter, iter->mPrev, iter->mNext);
+    }
+    fprintf(xwbMem.mReport, "\n");
+}
+// Insert into the tracking list
+static void XWBMemInsert(void* iPtr, const unsigned int iSize, const char* iFile,
+    const unsigned int iLine)
+{
+    struct XWBNode* node;
+    if (xwbMem.mHead == 0)
+    {
+        XWBMemNew();
+    }
+
+    // Link in the new node 
+    node = XWBNodeNew();
+    XWBNodeSet(node, iPtr, iSize, iFile, iLine);
+    XWBNodeLink(node, xwbMem.mTail->mPrev, xwbMem.mTail);
+
+    xwbMem.mAllocTotal += 1;
+    xwbMem.mAllocCurrent += iSize;
+    if (xwbMem.mAllocUsed < xwbMem.mAllocCurrent)
+    {
+        xwbMem.mAllocUsed = xwbMem.mAllocCurrent;
+    }
+}
+
+// Find a memory pointer
+static struct XWBNode* XWBMemFind(void* iPtr, unsigned int* oSize, 
+    const char** oFile, unsigned int* oLine)
+{
+    struct XWBNode* result = 0;
+    struct XWBNode* iter;
+    
+    iter = xwbMem.mTail;
+    while ((iter = iter->mPrev) != xwbMem.mHead)
+    {
+        if (iter->mPtr == iPtr)
+        {
+            result = iter;
+            *oSize = iter->mSize;
+            *oFile = iter->mFile;
+            *oLine = iter->mLine;
+            break;
+        }
+    }
+    return result;
+}
+
+//Allocate memory
+void* XWBMalloc(unsigned int iSize, const char* iFile, const unsigned int iLine)
+{
+    register unsigned int usize;
+    unsigned char* result;
+    
+    usize = ((iSize + xwbProtSize) / sizeof(unsigned int) + 1) * sizeof(unsigned int);
+    result = malloc(usize);
+    memset(result, xwbUninit, usize);
+    memcpy(&result[iSize], xwbProtect, xwbProtSize);
+    
+    XWBMemInsert(result, iSize, iFile, iLine);
+    return (void*) result;
+}
+
+// re-allocate memory
+void* XWBRealloc(void* iPtr, unsigned int iSize, const char* iFile, const unsigned int iLine)
+{
+    register unsigned int usize;
+    unsigned char* result;
+    struct XWBNode* node;
+    unsigned int size, line;
+    const char* name;
+    
+    usize = ((iSize + xwbProtSize) / sizeof(unsigned int) + 1) * sizeof(unsigned int);
+    result = realloc(iPtr, usize);
+    // memset (result, xwbUninit, usize); 
+    memcpy(&result[iSize], xwbProtect, xwbProtSize);
+    
+    // Update the allocation details 
+    name = iFile;
+    line = iLine;
+    node = XWBMemFind(iPtr, &size, &name, &line);
+    XWBNodeSet(node, result, iSize, name, line);
+
+    xwbMem.mAllocCurrent -= size;
+    xwbMem.mAllocCurrent += iSize;
+    if (xwbMem.mAllocUsed < xwbMem.mAllocCurrent)
+    {
+        xwbMem.mAllocUsed = xwbMem.mAllocCurrent;
+    }
+    return (void*)result;
+}
+
+// Deallocate memory
+void XWBFree(void* iPtr, const char* iDesc, const char* iFile, const unsigned int iLine)
+{
+    // Check if it is one of ours 
+    const char* file;
+    unsigned int line;
+    unsigned int size;
+    struct XWBNode* node;
+
+    node = XWBMemFind(iPtr, &size, &file, &line);
+    if (node != 0)
+    {
+        unsigned char* ptr = (unsigned char*)iPtr;
+        if (memcmp(&ptr[size], xwbProtect, xwbProtSize) != 0)
+        {
+            // Illegal memory write 
+            fprintf(xwbMem.mReport, "%s: %s allocated %s: %u\n", xwbIMW, iDesc, file, line);
+            fprintf(xwbMem.mReport, "   : %s deallocated %s: %u\n", iDesc, iFile, iLine); 
+        }
+        memset(iPtr, xwbFreed, size);
+        if (xwbMem.mFree)
+        {
+            free(iPtr);
+            XWBNodeDelete(node);
+        }
+        else
+        {
+            // Save the freed memory details 
+            XWBNodeFree(node, iDesc, iFile, iLine);
+        }
+        xwbMem.mAllocCurrent -= size;
+    }
+    else
+    {
+        // Free non-heap memory 
+        fprintf(xwbMem.mReport, "%s: %s deallocated %s: %u\n", xwbFNH, iDesc, iFile, iLine);
+        
+        // Don't do it otherwise it might crash 
+    }
+}
+
+// Do not free
+void XWBNoFree(void)
+{
+    if (xwbMem.mHead == 0)
+    {
+        XWBMemNew();
+    }
+    xwbMem.mFree = 0;
+}
+
+//Report
+void XWBReport(const char* iTag)
+{
+    struct XWBNode* iter;
+    unsigned char* ptr;
+    unsigned int size;
+    register unsigned int u;
+    
+    if (xwbMem.mHead == 0)
+    {
+        XWBMemNew();
+    }
+
+    if (iTag)
+    {
+        fprintf (xwbMem.mReport, "\n%s\n", iTag);
+    }
+
+    // XWBListDump (); 
+    iter = xwbMem.mHead;    
+    while ((iter = iter->mNext) != xwbMem.mTail)
+    {
+        ptr = (unsigned char*)iter->mPtr;
+        size = iter->mSize;
+        if (iter->mName)
+        {
+            // Check that there are no FMWs 
+            for (u = 0; u < size; u++)
+            {
+                if (ptr[u] != xwbFreed)
+                {
+                    fprintf(xwbMem.mReport, "%s: %s freed at %s: %u\n",
+                        xwbFMW, iter->mName, iter->mFile, iter->mLine);
+                    break;
+                }
+            }
+        }
+        else
+        {
+            fprintf(xwbMem.mReport, "%s: %p %u bytes allocated %s: %u\n", 
+                xwbMLK, iter->mPtr, iter->mSize, iter->mFile, iter->mLine);
+            if (memcmp(&ptr[size], xwbProtect, xwbProtSize) != 0)
+            {
+                // Illegal memory write 
+                fprintf(xwbMem.mReport, "%s: %p allocated %s: %u\n", 
+                    xwbIMW, ptr, iter->mFile, iter->mLine);
+            }
+        }
+    }
+
+    // Print statistics 
+    fprintf(xwbMem.mReport, "Total allocations    : %ld\n",
+        xwbMem.mAllocTotal);
+    fprintf(xwbMem.mReport, "Max memory allocation: %ld (%dK)\n", 
+        xwbMem.mAllocUsed, xwbMem.mAllocUsed / 1024);
+    fprintf(xwbMem.mReport, "Total leak           : %ld\n\n", 
+        xwbMem.mAllocCurrent);
+}
+
+// Duplicate a string
+char* XWBStrDup(const char* iOrig, const char* iFile, const unsigned int iLine)
+{
+    char* result;
+    result = XWBMalloc(strlen(iOrig) + 1, iFile, iLine);
+    strcpy(result, iOrig);
+    return result;
+}
+
+// Allocate a number of items of a specified size
+void* XWBCalloc(unsigned int iNum, unsigned int iSize, const char* iFile, const unsigned int iLine)
+{
+    void* result;
+    unsigned int actual =(((iSize - 1)/sizeof(int)) + 1) * sizeof(int) * iNum;
+    result = XWBMalloc(actual, iFile, iLine);
+    memset(result, 0, actual);
+    return result;
+}
diff --git a/CMemLeak.h b/CMemLeak.h
new file mode 100644
index 0000000..c89d5b2
--- /dev/null
+++ b/CMemLeak.h
@@ -0,0 +1,43 @@
+// CMemLeak.c and CMemLeak.h are taken from the public domain.  If the
+// build flag DEBUG_MEMORY_LEAKS is set, then malloc is redefined,
+// to assist in tracking down memory leaks.  Using Purify or Valgrind
+// is better, though.
+#ifndef CMEMLEAK_H
+#define CMEMLEAK_H
+
+#include <stdlib.h>
+#include <string.h>
+
+// Used for tracking allocations 
+extern void* XWBMalloc(unsigned int iSize, const char* iFile, const unsigned int iLine);
+extern void* XWBCalloc(unsigned int iNum, unsigned int iSize, const char* iFile,
+    const unsigned int iLine);
+extern char* XWBStrDup(const char* iOrig, const char* iFile, const unsigned int iLine);
+
+// Used for tracking reallocations 
+extern void* XWBRealloc(void* iPrev, unsigned int iSize, const char* iFile, const unsigned int iLine);
+
+// Used for tracking deallocations 
+extern void XWBFree(void* iPtr, const char* iDesc, const char* iFile, const unsigned int iLine);
+
+// Used for reporting 
+extern void XWBReport(const char* iTag);
+extern void XWBReportFinal(void);
+
+// Used for detecting FMW 
+extern void XWBNoFree(void);
+extern void XWBPreallocate(const int iInitialAllocations);
+
+//#define DEBUG_MEMORY_LEAKS
+
+// Change this ifdef, in order to redefine malloc(etc.) and track memory leaks:
+#ifdef DEBUG_MEMORY_LEAKS
+#define malloc(x) XWBMalloc((x), __FILE__, __LINE__)
+#define realloc(x,size) XWBRealloc(x,(size),__FILE__,__LINE__)
+#define free(x)   XWBFree(x, #x, __FILE__, __LINE__)
+#define strdup(x) XWBStrDup(x, __FILE__, __LINE__)
+#define calloc(num,size) XWBCalloc((num), (size), __FILE__, __LINE__)
+#endif
+
+#endif // CMEMLEAK_H
+
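
A minimal usage sketch for the allocation tracker above, assuming DEBUG_MEMORY_LEAKS is defined before CMemLeak.h is included (e.g. via a -DDEBUG_MEMORY_LEAKS build flag) and the program is linked against CMemLeak.c; the main() below is only an illustration of how the redefined calls and the report functions fit together:

    /* Illustration only: exercise the CMemLeak wrappers. */
    #define DEBUG_MEMORY_LEAKS
    #include "CMemLeak.h"

    int main(void)
    {
        char* Copy;
        char* Leaked = (char*)malloc(64);  /* expands to XWBMalloc(64, __FILE__, __LINE__) */
        Copy = strdup("hello");            /* expands to XWBStrDup("hello", ...) */
        Leaked[0] = 'x';
        free(Copy);                        /* expands to XWBFree(Copy, "Copy", ...) */
        XWBReport("after setup");          /* dump live allocations so far to MemLeak.txt */
        return 0;                          /* the never-freed Leaked buffer is reported as
                                              MLK at exit via the atexit(XWBReportFinal) hook */
    }
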
diff --git a/Ch2BNPEP.dat b/Ch2BNPEP.dat
new file mode 100644
index 0000000..2ae019f
Binary files /dev/null and b/Ch2BNPEP.dat differ
diff --git a/Ch2BNPEPQ.dat b/Ch2BNPEPQ.dat
new file mode 100644
index 0000000..288d3a1
Binary files /dev/null and b/Ch2BNPEPQ.dat differ
diff --git a/Ch3BNPEP.dat b/Ch3BNPEP.dat
new file mode 100644
index 0000000..382db8b
Binary files /dev/null and b/Ch3BNPEP.dat differ
diff --git a/Ch3BNPEPQ.dat b/Ch3BNPEPQ.dat
new file mode 100644
index 0000000..5cbb6fb
Binary files /dev/null and b/Ch3BNPEPQ.dat differ
diff --git a/ChargeState.c b/ChargeState.c
new file mode 100644
index 0000000..7b3fbd3
--- /dev/null
+++ b/ChargeState.c
@@ -0,0 +1,899 @@
+//Title:          ChargeState.c
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+#include "CMemLeak.h"
+#include <stdio.h>
+#include <math.h>
+#include <stdlib.h>
+#include <assert.h>
+#include "Utils.h"
+#include "ChargeState.h"
+#include "Spectrum.h"
+#include "Inspect.h"
+#include "SVM.h"
+#include "Errors.h"
+#include "LDA.h"
+#include "IonScoring.h"
+
+#ifdef _WIN32
+#include <Windows.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#else
+#include <dirent.h>
+#include <sys/stat.h>
+#endif
+
+#define CC_USE_SVM
+
+#define EPSILON (float)0.00001
+
+SVMModel** PMCModel = NULL;
+
+extern LDAModel* PMCCharge1LDA;
+extern LDAModel* PMCCharge2LDA;
+extern LDAModel* PMCCharge3LDA;
+
+extern SVMModel* PMCCharge1SVM;
+extern SVMModel* PMCCharge2SVM;
+extern SVMModel* PMCCharge3SVM;
+
+extern LDAModel* CCModel1LDA;
+extern LDAModel* CCModel2LDA;
+
+extern SVMModel* CCModel1SVM;
+extern SVMModel* CCModel2SVM;
+
+extern PRMBayesianModel* PRMModelCharge2;
+
+// For converting parts-per-million:
+#define ONE_MILLION 1000000
+
+///////////////////////////////////////////////////
+// Forward declarations:
+void ConvolveMassCorrectedSpectrum(PMCInfo* Info, PMCSpectrumInfo* SpectrumInfo);
+
+///////////////////////////////////////////////////
+// Functions:
+
+// Get charge correction features.  Most of the charge correction features are set
+// during parent mass correction - if BY convolution assuming charge 2 is very high,
+// then it's most probable that the true spectrum charge is 2.
+void GetChargeCorrectionFeatures1(PMCSpectrumInfo* SpectrumInfo1, PMCSpectrumInfo* SpectrumInfo2,
+    PMCSpectrumInfo* SpectrumInfo3, float* Features)
+{
+    float TotalIntensity = 0;
+    float LowIntensity = 0; // Below m/z
+    float MediumIntensity = 0; // Between m/z and 2*m/z
+    float HighIntensity = 0; // Above 2*m/z
+    int LowPeakCount = 0;
+    int MediumPeakCount = 0;
+    int HighPeakCount = 0;
+    int PeakIndex;
+    int FeatureIndex = 0;
+    float Competitor;
+    int MZ;
+
+    MSSpectrum* Spectrum = SpectrumInfo1->Spectrum;
+    MZ = SpectrumInfo1->BestInfo->ParentMass;
+    //
+    for (PeakIndex = 0; PeakIndex < Spectrum->PeakCount; PeakIndex++)
+    {
+        TotalIntensity += Spectrum->Peaks[PeakIndex].Intensity;
+        if (Spectrum->Peaks[PeakIndex].Mass <= MZ)
+        {
+            LowPeakCount++;
+            LowIntensity += Spectrum->Peaks[PeakIndex].Intensity;
+        }
+        else if (Spectrum->Peaks[PeakIndex].Mass <= 2 * MZ)
+        {
+            MediumPeakCount++;
+            MediumIntensity += Spectrum->Peaks[PeakIndex].Intensity;
+        }
+        else 
+        {
+            HighPeakCount++;
+            HighIntensity += Spectrum->Peaks[PeakIndex].Intensity;
+        }
+    }
+
+    // Feature: How much of the spectral intensity is above M/z?  
+    Features[FeatureIndex++] = (MediumIntensity + HighIntensity) / (float)max(0.001, TotalIntensity);
+    Features[FeatureIndex++] = (MediumPeakCount + HighPeakCount) / (float)Spectrum->PeakCount;
+        
+    // Features: How do the B/Y convolution values compare between charges 1 and 2?
+    Competitor = max(SpectrumInfo2->BestInfo->Convolve[0], SpectrumInfo3->BestInfo->Convolve[0]);
+    Features[FeatureIndex++] = SpectrumInfo1->BestInfo->Convolve[0] / max(EPSILON, SpectrumInfo1->BestInfo->Convolve[0] + Competitor);
+    Features[FeatureIndex++] = SpectrumInfo1->BestInfo->Convolve[0] - Competitor;
+    Competitor = max(SpectrumInfo2->BestInfo->Convolve[1], SpectrumInfo3->BestInfo->Convolve[1]);
+    Features[FeatureIndex++] = SpectrumInfo1->BestInfo->Convolve[1] / max(EPSILON, SpectrumInfo1->BestInfo->Convolve[1] + Competitor);
+    Features[FeatureIndex++] = SpectrumInfo1->BestInfo->Convolve[1] - Competitor;
+    Competitor = max(SpectrumInfo2->BestInfo->Convolve[2], SpectrumInfo3->BestInfo->Convolve[2]);
+    Features[FeatureIndex++] = SpectrumInfo1->BestInfo->Convolve[2] / max(EPSILON, SpectrumInfo1->BestInfo->Convolve[2] + Competitor);
+    Features[FeatureIndex++] = SpectrumInfo1->BestInfo->Convolve[2] - Competitor;
+    Competitor = max(SpectrumInfo2->BestInfo->Convolve[3], SpectrumInfo3->BestInfo->Convolve[3]);
+    Features[FeatureIndex++] = SpectrumInfo1->BestInfo->Convolve[3] / max(EPSILON, SpectrumInfo1->BestInfo->Convolve[3] + Competitor);
+    Features[FeatureIndex++] = SpectrumInfo1->BestInfo->Convolve[3] - Competitor;
+}
+
+// Get charge correction features.  Most of the charge correction features are set
+// during parent mass correction - if BY convolution assuming charge 2 is very high,
+// then it's most probable that the true spectrum charge is 2.
+void GetChargeCorrectionFeatures2(PMCSpectrumInfo* SpectrumInfo2, PMCSpectrumInfo* SpectrumInfo3,
+    float* Features)
+{
+    float TotalIntensity = 0;
+    float MediumIntensity = 0;
+    float HighIntensity = 0;
+    float LowIntensity = 0;
+    int LowPeakCount = 0;
+    int MediumPeakCount = 0;
+    int HighPeakCount = 0;
+    int PeakIndex;
+    int FeatureIndex = 0;
+    float MZ;
+    float Balance2;
+    MSSpectrum* Spectrum = SpectrumInfo2->Spectrum;
+    //
+    MZ = SpectrumInfo2->BestInfo->ParentMass / (float)2.0;
+    //
+    for (PeakIndex = 0; PeakIndex < Spectrum->PeakCount; PeakIndex++)
+    {
+        TotalIntensity += Spectrum->Peaks[PeakIndex].Intensity;
+        if (Spectrum->Peaks[PeakIndex].Mass <= MZ)
+        {
+            LowPeakCount++;
+            LowIntensity += Spectrum->Peaks[PeakIndex].Intensity;
+        }
+        else if (Spectrum->Peaks[PeakIndex].Mass <= 2 * MZ)
+        {
+            MediumPeakCount++;
+            MediumIntensity += Spectrum->Peaks[PeakIndex].Intensity;
+        }
+        else 
+        {
+            HighPeakCount++;
+            HighIntensity += Spectrum->Peaks[PeakIndex].Intensity;
+        }
+    }
+
+    // Feature: How much of the spectral intensity is above M/z?  
+    Features[FeatureIndex++] = (MediumIntensity + HighIntensity) / TotalIntensity;
+    Features[FeatureIndex++] = (MediumPeakCount + HighPeakCount) / (float)Spectrum->PeakCount;
+    Features[FeatureIndex++] = (MediumIntensity) / TotalIntensity;
+    Features[FeatureIndex++] = (MediumPeakCount) / (float)Spectrum->PeakCount;
+    Features[FeatureIndex++] = (LowIntensity) / TotalIntensity;
+    Features[FeatureIndex++] = (LowPeakCount) / (float)Spectrum->PeakCount;
+    //Features[FeatureIndex++] = (HighIntensity) / TotalIntensity;
+    //Features[FeatureIndex++] = (HighPeakCount) / (float)Spectrum->PeakCount;
+
+    // Features: Balance between low and med-to-high:
+    Balance2 = (float)fabs((MediumIntensity + HighIntensity) - LowIntensity) / TotalIntensity;
+    Features[FeatureIndex++] = Balance2;
+    
+    // Features: How do the B/Y convolution values compare between charges 2 and 3?
+    Features[FeatureIndex++] = SpectrumInfo2->BestInfo->Convolve[0] / max(EPSILON, SpectrumInfo2->BestInfo->Convolve[0] + SpectrumInfo3->BestInfo->Convolve[0]);
+    Features[FeatureIndex++] = SpectrumInfo2->BestInfo->Convolve[0] - SpectrumInfo3->BestInfo->Convolve[0];
+    Features[FeatureIndex++] = SpectrumInfo2->BestInfo->Convolve[1] / max(EPSILON, SpectrumInfo2->BestInfo->Convolve[1] + SpectrumInfo3->BestInfo->Convolve[1]);
+    Features[FeatureIndex++] = SpectrumInfo2->BestInfo->Convolve[1] - SpectrumInfo3->BestInfo->Convolve[1];
+    Features[FeatureIndex++] = SpectrumInfo2->BestInfo->Convolve[2] / max(EPSILON, SpectrumInfo2->BestInfo->Convolve[2] + SpectrumInfo3->BestInfo->Convolve[2]);
+    Features[FeatureIndex++] = SpectrumInfo2->BestInfo->Convolve[2] - SpectrumInfo3->BestInfo->Convolve[2];
+    Features[FeatureIndex++] = SpectrumInfo2->BestInfo->Convolve[3] / max(EPSILON, SpectrumInfo2->BestInfo->Convolve[3] + SpectrumInfo3->BestInfo->Convolve[3]);
+    Features[FeatureIndex++] = SpectrumInfo2->BestInfo->Convolve[3] - SpectrumInfo3->BestInfo->Convolve[3];
+    Features[FeatureIndex++] = SpectrumInfo2->BestInfo->Convolve2[0] / max(EPSILON, SpectrumInfo2->BestInfo->Convolve2[0] + SpectrumInfo3->BestInfo->Convolve2[0]);
+    Features[FeatureIndex++] = SpectrumInfo2->BestInfo->Convolve2[0] - SpectrumInfo3->BestInfo->Convolve2[0];
+    Features[FeatureIndex++] = SpectrumInfo2->BestInfo->Convolve2[1] / max(EPSILON, SpectrumInfo2->BestInfo->Convolve2[1] + SpectrumInfo3->BestInfo->Convolve2[1]);
+    Features[FeatureIndex++] = SpectrumInfo2->BestInfo->Convolve2[1] - SpectrumInfo3->BestInfo->Convolve2[1];
+    Features[FeatureIndex++] = SpectrumInfo2->BestInfo->Convolve2[2] / max(EPSILON, SpectrumInfo2->BestInfo->Convolve2[2] + SpectrumInfo3->BestInfo->Convolve2[2]);
+    Features[FeatureIndex++] = SpectrumInfo2->BestInfo->Convolve2[2] - SpectrumInfo3->BestInfo->Convolve2[2];
+    //Features[FeatureIndex++] = Spectrum->PeakCount;
+    Features[FeatureIndex++] = (SpectrumInfo2->BestInfo->ParentMass / (float)(1000 * DALTON));
+}
+
+
+//Phosphorylation uses a distinct PMC model, so that means it needs a distinct CC model
+//most notably, we are going to use the IntensePeakIntensity and skew
+void GetChargeCorrectionFeatures2Phos(PMCSpectrumInfo* SpectrumInfo2, PMCSpectrumInfo* SpectrumInfo3,
+    float* Features)
+{
+    float TotalIntensity = 0;
+    float MediumIntensity = 0;
+    float HighIntensity = 0;
+    float LowIntensity = 0;
+    int LowPeakCount = 0;
+    int MediumPeakCount = 0;
+    int HighPeakCount = 0;
+    int PeakIndex;
+    int FeatureIndex = 0;
+    float MZ;
+    float Balance2;
+	float PhosPeak2;
+	float PhosPeak3;
+    MSSpectrum* Spectrum = SpectrumInfo2->Spectrum;
+    //
+    MZ = SpectrumInfo2->BestInfo->ParentMass / (float)2.0;
+    //
+	for (PeakIndex = 0; PeakIndex < Spectrum->PeakCount; PeakIndex++)
+    {
+        TotalIntensity += Spectrum->Peaks[PeakIndex].Intensity;
+        if (Spectrum->Peaks[PeakIndex].Mass <= MZ)
+        {
+            LowPeakCount++;
+            LowIntensity += Spectrum->Peaks[PeakIndex].Intensity;
+        }
+        else if (Spectrum->Peaks[PeakIndex].Mass <= 2 * MZ)
+        {
+            MediumPeakCount++;
+            MediumIntensity += Spectrum->Peaks[PeakIndex].Intensity;
+        }
+        else 
+        {
+            HighPeakCount++;
+            HighIntensity += Spectrum->Peaks[PeakIndex].Intensity;
+        }
+    }
+
+    // Feature: How much of the spectral intensity is above M/z?  
+    Features[FeatureIndex++] = (MediumIntensity + HighIntensity) / TotalIntensity;
+    Features[FeatureIndex++] = (MediumPeakCount + HighPeakCount) / (float)Spectrum->PeakCount;
+    Features[FeatureIndex++] = (MediumIntensity) / TotalIntensity;
+    Features[FeatureIndex++] = (MediumPeakCount) / (float)Spectrum->PeakCount;
+    Features[FeatureIndex++] = (LowIntensity) / TotalIntensity;
+    Features[FeatureIndex++] = (LowPeakCount) / (float)Spectrum->PeakCount;
+    //Features[FeatureIndex++] = (HighIntensity) / TotalIntensity;
+    //Features[FeatureIndex++] = (HighPeakCount) / (float)Spectrum->PeakCount;
+
+    // Features: Balance between low and med-to-high:
+    Balance2 = (float)fabs((MediumIntensity + HighIntensity) - LowIntensity) / TotalIntensity;
+    Features[FeatureIndex++] = Balance2;
+    
+    // Features: How do the B/Y convolution values compare between charges 2 and 3?
+    Features[FeatureIndex++] = SpectrumInfo2->BestInfo->Convolve[0] / max(EPSILON, SpectrumInfo2->BestInfo->Convolve[0] + SpectrumInfo3->BestInfo->Convolve[0]);
+    Features[FeatureIndex++] = SpectrumInfo2->BestInfo->Convolve[0] - SpectrumInfo3->BestInfo->Convolve[0];
+    Features[FeatureIndex++] = SpectrumInfo2->BestInfo->Convolve[1] / max(EPSILON, SpectrumInfo2->BestInfo->Convolve[1] + SpectrumInfo3->BestInfo->Convolve[1]);
+    Features[FeatureIndex++] = SpectrumInfo2->BestInfo->Convolve[1] - SpectrumInfo3->BestInfo->Convolve[1];
+    Features[FeatureIndex++] = SpectrumInfo2->BestInfo->Convolve[2] / max(EPSILON, SpectrumInfo2->BestInfo->Convolve[2] + SpectrumInfo3->BestInfo->Convolve[2]);
+    Features[FeatureIndex++] = SpectrumInfo2->BestInfo->Convolve[2] - SpectrumInfo3->BestInfo->Convolve[2];
+    Features[FeatureIndex++] = SpectrumInfo2->BestInfo->Convolve[3] / max(EPSILON, SpectrumInfo2->BestInfo->Convolve[3] + SpectrumInfo3->BestInfo->Convolve[3]);
+    Features[FeatureIndex++] = SpectrumInfo2->BestInfo->Convolve[3] - SpectrumInfo3->BestInfo->Convolve[3];
+    Features[FeatureIndex++] = SpectrumInfo2->BestInfo->Convolve2[0] / max(EPSILON, SpectrumInfo2->BestInfo->Convolve2[0] + SpectrumInfo3->BestInfo->Convolve2[0]);
+    Features[FeatureIndex++] = SpectrumInfo2->BestInfo->Convolve2[0] - SpectrumInfo3->BestInfo->Convolve2[0];
+    Features[FeatureIndex++] = SpectrumInfo2->BestInfo->Convolve2[1] / max(EPSILON, SpectrumInfo2->BestInfo->Convolve2[1] + SpectrumInfo3->BestInfo->Convolve2[1]);
+    Features[FeatureIndex++] = SpectrumInfo2->BestInfo->Convolve2[1] - SpectrumInfo3->BestInfo->Convolve2[1];
+    Features[FeatureIndex++] = SpectrumInfo2->BestInfo->Convolve2[2] / max(EPSILON, SpectrumInfo2->BestInfo->Convolve2[2] + SpectrumInfo3->BestInfo->Convolve2[2]);
+    Features[FeatureIndex++] = SpectrumInfo2->BestInfo->Convolve2[2] - SpectrumInfo3->BestInfo->Convolve2[2];
+    //Features[FeatureIndex++] = Spectrum->PeakCount;
+    Features[FeatureIndex++] = (SpectrumInfo2->BestInfo->ParentMass / (float)(1000 * DALTON));
+	//M-p peak related stuff
+	PhosPeak2 = (float) (max(0.1, SpectrumInfo2->BestInfo->IntensePeakIntensity[2]));
+	PhosPeak3 = (float) (max(0.1, SpectrumInfo3->BestInfo->IntensePeakIntensity[2]));
+	Features[FeatureIndex++] = PhosPeak2 / (PhosPeak2 + PhosPeak3);
+}
+
+
+int ChargeCorrectSpectrum(SpectrumNode* Node, float* Model1Score, float* Model2Score)
+{
+    PMCSpectrumInfo* SpectrumInfo1;
+    PMCSpectrumInfo* SpectrumInfo2;
+    PMCSpectrumInfo* SpectrumInfo3;
+    float CCFeatures1[64];
+    float CCFeatures2[64];
+    float Score1;
+    float Score2;
+    //
+    Score1 = 0;
+#ifdef CC_USE_SVM
+    LoadCCModelSVM(0);
+#else
+    LoadCCModelLDA(0);
+#endif
+    /////////////////////////////////
+    // Charge 1 PMC:
+    Node->Spectrum->Charge = 1;
+    Node->Spectrum->ParentMass = (Node->Spectrum->MZ * 1);
+    SpectrumInfo1 = GetPMCSpectrumInfo(Node->Spectrum);
+    PerformPMC(SpectrumInfo1);
+    /////////////////////////////////
+    // Charge 2 PMC:
+    Node->Spectrum->Charge = 2;
+    Node->Spectrum->ParentMass = (Node->Spectrum->MZ * 2) - HYDROGEN_MASS;
+    SpectrumInfo2 = GetPMCSpectrumInfo(Node->Spectrum);
+    PerformPMC(SpectrumInfo2);
+    /////////////////////////////////
+    // Charge 3 PMC:
+    Node->Spectrum->Charge = 3;
+    Node->Spectrum->ParentMass = (Node->Spectrum->MZ * 3) - 2 * HYDROGEN_MASS;
+    SpectrumInfo3 = GetPMCSpectrumInfo(Node->Spectrum);
+    PerformPMC(SpectrumInfo3);
+    // Get features:
+    memset(CCFeatures1, 0, sizeof(float) * 64);
+    memset(CCFeatures2, 0, sizeof(float) * 64);
+    GetChargeCorrectionFeatures1(SpectrumInfo1, SpectrumInfo2, SpectrumInfo3, CCFeatures1);
+    GetChargeCorrectionFeatures2(SpectrumInfo2, SpectrumInfo3, CCFeatures2); //change to Phos function if you need
+#ifdef CC_USE_SVM
+    Score1 = SVMClassify(CCModel1SVM, CCFeatures1, 0);
+    Score2 = SVMClassify(CCModel2SVM, CCFeatures2, 0);
+#else
+    Score1 = ApplyLDAModel(CCModel1LDA, CCFeatures1);
+    Score2 = ApplyLDAModel(CCModel2LDA, CCFeatures2);
+#endif
+    // If the caller asked for them, return the scores from the two models:
+    if (Model1Score)
+    {
+        *Model1Score = Score1;
+    }
+    if (Model2Score)
+    {
+        *Model2Score = Score2;
+    }
+    // Free temporary structs:
+    FreePMCSpectrumInfo(SpectrumInfo1);
+    FreePMCSpectrumInfo(SpectrumInfo2);
+    FreePMCSpectrumInfo(SpectrumInfo3);
+    // Use cutoffs to determine the favorite charge state:
+    if (Score1 > 1.0)
+    {
+        return 1;
+    }
+    if (Score2 > 0.0)
+    {
+        return 2;
+    }
+    return 3;
+}
+
+// We've loaded a spectrum.  Now let's adjust its parent mass and its charge to the 
+// best possible.
+ void TweakSpectrum(SpectrumNode* Node)
+{
+    MSSpectrum* Spectrum;
+    PMCSpectrumInfo* SpectrumInfo;
+    PMCSpectrumInfo* SpectrumInfo1;
+    PMCSpectrumInfo* SpectrumInfo2;
+    PMCSpectrumInfo* SpectrumInfo3;
+    float CCFeatures[64];
+    float CCScore;
+    int TweakIndex;
+	int Charge;
+    //
+    if (!Node->Spectrum || !Node->Spectrum->PeakCount)
+    {
+        return;
+    }
+    Spectrum = Node->Spectrum;
+    //fflush(stdout);
+    // If our models aren't loaded - which should NEVER happen in production - then we'll
+    // trust the input mass and charge.
+    if (!PRMModelCharge2)
+    {
+        if (!Spectrum->Charge)
+        {
+            Spectrum->Charge = 2;
+            Spectrum->ParentMass = (Spectrum->MZ * 2) - HYDROGEN_MASS;
+	    //printf("NEC_ERROR: We are unable to load Model and spectrum has no charge!!!");
+        }
+        TweakIndex = (Spectrum->Charge - 1) * 2;
+        Node->Tweaks[TweakIndex].Charge = Spectrum->Charge;
+        Node->Tweaks[TweakIndex].ParentMass = Spectrum->ParentMass;
+	//printf("NEC_ERROR: We are unable to load PRMModelCharge!!!!\n");
+        return;
+    }
+
+    Node->Spectrum->ParentMass = (Spectrum->MZ * 2) - HYDROGEN_MASS;
+
+    //printf("A\n");
+    //fflush(stdout);
+    PrepareSpectrumForIonScoring(PRMModelCharge2, Node->Spectrum, 0);
+    //SpectrumComputeBinnedIntensities(Node);
+    
+    if (!GlobalOptions->MultiChargeMode && Spectrum->FileChargeFlag)
+    {
+        // The spectrum has charge(s) assigned, and we're trusting the charge(s).
+
+      for (Charge = 1; Charge < 5; Charge++)
+	{
+	  if (Spectrum->FileCharge[Charge])
+	    {
+	      //printf("Tweaking for charge %d\n",Charge);
+	      Spectrum->Charge = Charge;
+	      SpectrumInfo = GetPMCSpectrumInfo(Spectrum);
+			
+	      PerformPMC(SpectrumInfo);
+	      TweakIndex = min(3, Spectrum->Charge - 1) * 2;
+	      Node->Tweaks[TweakIndex].Charge = Spectrum->Charge;
+	      Node->Tweaks[TweakIndex].ParentMass = SpectrumInfo->BestInfo->ParentMass;
+	      //printf("NEC_ERROR: We have file charge!! Tweak [%d]: z= %d, PM=%d\n",TweakIndex,Spectrum->Charge,Node->Tweaks[TweakIndex].ParentMass);
+	      if (SpectrumInfo->RunnerUpInfo)
+		{
+		  Node->Tweaks[TweakIndex + 1].Charge = Spectrum->Charge;
+		  Node->Tweaks[TweakIndex + 1].ParentMass = SpectrumInfo->RunnerUpInfo->ParentMass;
+		}
+	      //SpectrumComputeNoiseDistributions(Node);
+	      FreePMCSpectrumInfo(SpectrumInfo);
+	    }
+	}
+      return;
+    }
+
+#ifdef CC_USE_SVM
+    //printf("NEC_ERROR: Using LoadCCModelSVM\n");
+    LoadCCModelSVM(0);
+#else
+    //printf("NEC_ERROR: Using LoadCCModelLDA\n");
+    LoadCCModelLDA(0);
+#endif
+    
+   
+    // Either the spectrum has no charge set, or we're overriding the file guess 
+    // with our charge correction guess.
+
+    // Find the best parent mass if the charge is 1:
+    Node->Spectrum->Charge = 1;
+    SpectrumInfo1 = GetPMCSpectrumInfo(Spectrum);
+    
+    //printf("D\n");
+    //fflush(stdout);
+    
+    PerformPMC(SpectrumInfo1);
+    Node->Tweaks[0].Charge = 1;
+    Node->Tweaks[0].ParentMass = SpectrumInfo1->BestInfo->ParentMass;
+    //printf("NEC_ERROR: Tweak [0]: z= %d, PM=%d\n",Node->Tweaks[0].Charge,Node->Tweaks[0].ParentMass);
+    if (SpectrumInfo1->RunnerUpInfo)
+    {
+        Node->Tweaks[1].Charge = 1;
+        Node->Tweaks[1].ParentMass = SpectrumInfo1->RunnerUpInfo->ParentMass;
+	//printf("NEC_ERROR: Tweak [1]: z= %d, PM=%d\n",Node->Tweaks[1].Charge,Node->Tweaks[1].ParentMass);
+    }
+
+    // Find the best parent mass if the charge is 2:
+
+    //printf("E\n");
+    //fflush(stdout);
+
+    Node->Spectrum->Charge = 2;
+    SpectrumInfo2 = GetPMCSpectrumInfo(Spectrum);
+    PerformPMC(SpectrumInfo2);
+    Node->Tweaks[2].Charge = 2;
+    Node->Tweaks[2].ParentMass = SpectrumInfo2->BestInfo->ParentMass;
+    //printf("NEC_ERROR: Tweak[2]: z= %d, PM=%d\n",Node->Tweaks[2].Charge,Node->Tweaks[2].ParentMass);
+    if (SpectrumInfo2->RunnerUpInfo)
+    {
+        Node->Tweaks[3].Charge = 2;
+        Node->Tweaks[3].ParentMass = SpectrumInfo2->RunnerUpInfo->ParentMass;
+	//printf("NEC_ERROR: Tweak [3]: z= %d, PM=%d\n",Node->Tweaks[3].Charge,Node->Tweaks[3].ParentMass);
+    }
+
+    // Find the best parent mass if the charge is 3:
+ 
+    Node->Spectrum->Charge = 3;
+    SpectrumInfo3 = GetPMCSpectrumInfo(Spectrum);
+    PerformPMC(SpectrumInfo3);
+    Node->Tweaks[4].Charge = 3;
+    Node->Tweaks[4].ParentMass = SpectrumInfo3->BestInfo->ParentMass;
+    //printf("NEC_ERROR: Tweak [4]: z= %d, PM=%d\n",Node->Tweaks[4].Charge,Node->Tweaks[4].ParentMass);
+    if (SpectrumInfo3->RunnerUpInfo)
+    {
+        Node->Tweaks[5].Charge = 3;
+        Node->Tweaks[5].ParentMass = SpectrumInfo3->RunnerUpInfo->ParentMass;
+	//printf("NEC_ERROR: Tweak [5]: z= %d, PM=%d\n",Node->Tweaks[5].Charge,Node->Tweaks[5].ParentMass);
+    }
+    //printf("F\n");
+    //fflush(stdout);
+    GetChargeCorrectionFeatures1(SpectrumInfo1, SpectrumInfo2, SpectrumInfo3, CCFeatures);
+    CCScore = SVMClassify(CCModel1SVM, CCFeatures, 0);
+    if (CCScore > 0)
+    {
+        // It's a singly-charged spectrum:
+        Node->Tweaks[2].Charge = 0;
+        Node->Tweaks[3].Charge = 0;
+        Node->Tweaks[4].Charge = 0;
+        Node->Tweaks[5].Charge = 0;
+    }
+    else
+    {
+        // It's a multiply-charged spectrum:
+        Node->Tweaks[0].Charge = 0;
+        Node->Tweaks[1].Charge = 0;
+		if (GlobalOptions->PhosphorylationFlag)
+		{
+			GetChargeCorrectionFeatures2Phos(SpectrumInfo2, SpectrumInfo3, CCFeatures);
+		}
+		else
+		{
+			GetChargeCorrectionFeatures2(SpectrumInfo2, SpectrumInfo3, CCFeatures);
+		}
+        CCScore = SVMClassify(CCModel2SVM, CCFeatures, 0);
+        if (CCScore >= 0.5)
+        {
+            // It's clearly not charge-3:
+            Node->Tweaks[4].Charge = 0;
+            Node->Tweaks[5].Charge = 0;
+        }
+        if (CCScore <= -0.5)
+        {
+            // It's clearly not charge-2:
+            Node->Tweaks[2].Charge = 0;
+            Node->Tweaks[3].Charge = 0;
+        }
+    }
+    //printf("G\n");
+    //fflush(stdout);
+    
+    // cleanup:
+    FreePMCSpectrumInfo(SpectrumInfo1);
+    FreePMCSpectrumInfo(SpectrumInfo2);
+    FreePMCSpectrumInfo(SpectrumInfo3);
+    //SpectrumComputeNoiseDistributions(Node);
+    
+    return;
+}
+
+void TweakSpectrum_NEC(SpectrumNode* Node)
+{
+    MSSpectrum* Spectrum;
+    PMCSpectrumInfo* SpectrumInfo;
+    PMCSpectrumInfo* SpectrumInfo1;
+    PMCSpectrumInfo* SpectrumInfo2;
+    PMCSpectrumInfo* SpectrumInfo3;
+    float CCFeatures[64];
+    float CCScore;
+    int TweakIndex;
+	int Charge;
+    //
+    if (!Node->Spectrum || !Node->Spectrum->PeakCount)
+    {
+        return;
+    }
+    Spectrum = Node->Spectrum;
+    // If our models aren't loaded - which should NEVER happen in production - then we'll
+    // trust the input mass and charge.
+    if (!PRMModelCharge2)
+    {
+        if (!Spectrum->Charge)
+        {
+            Spectrum->Charge = 2;
+            Spectrum->ParentMass = (Spectrum->MZ * 2) - HYDROGEN_MASS;
+        }
+        TweakIndex = (Spectrum->Charge - 1) * 2;
+        Node->Tweaks[TweakIndex].Charge = Spectrum->Charge;
+        Node->Tweaks[TweakIndex].ParentMass = Spectrum->ParentMass;
+        return;
+    }
+
+    if(GlobalOptions->InstrumentType == INSTRUMENT_TYPE_FT_HYBRID)
+      {
+	if (!Spectrum->Charge)
+        {
+            Spectrum->Charge = 2;
+            Spectrum->ParentMass = (Spectrum->MZ * 2) - HYDROGEN_MASS;
+        }
+        TweakIndex = (Spectrum->Charge - 1) * 2;
+        Node->Tweaks[TweakIndex].Charge = Spectrum->Charge;
+        Node->Tweaks[TweakIndex].ParentMass = Spectrum->ParentMass;
+        return;
+
+      }
+    Node->Spectrum->ParentMass = (Spectrum->MZ * 2) - HYDROGEN_MASS;
+
+
+    PrepareSpectrumForIonScoring(PRMModelCharge2, Node->Spectrum, 0);
+    //SpectrumComputeBinnedIntensities(Node);
+    
+    if (!GlobalOptions->MultiChargeMode && Spectrum->FileChargeFlag)
+    {
+        // The spectrum has charge(s) assigned, and we're trusting the charge(s).
+      for (Charge = 1; Charge < 5; Charge++)
+	{
+	  if (Spectrum->FileCharge[Charge])
+	    {
+	      Spectrum->Charge = Charge;
+	      SpectrumInfo = GetPMCSpectrumInfo(Spectrum);
+			
+	      PerformPMC(SpectrumInfo);
+	      TweakIndex = min(3, Spectrum->Charge - 1) * 2;
+	      Node->Tweaks[TweakIndex].Charge = Spectrum->Charge;
+	      Node->Tweaks[TweakIndex].ParentMass = SpectrumInfo->BestInfo->ParentMass;
+	      
+	      if (SpectrumInfo->RunnerUpInfo)
+		{
+		  Node->Tweaks[TweakIndex + 1].Charge = Spectrum->Charge;
+		  Node->Tweaks[TweakIndex + 1].ParentMass = SpectrumInfo->RunnerUpInfo->ParentMass;
+		}
+	      //SpectrumComputeNoiseDistributions(Node);
+	      FreePMCSpectrumInfo(SpectrumInfo);
+	    }
+	}
+      return;
+    }
+#ifdef CC_USE_SVM
+    //printf("NEC_ERROR: Using LoadCCModelSVM\n");
+    LoadCCModelSVM(0);
+#else
+    //printf("NEC_ERROR: Using LoadCCModelLDA\n");
+    LoadCCModelLDA(0);
+#endif
+    
+    // Either the spectrum has no charge set, or we're overriding the file guess 
+    // with our charge correction guess.
+
+    // Find the best parent mass if the charge is 1:
+    Node->Spectrum->Charge = 1;
+    SpectrumInfo1 = GetPMCSpectrumInfo(Spectrum);
+    
+
+    
+    PerformPMC(SpectrumInfo1);
+    Node->Tweaks[0].Charge = 1;
+    Node->Tweaks[0].ParentMass = SpectrumInfo1->BestInfo->ParentMass;
+    if (SpectrumInfo1->RunnerUpInfo)
+    {
+        Node->Tweaks[1].Charge = 1;
+        Node->Tweaks[1].ParentMass = SpectrumInfo1->RunnerUpInfo->ParentMass;
+    }
+
+    // Find the best parent mass if the charge is 2:
+
+
+
+    Node->Spectrum->Charge = 2;
+    SpectrumInfo2 = GetPMCSpectrumInfo(Spectrum);
+    PerformPMC(SpectrumInfo2);
+    Node->Tweaks[2].Charge = 2;
+    Node->Tweaks[2].ParentMass = SpectrumInfo2->BestInfo->ParentMass;
+    if (SpectrumInfo2->RunnerUpInfo)
+    {
+        Node->Tweaks[3].Charge = 2;
+        Node->Tweaks[3].ParentMass = SpectrumInfo2->RunnerUpInfo->ParentMass;
+    }
+
+    // Find the best parent mass if the charge is 3:
+ 
+    Node->Spectrum->Charge = 3;
+    SpectrumInfo3 = GetPMCSpectrumInfo(Spectrum);
+    PerformPMC(SpectrumInfo3);
+    Node->Tweaks[4].Charge = 3;
+    Node->Tweaks[4].ParentMass = SpectrumInfo3->BestInfo->ParentMass;
+    if (SpectrumInfo3->RunnerUpInfo)
+    {
+        Node->Tweaks[5].Charge = 3;
+        Node->Tweaks[5].ParentMass = SpectrumInfo3->RunnerUpInfo->ParentMass;
+    }
+
+    GetChargeCorrectionFeatures1(SpectrumInfo1, SpectrumInfo2, SpectrumInfo3, CCFeatures);
+    CCScore = SVMClassify(CCModel1SVM, CCFeatures, 0);
+    if (CCScore > 0)
+    {
+        // It's a singly-charged spectrum:
+        Node->Tweaks[2].Charge = 0;
+        Node->Tweaks[3].Charge = 0;
+        Node->Tweaks[4].Charge = 0;
+        Node->Tweaks[5].Charge = 0;
+    }
+    else
+    {
+        // It's a multiply-charged spectrum:
+        Node->Tweaks[0].Charge = 0;
+        Node->Tweaks[1].Charge = 0;
+		if (GlobalOptions->PhosphorylationFlag)
+		{
+			GetChargeCorrectionFeatures2Phos(SpectrumInfo2, SpectrumInfo3, CCFeatures);
+		}
+		else
+		{
+			GetChargeCorrectionFeatures2(SpectrumInfo2, SpectrumInfo3, CCFeatures);
+		}
+        CCScore = SVMClassify(CCModel2SVM, CCFeatures, 0);
+        if (CCScore >= 0.5)
+        {
+            // It's clearly not charge-3:
+            Node->Tweaks[4].Charge = 0;
+            Node->Tweaks[5].Charge = 0;
+        }
+        if (CCScore <= -0.5)
+        {
+            // It's clearly not charge-2:
+            Node->Tweaks[2].Charge = 0;
+            Node->Tweaks[3].Charge = 0;
+        }
+    }
+    
+    // cleanup:
+    FreePMCSpectrumInfo(SpectrumInfo1);
+    FreePMCSpectrumInfo(SpectrumInfo2);
+    FreePMCSpectrumInfo(SpectrumInfo3);
+    //SpectrumComputeNoiseDistributions(Node);
+    return;
+}
+
+// Iterate over lines of a training/testing oracle file, and invoke the callback function once for each.
+// Line format: Tab-delimited.  Pieces are:
+// Spectrum file name (not full path), charge, parent mass, annotation
+void TrainOnOracleFile(char* OracleFileName, char* SpectrumDir, TrainingCallback Callback)
+{
+    int BytesToRead;
+    char LineBuffer[MAX_LINE_LENGTH];
+    int BufferPos = 0;
+    int BytesRead;
+    int BufferEnd = 0;
+    int LineNumber = 0;
+    char TextBuffer[BUFFER_SIZE * 2];
+    FILE* OracleFile;
+    char* SpectrumFileName;
+    char FilePath[2048];
+    int Charge;
+    int ParentMass;
+    FILE* DTAFile;
+    char* Field;
+    Peptide* Match;
+    SpectrumNode* Node;
+    InputFileNode* FNode;
+    char* ColonPos;
+    int SpectrumFilePos;
+    char* Extension;
+    //
+    OracleFile = fopen(OracleFileName, "rb");
+    if (!OracleFile)
+    {
+        printf("** Error: Unable to open training oracle '%s'.\n", OracleFileName);
+        return;
+    }
+    Node = (SpectrumNode*)calloc(1, sizeof(SpectrumNode));
+    FNode = (InputFileNode*)calloc(1, sizeof(InputFileNode));
+    LineNumber = 0;
+    while (1)
+    {
+        BytesToRead = BUFFER_SIZE - BufferEnd;
+        BytesRead = ReadBinary(TextBuffer + BufferEnd, sizeof(char), BytesToRead, OracleFile);
+        BufferEnd += BytesRead;
+        TextBuffer[BufferEnd] = '\0';
+        if (BufferPos == BufferEnd)
+        { 
+            // We're done!
+            break;
+        }
+
+        // Copy a line of text to the line buffer.  Skip spaces, and stop at carriage return or newline.
+        BufferPos = CopyBufferLine(TextBuffer, BufferPos, BufferEnd, LineBuffer, 0);
+        LineNumber += 1;
+
+        // Now, move the remaining text to the start of the buffer:
+        memmove(TextBuffer, TextBuffer + BufferPos, BufferEnd - BufferPos);
+        BufferEnd -= BufferPos;
+        BufferPos = 0;
+
+        // Now, process this line of text!
+        // Skip empty lines:
+        if (!LineBuffer[0])
+        {
+            continue;
+        }
+        if (LineBuffer[0] == '#')
+        {
+            continue;
+        }
+        SpectrumFileName = strtok(LineBuffer, "\t");
+        if (!SpectrumFileName)
+        {
+            continue;
+        }
+
+        SpectrumFilePos = 0;
+        ColonPos = SpectrumFileName;
+        if (SpectrumFileName[1] == ':')
+        {
+            ColonPos = SpectrumFileName + 2;
+        }
+        while (*ColonPos)
+        {
+            if (*ColonPos == ':')
+            {
+                *ColonPos = '\0';
+                SpectrumFilePos = atoi(ColonPos + 1);
+                break;
+            }
+            ColonPos++;
+        }
+        
+        Extension = SpectrumFileName + strlen(SpectrumFileName) - 4;
+        if (!CompareStrings(Extension, ".mgf"))
+        {
+            FNode->Format = SPECTRUM_FORMAT_MGF;
+        }
+        else if (!CompareStrings(Extension, ".ms2"))
+        {
+            //FNode->Format = SPECTRUM_FORMAT_MS2;
+            FNode->Format = SPECTRUM_FORMAT_MS2_COLONS;
+        }
+        else if (!CompareStrings(Extension, ".mzxml"))
+        {
+            FNode->Format = SPECTRUM_FORMAT_MZXML;
+        }
+        else if (!CompareStrings(Extension, ".mzdata"))
+        {
+            FNode->Format = SPECTRUM_FORMAT_MZDATA;
+        }
+        else
+        {
+            FNode->Format = SPECTRUM_FORMAT_DTA;
+        }
+        if (SpectrumFileName[1] == ':')
+        {
+            sprintf(FilePath, "%s", SpectrumFileName);
+        }
+        else
+        {
+            sprintf(FilePath, "%s%s", SpectrumDir, SpectrumFileName);
+        }
+        
+        
+        DTAFile = fopen(FilePath, "rb");
+        if (!DTAFile)
+        {
+            printf("**Error: Couldn't open training/testing spectrum '%s'\n", FilePath);
+            continue;
+        }
+        fseek(DTAFile, SpectrumFilePos, 0);
+        Field = strtok(NULL, "\t");
+        if (!Field)
+        {
+            printf("** Syntax error: Line %d of %s\n", LineNumber, OracleFileName);
+            continue;
+        }
+        Charge = atoi(Field);
+        Field = strtok(NULL, "\t");
+        if (!Field)
+        {
+            printf("** Syntax error: Line %d of %s\n", LineNumber, OracleFileName);
+            continue;
+        }
+
+        ParentMass = (int)(atof(Field) * MASS_SCALE + 0.5);
+        Field = strtok(NULL, "\t");
+        if (!Field)
+        {
+            printf("** Syntax error: Line %d of %s\n", LineNumber, OracleFileName);
+            continue;
+        }
+
+        Match = GetPeptideFromAnnotation(Field);
+        Node->Spectrum = (MSSpectrum*)calloc(1, sizeof(MSSpectrum));
+        Node->Spectrum->Node = Node;
+        strcpy(FNode->FileName, FilePath);
+        Node->InputFile = FNode;
+        //strcpy(Node->FileName, FilePath);
+        SpectrumLoadFromFile(Node->Spectrum, DTAFile);
+        fclose(DTAFile);
+        (*Callback)(Node, Charge, ParentMass, Match);
+        FreeSpectrum(Node->Spectrum);
+        FreePeptideNode(Match);
+    }
+}
+
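
For reference, a hypothetical oracle line matching the parser above: fields are tab-separated, lines starting with '#' are skipped, the spectrum file name may carry a ':byte-offset' suffix into a multi-spectrum file, the parent mass is a floating-point value (scaled internally by MASS_SCALE), and the annotation uses the prefix.aminos.suffix form seen elsewhere in this import. The file name and peptide below are made up for illustration:

    Fraction01.mzXML:482910<TAB>2<TAB>1572.784<TAB>K.SAMPLEPEPTIDER.T
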
diff --git a/ChargeState.h b/ChargeState.h
new file mode 100644
index 0000000..81dc9af
--- /dev/null
+++ b/ChargeState.h
@@ -0,0 +1,65 @@
+//Title:          ChargeState.h
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+#ifndef CHARGE_STATE_H
+#define CHARGE_STATE_H
+
+
+
+#include <stdio.h>
+#include <math.h>
+#include <stdlib.h>
+#include "Utils.h"
+#include "Inspect.h"
+#include "Spectrum.h"
+#include "ParentMass.h"
+
+// Code to support charge state determination.  Our plan is:
+// - Organize a 'training+test' directory of spectra, half charge 2 and half charge 3.  The directory should 
+// include some QTOF results.
+// - Use an API in ChargeState.c to write out a set of features for these spectra.  +1 means charge 3, in this case.
+// - Use libsvm to train a support vector machine on these features
+// - Use the resulting model to guess charge states if the charge is unlisted, or if the MultiCharge option is set.  We 
+// use easy heuristics to detect +1 spectra, then use the svm to separate +2 and +3.  If confidence is low, still search
+// both charge states.
+
+void TweakSpectrum(SpectrumNode* Node);
+void TweakSpectrum_NEC(SpectrumNode* Node);
+
+void GetChargeCorrectionFeatures1(PMCSpectrumInfo* SpectrumInfo1, PMCSpectrumInfo* SpectrumInfo2,
+    PMCSpectrumInfo* SpectrumInfo3, float* Features);
+void GetChargeCorrectionFeatures2(PMCSpectrumInfo* SpectrumInfo2, PMCSpectrumInfo* SpectrumInfo3,
+    float* Features);
+void GetChargeCorrectionFeatures2Phos(PMCSpectrumInfo* SpectrumInfo2, PMCSpectrumInfo* SpectrumInfo3,
+    float* Features);
+int ChargeCorrectSpectrum(SpectrumNode* Node, float* Model1Score, float* Model2Score);
+#endif // CHARGE_STATE_H
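
A minimal caller sketch for the charge-correction entry point declared above; the GuessCharge() helper is hypothetical, and it assumes Node already holds a loaded spectrum with peaks (the score cutoffs themselves are applied inside ChargeCorrectSpectrum() in ChargeState.c):

    #include <stdio.h>
    #include "ChargeState.h"

    /* Hypothetical helper: ask the SVM-based models which precursor charge to use. */
    static void GuessCharge(SpectrumNode* Node)
    {
        float Model1Score;  /* separates charge 1 from multiply-charged spectra */
        float Model2Score;  /* separates charge 2 from charge 3 */
        int Charge = ChargeCorrectSpectrum(Node, &Model1Score, &Model2Score);
        printf("Guessed charge %d (model scores %.3f / %.3f)\n",
               Charge, Model1Score, Model2Score);
    }
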
diff --git a/CombinePTMFeatures.py b/CombinePTMFeatures.py
new file mode 100644
index 0000000..14e58ba
--- /dev/null
+++ b/CombinePTMFeatures.py
@@ -0,0 +1,627 @@
+#Title:          CombinePTMFeatures.py
+#Author:         Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+#Created:        2005
+# Copyright 2007,2008,2009 The Regents of the University of California
+# All Rights Reserved
+#
+# Permission to use, copy, modify and distribute any part of this
+# program for educational, research and non-profit purposes, by non-profit
+# institutions only, without fee, and without a written agreement is hereby
+# granted, provided that the above copyright notice, this paragraph and
+# the following three paragraphs appear in all copies.
+#
+# Those desiring to incorporate this work into commercial
+# products or use for commercial purposes should contact the Technology
+# Transfer & Intellectual Property Services, University of California,
+# San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+# Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+#
+# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+# INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+# IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+# OF SUCH DAMAGE.
+#
+# THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+# OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+# ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+# REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+# EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+# THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+"""
+CombinePTMFeatures:
+- Parse the output of several runs of ComputePTMFeatures.
+- Accumulate consensus clusters, and accumulate coverage
+- Write the results to a merged directory
+"""
+import sys
+import os
+import math
+import getopt
+import string
+import struct
+import traceback
+import shutil
+import BuildConsensusSpectrum
+import ResultsParser
+import PyInspect
+from Utils import *
+from TrainPTMFeatures import FormatBits
+
+UsageInfo = """
+-r [PATH]: Directory containing the results-directories to merge
+-w [PATH]: Output file
+-d [PATH]: Database path
+
+Optional:
+-M [DIR]: Directory subtree where mzxml files *really* live.
+    Spectrum paths will be corrected to use these paths.
+-x: If set, prepare output directories.  (Should be set for the first
+    run of a batch, and NOT for any others)
+-s [POS]: Start DBPosition
+-e [POS]: End DBPosition
+-q: Quick-parse flag
+-c [STRING]: Required filename chunk
+"""
+
+class PTMFeatureMerger(ResultsParser.SpectrumOracleMixin):
+    def __init__(self):
+        self.OutputDir = None
+        self.OutputPath = None
+        self.Peptides = {} # (annotation, charge) -> peptide species
+        self.HeaderLines = []
+        self.HeaderLinesParsed = 0
+        self.DBStart = None
+        self.DBEnd = None
+        self.QuickParseFlag = 0
+        self.TotalSpectrumCount = 0
+        self.SpectrumRoot = None
+        ResultsParser.SpectrumOracleMixin.__init__(self)
+    def WipeDir(self, Dir):
+        try:
+            shutil.rmtree(Dir)
+        except:
+            pass
+    def ParseCommandLine(self, Arguments):
+        PrepareDirsFlag = 0
+        (Options, Args) = getopt.getopt(Arguments, "d:r:w:s:e:qxM:")
+        OptionsSeen = {}
+        for (Option, Value) in Options:
+            OptionsSeen[Option] = 1
+            if Option == "-r":
+                self.PTMFeatureDirectory = Value
+            elif Option == "-d":
+                self.DBPath = Value
+            elif Option == "-w":
+                self.OutputPath = Value
+            elif Option == "-s":
+                self.DBStart = int(Value)
+            elif Option == "-e":
+                self.DBEnd = int(Value)
+            elif Option == "-q":
+                self.QuickParseFlag = 1
+            elif Option == "-x":
+                PrepareDirsFlag = 1
+            elif Option == "-M":
+                self.SpectrumRoot = Value
+            else:
+                print "* Error: Unrecognized option %s"%Option
+        if not self.OutputPath:
+            print "* Please specify an output file (-w)"
+            print UsageInfo
+            sys.exit(-1)
+        self.OutputDir = os.path.split(self.OutputPath)[0]
+        #self.OutputPath = os.path.join(self.OutputDir, "PTMFeatures.txt")
+        self.ClusterDir = os.path.join(self.OutputDir, "Clusters")
+        self.SpectrumDir = os.path.join(self.OutputDir, "Spectra")
+        self.ClusterMemberDir = os.path.join(self.OutputDir, "ClusterMembers")
+        print "Prepare directories..."
+        if PrepareDirsFlag:
+            self.WipeDir(self.OutputDir)
+            MakeDirectory(self.OutputDir)
+            for Dir in (self.ClusterDir, self.SpectrumDir, self.ClusterMemberDir):
+                MakeDirectory(Dir)
+                for AA in "ACDEFGHIKLMNPQRSTVWY":
+                    MakeDirectory(os.path.join(Dir, AA))
+        return 1 # success
+    def LoadDB(self):
+        # Populate self.DB with the contents of the .trie file
+        File = open(self.DBPath, "rb")
+        self.DB = File.read()
+        File.close()
+        self.Coverage = [0] * len(self.DB)
+        self.ModCoverage = [0] * len(self.DB)
+    def OutputCoverage(self):
+        CoveragePath = os.path.join(self.OutputDir, "Coverage.dat")
+        CoverageFile = open(CoveragePath, "wb")
+        for DBPos in range(len(self.DB)):
+            Str = struct.pack("<II", self.Coverage[DBPos], self.ModCoverage[DBPos])
+            CoverageFile.write(Str)
+        CoverageFile.close()
+    def OutputPTMFeatures(self):
+        File = open(self.OutputPath, "wb")
+        for FileLine in self.HeaderLines:
+            File.write(FileLine)
+        for Peptide in self.Peptides.values():
+            String = string.join(Peptide.Bits, "\t")
+            File.write(String + "\n")
+    def GrabClusterMembers(self, Directory, Peptide):
+        InputPath = os.path.join(Directory, "ClusterMembers", Peptide.Annotation[2], "%s.%s.txt"%(Peptide.Annotation, Peptide.Charge))
+        OutputPath = os.path.join(self.ClusterMemberDir, Peptide.Annotation[2], "%s.%s.txt"%(Peptide.Annotation, Peptide.Charge))
+        InputFile = open(InputPath, "rb")
+        OutputFile = open(OutputPath, "a+b")
+        OutputFile.write(InputFile.read())
+        InputFile.close()
+        OutputFile.close()
+    def BuildNewPeptide(self, Cursor):
+        """
+        We've parsed the first of the cursors that contains this peptide species.  Build a peptide
+        and start accumulating clusters.
+        """
+        Bits = Cursor.Bits
+        
+    def AddNewPeptide(self, Directory, Bits):
+        """
+        We're parsing a peptide (from the ComputePTMFeatures output) which we haven't
+        seen before.  Create a Peptide object and populate it.
+        """
+        Peptide = Bag()
+        Peptide.Bits = list(Bits)
+        Peptide.Charge = int(Bits[FormatBits.Charge])
+        Peptide.Annotation = Bits[FormatBits.Peptide]
+        SpectrumCount = int(Bits[FormatBits.SpectrumCount])
+        self.TotalSpectrumCount += SpectrumCount
+        Peptide.Peptide = GetPeptideFromModdedName(Peptide.Annotation)
+        Peptide.ModlessAnnotation = "%s.%s.%s"%(Peptide.Peptide.Prefix, Peptide.Peptide.Aminos, Peptide.Peptide.Suffix)
+        # Grab the cluster members right away:
+        self.GrabClusterMembers(Directory, Peptide)
+        # Add a cluster for the peptide:
+        Peptide.Cluster = BuildConsensusSpectrum.ConsensusBuilder(Peptide.Charge)
+        ClusterPath = os.path.join(Directory, "Clusters", Peptide.Annotation[2], "%s.%s.cls"%(Peptide.Annotation, Peptide.Charge))
+        Peptide.Cluster.UnpickleCluster(ClusterPath)
+        # Add a cluster for the modless peptide, if the cluster-file exists:
+        ModlessClusterPath = os.path.join(Directory, "Clusters", Peptide.Annotation[2], "%s.%s.cls"%(Peptide.ModlessAnnotation, Peptide.Charge))
+        if os.path.exists(ModlessClusterPath):
+            Peptide.ModlessCluster = BuildConsensusSpectrum.ConsensusBuilder(Peptide.Charge)
+            Peptide.ModlessCluster.UnpickleCluster(ModlessClusterPath)
+        else:
+            Peptide.ModlessCluster = None
+        # Add the peptide to our dictionary:
+        Key = (Peptide.Annotation, Peptide.Charge)
+        self.Peptides[Key] = Peptide
+    def AssimilatePeptide(self, Dir, Peptide, Bits):
+        """
+        We're parsing a peptide (from the ComputePTMFeatures output) which we 
+        have already seen.  Adjust the features of our Peptide object - accumulate
+        spectrum counts, et cetera.
+        """
+        # Best modless spectrum and MQScore.  These may be empty for the new
+        # file bits, for the existing peptide, or both.
+        ScoreStr = Peptide.Bits[FormatBits.BestModlessMQScore]
+        if ScoreStr:
+            OldModlessMQScore = float(ScoreStr)
+        else:
+            OldModlessMQScore = None
+        ScoreStr = Bits[FormatBits.BestModlessMQScore]
+        if ScoreStr:
+            ModlessMQScore = float(ScoreStr)
+        else:
+            ModlessMQScore = None
+        if ModlessMQScore > OldModlessMQScore:
+            Peptide.Bits[FormatBits.BestModlessMQScore] = Bits[FormatBits.BestModlessMQScore]
+            Peptide.Bits[FormatBits.BestModlessSpectrumPath] = Bits[FormatBits.BestModlessSpectrumPath]
+        # Best modded spectrum, mqscore, delta-score:
+        OldBestMQScore = float(Peptide.Bits[FormatBits.BestMQScore])
+        BestMQScore = float(Bits[FormatBits.BestMQScore])
+        if BestMQScore > OldBestMQScore:
+            Peptide.Bits[FormatBits.BestMQScore] = Bits[FormatBits.BestMQScore]
+            Peptide.Bits[FormatBits.BestDeltaScore] = Bits[FormatBits.BestDeltaScore]
+            Peptide.Bits[FormatBits.BestSpectrumPath] = Bits[FormatBits.BestSpectrumPath]
+        # Spectra:
+        CurrentSpectra = int(Peptide.Bits[FormatBits.SpectrumCount])
+        NewBlockSpectra = int(Bits[FormatBits.SpectrumCount])
+        TotalSpectra = CurrentSpectra + NewBlockSpectra
+        self.TotalSpectrumCount += NewBlockSpectra
+        Peptide.Bits[FormatBits.SpectrumCount] = str(TotalSpectra)
+        # Modless spectra
+        Spectra = int(Peptide.Bits[FormatBits.ModlessSpectrumCount])
+        Spectra += int(Bits[FormatBits.ModlessSpectrumCount])
+        Peptide.Bits[FormatBits.ModlessSpectrumCount] = str(Spectra)
+        # Accumulate modded spectra into the cluster:
+        ClusterPath = os.path.join(Dir, "Clusters", Peptide.Annotation[2], "%s.%s.cls"%(Peptide.Annotation, Peptide.Charge))
+        TempCluster = BuildConsensusSpectrum.ConsensusBuilder(Peptide.Charge)
+        TempCluster.UnpickleCluster(ClusterPath)
+        Peptide.Cluster.AssimilateCluster(TempCluster)
+        # Accumulate modless spectra into the modless cluster:
+        ClusterPath = os.path.join(Dir, "Clusters", Peptide.Annotation[2], "%s.%s.cls"%(Peptide.ModlessAnnotation, Peptide.Charge))
+        if os.path.exists(ClusterPath):
+            TempCluster = BuildConsensusSpectrum.ConsensusBuilder(Peptide.Charge)
+            TempCluster.UnpickleCluster(ClusterPath)
+            if Peptide.ModlessCluster:
+                Peptide.ModlessCluster.AssimilateCluster(TempCluster)
+            else:
+                Peptide.ModlessCluster = TempCluster
+        # Consensus MQScore handled at the end
+        # Spectra/sites this mod type handled at the *VERY* end, possibly after
+        #   multiple runs of CombinePTMFeatures!
+        # Log spectrum-count handled at the end
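+        # Illustrative example (values hypothetical): if the stored peptide row
+        # has BestMQScore 2.1 and SpectrumCount 4, and the newly-parsed row has
+        # BestMQScore 3.0 and SpectrumCount 6, then after AssimilatePeptide the
+        # row keeps the higher score (3.0, plus its delta-score and spectrum
+        # path) and the summed spectrum count (10); the modded and modless
+        # clusters are merged via AssimilateCluster.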
+    def MergeResultsFromFile(self, Path):
+        Dir = os.path.split(Path)[0]
+        File = open(Path, "rb")
+        LineNumber = 0
+        for FileLine in File.xreadlines():
+            LineNumber += 1
+            if LineNumber % 100 == 0:
+                print "%s line %s (%s peptides, %s spectra)"%(Path, LineNumber, len(self.Peptides.keys()), self.TotalSpectrumCount)
+                if self.QuickParseFlag:
+                    break
+            if FileLine[0] == "#":
+                if not self.HeaderLinesParsed:
+                    self.HeaderLines.append(FileLine)
+                continue
+            Bits = FileLine.strip().split("\t")
+            # Skip any blank lines:
+            if len(Bits) < 2:
+                continue
+            try:
+                DBPos = int(Bits[FormatBits.DBPos])
+                Annotation = Bits[FormatBits.Peptide]
+                Charge = int(Bits[FormatBits.Charge])
+            except:
+                print "* Warning: Line %s of %s isn't valid!"%(LineNumber, Path)
+                traceback.print_exc()
+                continue
+            # Ignore any peptides which don't fall within our database region of interest:
+            if self.DBStart != None and DBPos < self.DBStart:
+                continue
+            if self.DBEnd != None and DBPos >= self.DBEnd:
+                continue
+            Key = (Annotation, Charge)
+            Peptide = self.Peptides.get(Key, None)
+            if Peptide:
+                self.AssimilatePeptide(Dir, Peptide, Bits)
+            else:
+                self.AddNewPeptide(Dir, Bits)
+        File.close()
+        self.HeaderLinesParsed = 1
+    def FinalizePTMFeatures(self):
+        """
+        Some PTM feature processing, such as building and scoring a consensus spectrum,
+        should happen just once.  Those steps happen here, *after* each input file
+        has been parsed.
+        """
+        for Peptide in self.Peptides.values():
+            # Update log-spectrum-count:
+            Spectra = int(Peptide.Bits[FormatBits.SpectrumCount])
+            Peptide.Bits[FormatBits.LogSpectrumCount] = str(math.log(Spectra))
+            # Write out the consensus MODLESS cluster:
+            if Peptide.ModlessCluster:
+                Path = os.path.join(self.ClusterDir, Peptide.Annotation[2], "%s.%s.cls"%(Peptide.ModlessAnnotation, Peptide.Charge))
+                Peptide.ModlessCluster.PickleCluster(Path)
+            # Write out the consensus MODLESS spectrum:
+            if Peptide.ModlessCluster:
+                Path = os.path.join(self.SpectrumDir, Peptide.Annotation[2], "%s.%s.dta"%(Peptide.ModlessAnnotation, Peptide.Charge))
+                Spectrum = Peptide.ModlessCluster.ProduceConsensusSpectrum()
+                Spectrum.WritePeaks(Path)
+            # Write out the CLUSTER:
+            Path = os.path.join(self.ClusterDir, Peptide.Annotation[2], "%s.%s.cls"%(Peptide.Annotation, Peptide.Charge))
+            Peptide.Cluster.PickleCluster(Path)
+            # Write out the consensus SPECTRUM:
+            ConsensusSpectrumPath = os.path.join(self.SpectrumDir, Peptide.Annotation[2], "%s.%s.dta"%(Peptide.Annotation, Peptide.Charge))
+            Spectrum = Peptide.Cluster.ProduceConsensusSpectrum()
+            Spectrum.WritePeaks(ConsensusSpectrumPath)
+            # Compute consensus spectrum features:
+            PySpectrum = PyInspect.Spectrum(ConsensusSpectrumPath, 0)
+            ScoreList = PySpectrum.ScorePeptideDetailed(Peptide.Annotation)
+            Peptide.Bits[FormatBits.ConsensusMQScore] = str(ScoreList[0])
+            Peptide.Bits[FormatBits.ConsensusPRMScore] = str(ScoreList[1])
+            Peptide.Bits[FormatBits.ConsensusBYPresent] = str(ScoreList[2])
+            Peptide.Bits[FormatBits.ConsensusTopPeaks] = str(ScoreList[3])
+            Peptide.Bits[FormatBits.NTT] = str(ScoreList[4])
+            # Compute comparison features:
+            if Peptide.ModlessCluster:
+                Peptide.Bits[FormatBits.SisterAnnotationFlag] = "1"
+                pass #NOTE: skip these features, since we don't really use them!
+            
+    def AssimilateDatabaseCoverage(self, CoverageFilePath):
+        """
+        Read Coverage.dat from one of the ComputePTMFeatures runs.
+        Accumulate total coverage.
+        """
+        StructSize = struct.calcsize("<II")
+        File = open(CoverageFilePath, "rb")
+        for DBPos in range(len(self.DB)):
+            Block = File.read(StructSize)
+            (Coverage, ModCoverage) = struct.unpack("<II", Block)
+            self.Coverage[DBPos] += Coverage
+            self.ModCoverage[DBPos] += ModCoverage
+            if DBPos % 10000 == 0:
+                print "%s/%s..."%(DBPos, len(self.DB))
+        File.close()
+    def WriteSingletonPeptide(self, Species, Cursor):
+        """
+        Here's a peptide that was found in only one of the input files.  That makes our job very
+        easy; all we need to do is write out the fileline, and copy over: cluster members,
+        cluster file, spectrum file, modless cluster file (if it exists), modless spectrum file (if
+        it exists)
+        """
+        # Write the file line:
+        OutputLine = string.join(Cursor.Bits, "\t")
+        self.OutputFile.write(OutputLine + "\n")
+        # Copy the necessary files:
+        Annotation = Species.Annotation
+        Charge = Species.Charge
+        AA = Species.AA
+        SourcePath = os.path.join(Cursor.Directory, "ClusterMembers", AA, "%s.%s.txt"%(Annotation, Charge))
+        TargetPath = os.path.join(self.OutputDir, "ClusterMembers", AA, "%s.%s.txt"%(Annotation, Charge))
+        shutil.copyfile(SourcePath, TargetPath)
+        SourcePath = os.path.join(Cursor.Directory, "Clusters", AA, "%s.%s.cls"%(Annotation, Charge))
+        TargetPath = os.path.join(self.OutputDir, "Clusters", AA, "%s.%s.cls"%(Annotation, Charge))
+        shutil.copyfile(SourcePath, TargetPath)
+        SourcePath = os.path.join(Cursor.Directory, "Spectra", AA, "%s.%s.dta"%(Annotation, Charge))
+        TargetPath = os.path.join(self.OutputDir, "Spectra", AA, "%s.%s.dta"%(Annotation, Charge))
+        shutil.copyfile(SourcePath, TargetPath)
+        SourcePath = os.path.join(Cursor.Directory, "Clusters", AA, "%s.%s.cls"%(Species.ModlessAnnotation, Charge))
+        TargetPath = os.path.join(self.OutputDir, "Clusters", AA, "%s.%s.cls"%(Species.ModlessAnnotation, Charge))
+        if os.path.exists(SourcePath):
+            shutil.copyfile(SourcePath, TargetPath)
+        SourcePath = os.path.join(Cursor.Directory, "Spectra", AA, "%s.%s.dta"%(Species.ModlessAnnotation, Charge))
+        TargetPath = os.path.join(self.OutputDir, "Spectra", AA, "%s.%s.dta"%(Species.ModlessAnnotation, Charge))
+        if os.path.exists(SourcePath):
+            shutil.copyfile(SourcePath, TargetPath)
+    def BuildPeptideSpecies(self, Cursor):
+        Species = Bag()
+        Bits = Cursor.Bits
+        Species.Annotation = Bits[FormatBits.Peptide]
+        Species.AA = Species.Annotation[2]
+        Species.Peptide = GetPeptideFromModdedName(Species.Annotation)
+        Species.ModlessAnnotation = "%s.%s.%s"%(Species.Peptide.Prefix, Species.Peptide.Aminos, Species.Peptide.Suffix)
+        Species.Charge = int(Bits[FormatBits.Charge])
+        Species.Bits = Bits[:]
+        Species.Cluster = None # ConsensusBuilder, instantiated later
+        Species.ModlessCluster = None # ConsensusBuilder, instantiated later
+        # Fix the spectrum PATHS:
+        Species.Bits[FormatBits.BestSpectrumPath] = self.FixSpectrumPath(Species.Bits[FormatBits.BestSpectrumPath])
+        if Species.Bits[FormatBits.BestModlessSpectrumPath]:
+            Species.Bits[FormatBits.BestModlessSpectrumPath] = self.FixSpectrumPath(Species.Bits[FormatBits.BestModlessSpectrumPath])
+        return Species
+    def AssimilatePeptideSpectra(self, Species, Cursor):
+        """
+        Accumulate total spectra for this species.
+        """
+        ###############################
+        # Adjust features - best spectrum, number of spectra, etc.
+        # Best modless spectrum and MQScore.  These may be empty for the new
+        # file bits, for the existing peptide, or both.
+        ScoreStr = Species.Bits[FormatBits.BestModlessMQScore]
+        if ScoreStr:
+            OldModlessMQScore = float(ScoreStr)
+        else:
+            OldModlessMQScore = None
+        ScoreStr = Cursor.Bits[FormatBits.BestModlessMQScore]
+        if ScoreStr:
+            ModlessMQScore = float(ScoreStr)
+        else:
+            ModlessMQScore = None
+        if ModlessMQScore > OldModlessMQScore:
+            Species.Bits[FormatBits.BestModlessMQScore] = Cursor.Bits[FormatBits.BestModlessMQScore]
+            Species.Bits[FormatBits.BestModlessSpectrumPath] = Cursor.Bits[FormatBits.BestModlessSpectrumPath]
+        # Best modded spectrum, mqscore, delta-score:
+        OldBestMQScore = float(Species.Bits[FormatBits.BestMQScore])
+        BestMQScore = float(Cursor.Bits[FormatBits.BestMQScore])
+        if BestMQScore > OldBestMQScore:
+            Species.Bits[FormatBits.BestMQScore] = Cursor.Bits[FormatBits.BestMQScore]
+            Species.Bits[FormatBits.BestDeltaScore] = Cursor.Bits[FormatBits.BestDeltaScore]
+            Species.Bits[FormatBits.BestSpectrumPath] = Cursor.Bits[FormatBits.BestSpectrumPath]
+        # Spectra:
+        CurrentSpectra = int(Species.Bits[FormatBits.SpectrumCount])
+        NewBlockSpectra = int(Cursor.Bits[FormatBits.SpectrumCount])
+        TotalSpectra = CurrentSpectra + NewBlockSpectra
+        self.TotalSpectrumCount += NewBlockSpectra
+        Species.Bits[FormatBits.SpectrumCount] = str(TotalSpectra)
+        # Log of spectrum-count:
+        Species.Bits[FormatBits.LogSpectrumCount] = str(math.log(TotalSpectra))
+        # Modless spectra
+        Spectra = int(Species.Bits[FormatBits.ModlessSpectrumCount])
+        Spectra += int(Cursor.Bits[FormatBits.ModlessSpectrumCount])
+        Species.Bits[FormatBits.ModlessSpectrumCount] = str(Spectra)
+        ###############################
+        # Accumulate a list of cluster members:
+        ClusterMemberFileName = "%s.%s.txt"%(Species.Annotation, Species.Charge)
+        ClusterMemberPath = os.path.join(self.OutputDir, "ClusterMembers", Species.AA, ClusterMemberFileName)
+        ClusterMemberFile = open(ClusterMemberPath, "a+b")
+        CursorMemberPath = os.path.join(Cursor.Directory, "ClusterMembers", Species.AA, ClusterMemberFileName)
+        CursorMemberFile = open(CursorMemberPath, "rb") 
+        Text = CursorMemberFile.read()
+        ClusterMemberFile.write(Text)
+        CursorMemberFile.close()
+        ClusterMemberFile.close()
+        ###############################
+        # Accumulate members of the modded cluster:
+        ClusterPath = os.path.join(Cursor.Directory, "Clusters", Species.AA, "%s.%s.cls"%(Species.Annotation, Species.Charge))
+        CursorCluster = BuildConsensusSpectrum.ConsensusBuilder(Species.Charge)
+        CursorCluster.UnpickleCluster(ClusterPath)
+        if not Species.Cluster:
+            Species.Cluster = CursorCluster 
+        else:
+            Species.Cluster.AssimilateCluster(CursorCluster)
+        ###############################
+        # Accumulate members of the modless cluster:
+        ClusterPath = os.path.join(Cursor.Directory, "Clusters", Species.AA, "%s.%s.cls"%(Species.ModlessAnnotation, Species.Charge))
+        if os.path.exists(ClusterPath):
+            CursorCluster = BuildConsensusSpectrum.ConsensusBuilder(Species.Charge)
+            CursorCluster.UnpickleCluster(ClusterPath)
+            if not Species.ModlessCluster:
+                Species.ModlessCluster = CursorCluster 
+            else:
+                Species.ModlessCluster.AssimilateCluster(CursorCluster)
+    def WriteCompletedPeptide(self, Species):
+        """
+        We've read data for this peptide species from TWO OR MORE cursors.  Now we'll write out
+        one line to the output file, and output our consensus spectrum.
+        """
+        # Write the file line:
+        OutputLine = string.join(Species.Bits, "\t")
+        self.OutputFile.write(OutputLine + "\n")
+        # Cluster:
+        ClusterOutputPath = os.path.join(self.OutputDir, "Clusters", Species.AA, "%s.%s.cls"%(Species.Annotation, Species.Charge))
+        Species.Cluster.PickleCluster(ClusterOutputPath)
+        # Consensus spectrum:
+        ConsensusSpectrum = Species.Cluster.ProduceConsensusSpectrum()
+        ConsensusSpectrumPath = os.path.join(self.OutputDir, "Spectra", Species.AA, "%s.%s.dta"%(Species.Annotation, Species.Charge))
+        ConsensusSpectrum.WritePeaks(ConsensusSpectrumPath)
+        if Species.ModlessCluster:
+            # Modless cluster:
+            ClusterOutputPath = os.path.join(self.OutputDir, "Clusters", Species.AA, "%s.%s.cls"%(Species.ModlessAnnotation, Species.Charge))
+            Species.ModlessCluster.PickleCluster(ClusterOutputPath)
+            # Modless consensus spectrum:
+            ConsensusSpectrum = Species.ModlessCluster.ProduceConsensusSpectrum()
+            ConsensusSpectrumPath = os.path.join(self.OutputDir, "Spectra", Species.AA, "%s.%s.dta"%(Species.ModlessAnnotation, Species.Charge))
+            ConsensusSpectrum.WritePeaks(ConsensusSpectrumPath)
+    def MergeResults(self):
+        print "Load db..."
+        self.LoadDB()
+        # Measure combined db coverage:
+        print "Combine database coverage..."
+        self.CombineDatabaseCoverage()
+        print "Populate MZXML oracle..."
+        self.PopulateMZXMLOracle(self.SpectrumRoot)
+        self.OutputFile = open(self.OutputPath, "wb")
+        for FileLine in self.HeaderLines:
+            self.OutputFile.write(FileLine)        
+        class FeatureCursor:
+            """
+            Wrapper for a feature file - tracks the "next" peptide.  
+            """
+            def __init__(self, Path):
+                self.Path = Path
+                self.File = open(Path, "rb")
+                self.NextKey = None
+                self.Bits = None
+                self.Directory = os.path.split(Path)[0]
+                self.HeaderLines = []
+            def Close(self):
+                self.File.close()
+                self.NextKey = None
+            def GetNextPeptide(self):
+                # Read one or more lines (skipping header or invalid lines), and remember
+                # the next peptide to be processed
+                while (1):
+                    FileLine = self.File.readline()
+                    if not FileLine:
+                        self.NextKey = None
+                        return None # EOF
+                    # Skip blank or comment lines:
+                    if FileLine[0] == "#":
+                        self.HeaderLines.append(FileLine)
+                        continue
+                    if not FileLine.strip():
+                        continue
+                    # Attempt to parse the line:
+                    Bits = FileLine.strip().split("\t")
+                    try:
+                        ModDBPos = int(Bits[FormatBits.DBPos])
+                        ModMass = int(Bits[FormatBits.ModificationMass])
+                        Annotation = Bits[FormatBits.Peptide]
+                        Charge = int(Bits[FormatBits.Charge])
+                    except:
+                        traceback.print_exc()
+                        continue # skip invalid line
+                    # We know our next key, so stop now:
+                    self.NextKey = (ModDBPos, ModMass, Annotation, Charge)
+                    self.Bits = Bits
+                    break
+        self.FeatureCursors = []
+        # List the directories that need to be parsed, and open the files:
+        for SubDirectory in os.listdir(self.PTMFeatureDirectory):
+            Dir = os.path.join(self.PTMFeatureDirectory, SubDirectory)
+            if not os.path.isdir(Dir):
+                continue
+            FeatureFilePath = os.path.join(Dir, "PTMFeatures.txt")
+            if not os.path.exists(FeatureFilePath):
+                print "* Warning: Subdirectory %s doesn't contain a feature file!"%Dir
+                continue
+            CoverageFilePath = os.path.join(Dir, "Coverage.dat")
+            if not os.path.exists(CoverageFilePath):
+                print "* Warning: Subdirectory %d doesn't contain a coverage file!"%Dir
+                continue
+            Cursor = FeatureCursor(FeatureFilePath)
+            Cursor.GetNextPeptide()
+            self.FeatureCursors.append(Cursor)
+        # Output the header lines from an (arbitrary) cursor:
+        for HeaderLine in self.FeatureCursors[0].HeaderLines:
+            self.OutputFile.write(HeaderLine)
+        # Loop through the peptides, until all cursors hit EOF.  At each stage, process
+        # the peptide with the first key - and process it from any and all cursors which
+        # are now pointing at it.
+        while (1):
+            CursorsForThisKey = 0
+            CursorThisKey = None
+            FirstKey = None
+            print 
+            for Cursor in self.FeatureCursors:
+                if Cursor.NextKey != None and (FirstKey == None or FirstKey > Cursor.NextKey):
+                    FirstKey = Cursor.NextKey
+                    CursorThisKey = Cursor
+                    CursorsForThisKey = 0
+                if Cursor.NextKey == FirstKey:
+                    CursorsForThisKey += 1
+                    print "*Cursor %s: %s"%(Cursor.Path, Cursor.NextKey)
+                else:
+                    print "Cursor %s: %s"%(Cursor.Path, Cursor.NextKey)
+            if FirstKey == None:
+                break
+            Species = self.BuildPeptideSpecies(CursorThisKey)
+            print "Next species is '%s', found in %s files."%(Species.Annotation, CursorsForThisKey)
+            ########################################################################
+            # Shortcut: If CursorsForThisKey == 1, then we don't need to re-build the
+            # cluster or the consensus spectrum!
+            if CursorsForThisKey == 1:
+                self.WriteSingletonPeptide(Species, CursorThisKey)
+                CursorThisKey.GetNextPeptide()
+                continue
+            ########################################################################
+            # Standard case: Two or more cursors have the same peptide.  Assimilate the
+            # total spectra, and cluster members, from each one!
+            for Cursor in self.FeatureCursors:
+                if Cursor.NextKey == FirstKey:
+                    self.AssimilatePeptideSpectra(Species, Cursor)
+                    Cursor.GetNextPeptide()
+            # We've assimilated all spectra into the cluster; now write out the
+            # totaled spectrum count and the consensus spectra!
+            self.WriteCompletedPeptide(Species)
+        ########################################################
+        # All peptides have been written out; every cursor is at EOF.
+        # Loop over cursors and finish up:
+        for Cursor in self.FeatureCursors:
+            Cursor.Close()
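+        # Note on the loop above: each pass picks the smallest NextKey
+        # (DBPos, ModMass, Annotation, Charge) across all cursors and consumes
+        # that key from every cursor currently pointing at it, so this amounts
+        # to a k-way merge of the per-block feature files (assuming each file
+        # is sorted by that key).  For example (hypothetical keys), if cursor A
+        # is at (100, 80, "K.ABCDE.F", 2) and cursor B is at (250, 14, ...),
+        # only A's row is written on that pass and only A advances.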
+    def CombineDatabaseCoverage(self):
+        """
+        Iterate over directories, and compute the total coverage for each database residue.
+        """
+        for SubDirectory in os.listdir(self.PTMFeatureDirectory):
+            Dir = os.path.join(self.PTMFeatureDirectory, SubDirectory)
+            if not os.path.isdir(Dir):
+                continue
+            CoverageFilePath = os.path.join(Dir, "Coverage.dat")
+            if not os.path.exists(CoverageFilePath):
+                print "* Warning: Subdirectory %s doesn't contain a coverage file!"%Dir
+                continue
+            print "Assimilate from %s..."%CoverageFilePath
+            self.AssimilateDatabaseCoverage(CoverageFilePath)
+        self.OutputCoverage()
+
+if __name__ == "__main__":
+    try:
+        import psyco
+    except:
+        print "(psyco not available - no optimization for you)"
+    Merger = PTMFeatureMerger()
+    Result = Merger.ParseCommandLine(sys.argv[1:])
+    if not Result:
+        print UsageInfo
+        sys.exit(-1)
+    Merger.MergeResults()
+    
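+# Hedged usage sketch (directory and file names below are hypothetical; see
+# UsageInfo for the authoritative option list).  Merging several per-block
+# ComputePTMFeatures output directories found under PTMFeatureRuns/ into a
+# single Combined/PTMFeatures.txt, wiping and re-creating the output
+# directories first (-x) and pointing -M at the mzXML root, might look like:
+#   python CombinePTMFeatures.py -r PTMFeatureRuns -d Database/IPISubDB.trie \
+#       -w Combined/PTMFeatures.txt -M mzxml -x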
diff --git a/CompareHEKPTM.py b/CompareHEKPTM.py
new file mode 100644
index 0000000..c751499
--- /dev/null
+++ b/CompareHEKPTM.py
@@ -0,0 +1,808 @@
+#Title:          CompareHEKPTM.py
+#Author:         Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+#Created:        2005
+# Copyright 2007,2008,2009 The Regents of the University of California
+# All Rights Reserved
+#
+# Permission to use, copy, modify and distribute any part of this
+# program for educational, research and non-profit purposes, by non-profit
+# institutions only, without fee, and without a written agreement is hereby
+# granted, provided that the above copyright notice, this paragraph and
+# the following three paragraphs appear in all copies.
+#
+# Those desiring to incorporate this work into commercial
+# products or use for commercial purposes should contact the Technology
+# Transfer & Intellectual Property Services, University of California,
+# San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+# Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+#
+# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+# INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+# IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+# OF SUCH DAMAGE.
+#
+# THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+# OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+# ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+# REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+# EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+# THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+"""
+Compare the collection of PTMs found in the HEK293 data-set with the PTMs reported
+in external databases (HPRD and Uniprot)
+"""
+
+import os
+import getopt
+import string
+import sys
+import struct
+import traceback
+import cPickle
+import xml.dom.minidom
+import xml.sax.handler
+from xml.dom.minidom import Node
+from TrainPTMFeatures import FormatBits
+from Utils import *
+Initialize()
+
+AminoAcidLetters = "ACDEFGHIKLMNPQRSTVWY"
+
+# HEK options:
+InspectOutputFileName = r"C:\Documents and Settings\swt\Desktop\SWTPapers\PTMScoring\SupplementalTables\STHEKSitesK2.txt"
+IPIDatabasePath = os.path.join("Database", "IPISubDB.trie")
+FDRPValueCutoff = 0.270065269846
+
+# LENS options:
+##InspectOutputFileName = r"C:\Documents and Settings\swt\Desktop\SWTPapers\PTMScoring\SupplementalTables\ST1LensSitesFullK2.txt"
+##IPIDatabasePath = os.path.join("Database", "Lens99.trie")
+##FDRPValueCutoff = 0.580962281
+
+UniprotXMLFileName = r"F:\Uniprot\uniprot_sprot.xml"
+HPRDDir = r"f:\ftproot\HPRD\HPRD_XML_010107\HPRD_XML_010107"
+
+SkipModificationNames = {"proteolytic cleavage":1,
+                         "disulfide bridge":1,
+                         }
+
+
+def GetXMLText(NodeList, Strip = 0):
+    """
+    Gets the text associated with an XML Node
+    <a>RETURNS THIS TEXT </a>
+    """
+    BodyText = ""
+    for Node in NodeList:
+        if Node.nodeType == Node.TEXT_NODE:
+            BodyText += Node.data
+    if Strip: # strip all whitespace characters (the ones removed below)
+        BodyText = BodyText.replace(" ","")
+        BodyText = BodyText.replace("\r","")
+        BodyText = BodyText.replace("\n","")
+        BodyText = BodyText.replace("\t","")
+    return BodyText
+
+def FindDBLocations(DB, Aminos):
+    """
+    Find all occurrences of this peptide in the database.
+    Return DB indices.
+    """
+    PrevPos = -1
+    LocationList = []
+    while (1):
+        Pos = DB.find(Aminos, PrevPos + 1)
+        if Pos == -1:
+            break
+        LocationList.append(Pos)
+        PrevPos = Pos
+    return LocationList
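+# Illustrative example: FindDBLocations("LVQKAPELVQK", "LVQK") returns [0, 7];
+# because the search resumes at PrevPos + 1, overlapping occurrences are
+# reported as well (e.g. FindDBLocations("AAAA", "AA") returns [0, 1, 2]).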
+
+
+class CompareMaster:
+    """
+    This class keeps track of the proteins in the canonical HEK database,
+    as well as the modifications from various sources.
+    """ 
+    def __init__(self):
+        pass
+    def InitializeModMasses(self):
+        """
+        Initialize self.ModMasses, which maps (lower-case) PTM names to masses.
+        Because the list is rather long, it has been moved to a table in
+        ExternalPTMNames.txt
+        """
+        self.ModMasses = {}
+        File = open("ExternalPTMNames.txt", "rb")
+        for FileLine in File.xreadlines():
+            Bits = FileLine.split("\t")
+            Bits = list(Bits)
+            # Repair Excel's broken columns:
+            if Bits[0][0] == '"':
+                Bits[0] = Bits[0][1:-1]
+            if FileLine[0] == "#" or len(Bits) < 2:
+                continue
+            Name = Bits[0].lower()
+            Mass = int(Bits[1])
+            self.ModMasses[Name] = Mass
+        File.close()
+    def LoadDatabase(self):
+        self.ProteinPos = []
+        self.DB = ""
+        File = open(IPIDatabasePath, "rb")
+        self.DB = File.read()
+        File.close()
+        PrevPos = -1
+        while 1:
+            NextPos = self.DB.find("*", PrevPos + 1)
+            if NextPos == -1:
+                break
+            self.ProteinPos.append(PrevPos + 1)
+            PrevPos = NextPos
+        # Read protein names, too:
+        IndexPath = os.path.splitext(IPIDatabasePath)[0] + ".index"
+        BlockSize = struct.calcsize("qi80s")
+        IndexFile = open(IndexPath, "rb")
+        self.ProteinNames = []
+        while 1:
+            Block = IndexFile.read(BlockSize)
+            if not Block:
+                break
+            Tuple = struct.unpack("qi80s", Block)
+            self.ProteinNames.append(Tuple[2])
+        
+    def GetDBPosInfo(self, Pos):
+        """
+        Return (ProteinName, ProteinResidueNumber)
+        """
+        for ProteinIndex in range(len(self.ProteinPos)):
+            ProteinStart = self.ProteinPos[ProteinIndex]
+            if Pos < ProteinStart:
+                continue
+            if ProteinIndex < len(self.ProteinPos) - 1:
+                ProteinEnd = self.ProteinPos[ProteinIndex + 1]
+                if Pos >= ProteinEnd:
+                    continue
+            else:
+                pass
+            ResidueNumber = Pos - ProteinStart
+            return (self.ProteinNames[ProteinIndex], ResidueNumber)
+    def FindPeptideLocations(self, Aminos):
+        """
+        Given an amino acid string, find all locations in the database.
+        Return a list of the form (ProteinIndex, ResidueNumber)
+        """
+        PrevPos = -1
+        LocationList = []
+        while (1):
+            Pos = self.DB.find(Aminos, PrevPos + 1)
+            if Pos == -1:
+                break
+            # Which protein does Pos lie in?
+            LowIndex = 0
+            HighIndex = len(self.ProteinPos) - 1
+            # Pos >= ProteinPos[LowIndex] and Pos < ProteinPos[HighIndex]
+            # Special case - last protein:
+            if Pos >= self.ProteinPos[HighIndex]:
+                ProteinID = HighIndex
+                ResidueNumber = Pos - self.ProteinPos[HighIndex]
+            else:
+                while (1):
+                    if LowIndex + 1 == HighIndex:
+                        ProteinID = LowIndex
+                        ResidueNumber = Pos - self.ProteinPos[LowIndex]
+                        break
+                    MidIndex = (LowIndex + HighIndex) / 2
+                    if Pos >= self.ProteinPos[MidIndex]:
+                        LowIndex = MidIndex
+                    else:
+                        HighIndex = MidIndex
+            LocationList.append((ProteinID, ResidueNumber))
+            PrevPos = Pos
+        return LocationList
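+        # Illustrative example (hypothetical index): with self.ProteinPos ==
+        # [0, 50, 120], a peptide starting at database position 60 is reported
+        # as (ProteinIndex 1, ResidueNumber 10), and one starting at position
+        # 130 falls past the last protein start and is reported as
+        # (ProteinIndex 2, ResidueNumber 10).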
+    def ParsePTMsInspect5(self):
+        return self.ParsePTMsInspect(0.05)
+    def ParsePTMsInspect(self, PValueThreshold = None):
+        if not PValueThreshold:
+            PValueThreshold = FDRPValueCutoff
+        PTMDictionary = {}
+        File = open(InspectOutputFileName, "rb")
+        LineNumber = 0
+        for FileLine in File.xreadlines():
+            LineNumber += 1
+            if LineNumber % 1000 == 0:
+                print "Line %s..."%LineNumber
+            if FileLine[0] == "#":
+                continue
+            Bits = FileLine.strip().split("\t")
+            try:
+                ProteinName = Bits[FormatBits.ProteinName]
+                #DeltaMass = int(Bits[FormatBits.ModificationMass])
+                DeltaMass = int(Bits[49])
+                Annotation = Bits[FormatBits.Peptide]
+                PeptidePValue = Bits[FormatBits.ModelPValue]
+                SitePValue = float(Bits[FormatBits.SitePValue])
+                KnownAnnotation = Bits[FormatBits.KnownPTMAnnotation]
+                KnownFlag = int(Bits[47])
+                if KnownFlag:
+                    KeepAnnotation = KnownAnnotation
+                else:
+                    KeepAnnotation = Annotation
+                Peptide = GetPeptideFromModdedName(KeepAnnotation)
+            except:
+                traceback.print_exc()
+                continue
+            try:
+                SitePValue = float(Bits[50])
+            except:
+                pass
+            if PValueThreshold != None and SitePValue >= PValueThreshold:
+                continue
+            # If the protein is shuffled (protein name starts with XXX), ignore the PTM:
+            if ProteinName[:3] == "XXX":
+                continue
+            if not Peptide.Modifications.keys():
+                continue # it's actually unmodified!
+            ModPos = Peptide.Modifications.keys()[0]
+            FullAminos = Peptide.Aminos
+            if Peptide.Prefix in AminoAcidLetters:
+                FullAminos = Peptide.Prefix + FullAminos
+            if Peptide.Suffix in AminoAcidLetters:
+                FullAminos = FullAminos + Peptide.Suffix
+            DBHitList = FindDBLocations(self.DB, FullAminos)
+            if not DBHitList:
+                print "*** Warning: Peptide '%s' not found in database!"%FullAminos
+                continue
+            for DBPos in DBHitList:
+                ModDBPos = DBPos + ModPos
+                if Peptide.Prefix in AminoAcidLetters:
+                    ModDBPos += 1
+                if not PTMDictionary.has_key(ModDBPos):
+                    PTMDictionary[ModDBPos] = []
+                # Avoid adding redundant records:
+                FoundFlag = 0
+                for (OldMass, OldName) in PTMDictionary[ModDBPos]:
+                    if OldMass == DeltaMass:
+                        FoundFlag = 1
+                        break
+                if not FoundFlag:
+                    PTMDictionary[ModDBPos].append((DeltaMass, "%+d"%DeltaMass))
+                    print ModDBPos, DeltaMass, KeepAnnotation
+        File.close() 
+        self.PTMDictionaryInspect = PTMDictionary
+        print "Inspect parse: Found %s modified residues in %s file lines"%(len(PTMDictionary.keys()), LineNumber)
+        return PTMDictionary
+    def ParsePTMsUniprot(self):
+        SAXParser = xml.sax.make_parser()
+        UniprotParser = UniprotXMLParser(self.DB)
+        UniprotParser.ModMasses = self.ModMasses
+        SAXParser.setContentHandler(UniprotParser)
+        print "Parse %s..."%UniprotXMLFileName
+        SAXParser.parse(UniprotXMLFileName)
+        self.PTMDictionaryUniprot = UniprotParser.PTMDictionary
+        print "Reporting UNKNOWN PTM names..."
+        UniprotParser.ReportUnknownPTMs("UnknownPTMs.Uniprot.txt")
+        return self.PTMDictionaryUniprot
+    def ParsePTMsHPRD(self):
+        SAXParser = xml.sax.make_parser()
+        HPRDParser = HPRDXMLParser(self.DB)
+        HPRDParser.ModMasses = self.ModMasses
+        SAXParser.setContentHandler(HPRDParser)
+        #print "Parse %s..."%HPRDXMLFileName
+        FileNames = os.listdir(HPRDDir)
+        for FileNameIndex in range(len(FileNames)):
+            FileName = FileNames[FileNameIndex]
+            print "%s/%s: %s"%(FileNameIndex, len(FileNames), FileName)
+            XMLFilePath = os.path.join(HPRDDir, FileName)
+            try:
+                SAXParser.parse(XMLFilePath)
+            except:
+                traceback.print_exc()
+                print "* Error parsing %s"%XMLFilePath
+        self.PTMDictionaryHPRD = HPRDParser.PTMDictionary
+        print "Reporting UNKNOWN PTM names..."
+        HPRDParser.ReportUnknownPTMs("UnknownPTMs.HPRD.txt")
+        return self.PTMDictionaryHPRD
+    def ComparePTMDictionariesOneWay(self, DictA, DictB,
+        MaxMassDiff = 2, MaxPosDiff = 2, LimitMassFlag = None):
+        """
+        Determine how many of A's PTMs are found in B:
+        """
+        HitA = 0
+        MissA = 0
+        TotalA = 0
+        for (Pos, ModList) in DictA.items():
+            for (Mass, Name) in ModList:
+                # LimitMassFlag == 1: Skip very small and very large PTMs.
+                if LimitMassFlag == 1:
+                    if abs(Mass) < 5 or abs(Mass) >= 250:
+                        continue
+                elif LimitMassFlag != None:
+                    if Mass != LimitMassFlag:
+                        continue
+                TotalA += 1
+                HitFlag = 0
+                AllowedPositions = [Pos]
+                for Diff in range(1, MaxPosDiff + 1):
+                    AllowedPositions.append(Pos + Diff)
+                    AllowedPositions.append(Pos - Diff)
+                for NearPos in AllowedPositions:
+                    List = DictB.get(NearPos, [])
+                    for (OtherMass, OtherName) in List:
+                        if abs(Mass - OtherMass) <= MaxMassDiff:
+                            HitFlag = 1
+                if HitFlag:
+                    HitA += 1
+                else:
+                    MissA += 1
+        return (HitA, MissA, TotalA)
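+        # Illustrative example (hypothetical sites): with
+        #   DictA = {100: [(80, "phosphorylation")]}
+        #   DictB = {101: [(79, "phospho")]}
+        # and the default MaxMassDiff = 2, MaxPosDiff = 2, the +80 site at
+        # position 100 matches the +79 site at the neighbouring position 101,
+        # so the method returns (HitA, MissA, TotalA) = (1, 0, 1).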
+    def ComparePTMDictionaries(self, DictA, DictB,
+        MaxMassDiff = 2, MaxPosDiff = 2, LimitMassFlag = None):
+        """
+        Simple comparison: How many sites are shared between these two
+        dictionaries of PTMs?
+        """
+        (HitA, MissA, TotalA) = self.ComparePTMDictionariesOneWay(DictA, DictB,
+            MaxMassDiff, MaxPosDiff, LimitMassFlag)
+        (HitB, MissB, TotalB) = self.ComparePTMDictionariesOneWay(DictB, DictA,
+            MaxMassDiff, MaxPosDiff, LimitMassFlag)
+        print "ComparePTMDictionaries:"
+        SharedPercent = 100 * HitA / float(max(TotalA, 1))
+        print "A: %s total.  %s (%.2f%%) shared, %s not shared)"%(TotalA, HitA, SharedPercent, MissA)
+        SharedPercent = 100 * HitB / float(max(TotalB, 1))
+        print "B: %s total.  %s (%.2f%%) shared, %s not shared)"%(TotalB, HitB, SharedPercent, MissB)
+        OverallSharedPercent = (HitA + HitB) / float(max(1, TotalA + TotalB))
+        print "Overall shared percent: %.2f"%(100 * OverallSharedPercent)
+    def ParseAndOrPickle(self, ParseMethod, PickleFileName):
+        """
+        Parse PTMs from a file, OR unpickle them from a pre-parsed binary file.
+        (Parsing is slow, so we do it just once)
+        """
+        if os.path.exists(PickleFileName):
+            print "Loading PTM dictionary from %s..."%PickleFileName
+            File = open(PickleFileName, "rb")
+            Dictionary = cPickle.load(File)
+            File.close()
+        else:
+            Dictionary = ParseMethod()
+            print "Saving PTM dictionary to %s..."%PickleFileName
+            File = open(PickleFileName, "wb")
+            cPickle.dump(Dictionary, File)
+            File.close()
+        return Dictionary 
+    def DebugReportPTMs(self):
+        """
+        For debugging purposes, print a file showing all the PTMs found in any source.
+        """
+        AllKeyDict = {}
+        for Key in self.PTMDictionaryInspect.keys():
+            AllKeyDict[Key] = 1
+##        for Key in self.PTMDictionaryInspect5.keys():
+##            AllKeyDict[Key] = 1
+        for Key in self.PTMDictionaryHPRD.keys():
+            AllKeyDict[Key] = 1
+        for Key in self.PTMDictionaryUniprot.keys():
+            AllKeyDict[Key] = 1
+        AllKeys = AllKeyDict.keys()
+        AllKeys.sort()
+        for Key in AllKeys:
+            DBStart = max(0, Key - 7)
+            DBEnd = min(len(self.DB), Key + 8)
+            Aminos = "%s.%s.%s"%(self.DB[DBStart:Key], self.DB[Key], self.DB[Key + 1:DBEnd])
+            MassInspect = self.PTMDictionaryInspect.get(Key, "")
+            #MassInspect5 = self.PTMDictionaryInspect5.get(Key, "")
+            MassHPRD = self.PTMDictionaryHPRD.get(Key, "")
+            MassUniprot = self.PTMDictionaryUniprot.get(Key, "")
+            if MassHPRD == "" and MassUniprot == "":
+                continue
+            Str = "%s\t%s\t%s\t%s\t%s\t"%(Key, Aminos, MassInspect, MassHPRD, MassUniprot)
+            print Str
+    def DebugPrintDict(self, Dict):
+        OverallCount = 0
+        for (Key, List) in Dict.items():
+            OverallCount += len(List)
+        print "Overall count:", OverallCount
+    def Main(self):
+        self.LoadDatabase()
+        self.InitializeModMasses()
+        self.PTMDictionaryInspect = self.ParseAndOrPickle(self.ParsePTMsInspect, "PTMDictionaryInspect.pickle")
+        #self.PTMDictionaryInspect5 = self.ParseAndOrPickle(self.ParsePTMsInspect5, "PTMDictionaryInspect5.pickle")
+        self.PTMDictionaryHPRD = self.ParseAndOrPickle(self.ParsePTMsHPRD, "PTMDictionaryHPRD.pickle")
+        self.PTMDictionaryUniprot = self.ParseAndOrPickle(self.ParsePTMsUniprot, "PTMDictionaryUniprot.pickle")
+        print len(self.PTMDictionaryInspect.keys())
+        #print len(self.PTMDictionaryInspect5.keys())
+        for Dict in (self.PTMDictionaryInspect, self.PTMDictionaryHPRD, self.PTMDictionaryUniprot):
+            self.DebugPrintDict(Dict)
+        #self.DebugCountPTMs(Dict)
+        print "\n\nUniprot and HPRD: EXACT"
+        self.ComparePTMDictionaries(self.PTMDictionaryUniprot, self.PTMDictionaryHPRD, 0, 0, LimitMassFlag = None)
+        print "\n\nUniprot and HPRD: EXACT, omit bad masses"
+        self.ComparePTMDictionaries(self.PTMDictionaryUniprot, self.PTMDictionaryHPRD, 0, 0, LimitMassFlag = 1)
+        print "\n\nUniprot and HPRD: omit bad masses"
+        self.ComparePTMDictionaries(self.PTMDictionaryUniprot, self.PTMDictionaryHPRD, LimitMassFlag = 1)
+
+##        print "\n\nHEK293 best and uniprot, omit bad masses"
+##        self.ComparePTMDictionaries(self.PTMDictionaryInspect5, self.PTMDictionaryUniprot, LimitMassFlag = 1)
+##        print "\n\nHEK293 best and hprd, omit bad masses"
+##        self.ComparePTMDictionaries(self.PTMDictionaryInspect5, self.PTMDictionaryHPRD, LimitMassFlag = 1)
+        print "\n\nHEK293 and uniprot, omit bad masses"
+        self.ComparePTMDictionaries(self.PTMDictionaryInspect, self.PTMDictionaryUniprot, LimitMassFlag = 1)
+        print "\n\nHEK293 and hprd, omit bad masses"
+        self.ComparePTMDictionaries(self.PTMDictionaryInspect, self.PTMDictionaryHPRD, LimitMassFlag = 1)
+        for KeyMass in (14, 28, 42, 80):
+            print "\n\nUniprot and HPRD: Mass %s"%KeyMass
+            self.ComparePTMDictionaries(self.PTMDictionaryUniprot, self.PTMDictionaryHPRD, LimitMassFlag = KeyMass)
+            print "HEK293 and Uniprot, mass %s"%KeyMass
+            self.ComparePTMDictionaries(self.PTMDictionaryInspect, self.PTMDictionaryUniprot, LimitMassFlag = KeyMass)
+            print "HEK293 and HORD, mass %s"%KeyMass
+            self.ComparePTMDictionaries(self.PTMDictionaryInspect, self.PTMDictionaryHPRD, LimitMassFlag = KeyMass)
+            
+        ################################################
+        # Supplemental table: Inspect PTMs that match Uniprot *or* hprd
+        self.ReportInspectMatchedSites()
+    def ReportInspectMatchedSites(self):
+        """
+        Output a verbose report of all Inspect sites which were also seen in HPRD and/or uniprot.
+        """
+        HitA = 0
+        MissA = 0
+        TotalA = 0
+        MaxPosDiff = 2
+        MaxMassDiff = 3
+        LimitMassFlag = 0
+        ReportedAlreadyDict = {}
+        FilterFlag = 1 #%%%
+        if FilterFlag:
+            OutputFile = open("HEKPTM-InspectAndDB.txt", "wb")
+        else:
+            OutputFile = open("HEKPTM-InspectAndDB.unfiltered.txt", "wb")
+        for (Pos, ModList) in self.PTMDictionaryInspect.items():
+            for (Mass, Name) in ModList:
+                # LimitMassFlag == 1: Skip very small and very large PTMs.
+                if LimitMassFlag == 1:
+                    if abs(Mass) < 5 or abs(Mass) >= 250:
+                        continue
+                HPRDHitFlag = 0
+                HPRDMass = ""
+                HPRDName = ""
+                UniprotHitFlag = 0
+                UniprotMass = ""
+                UniprotName = ""
+                HPRDHitPos = ""
+                UniprotHitPos = ""
+                AllowedPositions = [Pos]
+                for Diff in range(1, MaxPosDiff + 1):
+                    AllowedPositions.append(Pos + Diff)
+                    AllowedPositions.append(Pos - Diff)
+                for NearPos in AllowedPositions:
+                    List = self.PTMDictionaryHPRD.get(NearPos, [])
+                    for (OtherMass, OtherName) in List:
+                        if abs(Mass - OtherMass) <= MaxMassDiff:
+                            HPRDHitFlag = 1
+                            HPRDHitPos = NearPos
+                            HPRDMass = OtherMass
+                            HPRDName = OtherName
+                    List = self.PTMDictionaryUniprot.get(NearPos, [])
+                    for (OtherMass, OtherName) in List:
+                        if abs(Mass - OtherMass) <= MaxMassDiff:
+                            UniprotHitFlag = 1
+                            UniprotHitPos = NearPos
+                            UniprotMass = OtherMass
+                            UniprotName = OtherName
+                if HPRDHitFlag or UniprotHitFlag:
+                    ReportMass = HPRDMass
+                    HitPos = HPRDHitPos
+                    if ReportMass == "":
+                        ReportMass = UniprotMass
+                        HitPos = UniprotHitPos
+                    ReportKey = (HitPos, ReportMass)
+                    if ReportedAlreadyDict.has_key(ReportKey):
+                        if FilterFlag:
+                            continue
+                    ReportedAlreadyDict[ReportKey] = 1
+                    #(ProteinName, ProteinResidue) = self.GetDBPosInfo(Pos)
+                    (ProteinName, ProteinResidue) = self.GetDBPosInfo(HitPos)
+                    Residue = self.DB[HitPos]
+                    NearAminos = self.DB[HitPos - 10:HitPos + 11]
+                    Str = "%s\t%s\t%s\t%s\t"%(Pos, Mass, ProteinName, ProteinResidue)
+                    Str += "%s\t%s\t"%(Residue, NearAminos)
+                    Str += "%s\t%s\t"%(HPRDMass, HPRDName)
+                    #Str += "%s\t%s\t%s\t"%(HPRDHitPos, HPRDMass, HPRDName)
+                    Str += "%s\t%s\t"%(UniprotMass, UniprotName)
+                    #Str += "%s\t%s\t%s\t"%(UniprotHitPos, UniprotMass, UniprotName)
+                    OutputFile.write(Str + "\n")
+        OutputFile.close()
+        
+class UXStates:
+    """
+    States for the [U]niprot [X]ml parser.  State can change when we START or END a tag.
+    Most of the time we're in the SKIP state. 
+    """
+    Skip = 0
+    Sequence = 1
+    Feature = 2
+    Accession = 3
+
+class TabularXMLParser(xml.sax.handler.ContentHandler):
+    """
+    Simple subclass of SAX XML parser: Employs dictionaries to look up the 
+    handlers for tag start, tag end, body text.  Keeps a current State.
+    """
+    def __init__(self):
+        self.startElement = self.StartElement
+        self.endElement = self.EndElement
+        self.characters = self.HandleCharacters
+        self.State = None
+        if not hasattr(self, "StartHandlers"):
+            self.StartHandlers = {}
+        if not hasattr(self, "EndHandlers"):
+            self.EndHandlers = {}
+        if not hasattr(self, "StringHandlers"):
+            self.StringHandlers = {}
+        xml.sax.handler.ContentHandler.__init__(self)
+    def StartElement(self, Name, Attributes):
+        Handler = self.StartHandlers.get(Name, None)
+        if Handler:
+            apply(Handler, (Attributes,))
+    def EndElement(self, Name):
+        Handler = self.EndHandlers.get(Name, None)
+        if Handler:
+            apply(Handler)
+    def HandleCharacters(self, String):
+        Handler = self.StringHandlers.get(self.State, None)
+        if Handler:
+            apply(Handler, (String,))
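+    # Hedged sketch of the dispatch pattern (names below are hypothetical): a
+    # subclass fills the handler dictionaries before calling __init__, e.g.
+    #   class TinyParser(TabularXMLParser):
+    #       def __init__(self):
+    #           self.StartHandlers = {"sequence": self.StartSequence}
+    #           self.EndHandlers = {"sequence": self.EndSequence}
+    #           self.StringHandlers = {1: self.HandleSequenceText}
+    #           TabularXMLParser.__init__(self)
+    # StartElement / EndElement then dispatch on the tag name, and
+    # HandleCharacters dispatches on whatever self.State the handlers set.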
+
+
+class PTMXMLParser(TabularXMLParser):
+    """
+    Simple subclass of TabularXMLParser, adding the ability to look up modification
+    sites in self.DB; relies heavily on subclass methods!
+    """
+    def AddPendingPTMs(self):
+        # Add pending PTMs:
+        for (Name, Position) in self.PendingPTMs:
+            #print "Pending PTM:", Name, Position
+            # Get flanking amino acids:
+            StartPos = max(0, Position - 7)
+            EndPos = min(len(self.Sequence), Position + 8)
+            # If we're next to the edge, extend farther in the other direction:
+            Len = EndPos - StartPos
+            if Len < 15:
+                StartPos = max(0, EndPos - 16)
+                Len = EndPos - StartPos
+            if Len < 15:
+                EndPos = min(len(self.Sequence), StartPos + 16)
+                #StartPos = max(0, Position - 14)
+            Aminos = self.Sequence[StartPos:EndPos]
+            PrefixLength = Position - StartPos
+            if len(Aminos) < 10:
+                print "* Warning: Aminos %s...%s from %s not distinct enough!"%(StartPos, EndPos, self.Accession)
+                print "Sequence length is %s, position is %s"%(len(self.Sequence), Position)
+            # Determine the mass:
+            LowerName = Name.lower()
+            if SkipModificationNames.has_key(LowerName):
+                continue                
+            Mass = self.ModMasses.get(LowerName, 0)
+            if Mass == 0:
+                # Try removing any parenthetical portions:
+                # Example: n6,n6,n6-trimethyllysine (alternate)
+                ParenPos = LowerName.find("(")
+                if ParenPos != -1:
+                    PreParen = LowerName[:ParenPos].strip()
+                    if SkipModificationNames.has_key(PreParen):
+                        continue                
+                    
+                    #print "Try removing parens: '%s' to '%s'"%(LowerName, PreParen)
+                    Mass = self.ModMasses.get(PreParen, 0)
+            if Mass == 0:
+                # Try the first bit of the mod, it might have the form "phosphoserine (by ck1)"
+                #print "try first bit: '%s' to '%s'"%(LowerName, LowerName.split()[0])
+                FirstBit = LowerName.split()[0]
+                Mass = self.ModMasses.get(FirstBit, 0)
+                if SkipModificationNames.has_key(FirstBit):
+                    continue                                
+            if Mass == 0:
+                print "* Warning - mass not known for: %s (accession %s)"%(LowerName, self.Accession)
+                if Position - 1 < 0 or Position - 1 >= len(self.Sequence):
+                    print "Found on residue %s (ILLEGAL NUMBER)"%Position
+                else:
+                    print "  Found on residue %s%s"%(self.Sequence[Position - 1], Position)
+                self.UnknownPTMDictionary[LowerName] = self.UnknownPTMDictionary.get(LowerName, 0) + 1
+            else:
+                #print "Adding ptm of size %s at dbpos %s"%(Mass, DBPos)
+                pass            
+            # Get database positions:
+            #print "Aminos:", Aminos
+            DBHitList = FindDBLocations(self.DB, Aminos)
+            #print "Peptide %s found in %s positions"%(Aminos, len(DBHitList))
+            for AminosDBPos in DBHitList:
+                DBPos = AminosDBPos + PrefixLength
+                if not self.PTMDictionary.has_key(DBPos):
+                    self.PTMDictionary[DBPos] = []
+
+                #print "ModMass %s at position %s, dbpos %s, flanking aminos from %s...%s: %s"%(\
+                #    Mass, Position, DBPos, StartPos, EndPos, Aminos)
+                # Avoid adding REDUNDANT records:
+                FoundFlag = 0
+                for (OldMass, OldName) in self.PTMDictionary[DBPos]:
+                    if OldMass == Mass:
+                        FoundFlag = 1
+                        break
+                if not FoundFlag:
+                    self.PTMDictionary[DBPos].append((Mass, Name))
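+        # Illustrative example of the flanking-window logic above (hypothetical
+        # protein): for a 30-residue sequence with a modified residue at
+        # position 2, the initial window [0, 10) is shorter than 15 residues,
+        # so it is widened to [0, 16); the resulting 16-residue string (with
+        # PrefixLength 2) is then located in self.DB via FindDBLocations.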
+    def ReportUnknownPTMs(self, OutputFileName):
+        SortedList = []
+        for (Name, Count) in self.UnknownPTMDictionary.items():
+            SortedList.append((Count, Name))
+        SortedList.sort()
+        SortedList.reverse()
+        File = open(OutputFileName, "wb")
+        for (Count, Name) in SortedList:
+            File.write("%s\t%s\t\n"%(Name, Count))
+        File.close()
+
+class UniprotXMLParser(PTMXMLParser):
+    """
+    Simple XML parser.  Because tag starts, tag ends, and body text are handled by various
+    sub-functions, we use dictionaries to map tags to their handlers.
+    Note: Remember that XML parse routines return unicode, hence the calls to str().
+    """
+    def __init__(self, DB):
+        self.DB = DB
+        self.EntryCount = 0
+        self.UnknownPTMDictionary = {}
+        self.PTMDictionary = {}
+        self.StartHandlers = {"entry":self.StartEntry, "sequence": self.StartSequence,
+                              "feature":self.StartFeature, "position": self.StartPosition,
+                              "accession":self.StartAccession, }
+        self.EndHandlers = {"sequence": self.EndSequence, "feature":self.EndFeature,
+                            "entry":self.EndEntry,"accession":self.EndAccession, }
+        self.StringHandlers = {UXStates.Sequence: self.HandleStringSequence,
+                               UXStates.Accession: self.HandleStringAccession}
+        PTMXMLParser.__init__(self)
+    def StartAccession(self, Attributes):
+        self.State = UXStates.Accession
+        self.Accession = ""
+    def EndAccession(self):
+        self.State = UXStates.Skip
+    def StartSequence(self, Attributes):
+        self.Sequence = ""
+        self.State = UXStates.Sequence
+    def EndSequence(self):
+        self.State = UXStates.Skip
+    def StartEntry(self, Attributes):
+        """
+        A new top-level <entry> tag for a protein record.  As we start the new record, we reset any
+        accumulated info.
+        """
+        self.Sequence = ""
+        self.Accession = ""
+        # PendingPTMs is a list of tuples of the form (Name, SequencePosition).
+        self.PendingPTMs = []
+    def EndEntry(self):
+        self.AddPendingPTMs()
+        self.EntryCount += 1
+        if self.EntryCount % 1000 == 0:
+            print "Handled entry #%d"%self.EntryCount
+    def HandleStringSequence(self, String):
+        "Handle the body of the <sequence> tag"
+        self.Sequence += str(String.strip())
+    def HandleStringAccession(self, String):
+        "Handle the body of the <accession> tag"
+        self.Accession += str(String.strip())
+    def StartFeature(self, Attributes):
+        """
+        Handle a <feature> tag, ignoring it unless its type is "modified residue".
+        """
+        Type = Attributes["type"].lower()
+        if Type != "modified residue":
+            return
+        self.CurrentModification = str(Attributes["description"])
+        self.State = UXStates.Feature
+    def EndFeature(self):
+        self.State = UXStates.Skip
+    def StartPosition(self, Attributes):
+        """
+        Handle tag of the form <position position="123"/>
+        """
+        if self.State == UXStates.Feature:
+            # Subtract 1, to go from 1-based to 0-based numbering:
+            Position = int(Attributes["position"]) - 1
+            # Add a PTM to our pending list:
+            self.PendingPTMs.append((self.CurrentModification, Position))
+            #print "Added PTM:", self.PendingPTMs[-1]
+
+
+class HPRDXStates:
+    Skipping = 0
+    Sequence = 1
+    PTMSite = 2
+
+class HPRDXMLParser(PTMXMLParser):
+    """
+    Parser for HPRD records.  Similar to UniprotXMLParser.
+    """
+    def __init__(self, DB):
+        self.DB = DB
+        self.EntryCount = 0
+        self.UnknownPTMDictionary = {}
+        self.PTMDictionary = {}
+        self.State = UXStates.Skip
+        self.StartHandlers = {"protein_sequence":self.StartSequence,
+                              "isoform":self.StartIsoform,
+                              "protein":self.StartProtein,
+                              "modification":self.StartModification,
+                              "ptm_site":self.StartPTMSite,}
+        self.EndHandlers = {"isoform":self.EndIsoform,
+                            "ptm_site":self.EndPTMSite,
+                            "protein_sequence":self.EndSequence}
+        self.StringHandlers = {HPRDXStates.Sequence: self.HandleStringSequence,
+                               HPRDXStates.PTMSite: self.HandleStringPTMSite
+                               }
+        self.DummyTable = string.maketrans("", "")
+        PTMXMLParser.__init__(self)
+    def StartSequence(self, Attributes):
+        self.Sequence = ""
+        self.State = HPRDXStates.Sequence
+    def EndSequence(self):
+        self.State = HPRDXStates.Skipping
+        self.Sequence = self.Sequence.upper()
+        #print "Obtained sequence of length %s"%len(self.Sequence)
+    def HandleStringSequence(self, String):
+        "Handle the body of the <sequence> tag"
+        try:
+            Block = str(String)
+        except:
+            print "wtf?"
+            print "%d: '%s'"%(len(String), String)
+            return
+        Block = Block.translate(self.DummyTable, " \r\n\t")
+        #Block = self.StripWhitespace(String).upper()
+        self.Sequence += Block
+    def HandleStringPTMSite(self, String):
+        self.CurrentSite += String
+##    def StripWhitespace(self, String):
+##        return String.translate(self.DummyTable, " \r\n\t")
+    def StartProtein(self, Attributes):
+        self.Accession = str(Attributes["id"])
+    def StartIsoform(self, Attributes):
+        """
+        Start a protein record.  Clear any accumulated data:
+        """
+        self.PendingPTMs = []
+        self.Sequence = ""
+        #self.Accession = ""
+    def StartModification(self, Attributes):
+        self.CurrentModType = str(Attributes["type"])
+        #print "START modification '%s'"%self.CurrentModType
+    def StartPTMSite(self, Attributes):
+        self.State = HPRDXStates.PTMSite
+        self.CurrentSite = ""
+    def EndPTMSite(self):
+        # subtract one, to go from 1-based to 0-based numbering.
+        #print "FINISH ptm_site"
+        Position = int(self.CurrentSite) - 1 
+        self.PendingPTMs.append((self.CurrentModType, Position))
+        self.State = HPRDXStates.Skipping
+    def EndIsoform(self):
+        """
+        End a protein record.  Save any accumulated modifications:
+        """
+        #print "END ISOFORM: add pending PTMs"
+        self.AddPendingPTMs()
+        self.EntryCount += 1
+        if self.EntryCount % 1000 == 0:
+            print "Handled entry #%d"%self.EntryCount
+
+        
+if __name__ == "__main__":
+    try:
+        import psyco
+        psyco.full()
+    except:
+        print "(Warning: psyco not found, running non-optimized)"
+    Master = CompareMaster()
+    Master.Main()
diff --git a/ComputeFDR.jar b/ComputeFDR.jar
new file mode 100644
index 0000000..f0bdb24
Binary files /dev/null and b/ComputeFDR.jar differ
diff --git a/ComputeFScore.py b/ComputeFScore.py
new file mode 100644
index 0000000..badd27e
--- /dev/null
+++ b/ComputeFScore.py
@@ -0,0 +1,328 @@
+#Title:          ComputeFScore.py
+#Author:         Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+#Created:        2010
+# Copyright 2007,2008,2009 The Regents of the University of California
+# All Rights Reserved
+#
+# Permission to use, copy, modify and distribute any part of this
+# program for educational, research and non-profit purposes, by non-profit
+# institutions only, without fee, and without a written agreement is hereby
+# granted, provided that the above copyright notice, this paragraph and
+# the following three paragraphs appear in all copies.
+#
+# Those desiring to incorporate this work into commercial
+# products or use for commercial purposes should contact the Technology
+# Transfer & Intellectual Property Services, University of California,
+# San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+# Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+#
+# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+# INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+# IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+# OF SUCH DAMAGE.
+#
+# THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+# OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+# ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+# REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+# EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+# THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+
+"""
+This script, based on PeptideProphet, computes an F-Score.  The F-Score
+for a match is a weighted sum of the length-corrected MQScore and the delta score
+(normalized by the mean delta score).
+
+Also, there is no length constraint on peptides.  An FScore is computed for all peptides.
+The FScore column of each peptide-spectrum match (PSM) is updated, but no p-value or FDR is calculated.
+
+"""
+import os
+import sys
+import random
+import math
+import getopt
+import traceback
+import struct
+import string # needed for string.join in WriteMatchesForSpectrum
+import ResultsParser
+import SelectProteins
+import Learning
+from Utils import *
+Initialize()
+
+class Defaults:
+    MQScoreWeight = 0.3
+    DeltaScoreWeight = 1.5
+    ###########################
+    BlindMQScoreWeight = 0.3
+    BlindDeltaScoreWeight = 1.5
+    
+    ###########################
+    
+
+BLIND_MOD_PENALTY = 1.0
+MIN_MQSCORE = -10.0
+
+# Parse the scores from at most this many output files.  
+MAX_RESULTS_FILES_TO_PARSE = 100
+
+BIN_MULTIPLIER = 10.0
+SQRT2PI = math.sqrt(2 * math.pi)
+
+Cof = [76.18009172947146, -86.50532032941677,
+    24.01409824083091, -1.231739572450155, 
+    0.1208650973866179e-2, -0.5395239384952e-5]
+
+class Bag:
+    pass
+
+class FScoreParser(ResultsParser.ResultsParser):
+    def __init__(self):
+        
+        self.VerboseFlag = 0
+        self.MQScoreWeight = Defaults.MQScoreWeight
+        self.DeltaScoreWeight = Defaults.DeltaScoreWeight
+        self.BlindFlag = 0
+        self.MaxDeltaScoreGap = -3.5
+        self.SplitByCharge = 0
+        self.Columns = ResultsParser.Columns()
+        ResultsParser.ResultsParser.__init__(self)
+
+    def ReadDeltaScoreDistribution(self, FilePath):
+        """
+        Read delta-scores from a file, to compute the average delta-score.
+        If passed a directory, iterate over all results files in the directory.
+        """
+        #
+        self.AllSpectrumCount2 = 0
+        self.AllSpectrumCount3 = 0
+        self.MeanDeltaScore2 = 0
+        self.MeanDeltaScore3 = 0
+        self.ProcessResultsFiles(FilePath, self.ReadDeltaScoreDistributionFromFile, MAX_RESULTS_FILES_TO_PARSE)
+        
+        if self.SplitByCharge == 1:
+            self.MeanDeltaScore2 /= max(1, self.AllSpectrumCount2)
+            self.MeanDeltaScore3 /= max(1, self.AllSpectrumCount3)
+            if self.VerboseFlag:
+                print "Mean delta score ch1..2: %s over %s spectra"%(self.MeanDeltaScore2, self.AllSpectrumCount2)
+                print "Mean delta score ch3: %s over %s spectra"%(self.MeanDeltaScore3, self.AllSpectrumCount3)
+            if not self.MeanDeltaScore2:
+                self.MeanDeltaScore2 = 0.001
+            if not self.MeanDeltaScore3:
+                self.MeanDeltaScore3 = 0.001
+        else:
+            self.MeanDeltaScore = (self.MeanDeltaScore2 + self.MeanDeltaScore3)/(max(1,self.AllSpectrumCount2+self.AllSpectrumCount3))
+            if self.VerboseFlag:
+                print "Mean delta score: %s over %s spectra"%(self.MeanDeltaScore, self.AllSpectrumCount2+self.AllSpectrumCount3)
+
+    def ReadDeltaScoreDistributionFromFile(self, FilePath):
+        "Read delta-scores from a single file, to compute the average delta-score."
+        print "Read delta-score distribution from %s..."%FilePath
+        try:
+            File = open(FilePath, "rb")
+        except:
+            traceback.print_exc()
+            return
+        OldSpectrum = None
+        for FileLine in File.xreadlines():
+            # Skip header lines and blank lines
+            if FileLine[0] == "#":
+                self.Columns.initializeHeaders(FileLine)
+                continue
+            if not FileLine.strip():
+                continue
+            Bits = list(FileLine.split("\t"))
+            if len(Bits) <= self.Columns.getIndex("DeltaScore"):
+                continue
+            try:
+                Charge = int(Bits[self.Columns.getIndex("Charge")])
+                MQScore = float(Bits[self.Columns.getIndex("MQScore")])
+                DeltaScore = float(Bits[self.Columns.getIndex("DeltaScore")])
+                Peptide = GetPeptideFromModdedName(Bits[self.Columns.getIndex("Annotation")])
+                Spectrum = (os.path.basename(Bits[self.Columns.getIndex("SpectrumFile")]), Bits[self.Columns.getIndex("Scan#")])
+            except:
+                traceback.print_exc()
+                print Bits
+                continue # header line
+            if Spectrum == OldSpectrum:
+                continue
+            
+            OldSpectrum = Spectrum
+            
+            if DeltaScore < 0:
+                print "## Warning: DeltaScore < 0!", Spectrum, FilePath
+                print DeltaScore
+                print MQScore
+                print Bits
+                raw_input()
+                continue
+            if Charge < 3:
+                self.AllSpectrumCount2 += 1
+                self.MeanDeltaScore2 += DeltaScore
+                
+            else:
+                self.AllSpectrumCount3 += 1
+                self.MeanDeltaScore3 += DeltaScore
+        File.close()            
+    def WriteMatchesForSpectrum(self, MatchesForSpectrum, OutFile):
+        
+        for Match in MatchesForSpectrum:
+            # Skip short matches:
+            Length = len(Match.Peptide.Aminos)
+            
+            if self.SplitByCharge:
+                if Match.Charge < 3:
+                    CurrMeanDeltaScore = self.MeanDeltaScore2
+                else:
+                    CurrMeanDeltaScore = self.MeanDeltaScore3
+
+            else:
+                CurrMeanDeltaScore = self.MeanDeltaScore
+
+            WeightedScore = self.MQScoreWeight * Match.MQScore + self.DeltaScoreWeight * (Match.DeltaScore / CurrMeanDeltaScore)
+            ScoreBin = int(round(WeightedScore * BIN_MULTIPLIER))
+            
+            Match.Bits[self.Columns.getIndex("F-Score")] = "%s"%WeightedScore
+    
+            OutFile.write(string.join(Match.Bits, "\t"))
+            OutFile.write("\n")
+
+    def WriteFixedScores(self, OutputPath):
+       
+        self.WriteScoresPath = OutputPath
+        # Make the output directory, if it doesn't exist already.
+        # Assume: OutputPath is a directory if ReadScoresPath is a directory,
+        # and OutputPath is a file if ReadScoresPath is a file.
+        if os.path.isdir(self.ReadScoresPath):
+            DirName = OutputPath
+        else:
+            DirName = os.path.split(OutputPath)[0]
+        try:
+            os.makedirs(DirName)
+        except:
+            pass
+        self.ProcessResultsFiles(self.ReadScoresPath, self.WriteFixedScoresFile)
+        
+
+    def WriteFixedScoresFile(self, Path):
+        if os.path.isdir(self.ReadScoresPath):
+            OutputPath = os.path.join(self.WriteScoresPath, os.path.split(Path)[1])
+        else:
+            OutputPath = self.WriteScoresPath
+        
+        try:
+            InFile = open(Path, "rb")
+            OutFile = open(OutputPath, "wb")
+            LineCount = 0
+            
+            OldSpectrum = None
+            MatchesForSpectrum = []
+            for FileLine in InFile:
+                # Lines starting with # are comments (e.g. header line), and are written out as-is:
+                if FileLine[0] == "#":
+                    self.Columns.initializeHeaders(FileLine)
+                    OutFile.write(FileLine)
+                    continue
+                Bits = list(FileLine.strip().split("\t"))
+                Match = Bag()
+                try:
+                    Match.Bits = Bits
+                    Match.Charge = int(Bits[self.Columns.getIndex("Charge")])
+                    Match.MQScore = float(Bits[self.Columns.getIndex("MQScore")])
+                    Match.DeltaScore = float(Bits[self.Columns.getIndex("DeltaScore")])
+                    Match.Peptide = GetPeptideFromModdedName(Bits[self.Columns.getIndex("Annotation")])
+                    Match.ProteinName = Bits[self.Columns.getIndex("Protein")]
+                except:
+                    continue
+                LineCount += 1
+                Spectrum = (Bits[0], Bits[1])
+                if Spectrum != OldSpectrum:
+                    self.WriteMatchesForSpectrum(MatchesForSpectrum, OutFile)
+                    MatchesForSpectrum = []
+                OldSpectrum = Spectrum
+                MatchesForSpectrum.append(Match)
+            # Finish the last spectrum:
+            self.WriteMatchesForSpectrum(MatchesForSpectrum, OutFile)
+            InFile.close()
+            OutFile.close()
+            
+            
+        except:
+            traceback.print_exc()
+            print "* Error filtering annotations from '%s' to '%s'"%(Path, OutputPath)
+
+    def Run(self):
+        self.ReadDeltaScoreDistribution(self.ReadScoresPath)
+        self.WriteFixedScores(self.WriteScoresPath)
+
+    def ParseCommandLine(self, Arguments):
+        (Options, Args) = getopt.getopt(Arguments, "r:w:cvb")
+        OptionsSeen = {}
+
+        for (Option, Value) in Options:
+            OptionsSeen[Option] = 1
+            if Option == "-b":
+                self.BlindFlag = 1
+                self.MQScoreWeight = Defaults.BlindMQScoreWeight
+                self.DeltaScoreWeight = Defaults.BlindDeltaScoreWeight
+                
+            elif Option == "-r":
+                self.ReadScoresPath  = Value
+            elif Option == "-w":
+                self.WriteScoresPath = Value
+            elif Option == "-c":
+                self.SplitByCharge = 1
+            elif Option == "-v":
+                self.VerboseFlag = 1
+            else:
+                print "** Unknown option:", Option, Value
+        # Check validity of options:
+        if not OptionsSeen.has_key("-r") or not OptionsSeen.has_key("-w"):
+            print "* Error: Missing Arguments"
+            return 0
+        # No major problems - return TRUE for success.
+        return 1
+
+UsageInfo = """
+ComputeFScore.py - Compute FScore based on match quality score (MQScore) and delta score.  
+Write out an updated results file.
+
+Required Parameters:
+ -r [FILENAME] Read results from filename (used to compute the mean delta-score
+    for normalization).  If the option value is a directory, we'll read
+    all the results-files from the directory.
+ -w [FILENAME] Write re-scored results to a file.
+ 
+Optional Parameters:
+ -c Split by charge (compute the FScore separately for charges 1 and 2, and for charge 3).
+ -b Results are from a blind search (not recommended)
+
+Internal use only:
+ -v Verbose output (for debugging)
+
+    
+Example:
+  ComputeFScore.py -r ShewanellaResults -w ShewanellaFiltered -c
+"""
+
+def Main(Parser = None):
+    global MAX_RESULTS_FILES_TO_PARSE
+    
+    if not Parser:
+        Parser = FScoreParser()
+        Result = Parser.ParseCommandLine(sys.argv[1:])
+        if not Result:
+            print UsageInfo
+            return
+        Parser.Run()
+    
+if __name__ == "__main__":
+    try:
+        import psyco
+        psyco.full()
+    except:
+        print "psyco not found - running without optimization"
+    #TestMain()
+    Main()
diff --git a/ComputePTMFeatures.py b/ComputePTMFeatures.py
new file mode 100644
index 0000000..2e678e1
--- /dev/null
+++ b/ComputePTMFeatures.py
@@ -0,0 +1,943 @@
+#Title:          ComputePTMFeatures.py
+#Author:         Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+#Created:        2005
+# Copyright 2007,2008,2009 The Regents of the University of California
+# All Rights Reserved
+#
+# Permission to use, copy, modify and distribute any part of this
+# program for educational, research and non-profit purposes, by non-profit
+# institutions only, without fee, and without a written agreement is hereby
+# granted, provided that the above copyright notice, this paragraph and
+# the following three paragraphs appear in all copies.
+#
+# Those desiring to incorporate this work into commercial
+# products or use for commercial purposes should contact the Technology
+# Transfer & Intellectual Property Services, University of California,
+# San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+# Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+#
+# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+# INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+# IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+# OF SUCH DAMAGE.
+#
+# THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+# OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+# ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+# REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+# EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+# THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+"""
+Plan:
+Output a large collection of features for each post-translational modification accepted in a
+search of a part-bogus database.  All modifications on the bogus proteins are incorrect;
+approximately an equal number of modifications on the non-bogus proteins are also incorrect.  Let's compute
+a variety of features for the PTMs observed.
+
+Input:
+A collection of annotated spectra, output by SelectSites.py
+Output:
+A file listing all the observed modification sites, with various features computed.  
+
+Then, we train a model to distinguish between good (correct DB) and bad (incorrect DB)
+modifications.  Model types: LDA, logistic regression, SVM, etc.
+
+(Another possible experiment: Search unmodified spectra against a mutated database,
+and judge as correct precisely those modifications which "undo" the mutations.)
+"""
+
+import os
+import sys
+import struct
+import traceback
+import getopt
+import MSSpectrum
+import PyInspect
+import random
+import shutil
+import time
+import math
+import cPickle
+import BasicStats
+import ResultsParser
+import BuildConsensusSpectrum
+import SpectralSimilarity
+import StripPTM
+random.seed(1)
+from Utils import *
+from TrainPTMFeatures import FormatBits
+Initialize()
+
+AMINO_ACIDS = "ACDEFGHIKLMNOPQRSTUVWY" # O and U are included, for now.
+INVALID_MASS = 99999
+
+# Retain at most this many spectra for an unmodified peptide:
+MAX_MODLESS_CLUSTER_SIZE = 100
+
+# For running the Python profiler:
+PROFILE_FLAG = 0
+
+class PeptideSpecies:
+    """
+    Peptides[(Annotation, Charge)] -> PeptideSpecies instance
+    The PeptideSpecies remembers a list of spectra, the modification position, the modification mass.
+    """
+    InstanceCount = 0
+    def __init__(self):
+        self.HitCount = 0
+        self.ModifiedFlag = 0
+        self.ModMass = 0
+        self.DBPos = 0
+        self.ModDBPos = 0
+        self.Spectra = []
+        self.Peptide = None
+        self.SpectrumCount = 0
+        PeptideSpecies.InstanceCount += 1
+    def __del__(self):
+        if PeptideSpecies:
+            PeptideSpecies.InstanceCount -= 1
+    def __str__(self):
+        return self.Annotation
+
+class SpectrumInfoClass(ResultsParser.ResultsParser):
+    """
+    Information about a single scan.  We remember only the info we'll need later.
+    """
+    InstanceCount = 0
+    def __init__(self, Bits, Trainer):
+        ResultsParser.ResultsParser.__init__(self)
+        
+        self.FileNameIndex = Trainer.RememberString(Trainer.CachedFilePaths, Bits[0])
+        self.MQScore = float(Bits[Trainer.Columns.getIndex("MQScore")])
+        self.DeltaScore = float(Bits[Trainer.Columns.getIndex("DeltaScore")])
+        self.ByteOffset = int(Bits[Trainer.Columns.getIndex("SpecFilePos")])
+        self.ScanNumber = int(Bits[Trainer.Columns.getIndex("Scan#")])
+        SpectrumInfoClass.InstanceCount += 1
+    def __cmp__(self, Other):
+        """
+        Sort from BEST to WORST match.
+        """
+        if self.MQScore > Other.MQScore:
+            return -1
+        if self.MQScore < Other.MQScore:
+            return 1
+        return 0
+    def __del__(self):
+        if SpectrumInfoClass:
+            SpectrumInfoClass.InstanceCount -= 1
+        
+class PTMFeatureComputer(ResultsParser.ResultsParser, ResultsParser.SpectrumOracleMixin):
+    def __init__(self):
+        self.PValueCutoff = 0.1 # default
+        self.ResultsFileName = None
+        self.DBPath = None
+        self.OutputDir = "PTM"
+        self.OutputPath = os.path.join(self.OutputDir, "PTMFeatures.txt")
+        self.ConsensusClusterDir = None
+        self.ConsensusSpectrumDir = None
+        # Peptides keeps a list of SpectrumInfo objects for each peptide species
+        # we've observed.  The keys have the form (Annotation, Charge) and the values
+        # are lists of SpectrumInfo instances.
+        self.Peptides = {}
+        self.PTMs = {} # keys of the form (DBPos, Mass)
+        self.CoverageThreshold = 2 # at least this many spectra to consider a residue 'covered'.
+        self.QuickParseFlag = 0 # if true, then parse only the first n lines
+        self.PoolFlag = 0
+        self.ModelType = None
+        self.SisterProteins = {} # protein index -> sister protein's index
+        self.MZXMLOracle = {}
+        self.ModelTrainFilePath = "PTMFeatures.All.txt"
+        self.ModelTestFilePath = None
+        # Dictionary of unmodified peptides, for computing the coverage level:
+        self.UnmodifiedPeptides = {}
+        self.FeatureSelectionFlag = None
+        self.CachedFilePaths = []
+        self.CachedFixedFilePaths = []
+        self.StartOutputDBPos = 0
+        self.RequiredFileNameChunk = None
+        self.Columns = ResultsParser.Columns()
+        ResultsParser.ResultsParser.__init__(self)
+        ResultsParser.SpectrumOracleMixin.__init__(self)
+
+    def RememberString(self, StringList, NewString):
+        """
+        Return the index of NewString within StringList, adding to the list if necessary.
+        We keep a list of mzxml file names and store indexes into the list, to avoid
+        the memory hit required to store each occurrence of the name.
+        """
+        try:
+            Index = StringList.index(NewString)
+            return Index
+        except:
+            StringList.append(NewString)
+            return len(StringList) - 1
+    def LoadDB(self):
+        """
+        Load the database searched.  For future reference, we want the protein names as well.
+        """
+        # Populate self.DB with the contents of the .trie file
+        File = open(self.DBPath, "rb")
+        self.DB = File.read()
+        File.close()
+        # Populate self.ProteinNames and self.ProteinPositions by parsing the index file:
+        self.ProteinNames = []
+        self.ProteinPositions = []
+        IndexPath = os.path.splitext(self.DBPath)[0] + ".index"
+        File = open(IndexPath, "rb")
+        BlockSize = struct.calcsize("<qi80s")
+        while 1:
+            Block = File.read(BlockSize)
+            if not Block:
+                break
+            Tuple = struct.unpack("<qi80s", Block)
+            Name = Tuple[-1]
+            NullPos = Name.find("\0")
+            if NullPos != -1:
+                Name = Name[:NullPos]
+            self.ProteinNames.append(Name)
+            self.ProteinPositions.append(Tuple[1])
+        File.close()
+        # Initialize our coverage arrays:
+        self.Coverage = [0] * len(self.DB)
+        self.ModCoverage = [0] * len(self.DB)
+        self.PeptideCoverage = [0] * len(self.DB)
+        self.ModPeptideCoverage = [0] * len(self.DB)
+        # Find sister-proteins.
+        # A shuffled protein name should be the standard protein's name
+        # with the characters "XXX" prepended.  (If the standard protein name
+        # is very long, the last characters may "slide off the edge")
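+        # E.g. the decoy "XXXsp|P12345|ALBU_HUMAN" pairs with "sp|P12345|ALBU_HUMAN":
+        # the decoy name minus its "XXX" prefix is compared against each standard
+        # name truncated to 77 characters, since the .index records store at most
+        # 80 characters per name.  (The accession shown is illustrative.)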
+        for IndexA in range(len(self.ProteinNames)):
+            Name = self.ProteinNames[IndexA]
+            SisterName = None
+            if Name[:3] == "XXX":
+                SisterName = Name[3:]
+            if SisterName:
+                for IndexB in range(len(self.ProteinNames)):
+                    Name = self.ProteinNames[IndexB][:77]
+                    if Name == SisterName:
+                        self.SisterProteins[IndexA] = IndexB
+                        self.SisterProteins[IndexB] = IndexA
+    def ComputeProteinCoverage(self):
+        """
+        Compute residue-level coverage.  And, compute
+        what fraction of each protein is covered. 
+        """
+        for Species in self.Peptides.values():
+            DBPos = Species.DBPos
+            if Species.ModifiedFlag:
+                for Pos in range(DBPos, DBPos + len(Species.Peptide.Aminos)):
+                    self.ModCoverage[Pos] += Species.SpectrumCount # count modified spectra
+                    self.ModPeptideCoverage[Pos] += 1 # count distinct peptide species
+            else:
+                for Pos in range(DBPos, DBPos + len(Species.Peptide.Aminos)):
+                    self.Coverage[Pos] += Species.SpectrumCount # count unmodified spectra
+                    self.PeptideCoverage[Pos] += 1 # count distinct unmodified peptide species
+        #########################################################
+        # Compute percentage of each protein that's covered:
+        self.ProteinCoverageLevels = []
+        for ProteinIndex in range(len(self.ProteinPositions)):
+            StartPos = self.ProteinPositions[ProteinIndex]
+            if ProteinIndex < len(self.ProteinPositions) - 1:
+                EndPos = self.ProteinPositions[ProteinIndex + 1]
+            else:
+                EndPos = len(self.DB)
+            #print "Protein %s (%s) from %s-%s"%(ProteinIndex, self.ProteinNames[ProteinIndex], StartPos, EndPos)
+            CoverFlags = 0
+            for Pos in range(StartPos, EndPos):
+                if self.Coverage[Pos] >= self.CoverageThreshold:
+                    CoverFlags += 1
+            ProteinLength = EndPos - StartPos
+            #print "  -> Coverage %s/%s = %s"%(CoverFlags, ProteinLength, CoverFlags / float(ProteinLength))
+            self.ProteinCoverageLevels.append(CoverFlags / float(ProteinLength))
+        # SAVE protein coverage levels:
+        CoveragePath = os.path.join(self.OutputDir, "Coverage.dat")
+        CoverageFile = open(CoveragePath, "wb")
+        for DBPos in range(len(self.DB)):
+            Str = struct.pack("<II", self.Coverage[DBPos], self.ModCoverage[DBPos])
+            CoverageFile.write(Str)
+        CoverageFile.close()
+        # Boost protein coverage levels based upon sister proteins:
+        for Index in range(len(self.ProteinNames)):
+            SisterIndex = self.SisterProteins.get(Index, None)
+            if SisterIndex == None:
+                continue
+            self.ProteinCoverageLevels[Index] = max(self.ProteinCoverageLevels[Index], self.ProteinCoverageLevels[SisterIndex])
+            #print "%s and %s are sisters, with coverage %s"%(self.ProteinNames[Index], self.ProteinNames[SisterIndex], self.ProteinCoverageLevels[Index])
+    def FixPeptideSpecies(self):
+        """
+        Iterate over all the peptide species we observed.  Strip "obviously unnecessary" PTMs.
+        """
+        Keys = self.Peptides.keys()
+        for Key in Keys:
+            Annotation = Key[0]
+            Species = self.Peptides[Key]
+            Result = StripPTM.StripNeedlessModifications(self.DB, Annotation)
+            if not Result:
+                continue
+            (DBPos, FixedAnnotation) = Result
+            # If the annotation wasn't changed, then continue.
+            if FixedAnnotation == Annotation:
+                continue
+            Species.Peptide = GetPeptideFromModdedName(FixedAnnotation)
+            Species.Annotation = FixedAnnotation
+            Species.DBPos = DBPos
+            ModKeys = Species.Peptide.Modifications.keys()
+            if len(ModKeys):
+                ModIndex = ModKeys[0]
+                Species.ModifiedFlag = 1
+                Species.ModMass = int(round(Species.Peptide.Modifications[ModIndex][0].Mass))
+                Species.ModDBPos = DBPos + ModIndex
+            else:
+                Species.ModifiedFlag = 0
+                Species.ModMass = 0
+                Species.ModDBPos = None
+            del self.Peptides[Key]
+            # Either merge into the existing species with this fixed annotation,
+            # or move into the empty pigeonhole:
+            FixedKey = (FixedAnnotation, Key[1])
+            OldSpecies = self.Peptides.get(FixedKey, None)
+            if OldSpecies:
+                OldSpecies.Spectra.extend(Species.Spectra)
+                OldSpecies.SpectrumCount += Species.SpectrumCount
+                if len(OldSpecies.Spectra) > MAX_MODLESS_CLUSTER_SIZE:
+                    OldSpecies.Spectra.sort()
+                    OldSpecies.Spectra = Species.Spectra[:MAX_MODLESS_CLUSTER_SIZE]
+            else:
+                self.Peptides[FixedKey] = Species
+                if len(Species.Spectra) > MAX_MODLESS_CLUSTER_SIZE:
+                    Species.Spectra.sort()
+                    Species.Spectra = Species.Spectra[:MAX_MODLESS_CLUSTER_SIZE]
+    def ParsePTMsFromResultsFile(self, FilePath):
+        """
+        Callback for parsing one Inspect results-file.  Our job here
+        is to populate self.BestSpectra, and self.PTMs.
+        Note: It's POSSIBLE that we'll spot some modified annotations
+        which can be "trivially fixed" to produce unmodified annotations.
+        Examples: T.T+101LAPTTVPITSAK.A, Y.E+163NPNFTGK.K
+        Because of this, we keep a dictionary self.FixedAnnotation,
+        where keys are raw annotations and values are fixed-up annotations.
+        """
+        if self.RequiredFileNameChunk:
+            Pos = FilePath.find(self.RequiredFileNameChunk)
+            if Pos == -1:
+                return
+        if os.path.isdir(FilePath):
+            print "NOTE: Skipping results sub-directory '%s'"%FilePath
+            return
+        try:
+            File = open(FilePath, "rb")
+        except:
+            print "** Unable to open results file '%s'"%FilePath
+            return
+        LineNumber = 0
+        OldSpectrum = None
+        for FileLine in File.xreadlines():
+            LineNumber += 1
+            if LineNumber % 1000 == 0:
+                print "  Line %s..."%LineNumber
+                if self.QuickParseFlag:
+                    break
+            if FileLine[0] == "#":
+                self.Columns.initializeHeaders(FileLine)
+                continue
+            Bits = FileLine.strip().split("\t")
+            if len(Bits) < 15:
+                continue # not valid!
+            Spectrum = (Bits[0], Bits[1])
+            if Spectrum == OldSpectrum:
+                continue
+            OldSpectrum = Spectrum
+            PValue = float(Bits[self.Columns.getIndex("InspectFDR")])
+            if PValue > self.PValueCutoff:
+                continue
+            Annotation = Bits[self.Columns.getIndex("Annotation")]
+            Charge = int(Bits[self.Columns.getIndex("Charge")])
+            AnnotationKey = (Annotation, Charge)
+            ##############################################################
+            # If we've never seen this annotation before, then create a PeptideSpecies object
+            # and record it in self.Peptides
+            Species = self.Peptides.get(AnnotationKey, None)
+            if not Species:
+                Species = PeptideSpecies()
+                Species.Peptide = GetPeptideFromModdedName(Annotation)
+                Mods = []
+                for (Index, List) in Species.Peptide.Modifications.items():
+                    for Mod in List:
+                        Mods.append((Index, Mod))
+                if len(Mods):
+                    Species.ModifiedFlag = 1
+                    Species.ModMass = int(Mods[0][1].Mass)
+                    Species.ModAA = Species.Peptide.Aminos[Mods[0][0]]
+                else:
+                    Species.ModifiedFlag = 0
+                Species.ProteinName = Bits[self.Columns.getIndex("Protein")]
+                self.Peptides[AnnotationKey] = Species
+                # Get the database position of the peptide:
+                Species.DBPos = self.DB.find(Species.Peptide.Aminos)
+                if len(Species.Peptide.Modifications.keys()):
+                    ModIndex = Species.Peptide.Modifications.keys()[0]
+                    Species.ModDBPos = Species.DBPos + ModIndex
+                else:
+                    Species.ModDBPos = None
+                # Get the residue-number of the peptide:
+                StarPos = self.DB.rfind("*", 0, Species.DBPos)
+                if StarPos == -1:
+                    Species.ResidueNumber = Species.DBPos
+                else:
+                    Species.ResidueNumber = Species.DBPos - StarPos
+                Species.Annotation = Annotation
+                Species.Charge = Charge
+            if Species.DBPos == -1:
+                print "* skipping unknown peptide: %s"%Annotation
+                del self.Peptides[AnnotationKey] # remove the Species that was just created!
+                continue
+            MQScore = float(Bits[self.Columns.getIndex("MQScore")])
+            self.AnnotationCount += 1
+            ##############################################################
+            # Populate Species.Spectra:
+            try:
+                Info = SpectrumInfoClass(Bits, self)
+            except:
+                print "** Error: Couldn't parse spectrum info from line %s of file %s"%(LineNumber, FilePath)
+                traceback.print_exc()
+                continue
+            Species.Spectra.append(Info)
+            Species.SpectrumCount += 1
+            if not Species.ModifiedFlag:
+                if len(Species.Spectra) > MAX_MODLESS_CLUSTER_SIZE:
+                    Species.Spectra.sort()
+                    Species.Spectra = Species.Spectra[:MAX_MODLESS_CLUSTER_SIZE]
+            else:
+                pass
+        File.close()
+    def WipeDir(self, Dir):
+        try:
+            shutil.rmtree(Dir)
+        except:
+            pass
+    def ComputeFeaturesMain(self):
+        """
+        Main method:
+        - Load the database searched
+        - Iterate over the results-file, to get a list of PTMs
+        - Iterate over the PTMs, and write out features for each one.
+        """
+        self.ConsensusClusterDir = os.path.join(self.OutputDir, "Clusters")
+        self.ClusterScanListDir = os.path.join(self.OutputDir, "ClusterMembers")
+        self.ConsensusSpectrumDir = os.path.join(self.OutputDir, "Spectra")
+        if not self.StartOutputDBPos:
+            # Make sure necessary directories exist, and clean up any OLD output:
+            print "Prepare cluster directories..."
+            self.WipeDir(self.ConsensusClusterDir)
+            self.WipeDir(self.ConsensusSpectrumDir)
+            self.WipeDir(self.ClusterScanListDir)
+            MakeDirectory(self.ConsensusClusterDir)
+            MakeDirectory(self.ConsensusSpectrumDir)
+            MakeDirectory(self.ClusterScanListDir)
+            for AA in AMINO_ACIDS:
+                PathA = os.path.join(self.ConsensusClusterDir, AA)
+                PathB = os.path.join(self.ConsensusSpectrumDir, AA)
+                PathC = os.path.join(self.ClusterScanListDir, AA)
+                for Path in (PathA, PathB, PathC):
+                    MakeDirectory(Path)
+        else:
+            print "CONTINUING ComputePTMFeatures from DBPosition %s"%self.StartOutputDBPos
+        print "Load database..."
+        self.LoadDB()
+        print "Parse annotations..."
+        self.AnnotationCount = 0
+        self.PTMAnnotationCount = 0
+        self.BestModlessHits = {}
+        self.ProcessResultsFiles(self.ResultsFileName, self.ParsePTMsFromResultsFile)
+        # Fix annotations:
+        self.FixPeptideSpecies()
+        # Fix file paths:
+        for FilePath in self.CachedFilePaths:
+            FixedPath = self.FixSpectrumPath(FilePath)
+            self.CachedFixedFilePaths.append(FixedPath)
+        self.PairModifiedUnmodifiedPeptides()
+        print "Produce CONSENSUS SPECTRA for modified and unmodified petpides..."
+        StartTime = time.clock()
+        self.ProduceConsensusSpectra()
+        EndTime = time.clock()
+        print "Elapsed time: %s"%(EndTime - StartTime)
+        print "Compute protein coverage..."
+        self.ComputeProteinCoverage()
+        print "Count spectra (and sites) by PTM type..."
+        self.ComputeTotalSpectraForModType()
+        print "Generate non-redundant PTM list..."
+        self.ListDistinctPTMs()
+        print "Compute features and output PTM info..."
+        self.ComputeFeaturesAllPTMs()
+    def PairModifiedUnmodifiedPeptides(self):
+        for Species in self.Peptides.values():
+            if not Species.ModifiedFlag:
+                continue
+            ModlessAnnotation = "%s.%s.%s"%(Species.Peptide.Prefix, Species.Peptide.Aminos, Species.Peptide.Suffix)
+            ModlessKey = (ModlessAnnotation, Species.Charge)
+            ModlessSpecies = self.Peptides.get(ModlessKey, None)
+            Species.Modless = ModlessSpecies
+    def ListDistinctPTMs(self):
+        """
+        Populate self.PTMs; keys are (DBPos, Mass) and values are simple objects
+        with lists of peptide species.
+        """
+        for Species in self.Peptides.values():
+            if not Species.ModifiedFlag:
+                continue
+            Index = Species.Peptide.Modifications.keys()[0]
+            ModifiedPos = Species.DBPos + Index
+            Key = (ModifiedPos, Species.ModMass)
+            if not self.PTMs.has_key(Key):
+                PTM = Bag()
+                PTM.SpeciesList = []
+                self.PTMs[Key] = PTM
+                ModIndex = Species.Peptide.Modifications.keys()[0]
+                PTM.DBPos = Species.ModDBPos
+            PTM.SpeciesList.append(Species)
+            Species.PTM = PTM
+    def ComputeTotalSpectraForModType(self):
+        """
+        Populate a dictionary of the form (AA, Mass) -> SpectrumCount.  If a modification is seen
+        at multiple sites, it is more likely to be valid.
+        """
+        self.ModTypeSpectrumCount = {}
+        self.ModTypeSiteCount = {}
+        for Species in self.Peptides.values():
+            if Species.ModifiedFlag:
+                ModTypeKey = (Species.ModAA, Species.ModMass)
+                self.ModTypeSpectrumCount[ModTypeKey] = self.ModTypeSpectrumCount.get(ModTypeKey, 0) + Species.SpectrumCount
+                self.ModTypeSiteCount[ModTypeKey] = self.ModTypeSiteCount.get(ModTypeKey, 0) + 1
+        pass
+    def OutputPTMInfoHeader(self):
+        """
+        Output column headers, plus some general-purpose information such as the number
+        of spectra parsed and the database size.
+        """
+        Header = "#Group\tDBPosition\tMass\tAminoAcid\tProtein\tResidueNumber\t"
+        Header += "Peptide\tCharge\tValidProteinFlag\tFacultativeFlag\tBestSpectrum\t"
+        Header += "BestModlessSpectrum\tBestModlessMQScore\tBigDBAnn\tBigDBScore\tSpectra\tModlessSpectra\tBestMQScore\t"
+        Header += "BestDeltaScore\tPeptideCount\tConsensusMQScore\tPeptideLength\tCutScoreTotal\t"
+        Header += "MedianCutScore\tYPresent\tBPresent\tBYIntensity\tNTT\tModdedFraction\tProteinCoverage\t"
+        Header += "SpectraThisModType\tSitesThisModType\tUnmodifiedPeptideCount\tDot0.5\tShared01\tShared11\t"
+        Header += "Correlation\tLogSpectrumCount\tLogPeptideLength\tLogSpecThisType\tLogSitesThisType\t"
+        Header += "DeltaVsBigDB\tModelScore\tModelPValue\tSitePValue\tKnownModType\tKnownModAnnotation\t"
+        Header += "KnownModScore\tKnownModSitePValue\t"
+        self.OutputFile.write(Header + "\n")
+        # Two more header lines, for feature-numbers and column-numbers:
+        Header = "#0\t1\t2\t3\t4\t5\t6\t7\t8\t9\t10\t11\t12\t13\t14\t15\t16\t17\t18\t19\t20\t21\t22\t23\t24\t25\t26\t27\t28\t29\t30\t31\t32\t33\t34\t35\t36\t37\t38\t39\t40\t41\t42\t43\t44\t45\t46\t47\t48\t49\t"
+        self.OutputFile.write(Header + "\n")
+        Header = "#Feature\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t0\t1\t2\t3\t4\t5\t6\t7\t8\t9\t10\t11\t12\t13\t14\t15\t16\t17\t18\t19\t20\t21\t22\t23\t24\t25\t26\t"
+        self.OutputFile.write(Header + "\n")
+        ProteinRecordCount = self.DB.count("*")
+        DBResidueSize = len(self.DB) - ProteinRecordCount
+        self.OutputFile.write("#DatabaseSize\t%s\t\n"%DBResidueSize)
+        self.OutputFile.write("#AnnotationCount\t%s\t\n"%self.AnnotationCount)
+        SiteCount = len(self.PTMs.values())
+        self.OutputFile.write("#SiteCount\t%s\t\n"%SiteCount)
+    def ProduceConsensusSpectra(self):
+        """
+        We adopt a brute-force strategy: Output a consensus spectrum for each
+        modified peptide species.  And, if the equivalent unmodified peptide
+        was observed, then output a consensus spectrum for the unmodified
+        peptide, too.
+        We write a consensus CLUSTER of each modified peptide species.  
+        Later on in processing, we may try MERGING two of these clusters
+        (e.g. EAM+16APK, EAMA+16PK)
+        """
+        ClustersBuilt = {} # keep track of clusters that have ALREADY been built
+        Keys = self.Peptides.keys()
+        for PeptideIndex in range(len(Keys)):
+            if (PeptideIndex % 100 == 0):
+                print "For peptide %s/%s..."%(PeptideIndex, len(Keys))
+            AnnotationKey = Keys[PeptideIndex]
+            (Annotation, Charge) = AnnotationKey
+            Species = self.Peptides[AnnotationKey]
+            if not Species.ModifiedFlag:
+                continue
+            Species.ClusterPath = os.path.join(self.ConsensusClusterDir, Annotation[2], "%s.%s.cls"%(Annotation.replace("*", "-"), Charge))
+            Species.ConsensusPath = os.path.join(self.ConsensusSpectrumDir, Annotation[2], "%s.%s.dta"%(Annotation.replace("*", "-"), Charge))
+            ClusterContentPath = os.path.join(self.ClusterScanListDir, Annotation[2], "%s.%s.txt"%(Annotation.replace("*", "-"), Charge))
+            ClusterContentFile = open(ClusterContentPath, "wb")
+            #print "Creating consensus file %s"%(Species.ConsensusPath)
+            #raw_input()
+            Builder = BuildConsensusSpectrum.ConsensusBuilder(Species.Charge)
+            MeanMQ = 0
+            for Info in Species.Spectra:
+                MeanMQ += Info.MQScore
+            MeanMQ /= float(len(Species.Spectra))
+            ValidSpectra = 0
+            for Info in Species.Spectra:
+                # Omit from the consensus spectra with very poor scores:
+                if Info.MQScore < MeanMQ - 3.0:
+                    continue
+                SpectrumFilePath = self.CachedFixedFilePaths[Info.FileNameIndex]
+                # Keep track of where these scans came from:
+                ClusterContentFile.write("%s\t%s\t\n"%(SpectrumFilePath, Info.ByteOffset))
+                Spectrum = MSSpectrum.SpectrumClass()
+                SpectrumFile = open(SpectrumFilePath, "rb")
+                SpectrumFile.seek(Info.ByteOffset)
+                Spectrum.ReadPeaksFromFile(SpectrumFile, SpectrumFilePath)
+                if not Spectrum.PrecursorMZ:
+                    print "* Error: Unable to read spectrum from '%s:%s'"%(SpectrumFilePath, Info.ByteOffset)
+                    continue
+                ValidSpectra += 1
+                Spectrum.SetCharge(Charge)
+                SpectrumFile.close()
+                Builder.AddSpectrum(Spectrum)
+                # Special (and easy) case: If we only saw one spectrum, then write it
+                # out without changing it!
+                if len(Species.Spectra) == 1:
+                    Spectrum.WritePeaks(Species.ConsensusPath)
+            # Write the modded cluster to disk, since we may try to augment it later:
+            Builder.PickleCluster(Species.ClusterPath)
+            if len(Species.Spectra) > 1:
+                Spectrum = Builder.ProduceConsensusSpectrum()
+                Spectrum.WritePeaks(Species.ConsensusPath)
+            ClusterContentFile.close()
+            # If we have unmodified peptides for this species, build their cluster:
+            if Species.Modless:
+                Species.Modless.ConsensusPath = os.path.join(self.ConsensusSpectrumDir, Species.Modless.Annotation[2], "%s.%s.dta"%(Species.Modless.Annotation.replace("*", "-"), Charge))
+                Species.Modless.ClusterPath = os.path.join(self.ConsensusClusterDir, Species.Modless.Annotation[2], "%s.%s.cls"%(Species.Modless.Annotation.replace("*", "-"), Charge))
+                ModlessKey = (Species.Modless.Annotation, Species.Modless.Charge)
+                if ClustersBuilt.has_key(ModlessKey):
+                    pass
+                else:
+                    ModlessMeanMQ = 0
+                    for Info in Species.Modless.Spectra:
+                        ModlessMeanMQ += Info.MQScore
+                    ModlessMeanMQ /= float(len(Species.Modless.Spectra))
+                    Builder = BuildConsensusSpectrum.ConsensusBuilder(Species.Charge)
+                    for Info in Species.Modless.Spectra:
+                        # Omit from the consensus spectra with very poor scores:
+                        if Info.MQScore < ModlessMeanMQ - 3.0:
+                            continue
+                        SpectrumFilePath = self.CachedFixedFilePaths[Info.FileNameIndex]
+                        Spectrum = MSSpectrum.SpectrumClass()
+                        SpectrumFile = open(SpectrumFilePath, "rb")
+                        SpectrumFile.seek(Info.ByteOffset)
+                        Spectrum.ReadPeaksFromFile(SpectrumFile, SpectrumFilePath)
+                        Spectrum.SetCharge(Charge)
+                        SpectrumFile.close()
+                        Builder.AddSpectrum(Spectrum)
+                    Spectrum = Builder.ProduceConsensusSpectrum()
+                    Spectrum.WritePeaks(Species.Modless.ConsensusPath)
+                    Builder.PickleCluster(Species.Modless.ClusterPath)
+                    ClustersBuilt[ModlessKey] = 1
+    def ComputeFeaturesAllPTMs(self):
+        """
+        Compute, and output, features for each modification site.
+        """
+        self.OutputFile = open(self.OutputPath, "wb")
+        self.OutputPTMInfoHeader()
+        # Use self.ConsensusCreatedFlags to flag which unmodified peptides we have
+        # already generated consensus spectra for, so that we don't do the same one twice and waste time:
+        self.ConsensusCreatedFlags = {}
+        # Order the peptides by (ModDBPos, Annotation, Charge).  It's important to order things
+        # in this way so that, when we combine the *large* output files for HEK293, we can keep
+        # consistent 'cursors' moving through each of our input files.
+        print "Sorting %s peptides..."%len(self.Peptides.values())
+        SortedKeys = []
+        for Peptide in self.Peptides.values():
+            Key = (Peptide.ModDBPos, Peptide.Annotation, Peptide.Charge)
+            SortedKeys.append(Key)
+        SortedKeys.sort()
+        for KeyIndex in range(len(SortedKeys)):
+            (ModDBPos, Annotation, Charge) = SortedKeys[KeyIndex]
+            Key = (Annotation, Charge)
+            Species = self.Peptides[Key]
+            if not Species.ModifiedFlag:
+                continue
+            if Species.DBPos < self.StartOutputDBPos:
+                continue
+            print "(%s/%s) PTM: %+d on db residue %d"%(KeyIndex, len(SortedKeys), Species.ModMass, Species.DBPos + Species.Peptide.Modifications.keys()[0])
+            sys.stdout.flush()
+            try:
+                Features = self.ComputePTMFeatures(Species)
+            except:
+                traceback.print_exc()
+                print "** Error: Unable to compute PTM features for %s"%Species
+                continue
+            Str = "%s\t"%self.OutputPath
+            Str += "%s\t%+d\t%s\t"%(Species.ModDBPos, Species.ModMass, Species.ModAA) 
+            Str += "%s\t"%Species.ProteinName
+            Str += "%s\t"%(Species.ResidueNumber + Species.Peptide.Modifications.keys()[0])
+            Str += "%s\t"%Species.Annotation
+            Str += "%s\t"%Species.Charge
+            for Feature in Features:
+                Str += "%s\t"%Feature
+            print Str
+            self.OutputFile.write(Str + "\n")
+            # We're done with this PTM now, so let's forget about it:
+            del self.Peptides[Key]
+        self.OutputFile.close()
+    def ComputePTMFeatures(self, Species):
+        """
+        Compute scoring-features for this peptide species, return them as a list
+        """
+        Features = []
+        # Feature: Is the PTM from a valid protein?  (Note: this feature is not INPUT for the
+        # model, it's our desired output)
+        Feature = self.GetValidProteinFlag(Species)
+        Features.append(Feature)
+        # Important question: Is this PTM constitutive, or facultative?  (In other words:
+        # is there a spectrum annotated with the UNMODIFIED version of this peptide?)
+        # Set flag to "1" if the PTM is facultative:
+        if Species.Modless:
+            Features.append("1")
+        else:
+            Features.append("")
+        # The best spectrum observed (meta-data, not a scoring feature)
+        BestMQScore = -999
+        BestDeltaScore = None
+        for Info in Species.Spectra:
+            if Info.MQScore > BestMQScore:
+                BestMQScore = Info.MQScore
+                BestDeltaScore = Info.DeltaScore
+                FilePath = self.CachedFixedFilePaths[Info.FileNameIndex]
+                BestMQSpectrum = ("%s:%s"%(FilePath, Info.ByteOffset))
+        Features.append(BestMQSpectrum)
+        # The best MODLESS spectrum observed (meta-data, not a scoring feature)
+        if Species.Modless:
+            BestMQScoreModless = -999
+            for Info in Species.Modless.Spectra:
+                if Info.MQScore > BestMQScoreModless:
+                    BestMQScoreModless = Info.MQScore
+                    FilePath = self.CachedFixedFilePaths[Info.FileNameIndex]
+                    BestMQSpectrum = ("%s:%s"%(FilePath, Info.ByteOffset))
+            Features.append(BestMQSpectrum)
+            Features.append(str(BestMQScoreModless))
+        else:
+            Features.append("")
+            Features.append("")
+        # Feature: Annotation, and MQScore, from a search versus big-DB.  (This feature
+        # will be spiked in later)
+        Features.append("")
+        Features.append("")
+        # Feature: Number of spectra annotated with this PTM
+        Features.append(Species.SpectrumCount)
+        # Feature: Number of spectra for the *unmodified* peptide version:
+        if Species.Modless:
+            Feature = Species.Modless.SpectrumCount
+            Features.append(Feature)
+        else:
+            Features.append(0)
+        # Feature: Best MQScore for this PTM, and the best delta-score for that scan:
+        Features.append(BestMQScore)
+        Features.append(BestDeltaScore)
+        # Feature: Number of peptide species observed for this PTM on this residue
+        Features.append(len(Species.PTM.SpeciesList))
+        # Feature: Consensus annotation score (and score-features) for this peptide
+        Species.ConsensusScore = None
+        self.GetConsensusMQScore(Species, Features)
+        # Feature: Presence of unmodified peptides covering the residue of interest
+        ModlessCount = self.Coverage[Species.PTM.DBPos]
+        ModdedSpectrumCount = self.ModCoverage[Species.PTM.DBPos]
+        ModdedFraction = ModdedSpectrumCount / float(ModlessCount + ModdedSpectrumCount)
+        Features.append(ModdedFraction)
+        # Feature: Coverage of the protein of interest (ONLY FOR FACULTATIVE!)
+        ProteinIndex = self.GetProteinIndex(Species.DBPos)
+        ProteinCoverage = self.ProteinCoverageLevels[ProteinIndex]
+        Features.append(ProteinCoverage)
+        # Feature: Number of annotations using this modification-type
+        ModTypeKey = (Species.ModAA, Species.ModMass)
+        ModTypeSpectrumCount = self.ModTypeSpectrumCount.get(ModTypeKey, 0)
+        Features.append(ModTypeSpectrumCount)
+        # Feature: Number of sites using this modification-type
+        ModTypeSiteCount = self.ModTypeSiteCount.get(ModTypeKey, 0)
+        Features.append(ModTypeSiteCount)
+        # Feature: Number of unmodified peptide species for this site
+        ModlessPeptides = self.PeptideCoverage[Species.DBPos]
+        Features.append(ModlessPeptides)
+        # Features for FACULTATIVE PTMs only:
+        # These features have been commented out, since we no longer pursue a
+        # special model for facultative PTMs.
+        if 0: #Species.Modless:
+            Comparator = SpectralSimilarity.SpectralSimilarity(Species.ConsensusPath,
+               Species.Modless.ConsensusPath, Species.Annotation, Species.Modless.Annotation)
+            Comparator.LabelPeaks(0.5)
+            Similarity = Comparator.DotProduct(0.5, HashByRank = 1)
+            Features.append(Similarity)
+            Similarity = Comparator.GetSharedPeakCount(0, 1)
+            Features.append(Similarity)
+            Similarity = Comparator.GetSharedPeakCount(1, 1)
+            Features.append(Similarity)
+            CorrelationCoefficient = Comparator.ComputeCorrelationCoefficient(1.0)
+            Features.append(CorrelationCoefficient)
+            del Comparator
+        else:
+            # This PTM is constitutive, so omit the spectrum-comparison features:
+            Features.append("") # dot
+            Features.append("") # shared-peaks
+            Features.append("") # shared-peaks
+            Features.append("") # correlation
+        # Feature: Log of spectrum-count
+        Features.append(math.log(1.0 + Species.SpectrumCount))
+        # Feature: Log of peptide-length
+        Features.append(math.log(len(Species.Peptide.Aminos)))
+        # Feature: Log of same-modtype-spectrum-count
+        Features.append(math.log(1.0 + ModTypeSpectrumCount))
+        # Feature: Log of same-modtype-site-count
+        Features.append(math.log(1.0 + ModTypeSiteCount))
+        # Feature: Delta-score versus big-db search result.  To be spiked in later!
+        Features.append("")
+        # Free the PySpectrum object now:
+        Species.PySpectrum = None
+        return Features
+    def GetValidProteinFlag(self, PTM):
+        # Bogus (decoy) protein names are prefixed with "XXX":
+        if PTM.ProteinName[:3] == "XXX":
+            return 0
+        return 1
+    def GetProteinIndex(self, DBPos):
+        for ProteinIndex in range(len(self.ProteinPositions)):
+            Pos = self.ProteinPositions[ProteinIndex]
+            if Pos > DBPos:
+                return ProteinIndex - 1
+        return len(self.ProteinPositions) - 1
+    def PreComputeAminosForMasses(self):
+        """
+        PepNovo often gives us partial interpretations - e.g. a
+        peptide that starts at 250Da.  We "fill in" the prefix and
+        suffix to generate a (not necessarily optimal) full-length
+        peptide.
+        """
+        Aminos = "ACDEFGHILMNPQSTVWYRK" # PREFER ending in R or K.
+        self.AAStrings = {}
+        TotalMass = 0
+        for AA1 in Aminos:
+            Mass1 = Global.AminoMass[AA1]
+            TotalMass = int(round(Mass1))
+            self.AAStrings[TotalMass] = "%c"%(AA1)
+            for AA2 in Aminos:
+                Mass2 = Global.AminoMass[AA2]
+                TotalMass = int(round(Mass1 + Mass2))
+                self.AAStrings[TotalMass] = "%c%c"%(AA1, AA2)
+                for AA3 in Aminos:
+                    Mass3 = Global.AminoMass[AA3]
+                    TotalMass = int(round(Mass1 + Mass2 + Mass3))
+                    self.AAStrings[TotalMass] = "%c%c%c"%(AA1, AA2, AA3)
+                    for AA4 in Aminos:
+                        Mass4 = Global.AminoMass[AA4]
+                        TotalMass = int(round(Mass1 + Mass2 + Mass3 + Mass4))
+                        self.AAStrings[TotalMass] = "%c%c%c%c"%(AA1, AA2, AA3, AA4)
+    def AddSpectrumToCluster(self, InputFilePath, InputFilePos, ClusterFile, Charge):
+        """
+        Append the specified scan to an ever-growing .mgf file
+        Returns 1 if successful, 0 if failed
+        """
+        try:
+            SpectrumFile = open(InputFilePath, "rb")
+        except:
+            print "** Error: couldn't open spectrum data file %s"%InputFilePath
+            return 0
+        SpectrumFile.seek(InputFilePos)
+        Spectrum = MSSpectrum.SpectrumClass()
+        try:
+            Spectrum.ReadPeaksFromFile(SpectrumFile, InputFilePath)
+        except:
+            traceback.print_exc()
+            print "***Can't parse:", InputFilePath, FileOffset
+            return 0
+        SpectrumFile.close()
+        ParentMass = Spectrum.PrecursorMZ * Charge - (Charge - 1)*1.0078 #Peptide.Masses[-1] + 19
+        #MZ = (ParentMass + (Info.Charge - 1)*1.0078) / Info.Charge
+        # Now write out this spectrum to the cluster:
+        self.ClusterScanNumber += 1 # ASSUMED: The caller set this to 0 at the start of the cluster!
+        ClusterFile.write("BEGIN IONS\n")
+        ClusterFile.write("TITLE=%s:%s\n"%(InputFilePath, InputFilePos))
+        ClusterFile.write("SCAN=%s\n"%self.ClusterScanNumber)
+        ClusterFile.write("CHARGE=%s\n"%Charge)
+        ClusterFile.write("PEPMASS=%s\n"%ParentMass)
+        for Peak in Spectrum.Peaks:
+            ClusterFile.write("%s %s\n"%(Peak.Mass, Peak.Intensity))
+        ClusterFile.write("END IONS\n")
+        #ClusterFile.close()
+        return 1
+    def GetConsensusMQScore(self, Species, Features):
+        """
+        Feature: MQScore of the consensus spectrum.
+        - Write spectra to a cluster (done by ProduceConsensusSpectra)
+        - Generate a consensus-spectrum for the cluster (done by ProduceConsensusSpectra)
+        - Load the consensus-spectrum
+        - Score the spectrum
+        """
+        # Load in the consensus spectrum, and score the peptide annotation:
+        try:
+            print ">>PyConsensus spectrum:", Species.ConsensusPath
+            PySpectrum = PyInspect.Spectrum(Species.ConsensusPath, 0)
+            Species.PySpectrum = PySpectrum
+            print ">>ScorePeptideDetailed(%s)"%Species.Annotation
+            ScoreList = PySpectrum.ScorePeptideDetailed(Species.Annotation)
+            Species.ConsensusScore = ScoreList[0]
+            for ScoreItem in ScoreList:
+                Features.append(ScoreItem)
+            print "PyInspect score %s -> %s"%(Species.Annotation, ScoreList[0])
+        except:
+            traceback.print_exc()
+            for X in range(8):
+                Features.append(0)
+    def ParseCommandLine(self, Arguments):
+        (Options, Args) = getopt.getopt(Arguments, "d:r:w:s:M:lp:c:Z:")
+        OptionsSeen = {}
+        for (Option, Value) in Options:
+            OptionsSeen[Option] = 1
+            if Option == "-r":
+                # -r results file(s)
+                self.ResultsFileName = Value
+            elif Option == "-c":
+                self.RequiredFileNameChunk = Value
+            elif Option == "-d":
+                self.DBPath = Value
+            elif Option == "-M":
+                self.PopulateSpectrumOracle(Value)
+            elif Option == "-w":
+                self.OutputDir = Value
+                self.OutputPath = os.path.join(self.OutputDir, "PTMFeatures.txt")
+            elif Option == "-l":
+                self.QuickParseFlag = 1
+            elif Option == "-s" or Option == "-M":
+                self.PopulateSpectrumOracle(Value)
+                #self.SpectrumDir = Value
+            elif Option == "-p":
+                self.PValueCutoff = float(Value)
+            elif Option == "-Z": # secret debugging option: Start output from DB position
+                self.StartOutputDBPos = int(Value)
+            else:
+                print "* Error: Unrecognized option %s"%Option
+            
+UsageInfo = """
+ComputePTMFeatures: Generate feature values for PTMs observed on a data-set.
+Run this AFTER running SelectSites, and BEFORE running TrainPTMFeatures.
+
+Arguments:
+ -r [ResultsFile]: Name of the results file (or directory)
+ -d [DBPath]: Path to the .trie file searched
+ -w [OutputDir]: Output file directory.  Features are written to
+    PTMFeatures.txt within this directory.  Clusters and other info
+    are written in (or below) this directory.
+ -M [RootDir]: Root directory for mzXML files. 
+"""
+           
+if __name__ == "__main__":
+    if not PROFILE_FLAG:
+        try:
+            import psyco
+            psyco.full()
+        except:
+            print "(psyco not installed; running unoptimized)"
+    Trainer = PTMFeatureComputer()
+    Trainer.ParseCommandLine(sys.argv[1:])
+    if not Trainer.ResultsFileName or not Trainer.DBPath:
+        print UsageInfo
+        sys.exit(-1)
+    if PROFILE_FLAG:
+        import profile
+        profile.run("Trainer.ComputeFeaturesMain()")
+    else:
+        Trainer.ComputeFeaturesMain()
+    
+    
diff --git a/Database/CommonContaminants.fasta b/Database/CommonContaminants.fasta
new file mode 100644
index 0000000..c0ed40b
--- /dev/null
+++ b/Database/CommonContaminants.fasta
@@ -0,0 +1,20 @@
+>TRYP_PIG Porcine trypsin - Sus scrofa (Pig).
+FPTDDDDKIVGGYTCAANSIPYQVSLNSGSHFCGGSLINSQWVVSAAHCYKSRIQVRLGEHNIDVLEGNEQFINAAKIITHPNFNGNTLDNDIMLIKLSSPATLNSRVATVSLPRSCAAAGTECLISGWGNTKSSGSSYPSLLQCLKAPVLSDSSCKSSYPGQITGNMICVGFLEGGKDSCQGDSGGPVVCNGQLQGIVSWGYGCAQKNKPGVYTKVCNYVNWIQQTIAAN
+>TRY1_BOVIN Bovine trypsin - Bos taurus (Bovine).
+FIFLALLGAAVAFPVDDDDKIVGGYTCGANTVPYQVSLNSGYHFCGGSLINSQWVVSAAH
+CYKSGIQVRLGEDNINVVEGNEQFISASKSIVHPSYNSNTLNNDIMLIKLKSAASLNSRV
+ASISLPTSCASAGTQCLISGWGNTKSSGTSYPDVLKCLKAPILSDSSCKSAYPGQITSNM
+FCAGYLEGGKDSCQGDSGGPVVCSGKLQGIVSWGSGCAQKNKPGVYTKVCNYVSWIKQTI
+ASN
+>sp|P35908|K22E_HUMAN Keratin, type II cytoskeletal 2 epidermal (Cytokeratin 2e) (K2e) (CK 2e) - Homo sapiens (Human).
+MSCQISCKSRGRGGGGGGFRGFSSGSAVVSGGSRRSTSSFSCLSRHGGGGGGFGGGGFGSRSLVGLGGTKSISISVAGGGGGFGAAGGFGGRGGGFGGGSGFGGGSGFGGGSGFSGGGFGGGGFGGGRFGGFGGPGGVGGLGGPGGFGPGGYPGGIHEVSVNQSLLQPLNVKVDPEIQNVKAQEREQIKTLNNKFASFIDKVRFLEQQNQVLQTKWELLQQMNVGTRPINLEPIFQGYIDSLKRYLDGLTAERTSQNSELNNMQDLVEDYKKKYEDEINKRTAAENDFVTLKKDVDNAYMIKVELQSKVDLLNQEIEFLKVLYDAEISQIHQSVTDTNVILSMDNSRNLDLDSIIAEVKAQYEEIAQRSKEEAEALYHSKYEELQVTVGRHGDSLKEIKIEISELNRVIQRLQGEIAHVKKQCKNVQDAIADAEQRGEHALKDARNKLNDLEEALQQAKEDLARLLRDYQELMNVKLALDVEIATYRKLLEG [...]
+>sp|Q01546|K22O_HUMAN Keratin, type II cytoskeletal 2 oral (Cytokeratin 2P) (K2P) (CK 2P) - Homo sapiens (Human).
+MNRQVCKKSFSGRSQGFSGRSAVVSGSSRMSCVARSGGAGGGACGFRSGAGSFGSRSLYNLGSNKSISISVAAGSSRAGGFGGGRSSCGFAGGYGGGFGGSYGGGFGGGRGVGSGFGGAGGFGGAGGFGGPGVFGGPGSFGGPGGFGPGGFPGGIQEVIVNQSLLQPLNVEIDPQIGQVKAQEREQIKTLNNKFASFIDKVRFLEQQNKVLETKWELLQQQTTGSGPSSLEPCFESYISFLCKQLDSLLGERGNLEGELKSMQDLVEDFKKKYEDEINKRTAAENEFVGLKKDVDAAFMNKVELQAKVDSLTDEVSFLRTLYEMELSQMQSHASDTSVVLSMDNNRCLDLGSIIAEVRTQYEEIAQRSKSEAEALYQTKLGELQTTAGRHGDDLRNTKSEIMELNRMIQRLRAEIENVKKQNANLQTAIAEAEQRGEMALKDANAKLQDLQTALQKAKDDLARLLRDYQELMNVKLALDVEIATYRKLLEGE [...]
+>sp|P04264|K2C1_HUMAN Keratin, type II cytoskeletal 1 (Cytokeratin 1) (K1) (CK 1) (67 kDa cytokeratin) (Hair alpha protein) - Homo sapiens (Human).
+SRQFSSRSGYRSGGGFSSGSAGIINYQRRTTSSSTRRSGGGGGRFSSCGGGGGSFGAGGGFGSRSLVNLGGSKSISISVARGGGRGSGFGGGYGGGGFGGGGFGGGGFGGGGIGGGGFGGFGSGGGGFGGGGFGGGGYGGGYGPVCPPGGIQEVTINQSLLQPLNVEIDPEIQKVKSREREQIKSLNNQFASFIDKVRFLEQQNQVLQTKWELLQQVDTSTRTHNLEPYFESFINNLRRRVDQLKSDQSRLDSELKNMQDMVEDYRNKYEDEINKRTNAENEFVTIKKDVDGAYMTKVDLQAKLDNLQQEIDFLTALYQAELSQMQTQISETNVILSMDNNRSLDLDSIIAEVKAQNEDIAQKSKAEAESLYQSKYEELQITAGRHGDSVRNSKIEISELNRVIQRLRSEIDNVKKQISNLQQSISDAEQRGENALKDAKNKLNDLEDALQQAKEDLARLLRDYQELMNTKLALDLEIATYRTLLEGEESRM [...]
+>sp|P12035|K2C3_HUMAN Keratin, type II cytoskeletal 3 (Cytokeratin 3) (K3) (CK3) (65 kDa cytokeratin) - Homo sapiens (Human).
+MSRQASKTSGGGSQGFSGRSAVVSGSSRMSCVAHSGGAGGGAYGFRSGAGGFGSRSLYNLGGDKSISISVAAGGSRAGGFGGGRSSCAFAGGYGGGFGSGYGGGFGGGFGGGRGMGGGFGGAGGFGGAGGFGGAGGFGGPGGFGGSGGFGGPGSLGSPGGFAPGGFPGGIQEVTTNQSLLQPLKVETDPQIGQVKAQEREQIKTLNNKFASFIDKVRFLEQQNKVLETKWNLLQQQGTSSISGTNNLEPLFENHINYLRSYLDNILGERGRLDSELKNMEDLVEDFKKKYEDEINKRYAAENEFVTLKKDVDSAYMNKVELQAKVDALIDEIDFLRTLYDAELSQMQSHISDTSVVLSMDNNRSLDLDSIIAEVGAQYEDIAQRSKAEAEALYQTKLGELQTTAGRHGDDLRNTKSEIIELNRMIQRLRAEIEGVKKQNANLQTAIAQAEQHGEMALKDANAKLQELQAALQQAKDDLARLLRDYQELMNVK [...]
+>sp|P08729|K2C7_HUMAN Keratin, type II cytoskeletal 7 (Cytokeratin 7) (K7) (CK 7) (Sarcolectin) - Homo sapiens (Human).
+SIHFSSPVFTSRSAAFSGRGAQVRLSSARPGGLGSSSLYGLGASRPRVAVRSAYGGPVGAGIREVTINQSLLAPLRLDADPSLQRVRQEESEQIKTLNNKFASFIDKVRFLEQQNKLLETKWTLLQEQKSAKSSRLPDIFEAQIAGLRGQLEALQVDGGRLEQGLRTMQDVVEDFKNKYEDEINRRTAAENEFVVLKKDVDAAYMSKVELEAKVDALNDEINFLRTLNETELTELQSQISDTSVVLSMDNSRSLDLDGIIAEVKAQYEEMAKCSRAEAEAWYQTKFETLQAQAGKHGDDLRNTRNEISEMNRAIQRLQAEIDNIKNQRAKLEAAIAEAEERGELALKDARAKQEELEAALQRAKQDMARQLREYQELMSVKLALDIEIATYRKLLEGEESRLAGDGVGAVNISVMNSTGGSSSGGGIGLTLGGTMGSNALSFSSSAGPGLLKAYSIRTASASRRSARD
+>sp|P35527|K1CI_HUMAN Keratin, type I cytoskeletal 9 (Cytokeratin 9) (K9) (CK 9) - Homo sapiens (Human).
+MSCRQFSSSYLTSGGGGGGGLGSGGSIRSSYSRFSSSGGRGGGGRFSSSSGYGGGSSRVCGRGGGGSFGYSYGGGSGGGFSASSLGGGFGGGSRGFGGASGGGYSSSGGFGGGFGGGSGGGFGGGYGSGFGGLGGFGGGAGGGDGGILTANEKSTMQELNSRLASYLDKVQALEEANNDLENKIQDWYDKKGPAAIQKNYSPYYNTIDDLKDQIVDLTVGNNKTLLDIDNTRMTLDDFRIKFEMEQNLRQGVDADINGLRQVLDNLTMEKSDLEMQYETLQEELMALKKNHKEEMSQLTGQNSGDVNVEINVAPGKDLTKTLNDMRQEYEQLIAKNRKDIENQYETQITQIEHEVSSSGQEVQSSAKEVTQLRHGVQELEIELQSQLSKKAALEKSLEDTKNRYCGQLQMIQEQISNLEAQITDVRQEIECQNQEYSLLLSIKMRLEKEIETYHNLLEGGQEDFESSGAGKIGLGGRGGSGGSYGRGSRGGS [...]
diff --git a/Database/TestDatabase.index b/Database/TestDatabase.index
new file mode 100644
index 0000000..66980d6
Binary files /dev/null and b/Database/TestDatabase.index differ
diff --git a/Database/TestDatabase.trie b/Database/TestDatabase.trie
new file mode 100644
index 0000000..096ca19
--- /dev/null
+++ b/Database/TestDatabase.trie
@@ -0,0 +1 @@
+VATIVKMCLVAMALRQPLKRLNVPGEIAESISKNNHAVRRINKKVDKFQSQEQQEMDDPRQDQVHPFAKTQSIVYPFPHPIPDSLPMNIPPVTQTPIFVAPFLEPEILGMHCVKEAMGPKHKELPFPKFPVDPYTEKQSFTNFDVSNLHLPMPLLQSWMHQPHQPLPPTILFPPQRVILLKYMKVLPVPDKEVPYPQRDMSCQAFCLYEDPVIGPWRGPFPLLM*EHHWGYGKHKGPEHWHMDFPLLNGEMQSPVNIDWHRVINDPPLKPLAGVYGSATSRRMLNNGHSMNVEYHDSENKSELKDGPITGAYRIVEFHQRWGSSDDQGSEHTIDRKKYCAELHIVHWNTKYNGFGTSAQQPDGMTIVGTFLTMGDCNPAWRTVLDALDSIKTKGTSTDFPNFDPGTLLPNVIDYWMYPGSLTTPPLMETVTWIVAKEPINMSDEHLFKFRTLNFNAEGDPELIMLANWRPAQPMQNLQVRGFPK*GDVEKAK [...]
\ No newline at end of file
diff --git a/Errors.c b/Errors.c
new file mode 100644
index 0000000..0aa0b8e
--- /dev/null
+++ b/Errors.c
@@ -0,0 +1,261 @@
+//Title:          Errors.c
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+#include "CMemLeak.h"
+#include <stdio.h>
+#include "Inspect.h"
+#include "Errors.h"
+
+void AssertionFailed(char* Assertion, char* FileName, int LineNumber)
+{
+    printf("** ASSERTION FAILED line %d file '%s':\n   '%s'\n", LineNumber, FileName, Assertion);
+}
+
+typedef struct NumberedError
+{
+    int ID;
+    char* Message;
+} NumberedError;
+
+NumberedError ErrorMessages[] = {
+    {0, "Unhandled exception"},
+    {1, "Out of memory"},
+    {2, "Out of disk space"},
+    {3, "Missing required file '%s'"},
+    {4, "Internal assertion '%s'"},
+    {5, "File '%s' not found"}, 
+    {6, "Error in LoadBayesianModel: Bogus feature count %d"},
+    {7, "Bogus-looking probability table size %d for feature %d"},
+    {8, "Unable to open requested file '%s'"},
+    {9, "Scan number range (%d...%d) includes no spectra!"},
+    {10, "Only %d top-scoring matches for charge state; not recalibrating the FDR curve."},
+    {11, "No spectra were specified to search."},
+    {12, "No GFF files were specified as input to build an MS2DB file."},
+    {13, "Ignoring unknown command '%s' from Inspect input file"},
+    {14, "Syntax error on line %d of file %s"},
+    {15, "No valid exons found in GFF files"},
+    {16, "Linked exons %d...%d and %d...%d have incompatible reading frames"},
+    {17, "Consecutive GFF exons %d...%d and %d...%d come from same gene, but can't be linked because they overlap"},
+    {18, "Invalid command-line argument '%s'"},
+    {19, "Command-line argument '%s' requires a parameter."},
+    {20, "Invalid coordinates %d...%d on line %d of file %s"},
+    {21, "Length-1 exon at %d is a codon center, but doesn't link in and out"},
+    {22, "Unable to cover GFF gene '%s'"},
+    {23, "Coverage of GFF gene '%s' is incomplete"},
+    {24, "Unhandled instance: %d"},
+    {25, "XML parse exception: '%s'"},
+    {26, "Error linking exons %d and %d in gene '%s'"},
+    {27, "XML line %d: Unexpected tag '%s'"},
+    {28, "XML line %d: Unexpected attribute '%s' for XML tag '%s'"},
+    {29, "Exon %d of gene '%s' is too long!"},
+    {30, "Spectrum file '%s' has an abnormal extension; attempting to treat it as a .dta file"},
+    {31, "Too many peaks in spectrum (scan %d, file %s); dropping extras!"},
+    {32, "Illegal peak mass in scan %d of file %s"},
+    {33, "Syntax error on line %d of input file %s"},
+    {34, "Modifications specified, but no PTMs permitted in peptides.  Use 'mods,1' to permit modified peptides."},
+    {35, "Too many PTMs specified in input file - ignoring '%s'"},
+    {36, "Invalid modification type '%s'"},
+    {37, "Illegal amino acid specificity '%s' for modification"},
+    {38, "Invalid tag length '%d': Valid values are 1 through 6"},
+    {39, "Input file parameter '%s' doesn't take a value"},
+    {40, "Input file parameter '%s' requires a string value"},
+    {41, "Input file parameter '%s' requires an integer value"},
+    {42, "Invalid mass %dDa at position %d in spectrum file '%s'"},
+    {43, "Invalid mass %dDa in spectrum file"},
+    {44, "Invalid mass ppm %d - should be in the range 1...1000"},
+    {45, "Peak for spectrum %s:%d is %dDa - possible corruption"},
+    {46, "Invalid scoring model specified: Charge must be 2 or 3"},
+    {47, "Invalid RequiredTermini value '%d' specified"},
+    {48, "Peak for spectrum %s:%d has intensity %f - possible corruption"},
+    {49, "Out of memory - failed to allocate %d bytes"},
+    {50, "Unable to write or close output file '%s'"},
+    {-1, NULL}
+};
+
+int ErrorMessageCount;
+
+void InitErrors()
+{
+    ErrorMessageCount = sizeof(ErrorMessages) / sizeof(ErrorMessages[0]);
+}
+
+// Report an error - write it to GlobalOptions->ErrorFile (if GlobalOptions exists)
+// and to stderr, and increment the count of reported errors.
+void ReportError(int ErrorSeverity, int ErrorID, int SourceLine, char* SourceFileName, int ArgType,
+                 const char* StrA, const char* StrB, const char* StrC, 
+                 int IntA, int IntB, int IntC, int IntD,
+                 float FloatA, float FloatB)
+{
+    char* ErrorMessage;
+    int ErrorIndex;
+    FILE* ErrorFile;
+    FILE* ErrorFile2;
+    //
+    if (!GlobalOptions || !GlobalOptions->ErrorFile)
+    {
+        ErrorFile = stderr;
+        ErrorFile2 = NULL;
+    }
+    else
+    {
+        ErrorFile = GlobalOptions->ErrorFile;
+        ErrorFile2 = stderr;
+    }
+    if (ErrorSeverity)
+    {
+        if (GlobalOptions)
+        {
+            GlobalOptions->ErrorCount++;
+        }
+        fprintf(ErrorFile, "[E%04d] %s:%d:", ErrorID, SourceFileName, SourceLine);
+        if (ErrorFile2)
+        {
+            fprintf(ErrorFile2, "[E%04d] %s:%d:", ErrorID, SourceFileName, SourceLine);
+        }
+        
+    }
+    else
+    {
+        if (GlobalOptions)
+        {
+            GlobalOptions->WarningCount++;
+        }
+        fprintf(ErrorFile, "{W%04d} %s:%d:", ErrorID, SourceFileName, SourceLine);
+        if (ErrorFile2)
+        {
+            fprintf(ErrorFile2, "{W%04d} %s:%d:", ErrorID, SourceFileName, SourceLine);
+        }
+    }
+    ErrorMessage = "";
+    for (ErrorIndex = 0; ErrorIndex < ErrorMessageCount; ErrorIndex++)
+    {
+        if (ErrorID == ErrorMessages[ErrorIndex].ID)
+        {
+            ErrorMessage = ErrorMessages[ErrorIndex].Message;
+            break;
+        }
+    }
+    switch (ArgType)
+    {
+    case ERROR_ARGS_S:
+        fprintf(ErrorFile, ErrorMessage, StrA);
+        if (ErrorFile2)
+        {
+            fprintf(ErrorFile2, ErrorMessage, StrA);
+        }
+        break;
+    case ERROR_ARGS_SS:
+        fprintf(ErrorFile, ErrorMessage, StrA, StrB);
+        if (ErrorFile2)
+        {
+            fprintf(ErrorFile2, ErrorMessage, StrA, StrB);
+        }
+        break;
+    case ERROR_ARGS_I:
+        fprintf(ErrorFile, ErrorMessage, IntA);
+        if (ErrorFile2)
+        {
+            fprintf(ErrorFile2, ErrorMessage, IntA);
+        }
+        break;
+    case ERROR_ARGS_IS:
+        fprintf(ErrorFile, ErrorMessage, IntA, StrA);
+        if (ErrorFile2)
+        {
+            fprintf(ErrorFile2, ErrorMessage, IntA, StrA);
+        }
+        break;
+    case ERROR_ARGS_ISS:
+        fprintf(ErrorFile, ErrorMessage, IntA, StrA, StrB);
+        if (ErrorFile2)
+        {
+            fprintf(ErrorFile2, ErrorMessage, IntA, StrA, StrB);
+        }
+        break;
+    case ERROR_ARGS_II:
+        fprintf(ErrorFile, ErrorMessage, IntA, IntB);
+        if (ErrorFile2)
+        {
+            fprintf(ErrorFile2, ErrorMessage, IntA, IntB);
+        }
+        break;
+    case ERROR_ARGS_IIS:
+        fprintf(ErrorFile, ErrorMessage, IntA, IntB, StrA);
+        if (ErrorFile2)
+        {
+            fprintf(ErrorFile2, ErrorMessage, IntA, IntB, StrA);
+        }
+        break;
+    case ERROR_ARGS_III:
+        fprintf(ErrorFile, ErrorMessage, IntA, IntB, IntC);
+        if (ErrorFile2)
+        {
+            fprintf(ErrorFile2, ErrorMessage, IntA, IntB, IntC);
+        }
+        break;
+    case ERROR_ARGS_IIIS:
+        fprintf(ErrorFile, ErrorMessage, IntA, IntB, IntC, StrA);
+        if (ErrorFile2)
+        {
+            fprintf(ErrorFile2, ErrorMessage, IntA, IntB, IntC, StrA);
+        }
+        break;
+    case ERROR_ARGS_IIII:
+        fprintf(ErrorFile, ErrorMessage, IntA, IntB, IntC, IntD);
+        if (ErrorFile2)
+        {
+            fprintf(ErrorFile2, ErrorMessage, IntA, IntB, IntC, IntD);
+        }
+        break;
+    case ERROR_ARGS_SII:
+        fprintf(ErrorFile, ErrorMessage, StrA, IntA, IntB);
+        if (ErrorFile2)
+        {
+            fprintf(ErrorFile2, ErrorMessage, StrA, IntA, IntB);
+        }
+        break;
+    case ERROR_ARGS_SIF:
+        fprintf(ErrorFile, ErrorMessage, StrA, IntA, FloatA);
+        if (ErrorFile2)
+        {
+            fprintf(ErrorFile2, ErrorMessage, StrA, IntA, FloatA);
+        }
+        break;
+
+    case ERROR_ARGS_NONE:
+    default:
+        fprintf(ErrorFile, ErrorMessage);
+        if (ErrorFile2)
+        {
+            fprintf(ErrorFile2, ErrorMessage);
+        }
+        break;
+    }
+    fprintf(ErrorFile, "\n");
+    if (ErrorFile2)
+    {
+        fprintf(ErrorFile2, "\n");
+    }
+
+}
diff --git a/Errors.h b/Errors.h
new file mode 100644
index 0000000..451e8f1
--- /dev/null
+++ b/Errors.h
@@ -0,0 +1,88 @@
+//Title:          Errors.h
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+//
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+#ifndef ERRORS_H
+#define ERRORS_H
+
+
+void AssertionFailed(char* Assertion, char* FileName, int LineNumber);
+void InitErrors();
+// ReportError is not to be called directly.  Use the REPORT_ERROR and REPORT_WARNING macros.
+void ReportError(int ErrorSeverity, int ErrorID, int SourceLine, char* SourceFileName, int Args,
+                 const char* StrA, const char* StrB, const char* StrC, 
+                 int IntA, int IntB, int IntC, int IntD,
+                 float FloatA, float FloatB);
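+// Usage sketch (illustrative): error 8 is "Unable to open requested file '%s'",
+// so a caller with a FileName string would write
+//   REPORT_ERROR_S(8, FileName);
+// which expands to a ReportError call tagged with __LINE__ / __FILE__ and a
+// single string argument.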
+
+#define ERROR_ARGS_NONE 0
+#define ERROR_ARGS_S 1
+#define ERROR_ARGS_I 2
+#define ERROR_ARGS_II 3
+#define ERROR_ARGS_III 4
+#define ERROR_ARGS_IIII 5
+#define ERROR_ARGS_IS 6
+#define ERROR_ARGS_IIS 7
+#define ERROR_ARGS_IIIS 8
+#define ERROR_ARGS_SS 9
+#define ERROR_ARGS_ISS 10
+#define ERROR_ARGS_SII 11
+#define ERROR_ARGS_SIF 12
+
+#define REPORT_ERROR(ErrorID) ReportError(1, ErrorID, __LINE__, __FILE__, ERROR_ARGS_NONE, NULL, NULL, NULL, 0, 0, 0, 0, 0.0, 0.0);
+#define REPORT_ERROR_S(ErrorID, StrA) ReportError(1, ErrorID, __LINE__, __FILE__, ERROR_ARGS_S, StrA, NULL, NULL, 0, 0, 0, 0, 0.0, 0.0);
+#define REPORT_ERROR_SS(ErrorID, StrA, StrB) ReportError(1, ErrorID, __LINE__, __FILE__, ERROR_ARGS_SS, StrA, StrB, NULL, 0, 0, 0, 0, 0.0, 0.0);
+#define REPORT_ERROR_I(ErrorID, IntA) ReportError(1, ErrorID, __LINE__, __FILE__, ERROR_ARGS_I, NULL, NULL, NULL, IntA, 0, 0, 0, 0.0, 0.0);
+#define REPORT_ERROR_II(ErrorID, IntA, IntB) ReportError(1, ErrorID, __LINE__, __FILE__, ERROR_ARGS_II, NULL, NULL, NULL, IntA, IntB, 0, 0, 0.0, 0.0);
+#define REPORT_ERROR_III(ErrorID, IntA, IntB, IntC) ReportError(1, ErrorID, __LINE__, __FILE__, ERROR_ARGS_III, NULL, NULL, NULL, IntA, IntB, IntC, 0, 0.0, 0.0);
+#define REPORT_ERROR_IIII(ErrorID, IntA, IntB, IntC, IntD) ReportError(1, ErrorID, __LINE__, __FILE__, ERROR_ARGS_IIII, NULL, NULL, NULL, IntA, IntB, IntC, IntD, 0.0, 0.0);
+#define REPORT_ERROR_IS(ErrorID, IntA, StrA) ReportError(1, ErrorID, __LINE__, __FILE__, ERROR_ARGS_IS, StrA, NULL, NULL, IntA, 0, 0, 0, 0.0, 0.0);
+#define REPORT_ERROR_IIS(ErrorID, IntA, IntB, StrA) ReportError(1, ErrorID, __LINE__, __FILE__, ERROR_ARGS_IIS, StrA, NULL, NULL, IntA, IntB, 0, 0, 0.0, 0.0);
+#define REPORT_ERROR_IIIS(ErrorID, IntA, IntB, IntC, StrA) ReportError(1, ErrorID, __LINE__, __FILE__, ERROR_ARGS_IIIS, StrA, NULL, NULL, IntA, IntB, IntC, 0, 0.0, 0.0);
+#define REPORT_ERROR_ISS(ErrorID, IntA, StrA, StrB) ReportError(1, ErrorID, __LINE__, __FILE__, ERROR_ARGS_ISS, StrA, StrB, NULL, IntA, 0, 0, 0, 0.0, 0.0);
+#define REPORT_ERROR_SII(ErrorID, StrA, IntA, IntB) ReportError(1, ErrorID, __LINE__, __FILE__, ERROR_ARGS_SII, StrA, NULL, NULL, IntA, IntB, 0, 0, 0.0, 0.0);
+#define REPORT_ERROR_SIF(ErrorID, StrA, IntA, FloatA) ReportError(1, ErrorID, __LINE__, __FILE__, ERROR_ARGS_SIF, StrA, NULL, NULL, IntA, 0, 0, 0, FloatA, 0.0);
+
+#define REPORT_WARNING(ErrorID) ReportError(0, ErrorID, __LINE__, __FILE__, ERROR_ARGS_NONE, NULL, NULL, NULL, 0, 0, 0, 0, 0.0, 0.0);
+#define REPORT_WARNING_S(ErrorID, StrA) ReportError(0, ErrorID, __LINE__, __FILE__, ERROR_ARGS_S, StrA, NULL, NULL, 0, 0, 0, 0, 0.0, 0.0);
+#define REPORT_WARNING_SS(ErrorID, StrA, StrB) ReportError(0, ErrorID, __LINE__, __FILE__, ERROR_ARGS_SS, StrA, StrB, NULL, 0, 0, 0, 0, 0.0, 0.0);
+#define REPORT_WARNING_I(ErrorID, IntA) ReportError(0, ErrorID, __LINE__, __FILE__, ERROR_ARGS_I, NULL, NULL, NULL, IntA, 0, 0, 0, 0.0, 0.0);
+#define REPORT_WARNING_II(ErrorID, IntA, IntB) ReportError(0, ErrorID, __LINE__, __FILE__, ERROR_ARGS_II, NULL, NULL, NULL, IntA, IntB, 0, 0, 0.0, 0.0);
+#define REPORT_WARNING_III(ErrorID, IntA, IntB, IntC) ReportError(0, ErrorID, __LINE__, __FILE__, ERROR_ARGS_III, NULL, NULL, NULL, IntA, IntB, IntC, 0, 0.0, 0.0);
+#define REPORT_WARNING_IIII(ErrorID, IntA, IntB, IntC, IntD) ReportError(0, ErrorID, __LINE__, __FILE__, ERROR_ARGS_IIII, NULL, NULL, NULL, IntA, IntB, IntC, IntD, 0.0, 0.0);
+#define REPORT_WARNING_IS(ErrorID, IntA, StrA) ReportError(0, ErrorID, __LINE__, __FILE__, ERROR_ARGS_IS, StrA, NULL, NULL, IntA, 0, 0, 0, 0.0, 0.0);
+#define REPORT_WARNING_IIS(ErrorID, IntA, IntB, StrA) ReportError(0, ErrorID, __LINE__, __FILE__, ERROR_ARGS_IIS, StrA, NULL, NULL, IntA, IntB, 0, 0, 0.0, 0.0);
+#define REPORT_WARNING_IIIS(ErrorID, IntA, IntB, IntC, StrA) ReportError(0, ErrorID, __LINE__, __FILE__, ERROR_ARGS_IIIS, StrA, NULL, NULL, IntA, IntB, IntC, 0, 0.0, 0.0);
+#define REPORT_WARNING_ISS(ErrorID, IntA, StrA, StrB) ReportError(0, ErrorID, __LINE__, __FILE__, ERROR_ARGS_ISS, StrA, StrB, NULL, IntA, 0, 0, 0, 0.0, 0.0);
+#define REPORT_WARNING_SII(ErrorID, StrA, IntA, IntB) ReportError(0, ErrorID, __LINE__, __FILE__, ERROR_ARGS_SII, StrA, NULL, NULL, IntA, IntB, 0, 0, 0.0, 0.0);
+#define REPORT_WARNING_SIF(ErrorID, StrA, IntA, FloatA) ReportError(0, ErrorID, __LINE__, __FILE__, ERROR_ARGS_SIF, StrA, NULL, NULL, IntA, 0, 0, 0, FloatA, 0.0);
+
+#endif // ERRORS_H
diff --git a/ExonGraphAlign.c b/ExonGraphAlign.c
new file mode 100644
index 0000000..dd3a602
--- /dev/null
+++ b/ExonGraphAlign.c
@@ -0,0 +1,1195 @@
+//Title:          ExonGraphAlign.c
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+#include "CMemLeak.h"
+#include <stdio.h>
+#include <memory.h>
+//#include <malloc.h>
+#include "Utils.h"
+#include "Spliced.h"
+
+// This file implements an alignment algorithm between a sequence and 
+// an exon graph, or between two exon graphs. 
+// Exon-graph alignment is very similar to the Smith-Waterman alignment
+// algorithm.  The main difference is that in the recurrence relation,
+// a cell may have several "previous" cells to consider, due to the
+// structure of the exon graph.
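+//
+// Rough sketch of the recurrence (informal; the exact bookkeeping, including
+// the affine-gap layers Z_STANDARD / Z_GAP_IN_X / Z_GAP_IN_Y, lives in
+// AlignSequenceAgainstExonGraph below):
+//   Best(x, y) = max(0,
+//                    match(a_x, b_y) + max over p in PrevY[y] of Best(x-1, p),
+//                    gap-opening / gap-extension terms)
+// i.e. ordinary local alignment, except that "the previous row" is a set of
+// rows supplied by the exon graph's back-edges rather than always row y-1.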
+
+#define AA_COUNT 26
+
+// IntNode: For handling lists of successors to each row / column
+typedef struct IntNode
+{
+    int Value;
+    struct IntNode* Next;
+} IntNode;
+
+
+// Forward declarations:
+int AlignExonGraphAgainstExonGraph(GeneStruct* GeneA, GeneStruct* GeneB,
+    char* ScoringMatrixFileName, int StartGapPenalty, int ExtendGapPenalty);
+void FreePrevCellTable(IntNode** PrevCell, int Size);
+
+// Default distance matrix for aligning protein sequences: Hamming distance,
+// a bonus for each match and a penalty for each mismatch.  This is adequate
+// for most purposes.
+int* GenerateHammingDistanceMatrix()
+{
+    int X;
+    int Y;
+    int* Matrix;
+    //
+    Matrix = (int*)calloc(AA_COUNT*AA_COUNT, sizeof(int));
+    for (X = 0; X < AA_COUNT; X++)
+    {
+        for (Y = 0; Y < AA_COUNT; Y++)
+        {
+            if (X == Y)
+            {   
+                Matrix[Y*AA_COUNT + X] = 10;
+            }
+            else
+            {
+                Matrix[Y*AA_COUNT + X] = -10;
+            }
+        }
+    }
+    return Matrix;
+}
+
+int* LoadScoringMatrix(char* ScoringMatrixFileName)
+{
+    //FILE* ScoringMatrixFile;
+    printf("** Scoring matrix support not implemented yet - use Hamming disatnce for now\n");
+    return NULL;
+}
+
+
+void XALinkRowToRow(IntNode** PrevY, int Y, int TargetY)
+{
+    IntNode* INode;
+    IntNode* NewINode;
+    NewINode = (IntNode*)calloc(1, sizeof(IntNode));
+    NewINode->Value = TargetY;
+    //printf("Back-link from row %d to target %d\n", Y, TargetY);
+    if (PrevY[Y])
+    {
+        for (INode = PrevY[Y]; INode->Next; INode = INode->Next)
+        {
+            ;
+        }
+        INode->Next = NewINode;
+    }
+    else
+    {
+        PrevY[Y] = NewINode;
+    }
+}
+
+// This function links the specified row to the specified exon.
+// The catch: If the exon has length 0, then we link to the specified exon's predecessors.
+void XALinkRowToExon(GeneStruct* Gene, IntNode** PrevY, int Y, int ExonIndex, int* ExonOffsets, int* ExonEdgeOffsets)
+{
+    ExonStruct* Exon;
+    int EdgeIndex;
+    int AAEdgeCount;
+    ExonEdge* Edge;
+    //
+    Exon = Gene->Exons + ExonIndex;
+    // Standard case: The exon is non-empty, so we link back to it.
+    if (Exon->Length)
+    {
+        XALinkRowToRow(PrevY, Y, ExonOffsets[Exon->Index] + Exon->Length - 1);
+        return;
+    }
+    // Special case: The exon is empty, so we link to the exon's predecessors.
+    AAEdgeCount = 0;
+    for (EdgeIndex = 0; EdgeIndex < Exon->BackEdgeCount; EdgeIndex++)
+    {
+        Edge = Exon->BackwardEdges + EdgeIndex;
+        if (Edge->AA)
+        {
+            XALinkRowToRow(PrevY, Y, ExonEdgeOffsets[ExonIndex] + AAEdgeCount);
+            AAEdgeCount++;
+        }
+        else
+        {
+            XALinkRowToExon(Gene, PrevY, Y, Edge->Exon->Index, ExonOffsets, ExonEdgeOffsets);
+        }
+    }
+}
+
+// Test scaffolding for graph-based alignment
+void TestExonGraphAlignment(int argc, char* argv[])
+{
+    GeneStruct* GeneA;
+    GeneStruct* GeneB;
+    FILE* GeneFile;
+    //
+    GeneFile = fopen(argv[1], "rb");
+    if (!GeneFile)
+    {
+        printf("** Error: Can't open gene file '%s'.\n", argv[1]);
+        return;
+    }
+    GeneA = LoadGene(GeneFile);
+    fclose(GeneFile);
+    GeneFile = fopen(argv[2], "rb");
+    if (!GeneFile)
+    {
+        printf("** Error: Can't open gene file '%s'.\n", argv[1]);
+        return;
+    }
+    GeneB = LoadGene(GeneFile);
+    AlignExonGraphAgainstExonGraph(GeneA, GeneB, NULL, -10, -3);
+    //AlignSequenceAgainstExonGraph(Gene, Sequence, NULL, -10, -3);
+    printf("\n\nAlignment complete.\n");
+}
+
+#define Z_STANDARD 0
+#define Z_GAP_IN_X 1
+#define Z_GAP_IN_Y 2
+
+// Count the links and the total amino acids in this exon graph.  Used
+// in determining the table size in alignment.
+void GetExonGraphSize(GeneStruct* Gene, int* pLinkCount, int* pSize)
+{
+    int ExonIndex;
+    int LinkIndex;
+    ExonStruct* Exon;
+    ExonEdge* Edge;
+    //
+    *pLinkCount = 0;
+    *pSize = 0;
+    for (ExonIndex = 0; ExonIndex < Gene->ExonCount; ExonIndex++)
+    {
+        Exon = Gene->Exons + ExonIndex;
+        *pSize += Exon->Length;
+        
+        for (LinkIndex = 0; LinkIndex < Exon->BackEdgeCount; LinkIndex++)
+        {
+            Edge = Exon->BackwardEdges + LinkIndex;
+            if (Edge->AA)
+            {
+                (*pLinkCount)++;
+                (*pSize)++;
+            }
+        }
+    }
+}
+
+int CompareExonsForward(const ExonStruct* ExonA, const ExonStruct* ExonB)
+{
+    if (ExonA->Start < ExonB->Start)
+    {
+        return -1;
+    }
+    if (ExonA->Start > ExonB->Start)
+    {
+        return 1;
+    }
+    // ExonA->Start == ExonB->Start    
+    if (ExonA->End < ExonB->End)
+    {
+        return -1;
+    }
+    if (ExonA->End > ExonB->End)
+    {
+        return 1;
+    }
+    // Same coordinates?  Arbitrary sort:
+    if (ExonA < ExonB)
+    {
+        return -1;
+    }
+    else 
+    {
+        return 1;
+    }
+}
+int CompareExonsReverse(const ExonStruct* ExonA, const ExonStruct* ExonB)
+{
+    if (ExonA->End > ExonB->End)
+    {
+        return -1;
+    }
+    if (ExonA->End < ExonB->End)
+    {
+        return 1;
+    }
+    // ExonA->End == ExonB->End
+    if (ExonA->Start > ExonB->Start)
+    {
+        return -1;
+    }
+    if (ExonA->Start < ExonB->Start)
+    {
+        return 1;
+    }
+    // Same coordinates?  Arbitrary sort:
+    if (ExonA > ExonB)
+    {
+        return 1;
+    }
+    else 
+    {
+        return -1;
+    }
+}
+
+
+// Build the necessary arrays for a d.p. alignment against an exon graph:
+// - ExonEdgeOffsets[n] is the row for the first back-link-with-aa of exon n
+// - ExonOffsets[n] is the row for the first aa in exon n
+// - YSequence[n] is the nth character in the flattened graph
+// - PrevY[n] is a linked list of predecessors for row n.  If n is within an exon,
+//  there's just one entry, n-1.  If n is the start of an exon, there may be several
+//  entries.  If n comes from an edge, there'll be exactly one entry, for the earlier exon.
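+//
+// Hypothetical example (illustrative, not taken from real gene data): for two
+// exons "AB" -> "CD" joined by a single edge carrying the amino acid K, the
+// flattened rows come out roughly as:
+//   row 0: A  (exon 0, pos 0; no predecessor)
+//   row 1: B  (exon 0, pos 1; PrevY[1] = {0})
+//   row 2: K  (edge into exon 1; PrevY[2] = {1})
+//   row 3: C  (exon 1, pos 0; PrevY[3] = {2})
+//   row 4: D  (exon 1, pos 1; PrevY[4] = {3})
+// with ExonOffsets = {0, 3} and ExonEdgeOffsets = {-1, 2}.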
+void FlattenExonsForAlignment(GeneStruct* Gene, int* ExonOffsets, int* ExonEdgeOffsets, 
+    char* YSequence, IntNode** PrevY, char** YRowInfo)
+{
+    int Y;
+    int ExonIndex;
+    int StartExonIndex;
+    int ExonIterateDir;
+    int ExonCount = Gene->ExonCount;
+    ExonStruct* Exon;
+    int AALinkCount;
+    int LinkIndex;
+    IntNode* NewINode;
+    int Pos;
+    ExonEdge* Edge;
+    //
+    StartExonIndex = 0;
+    ExonIterateDir = 1;
+    Y = 0;
+    for (ExonIndex = StartExonIndex; ExonIndex >= 0 && ExonIndex < ExonCount; ExonIndex += ExonIterateDir)
+    {
+        Exon = Gene->Exons + ExonIndex;
+        // Add a row for each edge with AA:
+        AALinkCount = 0;
+        ExonEdgeOffsets[ExonIndex] = -1;
+        for (LinkIndex = 0; LinkIndex < Exon->BackEdgeCount; LinkIndex++)
+        {
+            // For each exon with an associated amino acid:
+            // - Add a row for the edge, with one back-link.
+            // - Remember the row number, so that the exon can be linked back to this row
+            Edge = Exon->BackwardEdges + LinkIndex;
+            if (Edge->AA)
+            {
+                if (ExonEdgeOffsets[ExonIndex] < 0)
+                {
+                    ExonEdgeOffsets[ExonIndex] = Y;
+                }
+                //EdgeOffsets[AALinkCount] = Y;
+                YSequence[Y] = Edge->AA;
+                sprintf(YRowInfo[Y], "X%d backlink%d to %d", ExonIndex, LinkIndex, Edge->Exon->Index);
+                XALinkRowToExon(Gene, PrevY, Y, Edge->Exon->Index, ExonOffsets, ExonEdgeOffsets);
+                //NewINode = (IntNode*)calloc(1, sizeof(IntNode));
+                //NewINode->Value = ExonOffsets[Exon->BackExon[LinkIndex]->Index] + Exon->BackExon[LinkIndex]->Length;
+                //PrevY[Y] = NewINode;
+                AALinkCount++;
+                if (PrevY[Y])
+                {
+                    printf("Y %d: exon %d link %d, back to exon %d y %d\n", Y, ExonIndex, LinkIndex, Edge->Exon->Index, PrevY[Y]->Value);
+                }
+                Y++;
+            }
+        }
+        if (!Exon->Length)
+        {
+            continue;
+        }
+        printf("Y %d: start exon %d body.\n", Y, ExonIndex);
+        // Add back-links for the first AA in the exon:
+        ExonOffsets[ExonIndex] = Y;
+        AALinkCount = 0;
+        for (LinkIndex = 0; LinkIndex < Exon->BackEdgeCount; LinkIndex++)
+        {
+            Edge = Exon->BackwardEdges + LinkIndex;
+            if (Edge->AA)
+            {
+                XALinkRowToRow(PrevY, Y, ExonEdgeOffsets[ExonIndex] + AALinkCount);
+                printf("ExonEdge offset %d.  Link %d goes to AARow %d\n", ExonEdgeOffsets[ExonIndex], LinkIndex, PrevY[Y]->Value);
+                AALinkCount++;
+            }
+            else
+            {
+                XALinkRowToExon(Gene, PrevY, Y, Edge->Exon->Index, ExonOffsets, ExonEdgeOffsets);
+                printf("Y %d: start exon %d.  Link %d goes to exon row %d\n", Y, ExonIndex, LinkIndex, PrevY[Y]->Value);
+            }
+        }
+        // Add one row for each AA in the exon proper:
+        for (Pos = 0; Pos < Exon->Length; Pos++)
+        {
+            sprintf(YRowInfo[Y], "X%d pos %d/%d", ExonIndex, Pos, Exon->Length);
+            YSequence[Y] = Exon->Sequence[Pos];
+            if (Pos)
+            {
+                NewINode = (IntNode*)calloc(1, sizeof(IntNode));
+                NewINode->Value = Y-1;
+                PrevY[Y] = NewINode;
+            }
+            //printf("%d %c pos %d in exon #%d\n", Y, YSequence[Y], Pos, Exon->Index);
+            Y++;
+        }
+    }
+}
+
+void SortGeneExons(GeneStruct* Gene)
+{
+    return;
+}
+
+// ExonGraphAlign extends the Smith-Waterman alignment algorithm to 
+// handle local alignment of a sequence with an exon graph.  The scoring
+// matrix (such as BLOSUM) can be specified, or Hamming distance can be
+// used.  Gap penalties should also be specified.  The function will 
+// return the score of the best alignment, and (optionally) set 'verbose' 
+// strings specifying the alignment itself, like this:
+//   EAM--APK
+//   ***  * *
+//   EAMCGARK
+// The data structure we use in implementing this alignment is a grid
+// (stored as an array), where each row has a linked list of zero or more
+// nodes specifying the legal predecessor rows.
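+// Storage note: cell (x, y, z) lives at index y*YBlockSize + x*XBlockSize + z
+// of ScoreTable, where z selects one of the three layers (Z_STANDARD,
+// Z_GAP_IN_X, Z_GAP_IN_Y); NextX / NextY / NextZ record, for each cell, the
+// cell the optimal path continues from, and the traceback below walks them to
+// rebuild the three alignment strings.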
+int AlignSequenceAgainstExonGraph(GeneStruct* Gene, char* Sequence, 
+    char* ScoringMatrixFileName, int StartGapPenalty, int ExtendGapPenalty)
+{
+    int* ScoreTable;
+    int* NextX;
+    int* NextY;
+    int* NextZ;
+    int X;
+    int Y;
+    int Z;
+    IntNode** PrevY;
+    int TableIndex;
+    int PrevTableIndex;
+    int TableSize;
+    int* ScoringMatrix = NULL;
+    int* ExonOffsets;
+    int* ExonEdgeOffsets;
+    int ExonCount = 0;
+    int LinkCount = 0;
+    int SequenceLength;
+    int MaxY;
+    int AlignScore;
+    int Score;
+    char* YSequence;
+    IntNode* INode;
+    int XBlockSize;
+    int YBlockSize;
+    char ResidueA;
+    char ResidueB;
+    int BestX = 0;
+    int BestY = 0;
+    int BestZ = 0;
+    int AlignStringLength;
+    char* AlignStringA;
+    char* AlignStringB;
+    char* AlignStringC;
+    int BestScore;
+    int StartExonIndex;
+    int NearY;
+    int ExonIterateDir;
+    char** YRowInfo;
+    // Ensure gap penalties are NEGATIVE numbers.  Negative is bad.
+    if (StartGapPenalty > 0)
+    {
+        StartGapPenalty = -StartGapPenalty;
+    }
+    if (ExtendGapPenalty > 0)
+    {
+        ExtendGapPenalty = -ExtendGapPenalty;
+    }
+
+    // Load the scoring matrix (or use default hamming distance)
+    if (ScoringMatrixFileName)
+    {
+        ScoringMatrix = LoadScoringMatrix(ScoringMatrixFileName);
+    }
+    if (!ScoringMatrix)
+    {
+        ScoringMatrix = GenerateHammingDistanceMatrix();
+    }
+    SequenceLength = strlen(Sequence);
+    ////////////////////////////////////////////////////////////
+    // Count the exons and edges (with aa):
+    ExonCount = Gene->ExonCount;
+    
+    if (Gene->ForwardFlag)
+    {
+        ExonIterateDir = 1;
+        StartExonIndex = 0;
+    }
+    else
+    {
+        ExonIterateDir = -1;
+        StartExonIndex = Gene->ExonCount - 1;
+    }
+    GetExonGraphSize(Gene, &LinkCount, &MaxY);
+    ////////////////////////////////////////////////////////////
+    // Allocate arrays:
+    TableSize = MaxY * SequenceLength * 3;
+    ScoreTable = (int*)calloc(TableSize, sizeof(int));
+    NextX = (int*)calloc(TableSize, sizeof(int));
+    NextY = (int*)calloc(TableSize, sizeof(int));
+    NextZ = (int*)calloc(TableSize, sizeof(int));
+    PrevY = (IntNode**)calloc(MaxY, sizeof(IntNode*));
+    YSequence = (char*)calloc(MaxY, sizeof(char));
+    ExonOffsets = (int*)calloc(ExonCount, sizeof(int));
+    ExonEdgeOffsets = (int*)calloc(ExonCount, sizeof(int));
+    YRowInfo = (char**)calloc(MaxY, sizeof(char*));
+    for (Y = 0; Y < MaxY; Y++)
+    {
+        YRowInfo[Y] = (char*)calloc(64, sizeof(char));
+    }
+    ////////////////////////////////////////////////////////////
+    // Initialize the linked lists giving predecessors at each point.
+    SortGeneExons(Gene);
+    //DebugPrintGene(Gene); 
+    FlattenExonsForAlignment(Gene, ExonOffsets, ExonEdgeOffsets, YSequence, PrevY, YRowInfo);
+    //////////////////////////////////////////////////////////////////////
+    // Debug print:
+    for (Y = 0; Y < MaxY; Y++)
+    {
+        printf("%d ", Y);
+        for (NearY = Y - 5; NearY < Y + 6; NearY++)
+        {
+            if (NearY >= 0 && NearY < MaxY)
+            {
+                printf("%c", YSequence[NearY]);
+            }
+        }
+        for (INode = PrevY[Y]; INode; INode = INode->Next)
+        {
+            printf(" ->%d (", INode->Value);
+            for (NearY = INode->Value - 3; NearY < INode->Value; NearY++)
+            {
+                if (NearY >= 0 && NearY < MaxY)
+                {
+                    printf("%c", YSequence[NearY]);
+                }
+            }
+            printf(" %c ", YSequence[INode->Value]);
+            for (NearY = INode->Value + 1; NearY < INode->Value + 4; NearY++)
+            {
+                if (NearY >= 0 && NearY < MaxY)
+                {
+                    printf("%c", YSequence[NearY]);
+                }
+            }
+            printf(")");
+        }
+        printf("\n");
+    }
+    ////////////////////////////////////////////////////////////
+    // Carry out dynamic programming:
+    XBlockSize = 3;
+    YBlockSize = XBlockSize * SequenceLength;
+    for (Y = 0; Y < MaxY; Y++)
+    {
+        ResidueB = YSequence[Y] - 'A';
+        if (ResidueB < 0 || ResidueB >= AA_COUNT)
+        {
+            ResidueB = 23; //'X';
+        }
+        for (X = 0; X < SequenceLength; X++)
+        {
+            ResidueA = Sequence[X] - 'A';
+            if (ResidueA < 0 || ResidueA >= AA_COUNT)
+            {
+                ResidueA = 23; //'X';
+            }
+            ////////////////////////////
+            // Z == 0, the alignment table:
+            TableIndex = Y*YBlockSize + X*XBlockSize + Z_STANDARD;
+            // Default: Jump in
+            BestScore = 0;
+            ScoreTable[TableIndex] = BestScore;
+            NextX[TableIndex] = -1;
+            NextY[TableIndex] = -1;
+            NextZ[TableIndex] = -1;
+            // Consider aligning:
+            AlignScore = ScoringMatrix[ResidueA * AA_COUNT + ResidueB];
+            // Aligning at the edges of the world is allowed:
+            if (!X || !PrevY[Y])
+            {
+                if (AlignScore > BestScore)
+                {
+                    ScoreTable[TableIndex] = AlignScore;
+                    BestScore = AlignScore;
+                }
+            }
+            else
+            {
+                // Consider each predecessor row:
+                for (INode = PrevY[Y]; INode; INode = INode->Next)
+                {
+                    PrevTableIndex = INode->Value * YBlockSize + (X-1)*XBlockSize + 0;
+                    Score = AlignScore + ScoreTable[PrevTableIndex];
+                    if (Score > BestScore)
+                    {
+                        BestScore = Score;
+                        ScoreTable[TableIndex] = BestScore;
+                        NextX[TableIndex] = X - 1;
+                        NextY[TableIndex] = INode->Value;
+                        NextZ[TableIndex] = 0;
+                    }
+                }
+            }
+            // Consider gapping in x:
+            if (X)
+            {
+                PrevTableIndex = Y * YBlockSize + (X-1) * XBlockSize + Z_GAP_IN_X;
+                Score = StartGapPenalty + ScoreTable[PrevTableIndex];
+                if (Score > BestScore)
+                {
+                    BestScore = Score;
+                    ScoreTable[TableIndex] = BestScore;
+                    NextX[TableIndex] = X - 1;
+                    NextY[TableIndex] = Y;
+                    NextZ[TableIndex] = Z_GAP_IN_X;
+                }
+            }
+            // Consider gapping in y:
+            for (INode = PrevY[Y]; INode; INode = INode->Next)
+            {
+                PrevTableIndex = INode->Value * YBlockSize + X * XBlockSize + Z_GAP_IN_Y;
+                Score = StartGapPenalty + ScoreTable[PrevTableIndex];
+                if (Score > BestScore)
+                {
+                    BestScore = Score;
+                    ScoreTable[TableIndex] = BestScore;
+                    NextX[TableIndex] = X;
+                    NextY[TableIndex] = INode->Value;
+                    NextZ[TableIndex] = Z_GAP_IN_Y;
+                }
+            }
+            //printf("At %d, %d, 0: Score %d, prev %d, %d, %d\n", X, Y, ScoreTable[TableIndex],
+            //    NextX[TableIndex], NextY[TableIndex], NextZ[TableIndex]);
+            ////////////////////////////
+            // Z=1, gapping in x:
+            // By default, close the gap...but also consider extending it (unless x == 0)
+            TableIndex = Y*YBlockSize + X*XBlockSize + Z_GAP_IN_X;
+            PrevTableIndex = Y*YBlockSize + X*XBlockSize + Z_STANDARD;
+            BestScore = ScoreTable[PrevTableIndex];
+            ScoreTable[TableIndex] = BestScore;
+            NextX[TableIndex] = X;
+            NextY[TableIndex] = Y;
+            NextZ[TableIndex] = Z_STANDARD;
+            if (X)
+            {
+                Score = ExtendGapPenalty + ScoreTable[Y*YBlockSize + (X-1)*XBlockSize + Z_GAP_IN_X];
+                if (Score > BestScore)
+                {
+                    BestScore = Score;
+                    ScoreTable[TableIndex] = BestScore;
+                    NextX[TableIndex] = X - 1;
+                    NextY[TableIndex] = Y;
+                    NextZ[TableIndex] = Z_GAP_IN_X;
+                }
+            }
+            ////////////////////////////
+            // Z=2, gapping in y:
+            // By default, close the gap...but also consider extending it
+            TableIndex = Y*YBlockSize + X*XBlockSize + Z_GAP_IN_Y;
+            PrevTableIndex = Y*YBlockSize + X*XBlockSize + Z_STANDARD;
+            BestScore = ScoreTable[PrevTableIndex];
+            ScoreTable[TableIndex] = BestScore;
+            NextX[TableIndex] = X;
+            NextY[TableIndex] = Y;
+            NextZ[TableIndex] = Z_STANDARD;
+            for (INode = PrevY[Y]; INode; INode = INode->Next)
+            {
+                Score = ExtendGapPenalty + ScoreTable[INode->Value*YBlockSize + X*XBlockSize + Z_GAP_IN_Y];
+                if (Score > BestScore)
+                {
+                    BestScore = Score;
+                    ScoreTable[TableIndex] = BestScore;
+                    NextX[TableIndex] = X;
+                    NextY[TableIndex] = INode->Value;
+                    NextZ[TableIndex] = Z_GAP_IN_Y;
+                }
+            }
+        }
+    }
+    ////////////////////////////////////////////////////////////
+    // Find where the best alignment ends:
+    BestScore = -9999;
+    for (X = 0; X < SequenceLength; X++)
+    {
+        for (Y = 0; Y < MaxY; Y++)
+        {
+            for (Z = 0; Z < 3; Z++)
+            {
+                Score = ScoreTable[Y*YBlockSize + X*XBlockSize + Z];
+                if (Score > BestScore)
+                {
+                    BestScore = Score;
+                    BestX = X;
+                    BestY = Y;
+                    BestZ = Z;
+                }
+            }
+        }
+    }
+    ////////////////////////////////////////////////////////////
+    // Produce strings for the optimal alignment:
+    X = BestX;
+    Y = BestY;
+    Z = BestZ;
+    AlignStringLength = 0;
+    while (X >= 0)
+    {
+        TableIndex = Y*YBlockSize + X*XBlockSize + Z;
+        // Each step we take will add to the string...except closing a gap.
+        if (!Z || NextZ[TableIndex])
+        {
+            AlignStringLength++;
+        }
+        X = NextX[TableIndex];
+        Y = NextY[TableIndex];
+        Z = NextZ[TableIndex];
+    }
+
+    AlignStringA = (char*)calloc(AlignStringLength + 1, sizeof(char));
+    AlignStringB = (char*)calloc(AlignStringLength + 1, sizeof(char));
+    AlignStringC = (char*)calloc(AlignStringLength + 1, sizeof(char));
+    X = BestX;
+    Y = BestY;
+    Z = BestZ;
+    while (X >= 0)
+    {
+        AlignStringLength--;
+        TableIndex = Y*YBlockSize + X*XBlockSize + Z;
+        switch (Z)
+        {
+        case Z_STANDARD:
+            switch (NextZ[TableIndex])
+            {
+            case Z_STANDARD:
+            default:
+                ResidueA = Sequence[X];
+                ResidueB = YSequence[Y];
+                AlignStringA[AlignStringLength] = Sequence[X];
+                AlignStringC[AlignStringLength] = YSequence[Y];
+                if (ResidueA == ResidueB)
+                {
+                    AlignStringB[AlignStringLength] = '*';
+                }
+                else
+                {
+                    AlignStringB[AlignStringLength] = ' ';
+                }
+                printf("X %d (%c) Y %d (%c) %s\n", X, ResidueA, Y, ResidueB, YRowInfo[Y]);
+                break;
+            case Z_GAP_IN_X:
+                AlignStringA[AlignStringLength] = Sequence[X];
+                AlignStringB[AlignStringLength] = ' ';
+                AlignStringC[AlignStringLength] = '-';
+                break;
+            case Z_GAP_IN_Y:
+                AlignStringA[AlignStringLength] = '-';
+                AlignStringB[AlignStringLength] = ' ';
+                AlignStringC[AlignStringLength] = YSequence[Y];
+                break;
+            }
+            break;
+        case Z_GAP_IN_X:
+            if (NextZ[TableIndex])
+            {
+                AlignStringA[AlignStringLength] = Sequence[X];
+                AlignStringB[AlignStringLength] = ' ';
+                AlignStringC[AlignStringLength] = '-';
+            }
+            break;            
+        case Z_GAP_IN_Y:
+            if (NextZ[TableIndex])
+            {
+                AlignStringA[AlignStringLength] = '-';
+                AlignStringB[AlignStringLength] = ' ';
+                AlignStringC[AlignStringLength] = YSequence[Y];
+            }
+            break;
+        }
+        
+        // Each step we take will add to the string...except closing a gap.
+        if (Z && !NextZ[TableIndex])
+        {
+            AlignStringLength++;
+        }
+        X = NextX[TableIndex];
+        Y = NextY[TableIndex];
+        Z = NextZ[TableIndex];
+    }
+    printf("Alignment score %d.  Alignment follows:\n", BestScore);
+    printf("%s\n", AlignStringA);
+    printf("%s\n", AlignStringB);
+    printf("%s\n", AlignStringC);
+
+    ////////////////////////////////////////////////////////////
+    // cleanup:
+    SafeFree(ScoringMatrix);
+    SafeFree(ScoreTable);
+    SafeFree(ExonOffsets);
+    SafeFree(NextX);
+    SafeFree(NextY);
+    SafeFree(NextZ);
+    SafeFree(YSequence);
+    SafeFree(ExonEdgeOffsets);
+    SafeFree(AlignStringA);
+    SafeFree(AlignStringB);
+    SafeFree(AlignStringC);
+    FreePrevCellTable(PrevY, MaxY);
+    for (Y = 0; Y < MaxY; Y++)
+    {
+        SafeFree(YRowInfo[Y]);
+    }
+    SafeFree(YRowInfo);
+
+    return BestScore;
+}
+
+// Free an array of linked-lists providing predecessor cells
+void FreePrevCellTable(IntNode** PrevCell, int Size)
+{
+    int Index;
+    IntNode* Node;
+    IntNode* Prev;
+    if (!PrevCell)
+    {
+        return;
+    }
+    for (Index = 0; Index < Size; Index++)
+    {
+        Prev = NULL;
+        for (Node = PrevCell[Index]; Node; Node = Node->Next)
+        {
+            SafeFree(Prev);
+            Prev = Node;
+        }
+        SafeFree(Prev);
+    }
+    SafeFree(PrevCell);
+}
+
+
+// Align an exon graph against another exon graph.
+int AlignExonGraphAgainstExonGraph(GeneStruct* GeneA, GeneStruct* GeneB,
+    char* ScoringMatrixFileName, int StartGapPenalty, int ExtendGapPenalty)
+{
+    int MaxX;
+    int MaxY;
+    int LinkCountA;
+    int LinkCountB;
+    int* ScoringMatrix = NULL;
+    int ExonCountA;
+    int ExonCountB;
+    int* ScoreTable;
+    int* NextX;
+    int* NextY;
+    int* NextZ;
+    IntNode** PrevX;
+    IntNode** PrevY;
+    char* XSequence;
+    char* YSequence;
+    int* ExonOffsetsA;
+    int* ExonOffsetsB;
+    int* ExonEdgeOffsetsA;
+    int* ExonEdgeOffsetsB;
+    int XBlockSize;
+    int YBlockSize;
+    IntNode* PrevNodeX;
+    IntNode* PrevNodeY;
+    int TableSize;
+    int X;
+    int Y;
+    int Z;
+    int BestX = 0;
+    int BestY = 0;
+    int BestZ = 0;
+    char ResidueA;
+    char ResidueB;
+    int Score;
+    int AlignScore;
+    int BestScore;
+    char* AlignStringA;
+    char* AlignStringB;
+    char* AlignStringC;
+    int TableIndex;
+    int PrevTableIndex;
+    int AlignStringLength;
+    char** RowInfoA;
+    char** RowInfoB;
+    // Ensure gap penalties are NEGATIVE numbers.  Negative is bad.
+    if (StartGapPenalty > 0)
+    {
+        StartGapPenalty = -StartGapPenalty;
+    }
+    if (ExtendGapPenalty > 0)
+    {
+        ExtendGapPenalty = -ExtendGapPenalty;
+    }
+
+    // Load the scoring matrix (or use default hamming distance)
+
+    if (ScoringMatrixFileName)
+    {
+        ScoringMatrix = LoadScoringMatrix(ScoringMatrixFileName);
+    }
+    if (!ScoringMatrix)
+    {
+        ScoringMatrix = GenerateHammingDistanceMatrix();
+    }
+    printf("\n\nGene A:\n");
+    //DebugPrintGene(GeneA); 
+    printf("\n\nGene B:\n");
+    //DebugPrintGene(GeneB); 
+
+    ////////////////////////////////////////////////////////////
+    // Count the exons and edges (with aa):
+    ExonCountA = GeneA->ExonCount;
+    ExonCountB = GeneB->ExonCount;
+    
+    GetExonGraphSize(GeneA, &LinkCountA, &MaxX);
+    GetExonGraphSize(GeneB, &LinkCountB, &MaxY);
+    ////////////////////////////////////////////////////////////
+    // Allocate arrays:
+    TableSize = MaxY * MaxX * 3;
+    ScoreTable = (int*)calloc(TableSize, sizeof(int));
+    NextX = (int*)calloc(TableSize, sizeof(int));
+    NextY = (int*)calloc(TableSize, sizeof(int));
+    NextZ = (int*)calloc(TableSize, sizeof(int));
+    XSequence = (char*)calloc(MaxX + 1, sizeof(char));
+    PrevX = (IntNode**)calloc(MaxX, sizeof(IntNode*));
+    YSequence = (char*)calloc(MaxY + 1, sizeof(char));
+    PrevY = (IntNode**)calloc(MaxY, sizeof(IntNode*));
+    ExonOffsetsA = (int*)calloc(ExonCountA, sizeof(int));
+    ExonEdgeOffsetsA = (int*)calloc(ExonCountA, sizeof(int));
+    ExonOffsetsB = (int*)calloc(ExonCountB, sizeof(int));
+    ExonEdgeOffsetsB = (int*)calloc(ExonCountB, sizeof(int));
+    RowInfoA = (char**)calloc(MaxX, sizeof(char*));
+    RowInfoB = (char**)calloc(MaxY, sizeof(char*));
+    for (X = 0; X < MaxX; X++)
+    {
+        RowInfoA[X] = (char*)calloc(64, sizeof(char));
+    }
+    for (Y = 0; Y < MaxY; Y++)
+    {
+        RowInfoB[Y] = (char*)calloc(64, sizeof(char));
+    }
+
+    ////////////////////////////////////////////////////////////
+    // Initialize the linked lists giving predecessors at each point.
+    SortGeneExons(GeneA);
+    SortGeneExons(GeneB);
+    FlattenExonsForAlignment(GeneA, ExonOffsetsA, ExonEdgeOffsetsA, XSequence, PrevX, RowInfoA);
+    FlattenExonsForAlignment(GeneB, ExonOffsetsB, ExonEdgeOffsetsB, YSequence, PrevY, RowInfoB);
+    ////////////////////////////////////////////////////////////
+    // Carry out dynamic programming:
+    XBlockSize = 3;
+    YBlockSize = XBlockSize * MaxX;
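+    // DP table layout: ScoreTable holds MaxY * MaxX * 3 cells.  Cell (X, Y, Z)
+    // lives at index Y*YBlockSize + X*XBlockSize + Z, where the three Z layers
+    // are Z_STANDARD (residue aligned to residue), Z_GAP_IN_X (gap opened or
+    // extended in X), and Z_GAP_IN_Y.  NextX/NextY/NextZ record, for each cell,
+    // the predecessor cell to follow during traceback.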
+    for (Y = 0; Y < MaxY; Y++)
+    {
+        ResidueB = YSequence[Y] - 'A';
+        if (ResidueB < 0 || ResidueB >= 26)
+        {
+            ResidueB = 23; //'X';
+        }
+        for (X = 0; X < MaxX; X++)
+        {
+            ResidueA = XSequence[X] - 'A';
+            if (ResidueA < 0 || ResidueA >= 26)
+            {
+                ResidueA = 23; //'X';
+            }
+            ////////////////////////////
+            // Z == 0, the alignment table:
+            TableIndex = Y*YBlockSize + X*XBlockSize + Z_STANDARD;
+            // Default: Jump in
+            BestScore = 0;
+            ScoreTable[TableIndex] = BestScore;
+            NextX[TableIndex] = -1;
+            NextY[TableIndex] = -1;
+            NextZ[TableIndex] = -1;
+            // Consider aligning:
+            AlignScore = ScoringMatrix[ResidueA * AA_COUNT + ResidueB];
+            // Aligning at the edges of the world is allowed:
+            if (!PrevX[X] || !PrevY[Y])
+            {
+                if (AlignScore > BestScore)
+                {
+                    ScoreTable[TableIndex] = AlignScore;
+                    BestScore = AlignScore;
+                }
+            }
+            else
+            {
+                // Consider each predecessor cell (x, y):
+                for (PrevNodeX = PrevX[X]; PrevNodeX; PrevNodeX = PrevNodeX->Next)
+                {
+                    for (PrevNodeY = PrevY[Y]; PrevNodeY; PrevNodeY = PrevNodeY->Next)
+                    {
+                        PrevTableIndex = PrevNodeY->Value * YBlockSize + PrevNodeX->Value * XBlockSize + 0;
+                        Score = AlignScore + ScoreTable[PrevTableIndex];
+                        if (Score > BestScore)
+                        {
+                            BestScore = Score;
+                            ScoreTable[TableIndex] = BestScore;
+                            NextX[TableIndex] = PrevNodeX->Value;
+                            NextY[TableIndex] = PrevNodeY->Value;
+                            NextZ[TableIndex] = 0;
+                        }
+                    }
+                }
+            }
+            // Consider gapping in x:
+            for (PrevNodeX = PrevX[X]; PrevNodeX; PrevNodeX = PrevNodeX->Next)
+            {
+                PrevTableIndex = Y * YBlockSize + PrevNodeX->Value * XBlockSize + Z_GAP_IN_X;
+                Score = StartGapPenalty + ScoreTable[PrevTableIndex];
+                if (Score > BestScore)
+                {
+                    BestScore = Score;
+                    ScoreTable[TableIndex] = BestScore;
+                    NextX[TableIndex] = PrevNodeX->Value;
+                    NextY[TableIndex] = Y;
+                    NextZ[TableIndex] = Z_GAP_IN_X;
+                }
+            }
+            // Consider gapping in y:
+            for (PrevNodeY = PrevY[Y]; PrevNodeY; PrevNodeY = PrevNodeY->Next)
+            {
+                PrevTableIndex = PrevNodeY->Value * YBlockSize + X * XBlockSize + Z_GAP_IN_Y;
+                Score = StartGapPenalty + ScoreTable[PrevTableIndex];
+                if (Score > BestScore)
+                {
+                    BestScore = Score;
+                    ScoreTable[TableIndex] = BestScore;
+                    NextX[TableIndex] = X;
+                    NextY[TableIndex] = PrevNodeY->Value;
+                    NextZ[TableIndex] = Z_GAP_IN_Y;
+                }
+            }
+            //printf("At %d, %d, 0: Score %d, prev %d, %d, %d\n", X, Y, ScoreTable[TableIndex],
+            //    NextX[TableIndex], NextY[TableIndex], NextZ[TableIndex]);
+            ////////////////////////////
+            // Z=1, gapping in x:
+            // By default, close the gap...but also consider extending it (unless x == 0)
+            TableIndex = Y*YBlockSize + X*XBlockSize + Z_GAP_IN_X;
+            PrevTableIndex = Y*YBlockSize + X*XBlockSize + Z_STANDARD;
+            BestScore = ScoreTable[PrevTableIndex];
+            ScoreTable[TableIndex] = BestScore;
+            NextX[TableIndex] = X;
+            NextY[TableIndex] = Y;
+            NextZ[TableIndex] = Z_STANDARD;
+            for (PrevNodeX = PrevX[X]; PrevNodeX; PrevNodeX = PrevNodeX->Next)
+            {
+                Score = ExtendGapPenalty + ScoreTable[Y*YBlockSize + PrevNodeX->Value * XBlockSize + Z_GAP_IN_X];
+                if (Score > BestScore)
+                {
+                    BestScore = Score;
+                    ScoreTable[TableIndex] = BestScore;
+                    NextX[TableIndex] = PrevNodeX->Value;
+                    NextY[TableIndex] = Y;
+                    NextZ[TableIndex] = Z_GAP_IN_X;
+                }
+            }
+            ////////////////////////////
+            // Z=2, gapping in y:
+            // By default, close the gap...but also consider extending it
+            TableIndex = Y*YBlockSize + X*XBlockSize + Z_GAP_IN_Y;
+            PrevTableIndex = Y*YBlockSize + X*XBlockSize + Z_STANDARD;
+            BestScore = ScoreTable[PrevTableIndex];
+            ScoreTable[TableIndex] = BestScore;
+            NextX[TableIndex] = X;
+            NextY[TableIndex] = Y;
+            NextZ[TableIndex] = Z_STANDARD;
+            for (PrevNodeY = PrevY[Y]; PrevNodeY; PrevNodeY = PrevNodeY->Next)
+            {
+                Score = ExtendGapPenalty + ScoreTable[PrevNodeY->Value*YBlockSize + X*XBlockSize + Z_GAP_IN_Y];
+                if (Score > BestScore)
+                {
+                    BestScore = Score;
+                    ScoreTable[TableIndex] = BestScore;
+                    NextX[TableIndex] = X;
+                    NextY[TableIndex] = PrevNodeY->Value;
+                    NextZ[TableIndex] = Z_GAP_IN_Y;
+                }
+            }
+        }
+    }
+    ////////////////////////////////////////////////////////////
+    // Find where the best alignment ends:
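+    // (This is a local alignment: the best-scoring cell may lie anywhere in
+    // the table, and the traceback below stops at a cell whose stored
+    // predecessor is -1, i.e. where the alignment "jumped in".)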
+    BestScore = -9999;
+    for (X = 0; X < MaxX; X++)
+    {
+        for (Y = 0; Y < MaxY; Y++)
+        {
+            for (Z = 0; Z < 3; Z++)
+            {
+                Score = ScoreTable[Y*YBlockSize + X*XBlockSize + Z];
+                if (Score > BestScore)
+                {
+                    BestScore = Score;
+                    BestX = X;
+                    BestY = Y;
+                    BestZ = Z;
+                }
+            }
+        }
+    }
+    ////////////////////////////////////////////////////////////
+    // Produce strings for the optimal alignment:
+    X = BestX;
+    Y = BestY;
+    Z = BestZ;
+    AlignStringLength = 0;
+    while (X >= 0)
+    {
+        TableIndex = Y*YBlockSize + X*XBlockSize + Z;
+        // Each step we take will add to the string...except closing a gap.
+        if (!Z || NextZ[TableIndex])
+        {
+            AlignStringLength++;
+        }
+        X = NextX[TableIndex];
+        Y = NextY[TableIndex];
+        Z = NextZ[TableIndex];
+    }
+
+    AlignStringA = (char*)calloc(AlignStringLength + 1, sizeof(char));
+    AlignStringB = (char*)calloc(AlignStringLength + 1, sizeof(char));
+    AlignStringC = (char*)calloc(AlignStringLength + 1, sizeof(char));
+    X = BestX;
+    Y = BestY;
+    Z = BestZ;
+    while (X >= 0)
+    {
+        AlignStringLength--;
+        TableIndex = Y*YBlockSize + X*XBlockSize + Z;
+        switch (Z)
+        {
+        case Z_STANDARD:
+            switch (NextZ[TableIndex])
+            {
+            case Z_STANDARD:
+            default:
+                ResidueA = XSequence[X];
+                ResidueB = YSequence[Y];
+                AlignStringA[AlignStringLength] = ResidueA;
+                AlignStringC[AlignStringLength] = ResidueB;
+                if (ResidueA == ResidueB)
+                {
+                    AlignStringB[AlignStringLength] = '*';
+                }
+                else
+                {
+                    AlignStringB[AlignStringLength] = ' ';
+                }
+                break;
+            case Z_GAP_IN_X:
+                AlignStringA[AlignStringLength] = XSequence[X];
+                AlignStringB[AlignStringLength] = ' ';
+                AlignStringC[AlignStringLength] = '-';
+                break;
+            case Z_GAP_IN_Y:
+                AlignStringA[AlignStringLength] = '-';
+                AlignStringB[AlignStringLength] = ' ';
+                AlignStringC[AlignStringLength] = YSequence[Y];
+                break;
+            }
+            break;
+        case Z_GAP_IN_X:
+            if (NextZ[TableIndex])
+            {
+                AlignStringA[AlignStringLength] = XSequence[X];
+                AlignStringB[AlignStringLength] = ' ';
+                AlignStringC[AlignStringLength] = '-';
+            }
+            break;            
+        case Z_GAP_IN_Y:
+            if (NextZ[TableIndex])
+            {
+                AlignStringA[AlignStringLength] = '-';
+                AlignStringB[AlignStringLength] = ' ';
+                AlignStringC[AlignStringLength] = YSequence[Y];
+            }
+            break;
+        }
+        
+        // Each step we take will add to the string...except closing a gap.
+        if (Z && !NextZ[TableIndex])
+        {
+            AlignStringLength++;
+        }
+        X = NextX[TableIndex];
+        Y = NextY[TableIndex];
+        Z = NextZ[TableIndex];
+    }
+    printf("Alignment score %d.  Alignment follows:\n", BestScore);
+    printf("%s\n", AlignStringA);
+    printf("%s\n", AlignStringB);
+    printf("%s\n", AlignStringC);
+    
+    ////////////////////////////////////////////////////////////
+    // cleanup:
+    SafeFree(ScoringMatrix);
+    SafeFree(ScoreTable);
+    SafeFree(ExonOffsetsA);
+    SafeFree(ExonOffsetsB);
+    SafeFree(NextX);
+    SafeFree(NextY);
+    SafeFree(NextZ);
+    SafeFree(YSequence);
+    SafeFree(XSequence);
+    SafeFree(ExonEdgeOffsetsA);
+    SafeFree(ExonEdgeOffsetsB);
+    if (AlignStringA)
+    {
+        SafeFree(AlignStringA);
+        SafeFree(AlignStringB);
+        SafeFree(AlignStringC);
+    }
+    FreePrevCellTable(PrevY, MaxY);
+    FreePrevCellTable(PrevX, MaxX);
+    for (Y = 0; Y < MaxY; Y++)
+    {
+        SafeFree(RowInfoB[Y]);
+    }
+    SafeFree(RowInfoB);
+    for (X = 0; X < MaxX; X++)
+    {
+        SafeFree(RowInfoA[X]);
+    }
+    SafeFree(RowInfoA);
+    return BestScore;
+}
+
+
diff --git a/ExonGraphAlign.h b/ExonGraphAlign.h
new file mode 100644
index 0000000..1143825
--- /dev/null
+++ b/ExonGraphAlign.h
@@ -0,0 +1,40 @@
+//Title:          ExonGraphAlign.h
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+#ifndef EXON_GRAPH_ALIGN_H
+#define EXON_GRAPH_ALIGN_H
+
+
+
+int AlignSequenceAgainstExonGraph(GeneStruct* Gene, char* Sequence, 
+    char* ScoringMatrixFileName, int StartGapPenalty, int ExtendGapPenalty);
+
+int AlignExonGraphAgainstExonGraph(GeneStruct* GeneA, GeneStruct* GeneB,
+    char* ScoringMatrixFileName, int StartGapPenalty, int ExtendGapPenalty);
+#endif // EXON_GRAPH_ALIGN_H
+
diff --git a/ExplainPTMs.py b/ExplainPTMs.py
new file mode 100644
index 0000000..bf2714e
--- /dev/null
+++ b/ExplainPTMs.py
@@ -0,0 +1,148 @@
+#Title:          ExplainPTMs.py
+#Author:         Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+#Created:        2005
+# Copyright 2007,2008,2009 The Regents of the University of California
+# All Rights Reserved
+#
+# Permission to use, copy, modify and distribute any part of this
+# program for educational, research and non-profit purposes, by non-profit
+# institutions only, without fee, and without a written agreement is hereby
+# granted, provided that the above copyright notice, this paragraph and
+# the following three paragraphs appear in all copies.
+#
+# Those desiring to incorporate this work into commercial
+# products or use for commercial purposes should contact the Technology
+# Transfer & Intellectual Property Services, University of California,
+# San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+# Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+#
+# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+# INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+# IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+# OF SUCH DAMAGE.
+#
+# THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+# OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+# ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+# REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+# EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+# THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+
+"""
+Once an unrestrictive PTM search has completed, attempt to suggest
+possible (bio)chemical explanations for the modifications seen.
+"""
+import Global
+from Utils import *
+Initialize()
+
+# Mass delta (in daltons) -> list of PTMs
+AllPTMsByMass = {}
+InitializeFlag = 0
+
+def FixQuotedString(String):
+    if String and String[0] == '"' and String[-1] == '"':
+        return String[1:-1]
+    return String
+
+class PTMClass:
+    def __init__(self, SourceDB, DBID, Name, Residues, Mass):
+        self.SourceDB = SourceDB
+        self.DBID = DBID
+        self.Name = FixQuotedString(Name)
+        self.Residues = Residues
+        if not self.Residues:
+            self.Residues = None # specific to a terminus, not a residue.
+        self.Mass = Mass
+        self.Terminus = "" # valid values: "C", "N", ""
+    def __str__(self):
+        return self.Name
+    def GetURL(self):
+        if self.SourceDB.lower() == "unimod":
+            return "http://www.unimod.org/cgi/unimod.cgi?record_id=%s&display_details_view.x=7&display_details_view.y=5&display_details_view=on"%self.DBID
+        else:
+            return None
+    def GetNameWithLink(self):
+        URL = self.GetURL()
+        if URL:
+            return "<a href=\"%s\">%s</a>"%(URL, self.Name)
+        else:
+            return self.Name
+        
+def LoadPTMDatabase():
+    global InitializeFlag
+    global AllPTMsByMass
+    if InitializeFlag:
+        return
+    InitializeFlag = 1
+    File = open("PTMDatabase.txt", "rb")
+    for FileLine in File.xreadlines():
+        Bits = FileLine.split("\t")
+        if FileLine[0] == "#" or len(Bits) < 5:
+            continue
+        try:
+            Mass = int(round(float(Bits[2])))
+        except:
+            # No valid mass? Probably a blank line.
+            continue
+        PTM = PTMClass(Bits[0], Bits[1], Bits[3], Bits[4], Mass)
+        if len(Bits) > 5 and Bits[5]:
+            Terminus = Bits[5][0]
+            if Terminus in ("C", "N"):
+                PTM.Terminus = Terminus
+        if not AllPTMsByMass.has_key(Mass):
+            AllPTMsByMass[Mass] = []
+        AllPTMsByMass[Mass].append(PTM)
+    File.close()
+
+def GetExplanation(AA, Mass, Terminus, BasePTM = 0):
+    """
+    Look for a known PTM that matches this residue, delta mass, and terminus.
+    If we don't find any such PTM, then look for a point mutation matching the
+    mass shift.  The output of this function is an initial hypothesis, and requires
+    verification.
+    """
+    AllResidues = "ACDEFGHIKLMNPQRSTVWY"
+    LoadPTMDatabase()
+    Explanations = []
+    # If there's a base modification applied to this residue, then we should
+    # handle that case specially.  Example: On cysteine, "-57" is a missing protecting
+    # group, and "-43" is a methylation!
+    if BasePTM:
+        if Mass == -BasePTM:
+            PTM = PTMClass("ProtectingGroup", None, "Missing %+d fixed mod"%BasePTM, AA, Mass)
+            Explanations.append(PTM)
+            return Explanations
+        Mass = Mass + BasePTM
+    PTMList = AllPTMsByMass.get(Mass, [])
+    for PTM in PTMList:
+        PTMOK = 0
+        if PTM.Residues == None:
+            if Terminus == PTM.Terminus:
+                #Explanations.append(PTM)
+                PTMOK = 1
+        elif (AA != None) and (AA in PTM.Residues):
+            if Terminus == PTM.Terminus or PTM.Terminus == "":
+                PTMOK = 1
+                #Explanations.append(PTM)
+        if not PTMOK:
+            continue
+        # Don't add multiple explanations with the same name!  (There's some redundancy)
+        for OldExplanation in Explanations:
+            if OldExplanation.Name == PTM.Name:
+                PTMOK = 0
+                break
+        if PTMOK:
+            Explanations.append(PTM)
+    # Perhaps we can explain it with a mutation:
+    if AA != None:
+        for OtherAA in AllResidues:
+            Delta = Global.AminoMass[OtherAA] - Global.AminoMass[AA]
+            if abs(Delta - Mass) < 1.0:
+                PTM = PTMClass("Mutation", None, "Mutation from %s to %s"%(AA, OtherAA), AA, Delta)
+                Explanations.append(PTM)
+    return Explanations
+            
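+
+if __name__ == "__main__":
+    # Illustrative usage sketch (an editor's addition, not part of the original
+    # upstream module): look up candidate explanations for a +80Da mass shift
+    # on serine.  Assumes PTMDatabase.txt is present in the working directory,
+    # as LoadPTMDatabase() expects.
+    for PTM in GetExplanation("S", 80, ""):
+        print "Candidate explanation for S+80: %s"%PTM.Name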
diff --git a/FDRUtils.py b/FDRUtils.py
new file mode 100644
index 0000000..722a132
--- /dev/null
+++ b/FDRUtils.py
@@ -0,0 +1,1109 @@
+#Title:          FDRUtils.py (formerly PValue.py)
+#Author:         Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+#Created:        2005
+# Copyright 2007,2008,2009 The Regents of the University of California
+# All Rights Reserved
+#
+# Permission to use, copy, modify and distribute any part of this
+# program for educational, research and non-profit purposes, by non-profit
+# institutions only, without fee, and without a written agreement is hereby
+# granted, provided that the above copyright notice, this paragraph and
+# the following three paragraphs appear in all copies.
+#
+# Those desiring to incorporate this work into commercial
+# products or use for commercial purposes should contact the Technology
+# Transfer & Intellectual Property Services, University of California,
+# San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+# Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+#
+# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+# INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+# IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+# OF SUCH DAMAGE.
+#
+# THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+# OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+# ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+# REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+# EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+# THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+
+"""
+This script, based on PeptideProphet, computes the estimated probability that
+a match is correct.  This probability is derived upon an F-Score.  The F-Score
+for a match is a weighted sum of the length-corrected MQScore and the delta score.
+
+We fit the distribution of F-scores as a mixture of two distributions:
+A GAMMA DISTRIBUTION for false matches (with lower mean)
+A NORMAL DISTRIBUTION for true matches (with higher mean)
+Therefore, the probability that a match with a given F-Score is correct depends
+upon the overall distribution of F-Scores for the rest of the run.
+
+============================================================================
+P-values for searches with a shuffled database:
+
+Given a score cutoff, let the number of valid-protein hits above the
+cutoff be V, and let the number of invalid-protein hits above the
+cutoff be I.
+
+# PVALUE WITH REMOVAL:
+Throw out the I hits from invalid proteins.
+Let TDB and FDB be the true and false database fractions.  (Note: For
+a 1:1 database, TDB and FDB are both 0.5, so TDB/FDB equals 1.0)  Even
+after throwing out hits from invalid proteins, there are still some
+chance hits to true proteins.  The estimated number of hits from V
+that are actually false is equal to: I*(TDB/FDB)
+
+Thus, the odds that a match above this score cutoff is correct:
+(V - I*(TDB/FDB)) / V
+
+This formulation of p-value is the one normally used (e.g. in an
+unmodified search).  Normally, we have no reason to keep any matches
+to shuffled proteins; we only generate them in the first place so that
+we can count them.
+
+# PVALUE WITHOUT REMOVAL (-H command-line option):
+Retain all hits.  As above, the number of
+hits from V that are false is I*(TDB/FDB).  Thus, the odds that a
+match above the score cutoff is correct:
+(V - I*(TDB/FDB)) / (I+V)
+
+"""
+import os
+import sys
+import random
+import math
+import getopt
+import traceback
+import string
+import struct
+import ResultsParser
+import SelectProteins
+import Learning
+from Utils import *
+Initialize()
+
+try:
+    from PIL import Image
+    from PIL import ImageDraw
+    from PIL import ImageFont
+    # Fonts don't seem to work on Linux.  (Tried pdf, pcf, and pil formats...but no luck)
+    # So, we'll content ourselves with a default font if we must:
+    try:
+        TheFont = ImageFont.truetype("Times.ttf", 12)
+    except:
+        TheFont = ImageFont.load_default()
+except:
+    print "(PIL not installed - image generation not available)"
+    Image = None
+
+class Colors:
+    White = (255, 255, 255)
+    Grey = (155, 155, 155)
+    Background = (255, 255, 255)
+    Black = (0, 0, 0)
+    Green = (0, 155, 0)
+    Red = (155, 0, 0)
+    Blue = (0, 0, 155)
+
+class Defaults:
+    "Default F-score distribution; a starting point for E/M model fitting."
+    MeanTrue = 4.48
+    VarianceTrue = 1.50
+    MeanFalse = 0.19
+    VarianceFalse = 0.17
+    PriorProbabilityTrue = 0.25
+    GammaOffset = 0.3
+    MQScoreWeight = 0.3
+    DeltaScoreWeight = 1.5
+    ###########################
+    BlindMeanTrue = 5.0
+    BlindVarianceTrue = 11.8
+    BlindMeanFalse = -0.8
+    BlindVarianceFalse = 0.7
+    BlindPriorProbabilityTrue = 0.18
+    BlindGammaOffset = 6.0
+    BlindMQScoreWeight = 0.3
+    BlindDeltaScoreWeight = 1.5
+
+BLIND_MOD_PENALTY = 1.0
+MIN_MQSCORE = -10.0
+
+# Parse the scores from at most this many output files.  
+MAX_RESULTS_FILES_TO_PARSE = 100
+
+BIN_MULTIPLIER = 10.0
+SQRT2PI = math.sqrt(2 * math.pi)
+
+Cof = [76.18009172947146, -86.50532032941677,
+    24.01409824083091, -1.231739572450155, 
+    0.1208650973866179e-2, -0.5395239384952e-5]
+
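+# Gamma(Z) returns the Gamma function of Z, computed via the Lanczos
+# approximation to log-gamma (the Cof coefficients above are the standard
+# six-term Lanczos series); for a positive integer N, Gamma(N) is (N - 1)!
+# up to floating-point error.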
+def Gamma(Z):
+    X = Z
+    Y = Z
+    Temp = X + 5.5
+    Temp -= (X + 0.5) * math.log(Temp)
+    Ser = 1.000000000190015
+    for J in range(6):
+        Y += 1
+        Ser += Cof[J] / Y
+    Z = -Temp + math.log(2.5066282746310005 * Ser / X)
+    return math.exp(Z)
+
+class Bag:
+    pass
+
+class PValueParser(ResultsParser.ResultsParser):
+    def __init__(self):
+        self.RetainBadMatches = 0
+        self.LoadDistributionPath = None
+        self.ScoreHistogram2 = {}
+        self.ScoreHistogram3 = {}
+        self.ShuffledScoreHistogram2 = {}
+        self.ShuffledScoreHistogram3 = {}
+        self.MinimumPeptideLength = 7
+        self.VerboseFlag = 0
+        self.GenerateImageFlag = 0
+        self.MQScoreWeight = Defaults.MQScoreWeight
+        self.DeltaScoreWeight = Defaults.DeltaScoreWeight
+        self.GammaOffset = Defaults.GammaOffset
+        self.BlindFlag = 0
+        self.PValueCutoff = 0.1 # default
+        # aminos -> location list
+        self.PeptideDict = {}
+        self.MaxDeltaScoreGap = -3.5
+        self.DBPath = []
+        self.PerformProteinSelection = 0
+        self.ProteinPicker = None
+        self.WriteTopMatchOnly = 0
+        self.ShuffledDatabaseFraction = None
+        self.RemoveShuffledMatches = 1
+        # Overwrite existing files in -w target:
+        self.OverwriteNewScoresFlag = 1
+        self.ClusterInfoPath = None
+        self.Columns = ResultsParser.Columns()
+        ResultsParser.ResultsParser.__init__(self)
+    def ReadDeltaScoreDistribution(self, FilePath):
+        """
+        Read delta-scores from a file, to compute the average delta-score.
+        If passed a directory, iterate over all results files in the directory.
+        """
+        #
+        self.AllSpectrumCount2 = 0
+        self.AllSpectrumCount3 = 0
+        self.MeanDeltaScore2 = 0
+        self.MeanDeltaScore3 = 0
+        self.ProcessResultsFiles(FilePath, self.ReadDeltaScoreDistributionFromFile, MAX_RESULTS_FILES_TO_PARSE)
+        self.MeanDeltaScore2 /= max(1, self.AllSpectrumCount2)
+        self.MeanDeltaScore3 /= max(1, self.AllSpectrumCount3)
+        if self.VerboseFlag:
+            print "Mean delta score ch1..2: %s over %s spectra"%(self.MeanDeltaScore2, self.AllSpectrumCount2)
+            print "Mean delta score ch3: %s over %s spectra"%(self.MeanDeltaScore3, self.AllSpectrumCount3)
+        if not self.MeanDeltaScore2:
+            self.MeanDeltaScore2 = 0.001
+        if not self.MeanDeltaScore3:
+            self.MeanDeltaScore3 = 0.001
+    def ReadDeltaScoreDistributionFromFile(self, FilePath):
+        "Read delta-scores from a single file, to compute the average delta-score."
+        print "Read delta-score distribution from %s..."%FilePath
+        try:
+            File = open(FilePath, "rb")
+        except:
+            traceback.print_exc()
+            return
+        OldSpectrum = None
+        for FileLine in File.xreadlines():
+            # Skip header lines and blank lines
+            if FileLine[0] == "#":
+                self.Columns.initializeHeaders(FileLine)
+                continue
+            if not FileLine.strip():
+                continue
+            Bits = list(FileLine.split("\t"))
+            if len(Bits) <= self.Columns.getIndex("DeltaScore"):
+                continue
+            try:
+                Charge = int(Bits[self.Columns.getIndex("Charge")])
+                MQScore = float(Bits[self.Columns.getIndex("MQScore")])
+                DeltaScore = float(Bits[self.Columns.getIndex("DeltaScoreOther")])
+                Peptide = GetPeptideFromModdedName(Bits[self.Columns.getIndex("Annotation")])
+                Spectrum = (os.path.basename(Bits[self.Columns.getIndex("SpectrumFile")]), Bits[self.Columns.getIndex("Scan#")])
+            except:
+                traceback.print_exc()
+                print Bits
+                continue # header line
+            if Spectrum == OldSpectrum:
+                continue
+            
+            OldSpectrum = Spectrum
+            
+            Length = len(Peptide.Aminos)
+            if Length < self.MinimumPeptideLength:
+                continue
+            if DeltaScore < 0:
+                print "## Warning: DeltaScore < 0!", Spectrum, FilePath
+                print DeltaScore
+                print MQScore
+                print Bits
+                raw_input()
+                continue
+            if Charge < 3:
+                self.AllSpectrumCount2 += 1
+                self.MeanDeltaScore2 += DeltaScore
+                
+            else:
+                self.AllSpectrumCount3 += 1
+                self.MeanDeltaScore3 += DeltaScore
+        File.close()            
+    def ReadScoreDistributionFromFile(self, FilePath):
+        """
+        Read F-scores from a single file, to compute the score histogram.
+        """
+        print "Read score distribution from %s..."%FilePath
+        try:
+            File = open(FilePath, "rb")
+        except:
+            traceback.print_exc()
+            return
+        OldSpectrum = None
+        for FileLine in File.xreadlines():
+            # Skip header lines and blank lines
+            if FileLine[0] == "#":
+                self.Columns.initializeHeaders(FileLine)
+                continue
+            if not FileLine.strip():
+                continue            
+            Bits = list(FileLine.split("\t"))
+            try:
+                Charge = int(Bits[self.Columns.getIndex("Charge")])
+                MQScore = float(Bits[self.Columns.getIndex("MQScore")])
+                DeltaScore = float(Bits[self.Columns.getIndex("DeltaScore")])
+                Peptide = GetPeptideFromModdedName(Bits[self.Columns.getIndex("Annotation")])
+                Protein = Bits[self.Columns.getIndex("ProteinName")]
+                Spectrum = (Bits[self.Columns.getIndex("SpectrumFile")], Bits[self.Columns.getIndex("Scan#")])
+            except:
+                continue # header line
+            if Spectrum == OldSpectrum:
+                continue
+            OldSpectrum = Spectrum
+            Length = len(Peptide.Aminos)
+            if Length < self.MinimumPeptideLength:
+                continue
+            if (Charge < 3):
+                MeanDeltaScore = self.MeanDeltaScore2
+            else:
+                MeanDeltaScore = self.MeanDeltaScore3
+            WeightedScore = self.MQScoreWeight * MQScore + self.DeltaScoreWeight * (DeltaScore / MeanDeltaScore)
+            ScoreBin = int(round(WeightedScore * BIN_MULTIPLIER))
+            Hit = 1
+            if self.ClusterInfoPath:
+                # Get this cluster's size:
+                ClusterFileName = Bits[0].replace("/","\\").split("\\")[-1]
+                ScanNumber = int(Bits[1])
+                ClusterSize = self.ClusterSizes.get((ClusterFileName, ScanNumber), None)
+                if not ClusterSize:
+                    print "* Warning: ClusterSize not known for %s, %s"%(ClusterFileName, ScanNumber)
+                else:
+                    Hit = ClusterSize
+            if Charge < 3:
+                self.ScoreHistogram2[ScoreBin] = self.ScoreHistogram2.get(ScoreBin, 0) + Hit
+            else:
+                self.ScoreHistogram3[ScoreBin] = self.ScoreHistogram3.get(ScoreBin, 0) + Hit
+            if self.ShuffledDatabaseFraction:
+                if Protein[:3] == "XXX":
+                    if Charge < 3:
+                        self.ShuffledScoreHistogram2[ScoreBin] = self.ShuffledScoreHistogram2.get(ScoreBin, 0) + Hit
+                    else:
+                        self.ShuffledScoreHistogram3[ScoreBin] = self.ShuffledScoreHistogram3.get(ScoreBin, 0) + Hit
+        File.close()
+    def ProduceScoreDistributionImage(self, ImagePath, Charge3Flag = 0):
+        """
+        Write out, to the specified path, an image with f-score on the X-axis
+        and p-value on the Y-axis.  If we fit a mixture model, plot the true
+        and false (model) distributions; if we fit using shuffled proteins,
+        plot the empirical distributions.
+        """
+        if Image == None:
+            return
+        
+        if Charge3Flag:
+            ScoreHistogram = self.ScoreHistogram3
+            ShuffledScoreHistogram = self.ShuffledScoreHistogram3
+        else:
+            ScoreHistogram = self.ScoreHistogram2
+            ShuffledScoreHistogram = self.ShuffledScoreHistogram2
+        # Image size:
+        self.Width = 900
+        self.Height = 500
+        self.LeftPadding = 50
+        self.RightPadding = 80
+        self.BottomPadding = 40
+        self.TopPadding = 10
+        self.PlotWidth = self.Width - (self.LeftPadding + self.RightPadding)
+        self.PlotHeight = self.Height - (self.TopPadding + self.BottomPadding)
+        self.PlotImage = Image.new("RGB", (self.Width, self.Height), Colors.Background)
+        self.Draw = ImageDraw.Draw(self.PlotImage)
+        # Find largest and smallest bins and entries:
+        self.MaxScoreHistogramEntry = 10
+        self.MaxScoreHistogramEntryValid = 10
+        self.MaxScoreHistogramEntryInvalid = 10
+        self.MaxScoreBin = -9999
+        self.MinScoreBin = 9999
+        self.TotalHistogramEntries = 0
+        for (Bin, Entry) in ScoreHistogram.items():
+            self.MaxScoreHistogramEntry = max(Entry, self.MaxScoreHistogramEntry)
+            InvalidCount = ShuffledScoreHistogram.get(Bin, 0)
+            ValidCount = max(0, Entry - InvalidCount)
+            self.MaxScoreHistogramEntryValid = max(self.MaxScoreHistogramEntryValid, ValidCount)
+            self.MaxScoreHistogramEntryInvalid = max(self.MaxScoreHistogramEntryInvalid, InvalidCount)
+            self.MaxScoreBin = max(self.MaxScoreBin, Bin)
+            self.MinScoreBin = min(self.MinScoreBin, Bin)
+            self.TotalHistogramEntries += Entry
+            #print "Bin %s: Valid %s, invalid %s"%(Bin, ValidCount, InvalidCount)
+        self.BinCount = self.MaxScoreBin - self.MinScoreBin + 1
+        # Draw the Y axis:
+        self.Draw.line((self.LeftPadding, self.TopPadding, self.LeftPadding, self.TopPadding + self.PlotHeight), Colors.Black)
+        self.Draw.line((self.Width - self.RightPadding, self.TopPadding, self.Width - self.RightPadding, self.TopPadding + self.PlotHeight), Colors.Black)
+        Fraction = 0
+        while Fraction <= 1.0:
+            Y = self.TopPadding + self.PlotHeight * (1.0 - Fraction)
+            Label = str(int(round(Fraction * self.MaxScoreHistogramEntry)))
+            self.Draw.text((self.LeftPadding - 5 - len(Label)*5, Y - 6), Label, Colors.Black)
+            self.Draw.line((self.LeftPadding - 5, Y, self.LeftPadding, Y), Colors.Black)
+            Label = str(Fraction)
+            self.Draw.text((self.Width - self.RightPadding + 10, Y - 6), Label, Colors.Black)
+            self.Draw.line((self.Width - self.RightPadding, Y, self.Width - self.RightPadding + 5, Y), Colors.Black)
+            Fraction += 0.1
+        # Draw the X axis:
+        self.Draw.line((self.LeftPadding, self.Height - self.BottomPadding, self.Width - self.RightPadding, self.Height - self.BottomPadding), Colors.Black)
+        Bin = self.MinScoreBin
+        while Bin % 10 != 0:
+            Bin += 1
+        while Bin < self.MaxScoreBin:
+            BinNumber = Bin - self.MinScoreBin
+            X = self.LeftPadding + BinNumber * self.PlotWidth / float(self.BinCount)            
+            self.Draw.line((X, self.Height - self.BottomPadding - 2, X, self.Height - self.BottomPadding + 2), Colors.Black)
+            Label = "%.1f"%(Bin / BIN_MULTIPLIER)
+            self.Draw.text((X - len(Label) * 2.5, self.Height - self.BottomPadding + 2), Label, Colors.Black)
+            Bin += 10
+        if self.ShuffledDatabaseFraction != None:
+            self.ProduceImageShuffledDB(Charge3Flag)
+        else:
+            self.ProduceImageMixtureModel(Charge3Flag)
+        self.PlotImage.save(ImagePath)
+        # Free:
+        self.PlotImage = None
+        self.Draw = None
+    def ProduceImageShuffledDB(self, Charge3Flag = 0):
+        if not Image:
+            return
+
+        if Charge3Flag:
+            ScoreHistogram = self.ScoreHistogram3
+            ShuffledScoreHistogram = self.ShuffledScoreHistogram3
+            OddsTrue = self.OddsTrue3
+        else:
+            ScoreHistogram = self.ScoreHistogram2
+            ShuffledScoreHistogram = self.ShuffledScoreHistogram2
+            OddsTrue = self.OddsTrue2
+        # Draw the legend:
+        Y = self.Height - self.BottomPadding + 20
+        self.Draw.line((105, Y, 125, Y), Colors.Black)
+        self.Draw.rectangle((113, Y-2, 118, Y+2), Colors.Black)
+        self.Draw.text((130, Y - 5), "p-value", Colors.Black)
+        Y = self.Height - self.BottomPadding + 30
+        self.Draw.line((105, Y, 125, Y), Colors.Grey)
+        self.Draw.rectangle((113, Y-2, 118, Y+2), Colors.Grey)
+        self.Draw.text((130, Y - 5), "All hits", Colors.Grey)
+        Y = self.Height - self.BottomPadding + 20
+        self.Draw.line((305, Y, 325, Y), Colors.Green)
+        self.Draw.rectangle((313, Y-2, 318, Y+2), Colors.Green)
+        self.Draw.text((330, Y - 5), "Valid proteins", Colors.Green)
+        Y = self.Height - self.BottomPadding + 30
+        self.Draw.line((305, Y, 325, Y), Colors.Red)
+        self.Draw.rectangle((313, Y-2, 318, Y+2), Colors.Red)
+        self.Draw.text((330, Y - 5), "Invalid proteins", Colors.Red)
+        # Loop over bins, plotting distributions:
+        PrevYOdds = None
+        PrevYAll = None
+        PrevYTrue = None
+        PrevYFalse = None
+        PrevX = None
+        for Bin in range(self.MinScoreBin, self.MaxScoreBin + 1):
+            BinNumber = Bin - self.MinScoreBin
+            XX = self.LeftPadding + (BinNumber * self.PlotWidth / float(self.BinCount))
+            # p-value:
+            PValue = 1.0 - OddsTrue[Bin]
+            YOdds = self.Height - self.BottomPadding - self.PlotHeight * PValue
+            self.Draw.rectangle((XX - 2, YOdds - 2, XX + 2, YOdds + 2), Colors.Black)
+            if PrevYOdds != None:
+                self.Draw.line((PrevX, PrevYOdds, XX, YOdds), Colors.Black)
+            # Overall:
+            Count = ScoreHistogram.get(Bin, 0)
+            YAll = self.Height - self.BottomPadding - self.PlotHeight * Count / float(self.MaxScoreHistogramEntry)
+            self.Draw.rectangle((XX - 2, YAll - 2, XX + 2, YAll + 2), Colors.Grey)
+            if (PrevYAll):
+                self.Draw.line((PrevX, PrevYAll, XX, YAll), Colors.Grey)
+            # Invalid:
+            CountInvalid = ShuffledScoreHistogram.get(Bin, 0)
+            YFalse = self.Height - self.BottomPadding - self.PlotHeight * CountInvalid / float(self.MaxScoreHistogramEntryInvalid)
+            self.Draw.rectangle((XX - 2, YFalse - 2, XX + 2, YFalse + 2), Colors.Red)
+            if (PrevYFalse):
+                self.Draw.line((PrevX, PrevYFalse, XX, YFalse), Colors.Red)
+            # Valid:
+            CountValid = Count - CountInvalid
+            YTrue = self.Height - self.BottomPadding - self.PlotHeight * CountValid / float(self.MaxScoreHistogramEntryValid)
+            self.Draw.rectangle((XX - 2, YTrue - 2, XX + 2, YTrue + 2), Colors.Green)
+            #print "Bin %s: Valid %s/%s invalid %s/%s"%(Bin, CountValid, self.MaxScoreHistogramEntryValid, CountInvalid, self.MaxScoreHistogramEntryInvalid)
+            if (PrevYTrue):
+                self.Draw.line((PrevX, PrevYTrue, XX, YTrue), Colors.Green)
+            # Remember these values, for linking to the next in the series:
+            PrevX = XX
+            PrevYOdds = YOdds
+            PrevYAll = YAll
+            PrevYFalse = YFalse
+            PrevYTrue = YTrue
+    def ProduceImageMixtureModel(self, Charge3Flag = 0):
+        """
+        Helper for ProduceScoreDistributionImage, if we're using a mixture
+        model (not a shuffled database)
+        """
+        if Image == None:
+            return
+        if Charge3Flag:
+            ScoreHistogram = self.ScoreHistogram3
+            ShuffledScoreHistogram = self.ShuffledScoreHistogram3
+            MixtureModel = self.MixtureModel3
+            OddsTrue = self.OddsTrue3
+        else:
+            ScoreHistogram = self.ScoreHistogram2
+            ShuffledScoreHistogram = self.ShuffledScoreHistogram2
+            MixtureModel = self.MixtureModel2
+            OddsTrue = self.OddsTrue2
+        # Draw the legend:
+        Y = self.Height - self.BottomPadding + 20
+        self.Draw.line((105, Y, 125, Y), Colors.Black)
+        self.Draw.rectangle((113, Y-2, 118, Y+2), Colors.Black)
+        self.Draw.text((130, Y - 5), "Empirical score distribution", Colors.Black)
+        Y = self.Height - self.BottomPadding + 30
+        self.Draw.line((105, Y, 125, Y), Colors.Blue)
+        self.Draw.rectangle((113, Y-2, 118, Y+2), Colors.Blue)
+        self.Draw.text((130, Y - 5), "Probability true (1-pvalue)", Colors.Blue)
+        Y = self.Height - self.BottomPadding + 20
+        self.Draw.line((305, Y, 325, Y), Colors.Red)
+        self.Draw.rectangle((313, Y-2, 318, Y+2), Colors.Red)
+        self.Draw.text((330, Y - 5), "Gamma dist. (fit to false matches)", Colors.Red)
+        Y = self.Height - self.BottomPadding + 30
+        self.Draw.line((305, Y, 325, Y), Colors.Green)
+        self.Draw.rectangle((313, Y-2, 318, Y+2), Colors.Green)
+        self.Draw.text((330, Y - 5), "Normal dist. (fit to true matches)", Colors.Green)
+        Y = self.Height - self.BottomPadding + 20
+        self.Draw.line((555, Y, 575, Y), Colors.Grey)
+        self.Draw.rectangle((563, Y-2, 568, Y+2), Colors.Grey)
+        self.Draw.text((580, Y - 5), "Fitted mixture model", Colors.Grey)
+        # Draw the plot of OBSERVED SCORES:
+        PrevX = None
+        PrevY = None
+        for Bin in range(self.MinScoreBin, self.MaxScoreBin + 1):
+            BinNumber = Bin - self.MinScoreBin
+            X = self.LeftPadding + BinNumber * self.PlotWidth / float(self.BinCount)
+            Count = ScoreHistogram.get(Bin, 0)
+            Y = self.Height - self.BottomPadding - self.PlotHeight * Count / float(self.MaxScoreHistogramEntry)
+            self.Draw.rectangle((X - 2, Y - 2, X + 2, Y + 2), Colors.Black)
+            if PrevX != None:
+                self.Draw.line((PrevX, PrevY, X, Y), Colors.Black)
+            PrevX = X
+            PrevY = Y
+        #######################################################
+        # Find the scaling factor for the MERGED distribution:
+        ComboDistTotal = 0
+        for Bin in range(self.MinScoreBin, self.MaxScoreBin + 1):
+            TrueScore = Bin / BIN_MULTIPLIER
+            Pow = - ((TrueScore - MixtureModel.MeanTrue)**2) / (2 * MixtureModel.VarianceTrue)
+            TrueNormal = math.exp(Pow) / (MixtureModel.StdDevTrue * SQRT2PI)
+            GX = max(0.01, TrueScore + MixtureModel.GammaOffset)
+            FalseGamma = math.pow(GX, MixtureModel.KFalse - 1) * math.exp(-GX / MixtureModel.ThetaFalse) / MixtureModel.GammaDemonFalse
+            ComboDist = TrueNormal * MixtureModel.PriorProbabilityTrue + (1.0 - MixtureModel.PriorProbabilityTrue) * FalseGamma
+            ComboDistTotal += ComboDist
+        YFittedScalingFactor = self.TotalHistogramEntries / ComboDistTotal
+        #######################################################
+        # Draw the plot of the FALSE HIT GAMMA and TRUE HIT NORMAL and MERGED distributions:
+        PrevX = None
+        PrevYNormal = None
+        PrevYGamma = None
+        PrevYOdds = None
+        PrevYFitted = None
+        for Bin in range(self.MinScoreBin, self.MaxScoreBin + 1):
+            BinNumber = Bin - self.MinScoreBin
+            XX = self.LeftPadding + (BinNumber * self.PlotWidth / float(self.BinCount))
+            TrueScore = Bin / BIN_MULTIPLIER
+            Pow = - ((TrueScore - MixtureModel.MeanTrue)**2) / (2 * MixtureModel.VarianceTrue)
+            TrueNormal = math.exp(Pow) / (MixtureModel.StdDevTrue * SQRT2PI)
+            GX = max(0.01, TrueScore + MixtureModel.GammaOffset)
+            FalseGamma = math.pow(GX, MixtureModel.KFalse - 1) * math.exp(-GX / MixtureModel.ThetaFalse) / MixtureModel.GammaDemonFalse
+            YNormal = self.Height - self.BottomPadding - self.PlotHeight * TrueNormal
+            # Normal distribution:
+            self.Draw.rectangle((XX - 2, YNormal - 2, XX + 2, YNormal + 2), Colors.Green)
+            if PrevX != None:
+                self.Draw.line((PrevX, PrevYNormal, XX, YNormal), Colors.Green)
+            # Gamma distribution:
+            YGamma = self.Height - self.BottomPadding - self.PlotHeight * FalseGamma
+            self.Draw.rectangle((XX - 2, YGamma - 2, XX + 2, YGamma + 2), Colors.Red)
+            if PrevX != None:
+                self.Draw.line((PrevX, PrevYGamma, XX, YGamma), Colors.Red)
+            # Fitted curve:
+            ComboDist = TrueNormal * MixtureModel.PriorProbabilityTrue + (1.0 - MixtureModel.PriorProbabilityTrue) * FalseGamma
+            YFitted = ComboDist * YFittedScalingFactor / self.MaxScoreHistogramEntry
+            YFitted = self.Height - self.BottomPadding - YFitted * self.PlotHeight
+            #print TrueNormal, FalseGamma, self.AllSpectrumCount, ComboDist, YFitted
+            self.Draw.rectangle((XX - 2, YFitted - 2, XX + 2, YFitted + 2), Colors.Grey)
+            if PrevX != None:
+                self.Draw.line((PrevX, PrevYFitted, XX, YFitted), Colors.Grey)
+            # P-Value:
+            PValue = 1.0 - OddsTrue.get(Bin, 0)
+            YOdds = self.Height - self.BottomPadding - self.PlotHeight * PValue
+            self.Draw.rectangle((XX - 2, YOdds - 2, XX + 2, YOdds + 2), Colors.Blue)
+            if PrevX != None:
+                self.Draw.line((PrevX, PrevYOdds, XX, YOdds), Colors.Blue)
+            # Remember these points' coords for drawing lines next time:
+            PrevX = XX
+            PrevYNormal = YNormal
+            PrevYGamma = YGamma
+            PrevYOdds = YOdds
+            PrevYFitted = YFitted
+    def FitMixtureModel(self):
+        self.MixtureModel2 = Learning.MixtureModelClass()
+        self.MixtureModel2.Model(None, self.ScoreHistogram2)
+        self.OddsTrue2 = self.MixtureModel2.OddsTrue
+        self.MixtureModel3 = Learning.MixtureModelClass()
+        self.MixtureModel3.Model(None, self.ScoreHistogram3)
+        self.OddsTrue3 = self.MixtureModel3.OddsTrue        
+        return 1
+    def SavePValueDistribution(self, Charge3Flag = 0):
+        """
+        Write out a p-value distribution derived from forward+shuffled database
+        """
+        if Charge3Flag:
+            OddsTrue = self.OddsTrue3
+            MeanDeltaScore = self.MeanDeltaScore3
+            ScoreHistogram = self.ScoreHistogram3
+            ShuffledScoreHistogram = self.ShuffledScoreHistogram3
+        else:
+            OddsTrue = self.OddsTrue2
+            MeanDeltaScore = self.MeanDeltaScore2
+            ScoreHistogram = self.ScoreHistogram2
+            ShuffledScoreHistogram = self.ShuffledScoreHistogram2
+        Keys = OddsTrue.keys()
+        if not Keys:
+            return
+        MinBin = min(Keys)
+        MaxBin = max(Keys)
+        self.OutputDistributionFile.write("#MeanDeltaScore\t%s\n"%MeanDeltaScore)
+        self.OutputDistributionFile.write("#BlindFlag\t%s\n"%self.BlindFlag)
+        if self.ShuffledDatabaseFraction != None:
+            Header = "#Bin\tFDR\tTotalHits\tHitsValid\tHitsInvalid\tPeptideFDR\tPeptidesValid\tPeptidesInvalid\tProteinFDR\tProteinsValid\tProteinsInvalid\t\n"
+        else:
+            Header = "#Bin\tFDR\tTotalHits\t\n"
+        self.OutputDistributionFile.write(Header)
+        if self.ShuffledDatabaseFraction != None:
+            # Count the total number of true hits, false hits, true peptides, false peptides...
+            CumulativeTrueHits = 0
+            CumulativeFalseHits = 0
+            for Bin in range(MinBin, MaxBin + 1):
+                AllHits = ScoreHistogram.get(Bin, 0)
+                FalseHits = ShuffledScoreHistogram.get(Bin, 0)
+                CumulativeFalseHits += FalseHits
+                CumulativeTrueHits += (AllHits - FalseHits)
+            if self.ProteinPicker:
+                ######################################################
+                # Peptides:
+                ValidPeptides = {}
+                InvalidPeptides = {}
+                CumulativeTruePeptides = 0
+                CumulativeFalsePeptides = 0
+                BestScoreByProtein = {}
+                for (Peptide, Score) in self.ProteinPicker.BestScoresByPeptide.items():
+                    Bin = int(round(Score / BIN_MULTIPLIER))
+                    ProteinID = self.ProteinPicker.PeptideProteins.get(Peptide, None)
+                    if not ProteinID:
+                        print "*** Warning: Peptide '%s' was never assigned to a protein!"%Peptide
+                        LocationList = self.ProteinPicker.PeptideDict[Peptide]
+                        print LocationList
+                        for (ProteinID, Pos) in LocationList:
+                            print ProteinID, self.ProteinPicker.ProteinNames[ProteinID], self.ProteinPicker.ProteinPeptideCounts[ProteinID]
+                        continue # shouldn't occur!
+                    ProteinName = self.ProteinPicker.ProteinNames[ProteinID]
+                    OldScore = BestScoreByProtein.get(ProteinID, -9999)
+                    BestScoreByProtein[ProteinID] = max(OldScore, Score)
+                    if ProteinName[:3] == "XXX":
+                        InvalidPeptides[Bin] = InvalidPeptides.get(Bin, 0) + 1
+                        CumulativeFalsePeptides += 1
+                    else:
+                        ValidPeptides[Bin] = ValidPeptides.get(Bin, 0) + 1
+                        CumulativeTruePeptides += 1
+                ######################################################
+                # Proteins:
+                ValidProteins = {}
+                InvalidProteins = {}
+                CumulativeTrueProteins = 0
+                CumulativeFalseProteins = 0
+                for (ProteinID, Score) in BestScoreByProtein.items():
+                    # Bin this protein's best score, using the same binning as
+                    # the peptide loop above:
+                    Bin = int(round(Score / BIN_MULTIPLIER))
+                    ProteinName = self.ProteinPicker.ProteinNames[ProteinID]
+                    if ProteinName[:3] == "XXX":
+                        InvalidProteins[Bin] = InvalidProteins.get(Bin, 0) + 1
+                        CumulativeFalseProteins += 1
+                    else:
+                        ValidProteins[Bin] = ValidProteins.get(Bin, 0) + 1
+                        CumulativeTrueProteins += 1
+        for Bin in range(MinBin, MaxBin + 1):
+            FDR = 1.0 - OddsTrue[Bin]
+            AllHits = ScoreHistogram.get(Bin, 0)
+            self.OutputDistributionFile.write("%s\t%s\t%s\t"%(Bin, FDR, AllHits))
+            if self.ShuffledDatabaseFraction != None:
+                FalseHits = ShuffledScoreHistogram.get(Bin, 0)
+                TrueHits = AllHits - FalseHits
+                CumulativeTrueHits -= TrueHits
+                CumulativeFalseHits -= FalseHits                
+                self.OutputDistributionFile.write("%s\t%s\t"%(CumulativeTrueHits, CumulativeFalseHits))
+                if self.ProteinPicker:
+                    # Peptide FDR:
+                    FalseWithinTrue = min(CumulativeTruePeptides, CumulativeFalsePeptides * self.ShuffledScalingFactor)
+                    PeptideFDR = FalseWithinTrue / float(max(1, CumulativeTruePeptides))
+                    self.OutputDistributionFile.write("%.4f\t%s\t%s\t"%(PeptideFDR, CumulativeTruePeptides, CumulativeFalsePeptides))
+                    CumulativeTruePeptides -= ValidPeptides.get(Bin, 0)
+                    CumulativeFalsePeptides -= InvalidPeptides.get(Bin, 0)
+                    # Protein FDR:
+                    FalseWithinTrue = min(CumulativeTrueProteins, CumulativeFalseProteins * self.ShuffledScalingFactor)
+                    ProteinFDR = FalseWithinTrue / float(max(1, CumulativeTrueProteins))
+                    self.OutputDistributionFile.write("%.4f\t%s\t%s\t"%(ProteinFDR, CumulativeTrueProteins, CumulativeFalseProteins))
+                    CumulativeTrueProteins -= ValidProteins.get(Bin, 0)
+                    CumulativeFalseProteins -= InvalidProteins.get(Bin, 0)
+            self.OutputDistributionFile.write("\n")
+    def LoadPValueDistribution(self, FileName):
+        Charge3Flag = 0
+        
+        File = open(FileName, "rb")
+
+        for FileLine in File.xreadlines():
+            Bits = list(FileLine.strip().split("\t"))
+            if len(Bits) < 2:
+                continue            
+            if FileLine[0] == "#":
+                # Header line.  Parse special lines:
+                Name = Bits[0][1:]
+                if Name == "BlindFlag":
+                    self.BlindFlag = int(Bits[1])
+                elif Name == "MeanDeltaScore":
+                    if Charge3Flag:
+                        self.MeanDeltaScore3 = float(Bits[1])
+                        OddsTrue = {}
+                        self.OddsTrue3 = OddsTrue
+                    else:
+                        self.MeanDeltaScore2 = float(Bits[1])
+                        OddsTrue = {}
+                        self.OddsTrue2 = OddsTrue
+                        
+                else:
+                    print "(Skipping comment '%s', not understood)"%Bits[0]
+                continue
+            Bin = int(Bits[0])
+            OddsTrue[Bin] = 1.0 - float(Bits[1])
+            # We've reached the data lines for charges 1 and 2, so the next
+            # "#MeanDeltaScore" header we encounter will belong to charge 3:
+            Charge3Flag = 1
+        File.close()
+        if self.BlindFlag:
+            self.MQScoreWeight = Defaults.BlindMQScoreWeight
+            self.DeltaScoreWeight = Defaults.BlindDeltaScoreWeight
+            self.GammaOffset = Defaults.BlindGammaOffset
+        else:
+            self.MQScoreWeight = Defaults.MQScoreWeight
+            self.DeltaScoreWeight = Defaults.DeltaScoreWeight
+            self.GammaOffset = Defaults.GammaOffset
+    def WriteMatchesForSpectrum(self, MatchesForSpectrum, OutFile):
+        if self.WriteTopMatchOnly:
+            MatchesForSpectrum = MatchesForSpectrum[0:1]
+        for Match in MatchesForSpectrum:
+            # If we have a shuffled database (-S option), then by default we get to throw shuffled-protein
+            # matches away for free.  We don't get to keep runners-up to them, though!!
+            if Match.ProteinName[:3] == "XXX" and self.ShuffledDatabaseFraction != None and self.RemoveShuffledMatches:
+                break
+            # Skip matches with poor delta-score:
+            if Match.DeltaScore < self.MaxDeltaScoreGap and not self.RetainBadMatches:
+                continue
+            # Skip short matches:
+            Length = len(Match.Peptide.Aminos)
+            if Length < self.MinimumPeptideLength:
+                continue
+            if Match.Charge < 3:
+                MeanDeltaScore = self.MeanDeltaScore2
+            else:
+                MeanDeltaScore = self.MeanDeltaScore3
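+            # Composite F-score: MQScore blended with the delta score normalized by the
+            # charge-specific mean, then binned with BIN_MULTIPLIER to look up the
+            # empirical odds-true for that score bin.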
+            WeightedScore = self.MQScoreWeight * Match.MQScore + self.DeltaScoreWeight * (Match.DeltaScore / MeanDeltaScore)
+            ScoreBin = int(round(WeightedScore * BIN_MULTIPLIER))
+            if Match.Charge < 3:
+                TrueOdds = self.OddsTrue2.get(ScoreBin, None)
+            else:
+                TrueOdds = self.OddsTrue3.get(ScoreBin, None)
+            if TrueOdds == None:
+                if ScoreBin < 0:
+                    TrueOdds = 0.00001
+                else:
+                    TrueOdds = 0.99999
+            else:
+                TrueOdds = max(0.00001, min(TrueOdds, 0.99999))
+            Match.PValue = (1.0 - TrueOdds)
+            Match.Bits[self.Columns.getIndex("F-Score")] = "%s"%WeightedScore
+            Match.Bits[self.Columns.getIndex("InspectFDR")] = "%s"%Match.PValue
+            if self.ProteinPicker:
+                # Replace the original protein with the "correct" one:
+                ProteinID = self.ProteinPicker.PeptideProteins.get(Match.Peptide.Aminos, None)
+                if ProteinID != None:
+                    Match.Bits[self.Columns.getIndex("RecordNumber")] = str(ProteinID)
+                    Match.Bits[self.Columns.getIndex("Protein")] = self.ProteinPicker.ProteinNames[ProteinID]
+            if (not self.RetainBadMatches):
+                if (Match.PValue > self.PValueCutoff):
+                    continue
+                # Sometimes things with a horrible MQScore get a good pvalue.
+                # We want to exclude these.
+                if Match.MQScore < MIN_MQSCORE:
+                    continue
+            self.LinesAcceptedCount += 1
+            OutFile.write(string.join(Match.Bits, "\t"))
+            OutFile.write("\n")
+    def WriteFixedScores(self, OutputPath):
+        self.TotalLinesAcceptedCount = 0
+        self.TotalLinesSecondPass = 0
+        self.WriteScoresPath = OutputPath
+        # Make the output directory, if it doesn't exist already.
+        # Assume: OutputPath is a directory if ReadScoresPath is a directory,
+        # and OutputPath is a file if ReadScoresPath is a file.
+        if os.path.isdir(self.ReadScoresPath):
+            DirName = OutputPath
+        else:
+            DirName = os.path.split(OutputPath)[0]
+        try:
+            os.makedirs(DirName)
+        except:
+            pass
+        self.ProcessResultsFiles(self.ReadScoresPath, self.WriteFixedScoresFile)
+        print "Total accepted lines: %s of %s"%(self.TotalLinesAcceptedCount, self.TotalLinesSecondPass)
+    def WriteFixedScoresFile(self, Path):
+        if os.path.isdir(self.ReadScoresPath):
+            OutputPath = os.path.join(self.WriteScoresPath, os.path.split(Path)[1])
+        else:
+            OutputPath = self.WriteScoresPath
+        if (not self.OverwriteNewScoresFlag) and os.path.exists(OutputPath):
+            return
+        try:
+            InFile = open(Path, "rb")
+            OutFile = open(OutputPath, "wb")
+            LineCount = 0
+            self.LinesAcceptedCount = 0
+            OldSpectrum = None
+            MatchesForSpectrum = []
+            for FileLine in InFile.xreadlines():
+                # Lines starting with # are comments (e.g. header line), and are written out as-is:
+                if FileLine[0] == "#":
+                    self.Columns.initializeHeaders(FileLine)
+                    OutFile.write(FileLine)
+                    continue
+                Bits = list(FileLine.strip().split("\t"))
+                Match = Bag()
+                try:
+                    Match.Bits = Bits
+                    Match.Charge = int(Bits[self.Columns.getIndex("Charge")])
+                    Match.MQScore = float(Bits[self.Columns.getIndex("MQScore")])
+                    #Match.DeltaScoreAny = float(Bits[self.Columns.DeltaScoreAny])
+                    Match.DeltaScore = float(Bits[self.Columns.getIndex("DeltaScore")])
+                    Match.Peptide = GetPeptideFromModdedName(Bits[self.Columns.getIndex("Annotation")])
+                    Match.ProteinName = Bits[self.Columns.getIndex("Protein")]
+                except:
+                    continue
+                LineCount += 1
+                Spectrum = (Bits[0], Bits[1])
+                if Spectrum != OldSpectrum:
+                    self.WriteMatchesForSpectrum(MatchesForSpectrum, OutFile)
+                    MatchesForSpectrum = []
+                OldSpectrum = Spectrum
+                MatchesForSpectrum.append(Match)
+            # Finish the last spectrum:
+            self.WriteMatchesForSpectrum(MatchesForSpectrum, OutFile)
+            InFile.close()
+            OutFile.close()
+            print "%s\t%s\t%s\t"%(Path, LineCount, self.LinesAcceptedCount)
+            self.TotalLinesAcceptedCount += self.LinesAcceptedCount
+            self.TotalLinesSecondPass += LineCount
+        except:
+            traceback.print_exc()
+            print "* Error filtering annotations from '%s' to '%s'"%(Path, OutputPath)
+    def ComputePValuesWithShuffled(self, Charge3Flag = 0):
+        """
+        Set self.OddsTrue using results from a partially-shuffled database.
+        Given a score cutoff we assume that, above the score cutoff, there are
+        T hits from valid proteins and F hits from invalid proteins.
+        
+        # PVALUE WITH REMOVAL:
+        Let TDB and FDB be the true and false database fractions (FDB = self.ShuffledDatabaseFraction)
+        After filtering out all F hits from invalid proteins, there are still some
+        chance hits to true proteins.  The estimated number of hits from T that are actually
+        false is equal to F*(TDB/FDB).  Thus, the odds true for this cutoff is:
+        1.0 - (F*(TDB/FDB) / T)
+        
+        # PVALUE WITHOUT REMOVAL:
+        The odds true for the cutoff is (T - F*(TDB/FDB)) / (T + F).
+        """
+        OddsTrue = {}
+        if Charge3Flag:
+            self.OddsTrue3 = OddsTrue
+            ScoreHistogram = self.ScoreHistogram3
+            ShuffledScoreHistogram = self.ShuffledScoreHistogram3
+        else:
+            self.OddsTrue2 = OddsTrue
+            ScoreHistogram = self.ScoreHistogram2
+            ShuffledScoreHistogram = self.ShuffledScoreHistogram2
+        CumulativeHits = 0
+        CumulativeHitsTrue = 0
+        CumulativeHitsFalse = 0
+        Keys = ScoreHistogram.keys()
+        Keys.sort()
+        if not Keys:
+            # There are NO HITS for this charge state.
+            return
+        MinKey = Keys[0]
+        MaxKey = Keys[-1]
+        TrueFraction = 1.0 - self.ShuffledDatabaseFraction
+        self.ShuffledScalingFactor = TrueFraction / self.ShuffledDatabaseFraction
+        for Key in range(MaxKey, MinKey - 1, -1):
+            AllHits = ScoreHistogram.get(Key, 0)
+            FalseHits = ShuffledScoreHistogram.get(Key, 0)
+            ValidHits = AllHits - FalseHits
+            CumulativeHitsTrue += ValidHits
+            CumulativeHitsFalse += FalseHits
+            FalseWithinTrue = min(CumulativeHitsTrue, CumulativeHitsFalse * self.ShuffledScalingFactor)
+            ##NEC_MOD
+            #if FalseWithinTrue == 0:
+            #    FalseWithinTrue = 1
+            if self.RemoveShuffledMatches:
+                # OddsTrue = (V - I*(TDB/FDB)) / V
+                BinOddsTrue = max(0, CumulativeHitsTrue - FalseWithinTrue) / float(max(1, CumulativeHitsTrue))
+                
+            else:
+                # OddsTrue = (V - I*(TDB/FDB)) / (I+V)
+                BinOddsTrue = max(0, CumulativeHitsTrue - FalseWithinTrue) / float(max(1, CumulativeHitsTrue + CumulativeHitsFalse))
+            if self.VerboseFlag:
+                # Bin, true, false, cumtrue, cumfalse
+                Str = "%s\t%s\t%s\t%s\t%s\t"%(Key, ValidHits, FalseHits, CumulativeHitsTrue, CumulativeHitsFalse)
+                Str += "%.5f\t%.5f\t"%(BinOddsTrue, 1.0 - BinOddsTrue)
+                print Str
+            OddsTrue[Key] = BinOddsTrue
+        if self.VerboseFlag:
+            print "\n\n"
+    def SelectProteins(self, PValueCutoff, ReadScoresPath):
+        """
+        Using SelectProteins, assign each peptide to the most reasonable "owner" protein.
+        """
+        # Select the F-score cutoff:
+        FScoreCutoff2 = 9999
+        FScoreCutoff3 = 9999
+        for FScoreBin in self.OddsTrue2.keys():
+            OddsTrue = self.OddsTrue2[FScoreBin]
+            if (1.0 - OddsTrue) <= PValueCutoff:
+                if (FScoreBin / BIN_MULTIPLIER) < FScoreCutoff2:
+                    FScoreCutoff2 = FScoreBin / BIN_MULTIPLIER
+        for FScoreBin in self.OddsTrue3.keys():
+            OddsTrue = self.OddsTrue3[FScoreBin]
+            if (1.0 - OddsTrue) <= PValueCutoff:
+                if (FScoreBin / BIN_MULTIPLIER) < FScoreCutoff3:
+                    FScoreCutoff3 = FScoreBin / BIN_MULTIPLIER
+        self.ProteinPicker.FScoreCutoff2 = FScoreCutoff2
+        self.ProteinPicker.FScoreCutoff3 = FScoreCutoff3
+        self.ProteinPicker.MeanDeltaScore2 = self.MeanDeltaScore2
+        self.ProteinPicker.MeanDeltaScore3 = self.MeanDeltaScore3
+        self.ProcessResultsFiles(ReadScoresPath, self.ProteinPicker.ParseAnnotations)
+        # We've COUNTED the protein hits.  Now ask the picker to decide which
+        # protein 'owns' each peptide:
+        self.ProteinPicker.ChooseProteins()
+    def SetOutputDistributionPath(self, Path):
+        self.OutputDistributionPath = Path
+        self.OutputDistributionFile = open(Path, "wb")
+    def ParseClusterInfo(self):
+        """
+        Parse cluster-sizes from an info file.  
+        """
+        self.ClusterSizes = {}
+        File = open(self.ClusterInfoPath, "rb")
+        for FileLine in File.xreadlines():
+            Bits = FileLine.split()
+            try:
+                ScanNumber = int(Bits[1])
+                ClusterSize = int(Bits[2])
+            except:
+                print "* Skipping this line:", FileLine
+            self.ClusterSizes[(Bits[0], ScanNumber)] = ClusterSize
+    def ParseCommandLine(self, Arguments):
+        # The -m option assigns the module-level limit, so declare it global here:
+        global MAX_RESULTS_FILES_TO_PARSE
+        (Options, Args) = getopt.getopt(Arguments, "l:s:r:w:m:bp:vixzd:a1S:HX:")
+        OptionsSeen = {}
+        self.SaveDistributionPath = "PValues.txt" # default
+        self.ReadScoresPath = None
+        self.WriteScoresPath = None
+        for (Option, Value) in Options:
+            OptionsSeen[Option] = 1
+            if Option == "-l":
+                if not os.path.exists(Value):
+                    print "** Error: can't read p-value distribution from file '%s'"%Value
+                    return 0
+                self.LoadDistributionPath = Value
+            elif Option == "-p":
+                self.PValueCutoff = float(Value)
+            elif Option == "-s":
+                self.SaveDistributionPath = Value
+            elif Option == "-x":
+                self.RetainBadMatches = 1
+            elif Option == "-b":
+                self.BlindFlag = 1
+                self.MQScoreWeight = Defaults.BlindMQScoreWeight
+                self.DeltaScoreWeight = Defaults.BlindDeltaScoreWeight
+                self.GammaOffset = Defaults.BlindGammaOffset
+            elif Option == "-r":
+                self.ReadScoresPath  = Value
+            elif Option == "-w":
+                self.WriteScoresPath = Value
+            elif Option == "-m":
+                MAX_RESULTS_FILES_TO_PARSE = int(Value)
+            elif Option == "-v":
+                self.VerboseFlag = 1
+            elif Option == "-i":
+                self.GenerateImageFlag = 1
+            elif Option == "-d":
+                if not os.path.exists(Value):
+                    print "** Error: couldn't find database file '%s'\n\n"%Value
+                    print UsageInfo
+                    sys.exit(1)
+                self.DBPath.append(Value)
+            elif Option == "-a":
+                self.PerformProteinSelection = 1
+            elif Option == "-1":
+                self.WriteTopMatchOnly = 1
+            elif Option == "-S":
+                self.ShuffledDatabaseFraction = float(Value)
+                if self.ShuffledDatabaseFraction <= 0 or self.ShuffledDatabaseFraction >= 1:
+                    print "* Invalid value for -S: %s"%Value
+                    return 0
+            elif Option == "-H":
+                self.RemoveShuffledMatches = 0
+            elif Option == "-X":
+                # Undocumented option for CLUSTER searches:
+                self.ClusterInfoPath = Value
+                self.ParseClusterInfo()
+            else:
+                print "** Unknown option:", Option, Value
+        # Check validity of options:
+        if self.PerformProteinSelection and not self.DBPath:
+            print "* Error: -a option requires -d option!"
+            return 0
+        # No major problems - return TRUE for success.
+        return 1
+
+UsageInfo = """
+FDRUtils.py - Compute probability that each match from a tandem MS
+peptide database search is correct.  Write out an updated results file containing
+only the high-quality results.
+
+Parameters:
+ -r [FILENAME] Read results from filename (and fit the probability mixture
+    model to these results).  If the option value is a directory, we'll read
+    all the results-files from the directory.
+ -w [FILENAME] Write re-scored results to a file.
+ -l [FILENAME] Load p-value distribution from a file (written out earlier
+    with -s option)
+
+Protein selection can be performed, replacing the protein identification
+with a parsimonious set of protein IDs (using a simple iterative
+approach).  The following options are required for protein selection:
+ -a: Replace protein identifications with a "parsimonious" set of protein IDs.
+     Requires -d option!
+ -d [FILENAME] Database (.trie file) searched
+ -S [FRACTION]: (see below)
+
+Other options:
+ -S [FRACTION]: The fraction of the database consisting of shuffled
+    proteins.  For instance, if you use a 1:1 mix of valid and invalid
+    proteins, use -S 0.5.  If this option is set, p-values will be set using
+    the number of matches to shuffled proteins, whose names begin with XXX.
+ -s [FILENAME] Save p-value distribution to a file.
+ -i Write a .png image of the distribution graph (requires PIL)
+ -p [NUM] FDR cutoff for saving results; by default, 0.1
+ -b Blind search (use different score/deltascore weighting)
+ -x If the -x flag is passed, even "bad" matches are written out (no p-value
+    filtering is performed)
+ -1 Write only the top hit for each spectrum, even if "good" runners-up exist
+
+Internal use only:
+ -v Verbose output (for debugging)
+ -H Retain matches to shuffled proteins.  Used for further processing ONLY.
+    
+Example:
+  FDRUtils.py -r ShewanellaResults -s ShewFDR.txt -w ShewanellaFiltered
+     -p 0.05 -d database\Shew.trie -a 
+"""
+
+def Main(Parser = None):
+    global MAX_RESULTS_FILES_TO_PARSE
+    
+    if not Parser:
+        Parser = PValueParser()
+        Result = Parser.ParseCommandLine(sys.argv[1:])
+        if not Result:
+            print UsageInfo
+            return
+    if Parser.DBPath and Parser.PerformProteinSelection:
+        Parser.ProteinPicker = SelectProteins.ProteinSelector()
+        Parser.ProteinPicker.LoadMultipleDB(Parser.DBPath)
+    if Parser.LoadDistributionPath:
+        print "Load p-value distribution from %s..."%Parser.LoadDistributionPath
+        Parser.LoadPValueDistribution(Parser.LoadDistributionPath)
+    elif Parser.ReadScoresPath:
+        print "Read scores from search results at %s..."%Parser.ReadScoresPath
+        Parser.ReadDeltaScoreDistribution(Parser.ReadScoresPath)
+        Parser.SetOutputDistributionPath(Parser.SaveDistributionPath)
+        ##############################
+        # Loop for F-score methods
+        Parser.ProcessResultsFiles(Parser.ReadScoresPath, Parser.ReadScoreDistributionFromFile, MAX_RESULTS_FILES_TO_PARSE)
+        if Parser.ShuffledDatabaseFraction != None:
+            print "Compute PValues with shuffled..."
+            Parser.ComputePValuesWithShuffled(0)
+            Parser.ComputePValuesWithShuffled(1)
+        else:
+            Result = Parser.FitMixtureModel()
+            if not Result:
+                sys.exit(1)
+        if Parser.PerformProteinSelection:
+            Parser.SelectProteins(Parser.PValueCutoff, Parser.ReadScoresPath)
+        print "Write p-value distribution to %s..."%Parser.SaveDistributionPath
+        (Stub, Extension) = os.path.splitext(Parser.SaveDistributionPath)
+        Parser.SavePValueDistribution(0)
+        Parser.SavePValueDistribution(1)
+        ##############################
+        Parser.OutputDistributionFile.close()
+    else:
+        print "** Please specify either a distribution file or results file."
+        print UsageInfo
+        sys.exit(1)
+    if Parser.GenerateImageFlag and Image:
+        ImagePath = os.path.splitext(Parser.SaveDistributionPath)[0] + ".2.png"
+        Parser.ProduceScoreDistributionImage(ImagePath, 0)
+        ImagePath = os.path.splitext(Parser.SaveDistributionPath)[0] + ".3.png"
+        Parser.ProduceScoreDistributionImage(ImagePath, 1)
+    if Parser.WriteScoresPath:
+        Parser.WriteFixedScores(Parser.WriteScoresPath)
+if __name__ == "__main__":
+    try:
+        import psyco
+        psyco.full()
+    except:
+        print "psyco not found - running without optimization"
+    #TestMain()
+    Main()
diff --git a/FreeMod.c b/FreeMod.c
new file mode 100644
index 0000000..a011b8a
--- /dev/null
+++ b/FreeMod.c
@@ -0,0 +1,2720 @@
+//Title:          FreeMod.c
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+
+// Mod-tolerant matching of peptides to spectra.  See header file FreeMod.h for overview.
+#include "CMemLeak.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include "Utils.h"
+#include "Inspect.h"
+#include "Trie.h"
+#include "Mods.h"
+#include "Tagger.h"
+#include "Score.h"
+#include "FreeMod.h"
+#include "Scorpion.h"
+#include "SVM.h"
+#include "IonScoring.h"
+
+// SkewPenalty[n] is a score penalty applied to a node that is n/100 daltons
+// away from where it should be.  Size 100.  (Derived from a functional
+// fit to an empirical histogram.)
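+// For example, a node that is 0.25 Da from its expected position is charged g_SkewPenalty[25] = -11.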
+int g_SkewPenalty[] = {0, 0, 0, 0, -1, -2, -2, -3, -4, -4, -4, -4, -4, -5,
+        -5, -6, -7, -8, -8, -9, -9, -9, -9, -9, -10, -11, -11, -12, -12,
+        -12, -12, -12, -12, -13, -13, -14, -14, -15, -15, -15, -15, -15,
+        -15, -15, -16, -16, -16, -17, -17, -17, -17, -17, -17, -17, -18,
+        -18, -18, -19, -19, -19, -19, -19, -19, -19, -20, -20, -20, -21,
+        -21, -22, -22, -22, -22, -22, -22, -23, -24, -24, -25, -25, -25,
+        -25, -25, -26, -26, -28, -29, -30, -31, -32, -32, -32, -32, -34,
+        -35, -39, -41, -48, -57, -65};
+
+int g_SkewPenaltySize = sizeof(g_SkewPenalty) / sizeof(int);
+int g_SkewPenaltyMax = sizeof(g_SkewPenalty) / sizeof(int) - 1;
+
+// MassDeltas[AminoAcid][n] is the nth modification (normally sorted by size) possible on AminoAcid. 
+// e.g. MassDeltas[0][0] is the smallest (or most negative) modification allowed on alanine
+MassDelta** MassDeltas = NULL;
+
+// MassDeltaByMass[AminoAcid][PRM] is a linked list of MassDeltaNodes corresponding to MassDeltas matching 
+// PRM on amino acid.  
+MassDeltaNode*** MassDeltaByMass = NULL;
+
+// For user-supplied (limited) PTMs, MassDeltaByIndex[AA*MAX_PT_MODTYPE + n] points to an instance of the PTM with 
+// index n, attached to AA.  Used for decorations!  A special case: Always, for AA of 26 (MDBI_ALL_MODS), store 
+// a pointer to a valid PTM.
+// The array AllKnownPTMods holds *one* entry for all modifications of the same type (e.g. all phosphorylations),
+// but there's a separate MassDelta instance for serine-phos, threonine-phos, and tyrosine-phos.  (And this is probably
+// as it should be, since we might attach a different penalty to phosphotyrosine than to phosphoserine, to reflect the
+// fact that serines are more commonly phosphorylated)
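+// For example, for serine ('S' - 'A' == 18) and user PTM index n, the serine-specific
+// instance lives at MassDeltaByIndex[18 * MAX_PT_MODTYPE + n].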
+MassDelta** MassDeltaByIndex = NULL;
+
+/////////////////////////////////////////////////////////
+// Forward declarations:
+void DebugPrintMultiModTable(TagGraph* Graph, char* Buffer, int MaxX, int MaxY, int MaxZ);
+void AddMultiModMatch(MSSpectrum* Spectrum, int CellIndex, int Bonus,
+    char* Buffer, int StartPos, int ModBlockSize, int AminoBlockSize, int BonusLength,
+    int BufferEnd);
+int ExtendMatchRightwardDuo(SearchInfo* Info, char* Buffer, int BufferEnd, int MatchMass, 
+    int MaxMods, int ScoreToBeat, int FilePos, SpectrumTweak* Tweak);
+
+int MSAlignmentGeneral(SearchInfo* Info, char* Buffer, int BufferEnd, int MatchMass, 
+    int MaxMods, int ScoreToBeat, int FilePos, SpectrumTweak* Tweak);
+
+void AddNewMatchDuo(SearchInfo* Info, SpectrumTweak* Tweak, char* Buffer, int Score, int* PrevCellTable, MassDelta** DeltaTable, 
+    int CellIndex, MassDelta* FinalDelta, int AminoBlockSize, int AminoIndex, int EndAminoIndex,
+    int FilePos);
+
+void DebugPrintPRMScores(MSSpectrum* Spectrum, SpectrumTweak* Tweak);
+
+
+// Allocate the array MassDeltaByIndex, if it's not already allocated.
+void AllocMassDeltaByIndex()
+{
+    int MallocSize;
+    if (MassDeltaByIndex)
+    {
+        return;
+    }
+    MallocSize = (MAX_PT_MODTYPE * (AMINO_ACIDS + 1)) * sizeof(MassDelta*);
+    MassDeltaByIndex = (MassDelta**)malloc(MallocSize);
+}
+
+// Free the 2-dimensional table MassDeltaByMass.  It's a big table, so don't forget to free it :)
+void FreeMassDeltaByMass()
+{
+    int AA;
+    int PRM;
+    MassDeltaNode* Node;
+    MassDeltaNode* Prev = NULL;
+    if (MassDeltaByMass)
+    {
+        for (AA = 0; AA < AMINO_ACIDS; AA++)
+        {
+            for (PRM = 0; PRM < GlobalOptions->DeltaBinCount; PRM++)
+            {
+                // MassDeltaByMass[AA][PRM] is either null, or it points to the head of a 
+                // linked list of MassDeltaNode objects.  
+                if (MassDeltaByMass[AA][PRM])
+                {
+                    // Free each node of the list:
+                    Node = MassDeltaByMass[AA][PRM];
+                    Prev = NULL;
+                    while (Node)
+                    {
+                        SafeFree(Prev);
+                        Prev = Node;
+                        Node = Node->Next;
+                    }
+                    SafeFree(Prev);
+                }
+            }
+            SafeFree(MassDeltaByMass[AA]);
+        }
+        SafeFree(MassDeltaByMass);
+        MassDeltaByMass = NULL;
+    }
+}
+
+// Free all the mods in MassDeltas array.
+void FreeMassDeltas()
+{
+    int AA;
+    if (MassDeltas)
+    {
+        for (AA = 0; AA < AMINO_ACIDS; AA++)
+        {
+            SafeFree(MassDeltas[AA]);
+        }
+        SafeFree(MassDeltas);
+        MassDeltas = NULL;
+    }
+    FreeMassDeltaByMass();
+    //SafeFree(MassDeltaByIndex);
+    //MassDeltaByIndex = NULL;
+}
+
+// Initialize the hash MassDeltaByMass.  The table entry MassDeltaByMass[AA][Delta] points to a linked list of
+// mass deltas for amino acid AA matching Delta.  
+// In some cases, it makes sense to consider two mass deltas of the same size. Example: Mutation
+// to Q or to K.  We keep a *list* of mass deltas in all cases. 
+void InitMassDeltaByMass()
+{
+    int MassDeltaIndex;
+    int Fudge;
+    int Delta;
+    int AA;
+    // We populate adjacent cells in MassDeltaByMass[AA] as well.  FudgeMax == how many bins away from the "right" bin to consider.
+    // FudgeMax is usually 1, to handle roundoff error.  But FudgeMax can be 2-3 if the parent mass epsilon is quite large.
+    int FudgeMax = 1 + (GlobalOptions->ParentMassEpsilon / DALTON);
+    MassDeltaNode* OldNode;
+    MassDeltaNode* NewNode;
+    //
+
+    FreeMassDeltaByMass();
+    MassDeltaByMass = (MassDeltaNode***)calloc(AMINO_ACIDS, sizeof(MassDeltaNode**));
+    for (AA = 0; AA < AMINO_ACIDS; AA++)
+    {
+        MassDeltaByMass[AA] = (MassDeltaNode**)calloc(GlobalOptions->DeltaBinCount + 1, sizeof(MassDeltaNode**));
+        for (MassDeltaIndex = 0; MassDeltaIndex < GlobalOptions->DeltaBinCount; MassDeltaIndex++)
+        {
+            if (!MassDeltas[AA][MassDeltaIndex].Flags)
+            {
+                // Null array entry.
+                break;
+            }
+            ROUND_MASS_TO_DELTA_BIN(MassDeltas[AA][MassDeltaIndex].RealDelta, Delta);
+            // Add our MassDelta to the bin (and to neighboring bins), either filling the bin
+            // or adding the new MassDelta to the end of the bin's linked list of MassDeltaNodes:
+            for (Fudge = max(0, Delta - FudgeMax); Fudge < min(GlobalOptions->DeltaBinCount, Delta + FudgeMax + 1); Fudge++)
+            {
+                NewNode = (MassDeltaNode*)calloc(1, sizeof(MassDeltaNode));
+                NewNode->Delta = &MassDeltas[AA][MassDeltaIndex];
+                //NewNode->RealDelta = NewNode->Delta->RealDelta;
+                OldNode = MassDeltaByMass[AA][Fudge];
+                if (!OldNode)
+                {
+                    MassDeltaByMass[AA][Fudge] = NewNode;
+                }
+                else
+                {
+                    while (OldNode->Next)
+                    {
+                        OldNode = OldNode->Next;
+                    }
+                    OldNode->Next = NewNode;
+                }
+            }
+        }
+    }
+}
+
+void debugMassDeltaByMass()
+{
+    int AA, MassDeltaIndex, Fudge, Delta;
+    int FudgeMax = 1 + (GlobalOptions->ParentMassEpsilon / DALTON);
+    MassDeltaNode* Node;
+    MassDelta* CurrDelta;
+    printf("MassDeltaByMass:\n");
+    for (AA = 0; AA < AMINO_ACIDS; AA++)
+    {
+        for (MassDeltaIndex = 0; MassDeltaIndex < GlobalOptions->DeltaBinCount; MassDeltaIndex++)
+        {
+            if (!MassDeltas[AA][MassDeltaIndex].Flags)
+            {
+                // Null array entry.
+                break;
+            }
+            ROUND_MASS_TO_DELTA_BIN(MassDeltas[AA][MassDeltaIndex].RealDelta, Delta);
+            // Print the first MassDelta stored in this bin (and in the neighboring bins
+            // populated by InitMassDeltaByMass):
+            for (Fudge = max(0, Delta - FudgeMax); Fudge < min(GlobalOptions->DeltaBinCount, Delta + FudgeMax + 1); Fudge++)
+            {
+                Node = MassDeltaByMass[AA][Fudge];
+                if (!Node)
+                {
+                    continue;
+                }
+                CurrDelta = Node->Delta;
+                printf("[%c][%d][%d] : Delta=%d,RealDelta=%d,Name=%s,Index=%d\n", (char)(AA + 'A'),
+                    MassDeltaIndex, Fudge, CurrDelta->Delta, CurrDelta->RealDelta, CurrDelta->Name, CurrDelta->Index);
+            }
+        }
+    }
+}
+
+
+// Read, from the binary file Mutations.dat, the definitions of all mass modifications we will consider.
+// (It's faster to consider a large but LIMITED set of modifications than to consider every feasible value
+// of delta.  Also, this limited set lets us assign a SCORE and a NAME to each delta, which is very useful)
+// The file Mutations.dat is written out by the scaffold script PrepBlosum.py 
+// Any mass delta with Flags == 0 is a dummy record, which is included simply to pad out the 
+// array to a uniform size; such deltas should *never* actually be used! 
+// If ReadFlag is false, then don't actually read anything from a file - just init the structure.
+// After calling this, the caller should also call InitMassDeltaByMass to init the hash.
+//ASSUMPTION: If ReadFlag is true, then we are reading from a mutations file and we only look for 26 mutations!
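+// Each record, as read below, is laid out as:
+//   int   ScaledMassDelta   (delta * MASS_SCALE)
+//   float Score             (log-odds; records scoring below MinLogOddsForMutation are skipped)
+//   char  Name[20]
+//   int   Flags
+//   char  Amino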
+void LoadMassDeltas(char* FileName, int ReadFlag)
+{
+    int AA;
+    int DeltaIndex;
+    FILE* MassDeltaFile;
+
+    int ScaledMassDelta;
+    int Bin;
+    float RealMassDelta;
+    float Score;
+    char crapola[21];
+    int ModFlags = DELTA_FLAG_VALID;
+
+    
+
+    FreeMassDeltas(); // Free up any pre-conceived notions
+
+    MassDeltas = (MassDelta**)calloc(AMINO_ACIDS, sizeof(MassDelta*));
+    for (AA = 0; AA < AMINO_ACIDS; AA++)
+    {
+        MassDeltas[AA] = (MassDelta*)calloc(GlobalOptions->DeltasPerAA, sizeof(MassDelta));
+    }
+    if (!ReadFlag)
+    {
+        // That was a freebie.
+        return;
+    }
+    if (!FileName || !*FileName)
+    {
+        // No file to open.
+        return;
+    }
+    MassDeltaFile = fopen(FileName, "rb");
+    if (!MassDeltaFile)
+    {
+        printf("Error: Unable to open mutation data file '%s'", FileName);
+        return;
+    }
+    
+    AllPTModCount = 0;
+    
+    for (AA = 0; AA < AMINO_ACIDS; AA++)
+    {
+        // MassDeltas[AA] was already allocated above; re-allocating here would leak it.
+        //for (DeltaIndex = 0; DeltaIndex < AMINO_ACIDS; DeltaIndex++)
+	//for (DeltaIndex = 0; DeltaIndex < GlobalOptions->DeltasPerAA; DeltaIndex++)
+	DeltaIndex = 0;
+	//printf("DeltasPerAA: %d\n",GlobalOptions->DeltasPerAA);
+	while(DeltaIndex < GlobalOptions->DeltasPerAA)
+	  {
+            ReadBinary(&ScaledMassDelta, sizeof(int), 1, MassDeltaFile);
+	    ReadBinary(&Score, sizeof(float), 1, MassDeltaFile);
+
+	    if(Score < GlobalOptions->MinLogOddsForMutation)
+	      {
+		
+		ReadBinary(crapola,sizeof(char),20,MassDeltaFile);
+		//printf("NEC_DEBUG: Found a mutation with too small a log odds %f:%s\n",Score,crapola);
+		ReadBinary(&crapola,sizeof(int),1,MassDeltaFile);
+		ReadBinary(&crapola,sizeof(char),1,MassDeltaFile);
+		DeltaIndex += 1;
+		continue;
+		
+	      }
+	    
+	    MassDeltas[AA][DeltaIndex].RealDelta = ScaledMassDelta;
+	    RealMassDelta = ((float)(ScaledMassDelta))/MASS_SCALE;
+	    ROUND_MASS_TO_DELTA_BIN(RealMassDelta, Bin);
+            MassDeltas[AA][DeltaIndex].Delta = Bin;
+	    MassDeltas[AA][DeltaIndex].Score = Score;
+            
+	    ReadBinary(&MassDeltas[AA][DeltaIndex].Name, sizeof(char), 20, MassDeltaFile);
+            //printf("Found a good score for %f:%s with mass %d\n",Score,MassDeltas[AA][DeltaIndex].Name,ScaledMassDelta);
+	    ReadBinary(&MassDeltas[AA][DeltaIndex].Flags, sizeof(int), 1, MassDeltaFile);
+            ReadBinary(&MassDeltas[AA][DeltaIndex].Amino, sizeof(char), 1, MassDeltaFile);
+	    MassDeltas[AA][DeltaIndex].Flags = ModFlags;
+	    MassDeltas[AA][DeltaIndex].Index = AllPTModCount;
+	    MassDeltaByIndex[AA * MAX_PT_MODTYPE + AllPTModCount] = &MassDeltas[AA][DeltaIndex];
+	    MassDeltaByIndex[MDBI_ALL_MODS * MAX_PT_MODTYPE + AllPTModCount] = &MassDeltas[AA][DeltaIndex];
+	    
+	    AllKnownPTMods[AllPTModCount].Mass = ScaledMassDelta;
+	    AllKnownPTMods[AllPTModCount].Flags = ModFlags;
+	    AllKnownPTMods[AllPTModCount].Allowed[AA] = 1;
+	    strncpy(AllKnownPTMods[AllPTModCount].Name,MassDeltas[AA][DeltaIndex].Name,5);
+	    g_PTMLimit[AllPTModCount] = 1;
+	    AllPTModCount ++;
+	    DeltaIndex += 1;
+	    
+	  }
+    }
+    fclose(MassDeltaFile);
+    // The caller should now invoke InitMassDeltaByMass()
+    printf("Found %d total PTMs\n",AllPTModCount);
+}
+
+// Enrich MassDeltas[] to include one modification for any (reasonable) mass change applicable to any
+// amino acid.  Afterwards, InitMassDeltaByMass() is re-run to rebuild the lookup hash.
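+// For example, for alanine the smallest delta considered is roughly glycine minus alanine
+// (about -14 Da, unless MinPTMDelta is less negative), and deltas then step upward in
+// 1 Da increments toward MaxPTMDelta, skipping deltas of ~0 Da.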
+void AddBlindMods()
+{
+    int AA;
+    int DeltaMass;
+    int Bin;
+    int FoundFlag;
+    int Index;
+    int MaxDeltaMass;
+    //
+    for (AA = 0; AA < AMINO_ACIDS; AA++)
+    {
+        DeltaMass = PeptideMass['A' + AA];
+        if (!DeltaMass)
+        {
+            continue; // bogus amino like B or Z
+        }
+        DeltaMass = (DeltaMass / 1000) * 1000;
+        // The largest *negative* modification permitted is one that takes us down to the mass of glycine:
+        DeltaMass = GLYCINE_MASS - DeltaMass;
+	if(DeltaMass < GlobalOptions->MinPTMDelta * MASS_SCALE)
+	  DeltaMass = GlobalOptions->MinPTMDelta * MASS_SCALE;
+        MaxDeltaMass = GlobalOptions->MaxPTMDelta * MASS_SCALE;
+
+	//printf("Min delta: %d\n",DeltaMass);
+	//printf("Max delta: %d\n",MaxDeltaMass);
+        while (DeltaMass < MaxDeltaMass)
+        {
+            // Don't add a mutation for mass delta ~0:
+            if (abs(DeltaMass) < MASS_SCALE)
+            {
+                DeltaMass += MASS_SCALE;
+                continue;
+            }
+            ROUND_MASS_TO_DELTA_BIN(DeltaMass, Bin);
+            FoundFlag = 0;
+            // If we already know a PTM that matches this mass closely enough, don't add another:
+            for (Index = 0; Index < GlobalOptions->DeltasPerAA; Index++)
+            {
+                if (!MassDeltas[AA][Index].Flags)
+                {
+                    break;
+                }
+                if (abs(MassDeltas[AA][Index].RealDelta - DeltaMass) < HALF_DALTON)
+                {
+                    FoundFlag = 1;
+                    break;
+                }
+            }
+            if (!FoundFlag)
+            {
+	      
+                MassDeltas[AA][Index].RealDelta = DeltaMass;
+                MassDeltas[AA][Index].Delta = Bin;
+                MassDeltas[AA][Index].Flags = 1;
+                MassDeltas[AA][Index].Score = -1; // Somewhat magical score!
+                sprintf(MassDeltas[AA][Index].Name, "%+d", DeltaMass / MASS_SCALE);
+		//printf("MassDeltas[%c][%d].Delta = %d\n",(char)(AA+'65'),Index,Bin);
+		//printf("Name=%s\n",MassDeltas[AA][Index].Name);
+	    }
+            DeltaMass += DALTON;
+        }
+    }
+    InitMassDeltaByMass();
+}
+
+// For development only: Print out the scores of all PRMs, as well as the b and y scores
+// and witness scores from which the PRMScores were derived.  Requires some slow
+// business to set Spectrum->PRMDebugStrings, and so is not enabled in normal builds.
+void DebugPrintPRMScores(MSSpectrum* Spectrum, SpectrumTweak* Tweak)
+{
+    FILE* PRMFile = NULL;
+    int PRM;
+    ///
+    PRMFile = fopen("PRMScores.xls", "w");
+    if (!PRMFile)
+    {
+        printf("NO DEBUG PRINT OF PRM SCORES DONE.\n");
+        return;
+    }
+    fprintf(PRMFile, "#PRM\tMass\tScore\tBScore\tYScore\tWitnessScore\n");
+    for (PRM = 0; PRM < Tweak->PRMScoreMax; PRM++)
+    {
+        fprintf(PRMFile, "%d\t%.2f\t%d\t", PRM, PRM / 10.0, Tweak->PRMScores[PRM]);
+        //fprintf(PRMFile, "%s\n", Spectrum->PRMDebugStrings[PRM]);
+        fprintf(PRMFile, "\n");
+    }
+    fclose(PRMFile);
+}
+
+// When doing 2-mutant extension, MAX_RIGHT_EXTENSIONS needs to be large:
+#define MAX_RIGHT_EXTENSIONS 512
+Peptide LeftExtensions[MAX_RIGHT_EXTENSIONS];
+int LeftExtensionCount;
+Peptide RightExtensions[MAX_RIGHT_EXTENSIONS];
+int RightExtensionCount;
+
+Peptide* Add1ModMatch(SearchInfo* Info, char* Buffer, int BufferLength, int SuffixEndPos, int SuffixStartPos, int PrefixLength, 
+    int Score, MassDelta* Delta, SpectrumTweak* Tweak, int FilePos, char ExtraPrefixChar)
+{
+    Peptide* Match;
+    int Length;
+    int Pos;
+    MSSpectrum* Spectrum = Info->Spectrum;
+    //
+    Length = SuffixEndPos - SuffixStartPos + PrefixLength + 1;
+    Match = NewPeptideNode();
+    Match->Tweak = Tweak;
+    strncpy(Match->Bases, Buffer + SuffixStartPos - PrefixLength, Length);
+
+    Match->InitialScore = Score;
+    Match->RecordNumber = Info->RecordNumber;
+    Match->FilePos = FilePos + SuffixStartPos - PrefixLength;
+    if (SuffixStartPos - PrefixLength > 0)
+    {
+        Match->PrefixAmino = Buffer[SuffixStartPos - PrefixLength - 1];
+    }
+    else
+    {
+        Match->PrefixAmino = ExtraPrefixChar;
+    }
+    Pos = SuffixStartPos - PrefixLength + Length;
+    if (Pos < BufferLength)
+    {
+        Match->SuffixAmino = Buffer[Pos];
+    }
+    if (Delta)
+    {
+        Match->AminoIndex[0] = PrefixLength;
+        Match->ModType[0] = Delta;
+    }
+    Match->DB = Info->DB;
+    GetPeptideParentMass(Match);
+    return StoreSpectralMatch(Spectrum, Match, Length, 0);
+}
+
+// SeekMatch1PTM performs a blind search with at most one PTM permitted.
+// Schematic of SeekMatch1PTM:
+//                SuffixStartPos = PrefixEndPos, PTM attaches here
+//               /
+//              / SuffixEndPos
+//             / / 
+//            * *  
+//  IKKWLSLPGEMTRPLIL
+//     *       
+//      \--PrefixStartPos
+
+// Kludge: If Buffer points to the middle of a long peptide,
+// ExtraPrefixChar is the character that precedes Buffer.
+#define MAX_1MOD_PEPTIDE_LEN 64
+
+int SeekMatch1PTM(SearchInfo* Info, char* Buffer, int BufferLen, int MatchMass, int ScoreToBeat,
+                  SpectrumTweak* Tweak, int FilePos, char ExtraPrefixChar)
+{
+    static int* PrefixScores = NULL;
+    static int* PrefixMasses = NULL;
+    int PrefixStartPos;
+    int PrefixEndPos;
+    int PRM;
+    int PRMBin;
+    int Score;
+    int MatchScore;
+    int MaxPrefix;
+    int SkipBases;
+    int ArrayIndex;
+    int PrefixLength;
+    int MaxPrefixLength;
+    int MinPossibleDelta = max(-130000,GlobalOptions->MinPTMDelta*MASS_SCALE);
+    int MaxPossibleDelta = GlobalOptions->MaxPTMDelta * MASS_SCALE;
+    int MaxMass = MatchMass + GlobalOptions->ParentMassEpsilon + MaxPossibleDelta;
+    int Delta;
+    int DeltaBin;
+    MassDeltaNode* DeltaNode;
+    char AA;
+    int AAIndex;
+    int AbsSkew;
+    int AAMass = 0;
+    int Skew;
+    int SuffixEndPos;
+    int SuffixStartPos;
+    int MatchScoreWithDelta;    
+    Peptide* Match;
+    MSSpectrum* Spectrum = Info->Spectrum;
+
+    //printf("SeekMatch1PTM:\n");
+    //printf("MinPTM: %d\n",MinPossibleDelta);
+    //printf("MaxPTM: %d\n",MaxPossibleDelta);
+    //
+    if (!BufferLen)
+    {
+        return 1;
+    }
+    if (!PrefixScores)
+    {
+        PrefixScores = (int*)calloc(512 * MAX_1MOD_PEPTIDE_LEN, sizeof(int));
+        PrefixMasses = (int*)calloc(512 * MAX_1MOD_PEPTIDE_LEN, sizeof(int));
+    }
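+    // PrefixScores/PrefixMasses behave as flattened 2-D tables indexed as
+    // [PrefixEndPos][PrefixLength]: entry (PrefixEndPos * MAX_1MOD_PEPTIDE_LEN + PrefixLength)
+    // holds the cumulative PRM score / mass of the PrefixLength residues ending just
+    // before position PrefixEndPos.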
+    // The prefix of our peptide will extend from 
+    // PrefixStartPos...PrefixEndPos, NOT including PrefixEndPos
+    
+    // By default, we cover up to 450 bases in each call.  If we hit the end of a protein, we stop there
+    // and handle the next protein in the next call to this function.
+    SkipBases = min(BufferLen, 450); 
+    for (PrefixStartPos = 0; PrefixStartPos < SkipBases; PrefixStartPos++)
+    {
+        if (Buffer[PrefixStartPos]=='*')
+        {
+            SkipBases = PrefixStartPos + 1;
+            break;
+        }
+        PRM = 0;
+        Score = 0;
+        MaxPrefix = min(SkipBases, PrefixStartPos + MAX_1MOD_PEPTIDE_LEN);
+        for (PrefixEndPos = PrefixStartPos; PrefixEndPos < MaxPrefix; PrefixEndPos++)
+        {
+            ArrayIndex = PrefixEndPos * MAX_1MOD_PEPTIDE_LEN + (PrefixEndPos - PrefixStartPos);
+            if (ArrayIndex < 0 || ArrayIndex > 512 * MAX_1MOD_PEPTIDE_LEN)
+            {
+                printf("** error: Array index for prefix is %d\n", ArrayIndex);
+            }
+            PrefixScores[ArrayIndex] = Score;
+            PrefixMasses[ArrayIndex] = PRM;
+            //printf("%d: Prefix %d-%d score %d PRM %d\n", ArrayIndex, PrefixStartPos, PrefixEndPos, Score, PRM); //Verbose1Mod
+            if (PRM > MaxMass)
+            {
+                break;
+            }
+            AAMass = PeptideMass[Buffer[PrefixEndPos]];
+            if (AAMass)
+            {
+                PRM += AAMass;
+            }
+            else
+            {
+                Score = -9999999;
+            }
+            if (PRM > MaxMass)
+            {
+                // Modless prefix is too long!
+                Score = -9999999;
+                break;
+            }
+            else
+            {
+                PRMBin = (PRM + 50) / 100;
+                if (PRMBin >= 0 && PRMBin < Tweak->PRMScoreMax)
+                {
+                    Score += Tweak->PRMScores[PRMBin];
+                }
+            }
+        }
+    }
+    // Now that the prefix table's complete, consider all possible suffixes.
+    // The suffix of our peptide will extend from SuffixStartPos...SuffixEndPos, INCLUSIVE.
+    for (SuffixEndPos = SkipBases - 1; SuffixEndPos > 0; SuffixEndPos--)
+    {
+        //printf("Try ending at pos'n %d (%c)\n", SuffixEndPos, Buffer[SuffixEndPos]); //Verbose1Mod
+        PRM = MatchMass;
+        Score = 0;
+        if (Spectrum->Node->LastMatch)
+        {
+            ScoreToBeat = Spectrum->Node->LastMatch->InitialScore;
+        }
+        for (SuffixStartPos = SuffixEndPos; SuffixStartPos >= 0; SuffixStartPos--)
+        {   
+            // Grow the c-terminal suffix by one residue:
+            AA = Buffer[SuffixStartPos];
+            AAIndex = AA - 'A';
+            AAMass = PeptideMass[AA];
+            if (AAMass)
+            {
+                PRM -= AAMass;
+            }
+            else
+            {
+                break; // bogus AA encountered
+            }
+            
+	    //NEC_DEBUG
+            //printf("Suffix %d (%c) to %d (%c), mass remaining %.2f\n", SuffixStartPos, Buffer[SuffixStartPos], SuffixEndPos, Buffer[SuffixEndPos], PRM / 1000.0); //Verbose1Mod
+            //if (PRM < -GlobalOptions->ParentMassEpsilon)
+	    if(PRM < MinPossibleDelta)
+            {
+	      
+                break; // modless suffix is too long!
+            }
+            // Try to hook up a to prefix:
+            ArrayIndex = SuffixStartPos * MAX_1MOD_PEPTIDE_LEN;
+            MaxPrefixLength = min(MAX_1MOD_PEPTIDE_LEN, SuffixStartPos + 1);
+            for (PrefixLength = 0; PrefixLength < MaxPrefixLength; PrefixLength++)
+            {
+                Delta = PRM - PrefixMasses[ArrayIndex];
+                if (Delta < MinPossibleDelta)
+                {
+                    break;
+                }
+                if (Delta < MaxPossibleDelta)
+                {
+                    MatchScore = Score + PrefixScores[ArrayIndex];
+                    //printf("Prefix %d-%d, suffix %d-%d, delta %.2f, score %d\n", SuffixStartPos - PrefixLength, SuffixStartPos, SuffixStartPos, SuffixEndPos, Delta / (float)DALTON, MatchScore);
+                    if (MatchScore > ScoreToBeat)
+                    {
+                        // Look for the delta that can hook these together:
+                        if (abs(Delta) < GlobalOptions->ParentMassEpsilon)
+                        {
+                            Match = Add1ModMatch(Info, Buffer, BufferLen, SuffixEndPos, SuffixStartPos, PrefixLength, MatchScore, NULL, Tweak, FilePos, ExtraPrefixChar);
+                            //after every call to add a match, the ScoreToBeat MUST be updated.
+                            ScoreToBeat = Spectrum->Node->LastMatch->InitialScore;
+                        }
+                        else
+                        {
+                            ROUND_MASS_TO_DELTA_BIN(Delta, DeltaBin);
+                            DeltaNode = MassDeltaByMass[AAIndex][DeltaBin];
+                            while (DeltaNode)
+                            {
+                                Skew = Delta - DeltaNode->Delta->RealDelta;
+                                AbsSkew = abs(Skew);
+                                if (AbsSkew <= GlobalOptions->Epsilon)
+                                {                                    
+                                    MatchScoreWithDelta = MatchScore + (int)(DeltaNode->Delta->Score * DELTA_SCORE_SCALER);
+                                    if (MatchScoreWithDelta > ScoreToBeat)
+                                    {
+                                        Match = Add1ModMatch(Info, Buffer, BufferLen, SuffixEndPos, SuffixStartPos, PrefixLength, MatchScoreWithDelta, DeltaNode->Delta, Tweak, FilePos, ExtraPrefixChar);
+                                        ScoreToBeat = Spectrum->Node->LastMatch->InitialScore;
+                                    }
+                                }
+                                DeltaNode = DeltaNode->Next;
+                            }
+                            // If the modification mass was small, then ALSO try the unmodified peptide:
+                            if (abs(Delta) < 5 * DALTON)
+                            {
+                                Add1ModMatch(Info, Buffer, BufferLen, SuffixEndPos, SuffixStartPos, PrefixLength, MatchScore, NULL, Tweak, FilePos, ExtraPrefixChar);
+                                ScoreToBeat = Spectrum->Node->LastMatch->InitialScore;
+
+                            }
+                        }
+                    }
+                }
+                ArrayIndex++;
+            }
+            // If we didn't just link up, then accumulate some score:
+            if (PRM >= 0)
+            {
+                Score += Tweak->PRMScores[MASS_TO_BIN(PRM)];
+                //printf("Accumulate score %.2f from PRM %d\n", Tweak->PRMScores[MASS_TO_BIN(PRM)], PRM);
+            }
+        } // SuffixEndPos loop
+    } // SuffixStartPos loop
+    return SkipBases;
+
+}
+
+int* PTMScoreTable = NULL;
+int* PrevCellIndexTable = NULL;
+MassDelta** DeltaTable = NULL;
+int* MassDeltaTable = NULL;
+
+#define DB_BUFFER_SIZE 1024000
+#define DB_SHUNT_BOUNDARY 900000
+#define DB_READ_BOUNDARY 900000
+
+// Search a database, using *no* tag-based filtering at all.  This is much slower than searching
+// with tag-based filters, but also more sensitive, particularly since we haven't handled the
+// problem of tagging in the presence of mutations.  
+void SearchDatabaseTagless(SearchInfo* Info, int MaxMods, int VerboseFlag, SpectrumTweak* Tweak)
+{
+    static char* Buffer = NULL;  // will be big
+    int IsEOF = 0;
+    FILE* DBFile;
+    int FilePos = 0;
+    int BufferEnd = 0;
+    int BufferPos = 0;
+    int BytesRead;
+    MSSpectrum* Spectrum = Info->Spectrum;
+    // We require all peptide candidates to be long enough to be meaningful (at least 500 Da, or large
+    // enough to equal the parent mass after maximum mod mass, whichever is largest)
+    // We also stop considering peptide candidates after they are too long to match
+    // the spectrum (even after deducting some mass due to modifications)
+    int ParentResidueMass = Spectrum->ParentMass - PARENT_MASS_BOOST;
+    int ScoreToBeat = -999999;
+    int SkipBases;
+    char PrefixChar;
+    //
+    if (!Buffer)
+    {
+        Buffer = (char*)malloc(sizeof(char) * DB_BUFFER_SIZE);
+        if (!Buffer)
+        {
+            printf("** ERROR: Unable to allocate buffer in SearchDatabaseTagless()!\n");
+            return;
+        }
+    }
+
+    // Ensure that the PRM scores of this spectrum are set, so that we can score candidates:
+    if (!Tweak->PRMScores)
+    {
+        if (VerboseFlag)
+        {
+            printf("[V] SetPRMScores()\n");
+        }
+        SetSpectrumPRMScores(Spectrum, Tweak);
+    }
+    // Open the database, and start reading:
+
+    DBFile = Info->DB->DBFile;
+
+    //DebugPrintPRMScores(Spectrum, Tweak); 
+    Info->RecordNumber = 0;
+    while (1)
+    {
+        if (VerboseFlag)
+        {
+            printf("[V] Bufferpos %d BufferEnd %d IsEOF %d FilePos %d Record# %d\n", BufferPos, BufferEnd, IsEOF, FilePos, Info->RecordNumber);
+        }
+
+        // Shunt bases toward front of buffer:
+        if (BufferPos > DB_SHUNT_BOUNDARY)
+        {
+            memmove(Buffer, Buffer + BufferPos, DB_BUFFER_SIZE - BufferPos);
+            BufferEnd = DB_BUFFER_SIZE - BufferPos;
+            BufferPos = 0;
+        }
+        // Read more bases:
+        if (!IsEOF && BufferEnd < DB_READ_BOUNDARY)
+        {
+            BytesRead = ReadBinary(Buffer + BufferEnd, sizeof(char), DB_BUFFER_SIZE - BufferEnd, DBFile);
+            BufferEnd += BytesRead;
+            if (!BytesRead)
+            {
+                IsEOF = 1;
+            }
+        }
+        if (BufferPos >= BufferEnd) // hit the end of the database
+        {
+            break;
+        }
+
+        // If this isn't an amino acid, skip onward:
+        if (!PeptideMass[Buffer[BufferPos]])
+        {
+            BufferPos++;
+            FilePos++;
+            continue;
+        }
+        if (Buffer[BufferPos]=='*')
+        { 
+            BufferPos++;
+            FilePos++;
+            Info->RecordNumber++;
+            continue;
+        }
+        // Try to find peptide matches from a prefix of Buffer[BufferPos:]
+        if (MaxMods > 2)
+        {
+            // The SLOW way!
+            SkipBases = MSAlignmentGeneral(Info, Buffer + BufferPos, BufferEnd - BufferPos, ParentResidueMass, 
+                MaxMods, ScoreToBeat, FilePos, Tweak);
+            if (VerboseFlag)
+            {
+                printf("[V] General() return.  SkipBases %d\n", SkipBases);
+            }
+
+        }
+        else if (MaxMods > 1)
+        {
+            // Extend into a match, possibly up to 2 mods.  
+            SkipBases = ExtendMatchRightwardDuo(Info, Buffer + BufferPos, BufferEnd - BufferPos, ParentResidueMass, 
+                min(2, MaxMods), ScoreToBeat, FilePos, Tweak);
+        }
+        else
+        {
+            // Extend into a match using at most one mod.  
+            if (BufferPos)
+            {
+                PrefixChar = Buffer[BufferPos - 1];
+            }
+            else
+            {
+                PrefixChar = '*';
+            }
+            SkipBases = SeekMatch1PTM(Info, Buffer + BufferPos, BufferEnd - BufferPos, ParentResidueMass, ScoreToBeat, Tweak, FilePos, PrefixChar);
+        }
+        // RightExtensionCount is set to -1 if there's an error and this spectrum can't be searched.
+        if (RightExtensionCount < 0)
+        {
+            break;
+        }
+        BufferPos += SkipBases;
+        FilePos += SkipBases;
+        if (Buffer[BufferPos-1] == '*')
+        {
+            Info->RecordNumber++;
+        }
+        if (Spectrum->Node->MatchCount == GlobalOptions->StoreMatchCount)
+        {
+            ScoreToBeat = Spectrum->Node->LastMatch->InitialScore;
+        }
+        
+    }
+    SafeFree(Buffer);
+    Buffer = NULL;
+
+    // At this point, we have a list of candidates.  They've been quick-scored, but we can sort them better if
+    // we score them more meticulously.  The *caller* will call ScoreSpectralMatches(Spectrum) to re-score them
+    // (we could do it here, but that would be wrong in the MultiCharge case)
+    //fclose(DBFile);
+}
+
+void DebugPrintMatch(Peptide* Match)
+{
+    int ModIndex;
+    char* Amino;
+    int Mass = 0;
+    printf("Match '%s' ", Match->Bases);
+    // Show the mods:
+    for (ModIndex = 0; ModIndex < MAX_PT_MODS; ModIndex++)
+    {
+        if (Match->AminoIndex[ModIndex] < 0)
+        {
+            break;
+        }
+        printf(" %c%d:%s(%.2f)", Match->Bases[Match->AminoIndex[ModIndex]], Match->AminoIndex[ModIndex],
+            Match->ModType[ModIndex]->Name, Match->ModType[ModIndex]->RealDelta/100.0);
+        Mass += Match->ModType[ModIndex]->RealDelta;
+    }
+    for (Amino = Match->Bases; *Amino; Amino++)
+    {
+        Mass += PeptideMass[*Amino];
+    }
+    printf(" mass %.2f score %d:%.3f dcn%.2f dcno%.2f\n", Mass/(float)MASS_SCALE, Match->InitialScore, Match->MatchQualityScore, Match->DeltaCN, Match->DeltaCNOther);
+}
+
+// Print out a list of matches for the spectrum node (Spectrum->FirstMatch through Spectrum->LastMatch).
+void DebugPrintMatchList(SpectrumNode* Spectrum)
+{
+    Peptide* Match;
+    //
+    printf("Spectrum has %d matches:\n", Spectrum->MatchCount);
+    for (Match = Spectrum->FirstMatch; Match; Match = Match->Next)
+    {
+        DebugPrintMatch(Match);
+    }
+}
+
+// Re-score spectral matches.  The matches in the list Spectrum->FirstMatch have been 
+// quick-scored, but we can sort them better if we score them more meticulously.  
+// Let's do so, and re-sort the list based on the new scores.
+void MQScoreSpectralMatches(SpectrumNode* Node)
+{
+    Peptide* PrevMatch;
+    Peptide* Match;
+    int OldScore;
+    int VerboseFlag = 0;
+    MSSpectrum* Spectrum = Node->Spectrum;
+    //
+    if (!Node->FirstMatch)
+    {
+        return; // that was easy - we scored 0 of 0 :)
+    }
+    PrevMatch = Node->FirstMatch;
+
+    Match = PrevMatch->Next;
+    Node->FirstMatch = NULL;
+    Node->LastMatch = NULL;
+    Node->MatchCount = 0;
+    while (PrevMatch)
+    {
+        PrevMatch->Prev = NULL;
+        PrevMatch->Next = NULL;
+        OldScore = PrevMatch->InitialScore;
+	
+        ComputeMQScoreFeatures(Spectrum, PrevMatch, PrevMatch->ScoreFeatures, VerboseFlag);
+#ifdef MQSCORE_USE_SVM
+        PrevMatch->MatchQualityScore = SVMComputeMQScore(Spectrum, PrevMatch, PrevMatch->ScoreFeatures);
+#else
+        PrevMatch->MatchQualityScore = LDAComputeMQScore(Spectrum, PrevMatch, PrevMatch->ScoreFeatures);
+#endif
+        StoreSpectralMatch(Spectrum, PrevMatch, strlen(PrevMatch->Bases), 1);
+        PrevMatch = Match;
+        if (!Match)
+        {
+            break;
+        }
+        Match = Match->Next;
+    }
+    //SetMatchDeltaCN(Spectrum);
+}
+
+void PrunePoorGraphNodes(TagGraph* Graph)
+{
+    int NodeIndex = 0;
+    float* NodeScores;
+    float CutoffNodeScore;
+    TagGraphNode* Node;
+    TagGraphNode* NextNode = NULL;
+    TagGraphEdge* Edge;
+    TagGraphEdge* NextEdge = NULL;
+    TagGraphEdge* PrevEdge = NULL;
+    //
+    // Write the node scores to array NodeScores, sort them, and select the cutoff score.
+    NodeScores = (float*)malloc(sizeof(float) * Graph->NodeCount);
+    
+    for (NodeIndex = 0, Node = Graph->FirstNode; Node; Node = Node->Next,NodeIndex++)
+    {
+        NodeScores[NodeIndex] = Node->Score;
+    }
+    qsort(NodeScores, NodeIndex, sizeof(float), (QSortCompare)CompareFloats);
+    CutoffNodeScore = NodeScores[498]; // Allow two endpoint nodes to survive
+    SafeFree(NodeScores);
+    // Eliminate every node whose score is <= the cutoff.  Start by eliminating all EDGES to such nodes!
+    for (Node = Graph->FirstNode; Node; Node = Node->Next)
+    {
+        PrevEdge = NULL;
+        for (Edge = Node->FirstEdge; Edge; Edge = NextEdge)
+        {
+            NextEdge = Edge->Next;
+            if (Edge->ToNode->Score <= CutoffNodeScore && (Edge->ToNode->NodeType == evGraphNodeB || Edge->ToNode->NodeType == evGraphNodeY))
+            {
+                // Free this edge:
+                if (PrevEdge)
+                {
+                    PrevEdge->Next = Edge->Next;
+                }
+                if (Node->FirstEdge == Edge)
+                {
+                    Node->FirstEdge = Edge->Next;
+                }
+                if (Node->LastEdge == Edge)
+                {
+                    Node->LastEdge = PrevEdge;
+                }
+                SafeFree(Edge);
+            }
+            else
+            {
+                PrevEdge = Edge;
+            }
+        }
+    }
+    // Now free the nodes themselves:
+    for (Node = Graph->FirstNode; Node; Node = NextNode)
+    {
+        NextNode = Node->Next;
+        if (Node->Score <= CutoffNodeScore && (Node->NodeType == evGraphNodeB || Node->NodeType == evGraphNodeY))
+        {
+            if (Node->Prev)
+            {
+                Node->Prev->Next = Node->Next;
+            }
+            if (Node->Next)
+            {
+                Node->Next->Prev = Node->Prev;
+            }
+            if (Graph->FirstNode == Node)
+            {
+                Graph->FirstNode = Node->Next;
+            }
+            if (Graph->LastNode == Node)
+            {
+                Graph->LastNode = Node->Prev;
+            }
+            FreeTagGraphNode(Node);
+            Graph->NodeCount--;
+        }
+    }
+    if (Graph->NodeCount > 500)
+    {
+        printf("* ERROR: Failed to prune excess graph nodes!\n");
+    }
+    // Fix node numbering:
+    for (NodeIndex = 0, Node = Graph->FirstNode; Node; Node = Node->Next, NodeIndex++)
+    {
+        Node->Index = NodeIndex;
+    }
+    // And now, rebuild the node index:
+    TagGraphBuildNodeIndex(Graph);
+}
+
+// Called after populating the tag graph with nodes.
+// Now we add back-edges between any two nodes that can be linked by a JUMP of one, two,
+// or three amino acids (within the skew tolerance).
+void TagGraphPopulateBackEdges(TagGraph* Graph)
+{
+    TagGraphNode* Node;
+    TagGraphNode* OtherNode;
+    TagGraphBackEdge* Edge;
+    TagGraphBackEdge* OldEdge;
+    int AA1;
+    int AA2;
+    int AA3;
+    int Mass;
+    int AA1Mass;
+    int AA2Mass;
+    int AA3Mass;
+    int Skew;
+    int AbsSkew;
+    int NextBackEdgeIndex = 0;
+    int BackEdgeBufferSize;
+    //
+
+    if (!Graph->NodeIndex)
+    {
+        TagGraphBuildNodeIndex(Graph);
+    }
+    SafeFree(Graph->BackEdgeBuffer);
+    BackEdgeBufferSize = min(5000000, 8420 * Graph->NodeCount);
+    Graph->BackEdgeBuffer = (TagGraphBackEdge*)calloc(BackEdgeBufferSize, sizeof(TagGraphBackEdge));
+    if (!Graph->BackEdgeBuffer)
+    {
+        printf("*** ERROR: Unable to allocate BackEdgeBuffer!\n");
+        fflush(stdout);
+    }
+    // NB: We can't easily realloc the BackEdgeBuffer, because there are many pointers into it.  If
+    // we overflow the buffer, we just complain and bail out rather than crash.
+
+    // Ensure that there aren't too many PRM nodes.  The array in ExtendMatchRightwardDuo assumes that there
+    // are at most 500.  That should be *plenty*, since at most 20-30 of them can correspond to true PRMs.
+    if (Graph->NodeCount > 500)
+    {
+        PrunePoorGraphNodes(Graph);
+    }
+
+    for (Node = Graph->FirstNode; Node; Node = Node->Next)
+    {
+        Node->BackEdge = (TagGraphBackEdge**)calloc(AMINO_ACIDS, sizeof(TagGraphBackEdge*));
+        Node->BackEdgeDouble = (TagGraphBackEdge**)calloc(AMINO_ACIDS*AMINO_ACIDS, sizeof(TagGraphBackEdge*));
+        Node->BackEdgeTriple = (TagGraphBackEdge**)calloc(AMINO_ACIDS*AMINO_ACIDS*AMINO_ACIDS, sizeof(TagGraphBackEdge*));
+        for (AA1 = 0; AA1 < AMINO_ACIDS; AA1++)
+        {
+            AA1Mass = PeptideMass[AA1 + 'A'];
+            if (!AA1Mass)
+            {
+                continue;
+            }
+            // Try to jump back by this amino acid's mass:
+            Mass = Node->Mass - AA1Mass;
+            if (Mass < -GlobalOptions->Epsilon)
+            {
+                continue;
+            }
+            Mass = max(Mass, 0);
+            OtherNode = Graph->NodeIndex[Mass / MASS_SCALE];
+            while (OtherNode)
+            {
+                Skew = OtherNode->Mass - Mass;
+                if (Skew > GlobalOptions->Epsilon)
+                {
+                    break;
+                }
+                if (Skew < -GlobalOptions->Epsilon)
+                {
+                    OtherNode = OtherNode->Next;
+                    continue;
+                }
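+                // Skew is in scaled mass units; dividing by 10 converts it to a g_SkewPenalty bin (clamped below).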
+                AbsSkew = abs(Skew) / 10;
+                Edge = Graph->BackEdgeBuffer + NextBackEdgeIndex;
+                NextBackEdgeIndex++;
+                if (NextBackEdgeIndex >= BackEdgeBufferSize)
+                {
+                    printf("** Too many BackEdges for buffer - bailing out!\n");
+                    return;
+                }
+                //Edge = (TagGraphBackEdge*)calloc(1, sizeof(TagGraphBackEdge));
+                Edge->FromNode = Node;
+                Edge->ToNode = OtherNode;
+                Edge->Skew = Skew;
+                Edge->Next = NULL;
+                if (AbsSkew > g_SkewPenaltyMax)
+                {
+                    Edge->Score = g_SkewPenalty[g_SkewPenaltyMax];
+                }
+                else
+                {
+                    Edge->Score = g_SkewPenalty[AbsSkew];
+                }
+                OldEdge = Node->BackEdge[AA1];
+                if (!OldEdge)
+                {
+                    Node->BackEdge[AA1] = Edge;
+                }
+                else
+                {
+                    while (OldEdge->Next)
+                    {
+                        OldEdge = OldEdge->Next;
+                    }
+                    OldEdge->Next = Edge;
+                }
+                OtherNode = OtherNode->Next;
+            }
+
+            // Try to jump back by a pair of amino acids:
+            for (AA2 = 0; AA2 < AMINO_ACIDS; AA2++)
+            {
+                AA2Mass = PeptideMass[AA2 + 'A'];
+                if (!AA2Mass)
+                {
+                    continue;
+                }
+
+                Mass = Node->Mass - AA1Mass - AA2Mass;
+                if (Mass < -GlobalOptions->Epsilon)
+                {
+                    continue;
+                }
+                Mass = max(Mass, 0);
+                OtherNode = Graph->NodeIndex[Mass / MASS_SCALE];
+                while (OtherNode)
+                {
+                    Skew = OtherNode->Mass - Mass;
+                    if (Skew > GlobalOptions->Epsilon)
+                    {
+                        break;
+                    }
+                    if (Skew < -GlobalOptions->Epsilon)
+                    {
+                        OtherNode = OtherNode->Next;
+                        continue;
+                    }
+                    AbsSkew = abs(Skew) / 10;
+
+                    Edge = Graph->BackEdgeBuffer + NextBackEdgeIndex;
+                    NextBackEdgeIndex++;
+                    if (NextBackEdgeIndex >= BackEdgeBufferSize)
+                    {
+                        printf("** Too many BackEdges for buffer - bailing out!\n");
+                        return;
+                    }
+                
+                    //Edge = (TagGraphBackEdge*)calloc(1, sizeof(TagGraphBackEdge));
+                    Edge->FromNode = Node;
+                    Edge->ToNode = OtherNode;
+                    Edge->Skew = Skew;
+                    Edge->Next = NULL;
+                    Edge->HalfMass = Node->Mass - AA1Mass;
+                    if (AbsSkew > g_SkewPenaltyMax)
+                    {
+                        Edge->Score = g_SkewPenalty[g_SkewPenaltyMax];
+                    }
+                    else
+                    {
+                        Edge->Score = g_SkewPenalty[AbsSkew];
+                    }
+                    OldEdge = Node->BackEdgeDouble[AA1 * AMINO_ACIDS + AA2];
+                    if (!OldEdge)
+                    {
+                        Node->BackEdgeDouble[AA1 * AMINO_ACIDS + AA2] = Edge;
+                    }
+                    else
+                    {
+                        while (OldEdge->Next)
+                        {
+                            OldEdge = OldEdge->Next;
+                        }
+                        OldEdge->Next = Edge;
+                    }
+                    OtherNode = OtherNode->Next;
+                }
+
+                // Triple-jump (three amino acids) 
+                for (AA3 = 0; AA3 < AMINO_ACIDS; AA3++)
+                {
+                    AA3Mass = PeptideMass[AA3+'A'];
+                    if (!AA3Mass)
+                    {
+                        continue;
+                    }
+
+                    Mass = Node->Mass - AA1Mass - AA2Mass - AA3Mass;
+                    if (Mass < -GlobalOptions->Epsilon)
+                    {
+                        continue;
+                    }
+                    Mass = max(Mass, 0);
+                    OtherNode = Graph->NodeIndex[Mass / MASS_SCALE];
+                    while (OtherNode)
+                    {
+                        Skew = OtherNode->Mass - Mass;
+                        if (Skew > GlobalOptions->Epsilon)
+                        {
+                            break;
+                        }
+                        if (Skew < -GlobalOptions->Epsilon)
+                        {
+                            OtherNode = OtherNode->Next;
+                            continue;
+                        }
+                        AbsSkew = abs(Skew) / 10;
+
+                        Edge = Graph->BackEdgeBuffer + NextBackEdgeIndex;
+                        NextBackEdgeIndex++;
+                        if (NextBackEdgeIndex >= BackEdgeBufferSize)
+                        {
+                            printf("** Too many BackEdges for buffer - bailing out!\n");
+                            return;
+                        }
+                    
+                        //Edge = (TagGraphBackEdge*)calloc(1, sizeof(TagGraphBackEdge));
+                        Edge->FromNode = Node;
+                        Edge->ToNode = OtherNode;
+                        Edge->Skew = Skew;
+                        Edge->Next = NULL;
+                        Edge->HalfMass = Node->Mass - AA1Mass;
+                        Edge->HalfMass2 = Node->Mass - AA1Mass - AA2Mass;
+                        if (AbsSkew > g_SkewPenaltyMax)
+                        {
+                            Edge->Score = g_SkewPenalty[g_SkewPenaltyMax];
+                        }
+                        else
+                        {
+                            Edge->Score = g_SkewPenalty[AbsSkew];
+                        }
+                        OldEdge = Node->BackEdgeTriple[AA1*676 + AA2*26 + AA3];
+                        if (!OldEdge)
+                        {
+                            Node->BackEdgeTriple[AA1*676 + AA2*26 + AA3] = Edge;
+                        }
+                        else
+                        {
+                            while (OldEdge->Next)
+                            {
+                                OldEdge = OldEdge->Next;
+                            }
+                            OldEdge->Next = Edge;
+                        }
+                        OtherNode = OtherNode->Next;
+                    }
+                }
+            }
+        }
+    }
+    return;
+}
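+
+// A minimal illustrative sketch (not called by the search code) of how the back-edge lookup keys
+// used above are composed.  It assumes AMINO_ACIDS == 26, so that 676 == 26 * 26; letters map to
+// 0..25 by subtracting 'A', exactly as in TagGraphPopulateBackEdges and the alignment loops below.
+static int SketchBackEdgeDoubleKey(char Nearest, char Next)
+{
+    // Key into Node->BackEdgeDouble for a two-residue jump, nearest residue first:
+    return (Nearest - 'A') * AMINO_ACIDS + (Next - 'A');
+}
+
+static int SketchBackEdgeTripleKey(char Nearest, char Middle, char Farthest)
+{
+    // Key into Node->BackEdgeTriple for a three-residue jump, nearest residue first:
+    return (Nearest - 'A') * 676 + (Middle - 'A') * 26 + (Farthest - 'A');
+}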
+
+// The maximum dimensions of the Duo Table are 512 rows (for amino acids) and 500 columns (for the nodes).
+#define MAX_ROWS 512
+#define MAX_NODES 500
+
+// Find the end of this peptide block.  Returns 1 if the block is valid, 0 if we
+// needn't bother searching this block at all.
+int FindPeptideBlockEnd(MSSpectrum* Spectrum, char* Buffer, int BufferEnd, int* pMaxAmino, int* pReturnAmino)
+{
+    int AccumMass = 0;
+    int AminoIndex;
+    char Amino;
+    //
+    *pMaxAmino = MAX_ROWS - 1; // default;
+    *pReturnAmino = -1; // uninitialized
+    // Iterate over the amino acids, keeping track of the total mass (AccumMass), and watching for
+    // record boundaries.
+    for (AminoIndex = 0; AminoIndex < MAX_ROWS; AminoIndex++)
+    {
+        if (AminoIndex >= BufferEnd)
+        {
+            *pMaxAmino = AminoIndex;
+            *pReturnAmino = *pMaxAmino;
+            break;
+        }
+        Amino = Buffer[AminoIndex];
+        if (Amino == '*')
+        {
+            // No peptide can span record boundaries, so we'll stop the block here.
+            *pMaxAmino = AminoIndex;// + 1;
+            *pReturnAmino = *pMaxAmino;
+            break;
+        }
+        if (!PeptideMass[Amino])
+        {
+            // A bogus amino acid in the database.  Stop the block here.
+            *pMaxAmino = AminoIndex;// + 1;
+            *pReturnAmino = *pMaxAmino;
+            break;
+        }
+        AccumMass += PeptideMass[Amino];
+        if (AminoIndex >= MAX_ROWS-1)
+        {
+            // Our block is as large as it can get.
+            *pMaxAmino = AminoIndex-1;
+            *pReturnAmino = *pMaxAmino - 20;
+            break;
+        }
+    }
+    if (*pMaxAmino < 5)
+    {
+        // Not enough amino acids to make a reasonable peptide candidate, so just exit.
+        // (The longest we could get is 4aa, which is too small)
+        *pReturnAmino = *pMaxAmino;
+        return 0; 
+    }
+    // Check to see whether the amino acids we've got are large enough - with PTMs included - to match the target:
+    AccumMass += GlobalOptions->MaxPTMDelta*100*2 + PARENT_MASS_BOOST + GlobalOptions->ParentMassEpsilon;
+    if (AccumMass < Spectrum->ParentMass)
+    {
+        // There's not enough peptide sequence left to match our target.
+        *pReturnAmino = *pMaxAmino;
+        return 0; 
+    }
+    if (*pReturnAmino < 0)
+    {
+        // Shift forward by most of the length of the block.  (Leave some overlap, because a valid peptide
+        // may start near the end of block #1)
+        *pReturnAmino = max(1, *pMaxAmino - 20);
+    }
+    return 1;
+}
+
+void DebugPrintPrefixSuffixTable(FILE* TableFile, char* Buffer, int MaxAmino, int* ScoreTable, int* MassTable)
+{
+    int AminoIndexI;
+    int AminoIndexJ;
+    int CellIndex;
+
+    // Header line:
+    fprintf(TableFile, "<  >\t");
+    for (AminoIndexJ = 0; AminoIndexJ <= MaxAmino; AminoIndexJ++)
+    {
+        if (AminoIndexJ > 0)
+        {
+            fprintf(TableFile, "[%d %c]\t", AminoIndexJ, Buffer[AminoIndexJ-1]);
+        }
+        else
+        {
+            fprintf(TableFile, "[0]\t");
+        }
+    }
+    fprintf(TableFile, "\n");
+    // Other lines:
+    for (AminoIndexI = 0; AminoIndexI <= MaxAmino; AminoIndexI++)
+    {
+        if (AminoIndexI > 0)
+        {
+            fprintf(TableFile, "[%d %c]\t", AminoIndexI, Buffer[AminoIndexI-1]);
+        }
+        else
+        {
+            fprintf(TableFile, "[0]\t");
+        }
+
+        for (AminoIndexJ = 0; AminoIndexJ <= MaxAmino; AminoIndexJ++)
+        {
+            if (AminoIndexJ < AminoIndexI)
+            {
+                fprintf(TableFile, "\t");
+                continue;
+            }
+            CellIndex = AminoIndexI*MAX_ROWS + AminoIndexJ;
+            fprintf(TableFile, "%d : %d\t", MassTable[CellIndex], ScoreTable[CellIndex]);
+        }
+        fprintf(TableFile, "\n");
+    }
+}
+
+void DebugPrintPrefixSuffixTables(int MaxAmino, char* Buffer, 
+                                  int* PrefixTable, int* SuffixTable, int* PrefixMassTable, int* SuffixMassTable)
+{
+    FILE* TableFile;
+    TableFile = fopen("PrefixSuffix.xls", "w");
+    if (!TableFile)
+    {
+        return;
+    }
+    fprintf(TableFile, "Prefix table:\n");
+    DebugPrintPrefixSuffixTable(TableFile, Buffer, MaxAmino, PrefixTable, PrefixMassTable);
+    fprintf(TableFile, "\n\nSuffix table:\n");
+    DebugPrintPrefixSuffixTable(TableFile, Buffer, MaxAmino, SuffixTable, SuffixMassTable);
+    fclose(TableFile);
+}
+
+// Fill in the score tables PrefixTable and SuffixTable, plus the mass tables PrefixMassTable and SuffixMassTable.
+// The entry PrefixTable[i, j] is the score that one obtains by matching Buffer[i..j] against the spectrum as
+// a prefix.  PrefixTable[i,i] uses one PRM score, PrefixTable[i, i + 1] uses two PRM scores, and so on.
+// Most candidate peptides will have a PrefixTable entry as part of their final score.
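+// Concretely, for 1 <= i <= j <= MaxAmino:
+//   PrefixMassTable[i, j] = PrefixMassTable[i, j-1] + PeptideMass[Buffer[j-1]]   (with PrefixMassTable[i, i] = PeptideMass[Buffer[i-1]])
+//   PrefixTable[i, j]     = PrefixTable[i, j-1] + Tweak->PRMScores[MASS_TO_BIN(PrefixMassTable[i, j])]   (PRM clamped to the valid range)
+// The suffix tables mirror this, accumulating downward from MatchMass.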
+void FillPrefixSuffixTables(MSSpectrum* Spectrum, SpectrumTweak* Tweak, int MatchMass, char* Buffer, int MaxAmino, 
+    int* PrefixTable, int* SuffixTable, int* PrefixMassTable, int* SuffixMassTable)
+{
+    int AminoIndexI;
+    int AminoIndexJ;
+    int CellIndex;
+    int PrevCellIndex = 0;
+    int PRM;
+
+    int MaxPRM = Tweak->PRMScoreMax;
+    //
+    // Brute-force initialization.  (Note: Don't use memset here; setting every byte
+    // to -1 is hacky, and the resulting scores can easily wrap around to become hugely positive.)
+    for (CellIndex = 0; CellIndex < MAX_ROWS * MAX_ROWS; CellIndex++)
+    {
+        PrefixTable[CellIndex] = FORBIDDEN_PATH;
+        SuffixTable[CellIndex] = FORBIDDEN_PATH;
+        PrefixMassTable[CellIndex] = -999999;
+        SuffixMassTable[CellIndex] = -999999;
+    }
+    
+    for (AminoIndexI = 1; AminoIndexI <= MaxAmino; AminoIndexI++)
+    {
+        for (AminoIndexJ = AminoIndexI; AminoIndexJ <= MaxAmino; AminoIndexJ++)
+        {
+            CellIndex = AminoIndexI * MAX_ROWS + AminoIndexJ;
+            /////////////////////////////
+            // Prefix table:
+            if (AminoIndexJ == AminoIndexI)
+            {
+                PrefixMassTable[CellIndex] = PeptideMass[Buffer[AminoIndexI-1]];
+            }
+            else
+            {
+                PrevCellIndex = AminoIndexI*MAX_ROWS + (AminoIndexJ-1);
+                PrefixMassTable[CellIndex] = PrefixMassTable[PrevCellIndex] + PeptideMass[Buffer[AminoIndexJ-1]];
+            }
+            PRM = MASS_TO_BIN(PrefixMassTable[CellIndex]);
+
+            // Allow PRMs that are slightly too big or small:
+            if (PRM > -PRM_ARRAY_SLACK)
+            {
+                PRM = max(PRM, 0);
+            }
+            if (PRM < MaxPRM + 5)
+            {
+                PRM = min(MaxPRM, PRM);
+            }
+            if (PRM >= 0 && PRM <= MaxPRM)
+            {
+                PrefixTable[CellIndex] = Tweak->PRMScores[PRM];
+                if (AminoIndexJ != AminoIndexI)
+                {
+                    PrefixTable[CellIndex] += PrefixTable[PrevCellIndex];
+                }
+            }
+            else
+            {
+                PrefixTable[CellIndex] = FORBIDDEN_PATH;
+                break;
+            }
+        }
+    }
+    for (AminoIndexJ = MaxAmino; AminoIndexJ; AminoIndexJ--)
+    {
+        for (AminoIndexI = AminoIndexJ; AminoIndexI; AminoIndexI--)
+        {
+            CellIndex = AminoIndexI*MAX_ROWS + AminoIndexJ;
+
+            /////////////////////////////
+            // Suffix table:
+            if (AminoIndexJ == AminoIndexI)
+            {
+                SuffixMassTable[CellIndex] = MatchMass - PeptideMass[Buffer[AminoIndexI-1]];
+            }
+            else
+            {
+                PrevCellIndex = (AminoIndexI + 1)*MAX_ROWS + AminoIndexJ;
+                SuffixMassTable[CellIndex] = SuffixMassTable[PrevCellIndex] - PeptideMass[Buffer[AminoIndexI-1]];
+            }
+            PRM = MASS_TO_BIN(SuffixMassTable[CellIndex]);
+            if (PRM > -PRM_ARRAY_SLACK)
+            {
+                PRM = max(PRM, 0);
+            }
+            if (PRM < MaxPRM+5)
+            {
+                PRM = min(MaxPRM, PRM);
+            }
+            if (PRM >= 0 && PRM <= MaxPRM)
+            {
+                SuffixTable[CellIndex] = Tweak->PRMScores[PRM];
+                if (AminoIndexI!=AminoIndexJ)
+                {
+                    SuffixTable[CellIndex] += SuffixTable[PrevCellIndex];
+                }
+            }
+            else
+            {
+                SuffixTable[CellIndex] = FORBIDDEN_PATH;
+                break;
+            }
+        }
+    }
+
+}
+
+// Print the Duo table to a file, for debugging.  (This is most useful when searching a very small database, since then
+// the table has a manageable height)
+void DebugPrintDTable(MSSpectrum* Spectrum, char* Buffer, int* DTable, MassDelta** DeltaTable, int* PrevCellTable, int MaxAmino)
+{
+    int AminoIndex;
+    int NodeIndex;
+    TagGraphNode* Node;
+    FILE* DTableFile = NULL;
+    int CellIndex;
+    int PrevCellIndex = 0;
+    
+    int AminoBlockSize = Spectrum->Graph->NodeCount;
+    //
+    DTableFile = fopen("DTable.xls", "w");
+    if (!DTableFile)
+    {
+        return;
+    }
+    // Header:
+    fprintf(DTableFile, "\t");
+    for (NodeIndex = 0, Node = Spectrum->Graph->FirstNode; Node; Node = Node->Next, NodeIndex++)
+    {
+        fprintf(DTableFile, "%d (%.2f)\t", NodeIndex, Node->Mass / 100.0);
+    }
+    fprintf(DTableFile, "\n");
+    // Body:
+    for (AminoIndex = 0; AminoIndex < MaxAmino; AminoIndex++)
+    {
+        if (AminoIndex)
+        {
+            fprintf(DTableFile, "%c %d\t", Buffer[AminoIndex-1], AminoIndex);
+        }
+        else
+        {
+            fprintf(DTableFile, "%d\t", AminoIndex);
+        }
+        for (NodeIndex = 0, Node = Spectrum->Graph->FirstNode; Node; Node = Node->Next, NodeIndex++)
+        {
+            CellIndex = AminoIndex*AminoBlockSize + NodeIndex;
+            fprintf(DTableFile, "%d ", DTable[CellIndex]);
+            if (DeltaTable[CellIndex])
+            {
+                fprintf(DTableFile, "%s", DeltaTable[CellIndex]->Name);
+            }
+            PrevCellIndex = PrevCellTable[CellIndex];
+            if (PrevCellIndex >0)
+            {
+                fprintf(DTableFile, "-> (%d, %d)", PrevCellIndex/AminoBlockSize, PrevCellIndex%AminoBlockSize);
+            }
+            fprintf(DTableFile, "\t");
+        }
+        fprintf(DTableFile, "\n");
+    }
+
+    fclose(DTableFile);
+}
+
+void DebugPrintGeneralTable(MSSpectrum* Spectrum, char* Buffer, int MaxAmino, int MaxMods,
+    int* ScoreTable, int* PrevCellTable)
+{
+    int AminoIndex;
+    int NodeIndex;
+    int CellIndex;
+    int ModCountIndex;
+    FILE* DebugFile;
+    TagGraphNode* Node;
+    int AminoBlockSize;
+    int ZSize = MaxMods + 1;
+    //
+    DebugFile = fopen("DPTable.txt", "wb");
+    if (!DebugFile)
+    {
+        printf("Unable to open DPTable.txt - not debugprinting.\n");
+        return;
+    }
+    AminoBlockSize = Spectrum->Graph->NodeCount * ZSize;
+    for (ModCountIndex = 0; ModCountIndex < ZSize; ModCountIndex++)
+    {
+        fprintf(DebugFile, "\nZ = %d\n", ModCountIndex);
+        /////////////////////////////////
+        // Column headers:
+        fprintf(DebugFile, "\t");
+        for (AminoIndex = 0; AminoIndex < MaxAmino; AminoIndex++)
+        {
+            fprintf(DebugFile, "%d\t", AminoIndex);
+        }
+        fprintf(DebugFile, "\n");
+        fprintf(DebugFile, "\t\t");
+        for (AminoIndex = 1; AminoIndex < MaxAmino; AminoIndex++)
+        {
+            fprintf(DebugFile, "%c\t", Buffer[AminoIndex - 1]);
+        }
+        fprintf(DebugFile, "\n");
+        /////////////////////////////////
+        // Body:
+        for (NodeIndex = 0, Node = Spectrum->Graph->FirstNode; Node; Node = Node->Next, NodeIndex++)
+        {
+            // Print a ROW for this node:
+            fprintf(DebugFile, "%d (%.2f)\t", NodeIndex, Node->Mass / 100.0);
+            for (AminoIndex = 0; AminoIndex < MaxAmino; AminoIndex++)
+            {
+                CellIndex = AminoIndex * AminoBlockSize + NodeIndex * ZSize + ModCountIndex;
+                fprintf(DebugFile, "%d (c%d)\t", ScoreTable[CellIndex], CellIndex);
+            }
+            fprintf(DebugFile, "\n");
+        }
+    }
+    fclose(DebugFile);
+}
+
+
+static int* PrefixTable = NULL;
+static int* SuffixTable = NULL;
+static int* PrefixMassTable = NULL;
+static int* SuffixMassTable = NULL;
+
+// MS-Alignment algorithm, general case (handles k>2).  For most purposes, this code is
+// unacceptably slow and non-selective.  But, for completeness, it is implemented.
+// In practice, one should use "mods,1" or "mods,2" and find the corpus of 
+// available PTMs that way.
+int MSAlignmentGeneral(SearchInfo* Info, char* Buffer, int BufferEnd, int MatchMass, 
+    int MaxMods, int ScoreToBeat, int FilePos, SpectrumTweak* Tweak)
+{
+    static int* PrevCellTable = NULL;
+    static int* ScoreTable = NULL;
+    // StartPointPenalty and EndPointPenalty provide a simple model of protease specificity.
+    int StartPointPenalty[MAX_ROWS];
+    int EndPointPenalty[MAX_ROWS];
+    int Result;
+    int ReturnAmino = -1;
+    int AminoIndex;
+    int AA;
+    int AminoBlock = 0;
+    int NodeIndex;
+    int CellIndex;
+    int SliceIndex;
+    int AA2;
+    int AA3;
+    TagGraphNode* Node;
+    TagGraphBackEdge* Edge;
+    int ModCountIndex;
+    int AminoBlockSize;
+    int ZSize = MaxMods + 1;
+    int MaxAmino;
+    int AAMass;
+    int AA2Mass;
+    int BackEdgeDoubleIndex;
+    int BackEdgeTripleIndex;
+    int PrevCellIndex;
+    int Score;
+    int PRM;
+    TagGraphNode* BackNode;
+    //char MatchBuffer[256];
+    char MatchBufferPos;
+    Peptide* Match;
+    int Mass;
+    int BackNodeIndex;
+    int BackNodeDirection;
+    int Delta;
+    int MinPossibleDelta = -130000; // W->G mutation
+    int MaxPossibleDelta = GlobalOptions->MaxPTMDelta * MASS_SCALE;
+    int DeltaBin;
+    MassDeltaNode* DeltaNode;
+    int Skew;
+    int AbsSkew;
+    int MaxPRM = Tweak->PRMScoreMax - 1;
+    int X;
+    int Y;
+    int NextY;
+    int Z;
+    int MatchPTMCount;
+    int ModIndex;
+    int TokenDropped;
+    MSSpectrum* Spectrum = Info->Spectrum;
+    //
+
+    // Allocate tables, if necessary:
+    if (!PrevCellTable)
+    {
+        PrevCellTable = (int*)malloc(sizeof(int) * MAX_ROWS * MAX_ROWS * (MaxMods + 1));
+        ScoreTable = (int*)malloc(sizeof(int) * MAX_ROWS * MAX_NODES * (MaxMods + 1));
+        DeltaTable = (MassDelta**)malloc(sizeof(MassDelta*) * MAX_ROWS * MAX_NODES * (MaxMods+1));
+        MassDeltaTable = (int*)malloc(sizeof(int) * MAX_ROWS * MAX_NODES * (MaxMods + 1));
+    }
+
+    /////////////////////////////////////////
+    // Find MaxAmino:
+    //VerboseFlag = 1;
+    if (Info->VerboseFlag)
+    {
+        printf("[V] FindPeptideBlockEnd:\n");
+    }
+    Result = FindPeptideBlockEnd(Spectrum, Buffer, BufferEnd, &MaxAmino, &ReturnAmino);
+    if (!Result)
+    {
+        // No extension necessary.  Advance the database pointer:
+        return ReturnAmino;
+    }
+    if (Info->VerboseFlag)
+    {
+        printf("[V] FindPeptideBlockEnd: MaxAmino %d returnamino %d\n", MaxAmino, ReturnAmino);
+    }
+
+    // Apply a slap-on-the-wrist for using non-tryptic peptides:
+    for (AminoIndex = 0; AminoIndex < MaxAmino; AminoIndex++)
+    {
+        StartPointPenalty[AminoIndex] = 0;
+        if (AminoIndex)
+        {
+            AA = Buffer[AminoIndex - 1];
+            if (AA != 'R' && AA != 'K' && AA != '*')
+            {
+                StartPointPenalty[AminoIndex] = -500;
+            }
+        }
+
+        EndPointPenalty[AminoIndex] = 0;
+        if (AminoIndex)
+        {
+            AA = Buffer[AminoIndex - 1];
+            if ((AA != 'R' && AA != 'K') && (AminoIndex <= MaxAmino-1 && Buffer[AminoIndex + 1]!='*'))
+            {
+                EndPointPenalty[AminoIndex] = -500;
+            }
+        }
+    }
+    AminoBlockSize = Spectrum->Graph->NodeCount * ZSize;
+    // Loop over the d.p. table to populate scores and path.
+    // Iterate by amino acid index (row), by node (column), then by PTMCount (z).
+    for (AminoIndex = 0; AminoIndex <= MaxAmino; AminoBlock += AminoBlockSize, AminoIndex++)
+    {
+        CellIndex = AminoBlock;
+        AA2 = 0;
+        AA3 = 0;
+        if (AminoIndex)
+        {
+            AA = Buffer[AminoIndex-1] - 'A';
+            AAMass = PeptideMass[Buffer[AminoIndex - 1]];
+        }
+        if (AminoIndex > 1)
+        {
+            AA2 = Buffer[AminoIndex-2] - 'A';
+            AA2Mass = PeptideMass[Buffer[AminoIndex - 2]];
+            BackEdgeDoubleIndex = AA*AMINO_ACIDS + AA2;
+        }
+        if (AminoIndex > 2)
+        {
+            AA3 = Buffer[AminoIndex - 3] - 'A';
+            BackEdgeTripleIndex = AA*676 + AA2*26 + AA3;
+        }
+        for (NodeIndex = 0, Node = Spectrum->Graph->FirstNode; Node; Node = Node->Next, NodeIndex++)
+        {
+            SliceIndex = CellIndex;
+            for (ModCountIndex = 0; ModCountIndex < ZSize; ModCountIndex++)
+            {
+                // Check our cell index:
+                if (CellIndex != AminoIndex * AminoBlockSize + NodeIndex * ZSize + ModCountIndex)
+                {
+                    printf("Bad cell index %d, %d, %d -> %d (%d)\n", NodeIndex, AminoIndex, ModCountIndex, CellIndex,
+                        AminoIndex * AminoBlockSize + NodeIndex * ZSize + ModCountIndex);
+                }
+                ScoreTable[CellIndex] = FORBIDDEN_PATH; // default
+                DeltaTable[CellIndex] = NULL;
+                PrevCellTable[CellIndex] = -1;
+                
+                ///////////////
+                // Free rides:
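+                // (A node within ParentMassEpsilon of mass 0 can start a path here at no cost
+                // beyond the start-point penalty, as long as no PTMs have been placed yet.)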
+                if (ModCountIndex == 0 && Node->Mass < GlobalOptions->ParentMassEpsilon)
+                {
+                    ScoreTable[CellIndex] = StartPointPenalty[AminoIndex];
+                    PrevCellTable[CellIndex] = -1;
+                    DeltaTable[CellIndex] = NULL;
+                    MassDeltaTable[CellIndex] = Node->Mass;
+                    CellIndex++;
+                    continue;
+                }
+                ///////////////
+                // Drop a token:
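+                // ("Dropping a token" carries the score forward from the cell with one fewer PTM,
+                // i.e. we may reach this z level without placing a modification at this residue.)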
+                TokenDropped = 0;
+                if (ModCountIndex)
+                {
+                    PrevCellIndex = CellIndex - 1; 
+                    ScoreTable[CellIndex] = ScoreTable[PrevCellIndex];
+                    PrevCellTable[CellIndex] = PrevCellIndex;
+                    TokenDropped = 1;
+                }
+
+                // And that's all we do on the top row:
+                if (AminoIndex == 0)
+                {
+                    CellIndex++;
+                    continue; 
+                }
+               
+                ///////////////
+                // One unmodified amino acid:
+                Edge = Node->BackEdge[AA];
+                while (Edge)
+                {
+                    PrevCellIndex = AminoBlock - AminoBlockSize + (Edge->ToNode->Index * ZSize) + ModCountIndex;
+                    Score = ScoreTable[PrevCellIndex] + Edge->Score; 
+                    if (Score > ScoreTable[CellIndex])
+                    {
+                        ScoreTable[CellIndex] = Score;
+                        PrevCellTable[CellIndex] = PrevCellIndex;
+                        DeltaTable[CellIndex] = NULL;
+                        MassDeltaTable[CellIndex] = Edge->Skew + MassDeltaTable[PrevCellIndex];
+                        TokenDropped = 0;
+                    }
+                    Edge = Edge->Next;
+                }
+
+                ///////////////
+                // Two unmodified amino acids:
+                if (AminoIndex > 1)
+                {
+                    Edge = Node->BackEdgeDouble[BackEdgeDoubleIndex];
+                    while (Edge)
+                    {
+                        PrevCellIndex = AminoBlock - AminoBlockSize - AminoBlockSize + (Edge->ToNode->Index * ZSize) + ModCountIndex;
+                        // Accumulate points for the middle of the jump:
+                        PRM = MASS_TO_BIN(Edge->HalfMass);
+                        Score = Tweak->PRMScores[PRM] + ScoreTable[PrevCellIndex] + Edge->Score;
+                        if (Score > ScoreTable[CellIndex])
+                        {
+                            ScoreTable[CellIndex] = Score;
+                            PrevCellTable[CellIndex] = PrevCellIndex;
+                            DeltaTable[CellIndex] = NULL;
+                            MassDeltaTable[CellIndex] = Edge->Skew + MassDeltaTable[PrevCellIndex];
+                            TokenDropped = 0;
+                        }
+                        Edge = Edge->Next;
+                    }
+                }
+                ///////////////
+                // Three unmodified amino acids:
+                if (AminoIndex > 2)
+                {
+                    Edge = Node->BackEdgeTriple[BackEdgeTripleIndex];
+                    while (Edge)
+                    {
+                        PrevCellIndex = AminoBlock - AminoBlockSize - AminoBlockSize - AminoBlockSize + (Edge->ToNode->Index * ZSize) + ModCountIndex;
+                        // Accumulate points for the middle of the jump:
+                        PRM = MASS_TO_BIN(Edge->HalfMass);
+                        Score = Tweak->PRMScores[PRM] + ScoreTable[PrevCellIndex] + Edge->Score;
+                        PRM = MASS_TO_BIN(Edge->HalfMass2);
+                        Score += Tweak->PRMScores[PRM];
+                        if (Score > ScoreTable[CellIndex])
+                        {
+                            ScoreTable[CellIndex] = Score;
+                            PrevCellTable[CellIndex] = PrevCellIndex;
+                            DeltaTable[CellIndex] = NULL;
+                            MassDeltaTable[CellIndex] = Edge->Skew + MassDeltaTable[PrevCellIndex];
+                            TokenDropped = 0;
+                        }
+                        Edge = Edge->Next;
+                    }
+                }
+
+                if (ModCountIndex)
+                {
+                    ///////////////
+                    // Modification!  
+                    // Remember: there may be no node corresponding to
+                    // the peptide with a PTM removed.  Example: Assume the peptide is 
+                    // AFKDEDTQAM+16PFR and we're at the node at 1152 for AFKDEDTQAM+16.
+                    // We cannot place the M+16 PTM and jump to a node.  We must place
+                    // the M+16 PTM while placing the M amino acid in order to jump.  
+                    // If (due to poor fragmentation) there's no node available, then 
+                    // we cannot place the PTM at all, but hopefully we will place the correct
+                    // PTM mass at another node.
+                    Mass = Node->Mass - AAMass;
+                    BackNodeIndex = NodeIndex;
+                    BackNodeDirection = -1;
+                    BackNode = Node;
+                    while (1)
+                    {
+                        // Bouncing iteration: Iterate back along the node list until
+                        // you hit the start of the list (or mass becomes too small).
+                        // Then iterate forward along the list until you hit the end of the
+                        // list (or mass becomes too large).  We iterate over a "neighborhood"
+                        // to save time.
+                        if (BackNodeDirection < 0)
+                        {
+                            BackNode = BackNode->Prev;
+                            BackNodeIndex--;
+                            if (!BackNode)
+                            {
+                                BackNodeDirection = 1;
+                                BackNodeIndex = NodeIndex;
+                                BackNode = Node;
+                            }
+                            else
+                            {
+                                Delta = Mass - BackNode->Mass;
+                                if (Delta > MaxPossibleDelta)
+                                {
+                                    BackNodeDirection = 1;
+                                    BackNodeIndex = NodeIndex;
+                                    BackNode = Node;
+                                }
+                            }
+                        }
+                        if (BackNodeDirection > 0)
+                        {
+                            BackNode = BackNode->Next;
+                            BackNodeIndex++;
+                            if (!BackNode)
+                            {
+                                break;
+                            }
+                            else
+                            {
+                                Delta = Mass - BackNode->Mass;
+                                if (Delta < MinPossibleDelta)
+                                {
+                                    break;
+                                }
+                            }
+                        }
+                        
+                        ROUND_MASS_TO_DELTA_BIN(Delta, DeltaBin);
+                        DeltaNode = MassDeltaByMass[AA][DeltaBin];
+                        while (DeltaNode)
+                        {
+                            Skew = Delta - DeltaNode->Delta->RealDelta;
+                            AbsSkew = abs(Skew) / 10;
+                            if (abs(Skew) <= GlobalOptions->Epsilon)
+                            {
+                                PrevCellIndex = AminoBlock - AminoBlockSize + (BackNodeIndex * ZSize) + ModCountIndex - 1;
+                                Score = g_SkewPenalty[AbsSkew] + (int)(DeltaNode->Delta->Score * DELTA_SCORE_SCALER) + ScoreTable[PrevCellIndex];
+                                if (Score > ScoreTable[CellIndex])
+                                {
+                                    ScoreTable[CellIndex] = Score;
+                                    PrevCellTable[CellIndex] = PrevCellIndex;
+                                    DeltaTable[CellIndex] = DeltaNode->Delta;
+                                    MassDeltaTable[CellIndex] = MassDeltaTable[PrevCellIndex] + Skew;
+                                    TokenDropped = 0;
+                                }
+                            }
+                            DeltaNode = DeltaNode->Next;
+                        }
+                    } // loop over back-nodes
+                    
+                } // if ModCount
+                //////////////////////////////////////////////////////////////////////////////
+                // We now have our move backwards (or our FORBIDDEN_PATH).  Get points for this node's PRM:
+                if (!TokenDropped)
+                {
+                    Mass = MASS_TO_BIN(Node->Mass + MassDeltaTable[CellIndex]);
+                    Mass = min(MaxPRM, max(0, Mass));
+                    ScoreTable[CellIndex] += Tweak->PRMScores[Mass];
+                }
+                CellIndex++;
+            } // ModCountIndex loop
+        } // NodeIndex loop
+    } // AminoIndex loop
+
+    if (Info->VerboseFlag)
+    {
+        DebugPrintGeneralTable(Spectrum, Buffer, MaxAmino, MaxMods,
+            ScoreTable, PrevCellTable);
+    }
+
+    ////////////////////////////////////////////////////////////////////////////////////
+    // The d.p. table has been populated.  Now we must read off the candidate(s).
+    for (Node = Spectrum->Graph->LastNode; Node; Node = Node->Prev)
+    {
+        NodeIndex = Node->Index;
+        // We want Node->Mass to be about equal to Tweak->ParentMass - 1900
+        // If it's too small, STOP.  If it's too large, CONTINUE.
+        if (Node->Mass > Tweak->ParentMass - 1900 + GlobalOptions->ParentMassEpsilon)
+        {
+            continue;
+        }
+        if (Node->Mass < Tweak->ParentMass - 1900 - GlobalOptions->ParentMassEpsilon)
+        {
+            break;
+        }
+        for (AminoIndex = 0; AminoIndex < MaxAmino; AminoIndex++)
+        {
+            CellIndex = AminoIndex * AminoBlockSize + NodeIndex * ZSize + MaxMods;
+            Score = ScoreTable[CellIndex] + EndPointPenalty[AminoIndex];
+            if (Score > ScoreToBeat)
+            {
+                ///////////////////////////////////////
+                // We have a match.  Read off the sequence and the PTMs.
+                X = NodeIndex;
+                Y = AminoIndex;
+                Z = MaxMods;
+                if (Info->VerboseFlag)
+                {
+                    printf("\nMatch found at (%d, %d, %d)\n", X, Y, Z);
+                }
+                Match = NewPeptideNode();
+                Match->Tweak = Tweak;
+                MatchBufferPos = 0;
+                MatchPTMCount = 0;
+                while (1)
+                {
+                    if (Info->VerboseFlag)
+                    {
+                        printf("Move to (%d, %d, %d), match is '%s'\n", X, Y, Z, Match->Bases);
+                    }
+                    //CellIndex = Y * AminoBlock + Y * ZSize + Z;
+                    if (Y)
+                    {
+                        if (DeltaTable[CellIndex])
+                        {
+                            Match->AminoIndex[MatchPTMCount] = MatchBufferPos;
+                            Match->ModType[MatchPTMCount] = DeltaTable[CellIndex];
+                            MatchPTMCount++;
+                            if (Info->VerboseFlag)
+                            {
+                                printf("Apply PTM '%s' (%d)\n", DeltaTable[CellIndex]->Name, DeltaTable[CellIndex]->RealDelta);
+                            }
+                        }
+                    }
+                    CellIndex = PrevCellTable[CellIndex];
+                    if (CellIndex < 0)
+                    {
+                        break;
+                    }
+                    NextY = CellIndex / AminoBlockSize;
+                    X = (CellIndex - NextY * AminoBlockSize) / ZSize;
+                    Z = (CellIndex - NextY * AminoBlockSize) % ZSize;
+                    while (Y > NextY)
+                    {
+                        Match->Bases[MatchBufferPos] = Buffer[Y - 1];
+                        MatchBufferPos++;
+                        Y--;
+                    }
+                }
+                Match->FilePos = FilePos + Y;
+                Match->RecordNumber = Info->RecordNumber;
+                Match->Bases[MatchBufferPos] = '\0';
+                Match->SuffixAmino = Buffer[AminoIndex];
+                if (Y)
+                {
+                    Match->PrefixAmino = Buffer[Y - 1];
+                }
+                ReverseString(Match->Bases);
+                for (ModIndex = 0; ModIndex < MatchPTMCount; ModIndex++)
+                {
+                    Match->AminoIndex[ModIndex] = MatchBufferPos - 1 - Match->AminoIndex[ModIndex];
+                }
+                Match->DB = Info->DB;
+                //Match->Score = Score;
+                Match->InitialScore = Score;
+                Match = StoreSpectralMatch(Spectrum, Match, MatchBufferPos, 0);
+                if (Info->VerboseFlag && Match)
+                {
+                    printf("Store match '%c.%s.%c' score %d endpointpenalty %d\n", Match->PrefixAmino, Match->Bases, Match->SuffixAmino, Match->InitialScore, EndPointPenalty[AminoIndex]);
+                }
+            } // if Score > ScoreToBeat
+        } // final AminoIndex loop
+    } // final node loop
+    return ReturnAmino;
+}
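+
+// A minimal sketch (not called by the search code) of the cell geometry used by MSAlignmentGeneral:
+// a cell (AminoIndex, NodeIndex, ModCountIndex) is flattened exactly as in the fill loop above, and
+// the traceback inverts this mapping with the corresponding / and % arithmetic.
+static int SketchGeneralCellIndex(int AminoIndex, int NodeIndex, int ModCountIndex, int NodeCount, int ZSize)
+{
+    int AminoBlockSize = NodeCount * ZSize;
+    return AminoIndex * AminoBlockSize + NodeIndex * ZSize + ModCountIndex;
+}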
+
+// The MS-Alignment algorithm.
+// New and improved version of the d.p. algorithm for generating candidates with 0-2 PTMs
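+// A candidate is pieced together from an unmodified prefix run, an optional PTM, an unmodified
+// stretch tracked in the D table, an optional PTM, and an unmodified suffix run; the PrefixTable and
+// SuffixTable scores from FillPrefixSuffixTables supply the prefix and suffix contributions, which
+// is how candidates end up carrying 0-2 PTMs.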
+int ExtendMatchRightwardDuo(SearchInfo* Info, char* Buffer, int BufferEnd, int MatchMass, 
+    int MaxMods, int ScoreToBeat, int FilePos, SpectrumTweak* Tweak)
+{
+    static int* PrevCellTable = NULL;
+    static int* DTable = NULL;
+    int MaxAmino;
+    int ReturnAmino = -1;
+    int Result;
+    int NodeIndex;
+    TagGraphNode* Node;
+    int AminoBlockSize;
+    int AminoBlock;
+    int CellIndex;
+    int PrevCellIndex = 0;
+    int Score;
+    int CellMass;
+    TagGraphBackEdge* Edge;
+    int StartAminoIndex;
+    int DeltaBin;
+    int Delta;
+    int MinPossibleDelta = -13000; // W->G mutation
+    int MaxPossibleDelta = GlobalOptions->MaxPTMDelta * 100;
+    MassDeltaNode* DeltaNode;
+    int AA;
+    int AAMass = 0;
+    int AA2;
+    int AA2Mass;
+    int AA3;
+    int EndAminoIndex;
+    int ComplementMass;
+    int CellScore;
+    int AminoIndex;
+    int Skew;
+    int AbsSkew;
+    int Mass;
+    int PRM;
+    int BackEdgeDoubleIndex = 0;
+    int BackEdgeTripleIndex = 0;
+    int MaxPRM = Tweak->PRMScoreMax - 1;
+    int StartPointPenalty[MAX_ROWS];
+    int EndPointPenalty[MAX_ROWS];
+    MSSpectrum* Spectrum = Info->Spectrum;
+    //
+    // Allocate tables, if necessary:
+    if (!PrefixTable)
+    {
+        PrefixTable = (int*)malloc(sizeof(int) * MAX_ROWS * MAX_ROWS);
+        SuffixTable = (int*)malloc(sizeof(int) * MAX_ROWS * MAX_ROWS);
+        PrefixMassTable = (int*)malloc(sizeof(int) * MAX_ROWS * MAX_ROWS);
+        SuffixMassTable = (int*)malloc(sizeof(int) * MAX_ROWS * MAX_ROWS);
+        DTable = (int*)malloc(sizeof(int) * MAX_ROWS * MAX_NODES);
+        // MassDeltaTable stores the accumulated mass delta (skew) used in reaching each cell of DTable:
+        MassDeltaTable = (int*)malloc(sizeof(int) * MAX_ROWS * MAX_NODES);
+        PrevCellTable = (int*)malloc(sizeof(int) * MAX_ROWS * MAX_NODES);
+        DeltaTable = (MassDelta**)malloc(sizeof(MassDelta*) * MAX_ROWS * MAX_NODES);
+    }
+    /////////////////////////////////////////
+    // Find MaxAmino:
+    Result = FindPeptideBlockEnd(Spectrum, Buffer, BufferEnd, &MaxAmino, &ReturnAmino);
+    if (!Result)
+    {
+        // No extension necessary.  Advance the database pointer:
+        return ReturnAmino;
+    }
+
+    for (AminoIndex = 0; AminoIndex < MaxAmino; AminoIndex++)
+    {
+        StartPointPenalty[AminoIndex] = 0;
+        if (AminoIndex)
+        {
+            AA = Buffer[AminoIndex - 1];
+            if (AA != 'R' && AA != 'K' && AA != '*')
+            {
+                StartPointPenalty[AminoIndex] = -500;
+            }
+        }
+        EndPointPenalty[AminoIndex] = 0;
+        AA = Buffer[AminoIndex];
+        if ((AA != 'R' && AA != 'K') && (AminoIndex <= MaxAmino-1 && Buffer[AminoIndex + 1]!='*'))
+        {
+            EndPointPenalty[AminoIndex] = -500;
+        }
+    }
+    /////////////////////////////////////////
+    // Fill the Prefix and Suffix tables:
+
+    FillPrefixSuffixTables(Spectrum, Tweak, MatchMass, Buffer, MaxAmino, PrefixTable, SuffixTable,
+        PrefixMassTable, SuffixMassTable);
+#ifdef VERBOSE_DEBUGGING
+    DebugPrintPrefixSuffixTables(MaxAmino, Buffer, PrefixTable, SuffixTable, PrefixMassTable, SuffixMassTable); 
+#endif
+    /////////////////////////////////////////
+    // Fill table D[]
+    AminoBlockSize = Spectrum->Graph->NodeCount;
+    AminoBlock = 0;
+    for (AminoIndex = 0; AminoIndex <= MaxAmino; AminoBlock += AminoBlockSize, AminoIndex++)
+    {
+
+        CellIndex = AminoBlock;
+        AA2 = 0;
+        AA3 = 0;
+        if (AminoIndex)
+        {
+            AA = Buffer[AminoIndex-1] - 'A';
+            AAMass = PeptideMass[Buffer[AminoIndex-1]];
+        }
+        if (AminoIndex>1)
+        {
+            AA2 = Buffer[AminoIndex-2] - 'A';
+            AA2Mass = PeptideMass[Buffer[AminoIndex-2]];
+            BackEdgeDoubleIndex = AA*AMINO_ACIDS + AA2;
+        }
+        if (AminoIndex>2)
+        {
+            AA3 = Buffer[AminoIndex-3] - 'A';
+            BackEdgeTripleIndex = AA*676 + AA2*26 + AA3;
+        }
+        for (NodeIndex = 0, Node = Spectrum->Graph->FirstNode; Node; Node = Node->Next, NodeIndex++, CellIndex++)
+        {
+            DTable[CellIndex] = FORBIDDEN_PATH; // default
+            DeltaTable[CellIndex] = NULL;
+            PrevCellTable[CellIndex] = -1;
+            MassDeltaTable[CellIndex] = 0;
+
+            ///////////////
+            // Free rides:
+            if (Node->Mass < GlobalOptions->ParentMassEpsilon)
+            {
+                DTable[CellIndex] = StartPointPenalty[AminoIndex];
+                PrevCellTable[CellIndex] = -1;
+                DeltaTable[CellIndex] = NULL;
+                MassDeltaTable[CellIndex] = Node->Mass;
+                continue;
+            }
+            if (AminoIndex == 0)
+            {
+                continue; // And that's all we do on the top row.
+            }
+           
+            ///////////////
+            // One unmodified amino acid:
+            Edge = Node->BackEdge[AA];
+            while (Edge)
+            {
+                PrevCellIndex = AminoBlock - AminoBlockSize + Edge->ToNode->Index;
+                Score = DTable[PrevCellIndex] + Edge->Score; 
+                if (Score > DTable[CellIndex])
+                {
+                    DTable[CellIndex] = Score;
+                    PrevCellTable[CellIndex] = PrevCellIndex;
+                    DeltaTable[CellIndex] = NULL;
+                    MassDeltaTable[CellIndex] = Edge->Skew + MassDeltaTable[PrevCellIndex];
+                }
+                Edge = Edge->Next;
+            }
+
+            ///////////////
+            // Two unmodified amino acids:
+            if (AminoIndex > 1)
+            {
+                Edge = Node->BackEdgeDouble[BackEdgeDoubleIndex];
+                while (Edge)
+                {
+                    PrevCellIndex = AminoBlock - AminoBlockSize - AminoBlockSize + Edge->ToNode->Index;
+                    // Accumulate points for the middle of the jump:
+                    PRM = MASS_TO_BIN(Edge->HalfMass);
+                    Score = Tweak->PRMScores[PRM] + DTable[PrevCellIndex] + Edge->Score;
+                    if (Score > DTable[CellIndex])
+                    {
+                        DTable[CellIndex] = Score;
+                        PrevCellTable[CellIndex] = PrevCellIndex;
+                        DeltaTable[CellIndex] = NULL;
+                        MassDeltaTable[CellIndex] = Edge->Skew + MassDeltaTable[PrevCellIndex];
+                    }
+                    Edge = Edge->Next;
+                }
+            }
+            ///////////////
+            // Three unmodified amino acids:
+            if (AminoIndex > 2)
+            {
+                Edge = Node->BackEdgeTriple[BackEdgeTripleIndex];
+                while (Edge)
+                {
+                    PrevCellIndex = AminoBlock - AminoBlockSize - AminoBlockSize - AminoBlockSize + Edge->ToNode->Index;
+                    // Accumulate points for the middle of the jump:
+                    PRM = MASS_TO_BIN(Edge->HalfMass);
+                    Score = Tweak->PRMScores[PRM] + DTable[PrevCellIndex] + Edge->Score;
+                    PRM = MASS_TO_BIN(Edge->HalfMass2);
+                    Score += Tweak->PRMScores[PRM];
+                    if (Score > DTable[CellIndex])
+                    {
+                        DTable[CellIndex] = Score;
+                        PrevCellTable[CellIndex] = PrevCellIndex;
+                        DeltaTable[CellIndex] = NULL;
+                        MassDeltaTable[CellIndex] = Edge->Skew + MassDeltaTable[PrevCellIndex];
+                    }
+                    Edge = Edge->Next;
+                }
+            }
+
+            ///////////////
+            // No amino acid at all, or modified amino acid.  Try using the prefix StartAminoIndex...EndAminoIndex.
+            // Also, try using an EMPTY prefix (the case where StartAminoIndex == AminoIndex > EndAminoIndex)
+            EndAminoIndex = AminoIndex - 1;
+            Mass = Node->Mass - AAMass;
+            for (StartAminoIndex = AminoIndex; StartAminoIndex>0; StartAminoIndex--)
+            {
+                if (StartAminoIndex == AminoIndex)
+                {
+                    Delta = Mass; // Modification on the first amino acid of the peptide
+                }
+                else
+                {
+                    Delta = Mass - PrefixMassTable[StartAminoIndex*MAX_ROWS + EndAminoIndex];
+                }
+                if (Delta > MaxPossibleDelta)
+                {
+                    continue;
+                }
+                if (Delta < MinPossibleDelta)
+                {
+                    break;
+                }
+                ROUND_MASS_TO_DELTA_BIN(Delta, DeltaBin);
+                DeltaNode = MassDeltaByMass[AA][DeltaBin];
+                while (DeltaNode)
+                {
+                    Skew = Delta - DeltaNode->Delta->RealDelta;
+                    //Skew = Delta - DeltaNode->RealDelta;
+                    AbsSkew = abs(Skew) / 10;
+                    if (AbsSkew <= GlobalOptions->Epsilon)
+                    {
+                        if (StartAminoIndex == AminoIndex)
+                        {
+                            Score = g_SkewPenalty[AbsSkew] + (int)(DeltaNode->Delta->Score * DELTA_SCORE_SCALER);
+                        }
+                        else
+                        {
+                            Score = g_SkewPenalty[AbsSkew] + (int)(DeltaNode->Delta->Score * DELTA_SCORE_SCALER + PrefixTable[StartAminoIndex*MAX_ROWS + EndAminoIndex]);
+                        }
+                        Score += StartPointPenalty[StartAminoIndex - 1];
+                        //Score += Spectrum->PRMScores[PRM];
+                        if (Score > DTable[CellIndex])
+                        {
+                            DTable[CellIndex] = Score;
+                            PrevCellTable[CellIndex] = (StartAminoIndex-1) * AminoBlockSize;
+                            DeltaTable[CellIndex] = DeltaNode->Delta;
+                            MassDeltaTable[CellIndex] = Skew;
+                        }
+                    }
+                    DeltaNode = DeltaNode->Next;
+                }
+                Skew = abs(Delta) / 10;
+                if (Skew < GlobalOptions->Epsilon)
+                {
+                    if (StartAminoIndex > EndAminoIndex)
+                    {
+                        Score = g_SkewPenalty[Skew];
+                    }
+                    else
+                    {
+                        Score = g_SkewPenalty[Skew] + PrefixTable[StartAminoIndex*MAX_ROWS + EndAminoIndex];
+                    }
+                    Score += StartPointPenalty[StartAminoIndex - 1];
+                    if (Score > DTable[CellIndex])
+                    {
+                        DTable[CellIndex] = Score;
+                        PrevCellTable[CellIndex] = (StartAminoIndex-1) * AminoBlockSize;
+                        DeltaTable[CellIndex] = NULL;
+                    }
+                }
+            }
+            //////////////////////////////////////////////////////////////////////////////
+            // We now have our move backwards (or our FORBIDDEN_PATH).  Get points for this node's PRM:
+            Mass = MASS_TO_BIN(Node->Mass + MassDeltaTable[CellIndex]);
+            Mass = min(MaxPRM, max(0, Mass));
+            DTable[CellIndex] += Tweak->PRMScores[Mass];
+        }
+    }
+#ifdef VERBOSE_DEBUGGING
+    DebugPrintDTable(Spectrum, Buffer, DTable, DeltaTable, PrevCellTable, MaxAmino); 
+#endif
+
+    /////////////////////////////////////////
+    // Find candidate peptides, using tables PrefixTable, SuffixTable, and D:
+    AminoBlock = 0;
+    for (AminoIndex = 0; AminoIndex <= MaxAmino; AminoIndex++, AminoBlock += AminoBlockSize)
+    {
+        AA = Buffer[AminoIndex] - 'A'; // (aminoindex + 1) - 1
+        CellIndex = AminoBlock;
+        for (NodeIndex = 0, Node = Spectrum->Graph->FirstNode; Node; Node = Node->Next, NodeIndex++, CellIndex++)
+        {
+            CellMass = Node->Mass + MassDeltaTable[CellIndex];
+            CellScore = DTable[CellIndex];
+            ComplementMass = MatchMass - CellMass;
+            // We can end right here:
+            if (abs(ComplementMass) < GlobalOptions->FlankingMassEpsilon)
+            {
+                Spectrum->CandidatesScored++;
+                GlobalStats->CandidatesScored++;
+                Score = CellScore + EndPointPenalty[AminoIndex-1];
+                if (CellScore > ScoreToBeat)
+                {
+                    AddNewMatchDuo(Info, Tweak, Buffer, CellScore, PrevCellTable, DeltaTable, CellIndex, NULL,
+                        AminoBlockSize, AminoIndex, AminoIndex, FilePos);
+                    if (Spectrum->Node->MatchCount == GlobalOptions->StoreMatchCount)
+                    {
+                        ScoreToBeat = Spectrum->Node->LastMatch->InitialScore;
+                    }
+                }
+            }
+            for (EndAminoIndex = AminoIndex + 1; EndAminoIndex <= MaxAmino; EndAminoIndex++)
+            {
+                Delta = SuffixMassTable[(AminoIndex + 1)*MAX_ROWS + EndAminoIndex] - CellMass;
+                
+                if (Delta > MaxPossibleDelta)
+                {
+                    continue;
+                }
+                if (Delta < MinPossibleDelta)
+                {
+                    break;
+                }
+                //EndAA = Buffer[EndAminoIndex] - 'A'; // aminoindex+1-1
+                // Maybe we match a suffix mass:
+                if (abs(Delta) < GlobalOptions->Epsilon)
+                {
+                    Skew = abs(Delta) / 10;
+                    Score = CellScore + SuffixTable[(AminoIndex + 1)*MAX_ROWS + EndAminoIndex] + g_SkewPenalty[Skew];
+                    Score += EndPointPenalty[EndAminoIndex - 1];
+                    Spectrum->CandidatesScored++;
+                    GlobalStats->CandidatesScored++;
+                    if (Score > ScoreToBeat)
+                    {
+                        AddNewMatchDuo(Info, Tweak, Buffer, Score, PrevCellTable, DeltaTable, CellIndex, NULL,
+                            AminoBlockSize, AminoIndex, EndAminoIndex, FilePos);
+                        if (Spectrum->Node->MatchCount == GlobalOptions->StoreMatchCount)
+                        {
+                            ScoreToBeat = Spectrum->Node->LastMatch->InitialScore;
+                        }
+
+                    }
+                }
+                ROUND_MASS_TO_DELTA_BIN(Delta, DeltaBin);
+                DeltaNode = MassDeltaByMass[AA][DeltaBin];
+                while (DeltaNode)
+                {
+                    Skew = abs(DeltaNode->Delta->RealDelta - Delta);
+                    //Skew = abs(DeltaNode->RealDelta - Delta);
+                    if (Skew < GlobalOptions->Epsilon)
+                    {
+                        Score = CellScore + (int)(DeltaNode->Delta->Score * DELTA_SCORE_SCALER) + SuffixTable[(AminoIndex + 1)*MAX_ROWS + EndAminoIndex] + g_SkewPenalty[Skew / 10];
+                        Score += EndPointPenalty[EndAminoIndex - 1];
+                        Spectrum->CandidatesScored++;
+                        GlobalStats->CandidatesScored++;
+                        if (Score > ScoreToBeat)
+                        {
+                            AddNewMatchDuo(Info, Tweak, Buffer, Score, PrevCellTable, DeltaTable, CellIndex, DeltaNode->Delta,
+                                AminoBlockSize, AminoIndex, EndAminoIndex, FilePos);
+                            if (Spectrum->Node->MatchCount == GlobalOptions->StoreMatchCount)
+                            {
+                                ScoreToBeat = Spectrum->Node->LastMatch->InitialScore;
+                            }
+
+                        }
+                    }
+                    DeltaNode = DeltaNode->Next;
+                }
+            }
+        }
+    }
+    return ReturnAmino;
+}
+Peptide* ClonePeptide(Peptide* Match)
+{
+    Peptide* NewMatch = NewPeptideNode();
+    memcpy(NewMatch, Match, sizeof(Peptide));
+    return NewMatch;
+}
+
+// AddNewMatchDuo is called when we have a new match: it ends at EndAminoIndex, and its d.p. path
+// ends at the cell CellIndex.  Because we consider an unmodified peptide whenever we consider a
+// modification of size +1..+n or -1..-n, we only want to add one "undecorated" peptide, not re-add
+// the same thing multiple times; we store the FilePos, StartAminoIndex, and EndAminoIndex of the last match.
+void AddNewMatchDuo(SearchInfo* Info, SpectrumTweak* Tweak, char* Buffer, int Score, int* PrevCellTable, MassDelta** DeltaTable, 
+    int CellIndex, MassDelta* FinalDelta, int AminoBlockSize, int AminoIndex, int EndAminoIndex,
+    int FilePos)
+{
+    int StartAminoIndex;
+    int PeptideLength;
+    int OldCellIndex;
+    Peptide* Match;
+    Peptide* VariantMatch;
+    int SlideLeftIndex = -1;
+    int SlideRightIndex = -1;
+    int ModCount = 0;
+    int PlainMass;
+    int ModdedMass;
+    float RunningScore;
+    float PlainScore;
+    float ModdedScore;
+    int Diff;
+    MassDeltaNode* Node;
+    int PRM;
+    int BestDiff;
+    MSSpectrum* Spectrum = Info->Spectrum;
+    //
+    Match = NewPeptideNode();
+    Match->Tweak = Tweak;
+    // Trace back through the d.p. table to find our starting amino index:
+
+    StartAminoIndex = AminoIndex;
+    OldCellIndex = CellIndex;
+    while (OldCellIndex >= 0)
+    {
+        StartAminoIndex = OldCellIndex / AminoBlockSize;
+        if (DeltaTable[OldCellIndex])
+        {
+            Match->ModType[0] = DeltaTable[OldCellIndex];
+            Match->AminoIndex[0] = StartAminoIndex;
+            ModCount++;
+        }
+        OldCellIndex = PrevCellTable[OldCellIndex];
+    }
+    if (ModCount)
+    {
+        Match->AminoIndex[0] -= (StartAminoIndex + 1);
+        SlideLeftIndex = Match->AminoIndex[0];
+    }
+    PeptideLength = EndAminoIndex - StartAminoIndex;
+    strncpy(Match->Bases, Buffer + StartAminoIndex, PeptideLength);
+    Match->Bases[PeptideLength] = '\0';
+    Match->InitialScore = Score;
+    Match->FilePos = FilePos + StartAminoIndex;
+    Match->RecordNumber = Info->RecordNumber;
+    if (StartAminoIndex)
+    {
+        Match->PrefixAmino = Buffer[StartAminoIndex - 1];
+    }
+    if (FinalDelta)
+    {
+        Match->AminoIndex[ModCount] = AminoIndex - StartAminoIndex;
+        SlideRightIndex = Match->AminoIndex[ModCount];
+        Match->ModType[ModCount] = FinalDelta;
+        ModCount++;
+    }
+    GetPeptideParentMass(Match);
+
+    Match->SuffixAmino = Buffer[EndAminoIndex];
+#ifdef VERBOSE_DEBUGGING
+    DebugPrintMatch(Match);
+#endif
+    // STRIP DECORATION:
+    // If we placed a small PTM (mass -3...4), then be sure to consider a match with no modification.
+    // If we placed the PTM only to make the parent mass match up, then the modless peptide will get a 
+    // better score, and we'll filter the spurious +1 modification.  (There are a few real +1 modifications,
+    // such as deamidation of N, but spurious +1 modifications are much more common.)
+    if (FinalDelta && FinalDelta->RealDelta >= -300 && FinalDelta->RealDelta < 500)
+    {
+        VariantMatch = ClonePeptide(Match);
+        VariantMatch->InitialScore = Score;
+        VariantMatch->AminoIndex[0] = -1;
+        VariantMatch->ModType[0] = NULL;
+        VariantMatch->DB = Info->DB;
+        StoreSpectralMatch(Spectrum, VariantMatch, PeptideLength, 0);
+    }
+    // SLIDE LEFT:
+    // If we placed a PTM at the edge of our prefix, but the PTM could just as easily have been placed earlier,
+    // then do so:
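+    // Illustrative example (not from the original comments): if a +80 phosphorylation was
+    // attached to the later S of "...SGS...", but the PRM scores give no evidence either way,
+    // variant matches with the modification on the earlier S are stored as well.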
+    if (SlideLeftIndex > 0)
+    {
+        PlainMass = 0;
+        //ModdedMass = Match->ModType[0]->RealDelta;
+        for (AminoIndex = 0; AminoIndex <= Match->AminoIndex[0]; AminoIndex++)
+        {
+            PlainMass += PeptideMass[Match->Bases[AminoIndex]];
+        }
+        ModdedMass = PlainMass + Match->ModType[0]->RealDelta;
+        RunningScore = (float)Match->InitialScore;
+        for (AminoIndex = Match->AminoIndex[0]; AminoIndex > 0; AminoIndex--)
+        {
+            PlainMass -= PeptideMass[Match->Bases[AminoIndex]];
+            ModdedMass -= PeptideMass[Match->Bases[AminoIndex]];
+            PRM = MASS_TO_BIN(PlainMass);
+            if (PRM < -PRM_ARRAY_SLACK || PRM >= Tweak->PRMScoreMax)
+            {
+                break;
+            }
+            PRM = max(0, PRM);
+            PlainScore = (float)Tweak->PRMScores[PRM];
+            PRM = MASS_TO_BIN(ModdedMass);
+            if (PRM < -PRM_ARRAY_SLACK || PRM >= Tweak->PRMScoreMax)
+            {
+                break;
+            }
+            PRM = max(0, PRM);
+            ModdedScore = (float)Tweak->PRMScores[PRM];
+            if (ModdedScore > 0)
+            {
+                // We've already had the chance to attach this ptm here.
+                break;
+            }
+            RunningScore += (ModdedScore - PlainScore);
+            if (RunningScore < Match->InitialScore - 100)
+            {
+                // We've hurt our score quite a bit; let's stop.
+                break;
+            }
+            // Make a variant-match:
+            VariantMatch = ClonePeptide(Match);
+            VariantMatch->InitialScore = (int)RunningScore;
+            BestDiff = -1;
+            VariantMatch->ModType[0] = NULL;
+            for (Node = MassDeltaByMass[Match->Bases[AminoIndex-1]-'A'][Match->ModType[0]->Delta]; Node; Node = Node->Next)
+            {
+                Diff = abs(Node->Delta->RealDelta - Match->ModType[0]->RealDelta);
+                if (BestDiff < 0 || Diff < BestDiff)
+                {
+                    BestDiff = Diff;
+                    VariantMatch->ModType[0] = Node->Delta;
+                }
+            }
+            VariantMatch->AminoIndex[0] = AminoIndex - 1;
+            if (VariantMatch->ModType[0])
+            {
+#ifdef VERBOSE_DEBUGGING
+                printf("Variant:\n");
+                DebugPrintMatch(VariantMatch);
+#endif
+                VariantMatch->DB = Info->DB;
+                StoreSpectralMatch(Spectrum, VariantMatch, PeptideLength, 0);
+            }
+            else
+            {
+                FreePeptideNode(VariantMatch);
+            }
+        }
+    }
+    // SLIDE RIGHT:
+    // If we placed a PTM at the edge of our suffix, but the PTM could just as easily have been placed later,
+    // then do so:
+    if (SlideRightIndex > 0)
+    {
+        PlainMass = 0;
+        if (ModCount>1)
+        {
+            PlainMass += Match->ModType[0]->RealDelta;
+        }
+        //ModdedMass = Match->ModType[0]->RealDelta;
+        for (AminoIndex = 0; AminoIndex < Match->AminoIndex[ModCount-1]; AminoIndex++)
+        {
+            PlainMass += PeptideMass[Match->Bases[AminoIndex]];
+        }
+        ModdedMass = PlainMass + Match->ModType[ModCount-1]->RealDelta;
+        RunningScore = (float)Match->InitialScore;
+
+        for (AminoIndex = Match->AminoIndex[ModCount-1]; Match->Bases[AminoIndex]; AminoIndex++)
+        {
+            PlainMass += PeptideMass[Match->Bases[AminoIndex]];
+            ModdedMass += PeptideMass[Match->Bases[AminoIndex]];
+            PRM = MASS_TO_BIN(PlainMass);
+            if (PRM < -PRM_ARRAY_SLACK || PRM >= Tweak->PRMScoreMax)
+            {
+                break;
+            }
+            PlainScore = (float)Tweak->PRMScores[PRM];
+            PRM = MASS_TO_BIN(ModdedMass);
+            if (PRM < -PRM_ARRAY_SLACK || PRM >= Tweak->PRMScoreMax)
+            {
+                break;
+            }
+            ModdedScore = (float)Tweak->PRMScores[PRM];
+            RunningScore += (PlainScore - ModdedScore);
+            if (RunningScore < Match->InitialScore - 100)
+            {
+                // We've hurt our score quite a bit; let's stop.
+                break;
+            }
+            if (AminoIndex > Match->AminoIndex[ModCount-1])
+            {
+                if (ModdedScore > 0)
+                {
+                    // We've already had the chance to attach this ptm here.
+                    break;
+                }
+
+                // Make a variant-match:
+                VariantMatch = ClonePeptide(Match);
+                VariantMatch->InitialScore = (int)RunningScore;
+                BestDiff = -1;
+                VariantMatch->ModType[ModCount-1] = NULL;
+                for (Node = MassDeltaByMass[Match->Bases[AminoIndex]-'A'][Match->ModType[ModCount-1]->Delta]; Node; Node = Node->Next)
+                {
+                    Diff = abs(Node->Delta->RealDelta - Match->ModType[ModCount-1]->RealDelta);
+                    if (BestDiff < 0 || Diff < BestDiff)
+                    {
+                        BestDiff = Diff;
+                        VariantMatch->ModType[ModCount-1] = Node->Delta;
+                    }
+                }
+                if (VariantMatch->ModType[ModCount-1])
+                {
+                    VariantMatch->AminoIndex[ModCount-1] = AminoIndex;
+#ifdef VERBOSE_DEBUGGING
+                    printf("Variant:\n");
+                    DebugPrintMatch(VariantMatch);
+#endif
+                    VariantMatch->DB = Info->DB;
+                    StoreSpectralMatch(Spectrum, VariantMatch, PeptideLength, 0);
+                }
+                else
+                {
+                    FreePeptideNode(VariantMatch);
+                }
+            }
+        }
+
+    }
+    Match->DB = Info->DB;
+    StoreSpectralMatch(Spectrum, Match, PeptideLength, 0);
+}
diff --git a/FreeMod.h b/FreeMod.h
new file mode 100644
index 0000000..593628f
--- /dev/null
+++ b/FreeMod.h
@@ -0,0 +1,91 @@
+//Title:          FreeMod.h
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+#ifndef FREE_MOD_H
+#define FREE_MOD_H
+#include "Tagger.h"
+
+// FreeMod.h includes code and classes for handling mutations and large sets
+// of modifications.  (Most references to "mods" can be taken to mean "mutations and
+// post-translational modifications".)  This is a more powerful (but much slower) way to search
+// spectra, and is most appropriate for second-pass searching.  (In multipass
+// searching, the database contains only the proteins identified with high
+// confidence during a restrictive first-pass search of a large database, such as an
+// IPI species database or Swiss-Prot.)
+
+// DELTA_BIN_COUNT is the number of mass bins in the range [MIN_DELTA_AMU, MAX_DELTA_AMU], 400*10 = 4000
+// This bin count is the size of each MassDeltaByMass[AA] array.
+//#define DELTA_BIN_COUNT 4000
+
+// MASS_TO_BIN and BIN_TO_MASS convert between masses and mass-bins
+#define MASS_TO_BIN(mass) (int)((mass + 50) / 100)
+#define BIN_TO_MASS(bin) (int)((bin) * 100)
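+// For example, with masses scaled by MASS_SCALE (1000, i.e. milli-Daltons):
+//   MASS_TO_BIN(57021) == (57021 + 50) / 100 == 570   (a glycine residue, ~57.021 Da)
+//   BIN_TO_MASS(570)   == 57000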
+
+#define MDBI_ALL_MODS 26
+
+// Scaling factor, compensating for the different score ranges of 
+// quick PRM-based scoring and final match-scoring.
+#define DELTA_SCORE_SCALER 200
+#define DELTA_SCORE_SCALER_FINAL 0.5
+
+// Search a database, using *no* tag-based filtering at all.  This is much slower than searching
+// with tag-based filters, but also more sensitive, particularly since tagging is harder in the presence of mods.  
+void SearchDatabaseTagless(SearchInfo* Info, int MaxMods, int VerboseFlag, SpectrumTweak* Tweak);
+
+// Set Spectrum->PRMScores, using the PRM scoring model.  When extending in blind mode,
+// we use the scores of these PRMs as an initial score for our peptides.
+void SetPRMScores(MSSpectrum* Spectrum);
+
+// Read, from the binary file Mutations.dat, the definitions of all mass modifications we will consider.
+void LoadMassDeltas(char* FileName, int ReadFlag);
+
+// Initialize the hash MassDeltaByMass.  The table entry MassDeltaByMass[AA][Delta] points to a linked list
+// of mass deltas for amino acid AA matching Delta.  
+void InitMassDeltaByMass();
+
+// Re-score spectral matches.  The matches in the list Spectrum->FirstMatch have been 
+// quick-scored, but we can sort them better if we score them more meticulously.  
+// Let's do so, and re-sort the list based on the new scores.
+void MQScoreSpectralMatches(SpectrumNode* Spectrum);
+
+// Print out a list of matches for the spectrum (Spectrum->FirstMatch through Spectrum->LastMatch).
+void DebugPrintMatchList(SpectrumNode* Spectrum);
+
+// Attach edges moving back by one, two, or three amino acid masses to nodes in the TagGraph 
+void TagGraphPopulateBackEdges(TagGraph* Graph);
+
+void FreeMassDeltaByMass();
+void FreeMassDeltas();
+void AddBlindMods();
+void AllocMassDeltaByIndex();
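+
+// A rough sketch of how these routines fit together during a blind search; the call order
+// shown here is illustrative only, not taken from the original documentation:
+//   LoadMassDeltas(FILENAME_MASS_DELTAS, 0);  // read the modification definitions
+//   InitMassDeltaByMass();                    // build the per-residue delta lookup
+//   ... search and score spectra ...
+//   FreeMassDeltaByMass();
+//   FreeMassDeltas();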
+
+#endif // FREE_MOD_H
diff --git a/GetByteOffset.py b/GetByteOffset.py
new file mode 100644
index 0000000..20268f4
--- /dev/null
+++ b/GetByteOffset.py
@@ -0,0 +1,169 @@
+#Title:          GetByteOffset.py
+#Author:         Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+#Created:        2005
+# Copyright 2007,2008,2009 The Regents of the University of California
+# All Rights Reserved
+#
+# Permission to use, copy, modify and distribute any part of this
+# program for educational, research and non-profit purposes, by non-profit
+# institutions only, without fee, and without a written agreement is hereby
+# granted, provided that the above copyright notice, this paragraph and
+# the following three paragraphs appear in all copies.
+#
+# Those desiring to incorporate this work into commercial
+# products or use for commercial purposes should contact the Technology
+# Transfer & Intellectual Property Services, University of California,
+# San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+# Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+#
+# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+# INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+# IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+# OF SUCH DAMAGE.
+#
+# THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+# OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+# ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+# REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+# EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+# THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+
+"""
+GetByteOffset.py
+Utility to find the byte offset of scans in a spectrum file
+Has no main
+"""
+
+import os
+import sys
+import xml.sax.handler
+import xml.sax
+
+## auxiliary for the mzxml files
+class XMLHandler(xml.sax.handler.ContentHandler):
+    def __init__(self):
+        self.inOffset = 0
+        self.mapping = {}
+    def startElement(self, name, attributes):
+        if name == "offset":
+            self.buffer = ""
+            self.scan = attributes["id"]
+            self.inOffset = 1
+    def characters(self, data):
+        if self.inOffset:
+            self.buffer += data
+    def endElement(self, name):
+        if name == "offset":
+            self.inOffset = 0
+            self.mapping[self.scan] = self.buffer
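+
+# The handler above reads the scan index that mzXML files carry near the end of the
+# file; abridged for illustration, that index looks like:
+#   <index name="scan">
+#     <offset id="1">1234</offset>
+#     ...
+#   </index>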
+
+class Abacus:
+    def __init__(self):
+        self.ScanOffset = {} #Scan = Offset
+
+    def GetByteOffset(self, FileName):
+        self.ScanOffset = {} #reset every time
+        (Stub, Ext) = os.path.splitext(FileName)
+        if Ext.lower() == ".mzxml":
+            return self.GetOffsetsMZXML(FileName)
+        elif Ext.lower() == ".mgf":
+            return self.GetOffsetsMGF(FileName)
+
+    def GetOffsetsMZXML(self, FilePath):
+        """Parses an individual mzXML file and saves the scan num and byte offset
+        into an dictionary called self.ScanOffset
+        Now uses real XML parsing looking for <offset id="SCAN">OFFSET</offset>
+        DOM is slow, so I'll use sax
+        """
+        print "Opening mzXML file %s"%FilePath
+        FileName = os.path.split(FilePath)[1]
+        Parser = xml.sax.make_parser()
+        Handler = XMLHandler()
+        Parser.setContentHandler(Handler)
+        Parser.parse(FilePath)
+        for (Scan, Offset) in Handler.mapping.items():
+            ScanNumber = int(Scan)
+            Offset = int(Offset)
+            #print (Scan, Offset)
+            self.ScanOffset[ScanNumber] = Offset
+        return self.ScanOffset
+
+    def GetOffsetsMGF(self, FileName):
+        """There is no pleasant way of doing this.  I suppose
+        that I can just read in line after line looking for BEGIN
+        """
+        File = open(FileName, "rb")
+        # Read in a megabyte of the file at a time, and search for BEGIN tags.
+        # Text holds the data; SeamText will hold the last few bytes of a block
+        # and gets prepended to the next block (to catch a tag that spans a block boundary).
+        MEG = 1024*1024
+        Text = ""
+        SeamText = ""
+        FileOffset = 0
+        
+        Counter = 0
+        while 1: # read in blocks loop
+            Block = File.read(MEG)
+            if not Block: #EOF
+                break
+            Text += Block
+            Pos = -1 #set up as dummy before the loop
+            while 1: #look for scans and offsets loop
+                ScanPos = Text.find("SCAN", Pos + 1)
+                if not ScanPos == -1:
+                    ## 1. Get the scan number
+                    ActualNumberPos = Text.find("=", ScanPos)
+                    EndNumberPos = Text.find("\n", ScanPos)
+                    ScanNumber = int (Text[ActualNumberPos + 1:EndNumberPos])
+                    #print ScanNumber
+                    ## 2. Get the BEGIN tag
+                    BeginPos = Text.rfind("BEGIN", 0, ScanPos)
+                    ScanOffset = FileOffset + BeginPos
+                    if not self.ScanOffset.has_key(ScanNumber):
+                        self.ScanOffset[ScanNumber] = ScanOffset
+                        #(only record the first offset seen for a given scan number)
+                else:
+                    ## Did not find a scan number.  Two possibilities:
+                    ## we either can or cannot find a BEGIN.
+                    BeginPos = Text.find("BEGIN", Pos + 1)
+                    if not BeginPos == -1:
+                        #BEGIN was found; the seam text starts here
+                        print "Most recent Scan was %s"%ScanNumber
+                        SeamText = Text[BeginPos:]
+                        break
+                    else:
+                        #here it is possible that the word BEGIN spans the block boundary;
+                        #to handle that case, we simply keep some seam text
+                        SeamText = Text[-20:]
+                        break
+                Pos = EndNumberPos
+
+            #now we've broken out of the finding loop.  Need to reset some vars
+            LenBlock = len(Text)                
+            Text = SeamText
+            FileOffset += LenBlock # can't use MEG here, because Text included some seam text
+            FileOffset -= len(SeamText)
+        File.close()
+        self.Validate(FileName)
+        return self.ScanOffset
+
+    def Validate(self, FileName):
+        "simple check of scanoffset"
+        File = open(FileName, "rb")
+        ErrorFound =0
+        for (ScanNumber, ScanOffset) in self.ScanOffset.items():
+            File.seek(ScanOffset)
+            Text = File.read(300)
+            Place = Text.find("BEGIN")
+            #print "Found begin at place %d"%Place
+            if not Place == 0:
+                print "Error with scan %d"%ScanNumber
+                ErrorFound = 1
+                print Text
+        if not ErrorFound:
+            print "Validation Successful"
+        
+            
diff --git a/Global.py b/Global.py
new file mode 100644
index 0000000..a1448a1
--- /dev/null
+++ b/Global.py
@@ -0,0 +1,64 @@
+#Title:          Global.py
+#Author:         Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+#Created:        2005
+# Copyright 2007,2008,2009 The Regents of the University of California
+# All Rights Reserved
+#
+# Permission to use, copy, modify and distribute any part of this
+# program for educational, research and non-profit purposes, by non-profit
+# institutions only, without fee, and without a written agreement is hereby
+# granted, provided that the above copyright notice, this paragraph and
+# the following three paragraphs appear in all copies.
+#
+# Those desiring to incorporate this work into commercial
+# products or use for commercial purposes should contact the Technology
+# Transfer & Intellectual Property Services, University of California,
+# San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+# Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+#
+# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+# INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+# IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+# OF SUCH DAMAGE.
+#
+# THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+# OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+# ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+# REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+# EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+# THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+
+"""
+Global variables for mass-spec analysis
+"""
+
+IsotopeWeights = {}
+
+# Keys are ion type names, values are the corresponding ion instances
+AllIonDict = {}
+
+# Masses for amino acids (keys: 1-letter peptide abbreviations, like "G" or "D")
+AminoMass = {}
+AminoMassRight = {}
+
+# List of all amino acid (left) masses:
+AminoMasses = []
+
+# Dictionary of post-translational modifications.  Keys are modification
+# names (in lower-case).
+PTMods = {}
+
+# Truncated, 3- or 4-character keys:
+PTModByShortName = {}
+PTModList = []
+
+AminoAcids = {} # key: single-letter abbreviation ("A" -> Alanine)
+FixedMods = {"C":57.0518} # The protecting group on C is enabled, by default!
+
+# List of ModificationTypeObject instances (see Utils.py) that the user defines as in vivo or in vitro
+InVivoMods = []
+InVitroMods = []
+
diff --git a/InVitroModifications.txt b/InVitroModifications.txt
new file mode 100644
index 0000000..c4fcfad
--- /dev/null
+++ b/InVitroModifications.txt
@@ -0,0 +1,7 @@
+mod,57,C,fixed	#CAM
+mod,-17,CQ,nterminal	#pyroglutamate, Pyro-cmC
+mod,57,*,nterminal	#CAM
+mod,12,*,nterminal	#chemical adduct
+mod,43,*,nterminal	#carbamylation
+mod,16,MW	#oxidation
+mod,-48,M	#neutral loss on Met or oxM
\ No newline at end of file
diff --git a/InVivoModifications.txt b/InVivoModifications.txt
new file mode 100644
index 0000000..88194cf
--- /dev/null
+++ b/InVivoModifications.txt
@@ -0,0 +1,9 @@
+mod,80,STY,opt,phosphorylation	#phosphorylation
+mod,42,*,nterminal	#nterminal acetylation
+mod,14,KR	#methylation
+mod,42,K	#acetylation
+mod,203,ST	#GlcNAc
+mod,146,ST	#fucosylation
+mod,210,*,nterminal	#myristoylation
+mod,16,P	#hydroxyproline
+mod,28,KR	#dimethylation
\ No newline at end of file
diff --git a/Inspect.exe b/Inspect.exe
new file mode 100644
index 0000000..a5fdc19
Binary files /dev/null and b/Inspect.exe differ
diff --git a/Inspect.h b/Inspect.h
new file mode 100644
index 0000000..18391b3
--- /dev/null
+++ b/Inspect.h
@@ -0,0 +1,190 @@
+//Title:          Inspect.h
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+
+#ifndef INSPECT_H
+#define INSPECT_H
+
+#define INSPECT_VERSION_NUMBER "20110313"
+
+#include <stdio.h>
+#include "Utils.h"
+
+//////////////////////////////////////////////////////////////////////////////
+// General-purpose #definitions.
+
+// Multiplier for scaling floating-point masses up to integers.
+// Represent 123.456Da as the integer 123456:
+#define MASS_SCALE 1000
+#define DALTON 1000
+#define HALF_DALTON 500
+#define DECI_DALTON 100
+
+// Mass (in amu) of hydrogen.  (Used for, e.g., finding the PRM of a b peak)
+//#define HYDROGEN_MASS (float)1.0078
+#define HYDROGEN_MASS (int)1008
+#define TWO_HYDROGEN_MASS (int)2016
+#define GLYCINE_MASS 57000
+#define WATER_MASS 18000
+#define CAM_MASS 57000
+#define PHOSPHATE_MASS 79966
+#define PHOSPHATE_WATER_MASS 97966
+
+// The parent mass boost is equal to the difference in mass between a precursor ion and
+// the parent *residue* mass (sum of amino acid masses).
+#define PARENT_MASS_BOOST (int)19000
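+// (Roughly WATER_MASS + HYDROGEN_MASS = 18000 + 1008, presumably H2O plus a proton
+//  for the protonated precursor, rounded here to 19000.)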
+
+// Maximum length of a peptide tag that can be indexed.  (Char-arrays are limited to this size)
+// (Somewhat absurdly large, because we may want to use the trie to search for peptides 
+// outside the MS/MS context)
+#define MAX_TAG_LENGTH 50
+
+// How far can peaks be from theoretical prediction?
+#define DEFAULT_EPSILON 500
+
+#define DEFAULT_PARENT_MASS_EPSILON 2500
+#define DEFAULT_PARENT_MASS_PPM 2000
+#define DEFAULT_FLANKING_MASS_EPSILON 3000
+
+// How large are the tags we generate, by default?  (Overridable by -x, -y)
+#define DEFAULT_TAG_LENGTH 3
+
+// Maximum number of post-translational modification types:
+#define MAX_PT_MODTYPE 42
+
+// Maximum number of post-translational mods that can EVER be allowed:
+#define MAX_PT_MODS 8
+
+// How many entries in the match hash-table. (If we have many more matches than this,
+// performance will be slowed a bit)
+#define MATCH_HASH_SIZE 1000
+
+// Trie node's child array has one entry for each letter (some slots are wasted,
+// since there are only 20 amino acids, but it makes for fast searching)
+// The index into the array is the amino acid char minus 'A'.  (alanine 0, cysteine 2, etc)
+#define TRIE_CHILD_COUNT 26 
+
+#define FILENAME_AMINO_ACID_MASSES "AminoAcidMasses.txt"
+#define FILENAME_PTMS "PTMods.txt"
+#define FILENAME_MASTER_TAGGING_MODEL "PRM.dat"
+#define FILENAME_MASS_DELTAS "Mutations.dat"
+#define FILENAME_PVALUE "PValue.dat"
+#define FILENAME_PVALUE_TRYPTIC "PValueTryptic.dat"
+#define FILENAME_SCORING_MODEL "ScoringModel.dat"
+#define FILENAME_ISOTOPE_PATTERNS "IsotopePatterns.txt"
+#define FILENAME_INTENSITY_RANK_ODDS "IntensityRankIonOdds.txt"
+#define FILENAME_WITNESS_SCORES "IonWitnessScores.dat"
+#define FILENAME_PRM_MODEL "PRMModel.dat"
+
+#define TWEAK_COUNT 6
+
+//used as switches for fragmentation models
+#define FRAGMENTATION_NORMAL 0
+#define FRAGMENTATION_PHOSPHO 1
+
+// We may try two or three different charge/parent-mass combinations for one 
+// spectrum.  We use SVMs to determine parent mass and charge state, but in
+// borderline cases, we try both.  
+typedef struct SpectrumTweak
+{
+    int ParentMass;
+    int Charge;
+    // Intensities(S, L) is the frequency of intensity level L in sector S
+    float Intensities[12]; // SECTOR_COUNT
+    int* PRMScores;
+    int PRMScoreMax;
+} SpectrumTweak;
+
+#define SPECTRUM_FORMAT_INVALID -1
+#define SPECTRUM_FORMAT_DTA 0
+#define SPECTRUM_FORMAT_PKL 1
+#define SPECTRUM_FORMAT_MS2 2
+#define SPECTRUM_FORMAT_MGF 3
+#define SPECTRUM_FORMAT_MS2_COLONS 4
+#define SPECTRUM_FORMAT_MZXML 5
+#define SPECTRUM_FORMAT_MZDATA 6
+#define SPECTRUM_FORMAT_CDTA 7
+
+// Create one InputFileNode for each file being searched.  
+// If the input file is a standard .dta file, then we create one child SpectrumNode.
+// If the input file is a .ms2 file, then we create many child SpectrumNodes.
+typedef struct InputFileNode
+{
+    char FileName[MAX_FILENAME_LEN];
+    int SpectrumCount;
+    int Format; // 0 dta, 1 pkl, 2 ms2, 3 mgf
+    struct InputFileNode* Prev;
+    struct InputFileNode* Next;
+} InputFileNode;
+
+typedef struct SpectrumNode
+{
+    struct MSSpectrum* Spectrum;
+    struct SpectrumNode* Next;
+    SpectrumTweak Tweaks[TWEAK_COUNT];
+    int PMCFlag; // Set to 1 after PMC is done and our tweak-array is populated.
+    int FilePosition; // seek to here before parsing 
+    
+    // The scan number is a user-defined notion for each spectrum.
+    // In MGF files the scan number is a 0-based indexing of the spectra;
+    // in mzXML files the scan number is read from the field 'scanNum'.
+    int ScanNumber;
+
+    // The spectrum index is a 1-based indexing of MS2+ spectra in a file.
+    int SpecIndex;
+    int MatchCount;
+    struct Peptide* FirstMatch;
+    struct Peptide* LastMatch;
+    InputFileNode* InputFile; // the file name (and file type)
+} SpectrumNode;
+
+// The Stats object is for keeping track of cumulative info (tags generated,
+// bytes read, spectra scored, that sort of thing)
+typedef struct InspectStats
+{
+    // Tags generated - raw count of all tripeptide paths through the PRM graph
+    long long TagsGenerated;
+    // Tag hits in the database (How many tripeptide tag matches were extended?)
+    long long TagMatches;
+    // Number of candidate peptides that were scored against the source spectrum
+    long long CandidatesScored;
+    long long TagGraphNodes;
+    long long TagGraphEdges;
+} InspectStats;
+
+extern InspectStats* GlobalStats;
+
+typedef void (*TrainingCallback)(SpectrumNode*, int, int, struct Peptide*);
+void TrainOnOracleFile(char* OracleFileName, char* SpectrumDir, TrainingCallback Callback);
+void AddSpectrumToList(InputFileNode* InputFile, int FilePos, int ScanNumber, int SpecIndex);
+
+#endif // INSPECT_H
diff --git a/Inspect.sln b/Inspect.sln
new file mode 100644
index 0000000..98c0b19
--- /dev/null
+++ b/Inspect.sln
@@ -0,0 +1,19 @@
+Microsoft Visual Studio Solution File, Format Version 10.00
+# Visual Studio 2008
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Inspect", "Inspect.vcproj", "{5C4CDF65-87D6-4FE9-B269-4695FD7EC35B}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|Win32 = Debug|Win32
+		Release|Win32 = Release|Win32
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{5C4CDF65-87D6-4FE9-B269-4695FD7EC35B}.Debug|Win32.ActiveCfg = Debug|Win32
+		{5C4CDF65-87D6-4FE9-B269-4695FD7EC35B}.Debug|Win32.Build.0 = Debug|Win32
+		{5C4CDF65-87D6-4FE9-B269-4695FD7EC35B}.Release|Win32.ActiveCfg = Release|Win32
+		{5C4CDF65-87D6-4FE9-B269-4695FD7EC35B}.Release|Win32.Build.0 = Release|Win32
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+EndGlobal
diff --git a/Inspect.vcproj b/Inspect.vcproj
new file mode 100644
index 0000000..0c5e0d7
--- /dev/null
+++ b/Inspect.vcproj
@@ -0,0 +1,566 @@
+<?xml version="1.0" encoding="Windows-1252"?>
+<VisualStudioProject
+	ProjectType="Visual C++"
+	Version="9.00"
+	Name="Inspect"
+	ProjectGUID="{5C4CDF65-87D6-4FE9-B269-4695FD7EC35B}"
+	TargetFrameworkVersion="131072"
+	>
+	<Platforms>
+		<Platform
+			Name="Win32"
+		/>
+	</Platforms>
+	<ToolFiles>
+	</ToolFiles>
+	<Configurations>
+		<Configuration
+			Name="Debug|Win32"
+			OutputDirectory=".\Debug"
+			IntermediateDirectory=".\Debug"
+			ConfigurationType="1"
+			InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
+			UseOfMFC="0"
+			ATLMinimizesCRunTimeLibraryUsage="false"
+			CharacterSet="2"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+				TypeLibraryName=".\Debug/Protri.tlb"
+				HeaderFileName=""
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="0"
+				AdditionalIncludeDirectories="expat\lib"
+				PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
+				BasicRuntimeChecks="3"
+				RuntimeLibrary="1"
+				UsePrecompiledHeader="0"
+				PrecompiledHeaderFile=".\Debug/Protri.pch"
+				AssemblerListingLocation=".\Debug/"
+				ObjectFile=".\Debug/"
+				ProgramDataBaseFileName=".\Debug/"
+				WarningLevel="3"
+				SuppressStartupBanner="true"
+				DebugInformationFormat="4"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+				PreprocessorDefinitions="_DEBUG"
+				Culture="1033"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+				AdditionalOptions="/FIXED:NO"
+				AdditionalDependencies="libexpat.lib"
+				OutputFile=".\Inspect.exe"
+				LinkIncremental="1"
+				SuppressStartupBanner="true"
+				AdditionalLibraryDirectories="expat\lib\debug"
+				IgnoreDefaultLibraryNames=""
+				GenerateDebugInformation="true"
+				ProgramDatabaseFile=".\Debug/Protri.pdb"
+				SubSystem="1"
+				RandomizedBaseAddress="1"
+				DataExecutionPrevention="0"
+				TargetMachine="1"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Release|Win32"
+			OutputDirectory="."
+			IntermediateDirectory="."
+			ConfigurationType="1"
+			InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC71.vsprops"
+			UseOfMFC="0"
+			ATLMinimizesCRunTimeLibraryUsage="false"
+			CharacterSet="2"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+				TypeLibraryName=".\Release/Protri.tlb"
+				HeaderFileName=""
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="2"
+				InlineFunctionExpansion="1"
+				AdditionalIncludeDirectories="expat\lib"
+				PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
+				StringPooling="true"
+				RuntimeLibrary="0"
+				EnableFunctionLevelLinking="true"
+				UsePrecompiledHeader="0"
+				PrecompiledHeaderFile=".\Release/Protri.pch"
+				AssemblerListingLocation=".\Release/"
+				ObjectFile=".\Release/"
+				ProgramDataBaseFileName=".\Release/"
+				WarningLevel="3"
+				SuppressStartupBanner="true"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+				PreprocessorDefinitions="NDEBUG"
+				Culture="1033"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+				AdditionalDependencies="libexpat.lib"
+				OutputFile=".\Inspect.exe"
+				LinkIncremental="1"
+				SuppressStartupBanner="true"
+				AdditionalLibraryDirectories="expat\lib\debug"
+				IgnoreDefaultLibraryNames=""
+				ProgramDatabaseFile=".\Release/Protri.pdb"
+				SubSystem="1"
+				RandomizedBaseAddress="1"
+				DataExecutionPrevention="0"
+				TargetMachine="1"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+	</Configurations>
+	<References>
+	</References>
+	<Files>
+		<Filter
+			Name="Source Files"
+			Filter="cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
+			>
+			<File
+				RelativePath=".\base64.c"
+				>
+			</File>
+			<File
+				RelativePath=".\base64.h"
+				>
+			</File>
+			<File
+				RelativePath=".\BN.c"
+				>
+			</File>
+			<File
+				RelativePath=".\BN.h"
+				>
+			</File>
+			<File
+				RelativePath=".\BuildMS2DB.c"
+				>
+			</File>
+			<File
+				RelativePath=".\BuildMS2DB.h"
+				>
+			</File>
+			<File
+				RelativePath=".\ChargeState.c"
+				>
+			</File>
+			<File
+				RelativePath=".\ChargeState.h"
+				>
+			</File>
+			<File
+				RelativePath=".\CMemLeak.c"
+				>
+			</File>
+			<File
+				RelativePath=".\CMemLeak.h"
+				>
+			</File>
+			<File
+				RelativePath=".\Errors.c"
+				>
+			</File>
+			<File
+				RelativePath=".\Errors.h"
+				>
+			</File>
+			<File
+				RelativePath=".\ExonGraphAlign.c"
+				>
+			</File>
+			<File
+				RelativePath=".\ExonGraphAlign.h"
+				>
+			</File>
+			<File
+				RelativePath=".\FreeMod.c"
+				>
+			</File>
+			<File
+				RelativePath=".\FreeMod.h"
+				>
+			</File>
+			<File
+				RelativePath=".\Inspect.h"
+				>
+			</File>
+			<File
+				RelativePath=".\IonScoring.c"
+				>
+			</File>
+			<File
+				RelativePath=".\IonScoring.h"
+				>
+			</File>
+			<File
+				RelativePath=".\LDA.c"
+				>
+			</File>
+			<File
+				RelativePath=".\LDA.h"
+				>
+			</File>
+			<File
+				RelativePath="main.c"
+				>
+				<FileConfiguration
+					Name="Debug|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="0"
+						PreprocessorDefinitions=""
+						BasicRuntimeChecks="3"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="2"
+						PreprocessorDefinitions=""
+					/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath="Mods.c"
+				>
+				<FileConfiguration
+					Name="Debug|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="0"
+						PreprocessorDefinitions=""
+						BasicRuntimeChecks="3"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="2"
+						PreprocessorDefinitions=""
+					/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath=".\Mods.h"
+				>
+			</File>
+			<File
+				RelativePath=".\MS2DB.c"
+				>
+			</File>
+			<File
+				RelativePath=".\MS2DB.h"
+				>
+			</File>
+			<File
+				RelativePath=".\ParentMass.c"
+				>
+			</File>
+			<File
+				RelativePath=".\ParentMass.h"
+				>
+			</File>
+			<File
+				RelativePath=".\ParseInput.c"
+				>
+			</File>
+			<File
+				RelativePath=".\ParseInput.h"
+				>
+			</File>
+			<File
+				RelativePath=".\ParseXML.c"
+				>
+			</File>
+			<File
+				RelativePath=".\ParseXML.h"
+				>
+			</File>
+			<File
+				RelativePath=".\PValue.c"
+				>
+			</File>
+			<File
+				RelativePath=".\PValue.h"
+				>
+			</File>
+			<File
+				RelativePath=".\Run.c"
+				>
+			</File>
+			<File
+				RelativePath=".\Run.h"
+				>
+			</File>
+			<File
+				RelativePath="Score.c"
+				>
+				<FileConfiguration
+					Name="Debug|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="0"
+						PreprocessorDefinitions=""
+						BasicRuntimeChecks="3"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="2"
+						PreprocessorDefinitions=""
+					/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath=".\Score.h"
+				>
+			</File>
+			<File
+				RelativePath=".\Scorpion.c"
+				>
+			</File>
+			<File
+				RelativePath=".\Scorpion.h"
+				>
+			</File>
+			<File
+				RelativePath=".\SNP.c"
+				>
+			</File>
+			<File
+				RelativePath=".\SNP.h"
+				>
+			</File>
+			<File
+				RelativePath=".\Spectrum.c"
+				>
+			</File>
+			<File
+				RelativePath=".\Spectrum.h"
+				>
+			</File>
+			<File
+				RelativePath=".\Spliced.c"
+				>
+			</File>
+			<File
+				RelativePath=".\Spliced.h"
+				>
+			</File>
+			<File
+				RelativePath=".\SpliceDB.c"
+				>
+			</File>
+			<File
+				RelativePath=".\SpliceDB.h"
+				>
+			</File>
+			<File
+				RelativePath=".\SpliceScan.c"
+				>
+			</File>
+			<File
+				RelativePath=".\SpliceScan.h"
+				>
+			</File>
+			<File
+				RelativePath=".\SVM.c"
+				>
+			</File>
+			<File
+				RelativePath=".\SVM.h"
+				>
+			</File>
+			<File
+				RelativePath=".\TagFile.c"
+				>
+			</File>
+			<File
+				RelativePath=".\TagFile.h"
+				>
+			</File>
+			<File
+				RelativePath="Tagger.c"
+				>
+				<FileConfiguration
+					Name="Debug|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="0"
+						PreprocessorDefinitions=""
+						BasicRuntimeChecks="3"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="2"
+						PreprocessorDefinitions=""
+					/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath=".\Tagger.h"
+				>
+			</File>
+			<File
+				RelativePath="Trie.c"
+				>
+				<FileConfiguration
+					Name="Debug|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="0"
+						PreprocessorDefinitions=""
+						BasicRuntimeChecks="3"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="2"
+						PreprocessorDefinitions=""
+					/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath=".\Trie.h"
+				>
+			</File>
+			<File
+				RelativePath="Utils.c"
+				>
+				<FileConfiguration
+					Name="Debug|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="0"
+						PreprocessorDefinitions=""
+						BasicRuntimeChecks="3"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="2"
+						PreprocessorDefinitions=""
+					/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath=".\Utils.h"
+				>
+			</File>
+		</Filter>
+	</Files>
+	<Globals>
+	</Globals>
+</VisualStudioProject>
diff --git a/InspectToPepXML.py b/InspectToPepXML.py
new file mode 100644
index 0000000..15163a2
--- /dev/null
+++ b/InspectToPepXML.py
@@ -0,0 +1,859 @@
+#!/usr/bin/python
+
+# Update Jan 3, 2012 by Natalie to remove dependence on Column Order
+
+UsageInfo = \
+"""InspectToPepXML.py: Converts output of InsPecT search engine
+to PepXML format.  Written by Samuel Payne, Venter Institute,
+and Terry Farrah, Institute for Systems Biology, October 2008
+
+Required Parameters
+-i [Filename] - InsPecT results file from search (input)
+-o [Filename] - Converted file in PepXML (output)
+
+Optional Parameters
+-p [Filename] - InsPecT input (parameter) file
+                  default: inspect.params
+-m [Dirname] - Dir containing .mzXML or .mgf spectrum file
+                  default: current working directory
+-d N   - write at most N hits per assumed charge
+
+Assumes InsPecT results file is TSV containing header line and
+ one record per peptide prediction sorted by scan #, then by rank.
+User must manually edit PepXML file and insert correct information
+  near top of file for precursor and fragment mass types --
+  either average or monoisotopic.
+If database file mentioned in parameter file is not in fasta
+  format (.fasta or .fa), you must create a fasta format file of
+  the same base name in the same dir. Use TrieToFASTA.py.
+This script, InspectToPepXML.py, must reside in the same directory
+  as the rest of the InsPecT code.
+"""
+
+import sys
+import os
+import glob
+import getopt
+import re
+import time
+import GetByteOffset
+import ResultsParser
+import Utils
+import Global
+from xml.sax import saxutils          #xml.sax is for reading mzXML
+from xml.sax import ContentHandler
+from xml.sax import make_parser
+from xml.sax.handler import feature_namespaces
+
+global initial_dir
+global spectrum_query_count
+
+# ========================================================
+# Read tables with standard data such as amino acid masses
+# ========================================================
+
+# chdir() is a hack to make the program invokable from any dir
+#  (Utils makes use of auxiliary files in same dir as code)
+initial_dir = os.getcwd()
+os.chdir(sys.path[0])
+Utils.Initialize()  
+os.chdir(initial_dir)
+
+# ===========================================================
+# Define classes to hold spectra (scans) and peptides (hits)
+# ===========================================================
+
+class InspectSpectrumClass:
+    """Stores the relevant InsPecT output file data for a spectrum"""
+    def __init__(self):
+        self.ScanNumber = -1
+        self.PrecursorMz = -1.0
+        self.RetentionTime = -1.0
+        self.HitList = [[] for i in range(1, 6)]   # store a hit list for each charge state
+
+    def WriteSpectrumQueries(self, PepXMLHandle, SpectrumFileName, enzyme,
+            MaxHitsPerCharge):
+        """ Write <spectrum_query> tags for this spectrum.
+
+            There is one tag for each assumed charge that has any hits.
+        """
+        global spectrum_query_count
+        SpectrumFileType = os.path.splitext(SpectrumFileName)[1].lower()
+
+        for charge in range(1,5):     # for each charge state
+            if len(self.HitList[charge]) > 0: # if any hits
+                spectrum_query_count = spectrum_query_count + 1
+                SpectrumTitle="%s.%05d.%05d.%s" % \
+                   (os.path.splitext(SpectrumFileName)[0],
+                     self.ScanNumber,self.ScanNumber,
+                     charge)
+                if SpectrumFileType == ".mgf":
+                  PrecursorNeutralMass = self.PrecursorNeutralMass
+                else:
+                  _proton_mass = 1.007276
+                  PrecursorNeutralMass =  \
+                      (self.PrecursorMz * charge) - \
+                      (charge * _proton_mass)
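+                  # Illustrative arithmetic (hypothetical values): m/z 500.50 at an
+                  # assumed charge of 2 gives 500.50*2 - 2*1.007276 = ~998.99 Da.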
+                if PrecursorNeutralMass < 0:
+                  PrecursorNeutralMassString = ''
+                else:
+                  PrecursorNeutralMassString = \
+                    '    precursor_neutral_mass="%.6f"\n' % \
+                                  PrecursorNeutralMass
+                if self.RetentionTime < 0:
+                  RetentionTimeString = ''
+                else:
+                  RetentionTimeString = \
+                    '    retention_time_sec="%.2f"\n' % \
+                                  self.RetentionTime
+                Query = '<spectrum_query\n' + \
+                    '    spectrum="%s"\n' % SpectrumTitle + \
+                    '    start_scan="%s"\n' % self.ScanNumber + \
+                    '    end_scan="%s"\n' % self.ScanNumber + \
+                    PrecursorNeutralMassString + \
+                    '    assumed_charge="%s"\n' % charge + \
+                    '    index="%s"\n' % spectrum_query_count + \
+                    RetentionTimeString + \
+                    '>\n'
+                PepXMLHandle.write(Query)
+                PepXMLHandle.write('<search_result search_id="1">\n')
+                for i in range(min(MaxHitsPerCharge,
+                         len(self.HitList[charge]))):
+                    self.HitList[charge][i].PrecursorNeutralMass = \
+                          PrecursorNeutralMass
+                    self.HitList[charge][i].WriteSearchHit(PepXMLHandle,
+                          i+1, enzyme)
+                PepXMLHandle.write('</search_result>\n')
+                PepXMLHandle.write('</spectrum_query>\n')
+
+class InspectOutputRecordClass:
+    """Stores the relevant data from a single line of InsPecT output.
+
+    Each line represents a search hit--a predicted peptide for a spectrum.
+    """
+    def __init__(self):
+        self.Spectrum = None
+        self.FileOffset = -1
+        self.Protein = ""
+        self.Charge = -1
+        self.MQScore = ""
+        self.FScore = ""
+        self.DeltaScore = ""
+        self.PValue = ""
+        self.ProteinID = ""
+        self.Prefix = ""
+        self.Peptide = ""
+        self.Suffix = ""
+        self.OptModList = []
+        self.PrecursorNeutralMass = -1.0
+
+    def WriteSearchHit(self, PepXMLHandle, rank, enzyme):
+        """ Write <search_hit> tag for this this line of InsPecT output
+        """           
+        global initial_dir
+        os.chdir(sys.path[0]) # hack to make pgm invokable from any dir
+        # GetMass adds on fixed modifications, but not optional ones
+        CalcMass = Utils.GetMass(self.Peptide) + 18.01528 #add h2o mass
+        for mod in self.OptModList:
+            CalcMass = CalcMass + float(mod[2])
+        os.chdir(initial_dir)
+        MassDiff = self.PrecursorNeutralMass - CalcMass
+        # If the enzyme is trypsin, count all KR except
+        # final one, and except when followed by P (proline).
+        if enzyme.lower() == "trypsin":
+            MissedCleavages = self.Peptide[:-1].count("K") + \
+               self.Peptide[:-1].count("R") - \
+               (self.Peptide[:-1].count("KP") + self.Peptide[:-1].count("RP"))
+        elif enzyme.lower() == "none":
+            MissedCleavages = 0
+        else: MissedCleavages = -1
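+        # Worked example of the trypsin rule above (illustrative): for peptide
+        # "AKRPGK", Peptide[:-1] = "AKRPG" has one K, one R, and one "RP", so
+        # 1 + 1 - (0 + 1) = 1 missed cleavage (the R before P and the C-terminal
+        # K are not counted).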
+        # Break up Protein into accession # and description
+        first_space = self.Protein.find(' ')
+        if first_space >= 0:
+          Protein = self.Protein[:first_space]
+          ProteinDescr = self.Protein[first_space+1:]
+          # Escape XML special characters in the description (ampersands first, so
+          # the entities introduced below are not themselves re-escaped):
+          ProteinDescr = ProteinDescr.replace("&","&amp;")
+          ProteinDescr = ProteinDescr.replace(">","&gt;")
+          ProteinDescr = ProteinDescr.replace("<","&lt;")
+          ProteinDescr = ProteinDescr.replace("\"","&quot;")
+          ProteinDescr = ProteinDescr.replace("\'","&apos;")
+        else:
+          Protein = self.Protein
+          ProteinDescr = Protein
+        Hit = '<search_hit\n' + \
+              '    hit_rank="%s"\n' % (rank) + \
+              '    peptide="%s"\n' % (self.Peptide) + \
+              '    peptide_prev_aa="%s"\n' % (self.Prefix) + \
+              '    peptide_next_aa="%s"\n' % (self.Suffix) + \
+              '    protein="%s"\n' % (Protein) + \
+              '    protein_descr="%s"\n' % (ProteinDescr) + \
+              '    num_tot_proteins="0"\n' + \
+              '    num_matched_ions="0"\n'  + \
+              '    tot_num_ions="0"\n' + \
+              '    calc_neutral_pep_mass="%s"\n' % (CalcMass) + \
+              '    massdiff="%s"\n' % (MassDiff) + \
+              '    num_tol_term="%s"\n' % "2" + \
+              '    num_missed_cleavages="%d"\n'%(MissedCleavages) + \
+              '    is_rejected="0"\n' + \
+              '>\n'
+        PepXMLHandle.write(Hit)
+        # Create a dictionary of masses of all amino acids that
+        # are modified, indexed by peptide position.
+        # First, add to the dictionary all aa's that have optional mods.
+        # Then, add fixed mods. Use monoisotopic mass for basic AA.
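+        # Illustrative example (assumed values): for peptide "ACDK" with the default
+        # fixed mod C+57.0518 and an optional mod ("C", 2, "16"), ModMassDict ends
+        # up as {2: AminoMass["C"] + 16 + 57.0518}.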
+        ModMassDict = {}
+        for mod in self.OptModList:
+           aa = mod[0]
+           pos = mod[1]
+           mod_mass = mod[2]
+           if pos in ModMassDict:
+             ModMassDict[pos] += float(mod_mass)
+           else:
+             ModMassDict[pos] = float(mod_mass) + Global.AminoMass[aa]
+        for i in range(len(self.Peptide)):
+           aa = self.Peptide[i]
+           pos = i + 1
+           if aa in Global.FixedMods:
+               mod_mass = Global.FixedMods[aa]
+               if pos in ModMassDict:
+                   ModMassDict[pos] += float(mod_mass)
+               else:
+                   ModMassDict[pos] = float(mod_mass) + \
+                                            Global.AminoMass[aa]
+        # Now, create a pepXML string with an element for each modified AA.
+        ModString = ''
+        for i in range(len(self.Peptide)):
+            pos = i + 1
+            if pos in ModMassDict:
+                ModString = ModString + '<mod_aminoacid_mass ' + \
+                  'position="%d" ' % pos + \
+                  'mass="%.4f" />' % ModMassDict[pos]
+        if len(ModString) > 0:
+            ModInfo = '<modification_info>%s</modification_info>\n' % \
+                   ModString
+            PepXMLHandle.write(ModInfo)
+        PepXMLHandle.write(
+          '<search_score name="mqscore" value="%s"/>\n'%self.MQScore)
+        PepXMLHandle.write(
+          '<search_score name="expect" value="%s"/>\n'%self.PValue)
+        PepXMLHandle.write(
+          '<search_score name="fscore" value="%s"/>\n'%self.FScore)
+        PepXMLHandle.write(
+          '<search_score name="deltascore" value="%s"/>\n'%self.DeltaScore)
+        PepXMLHandle.write('</search_hit>\n')
+
+
+# ======================================================================
+# Virtually all the code is contained within class InspectToPepXMLClass
+# ======================================================================
+
+class InspectToPepXMLClass(ResultsParser.ResultsParser):
+    def __init__(self):
+        """Initialize fields of InspectToPepXMLClass instance to null values
+        """
+        self.InputFilePath = None
+        self.OutputFilePath = None
+        self.SpectraDir = os.getcwd()
+        self.MaxHitsPerCharge = 10000   #effectively maxint
+        self.ParamFilePath = os.path.join(os.getcwd(), "inspect.params")
+        self.ScanOffset = {}
+        self.ScanDict= {}
+        self.SpectrumFileType = ""
+        self.SpectrumFileBase = ""
+        self.Columns = ResultsParser.Columns()
+        ResultsParser.ResultsParser.__init__(self)
+        
+    #---------------------------------------------------------------------
+
+    def Main(self):
+        """Convert raw InsPecT output file to PepXML
+
+        Initially designed to handle entire directories of files.
+        """
+        try:
+            import psyco
+            psyco.full()
+        except:
+            print "(psyco not found - running in non-optimized mode)"
+        # Line directly below needed only if we want to handle directories
+        #self.self.ProcessResultsFiles(self.InputFilePath,
+        #    self.ConvertInspectToPepXML)
+        self.ConvertInspectToPepXML(self.InputFilePath)
+
+    #---------------------------------------------------------------------
+
+    def ConvertInspectToPepXML(self, FilePath):
+        """ Convert a single raw InsPecT output file to PepXML
+        """
+
+        global spectrum_query_count
+
+        # ------------------------------------------------------------
+        # Open input/output files and gather info from auxiliary files
+        # ------------------------------------------------------------
+
+        # Get input filename; open output file handle
+        #FileName = os.path.split(FilePath)[1]
+        #FileName = FileName.replace(".txt", ".xml")
+        #NewPath = os.path.join(self.OutputFilePath, FileName)
+        #PepXMLHandle = open(NewPath, "wb")
+        PepXMLHandle = open(self.OutputFilePath, "wb")
+
+        # Glean info from inspect params file
+        if not os.path.exists(self.ParamFilePath):
+            print >> sys.stderr, "Inspect params file %s does not exist" % \
+                  self.ParamFilePath
+            sys.exit()
+        ParamFile = open(self.ParamFilePath, "r")
+        nmods_allowed_per_spectrum = 0
+        nmods_in_params = 0
+        self.mod_weight = []
+        self.mod_aa = []
+        self.mod_type = []
+        self.mod_name = []
+        self.spec_file = []
+        # reset Global.FixedMods to empty; Global.py initializes it to
+        # {"C":57.0518}, but this is a hack we don't want
+        Global.FixedMods = {}
+        self.instrument = "UNKNOWN"
+        self.protease = "trypsin"
+        self.search_db = "UNKNOWN"
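+        # Illustrative inspect.params lines this loop understands (values
+        # here are examples only, not taken from any real search):
+        #   spectra,/data/run01.mzXML
+        #   instrument,ESI-ION-TRAP
+        #   protease,trypsin
+        #   db,Database/TestDatabase.trie
+        #   mod,57.02146,C,fix
+        #   mod,+80,STY,opt,phosphorylation
+        #   mods,1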
+        for Line in ParamFile.readlines():
+           Line = Line.strip()  #remove leading and trailing whitespace
+           if Line.lower().startswith("mods,"):
+               nmods_allowed_per_spectrum = int(Line[len("mods,"):])
+           elif Line.lower().startswith("spectra,"):
+               this_spec_file =  Line[len("spectra,"):].strip()
+               self.spec_file = self.spec_file + [this_spec_file]
+           elif Line.lower().startswith("mod,"):
+               tokens = Line.split(",")
+               this_mod_weight =  float(tokens[1])
+               this_mod_aa_string = tokens[2].strip()
+               if this_mod_aa_string == "*":
+                 this_mod_aa_string = "ACDEFGHIKLMNPQRSTVWY"
+               this_mod_type = "opt"
+               if len(tokens) > 3:
+                   this_mod_type = tokens[3].strip()
+               if len(tokens) > 4:
+                   this_mod_name = tokens[4].strip()
+               else: this_mod_name = None
+               for this_mod_aa in this_mod_aa_string:
+                   self.mod_weight = self.mod_weight + [this_mod_weight]
+                   self.mod_aa = self.mod_aa + [this_mod_aa]
+                   self.mod_type = self.mod_type + [this_mod_type]
+                   if this_mod_type == "fix":
+                       Global.FixedMods[this_mod_aa] = this_mod_weight
+                   self.mod_name = self.mod_name + [this_mod_name]
+                   nmods_in_params = nmods_in_params + 1
+           elif Line.lower().startswith("instrument,"):
+               self.instrument = Line[len("instrument,"):].strip()
+           elif Line.lower().startswith("protease,"):
+               self.protease = Line[len("protease,"):].strip()
+           elif Line.lower().startswith("db,"):
+               search_db = Line[len("db,"):].strip()
+               search_db_ext = os.path.splitext(search_db)[1]
+               # Find the FASTA file that the .trie database was built from
+               if search_db_ext not in [".fa", ".fasta"]:
+                   search_db_root = os.path.splitext(search_db)[0]
+                   search_db_file_list = set(
+                           glob.glob("%s.*" % search_db_root))
+                   #print search_db_file_list
+                   ext_list = [os.path.splitext(f)[1]
+                           for f in search_db_file_list]
+                   #print ext_list
+                   try: ext_list.remove(".index")
+                   except: pass
+                   try: ext_list.remove(".trie")
+                   except: pass
+                   #print ext_list
+                   if len(ext_list) == 1:
+                       search_db = search_db_root + ext_list[0]
+                   elif ".fasta" in ext_list:
+                       search_db = search_db_root + ".fasta"
+                   elif ".fa" in ext_list:
+                       search_db = search_db_root + ".fa"
+                   else:
+                       print >> sys.stderr, \
+           "WARNING: Can't find a RefreshParser compatible database " + \
+           "file corresponding to %s " % search_db + \
+           "(such as a .fasta or .fa file with same root); using UNKNOWN.\n" + \
+           "(%s is the database file listed in your params file.)\n" % search_db
+               self.search_db = search_db
+        self.nmods = nmods_in_params
+
+        # Read just first line of inspect output to get spectrum filename
+#        InspectHandle = open(FilePath, "r")
+#        for Line in InspectHandle.xreadlines():
+#            if Line[0] == "#":  # comments
+#                continue
+#            Bits = list(Line.split("\t"))
+#            break
+#        InspectHandle.close()
+        InspectHandle = open(FilePath, "rb")
+
+        # Glean RTs & precursor M/z's for each scan from each spectrum file
+        # Also, store the full path for each file in a dictionary
+        # keyed to the filename.
+        retentionTimeDict = dict()
+        precursorMzDict = dict()
+        spectrumPathDict = dict()      ###TMF_new
+        for SpectrumFilePath in self.spec_file:
+#          SpectrumFilePath = Bits[self.Columns.SpectrumFile]
+          SpectrumFileName = os.path.split(SpectrumFilePath)[1]
+# We used to force the path to be the cwd, but now we're leaving it alone. Dec-11
+#          SpectrumFilePath = os.path.join(self.SpectraDir, SpectrumFileName)
+          self.SpectrumFileBase = \
+            SpectrumFilePath.replace(os.path.splitext(SpectrumFilePath)[1], "")
+          #self.SpectrumFileType = os.path.splitext(SpectrumFilePath)[1]
+          spectrumPathDict[SpectrumFileName] = SpectrumFilePath
+
+          if not os.path.exists(SpectrumFilePath):
+              print >> sys.stderr, "Spectrum file %s does not exist" % \
+                    SpectrumFilePath
+              sys.exit()
+          (Stub, Ext) = os.path.splitext(SpectrumFilePath)
+          if Ext.lower() == ".mzxml":
+              self.SpectrumFileType = ".mzXML"
+              (this_retentionTimeDict, this_precursorMzDict) =  \
+                  self.GetSpectrumInfoFromMzXML(SpectrumFilePath)
+              retentionTimeDict.update(this_retentionTimeDict)
+              precursorMzDict.update(this_precursorMzDict)
+          elif Ext.lower() == ".mgf":
+              self.SpectrumFileType = ".mgf"
+              break
+          else:
+              print >> sys.stderr, \
+                 "Spectrum file %s lacks .mzXML or .mgf extension" % \
+                    SpectrumFilePath
+              sys.exit()
+
+        # ------------------------------------------------------------
+        # - Write opening info to PepXML
+        # - Process InsPecT output file line by line and write to PepXML
+        # - Write closing info to PepXML
+        # ------------------------------------------------------------
+
+        self.WritePepXMLOpening(PepXMLHandle, self.OutputFilePath)
+
+        LastScanNumber = -1
+        spectrum_query_count = 0
+
+        # Each line represents a predicted peptide for a spectrum (scan).
+        # A scan can have multiple predicted peptides (hits).
+        #  All hits for a scan are grouped together in the file.
+        #  Further, all scans for each spectrum file are grouped
+        #  together.
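+        # Illustrative line shape (tab-separated; the values here are
+        # invented for illustration only):
+        #   run01.mzXML  1842  K.TVAM+16GGK.L  P12345 protein descr  2  ...  102938
+        # The header line (starting with "#") names the columns used below,
+        # e.g. SpectrumFile, Scan#, Annotation, Protein, Charge, MQScore,
+        # DeltaScore, F-Score, InspectFDR, RecordNumber, SpecFilePos.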
+        for Line in InspectHandle.xreadlines():
+            if Line[0] == "#": 
+                self.Columns.initializeHeaders(Line) #This is the header, so save it
+                continue  # skip comments
+            # create a record for this line and read the fields into Bits
+            this_rec = InspectOutputRecordClass()
+            Bits = list(Line.split("\t"))
+
+            try:
+              this_rec.FileOffset = int(Bits[self.Columns.getIndex("SpecFilePos")])
+            except:
+              print "WARNING: malformed FileOffset %s in/after scan %d" % (Bits[self.Columns.getIndex("SpecFilePos")], LastScanNumber)
+              continue
+
+            try:
+              ScanNumber = int(Bits[self.Columns.getIndex("Scan#")])
+            except:
+              print "WARNING: malformed ScanNumber %s in/after scan %d" % (Bits[self.Columns.getIndex("Scan#")], LastScanNumber)
+              continue
+
+            ### TMF_new
+            try:
+              SpectrumFilePath = Bits[self.Columns.getIndex("SpectrumFile")]
+              SpectrumFile = os.path.split(SpectrumFilePath)[1]
+            except:
+              print "WARNING: malformed SpectrumFile field in/after scan %d" % (LastScanNumber)
+              continue
+
+            ScanName = SpectrumFile + "." + str(ScanNumber)
+
+            if (LastScanNumber != ScanNumber): 
+                if (LastScanNumber != -1):
+                    # write results for last spectrum
+                    this_scan.WriteSpectrumQueries(PepXMLHandle,
+#                        SpectrumFileName, self.protease,
+                        SpectrumFile, self.protease,
+                        self.MaxHitsPerCharge)
+                # initialize new spectrum
+                this_scan = InspectSpectrumClass()
+                this_scan.ScanNumber = ScanNumber
+                this_scan.ScanName = ScanName
+                # get info about spectrum from spectrum file
+                if self.SpectrumFileType == ".mgf":
+                    SpectrumFilePath = spectrumPathDict[SpectrumFile] ### TMF_new
+                    (MgfPepMass, MgfRT) = \
+                       self.GetSpectrumInfoFromMGF(SpectrumFilePath,
+                       this_rec.FileOffset)
+                    this_scan.PrecursorNeutralMass = float(MgfPepMass)
+                    this_scan.RetentionTime = float(MgfRT)
+                elif self.SpectrumFileType == ".mzXML":
+                  if not retentionTimeDict.has_key(this_scan.ScanName):
+                    print "WARNING: RT for scan %s not found in spectrum file; retention_time_sec will not be output" % ScanName
+                    this_scan.RetentionTime = -1.0
+                  else:
+                    this_scan.RetentionTime = \
+                          retentionTimeDict[this_scan.ScanName]
+                  if not precursorMzDict.has_key(this_scan.ScanName):
+                    print "WARNING: m/z for scan %s not found in spectrum file; precursor_neutral_mass will not be output" % ScanName
+                    this_scan.PrecursorMz = -1.0
+                  else:
+                    this_scan.PrecursorMz = \
+                           precursorMzDict[this_scan.ScanName]
+
+            this_rec.Spectrum = this_scan
+            LastScanNumber = ScanNumber
+
+            # ---------------------------
+            # Process data about this hit
+            # ---------------------------
+            Annotation = Bits[self.Columns.getIndex("Annotation")]
+            Peptide = Annotation[2:-2]
+
+            # process peptide string --TMF
+            # I think there is already code to do this in Utils.py
+            # Sam, you may want to replace my code with a call to that.
+            def ExtractAAModifications(search, peptide):
+              '''Given peptide like TVAM+16GGKYphosLV, extract the numbers
+                 and other modification symbols.
+
+                 Return (a) the peptide without the mods, and
+                 (b) a list of (aa, aa-pos, number) tuples -- 
+                 aa/aa-pos describe the aa possessing the mod.
+              '''
+              i = 0
+              mod_list = []
+              stripped_peptide = ""
+              while i < len(peptide):
+                if peptide[i].isupper():
+                  stripped_peptide = stripped_peptide + peptide[i]
+                  i = i + 1
+                  continue
+                j = i + 1
+                while j < len(peptide) and not peptide[j].isupper():
+                  j = j + 1
+                aa = peptide[i-1]
+                added_mod = peptide[i:j]
+                added_mod_pos = len(stripped_peptide) #counting starts at 1
+                # modifications with names in the param file
+                # will be represented by their names embedded in the
+                # peptide. Look up their weights.
+                for k in range(nmods_in_params):
+                  if search.mod_name[k]:
+                     truncated_name = search.mod_name[k][:4]
+                     this_weight = int(search.mod_weight[k])
+                     if this_weight > 0:
+                       weight_string = "+" + str(this_weight)
+                     else:
+                       weight_string = str(this_weight)
+                     added_mod = added_mod.replace(truncated_name,
+                         weight_string)
+                # added_mod could be a concatenation of several mods,
+                #  as in AEQDNLGKSVM-5+16IPTK;
+                # store each one as a separate mod.
+                this_mod = ""
+                for i in range(len(added_mod)):
+                  c = added_mod[i]
+                  if (c == "+" or c == "-"):
+                    # store the previous mod
+                    if len(this_mod) > 0:
+                      mod_list = mod_list + [(aa, added_mod_pos, this_mod)]
+                    # start a new mod
+                    this_mod = c
+                  else:
+                    this_mod = this_mod + c
+                # store the last mod
+                mod_list = mod_list + [(aa, added_mod_pos, this_mod)]
+                i = j
+              return (stripped_peptide, mod_list)
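+            # Illustrative example (not part of the original code): for the
+            # annotation peptide "TVAM+16GGK", ExtractAAModifications returns
+            # ("TVAMGGK", [("M", 4, "+16")]).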
+
+            (this_rec.Peptide, this_rec.OptModList) = \
+                 ExtractAAModifications(self, Peptide)
+
+            # done processing peptide string
+   
+            this_rec.Prefix = Annotation[0]
+            this_rec.Suffix = Annotation[-1]
+            this_rec.Protein = Bits[self.Columns.getIndex("Protein")]
+            this_rec.Charge = int(Bits[self.Columns.getIndex("Charge")])
+            this_rec.MQScore = Bits[self.Columns.getIndex("MQScore")]
+            this_rec.FScore = Bits[self.Columns.getIndex("F-Score")]
+            this_rec.DeltaScore = Bits[self.Columns.getIndex("DeltaScore")]
+            this_rec.PValue = Bits[self.Columns.getIndex("InspectFDR")]
+            this_rec.ProteinID = Bits[self.Columns.getIndex("RecordNumber")]
+
+            this_scan.HitList[this_rec.Charge] = \
+                this_scan.HitList[this_rec.Charge] + [this_rec]
+             
+            # done processing a single line of InsPecT output file
+
+        # write conversion of last line of InsPecT output file
+        this_scan.WriteSpectrumQueries(PepXMLHandle, \
+#                       SpectrumFileName, self.protease, \
+                       SpectrumFile, self.protease, \
+                       self.MaxHitsPerCharge)
+
+        self.WritePepXMLClosing(PepXMLHandle)
+        InspectHandle.close()
+        PepXMLHandle.close()
+
+
+    #---------------------------------------------------------------------
+
+    def WritePepXMLOpening(self, PepXMLHandle, PepXMLFilePath):
+        """Write stuff that belongs at the top of the pepXML file"""
+        PepXMLHandle.write('<?xml version="1.0" encoding="UTF-8"?>\n')
+        PepXMLHandle.write('<?xml-stylesheet type="text/xsl" href="pepXML_std.xsl"?>\n')
+        datestr = time.strftime('%Y-%m-%dT%H:%M:%S')
+        PepXMLHandle.write(
+           '<msms_pipeline_analysis ' + 
+             'date="%s" ' % datestr  +
+             'summary_xml="%s" ' %PepXMLFilePath +
+             'xmlns="http://regis-web.systemsbiology.net/pepXML" ' +
+             'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" ' +
+             'xsi:schemaLocation="http://regis-web.systemsbiology.net/pepXML /tools/bin/TPP/tpp/schema/pepXML_v112.xsd" ' +
+           '>\n' )
+           #'xsi:schemaLocation="http://regis-web.systemsbiology.net/pepXML http://mascot1/mascot/xmlns/schema/pepXML_v18/pepXML_v18.xsd" ' +
+        PepXMLHandle.write(
+           '<msms_run_summary ' +
+             'base_name="%s" ' % self.SpectrumFileBase +
+             'search_engine="InsPecT" ' +
+             'msManufacturer="UNKNOWN" ' +
+             'msModel="%s" ' % self.instrument +
+             'msIonization="UNKNOWN" ' +
+             'msMassAnalyzer="UNKNOWN" ' +
+             'msDetector="UNKNOWN" ' +
+             'raw_data_type="raw" ' +
+             'raw_data="%s" ' % self.SpectrumFileType +
+           '>\n')
+        PepXMLHandle.write('<sample_enzyme name="%s">\n' % self.protease)
+        PepXMLHandle.write('<specificity cut="KR" no_cut="P" sense="C"/>\n')
+        PepXMLHandle.write('</sample_enzyme>\n')
+        PepXMLHandle.write(
+           '<search_summary ' +
+             'base_name="%s" ' % self.SpectrumFileBase +
+             'search_engine="InsPecT" ' +
+             'precursor_mass_type="monoisotopic" ' +
+             'fragment_mass_type="monoisotopic" ' +
+             'search_id="1" ' +
+             'out_data_type="out" ' +
+             'out_data=".txt" ' +
+           '>\n')
+        PepXMLHandle.write(
+           '<search_database ' +
+             'local_path="%s" ' % self.search_db +
+             'type="AA" ' +
+           '/>\n')
+        #'database_name="" ' +
+        #'database_release_identifier="" ' +
+        #'size_in_db_entries="" ' +
+        #'size_of_residues="" ' +
+        PepXMLHandle.write(
+           '<enzymatic_search_constraint ' +
+              'enzyme="%s" ' % self.protease +
+              'max_num_internal_cleavages="2" ' +
+              'min_number_termini="2" ' +
+           '/>\n')
+        for i in range(self.nmods):
+            mod_aa = self.mod_aa[i]
+            mod_weight = self.mod_weight[i]
+            mass = mod_weight + Global.AminoMass[mod_aa]
+            if self.mod_type[i] == "opt": mod_variable="Y"
+            elif self.mod_type[i]=="fix": mod_variable="N"
+            else: mod_variable="UNKNOWN"   # are there other types?
+            PepXMLHandle.write(
+               '<aminoacid_modification ' +
+                'aminoacid="%s" ' % mod_aa +
+                'massdiff="%.4f" ' % mod_weight +
+                'mass="%.4f" ' % mass +
+                'variable="%s" ' % mod_variable +
+               '/>\n')
+        PepXMLHandle.write('<parameter name="CHARGE" value="2+ and 3+"/>\n')
+        PepXMLHandle.write('<parameter name="CLE" value="Trypsin"/>\n')
+        PepXMLHandle.write('<parameter name="DB" value=""/>\n')
+        PepXMLHandle.write('<parameter name="FILE" value=""/>\n')
+        PepXMLHandle.write('<parameter name="FORMAT" value=""/>\n')
+        PepXMLHandle.write('<parameter name="FORMVER" value=""/>\n')
+        PepXMLHandle.write('<parameter name="INSTRUMENT" value="%s"/>\n' % \
+          self.instrument)
+        PepXMLHandle.write('<parameter name="ITOL" value=""/>\n')
+        PepXMLHandle.write('<parameter name="ITOLU" value="Da"/>\n')
+        PepXMLHandle.write('<parameter name="MASS" value="Monoisotopic"/>\n')
+        PepXMLHandle.write('<parameter name="REPORT" value=""/>\n')
+        PepXMLHandle.write('<parameter name="REPTYPE" value="Peptide"/>\n')
+        PepXMLHandle.write('<parameter name="RULES" value=""/>\n')
+        PepXMLHandle.write('<parameter name="SEARCH" value=""/>\n')
+        PepXMLHandle.write('<parameter name="TAXONOMY" value=""/>\n')
+        PepXMLHandle.write('<parameter name="TOL" value=""/>\n')
+        PepXMLHandle.write('<parameter name="TOLU" value="Da"/>\n')
+        PepXMLHandle.write('</search_summary>\n')
+
+    #---------------------------------------------------------------------
+
+    def WritePepXMLClosing(self, PepXMLHandle):
+        """Write stuff that belongs at the end of the pepXML file"""
+        PepXMLHandle.write('</msms_run_summary>\n')
+        PepXMLHandle.write('</msms_pipeline_analysis>\n')
+
+    #---------------------------------------------------------------------
+
+    def GetAllSpectrumInfoFromMGF(self, FilePath):
+      sys.exit(1)
+
+    #---------------------------------------------------------------------
+
+    def GetSpectrumInfoFromMGF(self, FilePath, FileOffset):
+        """ returns the spectrum title and peptide mass corresponding to
+            the spectrum at the given file offset in the given mgf file
+        """
+        File = open(FilePath, "r")
+
+        File.seek(FileOffset)
+        Mass = 0
+        RT = 0
+        Title = None
+
+        MatchMass = re.compile('^PEPMASS=(\S*)')
+        MatchRT = re.compile('^RTINSECONDS=(\S*)')
+        MatchTitle = re.compile('^TITLE=([^\n]*)')
+        # read one line at a time
+        for Line in File:
+           
+            # We are not currently using the title
+            #Match = MatchTitle.match(Line)
+            #if Match != None:   
+                #Title = Match.group(1)
+                #continue         
+
+            # is this a mass line?
+            Match = MatchMass.match(Line)
+            if Match != None:   
+                Mass = Match.group(1)
+                continue         
+
+            # is this an RT line?
+            Match = MatchRT.match(Line)
+            if Match != None:   
+                RT = Match.group(1)
+                continue
+
+            # this is neither a mass nor an RT line. If we've read
+            # both of them already, stop reading.
+            if Mass != 0 and RT != 0:
+                break
+
+        File.close()
+        if Mass == 0 or RT == 0:
+          print >> sys.stderr, "WARNING: mass and/or RT missing for spectrum at offset %s in %s" % (FileOffset, FilePath)
+        return (Mass,RT)
+
+    #--------------------------------------------------------------------
+
+    def GetSpectrumInfoFromMzXML(self, FilePath):
+        """ compiles dictionaries of the precursorMz and retentionTime
+            for each spectrum in an mzXML file
+        """
+
+        def normalize_whitespace(text):
+            "Remove redundant whitespace from a string"
+            return ' '.join(text.split())
+
+        class MzXMLHandler(ContentHandler):
+
+            def __init__(self):
+                self.this_scan = None
+                self.this_precursorMz = None
+                self.precursorMz = dict()
+                self.retentionTime = dict()
+                self.inPrecursorMzContent = 0
+                self.FileName = os.path.split(FilePath)[1]  ###TMF_new
+
+            def startElement(self, name, attrs):
+                # If it's not a scan or precursorMz element, ignore it
+                if name == 'scan':
+                    # Look for the scan number and retentionTime attributes
+                    num = int(normalize_whitespace(attrs.get('num', None)))
+                    retentionTime = normalize_whitespace(
+                                     attrs.get('retentionTime', None))
+                    self.this_scan = int(num)
+                    self.this_scan_name = self.FileName + "." + str(num) ###TMF_new
+                    self.retentionTime[self.this_scan_name] = \
+                        float(retentionTime[2:-1])
+#                    self.retentionTime[self.this_scan] = \
+#                        float(retentionTime[2:-1])
+                elif name == 'precursorMz':
+                    self.inPrecursorMzContent = 1
+                    self.thisprecursorMz = ""
+
+            def characters(self, ch):
+                if self.inPrecursorMzContent:
+                    self.thisprecursorMz = self.thisprecursorMz + ch
+
+            def endElement(self, name):
+                if name == 'precursorMz':
+                    self.inPrecursorMzContent = 0
+                    idx = self.this_scan_name  ###TMF_new
+                    self.precursorMz[idx] = float(self.thisprecursorMz)
+#                    i = self.this_scan
+#                    self.precursorMz[i] = float(self.thisprecursorMz)
+                elif name == 'scan':
+                    pass
+
+        # Create an XML parser and tell it
+        # we are not interested in XML namespaces
+        MzXMLparser = make_parser()
+        MzXMLparser.setFeature(feature_namespaces, 0)
+
+        # Create a handler and tell the parser to use it
+        mh = MzXMLHandler()
+        MzXMLparser.setContentHandler(mh)
+
+        # Parse the file
+        File = open(FilePath, "r")
+        try:
+          MzXMLparser.parse(File)
+        except:
+          print >> sys.stderr, "ERROR: SAX parser cannot parse %s" % FilePath
+          sys.exit()
+
+        return (mh.retentionTime, mh.precursorMz)
+
+    #---------------------------------------------------------------------
+
+    def ParseCommandLine(self,Arguments):
+        (Options, Args) = getopt.getopt(Arguments, "i:o:m:p:d:")
+        OptionsSeen = {}
+        for (Option, Value) in Options:
+            OptionsSeen[Option] = 1
+            if Option == "-i":
+                if not os.path.exists(Value):
+                  print "** Error: couldn't find results file '%s'\n\n"%Value
+                  print UsageInfo
+                  sys.exit(1)
+                self.InputFilePath = Value
+            if Option == "-o":
+                self.OutputFilePath = Value
+            if Option == "-m":
+                self.SpectraDir = Value
+            if Option == "-p":
+                self.ParamFilePath = Value
+            if Option == "-d":
+                self.MaxHitsPerCharge = int(Value)
+        if not OptionsSeen.has_key("-i") or not OptionsSeen.has_key("-o"):
+            print UsageInfo
+            sys.exit(1)
+
+    def Finish(self):   
+        self.InputFile.close()
+        self.OutputFile.close()
+
+#-------------------------------------------------------------------------
+
+if __name__ == '__main__':
+    Fix = InspectToPepXMLClass()
+    Fix.ParseCommandLine(sys.argv[1:])  
+    Fix.Main()
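+# Illustrative invocation (file names here are examples only):
+#   python InspectToPepXML.py -i inspect_results.txt -o results.pep.xml \
+#     -p inspect.params -m /path/to/spectra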
diff --git a/IonScoring.c b/IonScoring.c
new file mode 100644
index 0000000..9153aef
--- /dev/null
+++ b/IonScoring.c
@@ -0,0 +1,1873 @@
+//Title:          IonScoring.c
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+#include "CMemLeak.h"
+#include <stdlib.h>
+#include <math.h>
+#include "Inspect.h"
+#include "Utils.h"
+#include "Errors.h"
+#include "IonScoring.h"
+#include "Spectrum.h"
+#include "Tagger.h"
+
+// Global variables: Bayesian networks for PRM scoring (for MS-Alignment) and
+// for cut scoring (for tagging and match-scoring)
+PRMBayesianModel* PRMModelCharge2 = NULL;
+PRMBayesianModel* PRMModelCharge3 = NULL;
+PRMBayesianModel* TAGModelCharge2 = NULL;
+PRMBayesianModel* TAGModelCharge3 = NULL;
+PRMBayesianModel* PhosCutModelCharge2 = NULL;
+PRMBayesianModel* PhosCutModelCharge3 = NULL;
+
+// Forward declarations:
+int IonScoringGetPrefixContainPhos(PRMBayesianNode* Node, Peptide* Match, int AminoIndex);
+int IonScoringGetSuffixContainPhos(PRMBayesianNode* Node, Peptide* Match, int BreakIndex);
+PRMBayesianModel* GetScoringModel(Peptide* Match, int Charge);
+void AnnotateParentPeaks(MSSpectrum* Spectrum, Peptide* Match, PRMBayesianModel* Model);
+void ClaimParentPeak(MSSpectrum* Spectrum, Peptide* Match, int Mass, PRMBayesianModel* Model);
+
+// Free a node from a Bayesian network; helper for FreePRMBayesianModel
+void FreePRMBayesianNode(PRMBayesianNode* Node)
+{
+    if (!Node)
+    {
+        return;
+    }
+    SafeFree(Node->Parents);
+    SafeFree(Node->ParentBlocks);
+    SafeFree(Node->CountTable);
+    SafeFree(Node->ProbTable);
+    SafeFree(Node);
+}
+
+// Free a Bayesian network model.
+void FreePRMBayesianModel(PRMBayesianModel* Model)
+{
+    PRMBayesianNode* Node;
+    PRMBayesianNodeHolder* Holder;
+    PRMBayesianNodeHolder* PrevHolder = NULL;
+    PRMBayesianNode* Prev = NULL;
+    //
+    if (!Model)
+    {
+        return;
+    }
+    // Free the linked list of node-holders that require flanking amino acid info:
+    for (Holder = Model->FirstFlank; Holder; Holder = Holder->Next)
+    {
+        SafeFree(PrevHolder);
+        PrevHolder = Holder;
+    }
+    SafeFree(PrevHolder);
+
+    // Free the linked list of all nodes:
+    for (Node = Model->Head; Node; Node = Node->Next)
+    {
+        FreePRMBayesianNode(Prev);
+        Prev = Node;
+    }
+    FreePRMBayesianNode(Prev);
+    SafeFree(Model->Nodes);
+    SafeFree(Model);
+}
+
+// Add a node to a Bayesian network.  Called from PyInspect when building up 
+// a network (semi)interactively, not used in production.
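+// Illustrative call (arguments here are examples, not from the original code);
+// flag 1 on an evSector node selects the three-way sector split used below:
+//   AddPRMBayesianNode(Model, "Sector", evSector, 1, 0.0, 0);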
+void AddPRMBayesianNode(PRMBayesianModel* Model, char* Name, int NodeType, int NodeFlag, float NodeMassOffset, 
+    int FragmentType)
+{
+    PRMBayesianNode* Node;
+    //
+    // Create the node:
+    Node = (PRMBayesianNode*)calloc(1, sizeof(PRMBayesianNode));
+    Node->Type = NodeType;
+    strncpy(Node->Name, Name, 256);
+    Node->MassOffset = (int)(NodeMassOffset * DALTON);
+    Node->Flag = NodeFlag;
+    Node->Index = Model->NodeCount;
+    Node->FragmentType = FragmentType;
+    Model->NodeCount++;
+    // Insert the node into the list:
+    if (Model->Tail)
+    {
+        Model->Tail->Next = Node;
+    }
+    else
+    {
+        Model->Head = Node;
+    }
+    Model->Tail = Node;
+    
+    // Insert the node into the array:
+    if (Model->Nodes)
+    {
+        Model->Nodes = (PRMBayesianNode**)realloc(Model->Nodes, Model->NodeCount * sizeof(PRMBayesianNode*));
+    }
+    else
+    {
+        Model->Nodes = (PRMBayesianNode**)calloc(sizeof(PRMBayesianNode*), 1);
+    }
+    Model->Nodes[Model->NodeCount - 1] = Node;
+
+    // Now set the value count:
+    switch (Node->Type)
+    {
+    case evPRMBPrefix:
+    case evPRMBPrefix2:
+    case evPRMBSuffix:
+    case evPRMBSuffix2:
+        // The number of values is determined by the intensity scheme:
+        switch (Model->IntensityScheme)
+        {
+        case 0:
+        case 1:
+        case 4:
+            Node->ValueCount = 4;
+            break;
+        case 2:
+        case 3:
+            Node->ValueCount = 3;
+            break;
+        default:
+            REPORT_ERROR(0);
+            break;
+        }
+        break;
+    case evSector:
+        // The number of values is 2, 3, 4, or 5, depending on our sector count:
+        switch (Node->Flag)
+        {
+        case 0:
+            Node->ValueCount = 2;
+            break;
+        case 1:
+            Node->ValueCount = 3;
+            break;
+        case 2:
+            Node->ValueCount = 4;
+            break;
+        case 3:
+            Node->ValueCount = 5;
+            break;
+        case 4:
+            Node->ValueCount = 5;
+            break;
+        default:
+            REPORT_ERROR(0);
+            break;
+        }
+        break;
+    case evFlank:
+        // The number of values depends on the flank scheme flag:
+        switch (Node->Flag)
+        {
+        case 0:
+            Node->ValueCount = 4;
+            break;
+        case 1:
+            Node->ValueCount = 4;
+            break;
+        case 2:
+            Node->ValueCount = 3;
+            break;
+        case 3:
+            Node->ValueCount = 3;
+            break;
+        default:
+            REPORT_ERROR(0);
+            break;
+        }
+        break;
+    case evPrefixAA:
+    case evSuffixAA:
+        // PrefixAA and SuffixAA nodes are simple binary nodes.
+        Node->ValueCount = 2;
+        break;
+    case evPrefixContain:
+        switch (Node->Flag)
+        {
+        case 0:
+            // Acid residue (flag)
+            Node->ValueCount = 2;
+            break;
+        case 1:
+            // Acid residue (0, 1, many)
+            Node->ValueCount = 3;
+            break;
+        case 2:
+            // Basic residue (flag)
+            Node->ValueCount = 2;
+            break;
+        case 3:
+            // Basic residue (0, 1, many)
+            Node->ValueCount = 2;
+            break;
+        default:
+            REPORT_ERROR(0);
+            break;
+        }
+        break;
+    case evSuffixContain:
+        switch (Node->Flag)
+        {
+        case 0:
+            // Acid residue (flag)
+            Node->ValueCount = 2;
+            break;
+        case 1:
+            // Acid residue (0, 1, many)
+            Node->ValueCount = 3;
+            break;
+        case 2:
+            // Basic residue (flag)
+            Node->ValueCount = 2;
+            break;
+        case 3:
+            // Basic residue (0, 1, many)
+            Node->ValueCount = 2;
+            break;
+        default:
+            REPORT_ERROR(0);
+            break;
+        }
+        break;
+    case evPrefixContainPhos:
+    case evSuffixContainPhos:
+        //has phosphate on the fragment (flag)
+        Node->ValueCount = 2;
+        break;
+    default:
+        printf("* Error: Unknown Node->Type in AddPRMBayesianNode\n");
+        break;
+    }
+    // Allocate initial count/probability tables, assuming no parents for the node:
+    Node->CountTable = (int*)calloc(Node->ValueCount, sizeof(int));
+    Node->ProbTable = (float*)calloc(Node->ValueCount, sizeof(float));
+    Node->TableSize = Node->ValueCount;
+}
+
+// Given a spectrum, compute the intensity-thresholds for level 0 (strongest)
+// through level n (absent).
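+// For example, under schemes 0/1 the resulting thresholds are roughly
+// { weakest "strong" peak, min(half of that, 2x grass), 0, -1 }, so a bin is
+// level 0 when it beats the strong-peak floor and level 3 when it is empty.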
+int ComputeSpectrumIntensityThresholds(PRMBayesianModel* Model, MSSpectrum* Spectrum)
+{
+    int ThresholdCount;
+    int CutoffRank;
+    int WeakRank;
+    int PeakIndex;
+    float SortedIntensity[200];
+    int WeakPeakCount = 0;
+    float TotalIntensity = 0;
+    float GrassIntensity;
+    float StrongPeakIntensity;
+    //
+
+    switch (Model->IntensityScheme)
+    {
+    case 0:
+    case 1:
+        // Schemes 0/1: Top N peaks, high, low, absent
+        ThresholdCount = 4;
+        Spectrum->IntensityThresholds = (float*)calloc(5, sizeof(float));
+        StrongPeakIntensity = -1;
+        CutoffRank = (int)(Spectrum->ParentMass / (50 * DALTON));
+        WeakRank = max(CutoffRank, Spectrum->PeakCount - 200);
+        for (PeakIndex = 0; PeakIndex < Spectrum->PeakCount; PeakIndex++)
+        {
+            if (Spectrum->Peaks[PeakIndex].IntensityRank >= WeakRank)
+            {
+                SortedIntensity[WeakPeakCount] = Spectrum->Peaks[PeakIndex].Intensity;
+                WeakPeakCount++;
+            }
+            else
+            {
+                if (StrongPeakIntensity < 0 || StrongPeakIntensity > Spectrum->Peaks[PeakIndex].Intensity)
+                {
+                    StrongPeakIntensity = Spectrum->Peaks[PeakIndex].Intensity;
+                }
+            }
+            TotalIntensity += Spectrum->Peaks[PeakIndex].Intensity;
+            if (WeakPeakCount == 200)
+            {
+                break;
+            }
+        }
+        if (!WeakPeakCount)
+        {
+            GrassIntensity = TotalIntensity / (2 * Spectrum->PeakCount);
+        }
+        else
+        {
+            qsort(SortedIntensity, WeakPeakCount, sizeof(float), (QSortCompare)CompareFloats);
+            GrassIntensity = SortedIntensity[WeakPeakCount / 2];
+        }
+        Spectrum->IntensityThresholds[0] = StrongPeakIntensity;
+        Spectrum->IntensityThresholds[1] = (float)min(StrongPeakIntensity * 0.5, GrassIntensity * 2);
+        //Spectrum->IntensityThresholds[2] = (float)0.5 * GrassIntensity;
+        Spectrum->IntensityThresholds[2] = 0;
+        Spectrum->IntensityThresholds[3] = -1;
+        break;
+    case 2:
+    case 3:
+        // Schemes 2/3: Top N peaks, present, absent
+        ThresholdCount = 3;
+        Spectrum->IntensityThresholds = (float*)calloc(5, sizeof(float));
+        StrongPeakIntensity = -1;
+        CutoffRank = (int)(Spectrum->ParentMass / (50 * DALTON));
+        WeakRank = max(CutoffRank, Spectrum->PeakCount - 200);
+        for (PeakIndex = 0; PeakIndex < Spectrum->PeakCount; PeakIndex++)
+        {
+            if (Spectrum->Peaks[PeakIndex].IntensityRank >= WeakRank)
+            {
+                SortedIntensity[WeakPeakCount] = Spectrum->Peaks[PeakIndex].Intensity;
+                WeakPeakCount++;
+            }
+            else
+            {
+                if (StrongPeakIntensity < 0 || StrongPeakIntensity > Spectrum->Peaks[PeakIndex].Intensity)
+                {
+                    StrongPeakIntensity = Spectrum->Peaks[PeakIndex].Intensity;
+                }
+            }
+            TotalIntensity += Spectrum->Peaks[PeakIndex].Intensity;
+            if (WeakPeakCount == 200)
+            {
+                break;
+            }
+        }
+        if (!WeakPeakCount)
+        {
+            GrassIntensity = TotalIntensity / (2 * Spectrum->PeakCount);
+        }
+        else
+        {
+            qsort(SortedIntensity, WeakPeakCount, sizeof(float), (QSortCompare)CompareFloats);
+            GrassIntensity = SortedIntensity[WeakPeakCount / 2];
+        }
+        Spectrum->IntensityThresholds[0] = StrongPeakIntensity;
+        Spectrum->IntensityThresholds[1] = 0; //GrassIntensity * 0.5;
+        //Spectrum->IntensityThresholds[2] = (float)0.5 * GrassIntensity;
+        Spectrum->IntensityThresholds[2] = -1;
+        //Spectrum->IntensityThresholds[3] = -1;
+        break;
+    case 4:
+        //Scheme 4: partitioned by ratio to grass
+        ThresholdCount = 4;
+        Spectrum->IntensityThresholds = (float*)calloc(5, sizeof(float));
+        WeakRank = (Spectrum->PeakCount / 3 ); //AverageGrass = median of bottom 1/3 of peaks
+        WeakRank = min(200, WeakRank); //at most 200, limited by array size
+        for (PeakIndex = 0; PeakIndex < Spectrum->PeakCount; PeakIndex++)
+        {
+            if (Spectrum->Peaks[PeakIndex].IntensityRank >= WeakRank)
+            {
+                SortedIntensity[WeakPeakCount] = Spectrum->Peaks[PeakIndex].Intensity;
+                WeakPeakCount++;
+            }
+        }
+        if (!WeakPeakCount)
+        {
+            GrassIntensity = TotalIntensity / (2 * Spectrum->PeakCount);
+        }
+        else
+        {
+            qsort(SortedIntensity, WeakPeakCount, sizeof(float), (QSortCompare)CompareFloats);
+            GrassIntensity = SortedIntensity[WeakPeakCount / 2];
+        }
+        Spectrum->IntensityThresholds[0] = GrassIntensity * (float)10.0;
+        Spectrum->IntensityThresholds[1] = GrassIntensity * 2;
+        Spectrum->IntensityThresholds[2] = GrassIntensity * (float)0.1;
+        Spectrum->IntensityThresholds[3] = -1;
+        break;
+    default:
+        REPORT_ERROR(0);
+        return 0;
+    }
+    return ThresholdCount;
+}
+
+// Prepare a spectrum for PRM and cut scoring.  Compute intensity cutoffs, compute binned
+// intensity, and compute binned intensity levels.
+void PrepareSpectrumForIonScoring(PRMBayesianModel* Model, MSSpectrum* Spectrum, int ForceRefresh)
+{
+    int WeakPeakCount = 0;
+    float TotalIntensity = 0;
+    int ThresholdCount;
+    int PeakIndex;
+    int IntensityLevel;
+    int BinScalingFactor = 100; // One bin per 0.1Da
+    int CountByIntensityLevel[16];
+    int Bin;
+    int NearBin;
+    SpectralPeak* Peak;
+    int MaxParentMass;
+    float Intensity;
+    float Probability;
+    float Multiplier;
+    int Skew;
+    //
+    if (Spectrum->IntensityThresholds && !ForceRefresh)
+    {
+        return; // Already set!
+    }
+    if (!Spectrum->PeakCount)
+    {
+        return;
+    }
+    if (!Model)
+    {
+        return;
+    }
+
+    ///////////////////////////////
+    // Free any old info:
+    SafeFree(Spectrum->BinnedIntensities);
+    Spectrum->BinnedIntensities = NULL;
+    SafeFree(Spectrum->BinnedIntensitiesTight);
+    Spectrum->BinnedIntensitiesTight = NULL;
+    SafeFree(Spectrum->BinnedIntensityLevels);
+    Spectrum->BinnedIntensityLevels = NULL;
+    SafeFree(Spectrum->BinPeakIndex);
+    Spectrum->BinPeakIndex = NULL;
+    SafeFree(Spectrum->IonScoringNoiseProbabilities);
+    Spectrum->IonScoringNoiseProbabilities = NULL;
+    SafeFree(Spectrum->IntensityThresholds);
+    Spectrum->IntensityThresholds = NULL;
+    ///////////////////////////////
+    ThresholdCount = ComputeSpectrumIntensityThresholds(Model, Spectrum);
+
+    ////////////////////////////////////////////////////////////////////////////////////////////////
+    // We know our intensity thresholds; now let's compute the binned intensities:
+    MaxParentMass = Spectrum->MZ * 3 + (2 * HYDROGEN_MASS);
+    
+    Spectrum->IntensityBinCount = (MaxParentMass + DALTON) / BinScalingFactor; 
+    Spectrum->BinnedIntensities = (float*)calloc(Spectrum->IntensityBinCount, sizeof(float));
+    Spectrum->BinnedIntensitiesTight = (float*)calloc(Spectrum->IntensityBinCount, sizeof(float));
+    Spectrum->BinnedIntensityLevels = (int*)calloc(Spectrum->IntensityBinCount, sizeof(int));
+    
+    Spectrum->BinPeakIndex = (int*)calloc(Spectrum->IntensityBinCount, sizeof(int));
+    for (Bin = 0; Bin < Spectrum->IntensityBinCount; Bin++)
+    {
+        Spectrum->BinPeakIndex[Bin] = -1;
+    }
+    // Iterate over spectral peaks, putting intensity into bins:
+    for (PeakIndex = 0; PeakIndex < Spectrum->PeakCount; PeakIndex++)
+    {
+        Peak = Spectrum->Peaks + PeakIndex;
+        Bin = (Peak->Mass + 50) / BinScalingFactor;
+        for (NearBin = Bin - 6; NearBin < Bin + 7; NearBin++)
+        {
+            if (NearBin < 0 || NearBin >= Spectrum->IntensityBinCount)
+            {
+                continue;
+            }
+            Skew = abs(Peak->Mass - (NearBin * BinScalingFactor));
+            if (Skew > Model->IntensityRadius)
+            {
+                continue;
+            }
+            Multiplier = 1.0; // default
+            if (Model->IntensityScheme == 1 || Model->IntensityScheme == 3)
+            {
+                if (Skew >= Model->HalfIntensityRadius)
+                {
+                    Multiplier = 0.5;
+                }
+            }
+            Spectrum->BinnedIntensities[NearBin] += Peak->Intensity * Multiplier;
+            if (Skew < INTENSITY_BIN_RADIUS_TIGHT)
+            {
+                Spectrum->BinnedIntensitiesTight[NearBin] += Peak->Intensity;
+            }
+            if (Spectrum->BinPeakIndex[NearBin] < 0)
+            {
+                Spectrum->BinPeakIndex[NearBin] = PeakIndex;
+            }
+        }
+    }
+    // Compute the intensity level (absent, lo, med, hi) for each bin:
+    //ComputeSpectrumIntensityCutoffs(Spectrum);
+    memset(CountByIntensityLevel, 0, sizeof(int) * 16);
+    for (Bin = 0; Bin < Spectrum->IntensityBinCount; Bin++)
+    {
+        Intensity = Spectrum->BinnedIntensities[Bin];
+        for (IntensityLevel = 0; IntensityLevel < 99; IntensityLevel++)
+        {
+            if (Intensity > Spectrum->IntensityThresholds[IntensityLevel])
+            {
+                Spectrum->BinnedIntensityLevels[Bin] = IntensityLevel;
+                CountByIntensityLevel[IntensityLevel]++;
+                break;
+            }
+        }
+    }
+    ////////////////////////////////////////////////////////////////////////////////////////////////
+    // Now let's compute the fraction of mass bins which attain these intensity thresholds 'by chance'.
+    // This fraction is used for scoring PRMs; the bonus for having a y peak is smaller for a very
+    // thick spectrum than for a very sparse spectrum.
+    Spectrum->IonScoringNoiseProbabilities = (float*)calloc(ThresholdCount + 1, sizeof(float));
+    for (IntensityLevel = 0; IntensityLevel < ThresholdCount; IntensityLevel++)
+    {
+        Probability = (CountByIntensityLevel[IntensityLevel] + 1) / (float)Spectrum->IntensityBinCount;
+        Spectrum->IonScoringNoiseProbabilities[IntensityLevel] = (float)log(Probability);
+    }
+}
+
+// Return the intensity level for this mass.  If this is a cut, claim the peaks; otherwise,
+// just return the intensity level.
+int IonScoringGetPeakIntensity(PRMBayesianModel* Model, MSSpectrum* Spectrum, int Mass, int FragmentType, int SeizePeakAminoIndex)
+{
+    int Bin;
+    int MinMass;
+    int MaxMass;
+    float Intensity = 0;
+    int IntensityLevelIndex;
+    int PeakIndex;
+    int Skew;
+    float Multiplier;
+    //
+    Bin = (Mass + 50) / 100; // Bin width 0.1Da
+    MinMass = Mass - Model->IntensityRadius;
+    MaxMass = Mass + Model->IntensityRadius;
+
+    // If the mass is off the scale, then you get no peaks:
+    if (Bin >= Spectrum->IntensityBinCount || Bin < 0)
+    {
+        return Model->MinIntensityLevel;
+    }
+    
+    // If this is a PRM (not a cut), then look up the intensity level
+    // in the spectrum's array:
+    if (SeizePeakAminoIndex < 0)
+    {
+        return Spectrum->BinnedIntensityLevels[Bin];
+    }
+
+    PeakIndex = Spectrum->BinPeakIndex[Bin];
+    if (PeakIndex >= 0)
+    {
+        for ( ; PeakIndex < Spectrum->PeakCount; PeakIndex++)
+        {
+            if (Spectrum->Peaks[PeakIndex].Mass > MaxMass)
+            {
+                break;
+            }
+            if (Spectrum->Peaks[PeakIndex].Mass < MinMass)
+            {
+                continue;
+            }
+
+            Multiplier = 1.0; // default
+            Skew = abs(Mass - Spectrum->Peaks[PeakIndex].Mass);
+            if (Model->IntensityScheme == 1 || Model->IntensityScheme == 3)
+            {
+                if (Skew >= Model->HalfIntensityRadius)
+                {
+                    Multiplier = 0.5;
+                }
+            }
+            if (Spectrum->Peaks[PeakIndex].IonType)
+            {
+                // This peak has already been CLAIMED by another ion type:
+                continue;
+            }
+            Intensity += Spectrum->Peaks[PeakIndex].Intensity * Multiplier;
+            // CLAIM this peak for this fragment type:
+            Spectrum->Peaks[PeakIndex].IonType = FragmentType;
+            Spectrum->Peaks[PeakIndex].AminoIndex = SeizePeakAminoIndex;
+        }
+    }
+    for (IntensityLevelIndex = 0; IntensityLevelIndex < 99; IntensityLevelIndex++)
+    {
+        if (Intensity > Spectrum->IntensityThresholds[IntensityLevelIndex])
+        {
+            return IntensityLevelIndex;
+        }
+    }
+    return 0;
+}
+
+// Compute the sector for a given mass.  The sector is a simple partition of
+// the mass range (low/high, or low/medium/high, etc).  
+int IonScoringGetSector(PRMBayesianNode* Node, int ParentMass, int Mass)
+{
+    switch (Node->Flag)
+    {
+    case 0:
+        // Two sectors, LOW and HIGH:
+        if (Mass < ParentMass / 2)
+        {
+            return 0;
+        }
+        else
+        {
+            return 1;
+        }
+        break;
+    case 1:
+        // Three sectors, LOW and MEDIUM and HIGH:
+        if (Mass < ParentMass * 0.33)
+        {
+            return 0;
+        }
+        if (Mass < ParentMass * 0.66)
+        {
+            return 1;
+        }
+        return 2;
+        break;
+    case 2:
+        // Four sectors:
+        if (Mass < ParentMass * 0.25)
+        {
+            return 0;
+        }
+        if (Mass < ParentMass * 0.5)
+        {
+            return 1;
+        }
+        if (Mass < ParentMass * 0.75)
+        {
+            return 2;
+        }
+        return 3;
+        break;
+    case 3:
+        // Five sectors:
+        if (Mass < ParentMass * 0.2)
+        {
+            return 0;
+        }
+        if (Mass < ParentMass * 0.4)
+        {
+            return 1;
+        }
+        if (Mass < ParentMass * 0.6)
+        {
+            return 2;
+        }
+        if (Mass < ParentMass * 0.8)
+        {
+            return 3;
+        }
+        return 4;
+        break;
+    default:
+        REPORT_ERROR(0);
+        break;
+    }
+    return 0;
+}
+
+// Compute the value of the Flank feature.  These features reflect flanking amino acids
+// which have effects on fragment intensities.
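+// For example (flag 0, b-type cuts): a cut with G immediately to its left
+// returns 0 (strong suppression), and a cut with P immediately to its right
+// returns 1 (augmentation); these values index the node's count/probability tables.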
+int IonScoringGetFlank(PRMBayesianNode* Node, char Left, char Right)
+{
+    //
+    switch (Node->Flag)
+    {
+    case 0:
+        // Default B flank:
+        // G or P on left: Strong suppression
+        if (Left == 'G' || Left == 'P')
+        {
+            return 0;
+        }
+        // P on right: Augmentation
+        if (Right == 'P')
+        {
+            return 1;
+        }
+        // H or R on right: Suppression
+        if (Right == 'H' || Right == 'R')
+        {
+            return 2;
+        }
+        return 3;
+        break;
+    case 1:
+        // Default Y flank:
+        // P on right: Strong augmentation
+        if (Right == 'P')
+        {
+            return 0;
+        }
+        // K or R on right: Strong suppression
+        if (Right == 'R' || Right == 'K')
+        {
+            return 1;
+        }
+        // H on right or P on left: suppression
+        if (Left == 'P' || Right == 'H')
+        {
+            return 2;
+        }
+        return 3;
+    default:
+        REPORT_ERROR(0);
+        break;
+    }
+    return 0;
+}
+
+// Compute a feature for whether the N- or C-terminal portion of a peptide contains acidic
+// or basic residues.  (Not used in production)
+int IonScoringGetFragmentContain(PRMBayesianNode* Node, Peptide* Match, int AminoIndex, int SuffixFlag)
+{
+    int MinIndex;
+    int MaxIndex;
+    int CheckIndex;
+    int Count = 0;
+    //
+    if (SuffixFlag)
+    {
+        MinIndex = AminoIndex;
+        MaxIndex = strlen(Match->Bases);
+    }
+    else
+    {
+        MinIndex = 0;
+        MaxIndex = AminoIndex;
+    }
+    for (CheckIndex = MinIndex; CheckIndex < MaxIndex; CheckIndex++)
+    {
+        switch (Match->Bases[CheckIndex])
+        {
+        case 'D':
+        case 'E':
+            if (Node->Flag == 0 || Node->Flag == 1)
+            {
+                Count++;
+            }
+            break;
+        case 'R':
+        case 'K':
+        case 'H':
+            if (Node->Flag == 2 || Node->Flag == 3)
+            {
+                Count++;
+            }
+            break;
+        default:
+            break;
+        }
+    }
+    switch (Node->Flag)
+    {
+    case 0:
+    case 2:
+        if (Count)
+        {
+            return 1;
+        }
+        else
+        {
+            return 0;
+        }
+        break;
+    case 1:
+    case 3:
+        if (Count > 1)
+        {
+            return 2;
+        }
+        else if (Count)
+        {
+            return 1;
+        }
+        else
+        {
+            return 0;
+        }
+        break;
+    default:
+        REPORT_ERROR(0);
+        break;
+    }
+    return 0; // unreachable
+}
+
+int IonScoringGetPrefixContainPhos(PRMBayesianNode* Node, Peptide* Match, int AminoIndex)
+{
+    int ModIndex;
+    int ModifiedResidueIndex = -1;
+    //
+    for (ModIndex = 0; ModIndex < MAX_PT_MODS; ModIndex++)
+    {
+        if (!Match->ModType[ModIndex])
+        {
+            break;
+        }
+        if (Match->ModType[ModIndex]->Flags & DELTA_FLAG_PHOSPHORYLATION)
+        {
+            ModifiedResidueIndex = Match->AminoIndex[ModIndex];
+            if (ModifiedResidueIndex < AminoIndex)
+            {
+                return 1;
+            }
+        }
+    }
+    //got all the way here without returning anything.
+    return 0;
+}
+
+int IonScoringGetSuffixContainPhos(PRMBayesianNode* Node, Peptide* Match, int BreakIndex)
+{
+    int ModIndex;
+    int ModifiedResidueIndex = -1;
+    //
+    for (ModIndex = 0; ModIndex < MAX_PT_MODS; ModIndex++)
+    {
+        if (!Match->ModType[ModIndex])
+        {
+            break;
+        }
+        if (Match->ModType[ModIndex]->Flags & DELTA_FLAG_PHOSPHORYLATION)
+        {
+            ModifiedResidueIndex = Match->AminoIndex[ModIndex];
+            if (BreakIndex <= ModifiedResidueIndex)
+            {
+                return 1;
+            }
+        }
+    }
+    //got all the way here without returning anything.
+    return 0;
+}
+
+// Return the value for a particular PRM or cut.  This function dispatches to the
+// appropriate handler based on the node type.
+// Important special note: AminoIndex should be -1 if we're getting PRM scores!
+int IonScoringGetNodeValue(PRMBayesianModel* Model, PRMBayesianNode* Node, MSSpectrum* Spectrum, int PRM,
+    Peptide* Match, int AminoIndex)
+{
+    int Suffix;
+    int PeptideLen;
+    char PrefixAA;
+    char SuffixAA;
+    ///////////////////////////////////////////////////////////////////////////////////////
+    // Set values for the current PRM:
+    switch (Node->Type)
+    {
+    case evPRMBPrefix:
+        // Handle b peak, or other N-terminal fragment:
+        return IonScoringGetPeakIntensity(Model, Spectrum, PRM + Node->MassOffset, Node->FragmentType, AminoIndex);
+        //Node->Value = IonScoringSetIntensityLevel(Spectrum, Intensity);
+        break;
+    case evPRMBPrefix2:
+        // Handle doubly-charged N-terminal fragment:
+        return IonScoringGetPeakIntensity(Model, Spectrum, (PRM + Node->MassOffset + HYDROGEN_MASS) / 2, Node->FragmentType, AminoIndex);
+        //Node->Value = IonScoringSetIntensityLevel(Spectrum, Intensity);
+        break;
+    case evPRMBSuffix:
+        // Handle C-terminal fragment:
+        Suffix = Spectrum->ParentMass - PRM;
+        return IonScoringGetPeakIntensity(Model, Spectrum, Suffix + Node->MassOffset, Node->FragmentType, AminoIndex);
+        //Node->Value = IonScoringSetIntensityLevel(Spectrum, Intensity);
+        break;
+    case evPRMBSuffix2:
+        // Handle doubly-charged C-terminal fragment:
+        Suffix = Spectrum->ParentMass - PRM;
+        return IonScoringGetPeakIntensity(Model, Spectrum, (Suffix + Node->MassOffset + HYDROGEN_MASS) / 2, Node->FragmentType, AminoIndex);
+        //Node->Value = IonScoringSetIntensityLevel(Spectrum, Intensity);
+        break;
+    case evSector:
+        // Handle "sector" (which part of the mass range this mass lies in)
+        return IonScoringGetSector(Node, Spectrum->ParentMass, PRM);
+        break;
+    case evFlank:
+        // Handle "flank" (for cuts only: based on prefix and suffix amino acids)
+        // If no peptide, return 0 (always the "default" intensity)
+        if (!Match)
+        {
+            return 0;
+        }
+        PeptideLen = strlen(Match->Bases);
+        if (AminoIndex > 0)
+        {
+            PrefixAA = Match->Bases[AminoIndex - 1];
+        }
+        else
+        {
+            PrefixAA = '\0';
+        }
+        if (AminoIndex < PeptideLen)
+        {
+            SuffixAA = Match->Bases[AminoIndex];
+        }
+        else
+        {
+            SuffixAA = '\0';
+        }
+        return IonScoringGetFlank(Node, PrefixAA, SuffixAA);
+        break;
+    case evPrefixAA:
+        if (AminoIndex > 0)
+        {
+            PrefixAA = Match->Bases[AminoIndex - 1];
+        }
+        else
+        {
+            PrefixAA = '\0';
+        }
+        if ((PrefixAA - 'A') == Node->Flag)
+        {
+            return 1;
+        }
+        else
+        {
+            return 0;
+        }
+        break;
+    case evSuffixAA:
+        PeptideLen = strlen(Match->Bases);
+        if (AminoIndex < PeptideLen)
+        {
+            SuffixAA = Match->Bases[AminoIndex];
+        }
+        else
+        {
+            SuffixAA = '\0';
+        }
+        if ((SuffixAA - 'A') == Node->Flag)
+        {
+            return 1;
+        }
+        else
+        {
+            return 0;
+        }
+        break;
+    case evPrefixContain:
+        return IonScoringGetFragmentContain(Node, Match, AminoIndex, 0);
+    case evSuffixContain:
+        return IonScoringGetFragmentContain(Node, Match, AminoIndex, 1);
+    case evPrefixContainPhos:
+        return IonScoringGetPrefixContainPhos(Node, Match, AminoIndex);
+    case evSuffixContainPhos:
+        return IonScoringGetSuffixContainPhos(Node, Match, AminoIndex);
+    default:
+        REPORT_ERROR(0);
+        break;
+    }
+    return 0;
+}
+
+// For debugging purposes, print out the definition of a PRMBayesianModel.
+void DebugPrintPRMBayesianModel(PRMBayesianModel* Model)
+{
+    PRMBayesianNode* Node;
+    PRMBayesianNode* Parent;
+    int NodeIndex;
+    int ParentIndex;
+    printf(">>>DebugPrintPRMBayesianModel\n");
+    printf("CutFlag %d IntensityRadius %.2f IntensityScheme %d\n", Model->CutFlag, Model->IntensityRadius / (float)DALTON, Model->IntensityScheme);
+    for (Node = Model->Head, NodeIndex = 0; Node; Node = Node->Next, NodeIndex++)
+    {
+        printf(">>Node %d of %d %s:\n", NodeIndex, Model->NodeCount, Node->Name);
+        printf("  Type %d flag %d mass offset %.2f\n", Node->Type, Node->Flag, Node->MassOffset / (float)DALTON);
+        printf("  Valuecount %d\n", Node->ValueCount);
+        for (ParentIndex = 0; ParentIndex < Node->ParentCount; ParentIndex++)
+        {
+            Parent = Node->Parents[ParentIndex];
+            printf("  Parent %d of %d: %s\n", ParentIndex, Node->ParentCount, Parent->Name);
+        }
+    }
+    printf(">>> End of model <<<\n");
+}
+
+// The tag-scoring Bayesian network includes some features which rely upon flanking amino acids.
+// These nodes must be visited during tag generation, when the flanking amino acids are finally 
+// learned.  To save time, we build up a singly-linked list (Model->FirstFlank...Model->LastFlank)
+// to keep track of such nodes.
+void BuildModelFlankList(PRMBayesianModel* Model)
+{
+    int NodeIndex;
+    PRMBayesianNode* Node;
+    PRMBayesianNode* Parent;
+    PRMBayesianNodeHolder* Holder;
+    int ParentIndex;
+    //
+    // Set flank flags of all nodes:
+    for (NodeIndex = 0; NodeIndex < Model->NodeCount; NodeIndex++)
+    {
+        Node = Model->Nodes[NodeIndex];
+        Node->FlankFlag = 0; // default
+        if (Node->Type == evFlank || Node->Type == evPrefixAA || Node->Type == evSuffixAA)
+        {
+            Node->FlankFlag = 1;
+        }
+        for (ParentIndex = 0; ParentIndex < Node->ParentCount; ParentIndex++)
+        {
+            Parent = Node->Parents[ParentIndex];
+            if (Parent->Type == evFlank || Parent->Type == evPrefixAA || Parent->Type == evSuffixAA)
+            {
+                Node->FlankFlag = 1;
+            }
+        }
+    }
+    // Build linked list of nodes which rely upon flanking amino acid info:
+    for (NodeIndex = 0; NodeIndex < Model->NodeCount; NodeIndex++)
+    {
+        Node = Model->Nodes[NodeIndex];
+        if (Node->FlankFlag)
+        {
+            // Add a NodeHolder for this node:
+            Holder = (PRMBayesianNodeHolder*)calloc(1, sizeof(PRMBayesianNodeHolder));
+            Holder->Node = Node;
+            if (Model->FirstFlank)
+            {
+                Model->LastFlank->Next = Holder;
+            }
+            else
+            {
+                Model->FirstFlank = Holder;
+            }
+            Model->LastFlank = Holder;
+        }
+    }
+}
+
+// Save a PRMBayesianNode to a binary file.  Helper function for SavePRMBayesianModel.
+void SavePRMBayesianNode(PRMBayesianNode* Node, FILE* ModelFile)
+{
+    int ParentIndex;
+    WriteBinary(&Node->Name, sizeof(char), 256, ModelFile);
+    WriteBinary(&Node->Type, sizeof(int), 1, ModelFile);
+    WriteBinary(&Node->Flag, sizeof(int), 1, ModelFile);
+    WriteBinary(&Node->FragmentType, sizeof(int), 1, ModelFile);
+    WriteBinary(&Node->MassOffset, sizeof(int), 1, ModelFile);
+    WriteBinary(&Node->ValueCount, sizeof(int), 1, ModelFile);
+    WriteBinary(&Node->ParentCount, sizeof(int), 1, ModelFile);
+    // Write parent indices:
+    for (ParentIndex = 0; ParentIndex < Node->ParentCount; ParentIndex++)
+    {
+        WriteBinary(&Node->Parents[ParentIndex]->Index, sizeof(int), 1, ModelFile);
+    }
+    WriteBinary(Node->ParentBlocks, sizeof(int), Node->ParentCount, ModelFile);
+    WriteBinary(&Node->TableSize, sizeof(int), 1, ModelFile);
+    WriteBinary(Node->CountTable, sizeof(int), Node->TableSize, ModelFile);
+    WriteBinary(Node->ProbTable, sizeof(float), Node->TableSize, ModelFile);
+}
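+
+// For reference, the on-disk layout of one node record, in write order:
+//   char Name[256], int Type, int Flag, int FragmentType, int MassOffset,
+//   int ValueCount, int ParentCount, int ParentIndex[ParentCount],
+//   int ParentBlocks[ParentCount], int TableSize,
+//   int CountTable[TableSize], float ProbTable[TableSize]
+// LoadPRMBayesianNode (below) reads these fields back in exactly the same order.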
+
+// Load a PRMBayesianNode from a binary file.  Helper function for LoadPRMBayesianModel.
+PRMBayesianNode* LoadPRMBayesianNode(PRMBayesianModel* Model, FILE* ModelFile)
+{
+    PRMBayesianNode* Node;
+    int ParentIndex;
+    int ParentNodeIndex;
+    //
+    Node = (PRMBayesianNode*)calloc(1, sizeof(PRMBayesianNode));
+    ReadBinary(&Node->Name, sizeof(char), 256, ModelFile);
+    ReadBinary(&Node->Type, sizeof(int), 1, ModelFile);
+    ReadBinary(&Node->Flag, sizeof(int), 1, ModelFile);
+    ReadBinary(&Node->FragmentType, sizeof(int), 1, ModelFile);
+    ReadBinary(&Node->MassOffset, sizeof(int), 1, ModelFile);
+    ReadBinary(&Node->ValueCount, sizeof(int), 1, ModelFile);
+    ReadBinary(&Node->ParentCount, sizeof(int), 1, ModelFile);
+    if (Node->ParentCount < 0 || Node->ParentCount > 100)
+    {
+        REPORT_ERROR(0);
+        return NULL;
+    }
+    if (Node->ParentCount)
+    {
+        Node->Parents = (PRMBayesianNode**)calloc(Node->ParentCount, sizeof(PRMBayesianNode*));
+        for (ParentIndex = 0; ParentIndex < Node->ParentCount; ParentIndex++)
+        {
+            ReadBinary(&ParentNodeIndex, sizeof(int), 1, ModelFile);
+            if (ParentNodeIndex < 0 || ParentNodeIndex >= Model->NodeCount)
+            {
+                REPORT_ERROR(0);
+                return NULL;
+            }
+            Node->Parents[ParentIndex] = Model->Nodes[ParentNodeIndex];
+        }
+        Node->ParentBlocks = (int*)calloc(Node->ParentCount, sizeof(int));
+        ReadBinary(Node->ParentBlocks, sizeof(int), Node->ParentCount, ModelFile);
+    }
+    ReadBinary(&Node->TableSize, sizeof(int), 1, ModelFile);
+    if (Node->TableSize <= 0 || Node->TableSize > 10000)
+    {
+        REPORT_ERROR(0);
+        return NULL;
+    }
+    Node->CountTable = (int*)calloc(Node->TableSize, sizeof(int));
+    ReadBinary(Node->CountTable, sizeof(int), Node->TableSize, ModelFile);
+    Node->ProbTable = (float*)calloc(Node->TableSize, sizeof(float));
+    ReadBinary(Node->ProbTable, sizeof(float), Node->TableSize, ModelFile);
+    return Node;
+}
+
+// Save a PRMBayesian model to a binary file.  In production, the model
+// is loaded (using LoadPRMBayesianModel) and then used.
+void SavePRMBayesianModel(PRMBayesianModel* Model, char* FileName)
+{
+    FILE* ModelFile;
+    PRMBayesianNode* Node;
+    //
+    if (!Model)
+    {
+        REPORT_ERROR(0);
+        return;
+    }
+    ModelFile = fopen(FileName, "wb");
+    if (!ModelFile)
+    {
+        REPORT_ERROR_S(8, FileName);
+        return;
+    }
+    WriteBinary(&Model->CutFlag, sizeof(int), 1, ModelFile);
+    WriteBinary(&Model->IntensityScheme, sizeof(int), 1, ModelFile);
+    WriteBinary(&Model->MinIntensityLevel, sizeof(int), 1, ModelFile);
+    WriteBinary(&Model->IntensityRadius, sizeof(int), 1, ModelFile);
+    WriteBinary(&Model->NoiseModel, sizeof(int), 1, ModelFile);
+    WriteBinary(Model->RandomIntensityCounts, sizeof(int), 10, ModelFile);
+    WriteBinary(Model->RandomIntensityScores, sizeof(float), 10, ModelFile);
+    WriteBinary(&Model->NodeCount, sizeof(int), 1, ModelFile);
+    for (Node = Model->Head; Node; Node = Node->Next)
+    {
+        SavePRMBayesianNode(Node, ModelFile);
+    }
+    fclose(ModelFile);
+}
+
+// Load a PRMBayesianModel from a binary file.
+PRMBayesianModel* LoadPRMBayesianModel(char* FileName)
+{
+    PRMBayesianModel* Model;
+    FILE* ModelFile;
+    int NodeIndex;
+    PRMBayesianNode* Node;
+
+    //
+    ModelFile = fopen(FileName, "rb");
+    if (!ModelFile)
+    {
+        REPORT_ERROR_S(8, FileName);
+        return NULL;
+    }
+    Model = (PRMBayesianModel*)calloc(1, sizeof(PRMBayesianModel));
+    ReadBinary(&Model->CutFlag, sizeof(int), 1, ModelFile);
+    ReadBinary(&Model->IntensityScheme, sizeof(int), 1, ModelFile);
+    ReadBinary(&Model->MinIntensityLevel, sizeof(int), 1, ModelFile);
+    ReadBinary(&Model->IntensityRadius, sizeof(int), 1, ModelFile);
+    ReadBinary(&Model->NoiseModel, sizeof(int), 1, ModelFile);
+    ReadBinary(Model->RandomIntensityCounts, sizeof(int), 10, ModelFile);
+    ReadBinary(Model->RandomIntensityScores, sizeof(float), 10, ModelFile);
+    ReadBinary(&Model->NodeCount, sizeof(int), 1, ModelFile);
+    Model->Nodes = (PRMBayesianNode**)calloc(Model->NodeCount, sizeof(PRMBayesianNode*));
+    for (NodeIndex = 0; NodeIndex < Model->NodeCount; NodeIndex++)
+    {
+        Node = LoadPRMBayesianNode(Model, ModelFile);
+        Node->Index = NodeIndex;
+        Model->Nodes[NodeIndex] = Node;
+        if (Model->Tail)
+        {
+            Model->Tail->Next = Node;
+        }
+        else
+        {
+            Model->Head = Node;
+        }
+        Model->Tail = Node;
+    }
+    BuildModelFlankList(Model);
+    fclose(ModelFile);
+    return Model;
+}
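+
+// Illustrative round trip (a minimal sketch; assumes a Model that was built and
+// trained elsewhere, e.g. via AddPRMBayesianNode and count accumulation):
+//     ComputePRMBayesianModelProbabilityTables(Model, 1);
+//     SavePRMBayesianModel(Model, "PRM2.bn");
+//     ...
+//     PRMBayesianModel* Loaded = LoadPRMBayesianModel("PRM2.bn");
+//     DebugPrintPRMBayesianModel(Loaded);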
+
+// Translate the CountTables for this model's nodes into probability tables.
+// We use a "buffer" count for each node to pad out the probabilities; if our training
+// set was small, it may have left ZERO entries in some nodes, and we don't want 
+// probabilities of zero (since then we can't take their natural logarithm).
+void ComputePRMBayesianModelProbabilityTables(PRMBayesianModel* Model, int PaddingCount)
+{
+    PRMBayesianNode* Node;
+    int TotalEntries;
+    int TableIndex;
+    float Probability;
+    int Count;
+    int BlockStartIndex;
+    int IntensityLevel;
+    //
+
+    // Set global noise probabilities:
+    Count = 0;
+    for (IntensityLevel = 0; IntensityLevel <= Model->MinIntensityLevel; IntensityLevel++)
+    {
+        Count += (1 + Model->RandomIntensityCounts[IntensityLevel]);
+    }
+    for (IntensityLevel = 0; IntensityLevel <= Model->MinIntensityLevel; IntensityLevel++)
+    {
+        Probability = (1 + Model->RandomIntensityCounts[IntensityLevel]) / (float)Count;
+        Model->RandomIntensityScores[IntensityLevel] = (float)log(Probability);
+    }
+
+    // Set probabilities for each node:
+    for (Node = Model->Head; Node; Node = Node->Next)
+    {
+        // Compute the probability that this node will have a value,
+        // GIVEN the values of any parent nodes:
+        for (BlockStartIndex = 0; BlockStartIndex < Node->TableSize; BlockStartIndex += Node->ValueCount)
+        {
+            TotalEntries = 0;
+            for (TableIndex = BlockStartIndex; TableIndex < BlockStartIndex + Node->ValueCount; TableIndex++)
+            {
+                TotalEntries += Node->CountTable[TableIndex] + PaddingCount;
+            }
+            for (TableIndex = BlockStartIndex; TableIndex < BlockStartIndex + Node->ValueCount; TableIndex++)
+            {
+                if (TableIndex >= Node->TableSize)
+                {
+                    REPORT_ERROR(0);
+                }
+                Count = Node->CountTable[TableIndex] + PaddingCount;
+                Probability = Count / (float)TotalEntries;
+                Node->ProbTable[TableIndex] = (float)log(Probability);
+            }
+        }
+    }
+}
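+
+// Worked example of the padding: with PaddingCount = 1 and a block of raw counts
+// {0, 3, 7}, the padded counts are {1, 4, 8}, TotalEntries = 13, and the stored
+// values are log(1/13), log(4/13), log(8/13) -- so a value combination never seen
+// in training still gets a finite (if strongly negative) log-probability.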
+
+// PRMBNGetCutScore returns the score for a cut-point.  
+// It's called AFTER setting the Values array for each node, with calls to IonScoringGetNodeValue
+float PRMBNGetCutScore(MSSpectrum* Spectrum, PRMBayesianModel* Model, int AminoIndex)
+{
+    float Score = 0;
+    float NodeScore = 0;
+    int TableIndex;
+    int ParentIndex;
+    PRMBayesianNode* Node;
+    int VerboseFlag = 0;
+    //
+    for (Node = Model->Head; Node; Node = Node->Next)
+    {
+        switch (Node->Type)
+        {
+        case evPRMBPrefix:
+        case evPRMBPrefix2:
+        case evPRMBSuffix:
+        case evPRMBSuffix2:
+            TableIndex = Node->Values[AminoIndex];
+            for (ParentIndex = 0; ParentIndex < Node->ParentCount; ParentIndex++)
+            {
+                TableIndex += Node->Parents[ParentIndex]->Values[AminoIndex] * Node->ParentBlocks[ParentIndex];
+            }
+            if (TableIndex >= Node->TableSize)
+            {
+                REPORT_ERROR(0);
+            }
+            NodeScore = Node->ProbTable[TableIndex];
+            if (Model->NoiseModel)
+            {
+                // GLOBAL noise model, based on all spectra
+                NodeScore -= Model->RandomIntensityScores[Node->Values[AminoIndex]];
+            }
+            else
+            {
+                // SPECTRUM noise model:
+                NodeScore -= Spectrum->IonScoringNoiseProbabilities[Node->Values[AminoIndex]];
+            }
+            if (VerboseFlag)
+            {
+                printf("  AA %d: Node %d (%s) contributes %.3f - %.3f = %.5f\n", AminoIndex, Node->Index, Node->Name, 
+                    Node->ProbTable[TableIndex], Spectrum->IonScoringNoiseProbabilities[Node->Values[AminoIndex]], NodeScore);
+            }
+            Score += NodeScore;
+            break;
+        default:
+            // Other node-types don't contribute to the score.
+            break;
+        }
+    }
+    return Score;
+}
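+
+// Table-indexing example (consistent with the ParentBlocks comment in IonScoring.h):
+// for a node with 4 possible values and a single parent, ParentBlocks[0] == 4 and
+//     TableIndex = ParentValue * 4 + ChildValue
+// so the lookup above reduces to ProbTable[ParentValue * 4 + ChildValue].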
+
+// Compute the score for a PRM, using a bayesian network.  Sum the log-probabilities over
+// all ion fragment nodes.  
+float GetIonPRMFeatures(MSSpectrum* Spectrum, SpectrumTweak* Tweak, PRMBayesianModel* Model, int PRM, int VerboseFlag)
+{
+    PRMBayesianNode* Node;
+    int ParentIndex;
+    int TableIndex;
+    float Score = 0;
+    float NodeScore;
+    //
+    // Compute each node value:
+    for (Node = Model->Head; Node; Node = Node->Next)
+    {
+        Node->Value = IonScoringGetNodeValue(Model, Node, Spectrum, PRM, NULL, -1);
+        if (VerboseFlag)
+        {
+            printf("Score(%.2f): Node %d (%s) has value %d\n", PRM / (float)DALTON, Node->Index, Node->Name, Node->Value);
+        }
+    }
+    // Compute a SCORE for this collection of values:
+    for (Node = Model->Head; Node; Node = Node->Next)
+    {
+        switch (Node->Type)
+        {
+        case evPRMBPrefix:
+        case evPRMBPrefix2:
+        case evPRMBSuffix:
+        case evPRMBSuffix2:
+            TableIndex = Node->Value;
+            for (ParentIndex = 0; ParentIndex < Node->ParentCount; ParentIndex++)
+            {
+                TableIndex += Node->Parents[ParentIndex]->Value * Node->ParentBlocks[ParentIndex];
+            }
+            if (TableIndex >= Node->TableSize)
+            {
+                REPORT_ERROR(0);
+            }
+            NodeScore = Node->ProbTable[TableIndex];
+            if (Model->NoiseModel)
+            {
+                // GLOBAL noise model, based on all spectra
+                NodeScore -= Model->RandomIntensityScores[Node->Value];
+            }
+            else
+            {
+                // SPECTRUM noise model:
+                NodeScore -= Spectrum->IonScoringNoiseProbabilities[Node->Value];
+            }
+            if (VerboseFlag)
+            {
+                printf("  Node %d (%s) contributes %.3f - %.3f = %.5f\n", Node->Index, Node->Name, 
+                    Node->ProbTable[TableIndex], Spectrum->IonScoringNoiseProbabilities[Node->Value], NodeScore);
+            }
+            Score += NodeScore;
+            break;
+        default:
+            // Other node-types don't contribute to the score.
+            break;
+        }
+    }
+    return Score;
+}
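+
+// Note: each ion-fragment node above effectively contributes a log-likelihood ratio,
+//     NodeScore = log P(intensity level | parent values) - log P(intensity level | noise)
+// so a peak no more intense than the noise model predicts contributes roughly zero,
+// and a well-explained peak contributes a positive score.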
+
+// Iterate over all the nodes in our TagGraph, and assign a score to each.
+void TagGraphScorePRMNodes(PRMBayesianModel* Model, TagGraph* Graph, MSSpectrum* Spectrum, SpectrumTweak* Tweak)
+{
+    TagGraphNode* Node;
+
+    if (!Model)
+    {
+        if (Tweak->Charge < 3)
+        {
+            Model = TAGModelCharge2;
+        }
+        else
+        {
+            Model = TAGModelCharge3;
+        }
+    }
+
+    for (Node = Graph->FirstNode; Node; Node = Node->Next)
+    {
+        if (Node->NodeType != evGraphNodeB && Node->NodeType != evGraphNodeY)
+        {
+            Node->Score = 0;
+            continue;
+        }
+        Node->Score = GetIonPRMFeatures(Spectrum, Tweak, Model, Node->Mass, 0);
+        continue; 
+    }
+}
+
+void FreeBayesianModels()
+{
+    FreePRMBayesianModel(PRMModelCharge2);
+    PRMModelCharge2 = NULL;
+    FreePRMBayesianModel(PRMModelCharge3);
+    PRMModelCharge3 = NULL;
+    FreePRMBayesianModel(TAGModelCharge2);
+    TAGModelCharge2 = NULL;
+    FreePRMBayesianModel(TAGModelCharge3);
+    TAGModelCharge3 = NULL;
+    FreePRMBayesianModel(PhosCutModelCharge2);
+    PhosCutModelCharge2 = NULL;
+    FreePRMBayesianModel(PhosCutModelCharge3);
+    PhosCutModelCharge3 = NULL;
+}
+
+// Load PRMBayesianModel objects for scoring PRMs and for scoring tags.
+void InitBayesianModels()
+{
+    char FilePath[2048];
+    // Return immediately, if models are loaded already:
+    if (PRMModelCharge2)
+    {
+        return;
+    }
+    sprintf(FilePath, "%s%s", GlobalOptions->ResourceDir, "PRM2.bn");
+    PRMModelCharge2 = LoadPRMBayesianModel(FilePath);
+    sprintf(FilePath, "%s%s", GlobalOptions->ResourceDir, "PRM3.bn");
+    PRMModelCharge3 = LoadPRMBayesianModel(FilePath);
+    sprintf(FilePath, "%s%s", GlobalOptions->ResourceDir, "TAG2.bn");
+    TAGModelCharge2 = LoadPRMBayesianModel(FilePath);
+    sprintf(FilePath, "%s%s", GlobalOptions->ResourceDir, "TAG3.bn");
+    TAGModelCharge3 = LoadPRMBayesianModel(FilePath);
+    sprintf(FilePath, "%s%s", GlobalOptions->ResourceDir, "PhosCut2.bn");
+    PhosCutModelCharge2 = LoadPRMBayesianModel(FilePath);
+    sprintf(FilePath, "%s%s", GlobalOptions->ResourceDir, "PhosCut3.bn");
+    PhosCutModelCharge3 = LoadPRMBayesianModel(FilePath);
+}
+
+// Replace a PRMScoring model with one specified in the input file (the "PRMModel" option).
+// Useful for handling new instrument types, etc.
+int ReplacePRMScoringModel(int Charge, char* FileName)
+{
+    PRMBayesianModel* Model;
+    //
+    Model = LoadPRMBayesianModel(FileName);
+    if (!Model)
+    {
+        return 0;
+    }
+    if (Charge == 2)
+    {
+        FreePRMBayesianModel(PRMModelCharge2);
+        PRMModelCharge2 = Model;
+    }
+    else if (Charge == 3)
+    {
+        FreePRMBayesianModel(PRMModelCharge3);
+        PRMModelCharge3 = Model;
+    }
+    else
+    {
+        REPORT_ERROR(0);
+    }
+
+    return 1;
+}
+
+// Replace a tag scoring model with one specified in the input file (the "TAGModel" option).
+// Useful for handling new instrument types, etc.
+int ReplaceTAGScoringModel(int Charge, char* FileName)
+{
+    PRMBayesianModel* Model;
+    //
+    Model = LoadPRMBayesianModel(FileName);
+    if (!Model)
+    {
+        return 0;
+    }
+    if (Charge == 2)
+    {
+        FreePRMBayesianModel(TAGModelCharge2);
+        TAGModelCharge2 = Model;
+    }
+    else if (Charge == 3)
+    {
+        FreePRMBayesianModel(TAGModelCharge3);
+        TAGModelCharge3 = Model;
+    }
+    else
+    {
+        REPORT_ERROR(0);
+    }
+    return 1;
+}
+
+// Set the array Tweak->PRMScores.  This is used in unrestrictive ("blind") searches.
+void SetSpectrumPRMScores(MSSpectrum* Spectrum, SpectrumTweak* Tweak)
+{
+    PRMBayesianModel* Model;
+    int PRM;
+    float fScore;
+    //
+    // Ensure models are loaded:
+    if (!PRMModelCharge2)
+    {
+        InitBayesianModels();
+    }
+    Tweak->PRMScoreMax = Tweak->ParentMass;
+    if (Spectrum->Graph)
+    {
+        Tweak->PRMScoreMax = max(Tweak->PRMScoreMax, Spectrum->Graph->LastNode->Mass);
+    }
+    Tweak->PRMScoreMax = PRM_ARRAY_SLACK + (Tweak->PRMScoreMax / PRM_BIN_SIZE);
+    SafeFree(Tweak->PRMScores);
+    Tweak->PRMScores = (int*)calloc(Tweak->PRMScoreMax + 5, sizeof(int)); // extra slack in alloc
+    if (Tweak->Charge > 2)
+    {
+        Model = PRMModelCharge3;
+    }
+    else
+    {
+        Model = PRMModelCharge2;
+    }
+    for (PRM = 0; PRM < Tweak->PRMScoreMax; PRM++)
+    {
+        fScore = GetIonPRMFeatures(Spectrum, Tweak, Model, PRM * PRM_BIN_SIZE, 0);
+        //GetPRMFeatures(Spectrum, Tweak, Model, PRM * PRM_BIN_SIZE, 0);
+        Tweak->PRMScores[PRM] = (int)(fScore * 1000);
+    }
+    //DebugPrintPRMScores(Spectrum, Tweak);
+}
+
+int CountTrypticTermini(Peptide* Match)
+{
+    int NTT = 0;
+    int PeptideLength = strlen(Match->Bases);
+    switch (GlobalOptions->DigestType)
+    {
+    case DIGEST_TYPE_TRYPSIN:
+        /////////////////////////////////
+        // Number of tryptic termini:
+        NTT = 0;
+        if (Match->PrefixAmino == '\0' || Match->PrefixAmino == '-' || Match->PrefixAmino == '*')
+        {
+            NTT++;
+        }
+        else if ((Match->PrefixAmino == 'K' || Match->PrefixAmino == 'R') && Match->Bases[0] != 'P')
+        {
+            NTT++;
+        }
+        if (Match->SuffixAmino == '\0' || Match->SuffixAmino == '-' || Match->SuffixAmino == '*')
+        {
+            NTT++;
+        }
+        else if ((Match->Bases[PeptideLength - 1] == 'K' || Match->Bases[PeptideLength - 1] == 'R') && Match->SuffixAmino != 'P')
+        {
+            NTT++;
+        }
+        break;
+    case DIGEST_TYPE_UNKNOWN:
+    default:
+        NTT = 2;
+        break;
+    }
+    return NTT;
+}
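+
+// Example (hypothetical match): PrefixAmino 'K', Bases "ACDEFGHIK", SuffixAmino 'L'
+// gives NTT == 2 under DIGEST_TYPE_TRYPSIN (K before the peptide, not followed by P;
+// peptide ends in K, not followed by P).  If the peptide instead ended in 'M', only
+// the N-terminal side would count and NTT would be 1.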
+
+void PopulateCutScores(PRMBayesianModel* Model, MSSpectrum* Spectrum, Peptide* Match, float* CutScores)
+{
+    int PRM = 0;  
+    int NodeIndex;
+    PRMBayesianNode* Node;
+    int AminoIndex;
+    int ModIndex;
+    int PeptideLength = strlen(Match->Bases);
+    int PeakIndex;
+
+    // Reset all peak annotations:
+    for (PeakIndex = 0; PeakIndex < Spectrum->PeakCount; PeakIndex++)
+    {
+        Spectrum->Peaks[PeakIndex].IonType = evFragmentTypeNone;
+    }
+    if (Match->SpecialFragmentation)
+    { // phosphorylated spectra
+        AnnotateParentPeaks(Spectrum, Match, Model);
+    }
+
+    for (NodeIndex = 0, Node = Model->Head; Node; NodeIndex++, Node = Node->Next)
+    {
+        PRM = 0;
+        for (AminoIndex = 0; AminoIndex <= PeptideLength; AminoIndex++)
+        {
+            ///////////////////////////////////////////////////////////////////////////////////////
+            // Set values, and accumulate table entries:
+            Node->Values[AminoIndex] = IonScoringGetNodeValue(Model, Node, Spectrum, PRM, Match, AminoIndex);
+            ///////////////////////////////////////////////////////////////////////////////////////
+            // Add to PRM:
+            if (AminoIndex == PeptideLength)
+            {
+                break;
+            }
+            PRM += PeptideMass[Match->Bases[AminoIndex]];
+            for (ModIndex = 0; ModIndex < MAX_PT_MODS; ModIndex++)
+            {
+                if (Match->AminoIndex[ModIndex] == AminoIndex)
+                {
+                    PRM += Match->ModType[ModIndex]->RealDelta;
+                }
+            }
+        } // Amino loop
+    } // NodeIndex loop
+
+    // Populate the CutScores array:
+    for (AminoIndex = 0; AminoIndex <= PeptideLength; AminoIndex++)
+    {
+        CutScores[AminoIndex] = PRMBNGetCutScore(Spectrum, Model, AminoIndex);
+    }
+}
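+
+// Note: a peptide of length L has L + 1 cut points (AminoIndex 0..L), which is why
+// the loops above run through AminoIndex == PeptideLength and why callers must size
+// CutScores with at least PeptideLength + 1 entries.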
+
+// Compute MQScore features, in preparation for MQScore calculation
+int ComputeMQScoreFeatures(MSSpectrum* Spectrum, Peptide* Match, float* MQFeatures, int VerboseFlag)
+{
+    int FeatureIndex = 0;
+    PRMBayesianModel* Model;
+    int PeptideLength;
+    float CutScores[256];
+    int PRM = 0;  
+    int AminoIndex;
+    int PRMCount;
+    float ScoreTotal;
+    int YFlag[256];
+    int BFlag[256];
+    int PeakIndex;
+    int PresentCount;
+    int FragmentType;
+    float PeakIntensity;
+    float TotalIntensity;
+    float IntensityY = 0;
+    float IntensityYSeries = 0;
+    float IntensityB = 0;
+    float IntensityBSeries = 0;
+    //
+    Spectrum->ParentMass = Match->ParentMass;
+    Model = GetScoringModel(Match, Spectrum->Charge);
+    PeptideLength = strlen(Match->Bases);
+    // If the peptide is very short (length 5 or less), we may not even want to bother
+    // computing features.  Peptides that short are not informative!
+
+    MQFeatures[FeatureIndex++] = (float)PeptideLength; // #2
+    
+    ///////////////////////////////////////
+    // Cut score features (5, 11):
+    PopulateCutScores(Model, Spectrum, Match, CutScores);
+
+    // Total of CENTRAL cut scores (cut points strictly inside the peptide):
+    ScoreTotal = 0;
+    PRMCount = 0;
+    for (AminoIndex = 1; AminoIndex < PeptideLength; AminoIndex++)
+    {
+        ScoreTotal += CutScores[AminoIndex];
+        PRMCount++;
+        if (VerboseFlag)
+        {
+            printf("  Cut score %d: %.2f\n", AminoIndex, CutScores[AminoIndex]);
+        }
+    }
+    MQFeatures[FeatureIndex++] = ScoreTotal; // #5
+
+    // Median cut score: 
+    PRMCount = PeptideLength + 1;
+    MQFeatures[FeatureIndex++] = GetMedian(CutScores, PRMCount); // #11
+
+    // Count b and y peak presence:
+    memset(BFlag, 0, sizeof(int) * (PeptideLength + 1));
+    memset(YFlag, 0, sizeof(int) * (PeptideLength + 1));
+    TotalIntensity = 0;
+    for (PeakIndex = 0; PeakIndex < Spectrum->PeakCount; PeakIndex++)
+    {
+        FragmentType = Spectrum->Peaks[PeakIndex].IonType;
+        PeakIntensity = Spectrum->Peaks[PeakIndex].Intensity;
+        if (FragmentType == evParentLoss)
+        { 
+            // Don't let parent-loss peaks count against phosphorylated
+            // matches; the parent-loss peak is typically very strong.
+            PeakIntensity = 0;
+        }
+        TotalIntensity += PeakIntensity;
+        switch (FragmentType)
+        {
+            case evFragmentY:
+                IntensityY += PeakIntensity;
+                IntensityYSeries += PeakIntensity;
+                YFlag[Spectrum->Peaks[PeakIndex].AminoIndex] = 1;
+                break;
+            case evFragmentYLoss:
+                IntensityYSeries += PeakIntensity;
+                break;
+            case evFragmentB:
+                IntensityB += PeakIntensity;
+                IntensityBSeries += PeakIntensity;
+                BFlag[Spectrum->Peaks[PeakIndex].AminoIndex] = 1;
+                break;
+            case evFragmentBLoss:
+                IntensityBSeries += PeakIntensity;
+                break;
+        }
+    }
+    // Fraction of B, Y present:
+    PresentCount = 0;
+    for (AminoIndex = 0; AminoIndex <= PeptideLength; AminoIndex++)
+    {
+        PresentCount += YFlag[AminoIndex];
+    }
+    MQFeatures[FeatureIndex++] = PresentCount / (float)(PeptideLength + 1); // #12
+    PresentCount = 0;
+    for (AminoIndex = 0; AminoIndex <= PeptideLength; AminoIndex++)
+    {
+        PresentCount += BFlag[AminoIndex];
+    }
+    MQFeatures[FeatureIndex++] = PresentCount / (float)(PeptideLength + 1); // #13
+    
+    // Fraction of total intensity in b and y fragment peaks:
+    MQFeatures[FeatureIndex++] = (IntensityY + IntensityB) / TotalIntensity; // #25
+
+    MQFeatures[FeatureIndex++] = (float)CountTrypticTermini(Match); // #30
+    return FeatureIndex;
+}
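+
+// For reference, the seven features filled in above, in order:
+//   [0] peptide length (#2), [1] total central cut score (#5), [2] median cut score (#11),
+//   [3] fraction of cut points with a y peak observed (#12),
+//   [4] fraction of cut points with a b peak observed (#13),
+//   [5] fraction of total intensity in b and y peaks (#25), [6] tryptic termini count (#30)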
+
+// This is currently only called for phosphorylated spectra; for that reason we
+// claim the peaks corresponding to parent-phosphate and parent-phosphate-water.
+// This bears some resemblance to IonScoringGetPeakIntensity, but because these
+// peaks do not correspond to an AminoIndex, yet still require peak claiming,
+// the claiming logic is rewritten here.
+void AnnotateParentPeaks(MSSpectrum* Spectrum, Peptide* Match, PRMBayesianModel* Model)
+{
+    int Loss;
+    // The loss of phosphate from the precursor is actually 98 daltons (80 + 18).
+    int PMMinusPhosphate;
+    // Actually phosphate plus two waters (80 + 18 + 18).
+    int PMMinusPhosphateAndWater;
+    //
+    //set mz according to the current parent mass of the spectrum
+    Spectrum->MZ = (Spectrum->ParentMass + (Spectrum->Charge - 1) * HYDROGEN_MASS) / Spectrum->Charge;
+    Loss = PHOSPHATE_WATER_MASS / Spectrum->Charge;
+    PMMinusPhosphate = Spectrum->MZ - Loss;
+    Loss = (PHOSPHATE_WATER_MASS + WATER_MASS)/Spectrum->Charge;
+    PMMinusPhosphateAndWater = Spectrum->MZ - Loss;
+    ClaimParentPeak(Spectrum, Match, PMMinusPhosphate, Model);
+    ClaimParentPeak(Spectrum, Match, PMMinusPhosphateAndWater, Model);
+    // Now look for +1 isotopes
+    Loss = PHOSPHATE_WATER_MASS / Spectrum->Charge;
+    PMMinusPhosphate = Spectrum->MZ - Loss + (HYDROGEN_MASS/Spectrum->Charge);
+    Loss = (PHOSPHATE_WATER_MASS + WATER_MASS) / Spectrum->Charge;
+    PMMinusPhosphateAndWater = Spectrum->MZ - Loss + (HYDROGEN_MASS / Spectrum->Charge);
+    ClaimParentPeak(Spectrum, Match, PMMinusPhosphate, Model);
+    ClaimParentPeak(Spectrum, Match, PMMinusPhosphateAndWater, Model);
+}
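+
+// Worked example (round numbers): for a doubly charged precursor at m/z 600.0, the
+// phosphate loss (98 / 2 = 49) gives a neutral-loss peak near m/z 551.0, the extra
+// water loss ((98 + 18) / 2 = 58) gives one near m/z 542.0, and at charge 2 the +1
+// isotope of each sits about 0.5 m/z higher.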
+
+void ClaimParentPeak(MSSpectrum* Spectrum, Peptide* Match, int Mass, PRMBayesianModel* Model)
+{
+    int Bin;
+    int MinMass;
+    int MaxMass;
+    float Intensity = 0;
+    int PeakIndex;
+    int Skew;
+    float Multiplier;
+    //
+    Bin = (Mass + 50) / 100; // Bin width 0.1Da
+    MinMass = Mass - Model->IntensityRadius;
+    MaxMass = Mass + Model->IntensityRadius;
+    
+    // If the mass is off the scale, then you get no peaks:
+    if (Bin >= Spectrum->IntensityBinCount || Bin < 0)
+    {
+        return;
+    }
+
+    PeakIndex = Spectrum->BinPeakIndex[Bin];
+    if (PeakIndex >= 0)
+    {
+        for ( ; PeakIndex < Spectrum->PeakCount; PeakIndex++)
+        {
+            if (Spectrum->Peaks[PeakIndex].Mass > MaxMass)
+            {
+                break;
+            }
+            if (Spectrum->Peaks[PeakIndex].Mass < MinMass)
+            {
+                continue;
+            }
+
+            Multiplier = 1.0; // default
+            Skew = abs(Mass - Spectrum->Peaks[PeakIndex].Mass);
+            if (Model->IntensityScheme == 1 || Model->IntensityScheme == 3)
+            {
+                if (Skew >= Model->HalfIntensityRadius)
+                {
+                    Multiplier = 0.5;
+                }
+            }
+            if (Spectrum->Peaks[PeakIndex].IonType)
+            {
+                // This peak has already been CLAIMED by another ion type:
+                continue;
+            }
+            Intensity += Spectrum->Peaks[PeakIndex].Intensity * Multiplier;
+            // CLAIM this peak:
+            Spectrum->Peaks[PeakIndex].IonType = evParentLoss;
+            Spectrum->Peaks[PeakIndex].AminoIndex = -1; // Not an amino index; parent-loss peaks have no cut point.
+        }
+    }
+}
+
+PRMBayesianModel* GetScoringModel(Peptide* Match, int Charge)
+{
+    int ModIndex;
+    //
+    for (ModIndex = 0; ModIndex < MAX_PT_MODS; ModIndex++)
+    {
+        if (!Match->ModType[ModIndex])
+        {
+            break;
+        }
+        if (Match->ModType[ModIndex]->Flags & DELTA_FLAG_PHOSPHORYLATION)
+        {
+            Match->SpecialFragmentation = FRAGMENTATION_PHOSPHO;
+            Match->SpecialModPosition = Match->AminoIndex[ModIndex];
+            break;
+        }
+    }
+    if (Match->SpecialFragmentation)
+    {
+        if (Charge > 2)
+        {
+            return PhosCutModelCharge3;
+        }
+        return PhosCutModelCharge2;
+    }
+    if (Charge > 2)
+    {
+        return TAGModelCharge3;
+    }
+    return TAGModelCharge2; //default
+}
+
+char* GetFragmentTypeName(int FragmentType)
+{
+    switch (FragmentType)
+    {
+    case evFragmentY:
+        return "Y";
+    case evFragmentB:
+        return "B";
+    case evFragmentYLoss:
+        return "Y loss";
+    case evFragmentBLoss:
+        return "B loss";
+    case evParentLoss:
+        return "Parent loss";
+    case evFragmentTypeNone:
+    default:
+        return "";
+    }
+}
+
diff --git a/IonScoring.h b/IonScoring.h
new file mode 100644
index 0000000..0e492c2
--- /dev/null
+++ b/IonScoring.h
@@ -0,0 +1,195 @@
+//Title:          IonScoring.h
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+#ifndef ION_SCORING_H
+#define ION_SCORING_H
+
+
+
+// New code to support scoring of PRMs, and of cuts.  
+// Key ideas:
+// - Feature meta-data is read from the bayesian network.  New network topologies,
+// and even new ion types, can be used without the need to recompile code.
+// - Ion types and network topologies will be learned in an objective, repeatable way.
+#include "Spectrum.h"
+#include "Tagger.h"
+#include "Trie.h"
+
+#define UNKNOWN_AMINO 'Z'
+
+typedef enum FragmentTypes
+{
+    evFragmentTypeNone = 0,
+    evFragmentY,
+    evFragmentB,
+    evFragmentYLoss,
+    evFragmentBLoss,
+    evParentLoss //used for parent-phosphate.  not really a fragment, but used to claim the peak.
+} FragmentTypes;
+
+typedef enum PRMBayesianNodeType
+{
+    evPRMBInvalid = 0,
+    evPRMBPrefix,
+    evPRMBSuffix,
+    evPRMBPrefix2,
+    evPRMBSuffix2,
+    evSector,
+    evFlank,
+    evPrefixAA,
+    evSuffixAA,
+    evPrefixContain,
+    evSuffixContain,
+    evPrefixContainPhos,
+    evSuffixContainPhos
+} PRMBayesianNodeType;
+
+// A node in a Bayesian Network for scoring PRMs or cuts:
+typedef struct PRMBayesianNode
+{
+    // Index of the node in the bayesian network (0, 1, etc).  By convention, 
+    // parents will always have lower indices than their children.
+    int Index;
+    PRMBayesianNodeType Type;
+    // For ion type nodes:
+    int MassOffset;
+    int FragmentType; // from the FragmentTypes enum
+    // The Flag on a BayesianNode of special type is
+    // used to control how the node's values are computed.
+    // Examples: For type evFlank, the flag tells us whether
+    // we're looking for flanking aminos that affect b fragments,
+    // or y fragments.
+    int Flag;
+    struct PRMBayesianNode* Next;
+    // ValueCount is the number of distinct values this node can take on (2 or more).
+    // ValueCount is determined by our Type and Flag, and possibly by the network's
+    // intensity scheme.
+    int ValueCount;
+    int ParentCount;
+    struct PRMBayesianNode** Parents;
+    // ParentBlocks[n] is the multiplier for parent n's value when indexing into
+    // the CountTable/ProbTable arrays.  For instance, if we have 4 possible values
+    // and one parent, then ParentBlock[0] will be 4, and the index of a table entry
+    // is ParentValue*4 + ChildValue.
+    int* ParentBlocks;
+    // Size of CountTable and ProbTable:
+    int TableSize; 
+    // Table counting the number of occurrences of a given value combination:
+    int* CountTable;
+    // Table giving natural logarithm of the probability of a given value combination:
+    float* ProbTable;
+    // Value is transiently set while scoring a PRM or cut point:
+    int Value; 
+    // An array of values for cut points:
+    int Values[256]; 
+    // Human-readable name of the node, mostly for debugging:
+    char Name[256 + 1];
+    // Flag to indicate whether this node, or an immediate parent, requires knowledge of 
+    // flanking amino acids.  If this flag is set, then during tagging, we will delay
+    // full scoring of this node until tag construction.
+    int FlankFlag;
+} PRMBayesianNode;
+
+typedef struct PRMBayesianNodeHolder
+{
+    PRMBayesianNode* Node;
+    struct PRMBayesianNodeHolder* Next;
+} PRMBayesianNodeHolder;
+
+typedef struct PRMBayesianModel
+{
+    PRMBayesianNode* Head;
+    PRMBayesianNode* Tail;
+    // Array of the nodes, for quickly looking them up by index:
+    PRMBayesianNode** Nodes;
+    int NodeCount;
+    // Scheme for assigning intensity-levels to ion nodes:
+    int IntensityScheme;
+    // 0 is spectrum-specific, 1 is global
+    int NoiseModel;
+    // Radius (in daltons) of the window over which to sum intensities when
+    // finding peaks:
+    int IntensityRadius;
+    int HalfIntensityRadius;
+    // CutFlag is true if this model is used to score cut points.  A few operations
+    // differ; in particular, we seize intensities for b and y peaks first, THEN consider
+    // neutral losses.
+    int CutFlag;
+    // Intensity levels are sorted from highest (0) to lowest (MinIntensityLevel).
+    int MinIntensityLevel;
+    // RandomIntensityCounts and RandomIntensityScores track how often a *random* mass 
+    // has a particular intensity level.  We'll try using a spectrum-specific noise 
+    // model as well as this "global" noise model.
+    int RandomIntensityCounts[10];
+    float RandomIntensityScores[10];
+    // Linked list of nodes which require flanking amino acid information (either directly,
+    // or via parents):
+    PRMBayesianNodeHolder* FirstFlank;
+    PRMBayesianNodeHolder* LastFlank;
+} PRMBayesianModel;
+
+void AddPRMBayesianNode(PRMBayesianModel* Model, char* Name, int NodeType, int NodeFlag, float NodeMassOffset, int FragmentType);
+void FreePRMBayesianModel(PRMBayesianModel* Model);
+void FreePRMBayesianNode(PRMBayesianNode* Node);
+void PrepareSpectrumForIonScoring(PRMBayesianModel* Model, MSSpectrum* Spectrum, int ForceRefresh);
+int IonScoringGetNodeValue(PRMBayesianModel* Model, PRMBayesianNode* Node, MSSpectrum* Spectrum, int PRM,
+    Peptide* Match, int AminoIndex);
+void ComputePRMBayesianModelProbabilityTables(PRMBayesianModel* Model, int PaddingCount);
+void SavePRMBayesianModel(PRMBayesianModel* Model, char* FileName);
+PRMBayesianModel* LoadPRMBayesianModel(char* FileName);
+void DebugPrintPRMBayesianModel(PRMBayesianModel* Model);
+void TagGraphScorePRMNodes(PRMBayesianModel* Model, struct TagGraph* Graph, MSSpectrum* Spectrum, SpectrumTweak* Tweak);
+float GetIonPRMFeatures(MSSpectrum* Spectrum, SpectrumTweak* Tweak, PRMBayesianModel* Model, int PRM, int VerboseFlag);
+void BuildModelFlankList(PRMBayesianModel* Model);
+void LoadFlankingAminoEffects();
+int IonScoringGetFlank(PRMBayesianNode* Node, char Left, char Right);
+float PRMBNGetCutScore(MSSpectrum* Spectrum, PRMBayesianModel* Model, int AminoIndex);
+void InitBayesianModels();
+int ReplacePRMScoringModel(int Charge, char* FileName);
+int ReplaceTAGScoringModel(int Charge, char* FileName);
+void SetSpectrumPRMScores(MSSpectrum* Spectrum, SpectrumTweak* Tweak);
+void PopulateCutScores(PRMBayesianModel* Model, MSSpectrum* Spectrum, Peptide* Match, float* CutScores);
+int CountTrypticTermini(Peptide* Match);
+int ComputeMQScoreFeatures(MSSpectrum* Spectrum, Peptide* Match, float* MQFeatures, int VerboseFlag);
+char* GetFragmentTypeName(int FragmentType);
+void FreeBayesianModels();
+
+extern PRMBayesianModel* PRMModelCharge2;
+extern PRMBayesianModel* PRMModelCharge3;
+extern PRMBayesianModel* TAGModelCharge2;
+extern PRMBayesianModel* TAGModelCharge3;
+extern PRMBayesianModel* PhosCutModelCharge2;
+extern PRMBayesianModel* PhosCutModelCharge3;
+
+#endif // ION_SCORING_H
+
+
diff --git a/IsotopePatterns.txt b/IsotopePatterns.txt
new file mode 100644
index 0000000..95489fe
--- /dev/null
+++ b/IsotopePatterns.txt
@@ -0,0 +1,1750 @@
+0	0.0	
+1	0.0	
+2	0.0	
+3	0.0	
+4	0.0	
+5	0.0	
+6	0.0	
+7	0.0	
+8	0.0	
+9	0.0	
+10	0.0	
+11	0.0	
+12	0.0	
+13	0.0	
+14	0.0	
+15	0.0	
+16	0.0	
+17	0.0	
+18	0.0	
+19	0.0	
+20	0.0	
+21	0.0	
+22	0.0	
+23	0.0	
+24	0.0	
+25	0.0	
+26	0.0	
+27	0.0	
+28	0.0	
+29	0.0	
+30	0.0	
+31	0.0	
+32	0.0	
+33	0.0	
+34	0.0223878333148	
+35	0.0223878333148	
+36	0.0223878333148	
+37	0.0223878333148	
+38	0.0226878783215	
+39	0.0226878783215	
+40	0.0226878783215	
+41	0.0226878783215	
+42	0.0226878783215	
+43	0.0226878783215	
+44	0.0226878783215	
+45	0.0226878783215	
+46	0.0226878783215	
+47	0.0226878783215	
+48	0.0226878783215	
+49	0.0226878783215	
+50	0.0226878783215	
+51	0.0226878783215	
+52	0.0226878783215	
+53	0.0228379008249	
+54	0.0228379008249	
+55	0.0228379008249	
+56	0.0340318174823	
+57	0.0340318174823	
+58	0.0340318174823	
+59	0.0340318174823	
+60	0.0340318174823	
+61	0.0340318174823	
+62	0.0340318174823	
+63	0.0340318174823	
+64	0.0340318174823	
+65	0.0340318174823	
+66	0.0340318174823	
+67	0.0340318174823	
+68	0.0341818399857	
+69	0.0341818399857	
+70	0.0341818399857	
+71	0.0341818399857	
+72	0.0341818399857	
+73	0.0341818399857	
+74	0.0341818399857	
+75	0.0341818399857	
+76	0.0341818399857	
+77	0.0341818399857	
+78	0.0453757566431	
+79	0.0453757566431	
+80	0.0453757566431	
+81	0.0453757566431	
+82	0.0453757566431	
+83	0.0455257791465	
+84	0.0455257791465	
+85	0.0455257791465	
+86	0.0455257791465	
+87	0.0455257791465	
+88	0.0455257791465	
+89	0.0455257791465	
+90	0.0455257791465	
+91	0.0455257791465	
+92	0.0455257791465	
+93	0.0455257791465	
+94	0.0455257791465	
+95	0.0455257791465	
+96	0.0455257791465	
+97	0.0455257791465	
+98	0.0456758016498	
+99	0.0456758016498	
+100	0.0568697183072	
+101	0.0568697183072	
+102	0.0568697183072	
+103	0.0568697183072	
+104	0.0568697183072	
+105	0.0568697183072	
+106	0.0568697183072	
+107	0.0568697183072	
+108	0.0568697183072	
+109	0.0568697183072	
+110	0.0568697183072	
+111	0.0568697183072	
+112	0.0568697183072	
+113	0.0570197408106	
+114	0.0570197408106	
+115	0.0570197408106	
+116	0.0570197408106	
+117	0.0570197408106	
+118	0.0570197408106	
+119	0.0570197408106	
+120	0.0570197408106	
+121	0.0570197408106	
+122	0.0570197408106	
+123	0.068213657468	
+124	0.068213657468	
+125	0.068213657468	
+126	0.068213657468	
+127	0.068213657468	
+128	0.0683636799714	
+129	0.0683636799714	
+130	0.0683636799714	
+131	0.0683636799714	
+132	0.0683636799714	
+133	0.0683636799714	
+134	0.0683636799714	
+135	0.0683636799714	
+136	0.0683636799714	
+137	0.0683636799714	
+138	0.0683636799714	
+139	0.0683636799714	
+140	0.0683636799714	
+141	0.0683636799714	
+142	0.0683636799714	
+143	0.0685137024748	
+144	0.0685137024748	
+145	0.0797076191322	
+146	0.0797076191322	
+147	0.0797076191322	
+148	0.0797076191322	
+149	0.0797076191322	
+150	0.0797076191322	
+151	0.0797076191322	
+152	0.0797076191322	
+153	0.0797076191322	
+154	0.0797076191322	
+155	0.0797076191322	
+156	0.0797076191322	
+157	0.0797076191322	
+158	0.0798576416355	
+159	0.0798576416355	
+160	0.0798576416355	
+161	0.0798576416355	
+162	0.0798576416355	
+163	0.0798576416355	
+164	0.0798576416355	
+165	0.0798576416355	
+166	0.0798576416355	
+167	0.0984044920894	
+168	0.0984044920894	
+169	0.0984044920894	
+170	0.0984044920894	
+171	0.0984044920894	
+172	0.0984044920894	
+173	0.0985545145928	
+174	0.0985545145928	
+175	0.0985545145928	
+176	0.0985545145928	
+177	0.0985545145928	
+178	0.0985545145928	
+179	0.0985545145928	
+180	0.0985545145928	
+181	0.0985545145928	
+182	0.0985545145928	
+183	0.0985545145928	
+184	0.0985545145928	
+185	0.0985545145928	
+186	0.0985545145928	
+187	0.0985545145928	
+188	0.0987045370962	
+189	0.109898453754	
+190	0.109898453754	
+191	0.109898453754	
+192	0.109898453754	
+193	0.109898453754	
+194	0.109898453754	
+195	0.109898453754	
+196	0.109898453754	
+197	0.109898453754	
+198	0.109898453754	
+199	0.109898453754	
+200	0.109898453754	
+201	0.109898453754	
+202	0.109898453754	
+203	0.110048476257	
+204	0.110048476257	
+205	0.110048476257	
+206	0.110048476257	
+207	0.110048476257	
+208	0.110048476257	
+209	0.110048476257	
+210	0.110048476257	
+211	0.110048476257	
+212	0.121242392914	
+213	0.121242392914	
+214	0.121242392914	
+215	0.121242392914	
+216	0.121242392914	
+217	0.121242392914	
+218	0.121392415418	
+219	0.121392415418	
+220	0.121392415418	
+221	0.121392415418	
+222	0.121392415418	
+223	0.121392415418	
+224	0.121392415418	
+225	0.121392415418	
+226	0.121392415418	
+227	0.121392415418	
+228	0.121392415418	
+229	0.121392415418	
+230	0.121392415418	
+231	0.121392415418	
+232	0.121392415418	
+233	0.121542437921	
+234	0.132736354579	
+235	0.132736354579	
+236	0.132736354579	
+237	0.132736354579	
+238	0.132736354579	
+239	0.132736354579	
+240	0.132736354579	
+241	0.132736354579	
+242	0.132736354579	
+243	0.132736354579	
+244	0.132736354579	
+245	0.132736354579	
+246	0.132736354579	
+247	0.132736354579	
+248	0.132886377082	
+249	0.13656284398	
+250	0.13656284398	
+251	0.13656284398	
+252	0.13656284398	
+253	0.13656284398	
+254	0.13656284398	
+255	0.13656284398	
+256	0.147756760638	
+257	0.147756760638	
+258	0.147756760638	
+259	0.147756760638	
+260	0.147756760638	
+261	0.147756760638	
+262	0.147756760638	
+263	0.147906783141	
+264	0.147906783141	
+265	0.147906783141	
+266	0.147906783141	
+267	0.147906783141	
+268	0.147906783141	
+269	0.147906783141	
+270	0.147906783141	
+271	0.147906783141	
+272	0.147906783141	
+273	0.147906783141	
+274	0.147906783141	
+275	0.147906783141	
+276	0.147906783141	
+277	0.147906783141	
+278	0.159250722302	
+279	0.159250722302	
+280	0.159250722302	
+281	0.159250722302	
+282	0.159250722302	
+283	0.159250722302	
+284	0.159250722302	
+285	0.159250722302	
+286	0.159250722302	
+287	0.159250722302	
+288	0.159250722302	
+289	0.159250722302	
+290	0.159250722302	
+291	0.159250722302	
+292	0.159250722302	
+293	0.159400744805	
+294	0.159400744805	
+295	0.159400744805	
+296	0.159400744805	
+297	0.159400744805	
+298	0.159400744805	
+299	0.159400744805	
+300	0.170594661462	
+301	0.170594661462	
+302	0.170594661462	
+303	0.170594661462	
+304	0.170594661462	
+305	0.170594661462	
+306	0.170594661462	
+307	0.170594661462	
+308	0.170744683966	
+309	0.170744683966	
+310	0.170744683966	
+311	0.170744683966	
+312	0.170744683966	
+313	0.170744683966	
+314	0.170744683966	
+315	0.170744683966	
+316	0.170744683966	
+317	0.170744683966	
+318	0.170744683966	
+319	0.170744683966	
+320	0.170744683966	
+321	0.170744683966	
+322	0.170744683966	
+323	0.182088623127	
+324	0.182088623127	
+325	0.182088623127	
+326	0.182088623127	
+327	0.182088623127	
+328	0.182088623127	
+329	0.182088623127	
+330	0.182088623127	
+331	0.185765090025	
+332	0.185765090025	
+333	0.185765090025	
+334	0.185765090025	
+335	0.185765090025	
+336	0.185765090025	
+337	0.185765090025	
+338	0.185915112528	
+339	0.185915112528	
+340	0.185915112528	
+341	0.185915112528	
+342	0.185915112528	
+343	0.185915112528	
+344	0.185915112528	
+345	0.197109029186	
+346	0.197109029186	
+347	0.197109029186	
+348	0.197109029186	
+349	0.197109029186	
+350	0.197109029186	
+351	0.197109029186	
+352	0.197109029186	
+353	0.197259051689	
+354	0.197259051689	
+355	0.197259051689	
+356	0.197259051689	
+357	0.197259051689	
+358	0.197259051689	
+359	0.197259051689	
+360	0.197259051689	
+361	0.197259051689	
+362	0.197259051689	
+363	0.197259051689	
+364	0.197259051689	
+365	0.197259051689	
+366	0.197259051689	
+367	0.208452968346	
+368	0.20860299085	
+369	0.20860299085	
+370	0.20860299085	
+371	0.20860299085	
+372	0.20860299085	
+373	0.20860299085	
+374	0.20860299085	
+375	0.20860299085	
+376	0.20860299085	
+377	0.20860299085	
+378	0.20860299085	
+379	0.20860299085	
+380	0.20860299085	
+381	0.20860299085	
+382	0.20860299085	
+383	0.208753013353	
+384	0.208753013353	
+385	0.208753013353	
+386	0.208753013353	
+387	0.208753013353	
+388	0.208753013353	
+389	0.219946930011	
+390	0.219946930011	
+391	0.219946930011	
+392	0.219946930011	
+393	0.219946930011	
+394	0.219946930011	
+395	0.219946930011	
+396	0.219946930011	
+397	0.219946930011	
+398	0.220096952514	
+399	0.220096952514	
+400	0.220096952514	
+401	0.220096952514	
+402	0.220096952514	
+403	0.220096952514	
+404	0.220096952514	
+405	0.220096952514	
+406	0.220096952514	
+407	0.220096952514	
+408	0.220096952514	
+409	0.220096952514	
+410	0.220096952514	
+411	0.220096952514	
+412	0.231290869171	
+413	0.235117358573	
+414	0.235117358573	
+415	0.235117358573	
+416	0.235117358573	
+417	0.235117358573	
+418	0.235117358573	
+419	0.235117358573	
+420	0.235117358573	
+421	0.235117358573	
+422	0.235117358573	
+423	0.235117358573	
+424	0.235117358573	
+425	0.235117358573	
+426	0.235117358573	
+427	0.235117358573	
+428	0.235267381076	
+429	0.235267381076	
+430	0.235267381076	
+431	0.235267381076	
+432	0.235267381076	
+433	0.235267381076	
+434	0.246461297734	
+435	0.246461297734	
+436	0.246461297734	
+437	0.246461297734	
+438	0.246461297734	
+439	0.246461297734	
+440	0.246461297734	
+441	0.246461297734	
+442	0.246461297734	
+443	0.246611320237	
+444	0.246611320237	
+445	0.246611320237	
+446	0.246611320237	
+447	0.246611320237	
+448	0.246611320237	
+449	0.246611320237	
+450	0.246611320237	
+451	0.246611320237	
+452	0.246611320237	
+453	0.246611320237	
+454	0.246611320237	
+455	0.246611320237	
+456	0.257805236894	
+457	0.257805236894	
+458	0.257955259398	
+459	0.257955259398	
+460	0.257955259398	
+461	0.257955259398	
+462	0.257955259398	
+463	0.257955259398	
+464	0.257955259398	
+465	0.257955259398	
+466	0.257955259398	
+467	0.257955259398	
+468	0.257955259398	
+469	0.257955259398	
+470	0.257955259398	
+471	0.257955259398	
+472	0.257955259398	
+473	0.258105281901	
+474	0.258105281901	
+475	0.258105281901	
+476	0.258105281901	
+477	0.258105281901	
+478	0.269299198559	
+479	0.269299198559	
+480	0.269299198559	
+481	0.269299198559	
+482	0.269299198559	
+483	0.269299198559	
+484	0.269299198559	
+485	0.269299198559	
+486	0.269299198559	
+487	0.269299198559	
+488	0.269449221062	
+489	0.269449221062	
+490	0.269449221062	
+491	0.269449221062	
+492	0.269449221062	
+493	0.269449221062	
+494	0.269449221062	
+495	0.27312568796	
+496	0.27312568796	
+497	0.27312568796	
+498	0.27312568796	
+499	0.27312568796	
+500	0.284319604618	
+501	0.284319604618	
+502	0.284319604618	
+503	0.284319604618	
+504	0.284469627121	
+505	0.284469627121	
+506	0.284469627121	
+507	0.284469627121	
+508	0.284469627121	
+509	0.284469627121	
+510	0.284469627121	
+511	0.284469627121	
+512	0.284469627121	
+513	0.284469627121	
+514	0.284469627121	
+515	0.284469627121	
+516	0.284469627121	
+517	0.284469627121	
+518	0.284469627121	
+519	0.284619649624	
+520	0.284619649624	
+521	0.284619649624	
+522	0.284619649624	
+523	0.295813566282	
+524	0.295813566282	
+525	0.295813566282	
+526	0.295813566282	
+527	0.295813566282	
+528	0.295813566282	
+529	0.295813566282	
+530	0.295813566282	
+531	0.295813566282	
+532	0.295813566282	
+533	0.295813566282	
+534	0.295963588785	
+535	0.295963588785	
+536	0.295963588785	
+537	0.295963588785	
+538	0.295963588785	
+539	0.295963588785	
+540	0.295963588785	
+541	0.295963588785	
+542	0.295963588785	
+543	0.295963588785	
+544	0.295963588785	
+545	0.307157505443	
+546	0.307157505443	
+547	0.307157505443	
+548	0.307157505443	
+549	0.307307527946	
+550	0.307307527946	
+551	0.307307527946	
+552	0.307307527946	
+553	0.307307527946	
+554	0.307307527946	
+555	0.307307527946	
+556	0.307307527946	
+557	0.307307527946	
+558	0.307307527946	
+559	0.307307527946	
+560	0.307307527946	
+561	0.307307527946	
+562	0.307307527946	
+563	0.307307527946	
+564	0.307457550449	
+565	0.307457550449	
+566	0.307457550449	
+567	0.318651467107	
+568	0.318651467107	
+569	0.318651467107	
+570	0.318651467107	
+571	0.318651467107	
+572	0.318651467107	
+573	0.318651467107	
+574	0.318651467107	
+575	0.318651467107	
+576	0.318651467107	
+577	0.322327934005	
+578	0.322327934005	
+579	0.322477956508	
+580	0.322477956508	
+581	0.322477956508	
+582	0.322477956508	
+583	0.322477956508	
+584	0.322477956508	
+585	0.322477956508	
+586	0.322477956508	
+587	0.322477956508	
+588	0.322477956508	
+589	0.333671873166	
+590	0.333671873166	
+591	0.333671873166	
+592	0.333671873166	
+593	0.333671873166	
+594	0.333821895669	
+595	0.333821895669	
+596	0.333821895669	
+597	0.333821895669	
+598	0.333821895669	
+599	0.333821895669	
+600	0.333821895669	
+601	0.333821895669	
+602	0.333821895669	
+603	0.333821895669	
+604	0.333821895669	
+605	0.333821895669	
+606	0.333821895669	
+607	0.333821895669	
+608	0.333821895669	
+609	0.333971918172	
+610	0.333971918172	
+611	0.333971918172	
+612	0.34516583483	
+613	0.34516583483	
+614	0.34516583483	
+615	0.34516583483	
+616	0.34516583483	
+617	0.34516583483	
+618	0.34516583483	
+619	0.34516583483	
+620	0.34516583483	
+621	0.34516583483	
+622	0.34516583483	
+623	0.34516583483	
+624	0.345315857333	
+625	0.345315857333	
+626	0.345315857333	
+627	0.345315857333	
+628	0.345315857333	
+629	0.345315857333	
+630	0.345315857333	
+631	0.345315857333	
+632	0.345315857333	
+633	0.345315857333	
+634	0.356509773991	
+635	0.356509773991	
+636	0.356509773991	
+637	0.356509773991	
+638	0.356509773991	
+639	0.356659796494	
+640	0.356659796494	
+641	0.356659796494	
+642	0.356659796494	
+643	0.356659796494	
+644	0.356659796494	
+645	0.356659796494	
+646	0.356659796494	
+647	0.356659796494	
+648	0.356659796494	
+649	0.356659796494	
+650	0.356659796494	
+651	0.356659796494	
+652	0.356659796494	
+653	0.356659796494	
+654	0.356809818997	
+655	0.356809818997	
+656	0.368003735655	
+657	0.368003735655	
+658	0.368003735655	
+659	0.371680202553	
+660	0.371680202553	
+661	0.371680202553	
+662	0.371680202553	
+663	0.371680202553	
+664	0.371680202553	
+665	0.371680202553	
+666	0.371680202553	
+667	0.371680202553	
+668	0.371680202553	
+669	0.371830225056	
+670	0.371830225056	
+671	0.371830225056	
+672	0.371830225056	
+673	0.371830225056	
+674	0.371830225056	
+675	0.371830225056	
+676	0.371830225056	
+677	0.371830225056	
+678	0.383024141714	
+679	0.383024141714	
+680	0.383024141714	
+681	0.383024141714	
+682	0.383024141714	
+683	0.383024141714	
+684	0.383174164217	
+685	0.383174164217	
+686	0.383174164217	
+687	0.383174164217	
+688	0.383174164217	
+689	0.383174164217	
+690	0.383174164217	
+691	0.383174164217	
+692	0.383174164217	
+693	0.383174164217	
+694	0.383174164217	
+695	0.383174164217	
+696	0.383174164217	
+697	0.383174164217	
+698	0.383174164217	
+699	0.383324186721	
+700	0.394518103378	
+701	0.394518103378	
+702	0.394518103378	
+703	0.394518103378	
+704	0.394518103378	
+705	0.394518103378	
+706	0.394518103378	
+707	0.394518103378	
+708	0.394518103378	
+709	0.394518103378	
+710	0.394518103378	
+711	0.394518103378	
+712	0.394518103378	
+713	0.394518103378	
+714	0.394668125881	
+715	0.394668125881	
+716	0.394668125881	
+717	0.394668125881	
+718	0.394668125881	
+719	0.394668125881	
+720	0.394668125881	
+721	0.394668125881	
+722	0.394668125881	
+723	0.405862042539	
+724	0.405862042539	
+725	0.405862042539	
+726	0.405862042539	
+727	0.405862042539	
+728	0.405862042539	
+729	0.406012065042	
+730	0.406012065042	
+731	0.406012065042	
+732	0.406012065042	
+733	0.406012065042	
+734	0.406012065042	
+735	0.406012065042	
+736	0.406012065042	
+737	0.406012065042	
+738	0.406012065042	
+739	0.406012065042	
+740	0.406012065042	
+741	0.40968853194	
+742	0.40968853194	
+743	0.40968853194	
+744	0.409838554444	
+745	0.421032471101	
+746	0.421032471101	
+747	0.421032471101	
+748	0.421032471101	
+749	0.421032471101	
+750	0.421032471101	
+751	0.421032471101	
+752	0.421032471101	
+753	0.421032471101	
+754	0.421032471101	
+755	0.421032471101	
+756	0.421032471101	
+757	0.421032471101	
+758	0.421032471101	
+759	0.421182493605	
+760	0.421182493605	
+761	0.421182493605	
+762	0.421182493605	
+763	0.421182493605	
+764	0.421182493605	
+765	0.421182493605	
+766	0.421182493605	
+767	0.432376410262	
+768	0.432376410262	
+769	0.432376410262	
+770	0.432376410262	
+771	0.432376410262	
+772	0.432376410262	
+773	0.432376410262	
+774	0.432526432765	
+775	0.432526432765	
+776	0.432526432765	
+777	0.432526432765	
+778	0.432526432765	
+779	0.432526432765	
+780	0.432526432765	
+781	0.432526432765	
+782	0.432526432765	
+783	0.432526432765	
+784	0.432526432765	
+785	0.432526432765	
+786	0.432526432765	
+787	0.432526432765	
+788	0.432526432765	
+789	0.443870371926	
+790	0.443870371926	
+791	0.443870371926	
+792	0.443870371926	
+793	0.443870371926	
+794	0.443870371926	
+795	0.443870371926	
+796	0.443870371926	
+797	0.443870371926	
+798	0.443870371926	
+799	0.443870371926	
+800	0.443870371926	
+801	0.443870371926	
+802	0.443870371926	
+803	0.443870371926	
+804	0.444020394429	
+805	0.444020394429	
+806	0.444020394429	
+807	0.444020394429	
+808	0.444020394429	
+809	0.444020394429	
+810	0.444020394429	
+811	0.444020394429	
+812	0.455214311087	
+813	0.455214311087	
+814	0.455214311087	
+815	0.455214311087	
+816	0.455214311087	
+817	0.455214311087	
+818	0.455214311087	
+819	0.45536433359	
+820	0.45536433359	
+821	0.45536433359	
+822	0.45536433359	
+823	0.459040800488	
+824	0.459040800488	
+825	0.459040800488	
+826	0.459040800488	
+827	0.459040800488	
+828	0.459040800488	
+829	0.459040800488	
+830	0.459040800488	
+831	0.459040800488	
+832	0.459040800488	
+833	0.459040800488	
+834	0.470384739649	
+835	0.470384739649	
+836	0.470384739649	
+837	0.470384739649	
+838	0.470384739649	
+839	0.470384739649	
+840	0.470384739649	
+841	0.470384739649	
+842	0.470384739649	
+843	0.470384739649	
+844	0.470384739649	
+845	0.470384739649	
+846	0.470384739649	
+847	0.470384739649	
+848	0.470384739649	
+849	0.470534762153	
+850	0.470534762153	
+851	0.470534762153	
+852	0.470534762153	
+853	0.470534762153	
+854	0.470534762153	
+855	0.470534762153	
+856	0.48172867881	
+857	0.48172867881	
+858	0.48172867881	
+859	0.48172867881	
+860	0.48172867881	
+861	0.48172867881	
+862	0.48172867881	
+863	0.48172867881	
+864	0.481878701313	
+865	0.481878701313	
+866	0.481878701313	
+867	0.481878701313	
+868	0.481878701313	
+869	0.481878701313	
+870	0.481878701313	
+871	0.481878701313	
+872	0.481878701313	
+873	0.481878701313	
+874	0.481878701313	
+875	0.481878701313	
+876	0.481878701313	
+877	0.481878701313	
+878	0.493072617971	
+879	0.493222640474	
+880	0.493222640474	
+881	0.493222640474	
+882	0.493222640474	
+883	0.493222640474	
+884	0.493222640474	
+885	0.493222640474	
+886	0.493222640474	
+887	0.493222640474	
+888	0.493222640474	
+889	0.493222640474	
+890	0.493222640474	
+891	0.493222640474	
+892	0.493222640474	
+893	0.493222640474	
+894	0.493372662978	
+895	0.493372662978	
+896	0.493372662978	
+897	0.493372662978	
+898	0.493372662978	
+899	0.493372662978	
+900	0.504566579635	
+901	0.504566579635	
+902	0.504566579635	
+903	0.504566579635	
+904	0.504566579635	
+905	0.508243046533	
+906	0.508243046533	
+907	0.508243046533	
+908	0.508243046533	
+909	0.508393069037	
+910	0.508393069037	
+911	0.508393069037	
+912	0.508393069037	
+913	0.508393069037	
+914	0.508393069037	
+915	0.508393069037	
+916	0.508393069037	
+917	0.508393069037	
+918	0.508393069037	
+919	0.508393069037	
+920	0.508393069037	
+921	0.508393069037	
+922	0.508393069037	
+923	0.519586985694	
+924	0.519737008197	
+925	0.519737008197	
+926	0.519737008197	
+927	0.519737008197	
+928	0.519737008197	
+929	0.519737008197	
+930	0.519737008197	
+931	0.519737008197	
+932	0.519737008197	
+933	0.519737008197	
+934	0.519737008197	
+935	0.519737008197	
+936	0.519737008197	
+937	0.519737008197	
+938	0.519737008197	
+939	0.519887030701	
+940	0.519887030701	
+941	0.519887030701	
+942	0.519887030701	
+943	0.519887030701	
+944	0.519887030701	
+945	0.531080947358	
+946	0.531080947358	
+947	0.531080947358	
+948	0.531080947358	
+949	0.531080947358	
+950	0.531080947358	
+951	0.531080947358	
+952	0.531080947358	
+953	0.531080947358	
+954	0.531230969861	
+955	0.531230969861	
+956	0.531230969861	
+957	0.531230969861	
+958	0.531230969861	
+959	0.531230969861	
+960	0.531230969861	
+961	0.531230969861	
+962	0.531230969861	
+963	0.531230969861	
+964	0.531230969861	
+965	0.531230969861	
+966	0.531230969861	
+967	0.542424886519	
+968	0.542424886519	
+969	0.542574909022	
+970	0.542574909022	
+971	0.542574909022	
+972	0.542574909022	
+973	0.542574909022	
+974	0.542574909022	
+975	0.542574909022	
+976	0.542574909022	
+977	0.542574909022	
+978	0.542574909022	
+979	0.542574909022	
+980	0.542574909022	
+981	0.542574909022	
+982	0.542574909022	
+983	0.542574909022	
+984	0.542724931526	
+985	0.542724931526	
+986	0.546401398424	
+987	0.546401398424	
+988	0.546401398424	
+989	0.557595315081	
+990	0.557595315081	
+991	0.557595315081	
+992	0.557595315081	
+993	0.557595315081	
+994	0.557595315081	
+995	0.557595315081	
+996	0.557595315081	
+997	0.557595315081	
+998	0.557595315081	
+999	0.557745337585	
+1000	0.557745337585	
+1001	0.557745337585	
+1002	0.557745337585	
+1003	0.557745337585	
+1004	0.557745337585	
+1005	0.557745337585	
+1006	0.557745337585	
+1007	0.557745337585	
+1008	0.557745337585	
+1009	0.557745337585	
+1010	0.557745337585	
+1011	0.557745337585	
+1012	0.568939254242	
+1013	0.568939254242	
+1014	0.569089276745	
+1015	0.569089276745	
+1016	0.569089276745	
+1017	0.569089276745	
+1018	0.569089276745	
+1019	0.569089276745	
+1020	0.569089276745	
+1021	0.569089276745	
+1022	0.569089276745	
+1023	0.569089276745	
+1024	0.569089276745	
+1025	0.569089276745	
+1026	0.569089276745	
+1027	0.569089276745	
+1028	0.569089276745	
+1029	0.569239299249	
+1030	0.569239299249	
+1031	0.569239299249	
+1032	0.569239299249	
+1033	0.569239299249	
+1034	0.580433215906	
+1035	0.580433215906	
+1036	0.580433215906	
+1037	0.580433215906	
+1038	0.580433215906	
+1039	0.580433215906	
+1040	0.580433215906	
+1041	0.580433215906	
+1042	0.580433215906	
+1043	0.580433215906	
+1044	0.58058323841	
+1045	0.58058323841	
+1046	0.58058323841	
+1047	0.58058323841	
+1048	0.58058323841	
+1049	0.58058323841	
+1050	0.58058323841	
+1051	0.58058323841	
+1052	0.58058323841	
+1053	0.58058323841	
+1054	0.58058323841	
+1055	0.58058323841	
+1056	0.591777155067	
+1057	0.591777155067	
+1058	0.591777155067	
+1059	0.59192717757	
+1060	0.59192717757	
+1061	0.59192717757	
+1062	0.59192717757	
+1063	0.59192717757	
+1064	0.59192717757	
+1065	0.59192717757	
+1066	0.59192717757	
+1067	0.59192717757	
+1068	0.595603644469	
+1069	0.595603644469	
+1070	0.595603644469	
+1071	0.595603644469	
+1072	0.595603644469	
+1073	0.595603644469	
+1074	0.595753666972	
+1075	0.595753666972	
+1076	0.595753666972	
+1077	0.595753666972	
+1078	0.606947583629	
+1079	0.606947583629	
+1080	0.606947583629	
+1081	0.606947583629	
+1082	0.606947583629	
+1083	0.606947583629	
+1084	0.606947583629	
+1085	0.606947583629	
+1086	0.606947583629	
+1087	0.606947583629	
+1088	0.606947583629	
+1089	0.607097606133	
+1090	0.607097606133	
+1091	0.607097606133	
+1092	0.607097606133	
+1093	0.607097606133	
+1094	0.607097606133	
+1095	0.607097606133	
+1096	0.607097606133	
+1097	0.607097606133	
+1098	0.607097606133	
+1099	0.607097606133	
+1100	0.61829152279	
+1101	0.61829152279	
+1102	0.61829152279	
+1103	0.61829152279	
+1104	0.618441545294	
+1105	0.618441545294	
+1106	0.618441545294	
+1107	0.618441545294	
+1108	0.618441545294	
+1109	0.618441545294	
+1110	0.618441545294	
+1111	0.618441545294	
+1112	0.618441545294	
+1113	0.618441545294	
+1114	0.618441545294	
+1115	0.618441545294	
+1116	0.618441545294	
+1117	0.618441545294	
+1118	0.618441545294	
+1119	0.618591567797	
+1120	0.618591567797	
+1121	0.618591567797	
+1122	0.618591567797	
+1123	0.629785484454	
+1124	0.629785484454	
+1125	0.629785484454	
+1126	0.629785484454	
+1127	0.629785484454	
+1128	0.629785484454	
+1129	0.629785484454	
+1130	0.629785484454	
+1131	0.629785484454	
+1132	0.629785484454	
+1133	0.629785484454	
+1134	0.629935506958	
+1135	0.629935506958	
+1136	0.629935506958	
+1137	0.629935506958	
+1138	0.629935506958	
+1139	0.629935506958	
+1140	0.629935506958	
+1141	0.629935506958	
+1142	0.629935506958	
+1143	0.629935506958	
+1144	0.629935506958	
+1145	0.641129423615	
+1146	0.641129423615	
+1147	0.641129423615	
+1148	0.641129423615	
+1149	0.641279446118	
+1150	0.644955913017	
+1151	0.644955913017	
+1152	0.644955913017	
+1153	0.644955913017	
+1154	0.644955913017	
+1155	0.644955913017	
+1156	0.644955913017	
+1157	0.644955913017	
+1158	0.644955913017	
+1159	0.644955913017	
+1160	0.644955913017	
+1161	0.644955913017	
+1162	0.644955913017	
+1163	0.644955913017	
+1164	0.64510593552	
+1165	0.64510593552	
+1166	0.64510593552	
+1167	0.656299852177	
+1168	0.656299852177	
+1169	0.656299852177	
+1170	0.656299852177	
+1171	0.656299852177	
+1172	0.656299852177	
+1173	0.656299852177	
+1174	0.656299852177	
+1175	0.656299852177	
+1176	0.656299852177	
+1177	0.656299852177	
+1178	0.656299852177	
+1179	0.656449874681	
+1180	0.656449874681	
+1181	0.656449874681	
+1182	0.656449874681	
+1183	0.656449874681	
+1184	0.656449874681	
+1185	0.656449874681	
+1186	0.656449874681	
+1187	0.656449874681	
+1188	0.656449874681	
+1189	0.667643791338	
+1190	0.667643791338	
+1191	0.667643791338	
+1192	0.667643791338	
+1193	0.667643791338	
+1194	0.667793813842	
+1195	0.667793813842	
+1196	0.667793813842	
+1197	0.667793813842	
+1198	0.667793813842	
+1199	0.667793813842	
+1200	0.667793813842	
+1201	0.667793813842	
+1202	0.667793813842	
+1203	0.667793813842	
+1204	0.667793813842	
+1205	0.667793813842	
+1206	0.667793813842	
+1207	0.667793813842	
+1208	0.667793813842	
+1209	0.667943836345	
+1210	0.667943836345	
+1211	0.667943836345	
+1212	0.679137753002	
+1213	0.679137753002	
+1214	0.679137753002	
+1215	0.679137753002	
+1216	0.679137753002	
+1217	0.679137753002	
+1218	0.679137753002	
+1219	0.679137753002	
+1220	0.679137753002	
+1221	0.679137753002	
+1222	0.679137753002	
+1223	0.679137753002	
+1224	0.679287775506	
+1225	0.679287775506	
+1226	0.679287775506	
+1227	0.679287775506	
+1228	0.679287775506	
+1229	0.679287775506	
+1230	0.679287775506	
+1231	0.679287775506	
+1232	0.682964242404	
+1233	0.682964242404	
+1234	0.694158159061	
+1235	0.694158159061	
+1236	0.694158159061	
+1237	0.694158159061	
+1238	0.694158159061	
+1239	0.694308181565	
+1240	0.694308181565	
+1241	0.694308181565	
+1242	0.694308181565	
+1243	0.694308181565	
+1244	0.694308181565	
+1245	0.694308181565	
+1246	0.694308181565	
+1247	0.694308181565	
+1248	0.694308181565	
+1249	0.694308181565	
+1250	0.694308181565	
+1251	0.694308181565	
+1252	0.694308181565	
+1253	0.694308181565	
+1254	0.694458204068	
+1255	0.694458204068	
+1256	0.705652120726	
+1257	0.705652120726	
+1258	0.705652120726	
+1259	0.705652120726	
+1260	0.705652120726	
+1261	0.705652120726	
+1262	0.705652120726	
+1263	0.705652120726	
+1264	0.705652120726	
+1265	0.705652120726	
+1266	0.705652120726	
+1267	0.705652120726	
+1268	0.705652120726	
+1269	0.705802143229	
+1270	0.705802143229	
+1271	0.705802143229	
+1272	0.705802143229	
+1273	0.705802143229	
+1274	0.705802143229	
+1275	0.705802143229	
+1276	0.705802143229	
+1277	0.705802143229	
+1278	0.716996059886	
+1279	0.716996059886	
+1280	0.716996059886	
+1281	0.716996059886	
+1282	0.716996059886	
+1283	0.716996059886	
+1284	0.71714608239	
+1285	0.71714608239	
+1286	0.71714608239	
+1287	0.71714608239	
+1288	0.71714608239	
+1289	0.71714608239	
+1290	0.71714608239	
+1291	0.71714608239	
+1292	0.71714608239	
+1293	0.71714608239	
+1294	0.71714608239	
+1295	0.71714608239	
+1296	0.71714608239	
+1297	0.71714608239	
+1298	0.71714608239	
+1299	0.717296104893	
+1300	0.72849002155	
+1301	0.72849002155	
+1302	0.72849002155	
+1303	0.72849002155	
+1304	0.72849002155	
+1305	0.72849002155	
+1306	0.72849002155	
+1307	0.72849002155	
+1308	0.72849002155	
+1309	0.72849002155	
+1310	0.72849002155	
+1311	0.72849002155	
+1312	0.72849002155	
+1313	0.72849002155	
+1314	0.732316510952	
+1315	0.732316510952	
+1316	0.732316510952	
+1317	0.732316510952	
+1318	0.732316510952	
+1319	0.732316510952	
+1320	0.732316510952	
+1321	0.732316510952	
+1322	0.732316510952	
+1323	0.743510427609	
+1324	0.743510427609	
+1325	0.743510427609	
+1326	0.743510427609	
+1327	0.743510427609	
+1328	0.743510427609	
+1329	0.743660450113	
+1330	0.743660450113	
+1331	0.743660450113	
+1332	0.743660450113	
+1333	0.743660450113	
+1334	0.743660450113	
+1335	0.743660450113	
+1336	0.743660450113	
+1337	0.743660450113	
+1338	0.743660450113	
+1339	0.743660450113	
+1340	0.743660450113	
+1341	0.743660450113	
+1342	0.743660450113	
+1343	0.743660450113	
+1344	0.743810472616	
+1345	0.755004389274	
+1346	0.755004389274	
+1347	0.755004389274	
+1348	0.755004389274	
+1349	0.755004389274	
+1350	0.755004389274	
+1351	0.755004389274	
+1352	0.755004389274	
+1353	0.755004389274	
+1354	0.755004389274	
+1355	0.755004389274	
+1356	0.755004389274	
+1357	0.755004389274	
+1358	0.755004389274	
+1359	0.755154411777	
+1360	0.755154411777	
+1361	0.755154411777	
+1362	0.755154411777	
+1363	0.755154411777	
+1364	0.755154411777	
+1365	0.755154411777	
+1366	0.755154411777	
+1367	0.766348328434	
+1368	0.766348328434	
+1369	0.766348328434	
+1370	0.766348328434	
+1371	0.766348328434	
+1372	0.766348328434	
+1373	0.766348328434	
+1374	0.766498350938	
+1375	0.766498350938	
+1376	0.766498350938	
+1377	0.766498350938	
+1378	0.766498350938	
+1379	0.766498350938	
+1380	0.766498350938	
+1381	0.766498350938	
+1382	0.766498350938	
+1383	0.766498350938	
+1384	0.766498350938	
+1385	0.766498350938	
+1386	0.766498350938	
+1387	0.766498350938	
+1388	0.766498350938	
+1389	0.777842290099	
+1390	0.777842290099	
+1391	0.777842290099	
+1392	0.777842290099	
+1393	0.777842290099	
+1394	0.777842290099	
+1395	0.777842290099	
+1396	0.781518756997	
+1397	0.781518756997	
+1398	0.781518756997	
+1399	0.781518756997	
+1400	0.781518756997	
+1401	0.781518756997	
+1402	0.781518756997	
+1403	0.781518756997	
+1404	0.7816687795	
+1405	0.7816687795	
+1406	0.7816687795	
+1407	0.7816687795	
+1408	0.7816687795	
+1409	0.7816687795	
+1410	0.7816687795	
+1411	0.7816687795	
+1412	0.792862696158	
+1413	0.792862696158	
+1414	0.792862696158	
+1415	0.792862696158	
+1416	0.792862696158	
+1417	0.792862696158	
+1418	0.792862696158	
+1419	0.793012718661	
+1420	0.793012718661	
+1421	0.793012718661	
+1422	0.793012718661	
+1423	0.793012718661	
+1424	0.793012718661	
+1425	0.793012718661	
+1426	0.793012718661	
+1427	0.793012718661	
+1428	0.793012718661	
+1429	0.793012718661	
+1430	0.793012718661	
+1431	0.793012718661	
+1432	0.793012718661	
+1433	0.793012718661	
+1434	0.804356657822	
+1435	0.804356657822	
+1436	0.804356657822	
+1437	0.804356657822	
+1438	0.804356657822	
+1439	0.804356657822	
+1440	0.804356657822	
+1441	0.804356657822	
+1442	0.804356657822	
+1443	0.804356657822	
+1444	0.804356657822	
+1445	0.804356657822	
+1446	0.804356657822	
+1447	0.804356657822	
+1448	0.804356657822	
+1449	0.804506680325	
+1450	0.804506680325	
+1451	0.804506680325	
+1452	0.804506680325	
+1453	0.804506680325	
+1454	0.804506680325	
+1455	0.804506680325	
+1456	0.815700596982	
+1457	0.815700596982	
+1458	0.815700596982	
+1459	0.815700596982	
+1460	0.815700596982	
+1461	0.815700596982	
+1462	0.815700596982	
+1463	0.815700596982	
+1464	0.815850619486	
+1465	0.815850619486	
+1466	0.815850619486	
+1467	0.815850619486	
+1468	0.815850619486	
+1469	0.815850619486	
+1470	0.815850619486	
+1471	0.815850619486	
+1472	0.815850619486	
+1473	0.815850619486	
+1474	0.815850619486	
+1475	0.815850619486	
+1476	0.815850619486	
+1477	0.815850619486	
+1478	0.830721003042	
+1479	0.830871025545	
+1480	0.830871025545	
+1481	0.830871025545	
+1482	0.830871025545	
+1483	0.830871025545	
+1484	0.830871025545	
+1485	0.830871025545	
+1486	0.830871025545	
+1487	0.830871025545	
+1488	0.830871025545	
+1489	0.830871025545	
+1490	0.830871025545	
+1491	0.830871025545	
+1492	0.830871025545	
+1493	0.830871025545	
+1494	0.831021048048	
+1495	0.831021048048	
+1496	0.831021048048	
+1497	0.831021048048	
+1498	0.831021048048	
+1499	0.831021048048	
+1500	0.842214964706	
+1501	0.842214964706	
+1502	0.842214964706	
+1503	0.842214964706	
+1504	0.842214964706	
+1505	0.842214964706	
+1506	0.842214964706	
+1507	0.842214964706	
+1508	0.842214964706	
+1509	0.842214964706	
+1510	0.842364987209	
+1511	0.842364987209	
+1512	0.842364987209	
+1513	0.842364987209	
+1514	0.842364987209	
+1515	0.842364987209	
+1516	0.842364987209	
+1517	0.842364987209	
+1518	0.842364987209	
+1519	0.842364987209	
+1520	0.842364987209	
+1521	0.842364987209	
+1522	0.842364987209	
+1523	0.853558903866	
+1524	0.853558903866	
+1525	0.85370892637	
+1526	0.85370892637	
+1527	0.85370892637	
+1528	0.85370892637	
+1529	0.85370892637	
+1530	0.85370892637	
+1531	0.85370892637	
+1532	0.85370892637	
+1533	0.85370892637	
+1534	0.85370892637	
+1535	0.85370892637	
+1536	0.85370892637	
+1537	0.85370892637	
+1538	0.85370892637	
+1539	0.85370892637	
+1540	0.853858948873	
+1541	0.853858948873	
+1542	0.853858948873	
+1543	0.853858948873	
+1544	0.853858948873	
+1545	0.865052865531	
+1546	0.865052865531	
+1547	0.865052865531	
+1548	0.865052865531	
+1549	0.865052865531	
+1550	0.865052865531	
+1551	0.865052865531	
+1552	0.865052865531	
+1553	0.865052865531	
+1554	0.865052865531	
+1555	0.865202888034	
+1556	0.865202888034	
+1557	0.865202888034	
+1558	0.865202888034	
+1559	0.865202888034	
+1560	0.868879354932	
+1561	0.868879354932	
+1562	0.868879354932	
+1563	0.868879354932	
+1564	0.868879354932	
+1565	0.868879354932	
+1566	0.868879354932	
+1567	0.88007327159	
+1568	0.88007327159	
+1569	0.88007327159	
+1570	0.880223294093	
+1571	0.880223294093	
+1572	0.880223294093	
+1573	0.880223294093	
+1574	0.880223294093	
+1575	0.880223294093	
+1576	0.880223294093	
+1577	0.880223294093	
+1578	0.880223294093	
+1579	0.880223294093	
+1580	0.880223294093	
+1581	0.880223294093	
+1582	0.880223294093	
+1583	0.880223294093	
+1584	0.880223294093	
+1585	0.880373316596	
+1586	0.880373316596	
+1587	0.880373316596	
+1588	0.880373316596	
+1589	0.891567233254	
+1590	0.891567233254	
+1591	0.891567233254	
+1592	0.891567233254	
+1593	0.891567233254	
+1594	0.891567233254	
+1595	0.891567233254	
+1596	0.891567233254	
+1597	0.891567233254	
+1598	0.891567233254	
+1599	0.891567233254	
+1600	0.891717255757	
+1601	0.891717255757	
+1602	0.891717255757	
+1603	0.891717255757	
+1604	0.891717255757	
+1605	0.891717255757	
+1606	0.891717255757	
+1607	0.891717255757	
+1608	0.891717255757	
+1609	0.891717255757	
+1610	0.891717255757	
+1611	0.891717255757	
+1612	0.902911172415	
+1613	0.902911172415	
+1614	0.902911172415	
+1615	0.903061194918	
+1616	0.903061194918	
+1617	0.903061194918	
+1618	0.903061194918	
+1619	0.903061194918	
+1620	0.903061194918	
+1621	0.903061194918	
+1622	0.903061194918	
+1623	0.903061194918	
+1624	0.903061194918	
+1625	0.903061194918	
+1626	0.903061194918	
+1627	0.903061194918	
+1628	0.903061194918	
+1629	0.903061194918	
+1630	0.903211217421	
+1631	0.903211217421	
+1632	0.903211217421	
+1633	0.903211217421	
+1634	0.914405134079	
+1635	0.914405134079	
+1636	0.914405134079	
+1637	0.914405134079	
+1638	0.914405134079	
+1639	0.914405134079	
+1640	0.914405134079	
+1641	0.914405134079	
+1642	0.918081600977	
+1643	0.918081600977	
+1644	0.918081600977	
+1645	0.91823162348	
+1646	0.91823162348	
+1647	0.91823162348	
+1648	0.91823162348	
+1649	0.91823162348	
+1650	0.91823162348	
+1651	0.91823162348	
+1652	0.91823162348	
+1653	0.91823162348	
+1654	0.91823162348	
+1655	0.91823162348	
+1656	0.929425540138	
+1657	0.929425540138	
+1658	0.929425540138	
+1659	0.929425540138	
+1660	0.929575562641	
+1661	0.929575562641	
+1662	0.929575562641	
+1663	0.929575562641	
+1664	0.929575562641	
+1665	0.929575562641	
+1666	0.929575562641	
+1667	0.929575562641	
+1668	0.929575562641	
+1669	0.929575562641	
+1670	0.929575562641	
+1671	0.929575562641	
+1672	0.929575562641	
+1673	0.929575562641	
+1674	0.929575562641	
+1675	0.929725585144	
+1676	0.929725585144	
+1677	0.929725585144	
+1678	0.940919501802	
+1679	0.940919501802	
+1680	0.940919501802	
+1681	0.940919501802	
+1682	0.940919501802	
+1683	0.940919501802	
+1684	0.940919501802	
+1685	0.940919501802	
+1686	0.940919501802	
+1687	0.940919501802	
+1688	0.940919501802	
+1689	0.940919501802	
+1690	0.941069524305	
+1691	0.941069524305	
+1692	0.941069524305	
+1693	0.941069524305	
+1694	0.941069524305	
+1695	0.941069524305	
+1696	0.941069524305	
+1697	0.941069524305	
+1698	0.941069524305	
+1699	0.941069524305	
+1700	0.952263440963	
+1701	0.952263440963	
+1702	0.952263440963	
+1703	0.952263440963	
+1704	0.952263440963	
+1705	0.952413463466	
+1706	0.952413463466	
+1707	0.952413463466	
+1708	0.952413463466	
+1709	0.952413463466	
+1710	0.952413463466	
+1711	0.952413463466	
+1712	0.952413463466	
+1713	0.952413463466	
+1714	0.952413463466	
+1715	0.952413463466	
+1716	0.952413463466	
+1717	0.952413463466	
+1718	0.952413463466	
+1719	0.952413463466	
+1720	0.952563485969	
+1721	0.952563485969	
+1722	0.952563485969	
+1723	0.963757402627	
+1724	0.967433869525	
+1725	0.967433869525	
+1726	0.967433869525	
+1727	0.967433869525	
+1728	0.967433869525	
+1729	0.967433869525	
+1730	0.967433869525	
+1731	0.967433869525	
+1732	0.967433869525	
+1733	0.967433869525	
+1734	0.967433869525	
+1735	0.967583892028	
+1736	0.967583892028	
+1737	0.967583892028	
+1738	0.967583892028	
+1739	0.967583892028	
+1740	0.967583892028	
+1741	0.967583892028	
+1742	0.967583892028	
+1743	0.967583892028	
+1744	0.967583892028	
+1745	0.978777808686	
+1746	0.978777808686	
+1747	0.978777808686	
+1748	0.978777808686	
+1749	0.978777808686	
diff --git a/LDA.c b/LDA.c
new file mode 100644
index 0000000..49cc141
--- /dev/null
+++ b/LDA.c
@@ -0,0 +1,280 @@
+//Title:          LDA.c
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+// LDA support functions.
+
+#include "CMemLeak.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include "LDA.h"
+#include "Utils.h"
+#include "Inspect.h"
+#include "Errors.h"
+#include "Spectrum.h"
+#include "Trie.h"
+#include "Score.h"
+
+// Global variables:
+LDAModel* PMCCharge1LDA = NULL;
+LDAModel* PMCCharge2LDA = NULL;
+LDAModel* PMCCharge3LDA = NULL;
+
+LDAModel* CCModel1LDA = NULL;
+LDAModel* CCModel2LDA = NULL;
+
+LDAModel* MQModel2LDA = NULL;
+LDAModel* MQModel3LDA = NULL;
+
+
+void LoadCCModelLDA(int ForceRefresh)
+{
+    char FilePath[2048];
+    if (CCModel1LDA)
+    {
+        if (ForceRefresh)
+        {
+            FreeLDAModel(CCModel1LDA);
+            FreeLDAModel(CCModel2LDA);
+        }
+        else
+        {
+            return;
+        }
+    }
+    sprintf(FilePath, "%sCCLDA1.model", GlobalOptions->ResourceDir);
+    CCModel1LDA = LoadLDAModel(FilePath);
+    sprintf(FilePath, "%sCCLDA2.model", GlobalOptions->ResourceDir);
+    CCModel2LDA = LoadLDAModel(FilePath);
+}
+
+void FreeLDAModels()
+{
+    FreeLDAModel(PMCCharge1LDA);
+    PMCCharge1LDA = NULL;
+    FreeLDAModel(PMCCharge2LDA);
+    PMCCharge2LDA = NULL;
+    FreeLDAModel(PMCCharge3LDA);
+    PMCCharge3LDA = NULL;
+    FreeLDAModel(MQModel2LDA);
+    MQModel2LDA = NULL;
+    FreeLDAModel(MQModel3LDA);
+    MQModel3LDA = NULL;
+}
+
+// Load linear discriminant analysis (LDA) models for parent mass
+// correction (PMC).  Special models are used for phosphorylation searches.
+void LoadPMCLDA(int ForceLoad)
+{
+    char FilePath[2048];
+    if (PMCCharge1LDA)
+    {
+        if (ForceLoad)
+        {
+            FreeLDAModel(PMCCharge1LDA);
+            FreeLDAModel(PMCCharge2LDA);
+            FreeLDAModel(PMCCharge3LDA);
+        }
+        else
+        {
+            return;
+        }
+    }
+    sprintf(FilePath, "%sPMCLDA1.model", GlobalOptions->ResourceDir);
+    PMCCharge1LDA = LoadLDAModel(FilePath);
+    if (GlobalOptions->PhosphorylationFlag)
+    {
+        // Load phosphorylation specific models, only different for charge 2 and 3
+        sprintf(FilePath, "%sPMCLDA2Phos.model", GlobalOptions->ResourceDir);
+        PMCCharge2LDA = LoadLDAModel(FilePath);
+        sprintf(FilePath, "%sPMCLDA3Phos.model", GlobalOptions->ResourceDir);
+        PMCCharge3LDA = LoadLDAModel(FilePath);
+    }
+    else
+    {
+        sprintf(FilePath, "%sPMCLDA2.model", GlobalOptions->ResourceDir);
+        PMCCharge2LDA = LoadLDAModel(FilePath);
+        sprintf(FilePath, "%sPMCLDA3.model", GlobalOptions->ResourceDir);
+        PMCCharge3LDA = LoadLDAModel(FilePath);
+    }
+}
+
+LDAModel* LoadLDAModel(char* LDAModelFileName)
+{
+    FILE* File;
+    LDAModel* Model;
+    double Value;
+    int BytesRead;
+    //
+    File = fopen(LDAModelFileName, "rb");
+    if (!File)
+    {
+        return NULL;
+    }
+    Model = (LDAModel*)calloc(1, sizeof(LDAModel));
+    //ReadBinary(&Value, sizeof(float), 1, File);
+    ReadBinary(&Model->FeatureCount, sizeof(int), 1, File);
+    assert(Model->FeatureCount >= 1 && Model->FeatureCount < 100);
+    Model->ScaledVector = (double*)calloc(Model->FeatureCount, sizeof(double));
+    Model->TempProductVector = (double*)calloc(Model->FeatureCount, sizeof(double));
+    
+    // Read min and max values:
+    Model->MinValues = (double*)calloc(Model->FeatureCount, sizeof(double));
+    ReadBinary(Model->MinValues, sizeof(double), Model->FeatureCount, File);
+    Model->MaxValues = (double*)calloc(Model->FeatureCount, sizeof(double));
+    ReadBinary(Model->MaxValues, sizeof(double), Model->FeatureCount, File);
+    // Read mean true vector and mean false vector:
+    Model->MeanVectorTrue = (double*)calloc(Model->FeatureCount, sizeof(double));
+    ReadBinary(Model->MeanVectorTrue, sizeof(double), Model->FeatureCount, File);
+    Model->MeanVectorFalse = (double*)calloc(Model->FeatureCount, sizeof(double));
+    ReadBinary(Model->MeanVectorFalse, sizeof(double), Model->FeatureCount, File);
+    // Read constant true and constant false:
+    ReadBinary(&Model->ConstantTrue, sizeof(double), 1, File);
+    ReadBinary(&Model->ConstantFalse, sizeof(double), 1, File);
+    // Read inverted covariance matrix:
+    Model->CovInv = (double*)calloc(Model->FeatureCount * Model->FeatureCount, sizeof(double));
+    ReadBinary(Model->CovInv, sizeof(double), Model->FeatureCount * Model->FeatureCount, File);
+    // Verify that we're at EOF:
+    BytesRead = ReadBinary(&Value, sizeof(float), 1, File);
+    assert(!BytesRead);
+    //printf("\nLoading LDA from %s:\n", LDAModelFileName);
+    //printf("%d features\n", Model->FeatureCount);
+    //printf("MinValues: %.4f...%.4f\n", Model->MinValues[0], Model->MinValues[Model->FeatureCount - 1]);
+    //printf("MaxValues: %.4f...%.4f\n", Model->MaxValues[0], Model->MaxValues[Model->FeatureCount - 1]);
+    //printf("MeanVectorTrue: %.4f...%.4f\n", Model->MeanVectorTrue[0], Model->MeanVectorTrue[Model->FeatureCount - 1]);
+    //printf("MeanVectorFalse: %.4f...%.4f\n", Model->MeanVectorFalse[0], Model->MeanVectorFalse[Model->FeatureCount - 1]);
+    //printf("CovInv: %.4f, %.4f, ..., %.4f, %.4f\n",  Model->CovInv[0], Model->CovInv[1], 
+    //    Model->CovInv[Model->FeatureCount * Model->FeatureCount - 2], 
+    //    Model->CovInv[Model->FeatureCount * Model->FeatureCount - 1]);
+    //printf("ConstantTrue %.4f, ConstantFalse %.4f\n", Model->ConstantTrue, Model->ConstantFalse);
+    fclose(File);
+    return Model;
+}
+
+void FreeLDAModel(LDAModel* Model)
+{
+    if (!Model)
+    {
+        return;
+    }
+    SafeFree(Model->MinValues);
+    SafeFree(Model->MaxValues);
+    SafeFree(Model->CovInv);
+    SafeFree(Model->ScaledVector);
+    SafeFree(Model->TempProductVector);
+    SafeFree(Model->MeanVectorTrue);
+    SafeFree(Model->MeanVectorFalse);
+    SafeFree(Model);
+}
+
+float ApplyLDAModel(LDAModel* Model, float* Features)
+{
+    int FeatureIndex;
+    double HalfRange;
+    int ColumnIndex;
+    double ProductTrue;
+    double ProductFalse;
+    //
+    //printf("\nCFeatures %.4f...%.4f\n", Features[0], Features[Model->FeatureCount - 1]);
+    // Scale the features into [-1, 1]:
+    for (FeatureIndex = 0; FeatureIndex < Model->FeatureCount; FeatureIndex++)
+    {
+        HalfRange = (float)((Model->MaxValues[FeatureIndex] - Model->MinValues[FeatureIndex]) / 2.0);
+        Model->ScaledVector[FeatureIndex] = (float)((Features[FeatureIndex] - Model->MinValues[FeatureIndex]) / HalfRange - 1.0);
+    }
+    //printf("Scaled vector %.4f...%.4f\n", Model->ScaledVector[0], Model->ScaledVector[Model->FeatureCount - 1]);
+    // Compute the product of the inverse covariance matrix with our feature vector:
+    for (FeatureIndex = 0; FeatureIndex < Model->FeatureCount; FeatureIndex++)
+    {
+        Model->TempProductVector[FeatureIndex] = 0;
+        for (ColumnIndex = 0; ColumnIndex < Model->FeatureCount; ColumnIndex++)
+        {
+            Model->TempProductVector[FeatureIndex] += (float)(Model->ScaledVector[ColumnIndex] * Model->CovInv[FeatureIndex * Model->FeatureCount + ColumnIndex]);
+        }
+    }
+    //printf("Temp product vector vector %.4f...%.4f\n", Model->TempProductVector[0], Model->TempProductVector[Model->FeatureCount - 1]);
+
+    // Compute u0 * C-1 * X and u1 * C-1 * X
+    ProductTrue = 0;
+    ProductFalse = 0;
+    for (FeatureIndex = 0; FeatureIndex < Model->FeatureCount; FeatureIndex++)
+    {
+        ProductTrue += (float)(Model->MeanVectorTrue[FeatureIndex] * Model->TempProductVector[FeatureIndex]);
+        ProductFalse += (float)(Model->MeanVectorFalse[FeatureIndex] * Model->TempProductVector[FeatureIndex]);
+    }
+    ProductTrue += Model->ConstantTrue;
+    ProductFalse += Model->ConstantFalse;
+    //printf("ProdTrue %.4f ProdFalse %.4f result %.4f\n", ProductTrue, ProductFalse, ProductTrue - ProductFalse);
+    //ProductTrue += (float)Model->Sub;
+    //ProdFalse += (float)SubProdFalse;
+    //printf("%.2f\t%.2f\t%.2f\t\n", (ProdTrue - ProdFalse), ProdTrue, ProdFalse);
+    return (float)(ProductTrue - ProductFalse);
+}
+
+void InitPValueLDA()
+{
+    char FilePath[MAX_FILENAME_LEN];
+    //
+    if (!MQModel2LDA)
+    {
+        sprintf(FilePath, "%s%s.model", GlobalOptions->ResourceDir, "MQScoreLDA2");
+        MQModel2LDA = LoadLDAModel(FilePath);
+    }
+    if (!MQModel3LDA)
+    {
+        sprintf(FilePath, "%s%s.model", GlobalOptions->ResourceDir, "MQScoreLDA3");
+        MQModel3LDA = LoadLDAModel(FilePath);
+    }
+}
+
+float LDAComputeMQScore(MSSpectrum* Spectrum, Peptide* Match, float* MQFeatures)
+{
+    LDAModel* Model;
+    float Score;
+
+    if (Spectrum->Charge < 3)
+    {
+        Model = MQModel2LDA;
+    }
+    else
+    {
+        Model = MQModel3LDA;
+    }
+    if (!Model)
+    {
+        return 0.0;
+    }
+    Score = ApplyLDAModel(Model, MQFeatures);
+    Score = GetPenalizedScore(Spectrum, Match, Score);
+    return Score;
+
+}
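
The score returned by ApplyLDAModel above is the usual two-class linear
discriminant: each feature is scaled into roughly [-1, 1] with the stored
per-feature min/max, multiplied by the inverted pooled covariance matrix, and
compared against the "true" and "false" class means.  A minimal numpy sketch
of the same computation (the argument names and array shapes here are
illustrative assumptions; the C code keeps everything inside the LDAModel
struct) is:

    import numpy as np

    def apply_lda_model(features, min_values, max_values, cov_inv,
                        mean_true, mean_false, const_true, const_false):
        # Scale each feature into roughly [-1, 1] using the stored min/max.
        half_range = (max_values - min_values) / 2.0
        scaled = (features - min_values) / half_range - 1.0
        # Multiply by the inverted pooled covariance matrix.
        product = cov_inv.dot(scaled)
        # Discriminants for the "true" and "false" classes; their difference
        # is the returned score.
        score_true = mean_true.dot(product) + const_true
        score_false = mean_false.dot(product) + const_false
        return score_true - score_false
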
diff --git a/LDA.h b/LDA.h
new file mode 100644
index 0000000..43289a1
--- /dev/null
+++ b/LDA.h
@@ -0,0 +1,59 @@
+//Title:          LDA.h
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+#ifndef LDA_H
+#define LDA_H
+
+typedef struct LDAModel
+{
+    int FeatureCount;
+    double* MinValues;
+    double* MaxValues;
+    double* CovInv;
+    double* MeanVectorTrue;
+    double* MeanVectorFalse;
+    double* ScaledVector;
+    double* TempProductVector;
+    double ConstantFalse;
+    double ConstantTrue;
+} LDAModel;
+
+LDAModel* LoadLDAModel(char* LDAModelFileName);
+void FreeLDAModel(LDAModel* Model);
+float ApplyLDAModel(LDAModel* Model, float* Features);
+void LoadPMCLDA(int ForceLoad);
+void FreeLDAModels();
+void LoadCCModelLDA(int ForceRefresh);
+void InitPValueLDA();
+
+#endif // LDA_H
+
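
LoadLDAModel in LDA.c reads each .model file as a flat binary record: an int
feature count, followed by the min, max, mean-true and mean-false vectors, the
two constants, and the flattened FeatureCount x FeatureCount inverted
covariance matrix, all as doubles.  As a rough sketch only (the real training
and export code is not part of these files, and native byte order and
alignment are assumed to match the plain binary reads in the C code), a writer
for that layout could look like:

    import struct

    def WriteLDAModel(Path, MinValues, MaxValues, MeanTrue, MeanFalse,
                      ConstantTrue, ConstantFalse, CovInvFlat):
        # Layout mirrors the ReadBinary calls in LoadLDAModel (LDA.c).
        FeatureCount = len(MinValues)
        ModelFile = open(Path, "wb")
        ModelFile.write(struct.pack("i", FeatureCount))
        for Vector in (MinValues, MaxValues, MeanTrue, MeanFalse):
            ModelFile.write(struct.pack("%dd" % FeatureCount, *Vector))
        ModelFile.write(struct.pack("dd", ConstantTrue, ConstantFalse))
        ModelFile.write(struct.pack("%dd" % (FeatureCount * FeatureCount), *CovInvFlat))
        ModelFile.close()
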
diff --git a/LDA.py b/LDA.py
new file mode 100644
index 0000000..c20f373
--- /dev/null
+++ b/LDA.py
@@ -0,0 +1,469 @@
+#Title:          LDA.py
+#Author:         Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+#Created:        2005
+# Copyright 2007,2008,2009 The Regents of the University of California
+# All Rights Reserved
+#
+# Permission to use, copy, modify and distribute any part of this
+# program for educational, research and non-profit purposes, by non-profit
+# institutions only, without fee, and without a written agreement is hereby
+# granted, provided that the above copyright notice, this paragraph and
+# the following three paragraphs appear in all copies.
+#
+# Those desiring to incorporate this work into commercial
+# products or use for commercial purposes should contact the Technology
+# Transfer & Intellectual Property Services, University of California,
+# San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+# Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+#
+# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+# INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+# IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+# OF SUCH DAMAGE.
+#
+# THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+# OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+# ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+# REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+# EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+# THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+
+"""
+Linear discriminant analysis
+Assumes the input file is tab-delimited, with category in the first column, and float
+values in the remaining columns.
+"""
+
+USE_NUMPY = 1
+import traceback
+try:
+    if USE_NUMPY:
+        from numpy import *
+        import numpy.linalg
+        MatrixMulitply = dot
+        InvertMatrix = numpy.linalg.inv
+        FloatType = float
+    else:
+        from Numeric import *
+        import LinearAlgebra
+        InvertMatrix = LinearAlgebra.inverse
+        MatrixMulitply = matrixmultiply
+        FloatType = Float
+except:
+    print "\n* Warning: Unable to import numpy.  LDA training not available."
+    print "  Please install NumPy (see http://numpy.scipy.org/ for details)"
+    print "  Error details are shown here:"
+    traceback.print_exc()
+    
+import math
+import os
+import sys
+import random
+import struct
+import traceback
+
+ForbiddenFeatures = [2, 3, 4, 5, 13, 31, 32, 33, 34, 43, 47] #[2,3,4]
+
+def PrintHistogram(Histogram, HistoFile):
+    Bins = Histogram.keys()
+    Bins.sort()
+    #Bins.reverse()
+    TotalBads = 0
+    TotalGoods = 0
+    print "\nHistogram results:"
+    for Bin in Bins:
+        TotalBads += Histogram[Bin][0]
+    Bads = TotalBads
+    for Bin in range(Bins[0], Bins[-1]):
+        if Histogram.has_key(Bin):
+            Bads -= Histogram[Bin][0]
+        PValue = Bads / float(TotalBads)
+        print "%s\t%s\t%s\t%s\t"%(Bin, PValue, Bads, TotalBads)
+        if HistoFile and Bin >= -70 and Bin < 150:
+            PValue = min(0.99, max(0.0001, PValue))
+            Str = struct.pack("<f", PValue)
+            #print "PValue struct:", Str
+            HistoFile.write(Str)
+            
+class LDAClassifier:
+    def __init__(self):
+        pass
+    def GetCovarianceArray(self, VectorList):
+        Size = len(VectorList[0])
+        VectorCount = float(len(VectorList))
+        C = zeros((Size, Size), FloatType)
+        for Vector in VectorList:
+            for X in range(Size):
+                for Y in range(Size):
+                    C[X][Y] += Vector[X] * Vector[Y] / VectorCount
+        return C
+    def LoadVectors(self, FileName, CategoryBit, FeatureList):
+        Size = len(FeatureList)
+        self.GoodVectors = []
+        self.BadVectors = []
+        # Iterate over file lines, and read vectors in:
+        File = open(FileName, "r")
+        for FileLine in File.xreadlines():
+            if FileLine[0] == "#":
+                continue # comment
+            Bits = FileLine.split("\t")
+            try:
+                Category = int(Bits[CategoryBit])
+            except:
+                continue
+            # Turn -1 vs 1 into 0 vs 1:
+            if Category < 0:
+                Category = 0
+            #for X in range(len(Bits)):
+            #    print "%s: %s"%(X, Bits[X])
+            Vector = []
+            try:
+                for Index in FeatureList:
+                    if Index >= len(Bits) or not Bits[Index].strip():
+                        Vector.append(0)
+                    else:
+                        Vector.append(float(Bits[Index]))
+            except:
+                traceback.print_exc()
+                print Bits
+                continue
+            if Category:
+                self.GoodVectors.append(Vector)
+            else:
+                self.BadVectors.append(Vector)
+        print "First GoodVector:\n", self.GoodVectors[0]
+        print "First BadVector:\n", self.BadVectors[0]
+    def ScaleVectors(self):
+        """
+        Scale all vectors so that 90% of all values lie in the range [-1, 1]
+        """
+        Values = []
+        MinValues = []
+        MaxValues = []
+        FeatureCount = len(self.GoodVectors[0])
+        for X in range(FeatureCount):
+            Values.append([])
+        for Vector in self.GoodVectors:
+            for X in range(FeatureCount):
+                Values[X].append(Vector[X])
+        for Vector in self.BadVectors:
+            for X in range(FeatureCount):
+                Values[X].append(Vector[X])
+        print "Value count:", len(Values[0])
+        for X in range(FeatureCount):
+            Values[X].sort()
+            ValueCount = len(Values[X])
+            MinValues.append(Values[X][int(round(ValueCount * 0.05))])
+            MaxValues.append(Values[X][int(round(ValueCount * 0.95))])
+        print "Range:"
+        for X in range(FeatureCount):
+            print "%s: %.4f ... %.4f"%(X, MinValues[X], MaxValues[X])
+        for X in range(FeatureCount):
+            HalfRange = (MaxValues[X] - MinValues[X]) / 2.0
+            if not HalfRange:
+                continue
+            for Vector in self.BadVectors:            
+                Vector[X] = (Vector[X] - MinValues[X]) / HalfRange - 1.0
+                #Vector[X] = max(-1.0, min(Vector[X], 1.0))
+            for Vector in self.GoodVectors:            
+                Vector[X] = (Vector[X] - MinValues[X]) / HalfRange - 1.0
+                #Vector[X] = max(-1.0, min(Vector[X], 1.0))
+    def PerformLDA(self, FileName, CategoryBit, FeatureList, ScaleVectors = 1, FoldValidation = 0):
+        VerboseFlag = 1
+        Size = len(FeatureList)
+        self.LoadVectors(FileName, CategoryBit, FeatureList)
+        if ScaleVectors:
+            self.ScaleVectors()
+        if FoldValidation:
+            random.seed(1)
+            random.shuffle(self.GoodVectors)
+            random.shuffle(self.BadVectors)
+        # n-fold validation:
+        self.MasterGoodVectors = self.GoodVectors
+        self.MasterBadVectors = self.BadVectors
+        WorstAccuracy = 1.0
+        for Fold in range(max(1, FoldValidation)):
+            # Slice the master lists of good and bad vectors to separate 1/FoldValidation of
+            # them into a test set.  FoldValidation can be 0, to do no such splitting.
+            self.GoodVectors = []
+            self.GoodTestVectors = []
+            for X in range(len(self.MasterGoodVectors)):
+                if FoldValidation and X % FoldValidation == Fold:
+                    self.GoodTestVectors.append(self.MasterGoodVectors[X])
+                else:
+                    self.GoodVectors.append(self.MasterGoodVectors[X])
+            self.BadVectors = []
+            self.BadTestVectors = []
+            for X in range(len(self.MasterBadVectors)):
+                if FoldValidation and X % FoldValidation == Fold:
+                    self.BadTestVectors.append(self.MasterBadVectors[X])
+                else:
+                    self.BadVectors.append(self.MasterBadVectors[X])
+            ############################################################
+            # Compute the mean vectors:
+            GoodCount = float(len(self.GoodVectors))
+            BadCount = float(len(self.BadVectors))
+            AllCount = GoodCount + BadCount
+            self.MeanGood = [0]*Size
+            self.MeanBad = [0]*Size
+            self.MeanGlobal = [0]*Size
+            for Vector in self.GoodVectors:
+                for Index in range(Size):
+                    self.MeanGood[Index] += Vector[Index] / GoodCount
+                    self.MeanGlobal[Index] += Vector[Index] / AllCount
+            for Vector in self.BadVectors:
+                for Index in range(Size):
+                    self.MeanBad[Index] += Vector[Index] / BadCount
+                    self.MeanGlobal[Index] += Vector[Index] / AllCount
+            print "MeanGood:\n  ", self.MeanGood
+            print "MeanBad:\n  ", self.MeanBad
+            print "MeanGlobal:\n  ", self.MeanGlobal
+            ############################################################
+            # Compute the mean-corrected vectors:
+            MeanCorrectedGoodVectors = []
+            MeanCorrectedBadVectors = []
+            for Vector in self.GoodVectors:
+                NewVector = []
+                for X in range(Size):
+                    NewVector.append(Vector[X] - self.MeanGlobal[X])
+                MeanCorrectedGoodVectors.append(NewVector)
+            for Vector in self.BadVectors:
+                NewVector = []
+                for X in range(Size):
+                    NewVector.append(Vector[X] - self.MeanGlobal[X])
+                MeanCorrectedBadVectors.append(NewVector)
+            ############################################################
+            # Compute covariance matrices:
+            CovarArrayGood = self.GetCovarianceArray(MeanCorrectedGoodVectors)
+            if VerboseFlag:
+                print "CovarArrayGood:", CovarArrayGood
+            CovarArrayBad = self.GetCovarianceArray(MeanCorrectedBadVectors)
+            if VerboseFlag:
+                print "CovarArrayBad:", CovarArrayBad
+            # CovarArrayFull is the pooled within-group covariance matrix; it is
+            # computed componentwise as a weighted sum of CovarArrayGood and CovarArrayBad.
+            CovarArrayFull = zeros((Size, Size), FloatType)
+            for X in range(Size):
+                for Y in range(Size):
+                    CovarArrayFull[X][Y] += CovarArrayGood[X][Y] * GoodCount / AllCount
+                    CovarArrayFull[X][Y] += CovarArrayBad[X][Y] * BadCount / AllCount
+            if VerboseFlag:
+                print "CovarArrayFull:", CovarArrayFull
+            ############################################################
+            # Invert the covariance array:
+            try:
+                self.CI = InvertMatrix(CovarArrayFull)
+            except:
+                traceback.print_exc()
+                print "Unable to invert covariance matrix!  Invalid feature set."
+                return 0
+            if VerboseFlag:
+                print "CI:", self.CI
+            self.GoodMuC = MatrixMulitply(self.CI, self.MeanGood)
+            if VerboseFlag:
+                print "GoodMuC:", self.GoodMuC
+            self.BadMuC = MatrixMulitply(self.CI, self.MeanBad)
+            if VerboseFlag:
+                print "BadMuC:", self.BadMuC
+            self.ConstantGood = -MatrixMulitply(self.MeanGood, self.GoodMuC) / 2.0
+            self.ConstantBad = -MatrixMulitply(self.MeanBad, self.BadMuC) / 2.0
+            if VerboseFlag:
+                print "Constant good %.4f constant bad %.4f"%(self.ConstantGood, self.ConstantBad)
+            #######################################################
+            if VerboseFlag:
+                # Print C initializers:
+                for X in range(Size):
+                    Str = "double CovInv%s[] = {"%chr(ord("A") + X)
+                    for Y in range(Size):
+                        Str += "%.3f,"%self.CI[X][Y]
+                    Str = Str[:-1] + "};"
+                    print Str
+                Str = "double MeanVectorTrue[] = {"
+                for X in range(Size):
+                    Str += "%.3f,"%self.MeanGood[X]
+                Str = Str[:-1] + "};"
+                print Str
+                Str = "double MeanVectorFalse[] = {"
+                for X in range(Size):
+                    Str += "%.3f,"%self.MeanBad[X]
+                Str = Str[:-1] + "};"
+                print Str
+                print "double SubProdTrue = (float)%.3f;"%self.ConstantGood
+                print "double SubProdFalse = (float)%.3f;"%self.ConstantBad
+                print "CG and CB:", self.ConstantGood, self.ConstantBad
+            #######################################################
+            Weights = []
+            for X in range(Size):
+                Weights.append(self.GoodMuC[X] - self.BadMuC[X])
+            Str = "*-*->Weights:"
+            for X in range(Size):
+                Str += " %s: %.4f"%(X, Weights[X])
+            print Str
+            #######################################################
+            # Compute our accuracy on the testing set:
+            if FoldValidation:
+                CorrectCount = 0
+                IncorrectCount = 0
+                for Vector in self.GoodTestVectors:
+                    NewVector = []
+                    for X in range(Size):
+                        NewVector.append(Vector[X] - self.MeanGlobal[X])
+                    Reading = self.GetReading(NewVector)
+                    if Reading > 0:
+                        CorrectCount += 1
+                    else:
+                        IncorrectCount += 1
+                for Vector in self.BadTestVectors:
+                    NewVector = []
+                    for X in range(Size):
+                        NewVector.append(Vector[X] - self.MeanGlobal[X])
+                    Reading = self.GetReading(NewVector)
+                    if Reading > 0:
+                        IncorrectCount += 1
+                    else:
+                        CorrectCount += 1
+                TotalCount = CorrectCount + IncorrectCount
+                Accuracy = CorrectCount / float(TotalCount)
+                print "Cross-validation accuracy: %d of %d (%.3f%%)"%(CorrectCount, TotalCount, Accuracy*100)
+                WorstAccuracy = min(Accuracy, WorstAccuracy)
+            else:
+                # Compute accuracy on all vectors:
+                CorrectCount = 0
+                IncorrectCount = 0
+                for Vector in MeanCorrectedGoodVectors:
+                    Reading = self.GetReading(Vector)
+                    if Reading > 0:
+                        CorrectCount += 1
+                    else:
+                        IncorrectCount += 1
+                for Vector in MeanCorrectedBadVectors:
+                    Reading = self.GetReading(Vector)
+                    if Reading > 0:
+                        IncorrectCount += 1
+                    else:
+                        CorrectCount += 1
+                TotalCount = CorrectCount + IncorrectCount
+                Accuracy = CorrectCount / float(TotalCount)
+                print "Accuracy: %d of %d (%.3f%%)"%(CorrectCount, TotalCount, Accuracy*100)
+                WorstAccuracy = min(Accuracy, WorstAccuracy)
+        print "Min. cross-validation accuracy: %.3f%%"%(WorstAccuracy*100)
+        return WorstAccuracy
+    def GetReading(self, Vector):
+        CIProduct = MatrixMulitply(self.CI, Vector)
+        ReadingGood = MatrixMulitply(self.MeanGood, CIProduct) + self.ConstantGood
+        ReadingBad = MatrixMulitply(self.MeanBad, CIProduct) + self.ConstantBad
+        print
+        print "Vector:", Vector
+        print "CIProduct:", CIProduct
+        print "ReadingGood %s ReadingBad %s Net %s"%(ReadingGood, ReadingBad, ReadingGood - ReadingBad)
+        return (ReadingGood - ReadingBad)
+    def ReportROCCurve(self):
+        SortedList = []
+        MeanCorrectedGoodVectors = []
+        MeanCorrectedBadVectors = []
+        PositiveCount = len(self.GoodVectors)
+        NegativeCount = len(self.BadVectors)
+        Size = len(self.GoodVectors[0])
+        for Vector in self.GoodVectors:
+            NewVector = []
+            for X in range(Size):
+                NewVector.append(Vector[X] - self.MeanGlobal[X])
+            CIProduct = MatrixMulitply(self.CI, NewVector)
+            ReadingGood = MatrixMulitply(self.MeanGood, CIProduct) + self.ConstantGood
+            ReadingBad = MatrixMulitply(self.MeanBad, CIProduct) + self.ConstantBad
+            SortedList.append((ReadingGood - ReadingBad, 1))
+        for Vector in self.BadVectors:
+            NewVector = []
+            for X in range(Size):
+                NewVector.append(Vector[X] - self.MeanGlobal[X])
+            CIProduct = MatrixMulitply(self.CI, NewVector)
+            ReadingGood = MatrixMulitply(self.MeanGood, CIProduct) + self.ConstantGood
+            ReadingBad = MatrixMulitply(self.MeanBad, CIProduct) + self.ConstantBad
+            SortedList.append((ReadingGood - ReadingBad, 0))
+        SortedList.sort()
+        SortedList.reverse()
+        TPCount = 0
+        FPCount = 0
+        Area = 0
+        HappyTPRate = 0
+        ROCCurveFile = open("ROCCurve.txt", "wb")
+        for (Reading, TrueFlag) in SortedList:
+            #print Reading, TrueFlag
+            if (TrueFlag):
+                TPCount += 1
+            else:
+                FPCount += 1
+                Area += (TPCount / float(PositiveCount))
+            TPRate = TPCount / float(PositiveCount)
+            FPRate = FPCount / float(NegativeCount)
+            ROCCurveFile.write("%s\t%s\t\n"%(FPRate, TPRate))
+            if FPRate < 0.05:
+                HappyTPRate = TPRate
+        Area /= float(FPCount)
+        print "ROC curve area:", Area
+        print "TP rate for FP < 0.05: %s"%HappyTPRate
+        return Area
+    def ProducePValueCurve(self):
+        Histogram = {}
+        HistogramShort = {}
+        HistogramMedium = {}
+        HistogramLong = {}
+        MediumCutoff = 9
+        LongCutoff = 13
+        for X in range(len(self.BadVectors)):
+            Vector = self.BadVectors[X]
+            MQScore = self.GetReading(Vector)
+            Bin = int(round(MQScore * 10))
+            if not Histogram.has_key(Bin):
+                Histogram[Bin] = [0,0]
+            if not HistogramShort.has_key(Bin):
+                HistogramShort[Bin] = [0,0]
+            if not HistogramMedium.has_key(Bin):
+                HistogramMedium[Bin] = [0,0]
+            if not HistogramLong.has_key(Bin):
+                HistogramLong[Bin] = [0,0]
+            #Len = self.BadVectorPepLengths[X]
+            Histogram[Bin][0] += 1
+        PrintHistogram(Histogram, None)
+
+def FeatureSelectMain():
+    BestAccuracy = 0
+    FeatureList = [6, 49]
+    FeatureCount = 55
+    while len(FeatureList)<12:
+        BestAccuracy = 0
+        BestList = FeatureList
+        for FeatureA in range(4, FeatureCount):
+            if FeatureA in FeatureList:
+                continue
+            if FeatureA in ForbiddenFeatures:
+                continue
+            AugmentedFeatureList = FeatureList[:]
+            AugmentedFeatureList.append(FeatureA)
+            LDA = LDAClassifier()
+            LDA.PerformLDA("TrainingSet.Table.txt", 0, AugmentedFeatureList, 1, 0)
+            Accuracy = LDA.ReportROCCurve()
+            if Accuracy > BestAccuracy:
+                BestAccuracy = Accuracy
+                BestList = AugmentedFeatureList
+            print "Feature set %s has accuracy %.4f%%"%(AugmentedFeatureList, Accuracy*100)
+            print "So far...best accuracy %.4f%%, feature set %s"%(BestAccuracy*100, BestList)
+        FeatureList = BestList
+        print "Best accuracy %s, feature list %s"%(BestAccuracy, FeatureList)
+            
+
+def Main():
+    LDA = LDAClassifier()
+    LDA.PerformLDA("LDATrainingSet.txt", 0, [1, 5], 0, 0)
+    LDA.ReportROCCurve()
+    #LDA.ProducePValueCurve()
+        
+if __name__ == "__main__":
+    try:
+        import psyco
+        psyco.full()
+    except:
+        print "(psyco not loaded)"
+    Main()
diff --git a/Label.py b/Label.py
new file mode 100644
index 0000000..132458c
--- /dev/null
+++ b/Label.py
@@ -0,0 +1,576 @@
+#Title:          Label.py
+#Author:         Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+#Created:        2005
+# Copyright 2007,2008,2009 The Regents of the University of California
+# All Rights Reserved
+#
+# Permission to use, copy, modify and distribute any part of this
+# program for educational, research and non-profit purposes, by non-profit
+# institutions only, without fee, and without a written agreement is hereby
+# granted, provided that the above copyright notice, this paragraph and
+# the following three paragraphs appear in all copies.
+#
+# Those desiring to incorporate this work into commercial
+# products or use for commercial purposes should contact the Technology
+# Transfer & Intellectual Property Services, University of California,
+# San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+# Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+#
+# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+# INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+# IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+# OF SUCH DAMAGE.
+#
+# THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+# OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+# ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+# REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+# EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+# THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+
+import sys
+import os
+import getopt
+import PyInspect
+import MakeImage
+from Utils import *
+import MSSpectrum
+import GetByteOffset
+import PLSUtils
+UsageInfo = """
+Label.py - Generate a labeled spectrum, given a peptide.
+
+Required Options
+ -r [FileName] Spectrum file
+ -b [Offset] The byte offset of the spectrum within the file, as reported in
+     the Inspect output; it can be left blank for single-spectrum files
+ -a [Peptide] The annotation for the spectrum
+ -c [Charge] The charge of the peptide
+
+Additional Options
+ -w [FileName] Output file name.  Defaults to temp.png
+ -v [FileName] Write verbose scoring details to the specified file
+ -d [Width]: Image width
+ -h [Height]: Image height
+ -s [ScanNumber]: Scan number
+ -x: Use black and white (for printing)
+Example:
+    Label.py -r Sample346.ms2 -b 38289818 -a R.A+226LLAAFDFPFR.K
+"""
+
+class LabelClass:
+    def __init__(self):
+        self.SpectrumPath = None
+        self.SpectrumFilePos = 0
+        self.Peptide = None
+        self.OutputFileName = "temp.png"
+        self.VerboseFileName = None
+        self.LabeledPeaks = None
+        self.InspectFeatures = None
+        self.InspectFeatureNames = ["MQScore", "Length", "Total Cut Score", "Median Cut Score", "Y present", "B present", "Intensity in BY", "NTT"]
+        self.AutoPopUp = 1
+        self.PeptideHasPhosphorylation = 0
+        self.InstrumentType = "ESI-ION-TRAP" # or QTOF or FT-HYBRID
+        self.Charge = 0 #guessed or set by user.
+        self.ImageWidth = 600
+        self.ImageHeight = 400
+        self.ScanNumber = None
+        self.DoPLS = 0 #don't do this by default
+    def ParseCommandLineSimple(self, Arguments):
+        self.SpectrumPath = Arguments[0]
+        ColonPos = self.SpectrumPath.rfind(":")
+        try:
+            self.SpectrumFilePos = int(self.SpectrumPath[ColonPos + 1:])
+            self.SpectrumPath = self.SpectrumPath[:ColonPos]
+        except:
+            self.SpectrumFilePos = 0
+        self.Peptide = GetPeptideFromModdedName(Arguments[1])
+        if Arguments[1].find("phos") > 0:
+            self.PeptideHasPhosphorylation = 1
+        if len(Arguments) > 2:
+            self.OutputFileName = Arguments[2]
+    def ParseCommandLine(self, Arguments):
+        # Hack:
+        if len(Arguments) > 0 and len(Arguments[0]) > 1 and Arguments[0][1] == ":":
+            return self.ParseCommandLineSimple(Arguments)
+        (Options, Args) = getopt.getopt(Arguments, "r:b:a:w:v:pi:c:d:h:s:xP")
+        OptionsSeen = {}
+        for (Option, Value) in Options:
+            OptionsSeen[Option] = 1
+            if Option == "-a":
+                Annotation = Value
+                self.Peptide = GetPeptideFromModdedName(Annotation)
+                if Annotation.find("phos") > 0:
+                    self.PeptideHasPhosphorylation = 1
+            elif Option == "-r":
+                self.SpectrumPath = Value
+            elif Option == "-b":
+                self.SpectrumFilePos = int(Value)
+            elif Option == "-w":
+                self.OutputFileName = Value
+            elif Option == "-v":
+                self.VerboseFileName = Value
+            elif Option == "-i":
+                self.InstrumentType = Value
+            elif Option == "-c":
+                self.Charge = int(Value)
+            elif Option == "-p":
+                self.AutoPopUp = 0
+                #secret option to suppress the image from popping up to the screen
+            elif Option == "-d":
+                self.ImageWidth = int(Value)
+            elif Option == "-h":
+                self.ImageHeight = int(Value)
+            elif Option == "-s":
+                self.ScanNumber = int(Value)
+            elif Option == "-x":
+                MakeImage.SetColors(1)
+            elif Option == "-P":
+                self.DoPLS = 1
+            else:
+                raise ValueError, "* Unknown option %s"%Option
+        # Filename and annotation are required.  (Byte position is optional,
+        # since there are many single-scan .dta files out there)
+        if not OptionsSeen.has_key("-a") or not OptionsSeen.has_key("-r"):
+            print UsageInfo
+            sys.exit(1)
+    def Main(self):
+        if self.ScanNumber != None:     # scan number is provided in input
+            # get byte offset using scan number
+            Abacus = GetByteOffset.Abacus()
+            self.ScanOffset = Abacus.GetByteOffset(self.SpectrumPath)
+            self.SpectrumFilePos = self.ScanOffset[self.ScanNumber]
+            print "ByteOffset # = %s"%self.SpectrumFilePos
+        self.LabelPeaks()
+        #self.ConvertDoublyChargedPeakLabelsOLD()
+        self.LabeledPeaks = self.ConvertDoublyChargedPeakLabels(self.LabeledPeaks, self.Peptide)
+        #self.ConvertYPeakNumberingOLD()
+        self.LabeledPeaks = self.ConvertYPeakNumbering(self.LabeledPeaks)
+        self.ConvertParentLossLabels()
+        self.VerboseOutput()
+        Maker = MakeImage.MSImageMaker(Width = self.ImageWidth, Height = self.ImageHeight)
+        Maker.ConvertPeakAnnotationToImage(self.LabeledPeaks, self.OutputFileName, self.Peptide,
+            Width = self.ImageWidth, Height = self.ImageHeight)
+        #if self.AutoPopUp:
+        #    os.startfile(self.OutputFileName)
+    def VerboseOutput(self):
+        """
+        Extra output for the curious
+        1. Inspect scoring features
+        """
+        if not self.VerboseFileName:
+            return
+        ##1. Inspect scoring features
+        VerboseHandle = open(self.VerboseFileName, "wb")
+        VerboseHandle.write("M/z %f\n"%self.MZ)
+        VerboseHandle.write("Annotation %s\n"%self.Peptide.GetFullModdedName())
+        VerboseHandle.write("ParentMass: Hypothetical, Observered, Error: %.2f, %.2f, %.2f\n"%(self.HypotheticalParentMass, self.ObservedParentMass, self.ObservedParentMassError))
+        for Index in range(len(self.InspectFeatures)):
+            VerboseHandle.write("%s\t%.3f\n"%(self.InspectFeatureNames[Index],self.InspectFeatures[Index]))
+        ##If they want the Phosphate localization Score do it here.
+        if self.DoPLS:
+            PLS = self.CalculatePLS()
+            if PLS:
+                #here it is possible that we get a different peptide as winner
+                VerboseHandle.write("Phosphate Localization Score: %.3f\n"%PLS[0])
+                if len(PLS) >  1:
+                    VerboseHandle.write("WARNING: Better annotation than input. %.4f, %s"%(PLS[1], PLS[2]))
+            else:
+                VerboseHandle.write("Phosphate Localization Score: N/A\n")
+
+        ## 2. Write out the peaks found, not found
+        String = self.GetFoundPeaksTable()
+        VerboseHandle.write("\n\nPeaksFoundTable\n%s\n\n"%String)
+
+        ## 3. Dump the peak list
+        VerboseHandle.write("Mass\tIntensity\tLabel\tAminoIndex\n")
+        for Tuple in self.LabeledPeaks:
+            Label = Tuple[2] 
+            if not Label:
+                Label = "UnLabeled"
+            Str = "%f\t%f\t%s\t%d\n"%(Tuple[0], Tuple[1], Label, Tuple[3])
+            VerboseHandle.write(Str)
+        VerboseHandle.write("\n\n")
+            
+        VerboseHandle.close()
+
+    def GetFoundPeaksTable(self):
+        ## Get masses from Peptide object
+        ## mark the ones found with bold something
+        IonMasses = {} #key = IonName, value\tvalue\t
+        IonsFound = {} #key = IonNameAndIndex, value = 1 if found
+        IonNamesSorted = ("b2", "b", "y", "y2")
+        ##Mark Ions found
+        for Tuple in self.LabeledPeaks:
+            Label = Tuple[2]
+            if not Label:
+                continue
+            Label = Label.lower()
+            if Label in IonNamesSorted:
+                Index = Tuple[3]
+                Key = "%s:%s"%(Label,Index)
+                IonsFound[Key] = 1
+                #print "%s\t%s"%(Key, Tuple)
+        ##get predicted values
+        ReturnString = ""
+        for IonName in IonNamesSorted:
+            IonMasses[IonName] = "%s\t"%IonName
+            Ion = Global.AllIonDict[IonName]
+            for Index in range(1, len(self.Peptide.Masses)-1):
+                Mass = self.Peptide.Masses[Index] # offset by 1, since mass 0 is in there.
+                MassForIonType = Ion.GetPeakMass(Mass,self.Peptide.GetParentMass())
+                if IonName[0] == "b":
+                    Key = "%s:%s"%(IonName,Index)
+                else:
+                    NewIndex = len(self.Peptide.Aminos) - Index
+                    Key = "%s:%s"%(IonName,NewIndex)
+                if IonsFound.has_key(Key):
+                    #print "I found %s at mass %s"%(Key,MassForIonType)
+                    IonMasses[IonName]+="F%.3f\t"%MassForIonType
+                else:
+                    IonMasses[IonName]+="%.3f\t"%MassForIonType
+                #print "%s, %s"%(Key, MassForIonType)
+            ## Make String now that we are done with this IonName
+            ReturnString += "%s\n"%IonMasses[IonName]
+            if IonName == "b":
+                ReturnString += " \t%s\n"%self.Peptide.GetModdedName()
+        return ReturnString
+
+    def CalculatePLS(self):
+        """This function calculates the Phosphate Localization Score which is the ambuiguity score
+        for phosphorylation placement to a specific amino acid in the sequence.  This
+        is reported in Albuquerque et al.  Mol Cell Prot 2008
+        """
+        ##1. Get all the potential annotations for the peptide
+        ##      If there are none, then the score is "N/A"
+        ##2. Get the score of each alternate annotation
+        ##3. determine winner and runner up
+        ##4. calculate PLS
+        Abacus = PLSUtils.PLSClass()
+        PotentialAnnotations = Abacus.GetAlternateAnnotations(self.Peptide)
+        
+        if len(PotentialAnnotations) == 0:
+            return None
+        ## 2. Try each individual annotation, keeping track of the top and runner up
+        BestAlternateMQScore = -10
+        RunnerUpAlternateMQScore = -10
+        BestAlternatePeakList = None
+        RunnerUpAlternatePeakList = None
+        BestAlternatePeptide = None
+        RunnerUpAlternatePeptide = None
+        for Annotation in PotentialAnnotations:
+            NewPeptide = GetPeptideFromModdedName(Annotation) # needed for peak label conversion
+            NewPeptide.Prefix = self.Peptide.Prefix
+            NewPeptide.Suffix = self.Peptide.Suffix
+            ## have to load it each time; it did not give correct results when the same object was reused
+            PySpectrum = PyInspect.Spectrum(self.SpectrumPath, self.SpectrumFilePos)
+            PySpectrum.SetParentMass(self.HypotheticalParentMass, self.Charge)
+            Features = PySpectrum.ScorePeptideDetailed(Annotation, self.Charge)
+            PeakAnnotations = PySpectrum.LabelPeaks(Annotation, self.Charge)
+            MQScore = Features[0]
+            #print "The score is %s, %s"%(MQScore, Annotation)
+            if MQScore > BestAlternateMQScore:
+                ##swap with runner up
+                RunnerUpAlternateMQScore = BestAlternateMQScore
+                RunnerUpAlternatePeakList = BestAlternatePeakList
+                RunnerUpAlternatePeptide = BestAlternatePeptide
+                BestAlternateMQScore = MQScore
+                BestAlternatePeakList = PeakAnnotations
+                BestAlternatePeptide = NewPeptide
+            elif MQScore > RunnerUpAlternateMQScore:
+                RunnerUpAlternateMQScore = MQScore
+                RunnerUpAlternatePeakList = PeakAnnotations
+                RunnerUpAlternatePeptide = NewPeptide
+        ## 3. Determine the real winner and runner up.  We assume that the original Inspect
+        ## annotation is right unless something beats it by 0.3 units.  This threshold is
+        ## hard-coded magic, but it was tested against the values in range(0, 1, 0.1) and
+        ## performed best; when an alternate wins, we swap out the top annotation.
+        if BestAlternateMQScore > (self.MQScore + 0.3):
+            TopPeakList = BestAlternatePeakList
+            TopPeptide = BestAlternatePeptide
+            TopMQScore = BestAlternateMQScore
+            #now also consider the fate of the RU score
+            if RunnerUpAlternateMQScore > (self.MQScore + 0.3):
+                RunnerUpPeakList = RunnerUpAlternatePeakList
+                RunnerUpPeptide = RunnerUpAlternatePeptide
+                RunnerUpMQScore = RunnerUpAlternateMQScore
+            else:
+                RunnerUpPeakList = self.LabeledPeaks
+                RunnerUpPeptide = self.Peptide
+                RunnerUpMQScore = self.MQScore
+        else:
+            TopPeakList = self.LabeledPeaks
+            TopPeptide = self.Peptide
+            TopMQScore = self.MQScore
+            RunnerUpPeakList = BestAlternatePeakList
+            RunnerUpPeptide = BestAlternatePeptide
+            RunnerUpMQScore = BestAlternateMQScore
+        ## 4. Find the distinguishing peaks between the top 2 
+        #print "Winner ", TopPeptide.GetFullModdedName()
+        #print "runner up", RunnerUpPeptide.GetFullModdedName()
+        TopPeakList = self.ConvertDoublyChargedPeakLabels(TopPeakList, TopPeptide)
+        TopPeakList = self.ConvertYPeakNumbering(TopPeakList)
+        RunnerUpPeakList = self.ConvertDoublyChargedPeakLabels(RunnerUpPeakList, RunnerUpPeptide)
+        RunnerUpPeakList = self.ConvertYPeakNumbering(RunnerUpPeakList)
+        DistinguishingPeakList = Abacus.GetDistinguishingPeaks(TopPeptide, RunnerUpPeptide)
+        #print "finding peaks for %s"%TopPeptide.GetModdedName()
+        nWinner = Abacus.GetSupportingPeaks(TopPeakList, DistinguishingPeakList)
+        #print "finding peaks for %s"%RunnerUpPeptide.GetModdedName()
+        nRunnerUp = Abacus.GetSupportingPeaks(RunnerUpPeakList, DistinguishingPeakList)
+        ## 4.5 A slight detour: if nWinner < nRunnerUp, then PLS predicts something
+        ## different from Inspect.  This happens; scoring functions can disagree.
+        ## We simply swap the two and call it a day.
+        #print "Getting the ambuiguity score with %s, %s, %s (top, ru, total)"%(nWinner, nRunnerUp, len(DistinguishingPeakList))
+        AmbuigityScore = Abacus.ComputePLS(len(DistinguishingPeakList), nWinner, nRunnerUp)
+        if AmbuigityScore < 0:
+            ## means that nWinner < nRunnerUp
+            AmbuigityScore *= -1
+            ##now we shamelessly dump the top guy
+            TopMQScore = RunnerUpMQScore
+            TopPeptide = RunnerUpPeptide
+        #print "Ascore is %s"%AmbuigityScore
+        if not TopMQScore == self.MQScore:
+            print "WARNING::Top score was %.2f for peptide %s"%(TopMQScore, TopPeptide.GetModdedName())
+            print "\tInput was %.2f and %s"%(self.MQScore, self.Peptide.GetModdedName())
+            return (AmbuigityScore, TopMQScore, TopPeptide.GetFullModdedName())
+        return (AmbuigityScore,)
+
+            
+    def ConvertParentLossLabels(self):
+        """
+        Special case for phosphorylated spectra. Change the label
+        'Parent loss' to M-p or M-p-h2o.
+        """
+        PhosLoss = 98.0 / self.Charge
+        PhosWaterLoss = 116.0 / self.Charge
+        PhosLabel = "M-p"
+        PhosWaterLabel = "M-p-h2o"
+        Error = 3.0
+        for Index in range(len(self.LabeledPeaks)):
+            Tuple = self.LabeledPeaks[Index]
+            Label = Tuple[2]
+            if not Label == "Parent loss":
+                continue
+            Mass = Tuple[0]
+            Diff = abs(Mass - self.MZ)
+            MaybePhosLoss = abs(Diff - PhosLoss)
+            #print Tuple
+            if MaybePhosLoss < Error:
+                NewTuple = (Tuple[0], Tuple[1], PhosLabel, Tuple[3])
+                self.LabeledPeaks[Index] = NewTuple
+                #print self.LabeledPeaks[Index]
+                continue
+            MaybePhosWaterLoss = abs(Diff - PhosWaterLoss)
+            if MaybePhosWaterLoss < Error:
+                NewTuple = (Tuple[0], Tuple[1], PhosWaterLabel, Tuple[3])
+                self.LabeledPeaks[Index] = NewTuple
+                #print self.LabeledPeaks[Index]
+    def ConvertDoublyChargedPeakLabelsOLD(self):
+        """
+        The Inspect output does not distinguish between singly and doubly charged peaks,
+        so for the labeling to come out right we have to rewrite the labels as B2, Y2, etc.
+        """
+        for Index in range(len(self.LabeledPeaks)):
+            Tuple = self.LabeledPeaks[Index]
+            Label = Tuple[2]
+            TupleMass = Tuple[0]
+            AminoIndex = Tuple[3]
+            NewLabel = None
+            PeptideMass = self.Peptide.Masses[AminoIndex]
+            if Label == "B":
+                PeptideMass += 1.0
+                if abs(PeptideMass - TupleMass) > 5:
+                    NewLabel = "B2"
+            if Label == "Y":
+                PeptideMass = self.Peptide.GetParentMass() - PeptideMass
+                if abs(PeptideMass - TupleMass) > 5:
+                    #a doublycharged peak, no isotope or error is this big
+                    NewLabel = "Y2"
+            if Label == "Y loss":  #hacky, but I can't think of a good way
+                YPeptideMass = self.Peptide.GetParentMass() - PeptideMass
+                Found = 0
+                for CommonLoss in [17, 18, 98]:
+                    YLossMass = YPeptideMass - CommonLoss
+                    if abs(YLossMass - TupleMass) < 5:
+                        Found = 1
+                        break
+                if not Found:
+                    NewLabel = "Y2 Loss"
+            if Label == "B loss":  #hacky, but I can't think of a good way
+                BPeptideMass = PeptideMass + 1
+                Found = 0
+                for CommonLoss in [17, 18, 98]:
+                    BLossMass = BPeptideMass - CommonLoss
+                    if abs(BLossMass - TupleMass) < 5:
+                        Found = 1
+                        break
+                if not Found:
+                    NewLabel = "B2 Loss"
+
+            if NewLabel:
+                NewTuple = (Tuple[0], Tuple[1], NewLabel, Tuple[3])
+                self.LabeledPeaks[Index] = NewTuple
+    def ConvertDoublyChargedPeakLabels(self, Peaks, Peptide):
+        """
+        The Inspect output does not distinguish between singly and doubly charged peaks,
+        so for the labeling to come out right we have to rewrite the labels as B2, Y2, etc.
+        """
+        for Index in range(len(Peaks)):
+            Tuple = Peaks[Index]
+            Label = Tuple[2]
+            TupleMass = Tuple[0]
+            AminoIndex = Tuple[3]
+            if abs(Tuple[0] - 402) < 1:
+                Verbose = 1
+            else:
+                Verbose = 0
+            NewLabel = None
+            PeptideMass = Peptide.Masses[AminoIndex]
+            if Label == "B":
+                PeptideMass += 1.0
+                if abs(PeptideMass - TupleMass) > 5:
+                    NewLabel = "B2"
+            if Label == "Y":
+                PeptideMass = Peptide.GetParentMass() - PeptideMass
+                if abs(PeptideMass - TupleMass) > 5:
+                    #a doublycharged peak, no isotope or error is this big
+                    NewLabel = "Y2"
+            if Label == "Y loss":  #hacky, but I can't think of a good way
+                YPeptideMass = Peptide.GetParentMass() - PeptideMass
+                Found = 0
+                for CommonLoss in [17, 18, 98]:
+                    YLossMass = YPeptideMass - CommonLoss
+                    if abs(YLossMass - TupleMass) < 5:
+                        Found = 1
+                        break
+                if not Found:
+                    NewLabel = "Y2 Loss"
+            if Label == "B loss":  #hacky, but I can't think of a good way
+                BPeptideMass = PeptideMass + 1
+                Found = 0
+                for CommonLoss in [17, 18, 98]:
+                    BLossMass = BPeptideMass - CommonLoss
+                    if abs(BLossMass - TupleMass) < 5:
+                        Found = 1
+                        break
+                if not Found:
+                    NewLabel = "B2 Loss"
+
+            if NewLabel:
+                NewTuple = (Tuple[0], Tuple[1], NewLabel, Tuple[3])
+                Peaks[Index] = NewTuple
+        return Peaks
+
+    def ConvertYPeakNumberingOLD(self):
+        """
+        The amino indices are numbered from the N- to C-terminus, but MakeImage numbers
+        its Y peaks from y1 (nearest the C-terminus) upwards.  We re-number them here.
+        """
+        TempList = self.LabeledPeaks
+        self.LabeledPeaks = [] #clean it out
+        for Tuple in TempList:
+            Label = Tuple[2]
+            if not Label: #not a labeled peak.  proceed normally
+                self.LabeledPeaks.append(Tuple)        
+                continue
+            if not Label[0] == "Y":
+                self.LabeledPeaks.append(Tuple)
+                continue
+            ## should only have Y derivatives here; switch indices
+            AminoIndex = Tuple[-1]
+            #print Tuple
+            NewIndex = len(self.Peptide.Aminos) - AminoIndex
+            NewTuple = (Tuple[0], Tuple[1], Tuple[2], NewIndex)
+            self.LabeledPeaks.append(NewTuple)
+    def ConvertYPeakNumbering(self, Peaks):
+        """ SAME, just takes a parameter.  I know it's messy.
+        The amino indices are numbered from the N- to C-terminus, but MakeImage numbers
+        its Y peaks from y1 (nearest the C-terminus) upwards.  We re-number them here.
+        """
+        TempList = Peaks
+        Peaks = [] #clean it out
+        for Tuple in TempList:
+            Label = Tuple[2]
+            if not Label: #not a labeled peak.  proceed normally
+                Peaks.append(Tuple)        
+                continue
+            if not Label[0] == "Y":
+                Peaks.append(Tuple)
+                continue
+            ## should only have Y derivatives here; switch indices
+            AminoIndex = Tuple[-1]
+            #print Tuple
+            NewIndex = len(self.Peptide.Aminos) - AminoIndex
+            NewTuple = (Tuple[0], Tuple[1], Tuple[2], NewIndex)
+            Peaks.append(NewTuple)
+        return Peaks
+            
+    def LabelPeaks(self):
+        """
+        Uses PyInspect to label peaks in the spectrum according to Inspect's scoring
+        PyInspect will always be current, so let's use it.
+        """
+        ## load a spectrum, set charge, parent mass, then label the peaks
+        PySpectrum = PyInspect.Spectrum(self.SpectrumPath, self.SpectrumFilePos)
+        self.MZ = PySpectrum.GetMZ()
+        #print "m/z is %f"%self.MZ
+        ParentMass = self.Peptide.GetParentMass()
+        if not self.Charge:  ## Guess charge if not input
+            BestDiff = 99999
+            for Charge in range(1, 5):
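+                # (m/z) * z - (z - 1) * 1.0078 converts the observed m/z back to the
+                # (singly protonated) parent mass, using ~1.0078 Da per proton, so it can
+                # be compared with the peptide's theoretical parent mass.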
+                ParentMassFromCharge = self.MZ * Charge - (Charge - 1)*1.0078
+                Diff = abs(ParentMass - ParentMassFromCharge)
+                if Diff < BestDiff:
+                    BestDiff = Diff
+                    BestCharge = Charge
+                    BestMass = ParentMassFromCharge
+            self.Charge = BestCharge
+            print "Appears to be charge %d with mass %.2f (oracle %.2f, error %.2f)"%(self.Charge, BestMass, ParentMass, BestDiff)
+            if BestDiff > 5:
+                print "\n** WARNING: Parent mass is off by %.2f!\n"%BestDiff
+        else: #charge given, calculate observed mass
+            BestMass = self.MZ * self.Charge - (self.Charge - 1)*1.0078
+        self.HypotheticalParentMass = ParentMass
+        self.ObservedParentMass = BestMass
+        self.ObservedParentMassError = abs(self.HypotheticalParentMass - self.ObservedParentMass)
+        PySpectrum.SetParentMass(ParentMass, self.Charge)
+        Annotation = self.Peptide.GetModdedName() # lacks prefix/suffix
+        ## self.LabeledPeaks is list of (Mass, intensity, ion, amino index)
+        self.LabeledPeaks = PySpectrum.LabelPeaks(Annotation, self.Charge) 
+        if self.VerboseFileName:
+            self.InspectFeatures = PySpectrum.ScorePeptideDetailed(Annotation, self.Charge)
+            print "The MQScore for %s is %f"%(Annotation, self.InspectFeatures[0])
+            self.MQScore = self.InspectFeatures[0] 
+
+
+def LabelSpectrum(Spectrum, Peptide, PeakTolerance):
+    Labeler = LabelClass()
+    Labeler.Peptide = Peptide
+    Labeler.SpectrumPath = Spectrum.FilePath
+    Labeler.SpectrumFilePos = Spectrum.FilePos
+    #print "Label.LabelSpectrum(%s:%s)"%(Spectrum.FilePath, Spectrum.FilePos)
+    Labeler.LabelPeaks()
+    # Paired iteration through Spectrum.Peaks and Labeler.LabeledPeaks:
+    IndexA = 0
+    IndexB = 0
+    while IndexA < len(Spectrum.Peaks) and IndexB < len(Labeler.LabeledPeaks):
+        Diff = Spectrum.Peaks[IndexA].Mass - Labeler.LabeledPeaks[IndexB][0]
+        if Diff > 0.01:
+            # Mass A is too large; let B catch up
+            IndexB += 1
+            continue
+        if Diff < -0.01:
+            # Mass A is too small; iterate forward
+            IndexA += 1
+            continue
+        # Masses agree to within 0.01; copy the label, then advance both lists
+        Spectrum.Peaks[IndexA].IonType = Labeler.LabeledPeaks[IndexB][2]
+        Spectrum.Peaks[IndexA].AminoIndex = Labeler.LabeledPeaks[IndexB][3]
+        IndexA += 1
+        IndexB += 1
+    return Spectrum
+
+if __name__ == "__main__":
+    Dymo = LabelClass()
+    Dymo.ParseCommandLine(sys.argv[1:])
+    Dymo.Main()
diff --git a/Learning.py b/Learning.py
new file mode 100644
index 0000000..2562565
--- /dev/null
+++ b/Learning.py
@@ -0,0 +1,1276 @@
+#Title:          Learning.py
+#Author:         Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+#Created:        2005
+# Copyright 2007,2008,2009 The Regents of the University of California
+# All Rights Reserved
+#
+# Permission to use, copy, modify and distribute any part of this
+# program for educational, research and non-profit purposes, by non-profit
+# institutions only, without fee, and without a written agreement is hereby
+# granted, provided that the above copyright notice, this paragraph and
+# the following three paragraphs appear in all copies.
+#
+# Those desiring to incorporate this work into commercial
+# products or use for commercial purposes should contact the Technology
+# Transfer & Intellectual Property Services, University of California,
+# San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+# Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+#
+# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+# INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+# IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+# OF SUCH DAMAGE.
+#
+# THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+# OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+# ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+# REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+# EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+# THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+"""
+The LearnerClass is an abstract machine learner.  It can be trained,
+saved, and loaded.
+"""
+import os
+import sys
+import struct
+import random
+import math
+import traceback
+import cPickle
+import LDA
+import RunPySVM
+try:
+    import PySVM
+except:
+    print "(Warning: PySVM not present!)"
+
+try:
+    from numpy import *
+    import numpy.linalg
+    FloatType = float
+    MatrixMultiply = dot
+    InvertMatrix = numpy.linalg.inv
+except:
+    print "\n* Warning: Unable to import NumPy.  Logit training not available"
+    print "  Please install NumPy (see http://numpy.scipy.org/ for details)"
+    print "  Error details are shown here:"
+    traceback.print_exc()
+    
+random.seed(1)
+
+MaxSVMFeatureCount = 500
+
+if sys.platform == "win32":
+    PATH_SVMSCALE = r"C:\libsvm\windows\svmscale.exe"
+    PATH_SVMTRAIN = r"C:\libsvm\windows\svmtrain.exe"
+else:
+    PATH_SVMSCALE = os.path.join(os.environ["HOME"], "libsvm", "svm-scale")
+    PATH_SVMTRAIN = os.path.join(os.environ["HOME"], "libsvm", "svm-train")
+
+SQRT2PI = math.sqrt(2 * math.pi)
+SQRT2 = math.sqrt(2)
+Cof = [76.18009172947146, -86.50532032941677,
+    24.01409824083091, -1.231739572450155, 
+    0.1208650973866179e-2, -0.5395239384952e-5]
+
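+# Gamma(Z) computes the gamma function via the Lanczos approximation; the Cof
+# coefficients above are the standard gammln coefficients (cf. Numerical Recipes).
+# It is used below to normalize the gamma density of the "false" score distribution.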
+def Gamma(Z):
+    X = Z
+    Y = Z
+    Temp = X + 5.5
+    Temp -= (X + 0.5) * math.log(Temp)
+    Ser = 1.000000000190015
+    for J in range(6):
+        Y += 1
+        Ser += Cof[J] / Y
+    Z = -Temp + math.log(2.5066282746310005 * Ser / X)
+    return math.exp(Z)
+
+
+
+
+class MixtureModelClass:
+    def __init__(self, BinMultiplier = 10.0):
+        self.BinMultiplier = BinMultiplier
+    def Model(self, Values, Histogram = None):
+        if Values:
+            print "Model scores.  Range is %s...%s"%(min(Values), max(Values))
+        else:
+            if not Histogram.keys():
+                # There's nothing to model!
+                self.MinBin = 0
+                self.MaxBin = 0
+                self.OddsTrue = {}
+                return
+            print "Model scores.  Range is %s...%s"%(min(Histogram.keys()), max(Histogram.keys()))
+        self.MaxCycleCount = 300
+        self.VerboseFlag = 0     
+        if Histogram:
+            self.ScoreHistogram = Histogram
+        else:
+            self.ScoreHistogram = {}
+            for Value in Values:
+                Bin = int(round(Value * self.BinMultiplier))
+                self.ScoreHistogram[Bin] = self.ScoreHistogram.get(Bin, 0) + 1
+        Keys = self.ScoreHistogram.keys()
+        self.MinBin = min(Keys)
+        self.MaxBin = max(Keys) + 1
+        self.InitializeModel()
+        try:
+            self.ModelDistribution()
+        except:
+            print "* Warning: Unable to compute p-values via mixture model"
+            print "* Error trace follows:"
+            traceback.print_exc()
+            print "self.VarianceFalse:", self.VarianceFalse
+            print "self.VarianceTrue:", self.VarianceTrue
+            print "MeanFalse:", self.MeanFalse
+            print "GammaOffset:", self.GammaOffset
+            print "ThetaFalse:", self.ThetaFalse
+            print "KFalse:", self.KFalse
+    def GetOddsTrue(self, X):
+##        if self.CumulativeFlag:
+##            # Set our odds using the cumulative probability p(score >= X) instead of
+##            # the odds that the score is in this bin.
+##            ErfArg = (X - self.MeanTrue) / (self.StdDevTrue * SQRT2)
+##            NormalCDF = 0.5 + 0.5 * PyInspect.erf(ErfArg)
+##            GX = max(0.01, X + self.GammaOffset)
+##            GammaCDF = PyInspect.GammaIncomplete(self.KFalse, GX / self.ThetaFalse) #/ Gamma(self.KFalse)
+##            TrueNormal = 1.0 - NormalCDF
+##            FalseGamma = 1.0 - GammaCDF
+##        else:
+        Pow = - ((X - self.MeanTrue)**2) / (2 * self.VarianceTrue)
+        TrueNormal = math.exp(Pow) / (self.StdDevTrue * SQRT2PI)
+        GX = max(0.01, X + self.GammaOffset)
+        
+        
+        FalseGamma = math.pow(GX, self.KFalse - 1) * math.exp(-GX / self.ThetaFalse) / self.GammaDemonFalse
+        # Special patch-up code:
+        # Toward the edges of the mixture model, odd behavior may occur where one curve falls off
+        # slower than the other.  We force very low scores to get a bad odds-true, and very
+        # high scores to get a good odds-true.
+        if X < self.MeanTrue - self.VarianceTrue:
+            FalseGamma = max(FalseGamma, 0.001)
+        if X > self.MeanTrue + self.VarianceTrue:
+            TrueNormal = max(TrueNormal, 0.001)
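+        # Bayes rule over the two-component mixture: the posterior that the match is
+        # true is p * Normal(x) / (p * Normal(x) + (1 - p) * Gamma(x)), with p the prior.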
+        OddsTrue = (TrueNormal * self.PriorProbabilityTrue) / (TrueNormal * self.PriorProbabilityTrue + FalseGamma * (1 - self.PriorProbabilityTrue))
+        return OddsTrue
+    def InitializeModel(self):
+        # Initialize mixture model:
+        MinValue = self.MinBin / self.BinMultiplier
+        MaxValue = self.MaxBin / self.BinMultiplier
+        self.MeanFalse = MinValue + (MaxValue - MinValue) * 0.25
+        self.MeanTrue = MaxValue - (MaxValue - MinValue) * 0.25
+        self.VarianceFalse = (MaxValue - MinValue) * 0.1
+        self.VarianceTrue = (MaxValue - MinValue) * 0.1
+        self.PriorProbabilityTrue = 0.1
+        if MinValue < 0:
+            self.GammaOffset = -MinValue
+        elif MinValue > 0.1:
+            self.GammaOffset = -MinValue
+        else:
+            self.GammaOffset = 0
+        self.OddsTrue = {}
+    def ModelDistribution(self):
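+        # Method-of-moments parameters for the gamma ("false") component:
+        # theta = variance / mean and k = mean / theta, with the mean shifted by
+        # GammaOffset so that it stays positive.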
+        self.ThetaFalse = self.VarianceFalse / (self.MeanFalse + self.GammaOffset)
+        self.StdDevTrue = math.sqrt(self.VarianceTrue)
+        self.KFalse = (self.MeanFalse + self.GammaOffset) / self.ThetaFalse
+        self.GammaDemonFalse = math.pow(self.ThetaFalse, self.KFalse) * Gamma(self.KFalse)
+        for Cycle in range(self.MaxCycleCount):
+            self.Cycle = Cycle
+            self.EstimateOddsTrue()
+            self.ComputeDistributionParameters()
+    def EstimateOddsTrue(self):
+        """
+        One half of the E/M cycle: Estimate the probability true for each bin.
+        """
+        # For each bin, compute the probability that it's true:
+        BestOddsTrue = 0
+        for Bin in range(self.MinBin, self.MaxBin):
+            X = Bin / self.BinMultiplier
+            self.OddsTrue[Bin] = self.GetOddsTrue(X)
+            # Somewhat hacky: If the left tail of the normal distribution falls off more slowly
+            # than that of the gamma distribution, the value of OddsTrue is often rather
+            # high for these (very bad!) bins.  We fix that.
+            if Bin < 0:
+                self.OddsTrue[Bin] = min(self.OddsTrue[Bin], 1 / float(-Bin))
+            # Somewhat hacky: If the right tail of the normal distribution falls off too quickly,
+            # then the odds true will decay:
+            BestOddsTrue = max(BestOddsTrue, self.OddsTrue[Bin])
+            if X >= self.MeanTrue:
+                self.OddsTrue[Bin] = max(BestOddsTrue, self.OddsTrue[Bin])
+            #print "%s: %s"%(X, self.OddsTrue[Bin])
+    def ComputeDistributionParameters(self):
+        """
+        One half of the E/M cycle: Optimize the distribution parameters.
+        """
+        # Compute the new mean and variance for the true and the false distributions:
+        self.CountTrue = 0
+        self.MeanTrue = 0
+        self.CountFalse = 0
+        self.MeanFalse = 0
+        for Bin in range(self.MinBin, self.MaxBin):
+            X = Bin / self.BinMultiplier
+            Count = self.ScoreHistogram.get(Bin, 0)
+            self.MeanTrue += X * self.OddsTrue[Bin] * Count
+            self.CountTrue += self.OddsTrue[Bin] * Count
+            self.MeanFalse += X * (1.0 - self.OddsTrue[Bin]) * Count
+            self.CountFalse += (1.0 - self.OddsTrue[Bin]) * Count
+        if self.CountTrue <= 0 or self.CountFalse <= 0:
+            print "** Error: Unable to fit mixture model.  Appears to be %s true and %s false matches."%(self.CountTrue, self.CountFalse)
+            return 0
+        self.MeanTrue /= self.CountTrue
+        self.MeanFalse /= self.CountFalse
+        self.PriorProbabilityTrue = self.CountTrue / (self.CountTrue + self.CountFalse)
+        # Adjust GammaOffset, if the false distribution's mean is getting close to 0:
+        if self.MeanFalse + self.GammaOffset < 0.1:
+            print "False distribution mean is small; BUMP gamma offset up"
+            self.GammaOffset += 0.5
+        ##################################
+        # Compute the new variation for the true and the false distributions:
+        self.VarianceTrue = 0
+        self.VarianceFalse = 0
+        for Bin in range(self.MinBin, self.MaxBin):
+            X = Bin / self.BinMultiplier
+            Count = self.ScoreHistogram.get(Bin, 0)
+            try:
+                self.VarianceTrue += (X - self.MeanTrue)**2 * Count * self.OddsTrue[Bin]
+                self.VarianceFalse += (X - self.MeanFalse)**2 * Count * (1.0 - self.OddsTrue[Bin])
+            except:
+                print X
+                print self.MeanTrue
+                print self.MeanFalse
+                print self.OddsTrue[Bin]
+                raise
+        self.VarianceTrue /= self.CountTrue
+        self.StdDevTrue = math.sqrt(self.VarianceTrue)
+        self.VarianceFalse /= self.CountFalse
+        #print "  True mean %.4f var %.4f"%(self.MeanTrue, self.VarianceTrue)
+        #print " False mean %.4f var %.4f"%(self.MeanFalse, self.VarianceFalse)
+        self.ThetaFalse = self.VarianceFalse / (self.MeanFalse + self.GammaOffset)
+        self.KFalse = (self.MeanFalse + self.GammaOffset) / self.ThetaFalse
+        self.GammaDemonFalse = math.pow(self.ThetaFalse, self.KFalse) * Gamma(self.KFalse)
+        if self.VerboseFlag:
+            print "-----------------------"
+            print "Cycle %s report:"%self.Cycle
+            print "Theta %.4f K %.4f GammaDenominator %.8f GammaOffset %.2f"%(self.ThetaFalse, self.KFalse, self.GammaDemonFalse, self.GammaOffset)
+            print "True: Count %s mean %s variance %s"%(self.CountTrue, self.MeanTrue, self.VarianceTrue)
+            print "False: Count %s mean %s variance %s"%(self.CountFalse, self.MeanFalse, self.VarianceFalse)
+            print "Prior probability true: %s"%self.PriorProbabilityTrue
+    def PlotDistribution(self, FileName):
+        File = open(FileName, "wb")
+        Header = "Bin\tValue\tHistogram\tOddsTrue\tTrueNormal\tFalseGamma\tMixture\t"
+        File.write(Header + "\n")
+        for Bin in range(self.MinBin, self.MaxBin):
+            Str = "%s\t%s\t"%(Bin, Bin / self.BinMultiplier)
+            Str += "%s\t%s\t"%(self.ScoreHistogram.get(Bin, 0), self.OddsTrue[Bin])
+            X = Bin / self.BinMultiplier
+            # Plot gamma and normal curves:
+            Pow = - ((X - self.MeanTrue)**2) / (2 * self.VarianceTrue)
+            TrueNormal = math.exp(Pow) / (self.StdDevTrue * SQRT2PI)
+            GX = max(0.01, X + self.GammaOffset)
+            FalseGamma = math.pow(GX, self.KFalse - 1) * math.exp(-GX / self.ThetaFalse) / self.GammaDemonFalse
+            Str += "%s\t%s\t"%(TrueNormal, FalseGamma)
+##            # Plot gamma and normal CDF:
+##            ErfArg = (X - self.MeanTrue) / (self.StdDevTrue * SQRT2)
+##            NormalCDF = 0.5 + 0.5 * PyInspect.erf(ErfArg)
+##            GX = max(0.01, X + self.GammaOffset)
+##            GammaCDF = PyInspect.GammaIncomplete(self.KFalse, GX / self.ThetaFalse) #/ Gamma(self.KFalse)
+##            Str += "%s\t%s\t"%(NormalCDF, GammaCDF)
+            MergedMixture = TrueNormal * self.PriorProbabilityTrue
+            MergedMixture += FalseGamma * (1.0 - self.PriorProbabilityTrue)
+            Str += "%s\t"%(MergedMixture)
+            File.write(Str + "\n")
+    def PickleSelf(self, File):
+        cPickle.dump(self.BinMultiplier, File)
+        cPickle.dump(self.PriorProbabilityTrue, File)
+        cPickle.dump(self.MeanTrue, File)
+        cPickle.dump(self.VarianceTrue, File)
+        cPickle.dump(self.StdDevTrue, File)
+        cPickle.dump(self.GammaOffset, File)
+        cPickle.dump(self.KFalse, File)
+        cPickle.dump(self.ThetaFalse, File)
+        cPickle.dump(self.GammaDemonFalse, File)
+
+def UnpickleMixtureModel(File):
+    Model = MixtureModelClass()
+    Model.BinMultiplier = cPickle.load(File)
+    Model.PriorProbabilityTrue = cPickle.load(File)
+    Model.MeanTrue = cPickle.load(File)
+    Model.VarianceTrue = cPickle.load(File)
+    Model.StdDevTrue = cPickle.load(File)
+    Model.GammaOffset = cPickle.load(File)
+    Model.KFalse = cPickle.load(File)
+    Model.ThetaFalse = cPickle.load(File)
+    Model.GammaDemonFalse = cPickle.load(File)
+    return Model
+
+class FeatureVector:
+    def __init__(self):
+        self.FileBits = []
+        self.Features = []
+        self.ScaledFeatures = None
+        self.TrueFlag = 0
+        self.Score = 0 # as assigned by an owning model
+        
+class FeatureSetClass:
+    """
+    A feature-set is a list of TRUE tuples, and a list of FALSE tuples.  Normally there
+    is a FeatureSetClass for testing, and one for training.
+    """
+    def __init__(self):
+        self.TrueVectors = []
+        self.FalseVectors = []
+        self.AllVectors = []
+        self.TrueCount = 0
+        self.FalseCount = 0
+        self.Count = 0
+        self.PriorProbabilityFalse = 0.5
+    def SetCounts(self):
+        self.Count = len(self.AllVectors)
+        self.TrueCount = len(self.TrueVectors)
+        self.FalseCount = len(self.FalseVectors)
+        if len(self.AllVectors):
+            self.Size = len(self.AllVectors[0].Features)
+    def FindFeatureRanges(self):
+        """
+        Simple scaling function: Find robust min and max values (the 2.5th and 97.5th percentiles) used to push features into [-1, 1]
+        """
+        Values = []
+        Vector = self.AllVectors[0]
+        Size = len(Vector.Features)
+        print "SIZE:", Size
+        for X in range(Size):
+            Values.append([])
+        for Vector in self.AllVectors:
+            for X in range(Size):
+                Values[X].append(Vector.Features[X])
+        self.MinValues = []
+        self.MaxValues = []
+        for X in range(Size):
+            Values[X].sort()
+            ValIndex = int(round(len(Values[X]) * 0.025))
+            MinValue = Values[X][ValIndex]
+            self.MinValues.append(MinValue)
+            ValIndex = int(round(len(Values[X]) * 0.975))
+            MaxValue = Values[X][ValIndex]
+            self.MaxValues.append(MaxValue)
+##        print "Range:"
+##        for X in range(Size):
+##            print "%s: %.4f-%.4f"%(X, self.MinValues[X], self.MaxValues[X])
+        pass
+    def ScaleFeatures(self):
+        """
+        Simple scaling function: Pushes roughly 95% of feature values (those between the 2.5th and 97.5th percentiles) into the range [-1, 1].
+        Assumes that self.MinValues and self.MaxValues are set!
+        """
+        self.Size = len(self.AllVectors[0].Features)
+        for Vector in self.AllVectors:
+            Vector.ScaledFeatures = [0]*self.Size
+        for X in range(self.Size):
+            HalfRange = (self.MaxValues[X] - self.MinValues[X]) / 2.0
+            if not HalfRange:
+                continue
+            #print "Feature %s: Range %s...%s"%(X, self.MinValues[X], self.MaxValues[X])
+            for Vector in self.AllVectors:
+                Vector.ScaledFeatures[X] = (Vector.Features[X] - self.MinValues[X]) / HalfRange - 1.0
+    def __str__(self):
+        return "<%sT %sF>"%(len(self.TrueVectors), len(self.FalseVectors))
+    def GetPriorProbabilityFalse(self, DBTrueToFalseRatio):
+        """
+        Compute the prior probability that an arbitrary peptide is false.
+        """
+        # In a 1:1 database, there's 1 bogus peptide in a valid protein
+        # for each bogus peptide in an invalid protein; in that case, DBTrueToFalseRatio is 1.0
+        # In a 1:99 database, the ratio is 1/99.
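+        # Worked example (hypothetical numbers): with 1000 vectors, of which 700 hit valid
+        # proteins and 300 hit decoys, and DBTrueToFalseRatio = 1.0, FalseWithinTrue = 300
+        # and PriorProbabilityFalse = (1000 - (700 - 300)) / 1000 = 0.6.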
+        FalseWithinTrue = self.FalseCount * DBTrueToFalseRatio
+        if FalseWithinTrue >= self.TrueCount:
+            # Uh-oh; there are FEWER peptides from valid proteins than we would expect
+            # to see by chance!  Let's (arbitrarily) cap the prior probability false
+            # at 99%.
+            print "Warning: FalseWithinTrue = %s >= %s!"%(FalseWithinTrue, self.TrueCount)
+            self.PriorProbabilityFalse = 0.99
+            return
+        VectorCount = len(self.AllVectors)
+        self.PriorProbabilityFalse = (VectorCount - (self.TrueCount - FalseWithinTrue)) / float(VectorCount)
+        print "==>>PriorProbabilityFalse: %s"%(self.PriorProbabilityFalse)
+    def SaveTabDelimited(self, File):
+        if type(File) == type(""):
+            File = open(File, "wb")
+            CloseFlag = 1
+        else:
+            CloseFlag = 0
+        #File = open(FileName, "wb")
+        for VectorIndex in range(len(self.AllVectors)):
+            Vector = self.AllVectors[VectorIndex]
+            String = "%s\t%s\t"%(VectorIndex, Vector.TrueFlag)
+            for Value in Vector.Features:
+                String += "%s\t"%Value
+            File.write(String + "\n")
+        if CloseFlag:
+            File.close()
+
+def LoadGeneralModel(FileName):
+    File = open(FileName, "rb")
+    ModelType = cPickle.load(File)
+    File.close()
+    if ModelType == "LDA":
+        Model = LDAModel()
+        Model.LoadModel(FileName)
+    elif ModelType == "SVM":
+        Model = SVMModel()
+        Model.LoadModel(FileName)
+    elif ModelType == "LOGIT":
+        Model = LogitModel()
+        Model.LoadModel(FileName)
+    else:
+        print "** Error: Unable to load model type '%s'"%ModelType
+        return None
+    return Model
+
+class LearnerClass:
+    def __init__(self, FeatureList = None):
+        # The entries in FeatureList are indices into the
+        # available features of our training and testing sets.
+        self.FeatureList = FeatureList
+        # OddsTrue[Bin] = probability that an instance with a score
+        # in this bin or HIGHER is correct.
+        self.OddsTrue = {}
+        self.PValue = {}
+        # Bin = int(round(Score * self.BinScalingFactor))
+        self.BinScalingFactor = 10
+        self.MixtureModel = None
+    def SaveModel(self, FileName):
+        raise ValueError, "Abstract method - override in subclass!"
+    def LoadModel(self, FileName):
+        raise ValueError, "Abstract method - override in subclass!"
+    def Train(self, FeatureSet):
+        raise ValueError, "Abstract method - override in subclass!"
+    def Test(self, FeatureSet):
+        raise ValueError, "Abstract method - override in subclass!"
+    def ReportROC(self, FeatureSet, OutputFileName = None):
+        SortedList = []
+        for Vector in FeatureSet.AllVectors:
+            SortedList.append((Vector.Score, random.random(), Vector))
+        SortedList.sort()
+        SortedList.reverse()
+        OverallTrueCount = 0
+        OverallFalseCount = 0
+        # If there are very many vectors, the curve becomes unwieldy and awkward to
+        # plot, so we thin the list by keeping every Slice-th entry:
+        Slice = (len(SortedList) / 30000) + 1 # 1 or larger
+        OldSortedList = SortedList
+        SortedList = []
+        print "SLICE roc-curve list: Take every %sth entry"%Slice
+        for X in range(len(OldSortedList)):
+            if X % Slice == 0:
+                SortedList.append(OldSortedList[X])
+                Vector = OldSortedList[X][-1]
+                if Vector.TrueFlag:
+                    OverallTrueCount += 1
+                else:
+                    OverallFalseCount += 1
+        OldSortedList = None
+        TrueCount = 0
+        FalseCount = 0
+        if OutputFileName:
+            ROCCurvePlotFile = open(OutputFileName, "wb")
+        RowCount = 0
+        ROCTPForFP = {}
+        ROCTPForFPCount = {}
+        for (Score, Dummy, Vector) in SortedList:
+            RowCount += 1
+            if Vector.TrueFlag:
+                TrueCount += 1
+            else:
+                FalseCount += 1
+            OverallTPRate = TrueCount / float(max(1, OverallTrueCount))
+            OverallFPRate = FalseCount / float(max(1, OverallFalseCount))
+            Bin = int(round(OverallFPRate * 100))
+            ROCTPForFP[Bin] = ROCTPForFP.get(Bin, 0) + OverallTPRate
+            ROCTPForFPCount[Bin] = ROCTPForFPCount.get(Bin, 0) + 1
+            if OutputFileName:
+                ROCCurvePlotFile.write("%s\t%s\t%s\t%s\t%s\t\n"%(RowCount, TrueCount, FalseCount, OverallFPRate, OverallTPRate))
+        if OutputFileName:
+            ROCCurvePlotFile.close()
+        # Compute the area under the ROC curve.
+        for Bin in range(0, 100):
+            if ROCTPForFP.has_key(Bin):
+                ROCTPForFP[Bin] /= float(ROCTPForFPCount[Bin])
+        ROCArea = 0
+        for Bin in range(0, 100):
+            if ROCTPForFP.has_key(Bin):
+                ROCArea += 0.01 * ROCTPForFP[Bin]
+                #print "%s: %s"%(Bin, ROCTPForFP[Bin])
+            else:
+                # Interpolate between points:
+                PrevX = 0 # default
+                PrevY = 0 # default
+                for PrevBin in range(Bin - 1, -1, -1):
+                    if ROCTPForFP.has_key(PrevBin):
+                        PrevX = PrevBin
+                        PrevY = ROCTPForFP[PrevBin]
+                        break
+                NextX = 100
+                NextY = 1
+                for NextBin in range(Bin + 1, 101):
+                    if ROCTPForFP.has_key(NextBin):
+                        NextX = NextBin
+                        NextY = ROCTPForFP[NextBin]
+                        break
+                InterpolatedValue = PrevY + (Bin - PrevX) * float(NextY - PrevY) / (NextX - PrevX)
+                ROCArea += 0.01 * InterpolatedValue
+        print "ROC curve area:", ROCArea
+    def ReportAccuracy(self, FeatureSet, ROCFilePath = None):
+        """
+        Called after Test(FeatureSet), to measure how well we did at separating
+        the true and false vectors by score.  Compute OddsTrue, as well.
+        """
+        SortedList = []
+        for Vector in FeatureSet.AllVectors:
+            SortedList.append((Vector.Score, Vector))
+        # sort from HIGHEST to LOWEST score:
+        SortedList.sort()
+        SortedList.reverse()
+        #self.ComputeOddsTrue(SortedList)
+        self.ComputePValues(SortedList)
+        #self.ComputePValues(SortedList, FeatureSet.PriorProbabilityFalse)
+        Rates = [0.05, 0.01, 0.1, 0.5, 0.005]
+        CountsByRate = [0, 0, 0, 0, 0, 0]
+        CumulativeTrue = 0
+        CumulativeFalse = 0
+        Cumulative = 0
+        PrevScore = None
+        for (Score, Vector) in SortedList:
+            if Vector.TrueFlag:
+                CumulativeTrue += 1
+            else:
+                CumulativeFalse += 1
+            Cumulative += 1
+            FractionFalse = CumulativeFalse / float(Cumulative)
+            if Score != PrevScore:
+                for RateIndex in range(len(Rates)):
+                    if FractionFalse < Rates[RateIndex]:
+                        CountsByRate[RateIndex] = CumulativeTrue
+                #print Score, FractionFalse, CumulativeTrue, CumulativeFalse
+            PrevScore = Score
+        print "Counts by FDRate: %d at 5%% %d at 1%% %d at 10%% %d at 50%%"%(CountsByRate[0], CountsByRate[1], CountsByRate[2], CountsByRate[3])
+        SensitivityByRate = []
+        for Count in CountsByRate:
+            SensitivityByRate.append(100 * Count / float(max(1, FeatureSet.TrueCount)))
+        print "FeatureSet.TrueCount:", FeatureSet.TrueCount
+        print "  Sensitivity: %.2f at 5%% %.2f at 1%% %.2f at 10%% %.2f at 50%%"%(SensitivityByRate[0], SensitivityByRate[1], SensitivityByRate[2], SensitivityByRate[3])
+        if ROCFilePath:
+            self.ReportROC(FeatureSet, ROCFilePath)
+        return CountsByRate
+    def ComputeOddsTrue(self, SortedList):
+        "DEPRECATED; use ComputePValue instead"
+        if len(SortedList) < 200:
+            BlockSize = len(SortedList) / 4
+        else:
+            BlockSize = 100
+        WindowTrueSum = 0
+        WindowFalseSum = 0
+        for Entry in SortedList[:BlockSize - 1]:
+            if Entry[1].TrueFlag:
+                WindowTrueSum += 1
+            else:
+                WindowFalseSum += 1
+        for Index in range(len(SortedList)):
+            # Add one entry to the window:
+            if Index + BlockSize < len(SortedList):
+                Entry = SortedList[Index + BlockSize]
+                if Entry[1].TrueFlag:
+                    WindowTrueSum += 1
+                else:
+                    WindowFalseSum += 1
+            # Compute the probability-true for this window:
+            Vector = SortedList[Index][1]
+            OddsTrue = WindowTrueSum / float(WindowTrueSum + WindowFalseSum)
+            Bin = int(round(Vector.Score * self.BinScalingFactor))
+            self.OddsTrue[Bin] = OddsTrue
+            # Remove leftmost entry from the window:
+            if Index >= BlockSize:
+                Entry = SortedList[Index - BlockSize]
+                if Entry[1].TrueFlag:
+                    WindowTrueSum -= 1
+                else:
+                    WindowFalseSum -= 1
+    def GetPValue(self, Score):
+        if self.MixtureModel:
+            return 1.0 - self.MixtureModel.GetOddsTrue(Score)
+        Bin = int(round(Score * self.BinScalingFactor))
+        Keys = self.PValue.keys()
+        MinKey = min(Keys)
+        MaxKey = max(Keys)
+        if Bin < MinKey:
+            return self.PValue[MinKey]
+        if Bin > MaxKey:
+            return self.PValue[MaxKey]
+        return self.PValue[Bin]
+    def ComputePValuesMixtureModel(self, SortedList):
+        """
+        Our feature-set has an empirical distribution of scores.  We'll approximate
+        this distribution as a mixture of two distributions: gamma (false) and
+        normal (true).  Then we'll derive p-value (probability false) for each
+        score-bin.
+        """
+        Scores = []
+
+        
+        for (Score, Vector) in SortedList:
+            Scores.append(Score)
+        #for Bin in range(Model.MinBin, Model.MaxBin):
+        #    self.PValue[Bin] = 1.0 - Model.OddsTrue[Bin]
+        self.MixtureModel = MixtureModelClass(self.GetMMBinMultiplier())
+        self.MixtureModel.Model(Scores)
+        for (Score, Vector) in SortedList:
+            Vector.PValue = 1.0 - self.MixtureModel.GetOddsTrue(Score)        
+        self.MixtureModel.PlotDistribution("PValues.txt")
+    def ComputePValues(self, SortedList):
+        self.ComputePValuesMixtureModel(SortedList)
+    def ComputePValuesEmpirical(self, SortedList, PriorProbabilityFalse):
+        """
+        DEPRECATED - called only if mixture model fails.
+        
+        Input SortedList is a list of the form (ModelScore, FeatureVector), sorted from
+        highest to lowest ModelScore.
+        
+        self.PValue[Bin] is the probability that a peptide P is FALSE, given a score of
+        Bin or better.  Formally, it equals P(not P | S(P)>=Bin).  By Bayes' Theorem, this
+        is equal to:
+        P(S(P)>=Bin | not P) * P(not P) / P(S(P)>=Bin) 
+        """
+        # Passed in: PriorProbabilityFalse == P(not P)
+        # And set up dictionaries PValueTotals / PValueCounts to
+        #   compute ProbRatio == P(S(P)>=Bin | not P) / P(S(P)>=Bin)
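+        # In the loop below, after the top HighScoringCount instances have been seen:
+        #   P(S(P)>=Bin | not P) is estimated by HighScoringFalseInstanceCount / TotalFalseInstances
+        #   P(S(P)>=Bin)         is estimated by HighScoringCount / TotalInstances
+        # so ProbRatio = (HighScoringFalseInstanceCount * TotalInstances)
+        #                / (TotalFalseInstances * HighScoringCount).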
+        #PriorProbabilityFalse = 0
+        TrueInstanceCount = 0
+        HighScoringCount = 0
+        HighScoringFalseInstanceCount = 0
+        TotalInstances = len(SortedList)
+        PValueTotals = {}
+        PValueCounts = {}
+        TotalFalseInstances = 0
+        for (Score, Vector) in SortedList:
+            if not Vector.TrueFlag:
+                TotalFalseInstances += 1
+        HistoGood = {}
+        HistoBad = {}
+        TempOutputFile = open("TempPTMPValue.txt", "wb")
+        TempOutputFile.write("Bin\tHighFalse\tTotalInstances\tTotalFalseInstances\tHighScoringCount\t\n")
+        for (Score, Vector) in SortedList:
+            HighScoringCount += 1
+            if Vector.TrueFlag:
+                TrueInstanceCount += 1
+            else:
+                HighScoringFalseInstanceCount += 1
+            ProbRatio = (HighScoringFalseInstanceCount * TotalInstances) / float(TotalFalseInstances * HighScoringCount)
+            #ProbRatio = FalseInstanceCount / float(TrueInstanceCount + FalseInstanceCount)
+            Bin = int(round(Score * self.BinScalingFactor))
+            PValueTotals[Bin] = PValueTotals.get(Bin, 0) + ProbRatio
+            PValueCounts[Bin] = PValueCounts.get(Bin, 0) + 1
+            TempOutputFile.write("%s\t%s\t%s\t%s\t%s\t\n"%(Bin, HighScoringFalseInstanceCount, TotalInstances, TotalFalseInstances, HighScoringCount))
+            if Vector.TrueFlag:
+                HistoGood[Bin] = HistoGood.get(Bin, 0) + 1
+            else:
+                HistoBad[Bin] = HistoBad.get(Bin, 0) + 1
+        #PriorProbabilityFalse = FalseInstanceCount / float(FalseInstanceCount + TrueInstanceCount)
+        Keys = PValueTotals.keys()
+        Keys.sort()
+        for Bin in Keys:
+            AverageProbRatio = PValueTotals[Bin] / float(PValueCounts[Bin])
+            self.PValue[Bin] = AverageProbRatio * PriorProbabilityFalse
+            self.PValue[Bin] = max(self.PValue[Bin], 0.0001)
+            print "%s: %s"%(Bin, self.PValue[Bin])
+        TempOutputFile.write("\n\n\n")
+        for Bin in Keys:
+            TempOutputFile.write("%s\t%s\t%s\t\n"%(Bin, HistoGood.get(Bin, 0), HistoBad.get(Bin, 0)))
+        ############################################################
+        # Interpolate p-values, for missing bins:
+        Keys = self.PValue.keys()
+        Keys.sort()
+        MinKey = min(Keys)
+        MaxKey = max(Keys)
+        for Bin in range(MinKey, MaxKey):
+            if self.PValue.has_key(Bin):
+                PrevBin = Bin
+                PrevPValue = self.PValue[Bin]
+                continue
+            # Find the next bin:
+            for NextBin in range(Bin + 1, MaxKey + 1):
+                if self.PValue.has_key(NextBin):
+                    NextPValue = self.PValue[NextBin]
+                    break
+            # Interpolate from (PrevBin, PrevPValue) to (NextBin, NextPValue):
+            Slope = (NextPValue - PrevPValue) / float(NextBin - PrevBin)
+            Intermediate = PrevPValue + Slope * (Bin - PrevBin)
+            self.PValue[Bin] = Intermediate
+    def GetMMBinMultiplier(self):
+        return 10.0 #default
+
+
+class LDAModel(LearnerClass):
+    def __init__(self, FeatureList = None):
+        if FeatureList:
+            self.Size = len(FeatureList)
+        LearnerClass.__init__(self, FeatureList)
+    def GetCovarianceArray(self, VectorList):
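+        # Returns C with C[X][Y] = (1/N) * sum over VectorList of Vector[X] * Vector[Y];
+        # for the mean-corrected vectors passed in by Train(), this is the
+        # (1/N-normalized) sample covariance matrix of that group.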
+        VectorCount = float(len(VectorList))
+        C = zeros((self.Size, self.Size), FloatType)
+        for Vector in VectorList:
+            for X in range(self.Size):
+                for Y in range(self.Size):
+                    C[X][Y] += Vector[X] * Vector[Y] / VectorCount
+        return C
+    def LoadModel(self, FileName):
+        File = open(FileName, "rb")
+        cPickle.load(File) # model type
+        self.FeatureList = cPickle.load(File)
+        self.PValue = cPickle.load(File)
+        self.MinValues = cPickle.load(File)
+        self.MaxValues = cPickle.load(File)
+        self.CI = cPickle.load(File)
+        self.MeanGood = cPickle.load(File)
+        self.ConstantGood = cPickle.load(File)
+        self.MeanBad = cPickle.load(File)
+        self.ConstantBad = cPickle.load(File)
+        self.Size = len(self.FeatureList)
+        self.MixtureModel = UnpickleMixtureModel(File)
+        # Verbose stuff:
+        print "\n>>>PyLoadLDAModel(%s)"%FileName
+        print "Features: %s"%self.Size
+        print "MinValues: %.4f...%.4f"%(self.MinValues[0], self.MinValues[-1])
+        print "MaxValues: %.4f...%.4f"%(self.MaxValues[0], self.MaxValues[-1])
+        print "MeanGood: %.4f...%.4f"%(self.MeanGood[0], self.MeanGood[-1])
+        print "MeanBad: %.4f...%.4f"%(self.MeanBad[0], self.MeanBad[-1])
+        if self.Size > 1:
+            print "CI: %.4f, %.4f...%.4f, %.4f"%(self.CI[0][0], self.CI[0][1],
+                self.CI[self.Size - 1][self.Size - 2],
+                self.CI[self.Size - 1][self.Size - 1]
+                                         )
+        print "ConstTrue %.4f, ConstFalse %.4f"%(self.ConstantGood, self.ConstantBad)
+        File.close()
+    def SaveBinaryModel(self, FileName):
+        """
+        Write out a binary representation of this model.  
+        """
+        File = open(FileName, "wb")
+        File.write(struct.pack("<i", self.Size))
+        for FeatureIndex in range(self.Size):
+            File.write(struct.pack("<d", self.MinValues[FeatureIndex]))
+        for FeatureIndex in range(self.Size):
+            File.write(struct.pack("<d", self.MaxValues[FeatureIndex]))
+        for FeatureIndex in range(self.Size):
+            File.write(struct.pack("<d", self.MeanGood[FeatureIndex]))
+        for FeatureIndex in range(self.Size):
+            File.write(struct.pack("<d", self.MeanBad[FeatureIndex]))
+        File.write(struct.pack("<d", self.ConstantGood))
+        File.write(struct.pack("<d", self.ConstantBad))
+        for Row in range(self.Size):
+            for Column in range(self.Size):
+                File.write(struct.pack("<d", self.CI[Row][Column]))
+        File.close()
+    def SaveModel(self, FileName):
+        File = open(FileName, "wb")
+        cPickle.dump("LDA", File)
+        cPickle.dump(self.FeatureList, File)
+        cPickle.dump(self.PValue, File)
+        cPickle.dump(self.MinValues, File)
+        cPickle.dump(self.MaxValues, File)
+        cPickle.dump(self.CI, File)
+        cPickle.dump(self.MeanGood, File)
+        cPickle.dump(self.ConstantGood, File)
+        cPickle.dump(self.MeanBad, File)
+        cPickle.dump(self.ConstantBad, File)
+        try:
+            self.MixtureModel.PickleSelf(File)
+        except:
+            cPickle.dump(None, File)
+        File.close()
+    def ScoreInstance(self, RawFeatures):
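+        # Rescale each selected feature from [MinValues, MaxValues] into [-1, 1] (as in
+        # training), then return the difference between the two discriminant readings
+        # MeanGood . (CI x) + ConstantGood and MeanBad . (CI x) + ConstantBad;
+        # a positive score favours the "good" (true) class.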
+        Features = []
+        for FeatureIndex in self.FeatureList:
+            Features.append(RawFeatures[FeatureIndex])
+        for FeatureIndex in range(self.Size):
+            X = self.FeatureList[FeatureIndex]
+            HalfRange = (self.MaxValues[X] - self.MinValues[X]) / 2.0
+            if not HalfRange:
+                continue
+            Features[FeatureIndex] = (Features[FeatureIndex] - self.MinValues[X]) / HalfRange - 1.0
+        CIProduct = MatrixMultiply(self.CI, Features)
+        ReadingGood = MatrixMultiply(self.MeanGood, CIProduct) + self.ConstantGood
+        ReadingBad = MatrixMultiply(self.MeanBad, CIProduct) + self.ConstantBad
+        return (ReadingGood - ReadingBad)
+    def Test(self, FeatureSet):
+        # Scale features, according to our trained scaling
+        FeatureSet.MinValues = self.MinValues
+        FeatureSet.MaxValues = self.MaxValues
+        FeatureSet.ScaleFeatures()
+        # Compute scores:
+        for Vector in FeatureSet.AllVectors:
+            FixedVector = []
+            for FeatureIndex in self.FeatureList:
+                FixedVector.append(Vector.ScaledFeatures[FeatureIndex])
+            CIProduct = MatrixMultiply(self.CI, FixedVector)
+            ReadingGood = MatrixMultiply(self.MeanGood, CIProduct) + self.ConstantGood
+            ReadingBad = MatrixMultiply(self.MeanBad, CIProduct) + self.ConstantBad
+            Vector.Score = (ReadingGood - ReadingBad)
+    def Train(self, FeatureSet, VerboseFlag = 0):
+        # Get the feature range (training only):
+        FeatureSet.FindFeatureRanges()
+        self.MinValues = FeatureSet.MinValues
+        self.MaxValues = FeatureSet.MaxValues
+        # Sanity check: if a feature's range is a single point, it carries no
+        # information and, worse, it makes the covariance matrix non-invertible.
+        # So, filter out any such features.
+        InputFeatureList = self.FeatureList
+        self.FeatureList = []
+        for FeatureIndex in InputFeatureList:
+            if self.MinValues[FeatureIndex] < self.MaxValues[FeatureIndex]:
+                self.FeatureList.append(FeatureIndex)
+            else:
+                print "* Warning: Discarding feature '%s', every entry is %s"%(FeatureIndex, self.MinValues[FeatureIndex])
+        self.Size = len(self.FeatureList)
+        if self.Size == 0:
+            print "<< no features - bailing out >>"
+            return
+        # Scale features:
+        FeatureSet.ScaleFeatures()
+        AllVectors = []
+        TrueVectors = []
+        FalseVectors = []
+        for Vector in FeatureSet.AllVectors:
+            ScaledVector = []
+            for FeatureIndex in self.FeatureList:
+                ScaledVector.append(Vector.ScaledFeatures[FeatureIndex])
+            AllVectors.append(ScaledVector)
+            if Vector.TrueFlag:
+                TrueVectors.append(ScaledVector)
+            else:
+                FalseVectors.append(ScaledVector)
+        print "First true vector:", TrueVectors[0]
+        print "First false vector:", FalseVectors[0]
+##        # Temp: Ensure the vector lists are the same size!
+##        VectorCount = min(len(TrueVectors), len(FalseVectors))
+##        random.shuffle(FalseVectors)
+##        FalseVectors = FalseVectors[:VectorCount]
+        ############################################################
+        # Compute the mean vectors (training only):
+        self.MeanGood = [0] * self.Size
+        self.MeanBad = [0] * self.Size
+        self.MeanGlobal = [0] * self.Size
+        for Vector in TrueVectors:
+            for X in range(self.Size):
+                self.MeanGlobal[X] += Vector[X] / float(FeatureSet.Count)
+                self.MeanGood[X] += Vector[X] / float(FeatureSet.TrueCount)
+        for Vector in FalseVectors:
+            for X in range(self.Size):
+                self.MeanGlobal[X] += Vector[X] / float(FeatureSet.Count)
+                self.MeanBad[X] += Vector[X] / float(FeatureSet.FalseCount)
+        if VerboseFlag:
+            print "MeanGood:\n  ",
+            for Value in self.MeanGood:
+                print "%.3f"%Value,
+            print
+            print "MeanBad:\n  ",
+            for Value in self.MeanBad:
+                print "%.3f"%Value,
+            print
+            print "MeanGlobal:\n  ",
+            for Value in self.MeanGlobal:
+                print "%.3f"%Value,
+            print
+        ############################################################
+        # Compute mean-corrected vectors:
+        MeanCorrectedGoodVectors = []
+        MeanCorrectedBadVectors = []
+        for Vector in TrueVectors:
+            NewVector = []
+            for X in range(self.Size):
+                NewVector.append(Vector[X] - self.MeanGlobal[X])
+            MeanCorrectedGoodVectors.append(NewVector)
+        for Vector in FalseVectors:
+            NewVector = []
+            for X in range(self.Size):
+                NewVector.append(Vector[X] - self.MeanGlobal[X])
+            MeanCorrectedBadVectors.append(NewVector)
+        ############################################################
+        # Compute covariance matrices:
+        CovarArrayGood = self.GetCovarianceArray(MeanCorrectedGoodVectors)
+        if VerboseFlag:
+            print "CovarArrayGood:", CovarArrayGood
+        CovarArrayBad = self.GetCovarianceArray(MeanCorrectedBadVectors)
+        if VerboseFlag:
+            print "CovarArrayBad:", CovarArrayBad
+        # CovarArrayFull is the pooled within-group covariance matrix; it is computed
+        # component-wise as a weighted sum of CovarArrayGood and CovarArrayBad.
+        CovarArrayFull = zeros((self.Size, self.Size), FloatType)
+        for X in range(self.Size):
+            for Y in range(self.Size):
+                CovarArrayFull[X][Y] += CovarArrayGood[X][Y] * FeatureSet.TrueCount / float(FeatureSet.Count)
+                CovarArrayFull[X][Y] += CovarArrayBad[X][Y] * FeatureSet.FalseCount / float(FeatureSet.Count)
+        if VerboseFlag:
+            print "CovarArrayFull:", CovarArrayFull
+        ############################################################
+        # Invert the covariance array:
+        try:
+            self.CI = InvertMatrix(CovarArrayFull)
+        except:
+            traceback.print_exc()
+            print "Unable to invert covariance matrix!  Invalid feature set."
+            return 0
+        if VerboseFlag:
+            print "CI:", self.CI
+        self.GoodMuC = MatrixMultiply(self.CI, self.MeanGood)
+        if VerboseFlag:
+            print "GoodMuC:", self.GoodMuC
+        self.BadMuC = MatrixMultiply(self.CI, self.MeanBad)
+        if VerboseFlag:
+            print "BadMuC:", self.BadMuC
+        self.ConstantGood = -MatrixMultiply(self.MeanGood, self.GoodMuC) / 2.0
+        self.ConstantBad = -MatrixMultiply(self.MeanBad, self.BadMuC) / 2.0
+        #if VerboseFlag:
+        print "LDA Constant good %.4f constant bad %.4f"%(self.ConstantGood, self.ConstantBad)
+
+class SVMModel(LearnerClass):
+    def __init__(self, FeatureList = None):
+        self.Scaling = None
+        self.SupportVectors = None
+        self.PySVMReadyFlag = 0
+        LearnerClass.__init__(self, FeatureList)
+    def WriteSVMFeaturesToFile(self, FilePath, FeatureSet, ForceEqualCounts = 0):
+        #print "TRUE vectors %s, FALSE vectors %s"%(len(FeatureSet.TrueVectors), len(FeatureSet.FalseVectors))
+        #print "TRUE count %s, FALSE count %s"%(FeatureSet.TrueCount, FeatureSet.FalseCount)
+        File = open(FilePath, "wb")
+        if ForceEqualCounts:
+            # Shuffle the tuples:
+            TrueVectors = FeatureSet.TrueVectors[:]
+            random.shuffle(TrueVectors)
+            FalseVectors = FeatureSet.FalseVectors[:]
+            random.shuffle(FalseVectors)
+            # Try writing out equal numbers of true and false tuples:
+            MaxIndex = min(FeatureSet.TrueCount, FeatureSet.FalseCount)
+            MaxIndex = min(MaxIndex, MaxSVMFeatureCount)
+            TrueVectors = TrueVectors[:MaxIndex]
+            FalseVectors = FalseVectors[:MaxIndex]
+        else:
+            TrueVectors = FeatureSet.TrueVectors
+            FalseVectors = FeatureSet.FalseVectors
+        for Vector in TrueVectors:
+            Str = "+1 "
+            for FeatureIndex in range(len(self.FeatureList)):
+                Str += "%d:%.8f "%(FeatureIndex + 1, Vector.Features[self.FeatureList[FeatureIndex]])
+            File.write(Str + "\n")
+        for Vector in FalseVectors:
+            Str = "-1 "
+            for FeatureIndex in range(len(self.FeatureList)):
+                Str += "%d:%.8f "%(FeatureIndex + 1, Vector.Features[self.FeatureList[FeatureIndex]])
+            File.write(Str + "\n")
+        File.close()
+    def Train(self, FeatureSet, VerboseFlag = 0):
+        print "TRAINSVM()...", FeatureSet
+        TempFeaturesFileName = "PTMFeatures.SVM.txt"
+        TempScalingFileName = "PTMFeaturesSVMScale.txt"
+        TempScaledFeaturesFileName = "PTMFeatures.SVMScaled.txt"
+        TempModelFileName = "PTMFeatures.SVMScaled.txt.model"        
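+        # TempModelFileName relies on svm-train writing its model to
+        # "<training file>.model" (libsvm's default when no model file is given).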
+        # Write feature vectors, forcing equal true and false instance-counts:
+        self.WriteSVMFeaturesToFile(TempFeaturesFileName, FeatureSet, 1)
+        ###############################################################
+        # SCALE the features, and remember the scaling:
+        Command = r"%s -s %s %s > %s"%(PATH_SVMSCALE, TempScalingFileName, TempFeaturesFileName, TempScaledFeaturesFileName)
+        print Command
+        os.system(Command)
+        # Read the scaling limits, for later use:
+        File = open(TempScalingFileName, "rb")
+        self.Scaling = File.read()
+        File.close()
+        os.remove(TempScalingFileName)
+        print "Train!"
+        ###############################################################
+        # TRAIN the model.  We don't use cross-validation here, because a separate
+        # testing set will be used for evaluation.
+        Command = r"%s %s"%(PATH_SVMTRAIN, TempScaledFeaturesFileName)
+        print Command
+        os.system(Command)
+        File = open(TempModelFileName, "rb")
+        self.SupportVectors = File.read()
+        File.close()
+        ###############################################
+        # Clean up temp-files:
+        os.remove(TempFeaturesFileName)
+        os.remove(TempScaledFeaturesFileName)
+        os.remove(TempModelFileName)
+    def Test(self, FeatureSet):
+        if not self.Scaling or not self.SupportVectors:
+            print "Error in SVMModel.Test(): We haven't trained (or loaded) yet!"
+            return
+        TempFeaturesFileName = "TestFeatures.SVM.txt"
+        TempScalingFileName = "PTMFeaturesSVMScale.txt"
+        TempScaledFeaturesFileName = "TestFeatures.SVMScaled.txt"
+        TempModelFileName = "SVM.model"
+        TempOutputFileName = "SVMPrediction.txt"
+        TrueFlags = []
+        for Tuple in FeatureSet.TrueVectors:
+            TrueFlags.append(1)
+        for Tuple in FeatureSet.FalseVectors:
+            TrueFlags.append(0)        
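+        # TrueFlags mirrors the order in which WriteSVMFeaturesToFile() emits vectors
+        # (all true vectors, then all false vectors), so the prediction scores read
+        # back below can be matched to vectors by position.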
+        ########################################################################
+        # WRITE the testing set to file:
+        self.WriteSVMFeaturesToFile(TempFeaturesFileName, FeatureSet)
+        # Write our scaling-info and our model to files:
+        File = open(TempScalingFileName, "wb")
+        File.write(self.Scaling)
+        File.close()
+        File = open(TempModelFileName, "wb")
+        File.write(self.SupportVectors)
+        File.close()
+        # SCALE the testing set:
+        Command = r"%s -r %s %s > %s"%(PATH_SVMSCALE, TempScalingFileName, TempFeaturesFileName, TempScaledFeaturesFileName)
+        print Command
+        os.system(Command)
+        os.remove(TempFeaturesFileName)
+        os.remove(TempScalingFileName)
+        # Ok, now let's run svmpredict on all the instances in the TESTING set:
+##        Command = r"%s %s %s %s"%(PATH_SVMPREDICT, TempScaledFeaturesFileName, TempModelFileName, TempOutputFileName)
+##        print Command
+##        os.system(Command)
+        RunPySVM.Predict(TempScaledFeaturesFileName, TempModelFileName, TempOutputFileName)
+        # Now read in the results, and assign scores to the vectors of the set:
+        File = open(TempOutputFileName, "rb")
+        InstanceIndex = 0
+        TrueIndex = 0
+        FalseIndex = 0
+        SortedList = []
+        for FileLine in File.xreadlines():
+            Score = float(FileLine)
+            if TrueFlags[InstanceIndex]:
+                Vector = FeatureSet.TrueVectors[TrueIndex]
+                TrueIndex += 1
+            else:
+                Vector = FeatureSet.FalseVectors[FalseIndex]
+                FalseIndex += 1
+            Vector.Score = Score
+            InstanceIndex += 1
+        File.close()
+        ########################################################################
+        # Clean up temp-files:
+        os.remove(TempScaledFeaturesFileName)
+        os.remove(TempModelFileName)
+        os.remove(TempOutputFileName)
+    def SaveTextModel(self, Stub):
+        ModelPath = "%s.model"%Stub
+        File = open(ModelPath, "wb")
+        File.write(self.SupportVectors)
+        File.close()
+        ScalingPath = "%s.range"%Stub
+        File = open(ScalingPath, "wb")
+        File.write(self.Scaling)
+        File.close()
+    def SaveModel(self, FileName):
+        File = open(FileName, "wb")
+        cPickle.dump("SVM", File)
+        cPickle.dump(self.FeatureList, File)
+        cPickle.dump(self.PValue, File)
+        cPickle.dump(self.Scaling, File)
+        cPickle.dump(self.SupportVectors, File)
+        self.MixtureModel.PickleSelf(File)
+        File.close()
+    def LoadModel(self, FileName):
+        File = open(FileName, "rb")
+        cPickle.load(File) # model type
+        self.FeatureList = cPickle.load(File)
+        self.PValue = cPickle.load(File)
+        self.Scaling = cPickle.load(File)
+        self.SupportVectors = cPickle.load(File)
+        self.MixtureModel = UnpickleMixtureModel(File)
+        File.close()
+    def PreparePySVM(self):
+        """
+        Prepare PySVM to score some features using our model.
+        """
+        # Support vectors:
+        TempModelFileName = "TempModel.txt"
+        File = open(TempModelFileName, "wb")
+        File.write(self.SupportVectors)
+        File.close()
+        PySVM.LoadModel(TempModelFileName)
+        os.remove(TempModelFileName)
+        # Feature ranges:
+        TempScalingFileName = "TempScaling.txt"
+        File = open(TempScalingFileName, "wb")
+        File.write(self.Scaling)
+        File.close()
+        PySVM.LoadScaling(TempScalingFileName)
+        os.remove(TempScalingFileName)
+        # And now, we can score many vectors quickly!
+        self.PySVMReadyFlag = 1
+    def ScoreInstance(self, Features):
+        """
+        Compute the score for this instance. 
+        """
+        if not self.PySVMReadyFlag:
+            self.PreparePySVM()
+        Vector = []
+        for FeatureIndex in self.FeatureList:
+            Vector.append(Features[FeatureIndex])
+        Score = PySVM.ScaleAndScore(Vector)
+        return Score
+    
+class LogitModel(LearnerClass):
+    """
+    A maximum-likelihood logistic regression model.  The model's parameters
+    are tuned using the Newton-Raphson algorithm.  See p98 in Hastie/Tibshirani,
+    The Elements of Statistical Learning.  
+    """
+    def GetMMBinMultiplier(self):
+        return 40.0 #default
+    def ComputePValues(self, SortedList):
+        pass # The score we output IS a probability!
+    def GetFixedTuples(self, FeatureSet):
+        # Return fixed-up tuples, keeping an equal-size random selection of
+        # true and false tuples (at most 500 of each).
+        VectorSize = len(self.FeatureList) + 1 # add one for the CONSTANT input
+        AllTuples = [] # entries (TrueFlag, FeatureTuple)
+        KeepCount = min(FeatureSet.TrueCount, FeatureSet.FalseCount, 500)
+        random.shuffle(FeatureSet.TrueVectors)
+        random.shuffle(FeatureSet.FalseVectors)
+        for Vector in FeatureSet.TrueVectors[:KeepCount]:
+            FixedTuple = [1.0]
+            for FeatureIndex in self.FeatureList:
+                FixedTuple.append(Vector.Features[FeatureIndex])
+            AllTuples.append((1, tuple(FixedTuple)))
+        for Vector in FeatureSet.FalseVectors[:KeepCount]:
+            FixedTuple = [1.0]
+            for FeatureIndex in self.FeatureList:
+                FixedTuple.append(Vector.Features[FeatureIndex])
+            AllTuples.append((0, tuple(FixedTuple)))        
+        return AllTuples
+    def Train(self, FeatureSet, VerboseFlag = 0):
+        VectorSize = len(self.FeatureList) + 1 # add one for the CONSTANT input
+        AllTuples = self.GetFixedTuples(FeatureSet)
+        random.shuffle(AllTuples)
+        #################################################################
+        # Train the model - set the weight-vector self.Beta
+        # Initialize vector self.Beta, all zeroes:
+        self.Beta = zeros(VectorSize)
+        TupleCount = len(AllTuples)
+        # Initialize vector Y, indicating which vectors are true:
+        Y = zeros(TupleCount, FloatType)
+        for I in range(TupleCount):
+            if AllTuples[I][0]:
+                Y[I] = 1.0
+            else:
+                Y[I] = 0.0
+        # Initialize the input matrix X:
+        X = zeros((TupleCount, VectorSize), FloatType)
+        for I in range(TupleCount):
+            #X[I][0] = 1.0
+            for J in range(VectorSize):
+                X[I][J] = AllTuples[I][1][J]
+        XT = transpose(X)
+        PrevLogLikelihood = None
+        CycleCount = 0
+        while 1:
+            # Compute the current log-likelihood:
+            LogLikelihood = 0
+            for I in range(TupleCount):
+                BX = MatrixMultiply(self.Beta, X[I])
+                LogLikelihood += Y[I] * BX
+                LogLikelihood -= math.log(1 + math.exp(BX))
+            if PrevLogLikelihood != None:
+                if VerboseFlag:
+                    print "Log likelihood: %s (prev %s)"%(LogLikelihood, PrevLogLikelihood)
+                Improvement = PrevLogLikelihood - LogLikelihood
+                if Improvement < 0.001 and Improvement >= 0:
+                    print "Reached optimum: Stop now!"
+                    break
+            PrevLogLikelihood = LogLikelihood
+            # Compute the vector P:
+            P = zeros(TupleCount, FloatType)
+            for I in range(TupleCount):
+                self.BetaSum = 0
+                Tuple = AllTuples[I][1]
+                for J in range(VectorSize):
+                    self.BetaSum += self.Beta[J] * Tuple[J]
+                Exp = math.exp(self.BetaSum)
+                P[I] = Exp / (1.0 + Exp)
+            # Compute the diagonal matrix W:
+            W = zeros((TupleCount, TupleCount), FloatType)
+            for I in range(TupleCount):
+                W[I][I] = P[I] * (1.0 - P[I])
+            try:
+                WI = numpy.linalg.inv(W)
+            except:
+                traceback.print_exc()
+                print "** Error: Unable to perform logistic regression due to singular matrix."
+                print "Feature list was:", self.FeatureList
+                return None
+            # Compute the "response vector" z:
+            z = MatrixMultiply(X, self.Beta)
+            Diff = Y - P
+            z += MatrixMultiply(WI, Diff)
+            # Compute the new self.Beta:
+            Product = MatrixMultiply(XT, W)
+            Product = MatrixMultiply(Product, X)
+            ProdI = numpy.linalg.inv(Product)
+            Product = MatrixMultiply(ProdI, XT)
+            Product = MatrixMultiply(Product, W)
+            NewBeta = MatrixMultiply(Product, z)
+            if VerboseFlag:
+                print "Old self.Beta:", self.Beta
+                print "New self.Beta:", NewBeta
+            self.Beta = NewBeta
+            CycleCount += 1
+            if CycleCount >= 100:
+                print "100 cycles performed; stopping now!"
+                break
+    def ScoreInstance(self, Features):
+        FixedFeatures = [1.0,]
+        for FeatureIndex in self.FeatureList:
+            FixedFeatures.append(Features[FeatureIndex])
+        BX = 0
+        for I in range(len(FixedFeatures)):
+            BX += self.Beta[I] * FixedFeatures[I]
+        try:
+            Exp = math.exp(BX)
+        except:
+            print "** exponent unreachable:", BX
+            print "Features:", FixedFeatures
+            raise ValueError, "Features out-of-range!"
+        Score = Exp / (1.0 + Exp)
+        return Score
+    def Test(self, FeatureSet):
+        for Vector in FeatureSet.AllVectors:
+            FixedFeatures = [1.0,]
+            for FeatureIndex in self.FeatureList:
+                FixedFeatures.append(Vector.Features[FeatureIndex])
+            BX = 0
+            for I in range(len(FixedFeatures)):
+                BX += self.Beta[I] * FixedFeatures[I]
+            Exp = math.exp(BX)
+            Vector.Score = Exp / (1.0 + Exp)
+    def SaveModel(self, FileName):
+        File = open(FileName, "wb")
+        cPickle.dump("LOGIT", File)
+        cPickle.dump(self.FeatureList, File)
+        cPickle.dump(self.PValue, File)
+        cPickle.dump(self.Beta, File)
+        File.close()
+    def LoadModel(self, FileName):
+        File = open(FileName, "rb")
+        cPickle.load(File) # model type
+        self.FeatureList = cPickle.load(File)
+        self.PValue = cPickle.load(File)
+        self.Beta = cPickle.load(File)
+        File.close()
+   
+def Test():
+    pass
+
+if __name__ == "__main__":
+    # Command-line invocation: Test model loading/saving
+    Test()
diff --git a/MQScoreLDA2.model b/MQScoreLDA2.model
new file mode 100644
index 0000000..c8c3103
Binary files /dev/null and b/MQScoreLDA2.model differ
diff --git a/MQScoreLDA3.model b/MQScoreLDA3.model
new file mode 100644
index 0000000..92606eb
Binary files /dev/null and b/MQScoreLDA3.model differ
diff --git a/MQScoreSVM2.model b/MQScoreSVM2.model
new file mode 100644
index 0000000..28e1bcc
--- /dev/null
+++ b/MQScoreSVM2.model
@@ -0,0 +1,269 @@
+svm_type c_svc
+kernel_type rbf
+gamma 0.142857
+nr_class 2
+total_sv 260
+rho -0.532044
+label 1 -1
+nr_sv 127 133
+SV
+1 1:-0.241379 2:-0.48623 3:-0.90809 4:-0.518519 5:-0.571429 6:-0.0310774 
+1 1:-0.37931 2:-0.583895 3:-0.920724 4:-0.1875 5:-0.839286 6:-0.660705 7:1 
+1 1:-0.931034 2:-0.290438 3:-0.537272 4:-0.458333 5:-0.0357143 6:0.535738 
+1 1:-0.310345 2:-0.5338 3:-0.907917 4:-0.362745 5:-0.243697 6:-0.847589 7:1 
+1 1:-1 2:-0.276413 3:-0.675221 4:0.547619 5:-0.632653 6:0.24428 7:1 
+1 1:-0.37931 2:-0.390898 3:-0.886994 4:-0.0520833 5:-0.357143 6:-0.367445 
+1 1:-0.241379 2:-0.582448 3:-0.883201 4:0.324074 5:-0.428571 6:-0.525268 
+1 1:-0.724138 2:-0.468985 3:-0.877878 4:0.378788 5:-0.0649351 6:0.0650669 
+1 1:-0.931034 2:-0.288454 3:-0.506243 4:-0.1875 5:-0.357143 6:0.369617 
+1 1:-0.37931 2:-0.307705 3:-0.897673 4:-0.1875 5:-0.517857 6:-0.207538 7:1 
+1 1:-0.517241 2:-0.344343 3:-0.914243 4:0.702381 5:-0.0816326 6:-0.4745 
+1 1:-0.37931 2:-0.252088 3:-0.736665 4:0.0833333 5:-0.357143 6:-0.0929526 7:1 
+1 1:-0.517241 2:-0.371896 3:-0.831565 4:-0.380952 5:-0.44898 6:-0.0896593 7:1 
+0.9640805968402784 1:-1 2:-0.215181 3:-0.289894 4:-0.690476 5:0.469388 6:0.726722 
+1 1:0.862069 2:-0.663598 3:-0.909536 4:-0.617647 5:-0.773109 6:-0.412346 7:-1 
+1 1:-0.724138 2:-0.267428 3:-0.456891 4:-0.0151515 5:-0.532468 6:0.100579 7:1 
+1 1:-0.862069 2:-0.433816 3:-0.874294 4:-0.277778 5:0.428571 6:0.563044 
+1 1:-0.793103 2:-0.285441 3:-0.789383 4:-0.133333 5:-0.742857 6:0.461847 
+1 1:-0.724138 2:-0.20629 3:-0.701326 4:0.575758 5:-0.766234 6:-0.0335733 7:1 
+1 1:-0.931034 2:-0.172582 3:-0.722064 4:-0.1875 5:-0.357143 6:0.407466 7:1 
+1 1:-0.310345 2:-0.393416 3:-0.865847 4:0.27451 5:-0.243697 6:-0.319272 7:1 
+1 1:-0.310345 2:-0.419821 3:-0.768026 4:0.0196078 5:0.512605 6:-0.0311913 
+1 1:0.172414 2:-0.648493 3:-0.900489 4:-0.458333 5:-0.678571 6:-0.802595 
+1 1:0.37931 2:-0.600103 3:-0.909536 4:-0.277778 5:-0.428571 6:-0.547839 7:-1 
+1 1:-0.172414 2:-0.0824265 3:-0.651707 4:0.254386 5:-0.0526316 6:-0.282169 
+1 1:0.241379 2:-0.120562 3:-0.900489 4:-0.0466667 5:-0.28 6:-0.0313917 
+1 1:-0.310345 2:-0.0915925 3:-0.598523 4:0.0196078 5:-0.546218 6:0.436612 
+1 1:0.724138 2:-0.829349 3:-0.925247 4:-0.255208 5:-0.517857 6:-0.666227 7:-1 
+1 1:-0.793103 2:-0.354364 3:-0.530451 4:-0.35 5:-0.485714 6:0.368674 7:1 
+1 1:-0.655172 2:-0.35028 3:-0.782299 4:-0.0972222 5:-0.571429 6:0.153881 
+1 1:0.103448 2:-0.77115 3:-0.915565 4:-0.528986 5:-0.776398 6:-0.859992 
+1 1:-0.862069 2:-0.250968 3:-0.701443 4:0.444444 5:-0.142857 6:0.444627 
+1 1:0.241379 2:-0.619707 3:-0.900489 4:0.3 5:-0.485714 6:-0.049499 7:1 
+1 1:-0.586207 2:-0.468397 3:-0.901703 4:0.333333 5:-0.010989 6:-0.217662 7:1 
+1 1:-0.655172 2:-0.286756 3:-0.579113 4:-0.277778 5:-0.142857 6:0.252305 7:-1 
+1 1:-0.517241 2:-0.243184 3:-0.524577 4:-0.22619 5:-0.0816326 6:0.32376 
+1 1:-0.793103 2:-0.343474 3:-0.753986 4:-0.133333 5:-0.228571 6:-0.122669 
+1 1:-0.724138 2:-0.330754 3:-0.646377 4:-0.212121 5:0.168831 6:0.160098 7:1 
+1 1:-0.931034 2:-0.223299 3:-0.42933 4:-0.458333 5:0.285714 6:0.184891 
+1 1:-0.724138 2:-0.230862 3:-0.514869 4:0.181818 5:-0.298701 6:0.235952 
+1 1:-0.931034 2:-0.323058 3:-0.362464 4:0.354167 5:-0.678571 6:0.0569779 
+1 1:-0.655172 2:-0.285406 3:-0.579994 4:0.263889 5:-0.571429 6:-0.465278 
+1 1:-0.37931 2:-0.355082 3:-0.757722 4:0.489583 5:-0.196429 6:-0.20073 
+1 1:-0.103448 2:-0.3892 3:-0.825284 4:0.3 5:-0.228571 6:-0.362396 
+1 1:-0.517241 2:-0.338635 3:-0.803881 4:-0.690476 5:-0.0816326 6:0.0556964 
+1 1:0.103448 2:-0.223735 3:-0.918002 4:0.130435 5:0.00621114 6:0.229439 
+1 1:-0.862069 2:-0.236869 3:-0.320418 4:-0.277778 5:0.142857 6:-0.0131616 
+1 1:-0.103448 2:-0.297841 3:-0.824209 4:-0.025 5:0.414286 6:0.158518 
+1 1:-0.793103 2:-0.380259 3:-0.798152 4:-0.133333 5:-1 6:-0.461338 7:1 
+1 1:-0.241379 2:-0.618069 3:-0.882975 4:0.203704 5:0.142857 6:0.0430099 7:1 
+1 1:-0.586207 2:-0.245859 3:-0.623829 4:0.166667 5:-0.208791 6:-0.289937 7:1 
+1 1:-0.655172 2:-0.34345 3:-0.729769 4:0.263889 5:-0.785714 6:0.492403 7:1 
+1 1:-0.310345 2:0.0057331 3:-0.640964 4:0.0196078 5:0.361345 6:0.755297 7:-1 
+1 1:-0.586207 2:-0.395567 3:-0.740535 4:0.333333 5:-0.802198 6:-0.237142 7:1 
+1 1:-0.793103 2:-0.376799 3:-0.628935 4:0.0833333 5:-0.228571 6:-0.459936 7:1 
+1 1:-0.586207 2:-0.372921 3:-0.828409 4:-0.166667 5:0.186813 6:-0.145837 7:1 
+1 1:-0.793103 2:-0.63468 3:-0.935978 4:-0.35 5:-1 6:-0.688348 7:-1 
+1 1:0.517241 2:-0.902187 3:-0.940958 4:-0.775862 5:-0.73399 6:-0.701773 7:-1 
+1 1:-0.793103 2:-0.546968 3:-0.952616 4:-0.566667 5:-0.742857 6:0.131487 
+1 1:0.37931 2:-0.476981 3:-0.900489 4:-0.037037 5:-0.52381 6:-0.0628567 7:1 
+1 1:-0.586207 2:-0.332219 3:-0.7446 4:-0.5 5:0.582418 6:-0.456102 7:1 
+1 1:-0.586207 2:-0.446724 3:-0.870618 4:-1.08333e-008 5:-0.406593 6:-0.150417 
+1 1:-0.724138 2:-0.402211 3:-0.857637 4:-0.0151515 5:0.168831 6:0.146792 7:1 
+1 1:-0.793103 2:-0.419513 3:-0.863119 4:-0.133333 5:-0.485714 6:-0.403859 
+1 1:-0.862069 2:-0.336106 3:-0.7446 4:0.444444 5:-0.142857 6:-0.576431 7:1 
+1 1:-0.448276 2:-0.452956 3:-0.719941 4:-0.133333 5:-0.142857 6:0.346591 7:1 
+1 1:-0.448276 2:-0.178046 3:-0.696151 4:-0.133333 5:0.542857 6:0.71148 7:-1 
+1 1:0.655172 2:-0.936618 3:-0.951321 4:-0.72043 5:-0.751152 6:-0.721208 
+1 1:-0.310345 2:-0.224934 3:-0.589522 4:0.27451 5:-0.092437 6:-0.435616 
+1 1:-0.586207 2:-0.401171 3:-0.655289 4:-1.08333e-008 5:-0.010989 6:-0.32191 
+1 1:-0.0344828 2:-0.709157 3:-0.957154 4:-0.484127 5:-1 6:-0.865976 
+1 1:-0.586207 2:-0.385129 3:-0.716265 4:-1.08333e-008 5:0.384615 6:-0.43438 
+1 1:-1 2:-0.352717 3:-0.459725 4:-0.0714286 5:0.102041 6:0.610971 
+1 1:-0.655172 2:-0.503127 3:-0.873498 4:-0.277778 5:-0.357143 6:-0.404742 7:1 
+1 1:-0.310345 2:-0.50236 3:-0.809868 4:-0.107843 5:-0.243697 6:-0.237648 
+1 1:0.310345 2:-0.419544 3:-0.850887 4:-0.0833333 5:-0.406593 6:-0.174632 7:1 
+1 1:1 2:-0.833087 3:-0.912551 4:-0.638889 5:-0.428571 6:-0.510315 7:-1 
+1 1:-0.37931 2:-0.361767 3:-0.669208 4:-0.0520833 5:-0.0357143 6:-0.506907 7:1 
+1 1:-0.655172 2:-0.430158 3:-0.66526 4:0.444444 5:-0.571429 6:-0.0674941 7:1 
+1 1:-0.862069 2:-0.576875 3:-0.885538 4:-0.277778 5:0.142857 6:-0.766175 7:-1 
+1 1:-0.724138 2:-0.355254 3:-0.788071 4:0.378788 5:0.168831 6:-0.536547 
+1 1:0.241379 2:-0.634731 3:-0.870644 4:0.3 5:-0.28 6:-0.200825 7:1 
+1 1:-0.724138 2:-0.330181 3:-0.807739 4:0.378788 5:-0.0649351 6:-0.171485 7:1 
+1 1:-0.310345 2:-0.218355 3:-0.837711 4:-0.235294 5:0.0588235 6:-0.214493 7:-1 
+1 1:-0.655172 2:-0.215307 3:-0.777217 4:0.0833333 5:-0.142857 6:0.0727197 
+1 1:-0.793103 2:-0.415335 3:-0.76492 4:-0.133333 5:0.0285714 6:-0.469124 7:1 
+1 1:-0.655172 2:-0.500687 3:-0.873423 4:0.0833333 5:-0.357143 6:-0.5819 7:1 
+1 1:0.37931 2:-0.795349 3:-0.918951 4:-0.358025 5:-0.809524 6:0.115482 7:1 
+1 1:-0.241379 2:-0.664206 3:-0.943007 4:-0.398148 5:1.28571e-008 6:-0.652305 7:-1 
+1 1:-0.655172 2:-0.385729 3:-0.653315 4:-0.0972222 5:-0.357143 6:-0.242609 
+1 1:-0.793103 2:-0.275356 3:-0.69472 4:-0.133333 5:-0.485714 6:0.45331 
+1 1:0.37931 2:-0.418604 3:-0.900489 4:-0.037037 5:-0.142857 6:-0.106769 7:1 
+1 1:-0.862069 2:-0.261951 3:-0.623804 4:0.203704 5:-0.142857 6:-0.28015 
+1 1:0.310345 2:-0.436923 3:-0.900489 4:-0.833333 5:-0.010989 6:0.00381001 
+1 1:-0.793103 2:-0.535227 3:-0.877469 4:-0.783333 5:-0.742857 6:-0.0880047 
+1 1:-0.931034 2:-0.284357 3:-0.523207 4:-0.458333 5:0.607143 6:0.221204 7:1 
+1 1:-0.0344828 2:-0.204539 3:-0.88494 4:0.34127 5:-0.265306 6:0.0131454 
+1 1:-0.655172 2:-0.267018 3:-0.588588 4:-0.277778 5:-0.357143 6:0.485339 7:1 
+1 1:-0.793103 2:-0.286724 3:-0.772944 4:-0.35 5:0.285714 6:0.166651 
+1 1:-0.448276 2:-0.377137 3:-0.844638 4:0.444444 5:-0.314286 6:-0.0379484 
+1 1:-0.724138 2:-0.495351 3:-0.900489 4:0.181818 5:-0.766234 6:-0.406567 7:-1 
+1 1:0.517241 2:-0.55354 3:-0.932627 4:-0.178161 5:-0.46798 6:0.621521 7:1 
+1 1:-0.517241 2:-0.45223 3:-0.685305 4:0.0833333 5:-0.265306 6:-0.165251 7:1 
+1 1:-0.310345 2:-0.26761 3:-0.900489 4:-0.107843 5:0.0588235 6:-0.19112 
+1 1:-0.862069 2:-0.244531 3:-0.23871 4:0.203704 5:-0.714286 6:0.296864 
+1 1:-0.862069 2:-0.30754 3:-0.47998 4:-0.277778 5:0.428571 6:-0.252382 7:1 
+1 1:-0.724138 2:-0.646875 3:-0.974077 4:-0.409091 5:-0.766234 6:-0.852232 
+1 1:-0.586207 2:-0.478723 3:-0.803049 4:0.5 5:-0.010989 6:-0.192285 7:1 
+1 1:-0.793103 2:-0.118171 3:-0.491243 4:0.0833333 5:-0.228571 6:0.184716 
+1 1:-0.862069 2:-0.370359 3:-0.602779 4:0.444444 5:-1 6:0.504577 7:1 
+1 1:-0.724138 2:-0.291917 3:-0.813544 4:0.378788 5:-0.298701 6:-0.0712661 
+0.9339870694824339 1:-0.586207 2:-0.261986 3:-0.366721 4:0.166667 5:-0.406593 6:0.192552 
+1 1:0.172414 2:-0.667548 3:-0.900489 4:0.173611 5:-0.25 6:-0.537069 7:1 
+1 1:-0.793103 2:-0.24428 3:-0.501567 4:0.0833333 5:-0.742857 6:0.412821 
+1 1:-0.655172 2:-0.560635 3:-0.940958 4:-0.0972222 5:-0.785714 6:-0.412343 7:-1 
+1 1:-0.793103 2:-0.211425 3:-0.576712 4:0.0833333 5:0.0285714 6:-0.0393997 
+0.9270779802951742 1:-0.586207 2:-0.142073 3:-0.703038 4:0.5 5:-0.406593 6:0.186126 
+1 1:-0.724138 2:-0.179104 3:-0.422208 4:0.181818 5:-0.0649351 6:-0.0716954 
+1 1:-0.862069 2:-0.29406 3:-0.810751 4:0.203704 5:-0.142857 6:-7.71596e-005 7:1 
+1 1:-0.448276 2:-0.363591 3:-0.700354 4:-0.277778 5:-0.142857 6:0.528887 7:1 
+1 1:-0.172414 2:-0.179011 3:-0.875021 4:0.0263158 5:0.353383 6:-0.241964 
+1 1:-0.586207 2:-0.215836 3:-0.849651 4:-1.08333e-008 5:-0.208791 6:-0.287691 7:1 
+1 1:-0.793103 2:-0.299007 3:-0.408911 4:0.516667 5:-0.485714 6:-0.111755 
+1 1:-1 2:-0.521259 3:-0.853444 4:-0.0714286 5:-0.632653 6:0.424164 
+1 1:-0.793103 2:-0.335992 3:-0.637842 4:0.0833333 5:0.285714 6:-0.368557 
+1 1:-0.724138 2:-0.363766 3:-0.900489 4:-0.0151515 5:-0.766234 6:-0.592984 
+1 1:-0.862069 2:-0.337604 3:-0.352754 4:0.203704 5:-0.428571 6:-0.149161 
+-1 1:-0.793103 2:-0.300039 3:-0.685204 4:0.0833333 5:-0.228571 6:-0.287248 7:1 
+-1 1:-0.586207 2:-0.430449 3:-0.834167 4:-1.08333e-008 5:-0.604396 6:-0.331483 7:1 
+-1 1:0.0344828 2:-0.584435 3:-0.851202 4:0.280303 5:-0.415584 6:-0.629043 7:1 
+-1 1:-0.862069 2:-0.286357 3:-0.623804 4:-0.037037 5:-0.428571 6:0.0477259 7:-1 
+-1 1:-0.0344828 2:-0.390569 3:-0.900489 4:-0.277778 5:-0.265306 6:-0.328902 
+-1 1:-0.37931 2:-0.508333 3:-0.900489 4:-0.0520833 5:-0.678571 6:-0.488107 7:1 
+-0.3461158665854429 1:0.517241 2:-0.760923 3:-0.909536 4:-0.402299 5:-0.46798 6:-0.676679 7:-1 
+-1 1:0.172414 2:-0.609246 3:-0.884986 4:-0.00694444 5:-0.142857 6:-0.640898 
+-1 1:-0.103448 2:-0.749119 3:-0.957154 4:-0.566667 5:-0.357143 6:0.390458 7:1 
+-0.1986656559283165 1:-0.103448 2:-0.505162 3:-0.821382 4:-0.458333 5:-0.228571 6:-0.640563 7:1 
+-1 1:-0.931034 2:-0.450195 3:-0.735707 4:-0.1875 5:-0.357143 6:-0.156017 7:1 
+-1 1:-0.586207 2:-0.395101 3:-0.680834 4:-0.333333 5:0.78022 6:-0.378797 7:1 
+-1 1:-0.0344828 2:-0.35478 3:-0.865847 4:-0.174603 5:-0.142857 6:-0.0275198 
+-1 1:-0.241379 2:-0.526708 3:-0.911543 4:-0.037037 5:-0.714286 6:-0.559884 7:1 
+-1 1:-0.655172 2:-0.200549 3:-0.76409 4:0.0833333 5:-0.571429 6:0.270576 7:-1 
+-1 1:-1 2:-0.375035 3:-0.832913 4:-0.380952 5:0.102041 6:-0.0804503 7:1 
+-1 1:-0.103448 2:-0.628021 3:-0.944364 4:-0.241667 5:-0.228571 6:0.0472809 
+-1 1:-0.862069 2:-0.343055 3:-0.870644 4:-0.037037 5:-0.142857 6:0.345233 
+-1 1:-0.241379 2:-0.442694 3:-0.87441 4:0.0833333 5:0.142857 6:-0.588923 
+-1 1:-0.724138 2:-0.406114 3:-0.837092 4:-0.0151515 5:-0.532468 6:-0.00378432 7:1 
+-1 1:-0.724138 2:-0.233608 3:-0.662433 4:-0.212121 5:0.168831 6:-0.0642961 7:1 
+-1 1:-0.862069 2:-0.440206 3:-0.855997 4:0.203704 5:-0.142857 6:-0.389992 
+-1 1:-0.862069 2:-0.456476 3:-0.681721 4:-0.277778 5:-0.142857 6:-0.403798 7:1 
+-0.1188327971508159 1:-0.448276 2:-0.226053 3:-0.88494 4:-0.133333 5:-0.314286 6:-0.131822 7:-1 
+-1 1:0.0344828 2:-0.489735 3:-0.894901 4:-0.409091 5:-0.298701 6:-0.524047 7:1 
+-1 1:-0.655172 2:-0.287704 3:-0.897673 4:0.0833333 5:0.0714285 6:0.131022 7:-1 
+-1 1:-0.655172 2:-0.48276 3:-0.828798 4:-0.277778 5:-0.571429 6:-0.111716 7:1 
+-1 1:-0.655172 2:-0.392846 3:-0.8066 4:0.263889 5:-1 6:-0.542138 
+-1 1:-0.448276 2:-0.373677 3:-0.909536 4:-0.133333 5:-0.485714 6:-0.164521 
+-0.1911177572129832 1:-1 2:-0.390458 3:-0.834381 4:-0.380952 5:-0.632653 6:0.570309 
+-1 1:-0.862069 2:-0.404916 3:-0.73092 4:-0.037037 5:-0.428571 6:-0.394964 
+-1 1:-0.793103 2:-0.424249 3:-0.544062 4:-0.35 5:-0.228571 6:-0.389475 
+-1 1:-0.793103 2:-0.270138 3:-0.611044 4:0.0833333 5:0.0285714 6:0.196232 
+-1 1:-0.862069 2:-0.358739 3:-0.900489 4:-0.277778 5:0.428571 6:0.270363 
+-1 1:-0.862069 2:-0.243284 3:-0.801284 4:-0.037037 5:-0.142857 6:0.270616 7:1 
+-1 1:-0.517241 2:-0.591815 3:-0.917591 4:0.0833333 5:-0.44898 6:-0.637813 7:1 
+-1 1:0.0344828 2:-0.662881 3:-0.921478 4:-0.212121 5:-0.0649351 6:-0.463562 
+-1 1:0.517241 2:-0.75733 3:-0.909536 4:-0.477011 5:-0.73399 6:-0.566725 7:-1 
+-1 1:-0.862069 2:-0.358723 3:-0.844736 4:-0.037037 5:-0.714286 6:0.0890594 7:1 
+-1 1:-0.793103 2:-0.36291 3:-0.820885 4:-0.35 5:-0.485714 6:0.754638 
+-1 1:-0.793103 2:-0.306744 3:-0.756006 4:0.0833333 5:0.285714 6:0.0400099 7:-1 
+-1 1:-0.517241 2:-0.397803 3:-0.932062 4:-0.22619 5:-0.265306 6:0.123637 
+-1 1:-0.0344828 2:-0.434735 3:-0.909536 4:-0.380952 5:0.102041 6:-0.418503 
+-1 1:0.724138 2:-0.164899 3:-0.903678 4:0.015625 5:-0.357143 6:-0.4674 
+-1 1:-0.448276 2:-0.322144 3:-0.900489 4:-0.133333 5:-0.142857 6:-0.499298 
+-1 1:-0.931034 2:-0.309543 3:-0.638945 4:0.0833333 5:0.285714 6:-0.240527 
+-1 1:-0.931034 2:-0.293594 3:-0.510508 4:0.0833333 5:-0.357143 6:0.157461 7:1 
+-1 1:-0.862069 2:-0.45875 3:-0.915472 4:0.203704 5:0.142857 6:0.274205 
+-1 1:-0.931034 2:-0.429477 3:-0.544742 4:-0.458333 5:-0.0357143 6:-0.262015 7:1 
+-1 1:-1 2:-0.516554 3:-0.957154 4:0.238095 5:-0.632653 6:0.232113 
+-1 1:-0.655172 2:-0.306011 3:-0.679549 4:-0.0972222 5:-0.785714 6:-0.118516 7:-1 
+-1 1:-0.37931 2:-0.542896 3:-0.788843 4:-0.1875 5:0.125 6:0.12049 7:-1 
+-1 1:0.37931 2:-0.674517 3:-0.908419 4:-0.277778 5:-0.333333 6:-0.372233 
+-1 1:-0.517241 2:-0.380952 3:-0.92255 4:0.0833333 5:-0.44898 6:-0.51216 7:1 
+-1 1:0.724138 2:-0.817156 3:-0.928262 4:-0.59375 5:-0.919643 6:-0.367139 7:-1 
+-1 1:-0.172414 2:-0.54858 3:-0.900489 4:0.140351 5:-0.323308 6:0.0448816 
+-1 1:-1 2:-0.316595 3:-0.901703 4:-0.0714286 5:0.102041 6:-0.541845 7:1 
+-1 1:-1 2:-0.357677 3:-0.281824 4:-0.0714286 5:0.102041 6:0.610997 7:1 
+-1 1:-0.517241 2:-0.435548 3:-0.883168 4:-0.0714286 5:-0.265306 6:-0.0986343 
+-1 1:-0.862069 2:-0.187698 3:-0.598523 4:0.203704 5:0.714286 6:0.300126 
+-1 1:0.37931 2:-0.401564 3:-0.900489 4:-0.037037 5:0.047619 6:-0.476828 7:-1 
+-1 1:-0.517241 2:-0.497936 3:-0.908423 4:-0.0714286 5:-0.0816326 6:-0.195657 7:1 
+-1 1:-0.586207 2:-0.404157 3:-0.730835 4:0.5 5:-0.406593 6:-0.60907 
+-1 1:-0.37931 2:-0.403389 3:-0.882191 4:-0.1875 5:-0.517857 6:-0.0374236 
+-1 1:-0.241379 2:-0.544544 3:-0.883168 4:-0.398148 5:-0.571429 6:-0.102519 7:1 
+-1 1:-0.793103 2:-0.345087 3:-0.898462 4:-0.133333 5:0.542857 6:0.195684 
+-1 1:-0.517241 2:-0.256095 3:-0.876263 4:0.0833333 5:0.102041 6:-0.182766 7:-1 
+-1 1:-0.862069 2:-0.535223 3:-0.934693 4:-0.037037 5:-0.714286 6:0.384471 7:1 
+-1 1:-0.37931 2:-0.324887 3:-0.796261 4:0.0833333 5:-0.678571 6:-0.0394031 7:-1 
+-1 1:-0.931034 2:-0.390808 3:-0.493696 4:0.354167 5:-0.357143 6:0.0723801 
+-1 1:-0.862069 2:-0.321522 3:-0.459725 4:-0.277778 5:-0.142857 6:0.0106136 7:1 
+-1 1:-0.931034 2:-0.196354 3:-0.838718 4:-0.1875 5:-0.0357143 6:-0.181903 7:1 
+-1 1:0.448276 2:-0.68652 3:-0.900489 4:-0.0714286 5:-0.265306 6:-0.61311 7:1 
+-1 1:0.37931 2:-0.66825 3:-0.900489 4:-0.358025 5:-0.428571 6:-0.617936 7:1 
+-1 1:-0.931034 2:-0.191597 3:-0.782381 4:-0.458333 5:0.285714 6:0.719136 7:-1 
+-1 1:-0.862069 2:-0.301338 3:-0.71994 4:0.444444 5:-0.714286 6:0.0272952 
+-1 1:-0.655172 2:-0.297984 3:-0.866199 4:-0.0972222 5:-0.142857 6:0.246378 
+-0.00194321300395835 1:-0.517241 2:-0.455946 3:-0.868246 4:-0.380952 5:-1 6:0.506886 
+-1 1:-0.655172 2:-0.480972 3:-0.845555 4:-0.277778 5:0.0714285 6:-0.187785 
+-1 1:-0.655172 2:-0.483952 3:-0.836982 4:0.444444 5:-0.785714 6:-0.519897 7:1 
+-1 1:-0.172414 2:-0.288114 3:-0.900489 4:-0.31579 5:-0.729323 6:-0.0571029 
+-1 1:-0.37931 2:-0.331432 3:-0.811018 4:0.625 5:-0.196429 6:0.277337 
+-1 1:-0.517241 2:-0.388103 3:-0.745131 4:-0.0714286 5:-0.265306 6:-0.207788 
+-1 1:-0.241379 2:-0.543316 3:-0.84407 4:-0.037037 5:0.142857 6:-0.625793 
+-1 1:-0.0344828 2:-0.481044 3:-0.900489 4:-0.174603 5:-0.387755 6:-0.39622 7:1 
+-1 1:0.241379 2:-0.516918 3:-0.900489 4:-0.393333 5:-0.485714 6:-0.379752 
+-1 1:-0.0344828 2:-0.528156 3:-0.900489 4:-0.793651 5:-0.510204 6:0.0612873 7:1 
+-1 1:-0.931034 2:-0.393626 3:-0.631499 4:0.0833333 5:-0.0357143 6:-0.341374 
+-1 1:-0.862069 2:-0.391379 3:-0.902731 4:-0.277778 5:-0.142857 6:-0.037767 7:1 
+-1 1:-0.793103 2:-0.28445 3:-0.521809 4:-0.35 5:0.285714 6:-0.180078 
+-0.09605236892975597 1:-0.172414 2:-0.520633 3:-0.900489 4:-0.429825 5:0.218045 6:-0.532098 
+-1 1:-0.241379 2:-0.402277 3:-0.843681 4:-0.157407 5:-0.285714 6:-0.0713978 7:-1 
+-1 1:-1 2:-0.19968 3:-0.598831 4:0.547619 5:0.102041 6:-0.106444 
+-1 1:-1 2:-0.486976 3:-0.740206 4:0.238095 5:-0.632653 6:0.902842 
+-1 1:-0.103448 2:-0.681568 3:-0.973066 4:-0.241667 5:-0.485714 6:0.216375 7:1 
+-1 1:0.655172 2:-0.867517 3:-0.928277 4:-0.370968 5:-0.419355 6:-0.719936 7:1 
+-1 1:-0.655172 2:-0.360159 3:-0.810736 4:-0.0972222 5:-0.571429 6:-0.596847 7:1 
+-1 1:-0.517241 2:-0.494547 3:-0.880431 4:0.392857 5:-1 6:-0.243662 7:1 
+-0.64657580806115 1:0.862069 2:-1 3:-0.969184 4:-0.362745 5:-0.394958 6:-0.726131 
+-1 1:-0.793103 2:-0.332484 3:-0.797365 4:-0.133333 5:-0.228571 6:-0.259068 
+-1 1:-0.931034 2:-0.557352 3:-0.892522 4:0.625 5:-0.678571 6:-0.350259 
+-1 1:-0.793103 2:-0.467025 3:-0.860495 4:-0.35 5:-0.228571 6:0.319392 7:1 
+-1 1:-0.655172 2:-0.368204 3:-0.900489 4:-0.0972222 5:-0.357143 6:-0.138288 
+-1 1:-0.862069 2:-0.427017 3:-0.498743 4:-0.277778 5:-0.714286 6:0.288458 
+-1 1:-0.862069 2:-0.501067 3:-0.839876 4:0.203704 5:-1 6:-0.124326 7:1 
+-0.2258421797454636 1:-1 2:-0.494694 3:-0.753129 4:0.238095 5:-1 6:0.521165 7:-1 
+-1 1:-0.241379 2:-0.543085 3:-0.863171 4:0.0833333 5:-0.714286 6:-0.373437 
+-1 1:-0.931034 2:-0.414547 3:-0.705799 4:-0.1875 5:-0.0357143 6:-0.10521 7:1 
+-1 1:-0.724138 2:-0.281936 3:-0.72282 4:-0.212121 5:-0.298701 6:-0.204346 7:1 
+-1 1:-0.655172 2:-0.436776 3:-0.724993 4:-0.0972222 5:-0.571429 6:0.300358 
+-1 1:-0.793103 2:-0.336565 3:-0.369472 4:-0.35 5:-1 6:0.249166 
+-1 1:-0.37931 2:-0.445792 3:-0.79556 4:0.21875 5:-0.0357143 6:-0.441957 7:1 
+-1 1:-0.862069 2:-0.210173 3:-0.826262 4:-0.037037 5:-0.142857 6:-0.168494 
+-1 1:-0.862069 2:-0.50748 3:-0.870644 4:-0.037037 5:-0.714286 6:0.0609867 7:1 
+-1 1:-1 2:-0.391642 3:-0.746024 4:-0.0714286 5:-0.632653 6:0.327364 7:1 
+-1 1:-0.931034 2:-0.455162 3:-0.80779 4:-0.1875 5:-0.678571 6:0.76007 7:1 
+-1 1:-0.37931 2:-0.356715 3:-0.725025 4:-0.0520833 5:-0.517857 6:-0.163341 
+-1 1:-0.310345 2:-0.442106 3:-0.837711 4:-0.107843 5:-0.697479 6:0.0491281 
+-1 1:0.0344828 2:-0.325442 3:-0.898997 4:-0.113636 5:-0.649351 6:-0.303956 
+-1 1:-0.862069 2:-0.584626 3:-0.957154 4:-0.277778 5:-0.428571 6:0.904204 
+-1 1:0.310345 2:-0.672325 3:-0.900489 4:-0.25 5:-0.10989 6:-0.48558 7:-1 
+-1 1:-0.655172 2:-0.410823 3:-0.889056 4:0.0833333 5:0.0714285 6:-0.0893431 7:-1 
+-1 1:-0.793103 2:-0.411392 3:-0.853012 4:-0.133333 5:-0.485714 6:-0.13411 7:1 
+-1 1:-0.655172 2:-0.330315 3:-0.667955 4:-0.638889 5:0.0714285 6:-0.0845123 
+-1 1:-0.862069 2:-0.421901 3:-0.809639 4:0.203704 5:-0.142857 6:-0.245588 
+-1 1:-0.241379 2:-0.512065 3:-0.797986 4:-0.037037 5:0.142857 6:-0.205094 7:-1 
+-1 1:-1 2:-0.470644 3:-0.76633 4:0.238095 5:-0.632653 6:0.381814 
+-1 1:-0.448276 2:-0.450273 3:-0.900489 4:0.0111111 5:0.0285714 6:-0.240713 
+-1 1:-0.103448 2:-0.531 3:-0.826215 4:0.0833333 5:0.157143 6:-0.187406 
+-1 1:-0.241379 2:-0.60538 3:-0.892715 4:0.203704 5:-0.428571 6:-0.55211 
+-1 1:-0.862069 2:-0.578037 3:-0.975431 4:0.203704 5:-0.428571 6:-0.165502 7:1 
+-1 1:-1 2:-0.415978 3:-0.509378 4:-0.0714286 5:-0.265306 6:-0.400697 
+-1 1:0.37931 2:-0.605814 3:-0.900489 4:-0.277778 5:-0.428571 6:-0.720478 7:1 
diff --git a/MQScoreSVM2.range b/MQScoreSVM2.range
new file mode 100644
index 0000000..a62e96a
--- /dev/null
+++ b/MQScoreSVM2.range
@@ -0,0 +1,9 @@
+x
+-1 1
+1 6 35
+2 -62.2700119 172.55780029
+3 -1.98770213 13.04912186
+4 0 0.92307693
+5 0 0.77777779
+6 0.00559854 0.97112942
+7 0 2
diff --git a/MQScoreSVM3.model b/MQScoreSVM3.model
new file mode 100644
index 0000000..7b3209e
--- /dev/null
+++ b/MQScoreSVM3.model
@@ -0,0 +1,282 @@
+svm_type c_svc
+kernel_type rbf
+gamma 0.142857
+nr_class 2
+total_sv 273
+rho -0.954745
+label 1 -1
+nr_sv 134 139
+SV
+1 1:-0.272727 2:0.0899649 3:-0.55925 4:-0.0260869 5:-0.233333 6:-0.303918 7:1 
+1 1:-0.454545 2:-0.148332 3:-0.890517 4:-0.49913 5:-0.693333 6:-0.360163 7:-1 
+1 1:-0.490909 2:-0.0506354 3:-0.619732 4:-0.0434782 5:-0.25463 6:-0.339243 7:1 
+1 1:-0.854545 2:0.0834544 3:-0.556154 4:0.0434783 5:-0.0873015 6:-0.565834 7:1 
+1 1:-0.127273 2:-0.503121 3:-0.936973 4:-0.386189 5:-0.699346 6:-0.644947 
+1 1:0.163636 2:-0.100406 3:-0.611421 4:-0.403727 5:-0.574074 6:-0.578468 7:-1 
+1 1:-0.490909 2:-0.291929 3:-0.860365 4:-0.391304 5:-0.467593 6:-0.621126 7:-1 
+1 1:0.2 2:-0.626494 3:-0.905317 4:-0.854398 5:-0.762274 6:-0.685701 7:-1 
+1 1:-0.563636 2:0.115605 3:-0.577448 4:0.422925 5:-0.186869 6:-0.197135 
+1 1:-0.6 2:-0.0945951 3:-0.826219 4:-0.10559 5:-0.391534 6:-0.134741 7:1 
+1 1:-0.745455 2:0.0514783 3:-0.826219 4:-0.0179028 5:0.0522876 6:-0.294844 7:1 
+1 1:-0.381818 2:0.294622 3:-0.405372 4:0.15942 5:-0.621399 6:-0.574228 
+1 1:-0.309091 2:0.101064 3:-0.5954 4:-0.424288 5:-0.295019 6:0.21399 7:1 
+1 1:-0.636364 2:0.0927785 3:-0.545102 4:0.147826 5:-0.744444 6:-0.209692 
+1 1:-0.2 2:0.128124 3:-0.64662 4:0.108696 5:-0.121528 6:-0.57484 7:1 
+1 1:-0.236364 2:-0.529273 3:-0.936973 4:-0.932679 5:-0.752688 6:-0.769762 7:-1 
+1 1:-0.818182 2:-0.0711355 3:-0.7052 4:0.252174 5:-0.488889 6:-0.532768 7:1 
+1 1:-0.636364 2:0.239167 3:-0.498404 4:0.147826 5:-0.488889 6:-0.170057 
+1 1:-0.6 2:0.159823 3:-0.826219 4:0.0931678 5:0.338624 6:0.161563 
+1 1:0.2 2:-0.620566 3:-0.936973 4:-0.660263 5:-0.524548 6:-0.6463 7:-1 
+1 1:-0.381818 2:-0.0359829 3:-0.611421 4:-0.690821 5:-0.8107 6:-0.689936 7:-1 
+1 1:-0.309091 2:-0.00291753 3:-0.611421 4:-0.424288 5:-0.559387 6:0.348496 7:1 
+1 1:-0.818182 2:0.0103802 3:-0.606302 4:-0.0260869 5:-0.148148 6:-0.223739 7:1 
+1 1:-0.563636 2:0.0300404 3:-0.591458 4:-0.146245 5:-0.30303 6:-0.33707 7:1 
+1 1:-0.127273 2:-0.173354 3:-0.678562 4:-0.386189 5:-0.54902 6:-0.779538 7:1 
+1 1:-0.781818 2:-0.186729 3:-1 4:-0.217391 5:0.118056 6:-0.252483 7:1 
+1 1:-0.563636 2:-0.153324 3:-0.776537 4:-0.146245 5:-0.419192 6:-0.454266 7:1 
+1 1:-0.0181818 2:-0.139491 3:-0.611421 4:-0.548766 5:-0.861862 6:-0.772751 7:-1 
+1 1:-0.709091 2:0.0405295 3:-0.552922 4:0.0434783 5:-0.858025 6:-0.290584 7:-1 
+1 1:-0.527273 2:0.212174 3:-0.277383 4:0.542533 5:-0.555556 6:-0.21165 
+1 1:0.418182 2:-0.57029 3:-0.905317 4:-0.616681 5:-0.791383 6:-0.504048 7:-1 
+1 1:-0.272727 2:-0.144955 3:-0.602385 4:-0.791304 5:-0.659259 6:-0.873601 7:-1 
+1 1:-0.963636 2:0.289556 3:-0.187726 4:0.13834 5:-0.30303 6:0.612709 
+1 1:0.381818 2:-0.593798 3:-0.905317 4:-0.695652 5:-0.733796 6:-0.521568 7:-1 
+1 1:-0.381818 2:-0.377484 3:-0.905317 4:-0.536232 5:-1 6:0.274908 7:-1 
+1 1:-0.345455 2:0.0193282 3:-0.581561 4:-0.254658 5:-0.269841 6:-0.469301 7:1 
+1 1:-0.0181818 2:-0.419755 3:-0.905317 4:-0.774383 5:-0.585586 6:-0.533297 7:-1 
+1 1:0.272727 2:-0.132596 3:-0.591458 4:-0.768116 5:-0.88642 6:-0.570232 7:-1 
+1 1:-0.890909 2:0.266417 3:-0.555322 4:0.123746 5:-0.213675 6:0.54908 
+1 1:-0.490909 2:-0.333979 3:-0.905317 4:-0.217391 5:-0.787037 6:-0.684209 7:-1 
+1 1:0.0181818 2:-0.536225 3:-0.905317 4:-0.505721 5:-0.52924 6:-0.304684 7:-1 
+1 1:-0.309091 2:0.0246884 3:-0.611421 4:-0.352324 5:-0.206897 6:-0.272258 7:1 
+1 1:-0.127273 2:-0.387605 3:-0.905317 4:-0.570332 5:-0.77451 6:-0.460648 7:-1 
+1 1:0.0909091 2:-0.575961 3:-0.905317 4:-0.895652 5:-0.808333 6:-0.656387 7:-1 
+1 1:0.0909091 2:-0.102027 3:-0.591458 4:-0.686956 5:-0.616667 6:-0.735073 7:-1 
+1 1:-0.0181818 2:-0.167028 3:-0.611421 4:-0.60517 5:-0.792793 6:-0.694207 7:-1 
+1 1:-0.309091 2:0.0412439 3:-0.591458 4:0.00749625 5:-0.206897 6:-0.237956 7:1 
+1 1:-0.890909 2:0.0751203 3:-0.591458 4:0.284281 5:-0.606838 6:-0.0229926 7:1 
+1 1:-0.0909091 2:-0.113376 3:-0.591458 4:-0.880745 5:-1 6:-0.873557 
+1 1:-0.309091 2:-0.297658 3:-0.843902 4:-0.568216 5:-0.911877 6:-0.52133 7:-1 
+1 1:-0.345455 2:-0.050942 3:-0.596014 4:-0.776398 5:-0.452381 6:-0.788429 7:-1 
+1 1:-0.381818 2:0.155907 3:-0.5954 4:-0.304348 5:-0.526749 6:0.0555391 7:1 
+1 1:-0.454545 2:0.0369172 3:-0.611421 4:-0.749565 5:-0.08 6:-0.150702 
+1 1:-0.163636 2:-0.219047 3:-0.702456 4:-0.177866 5:0.00673406 6:-0.582193 7:-1 
+1 1:-0.563636 2:-0.0202913 3:-0.912263 4:-0.0513833 5:-0.535354 6:0.169819 7:1 
+1 1:-0.127273 2:0.0487245 3:-0.60144 4:-0.0179028 5:-0.323529 6:-0.638323 7:-1 
+1 1:0.0181818 2:-0.578235 3:-0.936973 4:-0.505721 5:-0.730994 6:-0.605073 7:-1 
+1 1:-0.2 2:-0.455601 3:-0.905317 4:-0.608696 5:-0.920139 6:-1 7:-1 
+1 1:-0.490909 2:-0.286568 3:-0.860246 4:-0.565217 5:-0.680556 6:-0.869284 7:-1 
+1 1:0.454545 2:-0.697657 3:-0.905317 4:-0.833043 5:-0.948889 6:-0.773849 7:-1 
+1 1:0.236364 2:-0.291848 3:-0.678562 4:-0.193676 5:-0.593434 6:-0.781461 7:-1 
+1 1:-0.854545 2:0.0856328 3:-0.571902 4:-0.10559 5:-0.269841 6:-0.355254 7:1 
+1 1:-0.381818 2:0.0797368 3:-0.579788 4:-0.381642 5:-0.148148 6:-0.0679593 7:1 
+1 1:-0.272727 2:-0.155045 3:-0.668056 4:-0.373913 5:-0.403704 6:-0.628571 7:-1 
+0.09087011095639566 1:-0.381818 2:0.058083 3:-0.584987 4:-0.381642 5:0.135802 6:-0.0235703 7:1 
+1 1:-0.854545 2:-0.0110974 3:-0.611421 4:-0.254658 5:-0.634921 6:-0.145705 7:1 
+1 1:-0.381818 2:-0.017075 3:-0.611421 4:-0.149758 5:-0.242798 6:-0.265751 7:1 
+1 1:-0.163636 2:-0.0153693 3:-0.591458 4:-0.177866 5:-0.612795 6:-0.466029 7:-1 
+1 1:-0.672727 2:0.209886 3:-0.676371 4:-0.231121 5:0.210526 6:-0.177047 
+1 1:-0.709091 2:0.224216 3:-0.492565 4:0.275362 5:-0.148148 6:0.364779 
+1 1:-0.854545 2:-0.0389125 3:-0.661235 4:-0.254658 5:-0.269841 6:-0.750148 
+1 1:-0.890909 2:-0.0382322 3:-0.82403 4:-0.197324 5:-0.410256 6:0.176862 7:-1 
+1 1:-0.418182 2:-0.109256 3:-0.806766 4:-0.357859 5:0.179487 6:0.282263 
+1 1:-0.345455 2:0.12064 3:-0.591458 4:0.118012 5:0.00396823 6:-0.266596 
+1 1:-0.818182 2:0.238362 3:-0.542748 4:-0.0260869 5:0.362963 6:0.371868 
+1 1:-0.0181818 2:-0.501633 3:-0.905317 4:-0.717979 5:-0.585586 6:-0.628016 7:-1 
+1 1:-0.490909 2:-0.254761 3:-0.905317 4:-0.652174 5:-0.893519 6:-0.538317 7:-1 
+1 1:-0.309091 2:0.048215 3:-0.609596 4:0.00749625 5:-0.0306513 6:-0.351411 7:1 
+1 1:-0.6 2:-0.0666729 3:-0.826219 4:-0.10559 5:-0.148148 6:-0.0614172 
+1 1:-0.563636 2:-0.173841 3:-0.812676 4:-0.146245 5:0.277778 6:-0.197305 7:1 
+1 1:0.236364 2:-0.713155 3:-0.936973 4:-0.905138 5:-1 6:-0.913983 
+1 1:-0.381818 2:0.152434 3:-0.547043 4:0.31401 5:-0.432099 6:-0.0954 
+1 1:-0.381818 2:-0.0557794 3:-0.587214 4:-0.536232 5:-0.526749 6:-0.548287 7:-1 
+1 1:-0.0181818 2:-0.111359 3:-0.587214 4:-0.548766 5:-0.792793 6:-0.707813 7:-1 
+1 1:-0.709091 2:-0.0426993 3:-0.591458 4:-0.42029 5:-0.858025 6:-0.675295 
+1 1:-0.818182 2:0.265098 3:-0.537694 4:-0.165217 5:0.192593 6:0.304924 
+1 1:-0.672727 2:-0.0963553 3:-0.75851 4:-0.121281 5:-0.730994 6:0.0472178 7:1 
+1 1:-0.309091 2:-0.0503368 3:-0.591458 4:-0.208396 5:-0.118774 6:-0.370396 7:1 
+1 1:-0.418182 2:0.233661 3:-0.257027 4:0.0434783 5:-0.213675 6:-0.492917 7:1 
+1 1:0.0909091 2:-0.173653 3:-0.611421 4:-0.478261 5:-0.872222 6:-0.698216 7:-1 
+1 1:-0.527273 2:-0.0688602 3:-0.587214 4:-0.637051 5:-1 6:-0.744235 7:-1 
+1 1:-0.6 2:-0.124142 3:-0.826219 4:-0.10559 5:-0.148148 6:-0.0407903 7:1 
+1 1:-0.490909 2:-0.236045 3:-0.913266 4:-0.826087 5:-0.25463 6:-0.605505 
+1 1:0.309091 2:-0.790448 3:-0.905317 4:-1 5:-0.888889 6:-0.632747 7:-1 
+1 1:-0.163636 2:0.0221065 3:-0.591458 4:-0.114624 5:0.00673406 6:-0.308139 7:1 
+1 1:0.0181818 2:-0.0518016 3:-0.591458 4:-0.560641 5:-0.932749 6:-0.742613 7:-1 
+1 1:-0.127273 2:-0.421377 3:-0.905317 4:-0.754476 5:-0.849673 6:-0.754287 7:-1 
+1 1:-0.854545 2:-0.092065 3:-0.612299 4:-0.403727 5:-0.634921 6:-0.614144 
+0.8219388582076175 1:-0.818182 2:0.264198 3:-0.316984 4:0.252174 5:0.192593 6:0.130613 
+1 1:-0.490909 2:-0.145743 3:-0.795196 4:-0.217391 5:-0.574074 6:-0.641057 7:-1 
+1 1:-0.0181818 2:-0.0894404 3:-0.611421 4:-0.548766 5:-0.585586 6:-0.404195 7:-1 
+1 1:1 2:-1 3:-0.905317 4:-0.935786 5:-0.921368 6:-0.745086 7:-1 
+1 1:-0.709091 2:-0.14343 3:-0.797298 4:-0.42029 5:-0.432099 6:-0.400887 7:1 
+1 1:0.563636 2:-0.86391 3:-0.905317 4:-1 5:-0.855346 6:-0.916095 7:-1 
+1 1:-0.672727 2:0.049684 3:-0.591458 4:-0.340961 5:-0.327485 6:-0.385873 7:1 
+1 1:-0.927273 2:0.0955423 3:-0.60144 4:0.0434783 5:-0.574074 6:-0.140094 7:1 
+1 1:-0.2 2:0.0159662 3:-0.5956 4:0.369565 5:-0.28125 6:-0.597218 7:1 
+1 1:-0.236364 2:-0.0456932 3:-0.591458 4:-0.663394 5:-0.587814 6:-0.288249 
+1 1:-0.563636 2:0.0163316 3:-0.591458 4:-0.241107 5:-0.535354 6:-0.319443 7:1 
+1 1:-0.454545 2:-0.385983 3:-0.905317 4:-0.916522 5:-0.693333 6:-0.672395 7:-1 
+1 1:-0.781818 2:0.122266 3:-0.591458 4:-0.217391 5:-0.520833 6:-0.175818 7:1 
+1 1:-0.381818 2:0.0776673 3:-0.591458 4:0.15942 5:-0.526749 6:-0.360997 
+1 1:-0.563636 2:0.150934 3:-0.77094 4:-0.241107 5:0.393939 6:0.483988 7:-1 
+1 1:0.0909091 2:-0.651052 3:-0.921145 4:-1 5:-0.936111 6:-0.946851 7:-1 
+1 1:-0.818182 2:0.30899 3:-0.33703 4:-0.0260869 5:0.362963 6:0.273405 
+1 1:0.454545 2:-0.792387 3:-0.905317 4:-0.874783 5:-0.897778 6:-0.817506 7:-1 
+1 1:-0.672727 2:-0.0224593 3:-0.826219 4:-0.231121 5:-0.327485 6:-0.114133 7:1 
+1 1:-0.490909 2:0.359379 3:-0.575952 4:-0.217391 5:-0.680556 6:0.294065 
+1 1:-0.854545 2:0.0704604 3:-0.570923 4:-0.254658 5:-0.0873015 6:-0.44293 
+1 1:-0.527273 2:0.196481 3:-0.581406 4:0.0888469 5:0.222222 6:0.0283912 
+1 1:0.0909091 2:-0.576766 3:-0.905317 4:-0.791304 5:-0.936111 6:-0.547386 7:-1 
+1 1:-0.454545 2:-0.38071 3:-0.936973 4:-0.833043 5:-0.693333 6:-0.910928 7:-1 
+1 1:0.0909091 2:-0.11038 3:-0.591458 4:-0.530435 5:-0.744444 6:-0.605258 7:-1 
+1 1:-0.236364 2:-0.24063 3:-0.936973 4:-0.192146 5:-0.175627 6:-0.272347 
+1 1:-0.454545 2:0.00996574 3:-0.81553 4:0.168696 5:0.124444 6:0.10609 
+1 1:-0.309091 2:-0.401403 3:-0.905317 4:-0.856072 5:-0.559387 6:-0.549036 7:-1 
+1 1:0.2 2:-0.2221 3:-0.635208 4:-0.611729 5:-0.762274 6:-0.785483 7:-1 
+1 1:0.0181818 2:-0.101844 3:-0.611421 4:-0.670481 5:-0.663743 6:-0.474902 7:-1 
+1 1:-0.2 2:-0.159109 3:-0.591458 4:-0.478261 5:-0.760417 6:-0.504889 7:-1 
+1 1:0.127273 2:-0.167568 3:-0.611421 4:-0.541888 5:-0.688347 6:-0.777346 7:-1 
+1 1:-0.636364 2:-0.0458581 3:-0.764868 4:-0.0608696 5:-0.488889 6:-0.283288 7:1 
+1 1:0.0545455 2:-0.128377 3:-0.587214 4:-0.732441 5:-0.672365 6:-0.664909 7:-1 
+1 1:-0.781818 2:-0.115252 3:-0.695252 4:0.173913 5:-0.361111 6:-0.580836 7:-1 
+1 1:-0.381818 2:0.140729 3:-0.537694 4:0.00483096 5:-0.90535 6:-0.0959735 
+-1 1:-0.418182 2:0.0246992 3:-0.611421 4:0.0434783 5:-0.017094 6:-0.62525 
+-1 1:-0.0181818 2:-0.42518 3:-0.905317 4:-0.379553 5:-0.792793 6:-0.631876 7:-1 
+-0.8415080727149786 1:-0.527273 2:-0.0638818 3:-0.611421 4:-0.455577 5:-0.444444 6:-0.660896 7:1 
+-1 1:-0.527273 2:0.0671946 3:-0.611421 4:-0.0926276 5:-0.333333 6:-0.405899 
+-1 1:-0.527273 2:0.00797 3:-0.681303 4:-0.00189033 5:-0.333333 6:-0.667839 
+-1 1:-0.709091 2:0.00571178 3:-0.611421 4:-0.304348 5:-0.290123 6:-0.400502 7:1 
+-1 1:-0.854545 2:-0.0139259 3:-0.60144 4:-0.10559 5:-0.634921 6:-0.463205 7:1 
+-1 1:-0.0545455 2:-0.612309 3:-0.936973 4:-0.942029 5:-1 6:-0.918739 7:-1 
+-1 1:-0.418182 2:-0.0729808 3:-0.826219 4:-0.197324 5:-0.311966 6:-0.149266 7:-1 
+-1 1:-0.672727 2:0.00858909 3:-0.611421 4:-0.0114416 5:0.0760234 6:-0.543789 7:1 
+-1 1:0.0545455 2:-0.642649 3:-0.936973 4:-0.839465 5:-0.803419 6:-0.450206 7:-1 
+-1 1:0.163636 2:-0.53689 3:-0.905317 4:-0.652174 5:-0.452381 6:-0.752031 7:-1 
+-1 1:-0.818182 2:0.0618789 3:-0.611421 4:-0.582609 5:-0.488889 6:0.301099 
+-1 1:0.309091 2:-0.712624 3:-0.927801 4:-1 5:-0.833333 6:-0.507934 7:-1 
+-1 1:0.236364 2:-0.526204 3:-0.905317 4:-0.667984 5:-0.709596 6:-0.489389 7:-1 
+-1 1:-0.563636 2:-0.022088 3:-0.591458 4:-0.146245 5:-0.767677 6:-0.681754 7:1 
+-1 1:-0.0909091 2:-0.496776 3:-0.905317 4:-0.880745 5:-0.926984 6:-0.767767 7:-1 
+-1 1:-0.890909 2:0.00483222 3:-0.611636 4:-0.197324 5:-0.017094 6:-0.0546463 
+-0.7494969301875734 1:-0.854545 2:0.163124 3:-0.60144 4:-0.10559 5:-0.0873015 6:-0.462605 7:-1 
+-1 1:-0.0545455 2:-0.405538 3:-0.936973 4:-0.362319 5:-0.503086 6:-0.631079 7:-1 
+-1 1:-0.0545455 2:-0.477893 3:-0.886726 4:-0.478261 5:-0.716049 6:-0.632654 7:-1 
+-1 1:-0.381818 2:0.0339564 3:-0.825444 4:-0.227053 5:-0.053498 6:0.300788 7:-1 
+-1 1:0.236364 2:-0.0857729 3:-0.60144 4:-0.667984 5:-0.477273 6:-0.543342 
+-1 1:-0.272727 2:-0.443833 3:-0.905317 4:-0.791304 5:-1 6:-0.868337 7:-1 
+-1 1:-0.709091 2:-0.0400854 3:-0.826219 4:0.15942 5:-0.716049 6:-0.102501 7:-1 
+-1 1:-0.781818 2:-0.0221854 3:-0.650554 4:-0.347826 5:-0.520833 6:-0.196758 7:1 
+-1 1:-0.418182 2:-0.0599039 3:-0.650441 4:-0.117057 5:-0.115385 6:-0.653608 7:1 
+-1 1:-0.418182 2:-0.0606882 3:-0.642602 4:0.0434783 5:-0.508547 6:0.427144 
+-0.3512983867561833 1:-0.0909091 2:-0.488606 3:-0.936973 4:-0.582609 5:-0.634921 6:-0.84491 7:-1 
+-1 1:-0.127273 2:-0.567727 3:-0.936973 4:-1 5:-0.849673 6:-0.817741 7:-1 
+-1 1:-0.636364 2:-0.0148505 3:-0.542748 4:-0.165217 5:-0.233333 6:-0.488847 7:1 
+-1 1:-0.2 2:-0.0997141 3:-0.591458 4:-0.478261 5:-0.760417 6:-0.597278 7:-1 
+-1 1:-0.163636 2:-0.0900011 3:-0.611421 4:-0.683794 5:-0.612795 6:-0.350616 7:-1 
+-1 1:0.0909091 2:-0.644727 3:-0.936973 4:-1 5:-0.808333 6:-0.701207 7:-1 
+-1 1:-0.927273 2:0.065699 3:-0.549035 4:-0.478261 5:0.0648148 6:0.163265 
+-1 1:-0.2 2:-0.167746 3:-0.611421 4:-0.673913 5:-0.680556 6:-0.707054 7:-1 
+-1 1:-0.2 2:-0.13371 3:-0.613205 4:-0.543478 5:-0.520833 6:-0.503354 7:1 
+-1 1:-0.781818 2:-0.0455872 3:-0.599737 4:-0.347826 5:-0.0416667 6:-0.668734 7:1 
+-1 1:-0.6 2:-0.082194 3:-0.700873 4:-0.10559 5:-0.026455 6:-0.619646 
+-1 1:-0.709091 2:-0.170798 3:-0.864976 4:-0.42029 5:-0.148148 6:0.246331 
+-1 1:0.0545455 2:-0.532674 3:-0.905317 4:-0.839465 5:-0.803419 6:-0.299747 7:-1 
+-1 1:-0.454545 2:0.202734 3:-0.685363 4:0.168696 5:0.124444 6:-0.358694 
+-1 1:-0.236364 2:-0.0876724 3:-0.611421 4:-0.528752 5:-0.752688 6:-0.391895 7:1 
+-1 1:-0.672727 2:-0.0794497 3:-0.678562 4:-0.340961 5:-0.596491 6:-0.394035 7:1 
+-1 1:-0.2 2:-0.181021 3:-0.611421 4:-0.478261 5:-0.361111 6:-0.783595 7:1 
+-0.2088601833578218 1:-0.163636 2:-0.0591844 3:-0.618165 4:-0.683794 5:-0.457912 6:-0.543508 7:-1 
+-1 1:0.0909091 2:-0.524417 3:-0.905317 4:-0.269565 5:-0.361111 6:-0.617018 7:-1 
+-0.2444993380120906 1:-0.490909 2:-0.0527465 3:-0.611421 4:-0.565217 5:-0.680556 6:-0.405953 7:1 
+-1 1:-0.818182 2:0.122344 3:-0.506547 4:-0.0260869 5:-0.148148 6:0.154165 7:-1 
+-1 1:-0.745455 2:0.00956307 3:-0.627254 4:-0.263427 5:-0.0980392 6:-0.430725 7:1 
+-1 1:0.163636 2:-0.231461 3:-0.611421 4:-0.602484 5:-0.574074 6:-0.573201 7:-1 
+-1 1:-0.381818 2:0.0543607 3:-0.611421 4:-0.0724637 5:-0.526749 6:-0.38618 
+-1 1:-0.236364 2:-0.10504 3:-0.611421 4:-0.663394 5:-0.917563 6:-0.707793 7:-1 
+-1 1:-0.927273 2:0.101661 3:-0.330395 4:0.217391 5:-0.361111 6:-0.278765 
+-1 1:-0.454545 2:-0.269005 3:-0.855685 4:-0.165217 5:-0.693333 6:0.282706 7:1 
+-1 1:-0.672727 2:-0.165249 3:-0.826219 4:-0.121281 5:-0.596491 6:0.135902 
+-1 1:0.236364 2:-0.571522 3:-0.929989 4:-0.810277 5:-0.593434 6:-0.639986 7:-1 
+-1 1:-0.0181818 2:-0.101771 3:-0.613178 4:-0.548766 5:-0.585586 6:-0.332644 7:-1 
+-1 1:-0.927273 2:0.0470043 3:-0.611421 4:-0.130435 5:-0.148148 6:0.390503 
+-1 1:-0.781818 2:0.109517 3:-0.429301 4:0.173913 5:-0.520833 6:-0.496133 
+-1 1:-0.636364 2:-0.359085 3:-0.936973 4:-0.478261 5:-0.233333 6:-0.453834 7:1 
+-1 1:-0.454545 2:0.0248246 3:-0.611421 4:0.168696 5:-0.182222 6:-0.622496 
+-1 1:-0.890909 2:-0.0157021 3:-0.826219 4:-0.0367893 5:-0.017094 6:0.313367 7:-1 
+-1 1:0.0909091 2:-0.582931 3:-0.936973 4:-0.73913 5:-0.744444 6:-0.765332 7:-1 
+-1 1:-0.309091 2:-0.040331 3:-0.613178 4:-0.28036 5:-0.295019 6:-0.61389 
+-1 1:-0.672727 2:-0.0809354 3:-0.630219 4:-0.340961 5:-0.461988 6:-0.0198897 
+-1 1:-0.781818 2:0.133051 3:-0.790454 4:-0.217391 5:-0.0416667 6:0.236305 7:-1 
+-1 1:0.0181818 2:-0.132608 3:-0.611421 4:-0.78032 5:-0.798246 6:-0.717319 7:-1 
+-1 1:-0.781818 2:0.0225945 3:-0.561739 4:0.0434783 5:-0.361111 6:-0.440202 
+-0.6494670709789474 1:-0.636364 2:0.210515 3:-0.591458 4:-0.373913 5:-0.488889 6:0.00938966 7:-1 
+-1 1:-0.709091 2:-0.0227442 3:-0.591458 4:-0.42029 5:-0.716049 6:0.0218606 7:1 
+-0.01740323773261104 1:-0.709091 2:-0.105364 3:-0.674621 4:0.0434783 5:-0.716049 6:-0.375908 7:-1 
+-1 1:-0.127273 2:-0.157604 3:-0.619244 4:-0.508951 5:-0.699346 6:-0.635428 7:-1 
+-1 1:-0.890909 2:0.0469361 3:-0.407931 4:0.284281 5:-0.606838 6:-0.506912 7:1 
+-1 1:-0.2 2:-0.232313 3:-0.881596 4:-0.478261 5:-0.680556 6:-0.764303 7:-1 
+-1 1:0.0909091 2:-0.550348 3:-0.934601 4:-0.582609 5:-0.616667 6:-0.396859 7:-1 
+-1 1:-0.527273 2:0.0596615 3:-0.591458 4:-0.274102 5:-0.444444 6:-0.250664 
+-1 1:-0.381818 2:0.0936253 3:-0.587214 4:-0.381642 5:-0.716049 6:-0.52036 7:-1 
+-1 1:-0.490909 2:-0.050703 3:-0.707439 4:-0.130435 5:-0.574074 6:-0.57135 7:1 
+-1 1:-0.236364 2:0.0243524 3:-0.591458 4:-0.528752 5:-0.752688 6:-0.407722 7:1 
+-1 1:-0.781818 2:-0.0646515 3:-0.826219 4:-0.347826 5:0.118056 6:0.529903 7:-1 
+-1 1:-0.2 2:-0.0601066 3:-0.589336 4:-0.608696 5:-0.440972 6:-0.629401 7:-1 
+-1 1:-0.309091 2:-0.111427 3:-0.649367 4:-0.352324 5:0.145594 6:-0.778008 
+-1 1:-0.0181818 2:-0.559929 3:-0.905317 4:-0.548766 5:-0.792793 6:-0.651412 7:-1 
+-1 1:-0.0181818 2:-0.120456 3:-0.611421 4:-0.548766 5:-0.585586 6:-0.772799 7:-1 
+-1 1:-0.272727 2:0.131706 3:-0.591472 4:-0.373913 5:-0.574074 6:-0.206779 7:1 
+-1 1:0.0181818 2:-0.187157 3:-0.611421 4:-0.670481 5:-0.596491 6:-0.77588 7:-1 
+-1 1:-0.854545 2:0.017969 3:-0.611421 4:-0.10559 5:-0.81746 6:0.401227 
+-1 1:-0.163636 2:-0.055253 3:-0.660318 4:-0.43083 5:-0.457912 6:-0.21914 
+-1 1:-0.345455 2:-0.176929 3:-0.734192 4:-0.0310559 5:-0.72619 6:-0.795786 7:1 
+-1 1:-0.272727 2:-0.12477 3:-0.702316 4:-0.304348 5:-0.403704 6:-0.716498 7:1 
+-1 1:-0.927273 2:0.0457185 3:-0.564576 4:-0.130435 5:-0.148148 6:-0.066251 7:1 
+-0.4671105187941058 1:-0.309091 2:-0.163095 3:-0.613823 4:-0.28036 5:-0.206897 6:-0.629506 7:-1 
+-1 1:-0.672727 2:0.209898 3:-0.611421 4:-0.121281 5:-0.192982 6:-0.0768508 
+-0.01182579585874976 1:-0.0909091 2:-0.106798 3:-0.611421 4:-0.403727 5:-0.342857 6:-0.588994 
+-1 1:-0.854545 2:-0.0165895 3:-0.660564 4:-0.403727 5:-0.634921 6:-0.168617 7:1 
+-1 1:-0.636364 2:0.0707515 3:-0.623186 4:0.356522 5:-0.616667 6:-0.204432 
+-1 1:-0.272727 2:-0.0206975 3:-0.611421 4:-0.513043 5:-0.659259 6:-0.284838 7:1 
+-1 1:-0.781818 2:-0.0197077 3:-0.591458 4:0.173913 5:-0.520833 6:-0.5623 
+-1 1:-0.454545 2:-0.249197 3:-0.826219 4:0.0852174 5:-0.488889 6:-0.576205 7:-1 
+-1 1:-0.709091 2:0.026341 3:-0.826219 4:-0.304348 5:-0.716049 6:0.759555 
+-1 1:-0.781818 2:-0.0885713 3:-0.826219 4:0.0434783 5:-0.361111 6:-0.280461 
+-1 1:0.0545455 2:-0.467114 3:-0.905317 4:-0.785953 5:-0.606838 6:-0.762643 7:-1 
+-1 1:-0.781818 2:0.0130292 3:-0.618827 4:-0.217391 5:-0.840278 6:0.388225 
+-1 1:0.527273 2:-0.804917 3:-0.936973 4:-0.759197 5:-0.803419 6:-0.625366 7:-1 
+-1 1:-0.563636 2:-0.118494 3:-0.68463 4:-0.146245 5:-0.651515 6:-0.363501 7:1 
+-1 1:-0.927273 2:0.0696857 3:-0.67799 4:0.0434783 5:0.0648148 6:-0.163748 
+-1 1:0.127273 2:-0.585144 3:-0.905317 4:-0.592789 5:-0.688347 6:-0.751939 7:-1 
+-1 1:-0.0181818 2:-0.551865 3:-0.905317 4:-0.943596 5:-0.930931 6:-0.657208 7:-1 
+-1 1:-0.563636 2:-0.119306 3:-0.620539 4:-0.146245 5:-0.419192 6:-0.710607 7:1 
+-1 1:-0.345455 2:-0.0397587 3:-0.620817 4:-0.254658 5:-0.178571 6:-0.696565 
+-1 1:0.0545455 2:-0.319987 3:-0.826219 4:-0.518395 5:-0.410256 6:-0.612174 7:-1 
+-1 1:-0.854545 2:0.0353539 3:-0.471227 4:-0.552795 5:0.0952381 6:-0.244297 
+-1 1:-0.0909091 2:-0.491882 3:-0.905317 4:-0.880745 5:-0.853968 6:-0.789059 7:-1 
+-1 1:0.490909 2:-0.8078 3:-0.905317 4:-1 5:-0.899782 6:-0.818685 7:-1 
+-1 1:-0.236364 2:-0.123518 3:-0.611421 4:-0.326788 5:-0.670251 6:-0.527405 7:-1 
+-1 1:-0.2 2:-0.127267 3:-0.611421 4:-0.543478 5:-0.680556 6:-0.48972 7:1 
+-1 1:-0.890909 2:-0.00932866 3:-0.826219 4:-0.357859 5:-0.213675 6:0.281728 7:1 
+-1 1:0.2 2:-0.513874 3:-0.889033 4:-0.660263 5:-0.643411 6:-0.63252 7:-1 
+-1 1:-0.163636 2:-0.122697 3:-0.591458 4:-0.620553 5:-0.845118 6:-0.617291 7:-1 
+-1 1:-0.127273 2:-0.130189 3:-0.591458 4:-0.508951 5:-0.849673 6:-0.69219 7:-1 
+-1 1:-0.490909 2:0.103316 3:-0.60144 4:-0.0434782 5:-0.680556 6:-0.489871 
+-1 1:-0.818182 2:-0.0500891 3:-0.591458 4:-0.304348 5:-0.318518 6:-0.5543 7:1 
+-1 1:-0.309091 2:-0.0364527 3:-0.591458 4:-0.712144 5:-0.206897 6:-0.438237 7:1 
+-1 1:-0.490909 2:-0.0807608 3:-0.611421 4:-0.391304 5:-0.574074 6:-0.604395 7:1 
+-0.3713394347709519 1:-0.0545455 2:-0.418901 3:-0.905317 4:-0.478261 5:-0.503086 6:-0.61105 7:-1 
+-1 1:-0.127273 2:-0.484589 3:-0.936973 4:-0.938619 5:-1 6:-0.677746 7:-1 
+-1 1:-0.818182 2:0.168407 3:-0.555322 4:-0.0260869 5:0.0222223 6:0.180617 7:-1 
+-1 1:-0.563636 2:-0.0494529 3:-0.591458 4:-0.43083 5:-0.186869 6:-0.759459 7:1 
+-1 1:-0.672727 2:-0.00816984 3:-0.591458 4:0.0983982 5:-0.192982 6:-0.160027 
+-1 1:-0.0181818 2:-0.14728 3:-0.611421 4:-0.323149 5:-0.447447 6:-0.685109 
+-1 1:-0.0181818 2:-0.559724 3:-0.936973 4:-0.661575 5:-0.723724 6:-0.802932 7:-1 
+-1 1:-0.854545 2:0.0184649 3:-0.60144 4:-0.10559 5:-0.81746 6:0.335871 
+-1 1:-0.0181818 2:-0.0845103 3:-0.591458 4:-0.60517 5:-0.516516 6:-0.71556 7:1 
+-1 1:0.0181818 2:-0.531072 3:-0.905317 4:-0.78032 5:-0.865497 6:-0.627418 7:-1 
+-1 1:-0.127273 2:0.179987 3:-0.611421 4:-0.44757 5:-0.699346 6:-0.367645 7:-1 
+-1 1:-0.381818 2:-0.00539883 3:-0.611421 4:-0.149758 5:0.135802 6:-0.694783 7:1 
+-1 1:-0.345455 2:0.0139488 3:-0.611421 4:-0.180124 5:-0.361111 6:-0.55566 
+-1 1:0.0909091 2:-0.135784 3:-0.591458 4:-0.791304 5:-0.297222 6:-0.646688 7:-1 
diff --git a/MQScoreSVM3.range b/MQScoreSVM3.range
new file mode 100644
index 0000000..e4e1a5e
--- /dev/null
+++ b/MQScoreSVM3.range
@@ -0,0 +1,9 @@
+x
+-1 1
+1 9 64
+2 -117.65477753 122.49903107
+3 -1.90801275 5.78528643
+4 0 0.95833331
+5 0 0.78260869
+6 0.02718957 0.96912074
+7 0 2
diff --git a/MS2DB.c b/MS2DB.c
new file mode 100644
index 0000000..3fc2d7f
--- /dev/null
+++ b/MS2DB.c
@@ -0,0 +1,688 @@
+//Title:          MS2DB.c
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+// Code to parse databases in MS2DB format.
+
+#include "CMemLeak.h"
+#include "MS2DB.h"
+#include "Spliced.h"
+#include "expat.h"
+#include "Errors.h"
+
+#define MS2DB_BUFFER_SIZE 102400
+
+// Macro for basic error-checking.  Assumes Cursor is set.  If the given 
+// expression isn't true, we set the error flag and bail out of our current
+// function.  (When Cursor->ErrorFlag is set, all our callback functions
+// will terminate immediately)
+#define XML_ASSERT(expr) \
+    if (!(expr)) \
+    {\
+        Cursor->ErrorFlag = 1;\
+        REPORT_ERROR_S(25, #expr);\
+        return;\
+    }
+
+#define XML_ASSERT_RETVAL(expr) \
+    if (!(expr)) \
+    {\
+        Cursor->ErrorFlag = 1;\
+        REPORT_ERROR_S(25, #expr);\
+        return 0;\
+    }
+
+// MS2ParseState tells us which tag we are currently inside.  
+// The allowed "moves" (from tags to children) are those listed 
+// in the XML schema.  However, in the interest of extensibility,
+// we simply *ignore* any tags we aren't expecting to see.
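+// For orientation, an MS2DB document nests tags roughly as follows (an
+// illustrative sketch based on the handlers below, not the full schema;
+// names and sequence are made up):
+//   <Database>
+//     <Gene Name="GeneA" ExonCount="2">
+//       <Exon Index="0"><ExonSequence Length="7">MKWVTFI</ExonSequence></Exon>
+//       <Exon Index="1"><LinkFrom Index="0" AA="G" /></Exon>
+//     </Gene>
+//   </Database>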
+typedef enum MS2ParseState
+{
+    evMS2DBNone = 0,
+    evMS2DBDatabase,
+    evMS2DBGene,
+    evMS2DBGeneLocus,
+    evMS2DBGeneNotes,
+    evMS2DBExon,
+    evMS2DBExonSequence,
+    evMS2DBExonExtends,
+    evMS2DBExonLinkFrom,
+    evMS2DBExonMod,
+    evMS2DBExonModCrossReference,
+    evMS2DBGeneCrossReference
+} MS2ParseState;
+
+typedef struct MS2ParseCursor
+{
+    SearchInfo* Info;
+    int State;
+    GeneStruct* CurrentGene;
+    ExonStruct* CurrentExon;
+    int CurrentExonIndex;
+    int CurrentLinkIndex;
+    int CurrentExonSequenceIndex;
+    int ErrorFlag;
+    TrieNode* Root;
+    XML_Parser Parser;
+    int DBNumber;
+} MS2ParseCursor;
+
+// Free an MS2ParseCursor, including its attached gene.
+void FreeMS2ParseCursor(MS2ParseCursor* Cursor)
+{
+    if (!Cursor)
+    {
+        return;
+    }
+    if (Cursor->CurrentGene)
+    {
+        FreeGene(Cursor->CurrentGene);
+        Cursor->CurrentGene = NULL;
+    }
+    SafeFree(Cursor);
+}
+
+// expat callback: Handle character data in the body of a tag.
+void MS2CharacterDataHandler(void* UserData, const XML_Char* String, int Length)
+{
+    MS2ParseCursor* Cursor;
+    int NewLength;
+    //
+    Cursor = (MS2ParseCursor*)UserData;
+    if (Cursor->ErrorFlag)
+    {
+        return;
+    }
+    switch (Cursor->State)
+    {
+    case evMS2DBExonSequence:
+        // Incorporate this sequence into the exon sequence.
+        XML_ASSERT(Cursor->CurrentExon);
+        //XML_ASSERT(Cursor->CurrentExon->Sequence);
+        if (!Cursor->CurrentExon->Sequence)
+        {
+            // Bail out here rather than passing a NULL sequence to strlen below.
+            printf("* Warning: No sequence!?\n");
+            return;
+        }
+        NewLength = strlen(Cursor->CurrentExon->Sequence) + Length;
+        if (NewLength > Cursor->CurrentExon->Length)
+        {
+            REPORT_ERROR_IS(29, Cursor->CurrentExonIndex, Cursor->CurrentGene->Name);
+            Cursor->ErrorFlag = 1;
+            return;
+        }
+        strncat(Cursor->CurrentExon->Sequence, String, Length);
+        break;
+    default:
+        break;
+    }
+}
+
+// Parse attributes of a Gene tag.  
+void ParseGeneAttributes(MS2ParseCursor* Cursor, const char** Attributes)
+{
+    int AttributeIndex;
+    const char* Name;
+    const char* Value;
+    //
+    if (Cursor->ErrorFlag)
+    {
+        return;
+    }
+
+    XML_ASSERT(Cursor->CurrentGene);
+
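+    // expat hands attributes to the start-element callback as a NULL-terminated
+    // array of alternating name/value strings, hence the step of two below.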
+    for (AttributeIndex = 0; Attributes[AttributeIndex]; AttributeIndex += 2)
+    {
+        Name = Attributes[AttributeIndex];
+        Value = Attributes[AttributeIndex + 1];
+        if (!CompareStrings(Name, "Name"))
+        {
+            strncpy(Cursor->CurrentGene->Name, Value, GENE_NAME_LENGTH);
+        }
+        else if (!CompareStrings(Name, "ExonCount"))
+        {
+            Cursor->CurrentGene->ExonCount = atoi(Value);
+        }
+        else if (!CompareStrings(Name, "Chromosome"))
+        {
+            Cursor->CurrentGene->ChromosomeNumber = atoi(Value);
+        }
+        else if (!CompareStrings(Name, "ForwardFlag"))
+        {
+            Cursor->CurrentGene->ForwardFlag = atoi(Value);
+        }
+        else if (GlobalOptions->XMLStrictFlag)
+        {
+            // The strict-mode catch-all must come last, so that known attributes
+            // are still parsed when strict XML checking is enabled.
+            REPORT_WARNING_ISS(28, XML_GetCurrentLineNumber(Cursor->Parser), Name, "Gene");
+        }
+    }
+    // Allocate exons:
+    XML_ASSERT(Cursor->CurrentGene->ExonCount >= 1 && Cursor->CurrentGene->ExonCount <= 10000);
+    Cursor->CurrentGene->Exons = (ExonStruct*)calloc(Cursor->CurrentGene->ExonCount, sizeof(ExonStruct));
+}
+
+// Parse attributes of a Locus tag.  
+void ParseLocusAttributes(MS2ParseCursor* Cursor, const char** Attributes)
+{
+    int AttributeIndex;
+    const char* Name;
+    const char* Value;
+    //
+    if (Cursor->ErrorFlag)
+    {
+        return;
+    }
+
+    XML_ASSERT(Cursor->CurrentGene);
+
+    for (AttributeIndex = 0; Attributes[AttributeIndex]; AttributeIndex += 2)
+    {
+        Name = Attributes[AttributeIndex];
+        Value = Attributes[AttributeIndex + 1];
+        if (!CompareStrings(Name, "chromosome"))
+        {
+            Cursor->CurrentGene->ChromosomeNumber = atoi(Value);
+        }
+        else if (!CompareStrings(Name, "ForwardFlag"))
+        {
+            Cursor->CurrentGene->ForwardFlag = atoi(Value);
+        }
+        else if (GlobalOptions->XMLStrictFlag)
+        {
+            REPORT_WARNING_ISS(28, XML_GetCurrentLineNumber(Cursor->Parser), Name, "Locus");
+        }
+    }
+}
+
+// Parse attributes of a LinkFrom tag.
+// <LinkFrom Index="0" Score="3.14" AA="G" />
+// If ExtendsFlag is true, this exon EXTENDS the previous one (no splicing required)
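+// Edges are accumulated here in a per-exon linked list (BackEdgeHead/BackEdgeTail);
+// IntegrityCheckXMLGene later flattens that list into the BackwardEdges array once
+// the whole gene has been parsed.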
+void ParseLinkFromAttributes(MS2ParseCursor* Cursor, const char** Attributes, int ExtendsFlag)
+{
+    int AttributeIndex;
+    const char* Name;
+    const char* Value;
+    char EdgeAA = '\0';
+    ExonEdge* Edge = NULL;
+    int BackExonIndex;
+    ExonStruct* Exon;
+    //
+    if (Cursor->ErrorFlag)
+    {
+        return;
+    }
+    Exon = Cursor->CurrentExon;
+    XML_ASSERT(Exon);
+
+    for (AttributeIndex = 0; Attributes[AttributeIndex]; AttributeIndex += 2)
+    {
+        Name = Attributes[AttributeIndex];
+        Value = Attributes[AttributeIndex + 1];
+        if (!CompareStrings(Name, "Index"))
+        {
+            BackExonIndex = atoi(Value);
+            XML_ASSERT(BackExonIndex >= 0 && BackExonIndex < Cursor->CurrentGene->ExonCount);
+            Edge = (ExonEdge*)calloc(1, sizeof(ExonEdge));
+            if (ExtendsFlag)
+            {
+                Edge->Power = 0;
+            }
+            else
+            {
+                Edge->Power = 1;
+            }
+            Edge->Exon = Cursor->CurrentGene->Exons + BackExonIndex;
+            Edge->Source = Exon;
+            // Insert the exon into the list:
+            if (Exon->BackEdgeTail)
+            {
+                Exon->BackEdgeTail->Next = Edge;
+            }
+            else
+            {
+                Exon->BackEdgeHead = Edge;
+            }
+            Exon->BackEdgeTail = Edge;
+            Exon->BackEdgeCount++;
+            Edge->Exon->ForwardEdgeCount++;
+        }
+        else if (!CompareStrings(Name, "AA"))
+        {
+            EdgeAA = Value[0];
+        }
+        else if (GlobalOptions->XMLStrictFlag)
+        {
+            REPORT_WARNING_ISS(28, XML_GetCurrentLineNumber(Cursor->Parser), Name, "Link");
+        }
+    }
+    if (Edge)
+    {
+        Edge->AA = EdgeAA;
+    }
+}
+// Parse attributes of an Exon tag.  
+void ParseExonAttributes(MS2ParseCursor* Cursor, const char** Attributes)
+{
+    int AttributeIndex;
+    const char* Name;
+    const char* Value;
+    //
+    if (Cursor->ErrorFlag)
+    {
+        return;
+    }
+
+    XML_ASSERT(Cursor->CurrentGene);
+
+    Cursor->CurrentExonIndex = -1; // invalidate it; the attributes will fix it
+    Cursor->CurrentExon = NULL;
+
+    // First, loop through the attributes to get the index, so we can point at the correct exon:
+    for (AttributeIndex = 0; Attributes[AttributeIndex]; AttributeIndex += 2)
+    {
+        Name = Attributes[AttributeIndex];
+        Value = Attributes[AttributeIndex + 1];
+        if (!CompareStrings(Name, "index"))
+        {
+            Cursor->CurrentExonIndex = atoi(Value);
+        }
+    }
+    XML_ASSERT(Cursor->CurrentExonIndex >= 0 && Cursor->CurrentExonIndex < Cursor->CurrentGene->ExonCount);
+
+    Cursor->CurrentExon = Cursor->CurrentGene->Exons + Cursor->CurrentExonIndex;
+    Cursor->CurrentExon->Gene = Cursor->CurrentGene;
+    // Initialize the exon START and END to -1 (that is, not on a known chromosome):
+    Cursor->CurrentExon->Start = -1;
+    Cursor->CurrentExon->End = -1;
+    Cursor->CurrentExon->Index = Cursor->CurrentExonIndex;
+
+    // Now loop through and read attribute values:
+    for (AttributeIndex = 0; Attributes[AttributeIndex]; AttributeIndex += 2)
+    {
+        Name = Attributes[AttributeIndex];
+        Value = Attributes[AttributeIndex + 1];
+        if (!CompareStrings(Name, "Start"))
+        {
+            Cursor->CurrentExon->Start = atoi(Value);
+        }
+        else if (!CompareStrings(Name, "End"))
+        {
+            Cursor->CurrentExon->End = atoi(Value);
+        }
+        else if (!CompareStrings(Name, "Prefix"))
+        {
+            strncpy(Cursor->CurrentExon->Prefix, Value, 2);
+        }
+        else if (!CompareStrings(Name, "Suffix"))
+        {
+            strncpy(Cursor->CurrentExon->Suffix, Value, 2);
+        }
+        else if (!CompareStrings(Name, "Index"))
+        {
+            ;
+        }
+        else if (GlobalOptions->XMLStrictFlag)
+        {
+            REPORT_WARNING_ISS(28, XML_GetCurrentLineNumber(Cursor->Parser), Name, "Exon");
+        }
+    }
+    XML_ASSERT(Cursor->CurrentExonIndex >= 0 && Cursor->CurrentExonIndex < Cursor->CurrentGene->ExonCount);
+}
+
+// Parse attributes of an ExonSequence tag.  
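+// The declared Length attribute pre-allocates the sequence buffer;
+// MS2CharacterDataHandler then appends the tag body into it and reports an
+// error if the body exceeds the declared length.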
+void ParseExonSequenceAttributes(MS2ParseCursor* Cursor, const char** Attributes)
+{
+    int AttributeIndex;
+    const char* Name;
+    const char* Value;
+    //
+    if (Cursor->ErrorFlag)
+    {
+        return;
+    }
+    XML_ASSERT(Cursor->CurrentExon);
+
+    for (AttributeIndex = 0; Attributes[AttributeIndex]; AttributeIndex += 2)
+    {
+        Name = Attributes[AttributeIndex];
+        Value = Attributes[AttributeIndex + 1];
+        if (!CompareStrings(Name, "Length"))
+        {
+            Cursor->CurrentExon->Length = atoi(Value);
+        }
+        else if (GlobalOptions->XMLStrictFlag)
+        {
+            REPORT_WARNING_ISS(28, XML_GetCurrentLineNumber(Cursor->Parser), Name, "ExonSequence");
+        }
+    }
+
+    XML_ASSERT(Cursor->CurrentExon->Length >= 0 && Cursor->CurrentExon->Length < 1024*1024);
+    if (Cursor->CurrentExon->Length)
+    {
+        Cursor->CurrentExon->Sequence = (char*)calloc(sizeof(char), Cursor->CurrentExon->Length + 1);
+    }
+}
+
+// expat callback: Handle a tag and its attributes.
+void MS2StartElement(void* UserData, const char* Tag, const char** Attributes)
+{
+    MS2ParseCursor* Cursor;
+    int ExpectedTag = 0;
+    //
+    Cursor = (MS2ParseCursor*)UserData;
+    if (Cursor->ErrorFlag)
+    {
+        return;
+    }
+
+    // Switch on our current state, and handle the tags we expect to see in our current state.
+    // Tags we don't expect are ignored (i.e. new tags can be added without breaking the parser)
+    switch (Cursor->State)
+    {
+    case evMS2DBNone:
+        if (!CompareStrings(Tag, "Database"))
+        {
+            ExpectedTag = 1;
+            Cursor->State = evMS2DBDatabase;
+            // ignore database attributes for now
+        }
+        break;
+    case evMS2DBDatabase:
+        if (!CompareStrings(Tag, "Gene"))
+        {
+            ExpectedTag = 1;
+            XML_ASSERT(!Cursor->CurrentGene);
+            Cursor->State = evMS2DBGene;
+            Cursor->CurrentGene = (GeneStruct*)calloc(1, sizeof(GeneStruct));
+            Cursor->CurrentExonIndex = 0;
+            Cursor->CurrentGene->ChromosomeNumber = -1;
+            Cursor->CurrentGene->ForwardFlag = 1; // default
+            ParseGeneAttributes(Cursor, Attributes);
+        }
+        if (!CompareStrings(Tag, "Locus"))
+        {
+            ExpectedTag = 1;
+            Cursor->State = evMS2DBGeneLocus;
+            ParseLocusAttributes(Cursor, Attributes);
+        }
+        break;
+    case evMS2DBGene:
+        XML_ASSERT(Cursor->CurrentGene);
+        if (!CompareStrings(Tag, "Exon"))
+        {
+            ExpectedTag = 1;
+            Cursor->State = evMS2DBExon;
+            ParseExonAttributes(Cursor, Attributes);
+        }
+        if (!CompareStrings(Tag, "CrossReference"))
+        {
+            // We don't do anything with the attributes, but cross-references
+            // are "expected", so we don't raise a warning.
+            ExpectedTag = 1;
+            Cursor->State = evMS2DBGeneCrossReference;
+        }
+        break;
+    case evMS2DBGeneCrossReference:
+        if (!CompareStrings(Tag, "CRExons"))
+        {
+            ExpectedTag = 1;
+        }
+        break;
+    case evMS2DBExon:
+        XML_ASSERT(Cursor->CurrentExon);
+        if (!CompareStrings(Tag, "ExonSequence"))
+        {
+            ExpectedTag = 1;
+            Cursor->State = evMS2DBExonSequence;
+            ParseExonSequenceAttributes(Cursor, Attributes);
+        }
+        if (!CompareStrings(Tag, "ExtendsExon"))
+        {
+            ExpectedTag = 1;
+            // Don't change states, ExtendsExon has no body
+            ParseLinkFromAttributes(Cursor, Attributes, 1);
+        }
+        if (!CompareStrings(Tag, "LinkFrom"))
+        {
+            ExpectedTag = 1;
+            // Don't change states, LinkFrom has no body
+            ParseLinkFromAttributes(Cursor, Attributes, 0);
+        }
+        break;
+    default:
+        break;
+    }
+    if (!ExpectedTag)
+    {
+        REPORT_ERROR_IS(27, XML_GetCurrentLineNumber(Cursor->Parser), Tag);
+    }
+}
+
+// Confirm that this gene is, indeed, searchable.
+int IntegrityCheckXMLGene(MS2ParseCursor* Cursor)
+{
+    int ExonIndex;
+    int EdgeIndex;
+    ExonEdge* Edge;
+    ExonEdge* PrevEdge;
+    GeneStruct* Gene;
+    ExonStruct* Exon;
+    //
+    Gene = Cursor->CurrentGene;
+    XML_ASSERT_RETVAL(Gene);
+    for (ExonIndex = 0; ExonIndex < Cursor->CurrentGene->ExonCount; ExonIndex++)
+    {
+        // Confirm that we did, in fact, observe this exon:
+        Exon = Gene->Exons + ExonIndex;
+        if (!Exon->Gene)
+        {
+            printf("* Error: Exon '%d' from Gene '%s' not present!\n", ExonIndex, Gene->Name);
+            return 0;
+        }
+    }
+    // All exons have been initialized.  Now let's fix up the backward edges from each exon.
+    // We MOVE the backward edges from the linked list Exon->BackEdgeHead->Next->...->Exon->BackEdgeTail
+    // into an array, Exon->BackwardEdges.
+    for (ExonIndex = 0; ExonIndex < Cursor->CurrentGene->ExonCount; ExonIndex++)
+    {
+        Exon = Gene->Exons + ExonIndex;
+        PrevEdge = NULL;
+        if (Exon->BackEdgeCount)
+        {
+            Exon->BackwardEdges = (ExonEdge*)calloc(Exon->BackEdgeCount, sizeof(ExonEdge));
+            for (EdgeIndex = 0, Edge = Exon->BackEdgeHead; Edge; EdgeIndex++, Edge = Edge->Next)
+            {
+                memcpy(Exon->BackwardEdges + EdgeIndex, Edge, sizeof(ExonEdge));
+                SafeFree(PrevEdge);
+                PrevEdge = Edge;
+            }
+            SafeFree(PrevEdge);
+            Exon->BackEdgeHead = NULL;
+            Exon->BackEdgeTail = NULL;
+        }
+        // Allocate forward-edge array:
+        Exon->ForwardEdges = (ExonEdge*)calloc(Exon->ForwardEdgeCount, sizeof(ExonEdge));
+    }
+    // Finally, we'll set the forward edges from each exon:
+    SetExonForwardEdges(Cursor->CurrentGene);
+    return 1;
+}
+
+void MS2EndElement(void* UserData, const char* Tag)
+{
+    MS2ParseCursor* Cursor;
+    int Result;
+    int Index;
+
+    //
+    Cursor = (MS2ParseCursor*)UserData;
+    if (Cursor->ErrorFlag)
+    {
+        return;
+    }
+    //printf("End tag '%s', current state %d\n", Tag, Cursor->State);
+    switch (Cursor->State)
+    {
+    case evMS2DBDatabase:
+        if (!CompareStrings(Tag, "Database"))
+        {
+            Cursor->State = evMS2DBNone;
+        }
+        break;
+    case evMS2DBGene:
+        if (!CompareStrings(Tag, "Gene"))
+        {
+            Cursor->State = evMS2DBDatabase;
+            // We search a gene immediately after we finish parsing it.  (Note that by the 
+            // time control returns to SearchMS2DB, we may have shot through 10 genes!)
+            Result = IntegrityCheckXMLGene(Cursor);
+            if (Result)
+            {
+                //printf("**Gene: %s\n", Cursor->CurrentGene->Name);
+                //printf("Root: %p\n", Cursor->Info->Root);
+                //for (Index = 0; Index < TRIE_CHILD_COUNT; ++Index)
+                //{
+                //    printf(" Child[%c] = %p\n", Index + 'A', Cursor->Info->Root->Children[Index + 'A']);
+                //}
+                //getchar();
+                //fflush(stdout);
+                SearchSplicableGene(Cursor->Info, Cursor->CurrentGene);
+            }
+            FreeGene(Cursor->CurrentGene);
+            Cursor->CurrentGene = NULL;
+            Cursor->Info->RecordNumber++;
+        }
+        break;
+    case evMS2DBExon:
+        if (!CompareStrings(Tag, "Exon"))
+        {
+            Cursor->State = evMS2DBGene;
+            //Cursor->CurrentExonIndex++;
+        }
+        break;
+    case evMS2DBExonSequence:
+        if (!CompareStrings(Tag, "ExonSequence"))
+        {
+            Cursor->State = evMS2DBExon;
+        }
+        break;
+    case evMS2DBGeneLocus:
+        if (!CompareStrings(Tag, "Locus"))
+        {
+            Cursor->State = evMS2DBGene;
+        }
+        break;
+    case evMS2DBGeneCrossReference:
+        if (!CompareStrings(Tag, "CrossReference"))
+        {
+            Cursor->State = evMS2DBGene;
+        }
+        break;
+    default:
+        printf("* Error: End-tag '%s' not handled from state %d\n", Tag, Cursor->State);
+        Cursor->ErrorFlag = 1;
+        break;
+    }
+}
+
+void SearchMS2DB(SearchInfo* Info)
+{
+    FILE* DBFile;
+    XML_Parser Parser = NULL;
+    int ParseUserData = 0;
+    int XMLParseResult;
+    int BytesRead;
+    int DoneFlag = 0;
+    int FilePos = 0;
+    void* XMLBuffer;
+    MS2ParseCursor* Cursor;
+    int Error;
+    //
+    DBFile = Info->DB->DBFile;
+    if (!DBFile)
+    {
+        printf("** Error: Unable to open database file '%s'\n", Info->DB->FileName);
+        return;
+    }
+    fseek(DBFile, 0, 0);
+    AllocSpliceStructures();
+    Cursor = (MS2ParseCursor*)calloc(1, sizeof(MS2ParseCursor));
+    Cursor->Info = Info;
+    Parser = XML_ParserCreate(NULL);
+    Cursor->Parser = Parser;
+    XML_SetUserData(Parser, Cursor);
+    XML_SetElementHandler(Parser, MS2StartElement, MS2EndElement);
+    XML_SetCharacterDataHandler(Parser, MS2CharacterDataHandler);
+
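+    // Push-parse the file in MS2DB_BUFFER_SIZE chunks.  Each chunk is handed to
+    // expat, whose callbacks build genes and (on each closing Gene tag) search
+    // them immediately, before control returns to this loop.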
+    while (!DoneFlag)
+    {
+        // Get a buffer (parser handles the memory):
+        XMLBuffer = XML_GetBuffer(Parser, sizeof(char) * MS2DB_BUFFER_SIZE);
+        if (!XMLBuffer)
+        {
+            printf("* Error: Unable to get XML buffer of size %d\n", MS2DB_BUFFER_SIZE);
+            break;
+        }
+
+        // Read into the buffer:
+        BytesRead = ReadBinary(XMLBuffer, sizeof(char), MS2DB_BUFFER_SIZE, DBFile);
+        if (!BytesRead)
+        {
+            // We'll call XML_Parse once more, this time with DoneFlag set to 1. 
+            DoneFlag = 1;
+        }
+
+        // Parse this block o' text:
+        XMLParseResult = XML_Parse(Parser, XMLBuffer, BytesRead, DoneFlag);
+        if (!XMLParseResult)
+        {
+            printf("XML Parse error - file position ~%d\n", FilePos);
+            Error = XML_GetErrorCode(Parser);
+            printf("Error code %d description '%s'\n", Error, XML_ErrorString(Error));
+        }
+
+        // If Cursor->ErrorFlag is set, then the file isn't valid!  Error out
+        // now, since recovery could be difficult.
+        if (Cursor->ErrorFlag)
+        {
+            break;
+        }
+        FilePos += BytesRead;
+    }
+
+    XML_ParserFree(Parser);
+    SafeFree(Cursor);
+}
+
+
diff --git a/MS2DB.h b/MS2DB.h
new file mode 100644
index 0000000..10cacf9
--- /dev/null
+++ b/MS2DB.h
@@ -0,0 +1,45 @@
+//Title:          MS2DB.h
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+#ifndef MS2DB_H
+#define MS2DB_H
+
+
+
+// Code to parse databases in MS2DB format.
+#include "Inspect.h"
+#include "Trie.h"
+
+void SearchMS2DB(SearchInfo* Info);
+
+#endif
+
+
diff --git a/MS2DBShuffler.jar b/MS2DBShuffler.jar
new file mode 100644
index 0000000..8483fff
Binary files /dev/null and b/MS2DBShuffler.jar differ
diff --git a/MSSpectrum.py b/MSSpectrum.py
new file mode 100644
index 0000000..61030b8
--- /dev/null
+++ b/MSSpectrum.py
@@ -0,0 +1,663 @@
+#Title:          MSSpectrum.py
+#Author:         Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+#Created:        2005
+# Copyright 2007,2008,2009 The Regents of the University of California
+# All Rights Reserved
+#
+# Permission to use, copy, modify and distribute any part of this
+# program for educational, research and non-profit purposes, by non-profit
+# institutions only, without fee, and without a written agreement is hereby
+# granted, provided that the above copyright notice, this paragraph and
+# the following three paragraphs appear in all copies.
+#
+# Those desiring to incorporate this work into commercial
+# products or use for commercial purposes should contact the Technology
+# Transfer & Intellectual Property Services, University of California,
+# San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+# Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+#
+# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+# INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+# IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+# OF SUCH DAMAGE.
+#
+# THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+# OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+# ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+# REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+# EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+# THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+
+"""
+Classes representing an MS/MS spectrum and its peaks.
+"""
+import traceback
+import sys
+import os
+import types
+import string
+import re
+import struct
+import math
+import base64 # for mzxml parsing
+import ParseXML
+from Utils import *
+Initialize()
+
+# Some systems have old versions of base64, without the new decode interface.
+# As a workaround, set "B64Decode" to the decoding function.
+if hasattr(base64, "b64decode"):
+    B64Decode = base64.b64decode
+    B64Encode = base64.b64encode
+else:
+    B64Decode = base64.decodestring
+    B64Encode = base64.encodestring
+
+class PeakClass:
+    """
+    One peak from an ms/ms spectrum.  Mostly just a mass, but we track the intensity and
+    (perhaps putative) ion-type, as well.  Note that this may be a spectral peak (in
+    which case the mass is a spectral mass) or a PRM peak (in which case the mass is
+    a prefix residue mass).  Each spectral peak gives rise to n PRM peaks, where n
+    is the number of ion types available.
+    """
+    def __init__(self, Mass, Intensity):
+        self.Mass = Mass
+        self.Intensity = Intensity
+        self.IonType = None # Assigned only for PRM peaks
+        self.Score = 0 # Score, based on current filtering scheme.
+        self.PeptideIndex = None
+        self.FilterRank = None
+        self.IntensityRank = None 
+        self.IsPlausibleIsotopicPeak = 0
+        self.HasPlausibleIsotopicPeak = 0
+    def __cmp__(self, Other):
+        "Sort two peak objects - compare the masses"
+        if not isinstance(Other, PeakClass):
+            return 1
+        if (self.Mass < Other.Mass):
+            return -1
+        if (self.Mass > Other.Mass):
+            return 1
+        return 0
+    def __str__(self):
+        return "<peak %s>"%self.Mass
+    def GetPeakMass(self, ParentMass):
+        "Get the corresponding spectral mass from this PRM peak"
+        return self.IonType.GetPeakMass(self.Mass, ParentMass)
+    def GetPRMMass(self, ParentMass):
+        "Get the corresponding PRM mass from this spectral peak"
+        return self.IonType.GetPRMMass(self.Mass, ParentMass)
+    def PrintMe(self):
+        print "Printing information for a PeakClass object"
+        print "Mass %f Intensity %f "%(self.Mass,self.Intensity)
+        print "IonType %s PeptideIndex %s IntensityRank %d"%(self.IonType, self.PeptideIndex, self.IntensityRank)
+
+class SpectrumClass:
+    """
+    Mass-spec data, and some functions to filter peaks and such.  
+    """
+    InstanceCount = 0
+    def __del__(self):
+        SpectrumClass.InstanceCount -= 1
+    def __init__(self, Scoring = None):
+        SpectrumClass.InstanceCount += 1
+        # Init our attributes:
+        self.Name = None
+        self.ParentMass = None
+        self.PrecursorMZ = None
+        self.PrecursorIntensity = None
+        self.Charge = 1 # default
+        self.Peaks = None # list of PeakClass instances
+        self.PRMPeaks = None # list of PeakClass instances
+        # The actual parent peptide (instance of PeptideClass), if known:
+        self.CorrectPeptide = None
+        self.Scoring = Scoring
+    def GetSignalToNoise(self):
+        "Return signal-to-noise ratio for this spectrum"
+        Intensities = []
+        for Peak in self.Peaks:
+            Intensities.append(Peak.Intensity)
+        Intensities.sort()
+        IntenseCount = min(len(Intensities), 5)
+        if not IntenseCount:
+            return 0
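+        # Signal: roughly the median of the (up to five) most intense peaks;
+        # Noise: the median intensity over all peaks.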
+        Signal = Intensities[-IntenseCount/2]
+        Noise = Intensities[len(Intensities)/2]
+        return Signal / float(Noise)
+    def GetTotalIntensity(self):
+        Intensity = 0
+        for Peak in self.Peaks:
+            Intensity += Peak.Intensity
+        return Intensity 
+    def SetCharge(self, NewCharge):
+        self.Charge = NewCharge
+        self.ParentMass = self.PrecursorMZ * NewCharge - (NewCharge - 1)*1.0078
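+        # For example (values chosen for illustration only): PrecursorMZ 500.0 at
+        # charge 2 gives ParentMass = 500.0*2 - 1*1.0078 = 998.9922, i.e. the
+        # singly-protonated (M+H) mass, using 1.0078 Da per extra charge as
+        # elsewhere in this module.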
+    def ReadPeaksMGF(self, File):
+        self.Peaks = []
+        for FileLine in File.xreadlines():
+            if FileLine[:8] == "END IONS":
+                break
+            if FileLine[:6] == "CHARGE":
+                # Note: "2+ and 3+" is NOT supported.  Use the MultiCharge option,
+                # or include two scans in the input file.
+                self.Charge = int(FileLine[7:9].replace("+",""))
+                continue
+            if FileLine[:7] == "PEPMASS":
+                #self.ParentMass = float(FileLine[8:])
+                self.PrecursorMZ = float(FileLine[8:].split()[0])
+                continue
+            Bits = FileLine.split()
+            try:
+                Mass = float(Bits[0])
+                Intensity = float(Bits[1])
+            except:
+                continue # some other header line we don't eat.
+            Peak = PeakClass(Mass, Intensity)
+            self.Peaks.append(Peak)
+        if self.Charge == 0:
+            # Guess!
+            self.Charge = 2
+        self.ParentMass = (self.PrecursorMZ * self.Charge) - (self.Charge-1)*1.0078
+        #print "PrecursorMZ %s charge %s"%(self.PrecursorMZ, self.Charge)
+    def ReadPeaksDTA(self, File):
+        "Read a spectrum from a file, assuming .dta or .pkl or .ms2 format."
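+        # Several header shapes are sniffed below: a "CHARGE=" line, MS2-style
+        # "Z"/"S" lines, a leading ":" line, a three-field .pkl header, or the
+        # plain two-field .dta header (parent mass and charge).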
+        HeaderLine = File.readline()
+        # Skip up to four leading blank lines before the real header line:
+        for BlankIndex in range(4):
+            if HeaderLine.strip():
+                break
+            HeaderLine = File.readline()
+        #print "HeaderLine: '%s'"%HeaderLine.strip()
+        Bits = HeaderLine.strip().split()
+        if HeaderLine[:7]=="CHARGE=":
+            self.Charge = int(HeaderLine[7])
+            HeaderLine = File.readline()
+            self.PrecursorMZ = float(HeaderLine[8:])
+            self.ParentMass = (self.PrecursorMZ * self.Charge) - (self.Charge-1)*1.0078
+        elif Bits[0] == "Z": # MS2 format:
+            self.Charge = int(Bits[1])
+            self.ParentMass = float(Bits[2])
+        elif Bits[0] == "S": #MS2 format:
+            HeaderLine = File.readline()
+            Bits = HeaderLine.strip().split()
+            if Bits[0] == "Z":
+                self.Charge = int(Bits[1])
+                self.ParentMass = float(Bits[2])
+            else:
+                print "ERROR: Expecting a line starting with Z but instead found %s"%HeaderLine
+                sys.exit(0)
+        elif HeaderLine[0] == ":": # MS2 colon format:
+            HeaderLine = File.readline()
+            Bits = HeaderLine.strip().split()
+            self.ParentMass = float(Bits[0])
+            self.Charge = int(Bits[1]) # always an integer!
+        elif len(Bits) == 3: #PKL format:
+            self.PrecursorMZ = float(Bits[0])
+            self.Charge = int(Bits[2])
+            self.ParentMass = (self.PrecursorMZ * self.Charge) - (self.Charge-1)*1.0078
+        else:
+            self.ParentMass = float(Bits[0])
+            self.Charge = int(Bits[1]) # always an integer!
+        if self.Charge == 0:
+            # Guess!
+            self.Charge = 2
+            self.PrecursorMZ = self.ParentMass
+            self.ParentMass = (self.PrecursorMZ * self.Charge) - (self.Charge-1)*1.0078
+            #print "Prescursor MZ is %.2f, so guess a parent mass of %.2f"%(self.PrecursorMZ, self.ParentMass)
+        else:
+            self.PrecursorMZ = (self.ParentMass + (self.Charge-1)*1.0078) / self.Charge
+        self.Peaks = []
+        for FileLine in File.xreadlines():
+            # Skip comments:
+            if FileLine[0] == "#":
+                continue
+            Bits = FileLine.split()
+            if not Bits:
+                break
+            if Bits[0] == "Z":
+                continue # special for ms2: ignore.
+            if len(Bits) > 2:
+                break # no more!
+            try:
+                Mass = float(Bits[0])
+                Intensity = float(Bits[1])
+            except:
+                # It's over, over, over.
+                break
+            Peak = PeakClass(Mass, Intensity)
+            self.Peaks.append(Peak)
+        #File.close()
+        self.Peaks.sort() # sort by mass
+
+    def ReadPeakDTALine(self, FileLine):
+        FileLine = FileLine.strip()
+        Bits = FileLine.split()
+        if len(Bits) < 2:
+            return # blank (or broken) line, skip
+        Peak = PeakClass(float(Bits[0]), float(Bits[1]))
+        # If this is a labeled .dta file, read the ion types and the peptide indices:
+        if len(Bits)>2:
+            Peak.IonType = Global.AllIonDict.get(Bits[2], None)
+        if len(Bits)>3:
+            try:
+                Peak.PeptideIndex = int(Bits[3])
+            except:
+                pass # silent failure (a novel)
+        self.Peaks.append(Peak)
+    def RankPeaksByIntensity(self):
+        "Set Peak.IntensityRank for each of our peaks."
+        PeaksSortedByIntensity = []
+        for Peak in self.Peaks:
+            PeaksSortedByIntensity.append((Peak.Intensity, Peak))
+        PeaksSortedByIntensity.sort()
+        PeaksSortedByIntensity.reverse()
+        for Index in range(len(PeaksSortedByIntensity)):
+            PeaksSortedByIntensity[Index][1].IntensityRank = Index
+    def ReadPeaksPKL(self, File):
+        "Read peaks from a file in .pkl format"
+        HeaderLine = File.readline()
+        Bits = HeaderLine.split()
+        if len(Bits)!=3:
+            # .pkl files should have precursor m/z, precursor peak intensity, and
+            # guessed charge.  If we don't have three pieces, then this isn't a
+            # valid .pkl file...
+            raise ValueError, "Invalid input file: Header line '%s' not a .pkl header."%HeaderLine
+        self.PrecursorMZ = float(Bits[0])
+        self.PrecursorIntensity = float(Bits[1])
+        self.Charge = int(Bits[2])
+        # We hope to be called with an actual charge.  If we didn't get one at all, then guess 2.
+        if not self.Charge:
+            self.Charge = 2
+        self.ParentMass = (self.PrecursorMZ * self.Charge) - (1.0078 * (self.Charge - 1))
+        ##print "Prec %.2f times charge %s gives pm %s"%(self.PrecursorMZ, self.Charge, self.ParentMass)
+        # All subsequent lines: Mass and intensity
+        self.Peaks = []
+        for FileLine in File.xreadlines():
+            Bits = FileLine.split()
+            if len(Bits) > 2:
+                break
+            self.ReadPeakDTALine(FileLine)
+        File.close()
+        self.Peaks.sort() # sort by mass
+    def ReadPeaksFromFile(self, File, FileName):
+        # Given a file 'blah123.dta', name the spectrum 'blah123'.
+        self.Name = os.path.split(FileName)[1]
+        (self.Name, FileExtension) = os.path.splitext(self.Name)
+        FileExtension = FileExtension.lower()
+        # Strip ".mzxml:444279" to ".mzxml":
+        if FileExtension.find(":")!=-1:
+            FileExtension = FileExtension[:FileExtension.find(":")]
+        # Use the appropriate parser
+        if FileExtension == ".pkl":
+            self.ReadPeaksPKL(File)
+        elif FileExtension == ".mzxml":
+            self.ReadPeaksMZXML(File)
+        elif FileExtension == ".mzdata":
+            self.ReadPeaksMZData(File)
+        elif FileExtension == ".mgf":
+            self.ReadPeaksMGF(File)
+        else:
+            # default case: DTA
+            self.ReadPeaksDTA(File)
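+    # Typical call (sketch; the file names here are hypothetical):
+    #   Spectrum = SpectrumClass()
+    #   Spectrum.ReadPeaks("Scan123.dta")
+    #   Spectrum.ReadPeaks("Run1.mzXML:444279")  # trailing number = byte offset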
+    def ReadPeaks(self, FileName, FilePos = None):
+        """
+        Instantiator - Read a spectrum from a file.  Sets ParentMass, Charge,
+        and Peaks list.  Doesn't filter, yet.
+        """
+        if FilePos == None:
+            try:
+                ColonBits = FileName.split(":")
+                FilePos = int(ColonBits[-1])
+                FileName = string.join(ColonBits[:-1], ":")
+            except:
+                FilePos = 0
+        try:
+            File = open(FileName, "rb")
+        except:
+            print "Error in ReadPeaks(): File '%s' couldn't be opened."%FileName
+            traceback.print_exc()
+            return
+        File.seek(FilePos)
+        self.ReadPeaksFromFile(File, FileName)
+        self.FilePath = FileName
+        self.FilePos = FilePos
+        File.close()
+    def ReadPeaksMZData(self, File):
+        """
+        Parse peaks from an .mzdata format file.  This format is slightly inferior
+        to .mzxml, and not as commonly used.
+        """
+        ParseXML.GetSpectrumPeaksMZData(self, File)
+        self.Charge = 2 # guess!
+        self.ParentMass = (self.PrecursorMZ * self.Charge) - (self.Charge - 1) * 1.0078
+        return 
+        
+    def ReadPeaksMZXML(self, File):
+        """
+        Parse peaks from an .mzXML format file.  Assumes we've already scanned to the
+        desired file offset.
+        """
+        ParseXML.GetSpectrumPeaksMZXML(self, File)
+        self.Charge = 2 # guess!
+        self.ParentMass = (self.PrecursorMZ * self.Charge) - (self.Charge - 1) * 1.0078
+        return 
+    def DebugPrint(self, ShowPeaks = 0):
+        """
+        Print information on our spectrum, for debugging.
+        """
+        print "Spectrum '%s' has parent mass %f,\n   charge %f, and %d peaks"%(self.Name, self.ParentMass,
+            self.Charge, len(self.Peaks))
+        if self.CorrectPeptide:
+            print " True parent peptide is: %s"%self.CorrectPeptide
+        if ShowPeaks:
+            for Peak in self.Peaks:
+                if Peak.IonType:
+                    print "  %f\t%f\t%s"%(Peak.Mass, Peak.Intensity, Peak.IonType.Name)
+                else:
+                    print "  %f\t%f"%(Peak.Mass, Peak.Intensity)
+    def GetBestPeak(self, Mass, MaxIntensity = None, Epsilon = 1.0):
+        "Used in labeling.  Find the best nearby peak whose intensity doesn't exceed our limit."
+        if MaxIntensity == 0:
+            return (None, None)
+        BestPeak = None
+        BestPeakError = None
+        ClosestError = None
+        for Peak in self.Peaks:
+            Error = Peak.Mass - Mass
+            if Error < -Epsilon:
+                continue
+            if Error > Epsilon:
+                break
+            if MaxIntensity and Peak.Intensity > MaxIntensity:
+                continue # forbid neutral losses which are taller than the original            
+            if (BestPeak == None or BestPeak.Intensity < Peak.Intensity):
+                BestPeak = Peak
+                BestPeakError = Error
+        return (BestPeak, BestPeakError)
+        
+    def GetPeak(self, Mass, Epsilon = 1.0):
+        """
+        Get the closest peak to the specified mass, with a maximum error of Epsilon.
+        """
+        ClosestPeak = None
+        ClosestError = None
+        for Peak in self.Peaks:
+            Error = abs(Peak.Mass - Mass)
+            if Error < Epsilon:
+                if (ClosestPeak == None or ClosestError > Error):
+                    ClosestPeak = Peak
+                    ClosestError = Error
+            if Peak.Mass > Mass:
+                break
+        return ClosestPeak
+    def GetPRMPeak(self, Mass, Epsilon = 1.0):
+        """
+        Get the closest PRM peak to the specified mass, with a maximum error of Epsilon.
+        """
+        ClosestPeak = None
+        ClosestError = None
+        for Peak in self.PRMPeaks:
+            Error = abs(Peak.Mass - Mass)
+            if Error < Epsilon:
+                if (ClosestPeak == None or ClosestError > Error):
+                    ClosestPeak = Peak
+                    ClosestError = Error
+            if Peak.Mass > Mass:
+                break
+        return ClosestPeak
+    def GetPRMPeaks(self, Mass, Epsilon = 1.0):
+        """
+        Get all PRM peaks within Epsilon of Mass.
+        """
+        Peaks = []
+        for Peak in self.PRMPeaks:
+            Error = abs(Peak.Mass - Mass)
+            if Error < Epsilon:
+                Peaks.append(Peak)
+            if Peak.Mass > Mass:
+                break
+        return Peaks
+    def AssignIonTypesFromPeptide(self):
+        """
+        Assign ion types to our peaks, based on the CorrectPeptide.
+        """
+        # The true PRMPeaks are sums of peptide masses.  Iterate
+        # over the length of the peptide:
+        LeftMass = 0
+        for Index in range(0, len(self.CorrectPeptide.Aminos)):
+            LeftMass += Global.AminoMass[self.CorrectPeptide.Aminos[Index]]
+            LeftMass += Global.FixedMods.get(self.CorrectPeptide.Aminos[Index], 0)
+            # For this PRMPeak, look for all the possible spectral peaks
+            # corresponding to the various ion types:
+            for IonType in AllIons:
+                Mass = IonType.GetPeakMass(LeftMass, self.ParentMass)
+                Peak = self.GetPeak(Mass, 1.0)
+                if Peak:
+                    Peak.IonType = IonType
+                    Peak.Pep = self.CorrectPeptide[:Index+1]
+    def ApplyWindowFilter(self, RegionCutoffs, WindowSizes, MaxRankInclusive):
+        """
+        Apply this window-filter to our peaks.  RegionCutoffs describe the edges
+        of "early", "medium" and "late" spectral portions; WindowSizes are the
+        sizes (in AMUs) of windows for these portions.  MaxRankInclusive is the
+        worst rank to keep.  
+        """
+        #print "Apply window:", WindowSizes, RegionCutoffs, MaxRankInclusive
+        GoodPeaks = []
+        # List of region-edges:
+        Borders = []
+        for Cutoff in RegionCutoffs:
+            Borders.append(self.ParentMass * Cutoff)
+        NextBorderIndex = 0
+        LastBorderIndex = len(RegionCutoffs)
+        WindowIndex = 0
+        BadPeakIntensityList = []
+        for Peak in self.Peaks:
+            while (NextBorderIndex < LastBorderIndex and Peak.Mass > Borders[NextBorderIndex]):
+                NextBorderIndex += 1
+                WindowIndex += 1
+            WindowSize = WindowSizes[WindowIndex]
+            MinMass = Peak.Mass - WindowSize/2
+            MaxMass = Peak.Mass + WindowSize/2
+            List = []
+            for OtherPeak in self.Peaks:
+                if OtherPeak.Mass > MaxMass:
+                    break
+                if OtherPeak.Mass > MinMass:
+                    List.append((OtherPeak.Intensity, OtherPeak))
+            List.sort()
+            List.reverse() # best to worst
+            if (len(List) < MaxRankInclusive+1) or (Peak.Intensity >= List[MaxRankInclusive][0]):
+                GoodPeaks.append(Peak)
+            else:
+                BadPeakIntensityList.append(Peak.Intensity)
+        #print "Kept %d of %d original peaks."%(len(GoodPeaks), len(self.Peaks))
+        self.Peaks = GoodPeaks
+        if len(BadPeakIntensityList):
+            BadPeakIntensityList.sort() 
+            return BadPeakIntensityList[len(BadPeakIntensityList)/2]
+        else:
+            return -1
+    def FilterPeaks(self, WindowSize = 50, PeakCount = 6):
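+        """
+        Convenience wrapper: keep only peaks that rank in the top PeakCount
+        (by intensity) within a sliding window of WindowSize Daltons.
+        """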
+        self.ApplyWindowFilter([], (WindowSize,), PeakCount - 1)       
+    def WritePeaks(self, FilePath):
+        "Write out a .dta file."
+        File = open(FilePath, "w")
+        File.write("%f\t%d\n"%(self.ParentMass, self.Charge))
+        for Peak in self.Peaks:
+            File.write("%f\t%f\n"%(Peak.Mass, Peak.Intensity))
+        File.close()
+    def WritePKLPeaks(self,FilePath):
+        """
+        Append this spectrum to the end of a .pkl file.  Note this APPENDS to the file.
+        If no precursor intensity is known, zero is written for it.
+        """
+        FileHandle = open(FilePath, "a")
+        if self.PrecursorIntensity:
+            FileHandle.write("%s %s %s\n"%(self.PrecursorMZ,self.PrecursorIntensity,self.Charge))
+        else:
+            FileHandle.write("%s 0.0 %s\n"%(self.PrecursorMZ,self.Charge))
+        for Peak in self.Peaks:
+            FileHandle.write("%f\t%f\n"%(Peak.Mass, Peak.Intensity))
+        FileHandle.write("\n") #need a blank line to separate different scans
+        FileHandle.close()
+    def WriteMGFPeaks(self, TheFile, Title = "Spectrum", ScanNumber = None):
+        """
+        Append to the end of an mgf file.  Pass in an open file, or
+        (as a string) the path of a file to be APPENDED to.
+        """
+        if type(TheFile) == type(""):
+            File = open(TheFile, "a")
+        else:
+            File = TheFile
+        File.write("BEGIN IONS\n")
+        File.write("TITLE=%s\n"%Title)
+        if ScanNumber != None:
+            File.write("SCAN=%s\n"%ScanNumber)
+        File.write("CHARGE=%d\n"%self.Charge)
+        File.write("PEPMASS=%f\n"%self.PrecursorMZ)
+        for Peak in self.Peaks:
+            File.write("%f\t%f\n"%(Peak.Mass, Peak.Intensity))
+        File.write("END IONS\n")
+        if type(TheFile) == type(""):
+            File.close()
+    def WriteMZXMLPeaks(self, File, ScanNumber):
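+        """
+        Write this spectrum as a single mzXML <scan> element (msLevel 2), with
+        peaks encoded as base64 network-order (m/z, intensity) float pairs.
+        """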
+        PeakCount = len(self.Peaks)
+        Str = """<scan num="%s" msLevel="2" peaksCount="%s" polarity="+" scanType="Full" lowMz="125" highMz="2000">"""%(ScanNumber, PeakCount)
+        Str += """\n<precursorMz """
+        if self.PrecursorIntensity:
+            Str += """ precursorIntensity = "%.2f" """%self.PrecursorIntensity
+        Str += ">%.5f</precursorMz>\n"%self.PrecursorMZ
+        Str += """\n<peaks precision="32" byteOrder="network" pairOrder="m/z-int">"""
+        PeakString = ""
+        for Peak in self.Peaks:
+            PeakString += struct.pack(">ff", Peak.Mass, Peak.Intensity)
+        PeakString = B64Encode(PeakString)
+        Str += PeakString
+        Str += "</peaks>\n</scan>\n"
+        File.write(Str + "\n")
+        
+    def GetTopUnexplainedPeak(self):
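+        """
+        Return the intensity rank of the most intense peak that has no ion
+        type assigned (lower rank means more intense).
+        """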
+        TopUXRank = len(self.Peaks)
+        for Peak in self.Peaks:
+            if Peak.IonType == None and Peak.IntensityRank < TopUXRank:
+                TopUXRank = Peak.IntensityRank
+        return TopUXRank
+    def FindIsotopicPeaks(self):
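+        """
+        Flag peaks that look like isotope peaks: a peak roughly 1Da above
+        another peak, with an intensity ratio close to the expected isotope
+        fraction (from Global.IsotopeWeights) for that mass.
+        """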
+        for PeakIndex in range(len(self.Peaks)):
+            Peak = self.Peaks[PeakIndex]
+            RoundMass = int(round(Peak.Mass))
+            ExpectedFraction = Global.IsotopeWeights.get(RoundMass, None)
+            if ExpectedFraction==None:
+                continue
+            for IsotopePeakIndex in range(PeakIndex+1, len(self.Peaks)):
+                OtherPeak = self.Peaks[IsotopePeakIndex]
+                if OtherPeak.Mass < Peak.Mass + 0.8:
+                    continue
+                if OtherPeak.Mass > Peak.Mass + 1.2:
+                    break
+                Fraction = OtherPeak.Intensity / Peak.Intensity
+                # magic numbers ahoy:
+                if abs(Fraction - ExpectedFraction) < 0.5 or (abs(Fraction - ExpectedFraction) < 0.8 and OtherPeak.Mass > Peak.Mass + 0.9 and OtherPeak.Mass < Peak.Mass + 1.1): 
+                    OtherPeak.IsPlausibleIsotopicPeak = 1
+                    Peak.HasPlausibleIsotopicPeak = 1
+    def GetExplainedIntensity(self):
+        """
+        Callable *after* the spectrum has been labeled.  Returns the percentage
+        of total spectral intensity that has been explained by labels.  All
+        things being equal, a candidate peptide with a higher explained
+        intensity is BETTER.
+        """
+        TotalIntensity = 0
+        ExplainedIntensity = 0
+        for Peak in self.Peaks:
+            TotalIntensity += Peak.Intensity
+            if Peak.IonType != None:
+                ExplainedIntensity += Peak.Intensity
+            #print "%s\t%s\t%s\t%s\t%s\t"%(Peak.Mass, Peak.Intensity, Peak.IonType, Peak.PeptideIndex, Peak.RescueFlag)
+        return ExplainedIntensity / float(max(1, TotalIntensity))
+    def GetExplainedIons(self, Peptide, DynamicRangeMin = 150, DynamicRangeMax = 2000):
+        "Return the percentage of b and y peaks present."
+        Annotated = {}
+        
+        PhosphorylationFlag = 0
+        PhosB = [0]*40
+        PhosY = [0]*40
+        for (Pos, ModList) in Peptide.Modifications.items():
+            for Mod in ModList:
+                if Mod.Name == "Phosphorylation":
+                    PhosphorylationFlag = 1
+        for Peak in self.Peaks:
+            if Peak.IonType:
+                Annotated[(Peak.IonType.Name, Peak.PeptideIndex)] = 1
+        Count = 0
+        Present = 0
+        TotalCutPresent = 0
+        PM = 19 + Peptide.Masses[-1]
+        for Index in range(len(Peptide.Masses)):
+            CutPresent = 0
+            BMass = Peptide.Masses[Index] + 1.0078
+            if BMass > DynamicRangeMin and BMass < DynamicRangeMax:
+                Count += 1
+                BPresent = Annotated.get(("b", Index),0)
+                BPresent |= Annotated.get(("b-p", Index),0)
+                Present += BPresent
+                CutPresent |= BPresent
+            YMass = PM - Peptide.Masses[Index]
+            if YMass > DynamicRangeMin and YMass < DynamicRangeMax:
+                Count += 1
+                YPresent = Annotated.get(("y", len(Peptide.Aminos) - Index), 0)
+                YPresent |= Annotated.get(("y-p", Index),0)
+                Present += YPresent
+                CutPresent |= YPresent
+            # Count the CUT POINTS that are witnessed:
+            if (Index and Index<len(Peptide.Masses)-1) and CutPresent:
+                TotalCutPresent += 1
+        CutCount = len(Peptide.Masses) - 1
+        #print Peptide.Aminos, "%s cut points of %s"%(TotalCutPresent, CutCount)
+        return (Present, Count, Present / max(1, float(Count)),
+                TotalCutPresent, CutCount, TotalCutPresent/max(1, float(CutCount)))
+            
+        
+    def GetExplainedPeaks(self, MaxRank = 24):
+        """
+        Returns the percentage of the top n peaks that have been explained
+        by peak labeling, where n = MaxRank.  The output of GetExplainedPeaks()
+        should be high for a good candidate peptide.
+        """
+        TotalGoodPeaks = 0
+        ExplainedGoodPeaks = 0
+        for Peak in self.Peaks:
+            if Peak.IntensityRank <= MaxRank:
+                TotalGoodPeaks += 1
+                #print Peak.IntensityRank, Peak.Mass, Peak.IonType
+                if Peak.IonType != None:
+                    ExplainedGoodPeaks += 1
+        return ExplainedGoodPeaks / float(max(1, TotalGoodPeaks))
+    def GetLogMeanStdev(self):
+        """
+        Compute the mean and standard deviation of the LOG peak intensities.
+        This can be done at any time, before or after filtering; just be
+        aware of which peak set the statistics describe.
+        """
+        IntensitySum = 0.0
+        NumPeaks = len(self.Peaks)
+        for Peak in self.Peaks:
+            IntensitySum += math.log(Peak.Intensity)
+        Mean = IntensitySum / NumPeaks
+        VarSum = 0.0
+        for Peak in self.Peaks:
+            Diff = math.log(Peak.Intensity) - Mean
+            VarSum += Diff*Diff
+        Variance = VarSum / NumPeaks
+        Stdev = math.sqrt(Variance)
+        return (Mean,Stdev)
+    
diff --git a/MakeImage.py b/MakeImage.py
new file mode 100644
index 0000000..800d4c4
--- /dev/null
+++ b/MakeImage.py
@@ -0,0 +1,623 @@
+#Title:          MakeImage.py
+#Author:         Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+#Created:        2005
+# Copyright 2007,2008,2009 The Regents of the University of California
+# All Rights Reserved
+#
+# Permission to use, copy, modify and distribute any part of this
+# program for educational, research and non-profit purposes, by non-profit
+# institutions only, without fee, and without a written agreement is hereby
+# granted, provided that the above copyright notice, this paragraph and
+# the following three paragraphs appear in all copies.
+#
+# Those desiring to incorporate this work into commercial
+# products or use for commercial purposes should contact the Technology
+# Transfer & Intellectual Property Services, University of California,
+# San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+# Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+#
+# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+# INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+# IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+# OF SUCH DAMAGE.
+#
+# THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+# OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+# ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+# REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+# EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+# THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+"""
+MakeImage.py takes a spectrum labeled with ion types (built by Label.py) and produces a graph.
+"""
+from Utils import *
+import traceback
+import MSSpectrum
+import math
+import traceback
+
+try:
+    from PIL import Image
+    from PIL import ImageDraw
+    from PIL import ImageFont
+    # Fonts don't seem to work on Linux.  (Tried pdf, pcf, and pil formats...but no luck)
+    # So, we'll content ourselves with a default font if we must:
+    try:
+        TheFont = ImageFont.truetype("Times.ttf", 12)
+    except:
+        TheFont = ImageFont.load_default()
+except:
+    traceback.print_exc()
+    print "WARNING: Python Imaging Library (PIL) not installed.\n  Image creation is NOT available."
+    Image = None
+
+
+class WebColors:
+    "Color scheme for web display.  Colorful."
+    White = (255,255,255)
+    Green = (0,255,0)
+    Blue = (0,0,255)
+    PaleBlue = (10,10,80)
+    Red = (255,0,0)
+    Grey = (155,155,155)
+    #Grey = (0,0,0)
+    Background = (255, 255, 255)
+    Peak = (199, 199, 199)
+    #Peak = (0,0,0)
+    AnnotatedPeak = (0, 0, 0)
+    Axis = (155, 155, 155)
+    LabeledPeak = (0, 0, 0)
+    PeakLabel = (200, 0, 0)
+    BSeries = (55,55,200)
+    BSeriesPale = (155,155,255)
+    YSeries = (155,155,55)
+    YSeriesPale = (200,200,100)
+
+class PrintingColors:
+    "Color scheme for printing.  Black-and-white, dark shades."
+    White = (255,255,255)
+    Green = (0,0,0)
+    Blue = (0,0,0)
+    PaleBlue = (80,80,80)
+    Red = (0,0,0)
+    Grey = (80,80,80)
+    Background = (255, 255, 255)
+    Peak = (200, 200, 200)
+    AnnotatedPeak = (80, 80, 80)
+    Axis = (80, 80, 80)
+    LabeledPeak = (10, 10, 10)
+    PeakLabel = (0, 0, 0)
+    BSeries = (0,0,0)
+    BSeriesPale = (180,180,180)
+    YSeries = (0,0,0)
+    YSeriesPale = (180,180,180)
+    
+#Colors = PrintingColors
+Colors = WebColors
+
+def SetColors(PrintFlag):
+    global Colors
+    if PrintFlag:
+        Colors = PrintingColors
+    else:
+        Colors = WebColors
+    print "COLORS SET!", Colors.YSeries
+    
+def RoundOff(Int):
+    "Round off the least-significant nonzero digit of an integer (e.g. 151 -> 150, 26 -> 30)"
+    PowerOfTen = 10
+    while (1):
+        if PowerOfTen>Int:
+            return Int
+        Radix = Int % PowerOfTen
+        if Radix == 0:
+            PowerOfTen *= 10
+            continue
+        if Radix > PowerOfTen/2:
+            return (Int-Radix) + PowerOfTen
+        else:
+            return (Int-Radix)
+        
+    
+def GetTickWidth(MassWidth):
+    "Look for a nice round number that divides the width into about 20 pieces."
+    IdealSliceCount = 20
+    # If 6-15, round to 10; if 26-35, round to 20...  If 151...
+    #print "GetTickWidth:", MassWidth
+    Width = int(round(MassWidth / IdealSliceCount))
+    SliceCount = MassWidth / Width
+    while (1):
+        # Try rounding off another digit:
+        RoundedWidth = RoundOff(Width)
+        if RoundedWidth==Width:
+            break # we can't round off any more!
+        RoundedSliceCount = MassWidth / RoundedWidth
+        if (RoundedSliceCount < IdealSliceCount / 2) or (RoundedSliceCount > IdealSliceCount * 2):
+            break # Slices are too skinny or too fat
+        Width = RoundedWidth
+    return Width
+
+
+class BaseImageMaker:
+    """
+    Graph generating class.  Not very MS-specific.  Subclassed by the spectrum plotter.
+    """
+    LeftPad = 30
+    RightPad = 3
+    UpperPad = 50
+    LowerPad = 20
+    def __init__(self, Width = 600, Height = 400):
+        """
+        Width is the total image width, in pixels.  The plot body ranges from
+        self.LeftPad to self.Width.
+        Height is the total image height, in pixels.  The plot body ranges from
+        self.UpperPad to (self.Height-self.LowerPad)
+        """
+        self.MinX = 0
+        self.MaxX = 100
+        self.MinY = 0
+        self.MaxY = 1
+        self.Width = Width
+        self.Height = Height
+        self.BaseLine = self.Height - self.LowerPad
+        self.YBreak = None
+    def GetNiceTickMark(self, Width, GoodTickCount):
+        """
+        Given a width (axis size), compute a good interval for major tick marks.
+        We want around 10 ticks, but we round the tick size up or down in order
+        to get cleaner numbers.
+        """
+        if (Width <= 0):
+            return 0.1
+        Tick = Width / float(GoodTickCount)
+        MinimumPower = int(math.log10(Tick)) - 1
+        PossibleTicks = []
+        for Power in range(MinimumPower, MinimumPower+5):
+            PossibleTicks.append(10**Power)
+            PossibleTicks.append(10**Power * 2)
+            PossibleTicks.append(10**Power * 5)
+        for Index in range(len(PossibleTicks) - 1):
+            if PossibleTicks[Index+1] > Tick:
+                return PossibleTicks[Index+1]
+        return 0.1 # hacky fall-back case!
+    def GetValueName(self, Value):
+        """
+        Take a number - potentially a very large or small one - and format it so as
+        to use few characters, so as to make a usable axis label.
+        1.2002 -> 1.2
+        0.149 -> 0.15
+        10000 -> 1.0e4
+        -0.00005 -> -5.0e-5
+        """
+        if Value == 0:
+            return "0"
+        if Value < 0:
+            Sign = "-"
+            Value *= -1
+        else:
+            Sign = ""
+        if Value > 1000:
+            Exp = int(math.log10(Value))
+            Abscissa = 10**(math.log10(Value) - int(math.log10(Value)))
+            return "%s%.1fe%d"%(Sign,Abscissa, Exp)
+        if Value < 0.01:
+            Exp = math.floor(math.log10(Value))
+            Abscissa = 10**(math.log10(Value) - Exp)
+            return "%s%.1fe%d"%(Sign,Abscissa, Exp)
+        if abs(Value) < 1:
+            return "%s%.2f"%(Sign, Value)
+        else:
+            return "%s%.1f"%(Sign, Value)
+    def DrawYAxis(self):
+        if self.YBreak:
+            self.DrawYAxisHelper(self.MinLowY, self.MaxLowY, 8)
+            self.DrawYAxisHelper((self.MinHighY + self.MaxHighY) / 2.0, self.MaxHighY, 1)
+            self.Draw.line((self.LeftPad, self.UpperPad, self.LeftPad, self.YBreak - 2), Colors.Axis)
+            self.Draw.line((self.LeftPad - 4, self.YBreak + 4, self.LeftPad + 4, self.YBreak), Colors.Axis)
+            self.Draw.line((self.LeftPad - 4, self.YBreak, self.LeftPad + 4, self.YBreak - 4), Colors.Axis)
+            self.Draw.line((self.LeftPad, self.YBreak + 2, self.LeftPad, self.BaseLine), Colors.Axis)
+        else:
+            self.DrawYAxisHelper(self.MinY, self.MaxY, 10)
+            self.Draw.line((self.LeftPad, self.UpperPad, self.LeftPad, self.BaseLine), Colors.Axis)
+    def DrawYAxisHelper(self, MinVal, MaxVal, TickCount):
+        "Draw the y-axis (including tick-marks and labels)"
+        TickSize = self.GetNiceTickMark(MaxVal - MinVal, TickCount)
+        ##print "Y axis: %s ticks from %s to %s; tick size %s"%(TickCount, MinVal, MaxVal, TickCount)
+        MaxPowers = int(math.log10(MaxVal)) + 1
+        LastY = None
+        try:
+            IntensityLevel = MinVal
+            while IntensityLevel < MaxVal:
+                Y = self.GetY(IntensityLevel)
+                self.Draw.line((self.LeftPad-2,Y,self.LeftPad,Y),Colors.Axis)
+                Str = self.GetValueName(IntensityLevel)
+                self.Draw.text((0, Y-5), Str, Colors.Axis, font = TheFont)
+                LastY = Y
+                IntensityLevel += TickSize
+        except:
+            traceback.print_exc()
+            print 0, self.MinY, self.MaxY, TickSize
+            raise
+    def BreakY(self, SmallestOfBig, BiggestOfSmall):
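+        """
+        Configure a broken (split) y-axis: intensities up to BiggestOfSmall get
+        most of the plot height, and intensities from SmallestOfBig upward are
+        compressed above the break, so one dominant peak doesn't flatten the rest.
+        """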
+        self.MinLowY = self.MinY
+        self.MaxLowY = BiggestOfSmall * 1.05
+        self.MinHighY = max(self.MaxLowY * 1.001, SmallestOfBig * 0.95)
+        self.MaxHighY = self.MaxY
+        self.BrokenY = 1
+        self.YBreak = int((self.Height - self.UpperPad - self.LowerPad) * 0.2)
+        #print "Y break is:", self.YBreak
+    def GetY(self, YValue):
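+        """
+        Map a y value (intensity) to a pixel row, honoring the broken y-axis
+        if BreakY() has been applied.
+        """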
+        if self.YBreak:
+            if YValue > self.MaxLowY:
+                YPercent = (YValue - self.MinHighY) / max(1, self.MaxHighY - self.MinHighY)
+                return self.YBreak - int((self.YBreak - self.UpperPad)*YPercent)
+            else:
+                YPercent = YValue / max(1, self.MaxLowY - self.MinLowY)
+                return self.BaseLine - int((self.BaseLine - self.YBreak)*YPercent)
+        YPercent = YValue / float(max(1, self.MaxY - self.MinY))
+        return self.BaseLine - int((self.BaseLine - self.UpperPad) * YPercent)
+    def GetX(self, XValue):
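+        "Map an x value (m/z, for spectra) to a pixel column within the plot body."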
+        XPercent = (XValue - self.MinX) / max(1, (self.MaxX - self.MinX))
+        TotalWidth = self.Width - (self.LeftPad + self.RightPad)
+        return self.LeftPad + int(XPercent * TotalWidth)
+    def DrawTickMarks(self):
+        TickPos = 0
+        while TickPos < self.MaxX-1.0:
+            if TickPos < self.MinX:
+                TickPos += self.TickWidth
+                continue
+            X = self.GetX(TickPos)
+            # Draw text, unless it would go over the edge:
+            TextWidth = len(str(TickPos))*6
+            TextX = X - TextWidth/2
+            if TextX < self.Width - TextWidth:
+                self.Draw.line((X, self.BaseLine, X, self.BaseLine+3), Colors.Axis)
+                self.Draw.text((TextX, self.BaseLine+2), str(TickPos), Colors.Axis, font = TheFont)
+            TickPos += self.TickWidth
+    
+class MSImageMaker(BaseImageMaker):
+    def __init__(self, *args, **kw):
+        # Some options:
+        self.YBreakThreshold = 0.3333
+        #self.Labels = {} Label -> (PeakX, PeakY, Label, PeakIntensity, IntensityRank)
+        self.IntensityRank = {} # PeakIndex -> Rank
+        BaseImageMaker.__init__(self, *args, **kw)
+    def ConvertPeakAnnotationToImage(self, PeakAnnotationList, OutputFileName, Peptide = None, Width = 600, Height = 400):
+        if not Image: # catch for no PIL
+            return None
+        self.Width = Width
+        self.Height = Height
+        self.BaseLine = self.Height - self.LowerPad
+        self.PeakAnnotationList = PeakAnnotationList
+        self.GetPeakDemographics(Peptide) #computes min, max, intensityrank
+        self.PlotImage = Image.new("RGB", (Width, Height), Colors.Background)  # mode, size, [startcolor]
+        self.Draw = ImageDraw.Draw(self.PlotImage)
+        self.RoofLine = Height * 0.5
+        MassWidth = self.Width - (self.LeftPad + self.RightPad)
+        # Draw baseline
+        self.Draw.line((self.LeftPad, self.BaseLine, Width - self.RightPad, self.BaseLine), Colors.Axis)
+        # Draw x axis tickmarks (and labels):
+        self.TickWidth = 200
+        self.DrawTickMarks()
+        # Draw y axis:
+        self.DrawYAxis()
+        # Draw peaks, with labels
+        self.DrawPeaks()
+        if Peptide:
+            self.DrawBSeries(Peptide)
+            self.DrawYSeries(Peptide)
+        self.DrawPeakLabels()
+        self.PlotImage.save(OutputFileName, "png")
+    def GetPeakDemographics(self, Peptide = None):
+        """
+        Because this version uses a list of peak annotations given by
+        PyInspect, and not peak objects, they don't come with an associated
+        rank.  Here we sort the peaks by intensity and assign each one a rank.
+        """
+        Intensities = []
+        self.MinX = 1000
+        self.MaxX = 0
+        # Sort peak, from most to least intense:
+        PeaksByIntensity = []
+        for PeakIndex in range(len(self.PeakAnnotationList)):
+            Tuple = self.PeakAnnotationList[PeakIndex]
+            PeaksByIntensity.append((Tuple[1], Tuple[0], PeakIndex))
+            ## set min and max masses
+            if Tuple[0] > self.MaxX:
+                self.MaxX = Tuple[0]
+            if Tuple[0] < self.MinX:
+                self.MinX = Tuple[0]
+            self.MaxY = max(self.MaxY, Tuple[1])
+        PeaksByIntensity.sort()
+        PeaksByIntensity.reverse()
+        ###
+        self.IntensityRank = [None] * len(self.PeakAnnotationList)
+        for IntensityRank in range(len(PeaksByIntensity)):
+            # PeaksByIntensity entries are (Intensity, Mass, PeakIndex):
+            PeakIndex = PeaksByIntensity[IntensityRank][-1]
+            self.IntensityRank[PeakIndex] = IntensityRank
+        ## possibly reset x min and max
+        if Peptide:
+            self.MaxX = max(self.MaxX, Peptide.Masses[-1] + 10)
+            self.MinX = 0
+        FullMass = self.MaxX - self.MinX
+        # Move the left and right edges a little bit, so that peaks don't
+        # hit the edges of the image.
+        self.MinX = max(0, self.MinX - FullMass * 0.05)
+        self.MaxX = self.MaxX + FullMass * 0.05
+        self.MaxY = max(10.0, self.MaxY)
+        ##############
+        ## do some Y breakage
+        Intensity1 = PeaksByIntensity[0][0]
+        if len(PeaksByIntensity) < 2:
+            return # why we would have a spectrum with only one peak, I will never know.
+        Intensity2 = PeaksByIntensity[1][0]
+        if (Intensity2 / Intensity1 < self.YBreakThreshold):
+            self.BreakY(Intensity1, Intensity2)
+            return
+        if len(PeaksByIntensity) < 3:
+            return
+        Intensity3 = PeaksByIntensity[2][0]
+        if Intensity3 / Intensity2 < self.YBreakThreshold:
+            self.BreakY(Intensity2, Intensity3)
+            return
+    def DrawPeaks(self):
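+        """
+        Draw unlabeled peaks in grey and annotated peaks in black, and record
+        in self.Labels the most intense peak for each text label (b/y series,
+        parent losses, GlcNAc).
+        """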
+        self.Labels = {}
+        MaxIntensity = 0
+        # First, no-ion-type peaks:
+        for PeakTuple in self.PeakAnnotationList:
+            (Mass, Intensity, Label, AminoIndex) = PeakTuple
+            MaxIntensity = max(MaxIntensity, Intensity)
+            if Label: # don't draw in grey anything which has been labeled
+                continue
+            PeakX = self.GetX(Mass)
+            PeakY = self.GetY(Intensity)
+            self.Draw.line((PeakX, PeakY, PeakX, self.BaseLine), Colors.Peak)
+        MinLabelIntensity = MaxIntensity * 0.25 # any annotated peak above this threshold receives a text label, even if it's a neutral loss.
+        # Next, peaks with assigned ions:
+        for PeakIndex in range(len(self.PeakAnnotationList)):
+            (Mass, Intensity, Label, AminoIndex) = self.PeakAnnotationList[PeakIndex]
+            if not Label: #skip all the unlabeled peaks this round
+                continue
+            PeakX = self.GetX(Mass)
+            PeakY = self.GetY(Intensity)
+            self.Draw.line((PeakX, PeakY, PeakX, self.BaseLine), Colors.AnnotatedPeak)
+            TextLabelPeakNames = ("B", "Y", "GlcNAc", "Y2", "B2")
+            if Label in TextLabelPeakNames:  
+                PeptideIndex = AminoIndex
+                if Label in ("B", "Y"):
+                    Label = Label.lower()
+                if Label in ("B2", "Y2"):
+                    Label = "%s2"%Label[0].lower() 
+                if AminoIndex != None:
+                    Label = "%s %s"%(Label, AminoIndex)
+                OldLabelInfo = self.Labels.get(Label, None)
+                if OldLabelInfo!=None and OldLabelInfo[-2] > Intensity:
+                    continue
+                self.Labels[Label] = (PeakX, PeakY, Label, Intensity, self.IntensityRank[PeakIndex])
+            if Label[0] == "M":
+                ##special case for phorphorylated spectra. the label
+                ## Parent loss has been changed to M-p or M-p-h2o
+                OldLabelInfo = self.Labels.get(Label, None)
+                if OldLabelInfo != None and OldLabelInfo[-2] > Intensity:
+                    continue
+                self.Labels[Label] = (PeakX, PeakY, Label, Intensity, self.IntensityRank[PeakIndex])
+    def CollideRectangles(self, X1, Y1, X2, Y2, Rectangles):
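+        """
+        Return the first rectangle in Rectangles that contains a corner of the
+        box (X1, Y1)-(X2, Y2), or None if there is no collision.
+        """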
+        for (CX1, CY1, CX2, CY2) in Rectangles:
+            if CX1 <= X1 <= CX2 and CY1 <= Y1 <= CY2:
+                return (CX1, CY1, CX2, CY2)
+            if CX1 <= X2 <= CX2 and CY1 <= Y1 <= CY2:
+                return (CX1, CY1, CX2, CY2)
+            if CX1 <= X1 <= CX2 and CY1 <= Y2 <= CY2:
+                return (CX1, CY1, CX2, CY2)
+            if CX1 <= X2 <= CX2 and CY1 <= Y2 <= CY2:
+                return (CX1, CY1, CX2, CY2)
+        return None
+    def DrawPeakLabels(self):
+        # Sort labels by priority.  b and y take precedence over all else;
+        # intense peaks take precedence over others.
+        SortedLabels = []
+        for (X, Y, Label, Intensity, Rank) in self.Labels.values():
+            if Rank < 10:
+                Priority = 0
+            else:
+                Priority = 1
+            SortedLabels.append([Priority, Y, X, Label, Intensity, None, None])
+        SortedLabels.sort()
+        SortedLabels = SortedLabels[:25]
+        DirtyRectangles = []
+        for List in SortedLabels:
+            (IsBY, Y, X, Label, Intensity, Dummy, Dummy) = List
+            (Width, Height) = self.Draw.textsize(Label, font = TheFont)
+            Height *= 2 # for superscript and subscript
+            X1 = X - Width/2
+            X2 = X + Width/2
+            Y1 = Y - Height
+            Y2 = Y
+            Tuple = self.CollideRectangles(X1, Y1, X2, Y2, DirtyRectangles)
+            if Tuple == None:
+                List[5] = (X1, Y1, X2, Y2)
+                DirtyRectangles.append((X1, Y1, X2, Y2))
+                continue
+            (CX1, CY1, CX2, CY2) = Tuple
+            # Try moving this label off to the side:
+            if (X1 + X2) / 2 < (CX1 + CX2) / 2:
+                Move = (X2 - CX1) + 1
+                X1 -= Move
+                X2 -= Move
+                Y1 -= 5
+                Y2 -= 5
+            else:
+                Move = (CX2 - X1) + 1
+                X1 += Move
+                X2 += Move
+                Y1 -= 5
+                Y2 -= 5
+            Tuple = self.CollideRectangles(X1, Y1, X2, Y2, DirtyRectangles)
+            if Tuple == None:
+                List[5] = (X1, Y1, X2, Y2)
+                DirtyRectangles.append((X1, Y1, X2, Y2))
+                List[6] = ((X1 + X2) / 2, Y2, X, Y)
+                continue
+        for Index in range(len(SortedLabels)-1, -1, -1):
+            List = SortedLabels[Index]
+            if List[5]:
+                (X1, Y1, X2, Y2) = List[5]
+                self.Draw.line((List[2], List[1], List[2], self.BaseLine), Colors.LabeledPeak) # color the peak
+        for Index in range(len(SortedLabels)-1, -1, -1):
+            List = SortedLabels[Index]
+            if List[5]:
+                (X1, Y1, X2, Y2) = List[5]
+                # Most peaks are drawn using superscripts and subscripts:
+                PeakName = List[3]
+                if PeakName == "GlcNAc":
+                    self.Draw.text((X1, Y1 + 5), PeakName, Colors.PeakLabel, font = TheFont)
+                else:
+                    self.Draw.text((X1, Y1 + 5), PeakName[0], Colors.PeakLabel, font = TheFont)
+                    NumIndex = len(PeakName) - 1
+                    while PeakName[NumIndex] in ("0123456789"):
+                        NumIndex -= 1
+                    if NumIndex > 0:
+                        SuperScript = PeakName[1:NumIndex+1].strip()
+                    else:
+                        SuperScript = ""
+                    if SuperScript:
+                        self.Draw.text((X1+7, Y1+10), SuperScript, Colors.PeakLabel, font = TheFont)
+                        self.Draw.text((X1+7, Y1), PeakName[NumIndex+1:], Colors.PeakLabel, font = TheFont)
+                    else:
+                        self.Draw.text((X1+7, Y1+5), PeakName[NumIndex+1:], Colors.PeakLabel, font = TheFont)
+            # Draw the dotted line from the label to its peak:                
+            if List[6]:
+                self.Draw.line(List[6], Colors.LabeledPeak)
+    def DrawDottedLine(self, X1, Y1, X2, Y2, Color):
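+        "Draw a dotted line from (X1, Y1) to (X2, Y2), in alternating 5-pixel dashes."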
+        Distance = math.sqrt((X2-X1)**2 + (Y2-Y1)**2)
+        if Distance == 0:
+            return
+        DX = (X2-X1)/Distance
+        DY = (Y2-Y1)/Distance
+        OldLineLength = 0
+        Dot = 1
+        while (1):
+            LineLength = min(Distance, OldLineLength + 5)
+            XA = int(X1 + DX*OldLineLength)
+            XB = int(X1 + DX*LineLength)
+            YA = int(Y1 + DY*OldLineLength)
+            YB = int(Y1 + DY*LineLength)
+            if Dot:
+                self.Draw.line((XA, YA, XB, YB), Color)
+            OldLineLength = LineLength
+            Dot = not Dot
+            if (LineLength == Distance):
+                break
+    def DrawBSeries(self, Peptide):
+        BHeight = 17
+        if getattr(Peptide, "Seed", None):
+            SeedIndex = Peptide.Aminos.rfind(Peptide.Seed)
+        else:
+            SeedIndex = -999
+        self.Draw.text((10, BHeight-7), "b", Colors.BSeries, font = TheFont)
+        # First, draw tickmarks for the b peaks
+        for MassIndex in range(len(Peptide.Masses)):
+            Mass = Peptide.Masses[MassIndex]
+            X = self.GetX(Mass)
+            Label = ("b %s"%MassIndex)
+            if self.Labels.has_key(Label):
+                PeakIntensity = self.Labels[Label][-2]
+                Y = self.GetY(PeakIntensity)
+                self.DrawDottedLine(X, BHeight-2, X, Y, Colors.BSeriesPale)
+            else:
+                self.Draw.line((X, BHeight-2, X, BHeight+2), Colors.BSeriesPale)
+        # Now draw horizontal lines, and amino labels:
+        for AminoIndex in range(len(Peptide.Aminos)):
+            LabelA = "b %s"%AminoIndex
+            LabelB = "b %s"%(AminoIndex+1)
+            XA = self.GetX(Peptide.Masses[AminoIndex])
+            XB = self.GetX(Peptide.Masses[AminoIndex+1])
+            HasLabelA = self.Labels.has_key(LabelA)
+            if AminoIndex == 0:
+                HasLabelA = 1
+            HasLabelB = self.Labels.has_key(LabelB)
+            if AminoIndex ==len(Peptide.Aminos)-1:
+                HasLabelB = 1
+            if HasLabelA and HasLabelB:
+                self.Draw.line((XA, BHeight, XB, BHeight), Colors.BSeries)
+            else:
+                self.DrawDottedLine(XA, BHeight, XB, BHeight, Colors.BSeriesPale)
+            if AminoIndex in (SeedIndex, SeedIndex+1, SeedIndex+2):
+                self.Draw.line((XA, BHeight-1, XB, BHeight-1), Colors.BSeries)
+                self.Draw.line((XA, BHeight, XB, BHeight), Colors.BSeries)
+                self.Draw.line((XA, BHeight+1, XB, BHeight+1), Colors.BSeries)
+                
+            X = (XA+XB)/2 - 3
+            Str = Peptide.Aminos[AminoIndex]
+            if Peptide.Modifications.get(AminoIndex):
+                self.Draw.text((X-4, BHeight-14), "%s*"%Str, Colors.BSeries, font = TheFont)
+            else:
+                self.Draw.text((X, BHeight-14), Str, Colors.BSeries, font = TheFont)
+    def DrawYSeries(self, Peptide): # Copied and modded from DrawBSeries
+        YHeight = 34
+        if getattr(Peptide, "Seed", None):
+            SeedIndex = Peptide.Aminos.rfind(Peptide.Seed)
+        else:
+            SeedIndex = -999
+        
+        self.Draw.text((10, YHeight-7), "y", Colors.YSeries, font = TheFont)
+        # First, draw tickmarks for the y peaks
+        PM = Peptide.Masses[-1] + 19 # parent mass
+        for MassIndex in range(len(Peptide.Masses)):
+            Mass = PM - Peptide.Masses[MassIndex]
+            X = self.GetX(Mass)
+            Label = "y %s"%(len(Peptide.Masses) - MassIndex - 1)
+            #Label = "y %s"%(MassIndex)
+            #print "Y series y %d, mass %f, label %s"%(MassIndex, Mass, Label)
+            if self.Labels.has_key(Label):
+                PeakIntensity = self.Labels[Label][-2]
+                Y = self.GetY(PeakIntensity)
+                self.DrawDottedLine(X, YHeight-2, X, Y, Colors.YSeriesPale)
+            else:
+                self.Draw.line((X, YHeight-2, X, YHeight+2), Colors.YSeriesPale)
+        # Now draw horizontal lines, and amino labels:
+        for AminoIndex in range(len(Peptide.Aminos)):
+            LabelA = "y %s"%(len(Peptide.Aminos) - AminoIndex)
+            LabelB = "y %s"%(len(Peptide.Aminos) - AminoIndex - 1)
+            XA = self.GetX(PM - Peptide.Masses[AminoIndex])
+            XB = self.GetX(PM - Peptide.Masses[AminoIndex+1])
+            HasLabelA = self.Labels.has_key(LabelA)
+            if AminoIndex == 0:
+                HasLabelA = 1
+            HasLabelB = self.Labels.has_key(LabelB)
+            if AminoIndex ==len(Peptide.Aminos)-1:
+                HasLabelB = 1
+            if HasLabelA and HasLabelB:
+                self.Draw.line((XA, YHeight, XB, YHeight), Colors.YSeries)
+            else:
+                self.DrawDottedLine(XA, YHeight, XB, YHeight, Colors.YSeriesPale)
+            if AminoIndex in (SeedIndex, SeedIndex+1, SeedIndex+2):
+                self.Draw.line((XA, YHeight-1, XB, YHeight-1), Colors.YSeries)
+                self.Draw.line((XA, YHeight, XB, YHeight), Colors.YSeries)
+                self.Draw.line((XA, YHeight+1, XB, YHeight+1), Colors.YSeries)
+                
+            X = (XA+XB)/2 - 3
+            Str = Peptide.Aminos[AminoIndex]
+            if Peptide.Modifications.get(AminoIndex):
+                self.Draw.text((X-4, YHeight-14), "%s*"%Str, Colors.YSeries, font = TheFont)
+            else:
+                self.Draw.text((X, YHeight-14), Str, Colors.YSeries, font = TheFont)
+
+UsageInfo = """
+Usage:
+   MakeImage.py <LabeledSpectrum> [<OutputFileName>]
+"""
+
+if __name__ == "__main__":
+    if len(sys.argv)<2:
+        print UsageInfo
+        sys.exit(1)
+    InputFileName = sys.argv[1]
+    if len(sys.argv)>2:
+        OutputFileName = sys.argv[2]
+    else:
+        OutputFileName = os.path.splitext(InputFileName)[0] + ".png"
+    Maker = MSImageMaker()
+    Maker.ConvertSpectrumFileToImage(InputFileName, OutputFileName)
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..0eb8aba
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,36 @@
+SHELL = /bin/sh
+.SUFFIXES:
+.SUFFIXES: .c .o
+CC = cc
+CFLAGS = -g -DDEBUG -D_CONSOLE -O1 -funroll-loops -lz
+LDFLAGS = -lm -lexpat
+
+OBJS = base64.o BN.o BuildMS2DB.o ChargeState.o CMemLeak.o Errors.o ExonGraphAlign.o \
+	FreeMod.o IonScoring.o \
+	LDA.o main.o Mods.o MS2DB.o ParentMass.o ParseInput.o ParseXML.o PValue.o \
+	Run.o Score.o Scorpion.o SNP.o Spectrum.o Spliced.o TagFile.o\
+	SpliceDB.o SpliceScan.o SVM.o Tagger.o Trie.o Utils.o 
+
+HDRS = base64.h BN.h BuildMS2DB.h ChargeState.h CMemLeak.h Errors.h ExonGraphAlign.h FreeMod.h \
+	Inspect.h IonScoring.h LDA.h Mods.h MS2DB.h ParentMass.h ParseInput.h ParseXML.h PValue.h \
+	Run.h Score.h Scorpion.h TagFile.h \
+	SNP.h Spectrum.h Spliced.h SpliceDB.h SpliceScan.h SVM.h Tagger.h \
+	Trie.h Utils.h 
+
+EXE = inspect
+
+all: $(EXE)
+
+.c.o: $(HDRS)
+	$(CC) $(CFLAGS) -c $<
+
+$(EXE): $(OBJS)
+	$(CC) $(CFLAGS) $(LDFLAGS) -o $(EXE) $(OBJS)
+
+clean-objs:
+	@rm -f $(OBJS) $(MPI_OBJS)
+
+clean-exe:
+	@rm -f $(EXE) $(MPI_EXE) 
+
+clean: clean-objs clean-exe
diff --git a/Mods.c b/Mods.c
new file mode 100644
index 0000000..3bc0ef9
--- /dev/null
+++ b/Mods.c
@@ -0,0 +1,340 @@
+//Title:          Mods.c
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+#include "CMemLeak.h"
+#include "Inspect.h"
+#include "Trie.h"
+#include "Utils.h"
+#include "FreeMod.h"
+#include "Mods.h"
+#include "Errors.h"
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <stdlib.h>
+
+// AllKnownPTMods is initialized once and only once.  AllowedPTMods is a sub-array, 
+// set before doing a search or batch of searches.  AllPTModCount is the size
+// of array AllKnownPTMods, and AllowedPTModCount is the size of array AllowedPTMods.
+PTMod AllKnownPTMods[MAX_PT_MODTYPE];
+int AllPTModCount = 0;
+
+int g_PhosphorylationMod = -1;
+
+// PTMLimit[n] is a limit on how many modifications of type n can be placed
+// on a peptide.  For each n, PTMLimit[n] <= GlobalOptions->MaxPTMods.
+int g_PTMLimit[MAX_PT_MODTYPE];
+
+int PlainOldDecorationIndex = 0;
+
+Decoration* AllDecorations = NULL;
+int AllDecorationCount = 0;
+int AllDecorationAllocation = 0;
+
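+// qsort comparator: order decorations by ascending total mass.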
+int CompareDecorations(const Decoration* A, const Decoration* B)
+{
+    if (A->Mass < B->Mass)
+    {
+        return -1;
+    }
+    if (A->Mass > B->Mass)
+    {
+        return 1;
+    }
+    return 0;
+}
+
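+// Recursively enumerate decorations: starting from the decoration at
+// SourceDecorationIndex, try adding one more PTM of each type (from
+// MinPTModIndex upward), respecting the per-type counts in PTMRemaining and
+// the overall ModsLeft budget; each new decoration is appended to AllDecorations.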
+void ExpandDecorationList(int SourceDecorationIndex, int MinPTModIndex, int* PTMRemaining, int ModsLeft)
+{
+    int PTModIndex;
+    int Decor;
+    //
+    if (ModsLeft <= 0)
+    {
+        return;
+    }
+    for (PTModIndex = MinPTModIndex; PTModIndex < AllPTModCount; PTModIndex++)
+    {
+        if (PTMRemaining[PTModIndex] <= 0)
+        {
+            continue;
+        }
+        // If we have a lot of decorations, expand the memory available for them:
+        if (AllDecorationCount == AllDecorationAllocation-1)
+        {
+            AllDecorationAllocation *= 2;
+            AllDecorations = (Decoration*)realloc(AllDecorations, sizeof(Decoration) * AllDecorationAllocation);
+        }
+        Decor = AllDecorationCount;
+        AllDecorationCount++;
+        //printf("ExpandDecorationList memcpy\n");
+        //fflush(stdout);
+	memcpy(AllDecorations[Decor].Mods, AllDecorations[SourceDecorationIndex].Mods, sizeof(int) * MAX_PT_MODTYPE);
+        AllDecorations[Decor].Mods[PTModIndex]++;
+        AllDecorations[Decor].TotalMods = AllDecorations[SourceDecorationIndex].TotalMods + 1;
+        AllDecorations[Decor].Mass = AllDecorations[SourceDecorationIndex].Mass + MassDeltaByIndex[AMINO_ACIDS*MAX_PT_MODTYPE + PTModIndex]->RealDelta; 
+	//printf("ExpandDecorationList memcpy done\n");
+        //fflush(stdout);
+        //printf("Added decoration %d (%.2f) ", Decor, AllDecorations[Decor].Mass);
+        //for (ModIndex = 0; ModIndex < AllPTModCount; ModIndex++)
+        //{
+        //    printf("%d ", AllDecorations[Decor].Mods[ModIndex]);
+        //}
+        //printf("\n");
+
+        PTMRemaining[PTModIndex] -= 1;
+        ExpandDecorationList(Decor, PTModIndex, PTMRemaining, ModsLeft - 1);
+        PTMRemaining[PTModIndex] += 1;
+	//printf("Considering PTModIndex %d/%d\n",PTModIndex,AllPTModCount);
+    }
+}
+
+
+//Populates the AllPTMList as if all 400 mutations were specified in the input file
+int PopulatePTMListWithMutations()
+{
+    int ModFlags;
+    
+    char* StrAminos = NULL;
+    char* StrType = NULL;
+    char* StrName = NULL;
+    float MassDelta;
+    int ScaledMassDelta;
+        
+    int FromIndex;
+    int ToIndex;
+    
+    int Bin;
+    int ModIndex;
+    char ModNameBuffer[64];
+
+
+    AllPTModCount = 0;
+    //
+    if (!MassDeltas)
+    {
+        LoadMassDeltas(NULL, 0);
+    }
+    if (AllPTModCount == MAX_PT_MODTYPE)
+    {
+        // Too many!
+      REPORT_ERROR_S(35, "??");
+        return 0;
+    }
+    
+
+    //printf("Starting to populatePTMList!!!\n");
+    StrAminos = (char*)calloc(2,sizeof(char));
+    StrName = (char*)calloc(5,sizeof(char));
+    for(FromIndex = 0; FromIndex < AMINO_ACIDS; ++FromIndex)
+    {
+      if(PeptideMass[FromIndex + (int)('A')] == 0)
+	continue;
+      sprintf(StrAminos,"%c",(char)(FromIndex + 'A'));
+      ModFlags = DELTA_FLAG_VALID;
+      for(ToIndex = 0; ToIndex < AMINO_ACIDS; ++ToIndex)
+	{
+	  if(PeptideMass[ToIndex + (int)('A')] == 0)
+	    continue;
+	  if(FromIndex == ToIndex)
+	    continue;
+	  
+	  ScaledMassDelta = PeptideMass[ToIndex + (int)('A')] - PeptideMass[FromIndex +(int)('A')];
+	  
+	  MassDelta = ((float)(ScaledMassDelta))/MASS_SCALE;
+	  sprintf(StrName,"%c->%c",(char)(FromIndex + 'a'),(char)(ToIndex + 'a'));
+
+	  //printf("Scaled mass of %c->%c = %d, %.3f, %s\n",(char)(FromIndex + 'A'),(char)(ToIndex+'A'),ScaledMassDelta,MassDelta,StrName);
+	  // Default modification type is OPTIONAL.
+	  if (!StrType)
+	    {
+	      StrType = "opt";
+	    }
+	  
+	  AllKnownPTMods[AllPTModCount].Flags = ModFlags;
+	  strncpy(AllKnownPTMods[AllPTModCount].Name, StrName, 5);
+	  AllKnownPTMods[AllPTModCount].Allowed[FromIndex] = 1;
+	  // Add to the first still-available slot:
+	  for (ModIndex = 0; ModIndex < GlobalOptions->DeltasPerAA; ModIndex++)
+	    {
+	      if (!MassDeltas[FromIndex][ModIndex].Flags)
+		{
+		  strncpy(MassDeltas[FromIndex][ModIndex].Name, StrName, 40);
+		  MassDeltas[FromIndex][ModIndex].RealDelta = ScaledMassDelta;
+		  ROUND_MASS_TO_DELTA_BIN(MassDelta, Bin);
+		  MassDeltas[FromIndex][ModIndex].Delta = Bin;
+		  MassDeltas[FromIndex][ModIndex].Index = AllPTModCount;
+		  MassDeltaByIndex[FromIndex * MAX_PT_MODTYPE + AllPTModCount] = &MassDeltas[FromIndex][ModIndex];
+		  MassDeltaByIndex[MDBI_ALL_MODS * MAX_PT_MODTYPE + AllPTModCount] = &MassDeltas[FromIndex][ModIndex];
+		  MassDeltas[FromIndex][ModIndex].Flags = ModFlags;
+		  break;
+		}
+	    }
+			  
+	  AllKnownPTMods[AllPTModCount].Mass = ScaledMassDelta;
+	  g_PTMLimit[AllPTModCount] = 1; // allow 1 per peptide by default
+	  
+	  
+	  AllPTModCount++;
+	  //printf("Total mods %d\n",AllPTModCount);
+	}
+    }
+    //printf("Populate: MaxPTMods: %d\n",GlobalOptions->MaxPTMods);
+    free(StrName);
+    free(StrAminos);
+    return 1;
+}
+
+// Entries of form IsSubDecoration[DecorIndex][OtherDecorIndex]
+int** IsSubDecoration = NULL;
+
+
+// After reading the definitions of all the post-translational modifications, we construct 
+// a list of decorations.
+// Special case:  If GlobalOptions->MandatoryModName is set, then we set MandatoryModIndex, and
+// we only allow decorations that *do* contain that mod.
+void BuildDecorations()
+{
+    int DecorIndex;
+    int OtherDecorIndex;
+    int ModIndex;
+    int ValidSubDecoration;
+    int PTMRemaining[MAX_PT_MODTYPE];
+    int TotalPTMsPermitted;
+    //
+
+    // Free the old IsSubDecoration array, if allocated:
+    if (IsSubDecoration)
+    {
+        for (DecorIndex = 0; DecorIndex < AllDecorationCount; DecorIndex++)
+        {
+            SafeFree(IsSubDecoration[DecorIndex]);
+        }
+        SafeFree(IsSubDecoration);
+        IsSubDecoration = NULL;
+    }
+    AllDecorationAllocation = 100;
+    SafeFree(AllDecorations); // Remove old ones!
+    AllDecorations = NULL;
+    AllDecorations = (Decoration*)calloc(AllDecorationAllocation, sizeof(Decoration));
+    // AllDecorations[0] is now prepared.  (Mass 0, no mods)
+    AllDecorationCount = 1;
+    //printf("MAX_PT_MODTYPE: %d\n",MAX_PT_MODTYPE);
+    //printf("Command: memcpy(%d,%d,%d)\n",PTMRemaining, g_PTMLimit, sizeof(int) * MAX_PT_MODTYPE);
+    //fflush(stdout);
+    memcpy(PTMRemaining, g_PTMLimit, sizeof(int) * MAX_PT_MODTYPE);
+    TotalPTMsPermitted = GlobalOptions->MaxPTMods;
+    //printf("DOne memcopy\n");
+    //fflush(stdout);
+    ExpandDecorationList(0, 0, PTMRemaining, TotalPTMsPermitted);
+    qsort(AllDecorations, AllDecorationCount, sizeof(Decoration), (QSortCompare)CompareDecorations);
+
+    //DEBUG-NEC
+    /*for (DecorIndex = 0; DecorIndex < AllDecorationCount; DecorIndex++)
+      {
+	printf("AllDecorations[%d]: Mass=%d,TotalMods=%d\n",DecorIndex,AllDecorations[DecorIndex].Mass,AllDecorations[DecorIndex].TotalMods);
+	for(ModIndex = 0; ModIndex < MAX_PT_MODTYPE; ++ModIndex)
+	  printf(" - MassDeltas with Index %d = %d\n",ModIndex,AllDecorations[DecorIndex].Mods[ModIndex]);
+      }
+    */
+    // Locate the index of the unmodified null-decoration.  (Usually it's #0, because
+    // it has mass 0, but it's possible for PTMs to have a *negative* mass)
+    for (DecorIndex = 0; DecorIndex < AllDecorationCount; DecorIndex++)
+    {
+        if (AllDecorations[DecorIndex].TotalMods == 0)
+        {
+            PlainOldDecorationIndex = DecorIndex;
+            break;
+        }
+    }
+    for (ModIndex = 0; ModIndex < AllPTModCount; ModIndex++)
+    {
+        if (!CompareStrings(GlobalOptions->MandatoryModName, MassDeltaByIndex[AMINO_ACIDS*MAX_PT_MODTYPE + ModIndex]->Name))
+        {
+            GlobalOptions->MandatoryModIndex = ModIndex;
+        }
+    }
+
+    IsSubDecoration = (int**)calloc(AllDecorationCount, sizeof(int*));
+    for (DecorIndex = 0; DecorIndex < AllDecorationCount; DecorIndex++)
+    {
+        IsSubDecoration[DecorIndex] = (int*)calloc(AllDecorationCount, sizeof(int));
+        for (OtherDecorIndex = 0; OtherDecorIndex < AllDecorationCount; OtherDecorIndex++)
+        {
+            ValidSubDecoration = 1; // default
+            for (ModIndex = 0; ModIndex < AllPTModCount; ModIndex++)
+            {
+                if (AllDecorations[OtherDecorIndex].Mods[ModIndex] < AllDecorations[DecorIndex].Mods[ModIndex])
+                {
+                    ValidSubDecoration = 0;
+                    break;
+                }
+            }
+            if (ValidSubDecoration)
+            {
+                IsSubDecoration[DecorIndex][OtherDecorIndex] = 1;
+            }
+        }
+    }
+}
+
+void FreeIsSubDecoration()
+{
+    int ModIndex;
+    for (ModIndex = 0; ModIndex < AllDecorationCount; ModIndex++)
+    {
+        SafeFree(IsSubDecoration[ModIndex]);
+        IsSubDecoration[ModIndex] = NULL;
+    }
+    SafeFree(IsSubDecoration);
+    IsSubDecoration = NULL;
+}
+
+// Returns a PTM with this name.  Returns NULL if no match found.
+// Case-insensitive (pHoSpHoRyLaTiOn is ok).
+MassDelta* FindPTModByName(char Amino, char* Name)
+{
+    int ModIndex;
+    int AminoIndex = Amino - 'A';
+    for (ModIndex = 0; ModIndex < GlobalOptions->DeltasPerAA; ModIndex++)
+    {
+        if (!MassDeltas[AminoIndex][ModIndex].Flags)
+        {
+            break;
+        }
+        if (!CompareStrings(MassDeltas[AminoIndex][ModIndex].Name, Name))
+        {
+            return &MassDeltas[AminoIndex][ModIndex];
+        }
+    }
+    return NULL;
+}
diff --git a/Mods.h b/Mods.h
new file mode 100644
index 0000000..e1d3917
--- /dev/null
+++ b/Mods.h
@@ -0,0 +1,110 @@
+//Title:          Mods.h
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+#ifndef MODS_H
+#define MODS_H
+
+// Structs to support search with post-translational modifications.
+
+#include "Inspect.h"
+
+#define DELTA_FLAG_VALID 1
+#define DELTA_FLAG_PHOSPHORYLATION 2
+#define DELTA_FLAG_C_TERMINAL 4
+#define DELTA_FLAG_N_TERMINAL 8
+
+// A MassDeltaNode is part of a linked list, each node of which wraps
+// a MassDelta.  Given a modification that affects multiple amino acids (e.g.
+// oxidation of M or W), we build one MassDelta struct...but there's one
+// MassDeltaNode for MassDeltas[M] and one for MassDeltas[W].
+typedef struct MassDeltaNode
+{
+    struct MassDelta* Delta;
+    struct MassDeltaNode* Next;
+} MassDeltaNode;
+
+typedef struct MassDelta
+{ 
+    float Score; 
+    int Delta;  // in bin-units
+    int RealDelta; // in actual mass-units.  RealDelta = Delta * 10
+    char Name[21]; 
+    char Amino; // if this is a mutation to an amino acid
+    int Flags; // Used for noting which is phosphorylation.  If flags == 0, this record is an end-of-array marker!
+
+    // Index of type.  For instance, all phosphorylations have same index.  Offset into AllKnownPTMods.
+    int Index; 
+} MassDelta;
+
+extern MassDelta** MassDeltas;
+extern MassDelta** MassDeltaByIndex;
+// A decoration is a collection of post-translational modifications.  This includes the
+// 'empty decoration', with no modifications, and mass 0.  When we examine the flanking
+// regions of a tag match to see whether the masses are valid, we consider each possible
+// decoration.  (For instance, if the prefix mass is too low by 80 but phosphorylation
+// is available - and there's a phosphorylatable residue in the prefix - then we have a match
+// via the decoration of mass 80)
+
+typedef struct PTMod
+{
+    char Name[40];
+    int Mass;
+    // How many of this modification can be attached to a base?  (Generally zero or one!)
+    int Allowed[TRIE_CHILD_COUNT]; 
+    int Flags;
+} PTMod;
+
+typedef struct Decoration
+{
+    int Mass;
+    int TotalMods;
+    int Mods[MAX_PT_MODTYPE]; // Decoration->Mods[n] is how many MassDeltas with Index of n are in this decoration.
+} Decoration;
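+// Example: if phosphorylation happened to be the PTM with Index 0 and oxidation the
+// PTM with Index 1, then a decoration with TotalMods = 2 and Mods = {1, 1, 0, ...}
+// would mean "one phosphorylation plus one oxidation", and Mass would hold their
+// summed modification mass.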
+
+extern Decoration* AllDecorations;
+extern int AllDecorationCount;
+extern int PlainOldDecorationIndex;
+
+// AllKnownPTMods - initialized at startup
+extern PTMod AllKnownPTMods[MAX_PT_MODTYPE];
+extern int AllPTModCount;
+extern int g_PTMLimit[MAX_PT_MODTYPE];
+extern int g_PhosphorylationMod; // index of the phosphorylation PTM
+
+// Returns a pointer to the MassDelta with this name for the given amino acid,
+// or NULL if no match is found.  Name matching is case-insensitive
+// (pHoSpHoRyLaTiOn is ok).
+MassDelta* FindPTModByName(char Amino, char* Name);
+
+void BuildDecorations();
+void FreeIsSubDecoration();
+int PopulatePTMListWithMutations();
+extern int** IsSubDecoration;
+#endif // MODS_H
diff --git a/PLSUtils.py b/PLSUtils.py
new file mode 100644
index 0000000..df1f275
--- /dev/null
+++ b/PLSUtils.py
@@ -0,0 +1,265 @@
+#Title:          PLSUtils.py
+#Author:         Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+#Created:        2005
+# Copyright 2007,2008,2009 The Regents of the University of California
+# All Rights Reserved
+#
+# Permission to use, copy, modify and distribute any part of this
+# program for educational, research and non-profit purposes, by non-profit
+# institutions only, without fee, and without a written agreement is hereby
+# granted, provided that the above copyright notice, this paragraph and
+# the following three paragraphs appear in all copies.
+#
+# Those desiring to incorporate this work into commercial
+# products or use for commercial purposes should contact the Technology
+# Transfer & Intellectual Property Services, University of California,
+# San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+# Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+#
+# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+# INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+# IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+# OF SUCH DAMAGE.
+#
+# THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+# OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+# ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+# REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+# EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+# THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+
+"""PLSUtils.py
+
+These are a set of related functions which help calculate the Phosphate
+Localization Score or PLS.  The differences between this and the Ascore
+are described in Albuquerque MCP 2008.
+1. The winner and runner up are determined by the MQScore, not peptide score.
+2. We do not repeatedly filter the peaks to find the optimal
+AScore.  That just takes too long.  We use the default Inspect peak filtering which
+leaves ~12 peaks / 100 m/z.  This corresponds to a p of 0.12 for the
+binomial.
+3. We will not do any thing for peptides which contain
+more than 2 sites of phosphorylation.  I don't trust those annotations
+anyway.
+"""
+
+import math
+import copy
+from Utils import *
+Initialize()
+
+class PLSClass:
+    def __init__(self):
+        self.ChooseTable = {} # (N, k) -> value of NchooseK
+        self.Factorial = {}
+        self.Factorial[0] = 1
+
+    def ComputeBinomial(self, N, n, p=0.12):
+        """ Make sure that you have populated the Choose table and Factorial table
+            I have defaulted the p = 0.12 because it's inspect peak density.
+        """
+        #print "computing binomial with N %s and n %s"%(N,n)
+        Sum = 0
+        for k in range(n, N+1):
+            # range() is half-open, so N+1 is needed to get n <= k <= N
+            Sum += self.ChooseTable[(N,k)] * pow(p,k) * pow((1-p),(N-k))
+        return Sum
+
+    def FillOutChooseTable(self, N):
+        """Simply dose all the possible N choose k for k < N
+        """
+        self.FillOutFactorialTable(N)
+        for k in range(N+1):
+            if self.ChooseTable.has_key((N,k)):
+                continue
+            NFac = self.Factorial[N]
+            kFac = self.Factorial[k]
+            NMinusKFac = self.Factorial[N-k]
+            Value = NFac / (kFac * NMinusKFac)
+            self.ChooseTable[(N, k)] = Value
+
+    def FillOutFactorialTable(self, X):
+        """Get all the factorials of numbers <= X """
+        for x in range(1, X+1):
+            if self.Factorial.has_key(x):
+                continue # already have this
+            ##here we calculate it for the value x
+            Prod = x
+            for num in range(1, x):
+                Prod *= num
+            self.Factorial[x] = Prod
+        #for Pair in self.Factorial.items():
+        #    print "Factorials ",Pair
+
+    def ComputePLS(self, N, nWinner, nRunnerUp):
+        """From the Ascore Paper paper:
+        AScore = ScoreWinner - ScoreRunnerUp
+        Score = -10 * Log(P)
+        P = Sum_{k=n}^N [N choose k] p^k * (1-p)^{N-k}
+        k = iterator
+        N = number of distinguishing ions
+        n = number of distinguishing ions found
+        p = some very poorly explained probability number.  It appears to be the probabilty of
+            a peak in 100 m/z.  Inspect filters stuff automatically and I don't want to mess
+            with it. so this number will be PeakDensity of 100 m/z units /100
+        """
+        self.FillOutChooseTable(N) # the N, or number of potential peaks
+        TopBinomial = self.ComputeBinomial(N, nWinner)
+        RunnerUpBinomial = self.ComputeBinomial(N, nRunnerUp)
+        TopScore = -10 * math.log(TopBinomial, 10)
+        RunnerUpScore = -10 * math.log(RunnerUpBinomial, 10)
+        PLS = TopScore - RunnerUpScore # The AScore
+        return PLS
+
+    def GetDistinguishingPeaks(self, Peptide1, Peptide2):
+        """ Given two peptides, find all the peaks that
+        distinguish between the two phos placements, and return
+        those in a list.  e.g. SphosEPTIDE vs. SEPTphosIDE
+        distinguishing B fragments = b1: Sphos or S
+                                    b2: SphosE or SE
+                                    b3: SphosEP or SEP
+                                    b4: SphosEPT and SEPTphos have the same mass = NOT DISTINGUISHING
+    
+        The general case is: if a b fragment has a different number of phosphorylations
+        between the two annotations, then both b and y are distinguishing.
+        """
+        Differences = [] # list of indices that differ; [1] means a difference on b1 / y(n-1)
+        # Keep only the indices that carry a phosphorylation.  We build a new list
+        # rather than removing items from the list we are iterating over.
+        ModIndex1 = []
+        #print "Peptide %s"%Peptide1.GetModdedName()
+        for Index in Peptide1.Modifications.keys():
+            #print "This is my mod on %s, %s"%(Index, Peptide1.Modifications[Index])
+            PTMList = Peptide1.Modifications[Index]
+            FoundPhos = 0
+            for Item in PTMList:
+                if Item.Name == "Phosphorylation":
+                    FoundPhos = 1
+            if FoundPhos:
+                ModIndex1.append(Index)
+        ModIndex2 = []
+        #print "Peptide %s"%Peptide2.GetModdedName()
+        for Index in Peptide2.Modifications.keys():
+            #print "This is my mod on %s, %s"%(Index, Peptide2.Modifications[Index])
+            PTMList = Peptide2.Modifications[Index]
+            FoundPhos = 0
+            for Item in PTMList:
+                if Item.Name == "Phosphorylation":
+                    FoundPhos = 1
+            if FoundPhos:
+                ModIndex2.append(Index)
+        Count1 =0
+        Count2 =0
+        for Index in range(len(Peptide1.Aminos)):
+            if Index in ModIndex1:
+                Count1 +=1
+            if Index in ModIndex2:
+                Count2 +=1
+            if not Count1 == Count2:
+                Differences.append(Index+1)
+        ## now we have a list of the b indices.  Let's make the return list
+        DistinguishingPeaks = []
+        for B in Differences:
+            YIndex = len(Peptide1.Aminos) - B
+            #print "a distinguishing peak B%s, Y%s"%(B, YIndex)
+            DistinguishingPeaks.append("B%s"%B)
+            DistinguishingPeaks.append("Y%s"%YIndex)
+        return DistinguishingPeaks
+            
+
+    def GetAlternateAnnotations(self, Peptide):
+        """Given an annotation(SAMPAYphosNE), return all
+        alternate annotations.  This version should work correctly
+        in the presence of non-phosphorylation modifications,
+        e.g. SAM+16PAYphosNE
+        """
+        #Dummy = GetPeptideFromModdedName("SAM+16PAYphosNESphosT")
+        #Peptide = Dummy
+        NumPhos = Peptide.GetModdedName().count("phos")
+        AllAnnotations = []
+        if not NumPhos in [1,2]:
+            return AllAnnotations # empty list
+        ## now see if the number of phos == number of potential residues
+        ## this means that an AScore is impossible, only one possible placement
+        Count = Peptide.Aminos.count("S")
+        Count += Peptide.Aminos.count("T")
+        Count += Peptide.Aminos.count("Y")
+        if Count == NumPhos:
+            return AllAnnotations # empty list is the caller's cue for "N/A"
+
+        (Dephos, PhosPTM) = self.RemovePhosFromPeptide(Peptide)
+        for Index in range(len(Dephos.Aminos)):
+            if Dephos.Aminos[Index] in ["S", "T", "Y"]:
+                #place a phosphorylation
+                CreateNewLevel1 =0
+                if not Dephos.Modifications.has_key(Index):
+                    CreateNewLevel1 = 1
+                    Dephos.Modifications[Index] = []
+                Dephos.Modifications[Index].append(PhosPTM)
+                #do I need to place a second phosphate?
+                if NumPhos == 2:
+                    for Jndex in range(Index + 1, len(Dephos.Aminos)):
+                        if Dephos.Aminos[Jndex] in ["S", "T", "Y"]:
+                            CreateNewLevel2 = 0
+                            if not Dephos.Modifications.has_key(Jndex):
+                                Dephos.Modifications[Jndex] = []
+                                CreateNewLevel2 = 1
+                            Dephos.Modifications[Jndex].append(PhosPTM)
+                            #add string to list, remove phos and move on.
+                            #print Dephos.GetModdedName()
+                            AllAnnotations.append(Dephos.GetModdedName())
+                            Dephos.Modifications[Jndex].pop()
+                            if CreateNewLevel2:
+                                del Dephos.Modifications[Jndex]
+                else:
+                    #only one phos.  Add string to list
+                    AllAnnotations.append(Dephos.GetModdedName())
+                    #print Dephos.GetModdedName()
+                #only add the string to the list here if it's a single-phos peptide
+                #regardless, get rid of the modification now that we've done all its combinations
+                Dephos.Modifications[Index].pop() # get rid of the most recently added Phos PTM
+                if CreateNewLevel1:
+                    del Dephos.Modifications[Index]
+        ## now one last thing remains.  We have to remove the original annotation from the list
+        try:
+            AllAnnotations.remove(Peptide.GetModdedName())
+        except ValueError:
+            print "** Warning: the original annotation was not among the generated alternates."
+            return []
+        return AllAnnotations
+                    
+
+    def RemovePhosFromPeptide(self, Peptide):
+        """Given a Peptide object, remove the phosphorylation PTMod
+        objects and return the neutered copy
+        """
+        Clone = copy.deepcopy(Peptide)
+        PTModPhos = None
+        RemoveElement = [] # indices in Clone.Modifications that become empty
+        for AminoIndex in Clone.Modifications:
+            ModificationList = Clone.Modifications[AminoIndex]
+            for PTMod in ModificationList[:]: # iterate over a copy, since we remove items below
+                if PTMod.Name == "Phosphorylation":
+                    if not PTModPhos:
+                        PTModPhos = PTMod
+                    ModificationList.remove(PTMod)
+            if len(ModificationList) == 0:
+                RemoveElement.append(AminoIndex)
+        ## now clone is without phosphorylations, but with other modifications
+        ## clean up the empty Modification keys
+        for Index in RemoveElement:
+            del Clone.Modifications[Index]
+        return (Clone, PTModPhos)
+
+    def GetSupportingPeaks(self, FullPeakList, DistinguishingPeakList):
+        SupportingPeaks = {} # key = b8 value = 1
+        for Tuple in FullPeakList:
+            Ion = Tuple[2]
+            Index = Tuple[3]
+            if Ion in ["B", "Y", "B2", "Y2"]:
+                Peak = "%s%s"%(Ion[0], Index)
+                if Peak in DistinguishingPeakList:
+                    #print "Supporting peak found %s"%Peak
+                    SupportingPeaks[Peak]= 1
+        return len(SupportingPeaks)
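As a quick, self-contained illustration of the PLS arithmetic described in ComputePLS
above (this sketch re-implements the binomial sum rather than importing PLSUtils, and
the ion counts below are made-up toy values rather than measurements):

    import math

    def BinomialPValue(N, n, p=0.12):
        # Probability of matching at least n of N distinguishing ions by chance.
        Total = 0.0
        for k in range(n, N + 1):
            Choose = math.factorial(N) / (math.factorial(k) * math.factorial(N - k))
            Total += Choose * (p ** k) * ((1 - p) ** (N - k))
        return Total

    def PLS(N, nWinner, nRunnerUp, p=0.12):
        # PLS = (-10 * log10 P_winner) - (-10 * log10 P_runnerup)
        TopScore = -10 * math.log10(BinomialPValue(N, nWinner, p))
        RunnerUpScore = -10 * math.log10(BinomialPValue(N, nRunnerUp, p))
        return TopScore - RunnerUpScore

    # Toy example: 6 distinguishing ions, 5 matched for the winning placement,
    # 1 for the runner-up; prints roughly 36, a confidently localized site.
    print PLS(6, 5, 1)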
diff --git a/PMCLDA1.model b/PMCLDA1.model
new file mode 100644
index 0000000..4d68c06
Binary files /dev/null and b/PMCLDA1.model differ
diff --git a/PMCLDA2.model b/PMCLDA2.model
new file mode 100644
index 0000000..0dd9477
Binary files /dev/null and b/PMCLDA2.model differ
diff --git a/PMCLDA2Phos.model b/PMCLDA2Phos.model
new file mode 100644
index 0000000..5cc8d51
Binary files /dev/null and b/PMCLDA2Phos.model differ
diff --git a/PMCLDA3.model b/PMCLDA3.model
new file mode 100644
index 0000000..e8ab123
Binary files /dev/null and b/PMCLDA3.model differ
diff --git a/PMCLDA3Phos.model b/PMCLDA3Phos.model
new file mode 100644
index 0000000..e28937c
Binary files /dev/null and b/PMCLDA3Phos.model differ
diff --git a/PRM2.bn b/PRM2.bn
new file mode 100644
index 0000000..a063c46
Binary files /dev/null and b/PRM2.bn differ
diff --git a/PRM2.dat b/PRM2.dat
new file mode 100644
index 0000000..08dc4b6
Binary files /dev/null and b/PRM2.dat differ
diff --git a/PRM3.bn b/PRM3.bn
new file mode 100644
index 0000000..04b5337
Binary files /dev/null and b/PRM3.bn differ
diff --git a/PRM3.dat b/PRM3.dat
new file mode 100644
index 0000000..f12c457
Binary files /dev/null and b/PRM3.dat differ
diff --git a/PRMQ2.dat b/PRMQ2.dat
new file mode 100644
index 0000000..e68f8c9
Binary files /dev/null and b/PRMQ2.dat differ
diff --git a/PRMQ3.dat b/PRMQ3.dat
new file mode 100644
index 0000000..bfeb6aa
Binary files /dev/null and b/PRMQ3.dat differ
diff --git a/PTMAnalysis.py b/PTMAnalysis.py
new file mode 100644
index 0000000..12080f3
--- /dev/null
+++ b/PTMAnalysis.py
@@ -0,0 +1,523 @@
+#Title:          PTMAnalysis.py
+#Author:         Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+#Created:        2005
+# Copyright 2007,2008,2009 The Regents of the University of California
+# All Rights Reserved
+#
+# Permission to use, copy, modify and distribute any part of this
+# program for educational, research and non-profit purposes, by non-profit
+# institutions only, without fee, and without a written agreement is hereby
+# granted, provided that the above copyright notice, this paragraph and
+# the following three paragraphs appear in all copies.
+#
+# Those desiring to incorporate this work into commercial
+# products or use for commercial purposes should contact the Technology
+# Transfer & Intellectual Property Services, University of California,
+# San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+# Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+#
+# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+# INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+# IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+# OF SUCH DAMAGE.
+#
+# THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+# OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+# ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+# REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+# EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+# THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+
+"""
+PTMAnalysis.py
+This wrapper script automates the steps involved in processing raw Inspect
+results into finalized PTM site identifications. 
+1. FDRUtils.py - obtain a consistent pvalue across all runs
+2. ComputePTMFeatures.py - group individual annotations into peptides and
+    compute some features for each.
+3. BuildMGF.py - builds a single .mgf file out of all the consensus spectra
+    created in step 2 (in preparation for the Inspect run)
+4. Inspect run.  Search clustered spectra (unmodified) against a large database
+5. PTMSearchBigDB.py - Integrates the results of the Inspect search against a
+    large database, finding a delta-score.
+6. TrainPTMFeatures.py - Computes the PValue of each site from a model
+7. AdjustPTM.py - attempts to merge and reconcile sites
+8. AdjustPTM.py (known chemistry) - attempts to find known explanations for
+    the site
+
+Depending on the size of your dataset, this program may take quite a while
+(up to 1 day per million spectra).  It is reasonably easy to parallelize
+the time-intensive steps (2) and (4) above; doing so is up to the user, since
+compute clusters are so heterogeneous.
+
+To perform only a subset of the steps, you can designate start and stop steps.
+This is useful if the program crashes at some point, or if you would like to
+run some steps on a grid and others locally.
+To start from a step other than step one, the program assumes that all the previous
+steps have been executed, and that their outputs are in the proper directories.
+See the functions below for the expected directory and file names.
+"""
+import os
+import getopt
+import sys
+import traceback
+from Utils import *
+
+UsageInfo = """
+PTMAnalysis.py - produces a set of PTM sites.
+
+Required options:
+ -r [FileName] - The name of the results file to parse.  If a directory is
+    specified, then all .txt files within the directory will be combined into
+    one report
+ -d [FileName] - The name of the database file (.trie format) searched.
+ -w [FileName] - The final output filename
+ -s [Dir] - Directory containing the MS/MS spectra
+ -B [FilePath] - Path to the large database (for unmodified "decoy" search).
+ -S [Value] - The fraction of sequences in the database that are shuffled.
+ -k [FileName]: Known chemistry filename.  If specified, consider altering
+   sites to match known chemical adducts; report the best site-score
+   attainable by using known chemical adducts.
+
+Additional options:
+ -p [Value] - The pvalue cutoff for spectrum annotations.  Defaults to 0.1
+ -q [Value] - The pvalue cutoff for PTM site annotations.  Defaults to 0.05
+ -t [Path] - The path where all intermediate results will be written.
+        Defaults to PTMTempFiles
+ -i [Instrument] - The type of instrument that the spectra were created on.
+     Default is ESI-ION-TRAP.  Valid values are: QTOF, FT-Hybrid
+ -n [FileName] - The parameters file to use in the big-database search (the spectra
+     and DB lines will be replaced, and blind-search options will be removed)
+     
+Advanced options: to run only a subset of the steps
+ -x [StartStep] - Start with step X (assume that previous steps are done)
+ -y [StopStep] - Stop with step Y (inclusive)
+
+Protein selection can be performed, replacing the protein identification
+with a parsimonious set of protein IDs (using a simple iterative
+approach).  The following options are required for protein selection:
+ -a: Replace protein identifications with a "parsimonious" set of protein IDs.
+
+"""
+
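+# An illustrative invocation (the file names here are placeholders, not real data):
+#   python PTMAnalysis.py -r Results/ -d Database.trie -s Spectra/ -w PTMSites.txt \
+#       -S 0.5 -B BigDatabase.trie -k InVivoModifications.txt
+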
+class Steps:
+    First = 1
+    PValue = 1
+    ComputePTMFeatures = 2
+    BuildMGF = 3
+    RunInspect = 4
+    SearchBigDB = 5
+    TrainPTMFeatures = 6
+    AdjustPeptides = 7
+    AdjustToKnownChemistry = 8
+    Last = 8
+
+class WrapperClass:
+    def __init__ (self):
+        self.SpectrumPValue = 0.1 # for the pvalue.py program
+        self.SitePValue = 0.05 # for the final output
+        self.BasePath = "PTMTempFiles"
+        self.PValuePath = None
+        self.PValueOutput = None
+        self.ComputePTMPath = None
+        self.ComputePTMOutput = None
+        self.TrainPTMPath = None
+        self.TrainPTMOutput = None
+        self.TrainPTMModelOutput = None
+        self.AdjustDir = None
+        self.AdjustOutputPath = None
+        self.AdjustModelOutput = None
+        self.SearchBigDBPath = None
+        self.SearchBigDBOutput = None
+        self.InputResults = None
+        self.DatabaseFile = None
+        self.PercentShuffled = None
+        self.SpectraDir = None
+        self.SelectProteins = 0 #default
+        self.FinalOutputFile = None
+        self.BigDB = None
+        self.Instrument = "ESI-ION-TRAP"
+        self.BuildMGFPath = None
+        self.InspectOutDir = None
+        self.MGFPath = None
+        self.SpawnFlag = 0
+        self.StartStep = Steps.First
+        self.StopStep = Steps.Last
+        self.KnownChemistryFileName = None
+        self.TrainPTMModelType = "svm" # default
+        self.ParamsFile = None
+    def SetupDirectories(self):
+        """
+        Below the basepath there will be a group of directories, one for
+        each major step
+        """
+        self.PValuePath = os.path.join(self.BasePath, "PValued")
+        self.ComputePTMPath = os.path.join(self.BasePath, "ComputePTMFeatures")
+        self.TrainPTMPath = os.path.join(self.BasePath, "TrainPTMFeatures")
+        self.AdjustDir = os.path.join(self.BasePath, "AdjustPTM")
+        self.SearchBigDBPath = os.path.join(self.BasePath, "SearchBigDB")
+        self.BuildMGFPath = os.path.join(self.BasePath, "BuildMGF")
+        self.InspectOutDir = os.path.join(self.BasePath, "InspectOut")
+        print "Making temporary directories in %s for all intermediate output"%self.BasePath
+        MakeDirectory(self.PValuePath)
+        MakeDirectory(self.ComputePTMPath)
+        MakeDirectory(self.TrainPTMPath)
+        MakeDirectory(self.AdjustDir)
+        MakeDirectory(self.SearchBigDBPath)
+        MakeDirectory(self.BuildMGFPath)
+        MakeDirectory(self.InspectOutDir)
+    def RunPValue(self):
+        """
+        FDRUtils.py
+            -r InputResults
+            -w OutputResults
+            -S Percent of database shuffled (optional)
+            -p pvalue cutoff
+            -s Distribution file
+            -i Distribution image
+            -H write out results from shuffled protein
+        """
+        self.PValueOutput = self.PValuePath # default, a directory for directory input
+        DistributionFile = os.path.join(self.PValuePath, "Distribution.txt")
+        if not os.path.isdir(self.InputResults): # the InputResults is a single file
+            FileName = os.path.split(self.InputResults)[1]
+            self.PValueOutput = os.path.join(self.PValueOutput, FileName)
+        PValueArgs = ""
+        if self.PercentShuffled:
+            PValueArgs = "-r %s -w %s -S %f -p %f -s %s -i -H" %(self.InputResults, self.PValueOutput, self.PercentShuffled, self.SpectrumPValue, DistributionFile)
+        else:
+            PValueArgs = "-r %s -w %s -p %f -s %s -i -H" %(self.InputResults, self.PValueOutput, self.SpectrumPValue, DistributionFile)
+        if self.SelectProteins:
+            PValueArgs += " -a -d %s"%self.DatabaseFile
+        PValueArgs += " -b "
+        if self.StartStep <= Steps.PValue and Steps.PValue <= self.StopStep:
+            print "Step %s: FDRUtils"%Steps.PValue
+            print "Arguments: %s"%PValueArgs
+            if self.SpawnFlag:
+                Command = "python FDRUtils.py %s"%PValueArgs
+                print Command
+                os.system(Command)
+            else:
+                import FDRUtils
+                ArgsList = PValueArgs.split()
+                Parser = FDRUtils.PValueParser()
+                Parser.ParseCommandLine(ArgsList)
+                FDRUtils.Main(Parser)
+                del FDRUtils
+        else:
+            print "Skipping Step %s: FDRUtils"%Steps.PValue
+    def RunComputePTMFeatures(self):
+        """
+        ComputePTMFeatures.py
+            -r InputResults
+            -w OutputDir
+            -d Database
+            -s spectra
+        """
+        self.ComputePTMOutput = os.path.join(self.ComputePTMPath, "PTMFeatures.txt")
+        Args = " -r %s -w %s -d %s -s %s"%(self.PValueOutput, self.ComputePTMPath, self.DatabaseFile, self.SpectraDir)
+        if self.StartStep <= Steps.ComputePTMFeatures and Steps.ComputePTMFeatures <= self.StopStep:
+            print "Step %s: ComputePTMFeatures"%Steps.ComputePTMFeatures
+            print "Arguments: %s"%Args
+            if self.SpawnFlag:
+                Command = "ComputePTMFeatures.py %s"%Args
+                print Command
+                os.system(Command)
+            else:
+                import ComputePTMFeatures
+                ArgsList = Args.split()
+                Computer = ComputePTMFeatures.PTMFeatureComputer()
+                Computer.ParseCommandLine(ArgsList)
+                Computer.ComputeFeaturesMain()
+                del ComputePTMFeatures
+        else:
+            print "Skipping Step %s: ComputePTMFeatures"%Steps.ComputePTMFeatures
+    def RunBuildMGF(self):
+        """
+        BuildMGF.py
+            -d PTM feature directory
+            -m MGF file to make
+        """
+        self.MGFPath = os.path.join(self.BuildMGFPath, "spectra.mgf")
+        Args = " -d %s -m %s"%(self.ComputePTMPath, self.MGFPath)
+        
+        if self.StartStep <= Steps.BuildMGF and Steps.BuildMGF <= self.StopStep:
+            print "Step %s: BuildMGF"%Steps.BuildMGF
+            print "Arguments:  %s"%Args
+            if self.SpawnFlag:
+                Command = "BuildMGF.py %s"%Args
+                print Command
+                os.system(Command)
+            else:
+                ArgsList = Args.split()
+                import BuildMGF
+                Builder = BuildMGF.MGFBuilder()
+                Builder.ParseCommandLine(ArgsList)
+                Builder.Main()
+                del BuildMGF
+        else:
+            print "Skipping Step %s: BuildMGF"%Steps.BuildMGF
+    def RunInspect(self):
+        """
+        Given that the mgf file was previously created, here we create an
+        input file for Inspect and run it.
+        """
+        InspectExe = None
+        if sys.platform == "win32":
+            InspectExe = "Inspect.exe"
+        else:
+            InspectExe = "./inspect"
+        InspectIn = "BigSearch.in"
+        self.InspectOutFile = os.path.join(self.InspectOutDir, "Results.txt")
+        Command = "%s -i %s -o %s"%(InspectExe, InspectIn, self.InspectOutFile)
+        
+        Dict = {}
+        if self.ParamsFile:
+            File = open(self.ParamsFile,'r')
+            
+            for Line in File:
+                Line = Line.strip()
+                if Line == "":
+                    continue
+                Bits = Line.split(",")
+                if Dict.has_key(Bits[0].lower()):
+                    Dict[Bits[0].lower()].append((",".join(Bits[1:])).lower())
+                else:
+                    Dict[Bits[0].lower()] = [(",".join(Bits[1:])).lower()]
+            File.close()
+            if Dict.has_key("blind"):
+                del Dict["blind"]
+            if Dict.has_key("mods"):
+                del Dict["mods"]
+            if Dict.has_key("sequencefile"):
+                del Dict["sequencefile"]
+            if Dict.has_key("unrestrictive"):
+                del Dict["unrestrictive"]
+            if Dict.has_key("maxptmsize"):
+                del Dict["maxptmsize"]
+            
+        else:
+            
+            Dict["protease"] = ["Trypsin"]
+            Dict["mod"] = ["57,C,fix"]
+            Dict["tagcount"] = ["25"]
+
+        if not Dict.has_key("instrument"):
+            Dict["instrument"] = [self.Instrument]
+        Dict["db"] = [self.BigDB]
+        Dict["spectra"] = [self.MGFPath]
+        
+        InFileCommands = ""
+        for Key in Dict.keys():
+            List = Dict[Key]
+            Str = ""
+            for L in List:
+                Str += "%s,%s\n"%(Key,L)
+            InFileCommands += Str
+
+        #InFileCommands = "spectra,%s\n"%self.MGFPath
+        #InFileCommands += "instrument,%s\n"%self.Instrument
+        #InFileCommands += "protease,Trypsin\n"
+        #InFileCommands += "DB,%s\n"%self.BigDB
+        #InFileCommands += "mod,57,C,fix\n"
+        #InFileCommands += "tagcount,25\n"
+        #print InFileCommands
+        #raw_input()
+        Handle = open(InspectIn, "wb")
+        Handle.write(InFileCommands)
+        Handle.close()
+        if self.StartStep <= Steps.RunInspect and Steps.RunInspect <= self.StopStep:
+            print "Step %s: Run Inspect, searching consensus spectra"%Steps.RunInspect
+            print "Arguments: %s"%Command
+            os.system(Command)
+        else:
+            print "Skipping Step %s: Run Inspect, searching consensus spectra"%Steps.RunInspect
+    def RunPTMSearchBigDB(self):
+        """
+        PTMSearchBigDB.py
+            -d PTM feature directory
+            -w Outputfile to write
+            -r Inspect Search Results (default to directory, not file)
+        """
+        self.SearchBigDBOutput = os.path.join(self.SearchBigDBPath, "Results.txt")
+        # we use the InspectOutDir in case there are multiple files within the directory.
+        Args = " -d %s -w %s -r %s"%(self.ComputePTMPath, self.SearchBigDBOutput, self.InspectOutDir)
+        if self.StartStep <= Steps.SearchBigDB and Steps.SearchBigDB <= self.StopStep:
+            print "Step %s: PTMSearchBigDB"%Steps.SearchBigDB
+            print "Arguments:  %s"%Args
+            if self.SpawnFlag:
+                Command = "PTMSearchBigDB.py %s"%Args
+                print Command
+                os.system(Command)
+            else:
+                ArgsList = Args.split()
+                import PTMSearchBigDB
+                Searcher = PTMSearchBigDB.PTMSearcher()
+                Searcher.ParseCommandLine(ArgsList)
+                Searcher.Main()
+                del PTMSearchBigDB
+        else:
+            print "Skipping Step %s: PTMSearchBigDB"%Steps.SearchBigDB
+    def RunTrainPTMFeatures(self):
+        """
+        TrainPTMFeatures.py
+            -u InputResults
+            -v OutputResults
+            -m ModelType
+            -w OutputModel
+        """
+        self.TrainPTMOutput = os.path.join(self.TrainPTMPath, "Results.txt")
+        self.TrainPTMModelOutput = os.path.join(self.TrainPTMPath, "model.%s.txt"%self.TrainPTMModelType)
+        Args = "-u %s -v %s -m %s -w %s"%(self.SearchBigDBOutput, self.TrainPTMOutput, self.TrainPTMModelType, self.TrainPTMModelOutput)
+        if self.StartStep <= Steps.TrainPTMFeatures and Steps.TrainPTMFeatures <= self.StopStep:
+            print "Step %s: TrainPTMFeatures"%Steps.TrainPTMFeatures
+            print "Arguments: %s"%Args
+            if self.SpawnFlag:
+                Command = "TrainPTMFeatures.py %s"%Args
+                print Command
+                os.system(Command)
+            else:
+                ArgsList = Args.split()
+                import TrainPTMFeatures
+                Trainer = TrainPTMFeatures.PTMFeatureTrainer()
+                Trainer.ParseCommandLine(ArgsList)
+                Trainer.TrainModel()
+                del TrainPTMFeatures
+        else:
+            print "Skipping Step %s: TrainPTMFeatures"%Steps.TrainPTMFeatures
+    def AdjustPeptides(self):
+        """
+        AdjustPTM.py
+            -r InputResults
+            -w OutputResults
+            -d Database File
+            -c Cluster directory from ComputePTMFeatures
+            -m model INPUT filename
+            -M model OUTPUT filename
+        """
+        self.AdjustOutputPath = os.path.join(self.AdjustDir, "Results.txt")
+        self.AdjustModelOutput = os.path.join(self.AdjustDir, "model.%s.txt"%self.TrainPTMModelType)
+        Args = "-r %s -w %s -d %s -c %s -m %s -M %s -z "%(self.TrainPTMOutput, self.AdjustOutputPath, self.DatabaseFile, self.ComputePTMPath, self.TrainPTMModelOutput, self.AdjustModelOutput)
+        if self.StartStep <= Steps.AdjustPeptides and Steps.AdjustPeptides <= self.StopStep:
+            print "Step %s: AdjustPTM"%Steps.AdjustPeptides
+            print "Arguments: %s"%Args
+            if self.SpawnFlag:
+                Command = "AdjustPTM.py %s"%Args
+                print Command
+                os.system(Command)
+            else:
+                ArgsList = Args.split()
+                import AdjustPTM
+                Adjutant = AdjustPTM.PTMAdjuster()
+                Adjutant.ParseCommandLine(ArgsList)
+                Adjutant.Main()
+                del AdjustPTM
+        else:
+            print "Skipping Step %s: AdjustPTM"%Steps.AdjustPeptides
+    def AdjustToKnownChemistry(self):
+        """
+        AdjustPTM.py
+            -r InputResults
+            -w OutputResults
+            -m input Model
+            -d Database
+            -c Clusters 
+            -k Known PTM file
+            -v verbose output file
+        """
+        if not self.KnownChemistryFileName:
+            print "* Skipping AdjustToKnownChemistry: Requires a file (-k) of 'common' modifications."
+            return
+        KnownPTMVerboseOutputPath = os.path.join(self.BasePath, "KnownPTMOutput.txt")
+        Args = "-r %s -w %s -m %s -d %s -c %s -k %s -v %s"%(self.AdjustOutputPath, self.FinalOutputFile, self.AdjustModelOutput, self.DatabaseFile, self.ComputePTMPath, self.KnownChemistryFileName, KnownPTMVerboseOutputPath)
+        
+        if self.StartStep <= Steps.AdjustToKnownChemistry and Steps.AdjustToKnownChemistry <= self.StopStep:
+            print "Step %s: AdjustToKnownChemistry"%Steps.AdjustToKnownChemistry
+            print "Arguments: %s"%Args
+            if self.SpawnFlag == 1:
+                Command = "AdjustPTM.py %s"%Args
+                print Command
+                os.system(Command)
+            else:
+                ArgsList = Args.split()
+                import AdjustPTM
+                Adjutant = AdjustPTM.PTMAdjuster()
+                Adjutant.ParseCommandLine(ArgsList)
+                Adjutant.Main()
+                del AdjustPTM
+        else:
+            print "Skipping Step %s: AdjustToKnownChemistry"%Steps.AdjustToKnownChemistry
+    def ParseCommandLine(self, Arguments):
+        "Args is a list of arguments only (does not include sys.argv[0] == script name)"
+        (Options, Args) = getopt.getopt(Arguments, "r:d:w:s:p:q:t:S:B:i:x:y:k:bZm:n:")
+        OptionsSeen = {}
+        for (Option, Value) in Options:
+            OptionsSeen[Option] = 1
+            if Option == "-r":
+                self.InputResults = Value
+            elif Option == "-m":
+                self.TrainPTMModelType = Value # "svm" or "lda"
+            elif Option == "-d":
+                self.DatabaseFile = Value
+            elif Option == "-w":
+                self.FinalOutputFile = Value
+            elif Option == "-s":
+                self.SpectraDir = Value
+            elif Option == "-p":
+                self.SpectrumPValue = float(Value)
+            elif Option == "-q":
+                self.SitePValue = float(Value)
+            elif Option == "-t":
+                self.BasePath = Value
+            elif Option == "-S":
+                self.PercentShuffled = float (Value)
+            elif Option == "-B":
+                self.BigDB = Value
+            elif Option == "-x":
+                self.StartStep = int (Value)
+            elif Option == "-y":
+                self.StopStep = int (Value)
+            elif Option == "-k":
+                self.KnownChemistryFileName = Value
+            elif Option == "-Z":
+                self.SpawnFlag = 1
+            elif Option == "-n":
+                self.ParamsFile = Value
+            else:
+                print "** Unknown option:", Option, Value
+        if not OptionsSeen.has_key("-r") or not OptionsSeen.has_key("-d") or not OptionsSeen.has_key("-w") or not OptionsSeen.has_key("-s") or not OptionsSeen.has_key("-S"):
+            print "Missing required options (r, d, w, s, S)"
+            print UsageInfo
+            sys.exit(-1)
+        if not self.BigDB:
+            print "Missing large DB (-B)"
+            print UsageInfo
+            sys.exit(-1)
+
+def Main():
+    "Main control box for the script"
+    Wrap = WrapperClass()
+    Wrap.ParseCommandLine(sys.argv[1:])
+    Wrap.SetupDirectories()
+    #now we run the parts of the scripts one after another
+    print "\n*** Starting to run components ***"
+    Wrap.RunPValue()
+    Wrap.RunComputePTMFeatures()
+    Wrap.RunBuildMGF()
+    Wrap.RunInspect()
+    Wrap.RunPTMSearchBigDB()
+    Wrap.RunTrainPTMFeatures()
+    Wrap.AdjustPeptides()
+    Wrap.AdjustToKnownChemistry()
+    
+
+if __name__ == "__main__":
+    try:
+        import psyco
+        psyco.full()
+    except:
+        print "psyco not found - running without optimization"
+    Main()    
+        
diff --git a/PTMChooserLM.py b/PTMChooserLM.py
new file mode 100644
index 0000000..c030b5e
--- /dev/null
+++ b/PTMChooserLM.py
@@ -0,0 +1,1294 @@
+#Title:          PTMChooserLM.py
+#Author:         Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+#Created:        2005
+# Copyright 2007,2008,2009 The Regents of the University of California
+# All Rights Reserved
+#
+# Permission to use, copy, modify and distribute any part of this
+# program for educational, research and non-profit purposes, by non-profit
+# institutions only, without fee, and without a written agreement is hereby
+# granted, provided that the above copyright notice, this paragraph and
+# the following three paragraphs appear in all copies.
+#
+# Those desiring to incorporate this work into commercial
+# products or use for commercial purposes should contact the Technology
+# Transfer & Intellectual Property Services, University of California,
+# San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+# Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+#
+# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+# INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+# IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+# OF SUCH DAMAGE.
+#
+# THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+# OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+# ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+# REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+# EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+# THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+
+""" 
+Low-memory-usage version of PTMChooser.
+"""
+import sys
+import getopt
+import struct
+import types
+import traceback
+import os
+import time
+import Label
+import MakeImage
+import ExplainPTMs
+from Utils import *
+Initialize()
+        
+UsageInfo = """PTMChooser - Parse database search output, and select a parsimonious
+  set of PTMs to explain the results.
+
+Required parameters:
+  -r [FileName]: Results file to parse.  If a directory is specified,
+    then ALL files in the directory shall be parsed.
+  -d [FileName]: Database .trie file searched
+
+Optional parameters:  
+  -s [Dir]: Summary directory to write findings to (default: PTMSummary)
+  -v [value]: p-value cutoff.  Annotations with p-values worse than the
+    cutoff are discarded
+  -l [count]: Maximum number of lines to read from the results file(s).
+    Use this option to preview full results quickly.
+  -p Generate PTM frequency matrix.  This option detects common,
+    non-site-specific modifications such as oxidized methionine.  It is
+    not well-suited to finding point mutations. 
+  
+Options for PTM site mode:
+  -i Generate spectrum images for the representative spectra for
+    each site
+  -w [value]: p-value cutoff for selecting a site-specific PTM.
+    Defaults to the value of -v; cannot be larger.
+  -c Size of the protecting group on cysteine residues (defaults to 57).
+  -t Maximum sites to report (defaults to 1000)
+  -m Minimum size of mass delta (defaults to 3).  Mass differences of
+    less than three daltons on ion trap spectra are most likely due
+    to incorrect parent mass reporting, and so are filtered.
+  -k [file]: File enumerating known PTMs, such as M+16.  Used to override
+    name reporting.
+"""
+
+MaxLineCount = None # by default, read the entire results file.  Override with -l option.
+
+class SiteClass:
+    "For a putative PTM."
+    def __init__(self):
+        self.Residue = "" # M1, Q155, that sort of thing
+        self.DBPos = None
+        self.Mass = None
+        self.BestPeptides = [] # sorted list (pvalue, -score, peptide) for the best 10 hits
+        self.BestModlessPeptides = [] # sorted list (pvalue, -score, peptide) for the best 10 hits
+        self.BestOtherModPeptides = [] # sorted list (pvalue, -score, peptide) for the best 10 hits
+        self.ModdedSpecies = {}
+        self.AnnotationCount = 0
+        self.ModlessAnnotationCount = 0
+        self.OtherModAnnotationCount = 0
+        self.AA = "X"
+        # Count how many annotations use N-terminus, middle, C-terminus.
+        self.TerminalCount = [0, 0, 0]
+
+class PTMClass:
+    "For known PTMs"
+    def __init__(self, Mass):
+        self.Mass = Mass
+        self.AA = {}
+        self.Terminus = ""
+        self.Name = str(Mass)
+    def GetNameWithLink(self):
+        return self.Name
+    def BuildPTModClass(self):
+        self.PTMod = PTModClass("%+d"%self.Mass)
+        self.PTMod.Mass = self.Mass
+        self.PTMod.Bases = self.AA
+    
+class Processor:
+    def __init__(self):
+        # Cutoff for using spectra to propose a new modification:
+        self.PValueCutoff = 0.05
+        # Cutoff for reporting spectra:
+        self.PValueReportCutoff = 0.08
+        self.SiteList = []
+        self.PTMFrequencyMatrix = {} # (DBPos, Mass)->Count
+        self.PTMBestScoreMatrix = {} # (DBPos, Mass)->BestScore
+        self.NTerminalFlag = 0
+        self.PTMSummaryDir = "PTMSummary"
+        self.DB = ""
+        self.MinSpectraForSite = 1 # can override
+        self.MaxSiteCount = 1000 # can override
+        self.GenerateSpectrumImagesFlag = 0 # disabled by default
+        self.CysteineProtection = 57
+        self.MinimumPTMSize = 3
+        self.DeltaScoreCutoff = -2
+        self.MQScoreCutoff = -3
+        # how many peptides to report for a PTM:
+        self.ReportPeptideCount = 10
+        self.KnownPTMFileName = None
+        self.KnownPTMs = []
+        self.KnownPTMDict = {} # (AA, Mass, Terminus) -> PTMClass instance
+        self.BuildPTMFrequencyMatrix = 0
+        self.MinimumPeptideLength = 7
+    def ReadKnownPTMs(self):
+        if not self.KnownPTMFileName:
+            return
+        if not os.path.exists(self.KnownPTMFileName):
+            print "** Known PTM file '%s' not found, ignoring!"%self.KnownPTMFileName
+            return
+        File = open(self.KnownPTMFileName, "rb")
+        # Parse a line of the form: mod,+88,*,nterminal
+        for FileLine in File.xreadlines():
+            FileLine = FileLine.strip()
+            if not FileLine:
+                continue
+            Bits = FileLine.split(",")
+            if len(Bits)<3:
+                continue
+            if Bits[0].lower() != "mod":
+                continue
+            if len(Bits) > 3:
+                PTMType = Bits[3]
+            else:
+                PTMType = "opt"
+            if PTMType[:3].lower() == "fix":
+                continue
+            Mass = int(Bits[1])
+            PTM = PTMClass(Mass)
+            Aminos = Bits[2]
+            if Aminos == "*":
+                Aminos = "ACDEFGHIKLMNPQRSTVWY"
+            for Amino in Aminos:
+                PTM.AA[Amino] = 1
+            if PTMType.lower() == "nterminal":
+                PTM.Terminus = "N"
+            if PTMType.lower() == "cterminal":
+                PTM.Terminus = "C"
+            if len(Bits) > 4:
+                PTM.Name = Bits[4]
+            self.KnownPTMs.append(PTM)
+            PTM.BuildPTModClass()
+            for Amino in Aminos:
+                Key = (Amino, int(round(PTM.Mass)), PTM.Terminus)
+                self.KnownPTMDict[Key] = PTM
+                if PTM.Terminus == "":
+                    # A non-terminal PTM is still legal at a terminus:
+                    Key = (Amino, int(round(PTM.Mass)), "C")
+                    self.KnownPTMDict[Key] = PTM
+                    Key = (Amino, int(round(PTM.Mass)), "N")
+                    self.KnownPTMDict[Key] = PTM
+                    
+    def ReadSpectrumAnnotations(self, FileName):
+        "Parse annotations from a file (or a directory containing many results files)"
+        self.LinesReadCount = 0
+        self.AnnotationFileName = FileName
+        if not os.path.exists(FileName):
+            print "* Error in PTMChooser: Results file '%s' does not exist!"%FileName
+            return
+        if os.path.isdir(FileName):
+            print "Parsing results files from directory %s..."%FileName
+            SubFileNames = os.listdir(FileName)
+            SubFileNames.sort()
+            for SubFileNameIndex in range(len(SubFileNames)):
+                SubFileName = SubFileNames[SubFileNameIndex]
+                print "File %s/%s: %s"%(SubFileNameIndex, len(SubFileNames), SubFileName)
+                Path = os.path.join(FileName, SubFileName)
+                if os.path.isdir(Path):
+                    print "Skip subdirectory %s"%Path
+                else:
+                    self.ReadSpectrumAnnotationsFromFile(Path)
+                if MaxLineCount != None and self.LinesReadCount > MaxLineCount:
+                    break                
+        else:
+            self.ReadSpectrumAnnotationsFromFile(FileName)
+    def ReadPTMWitnesses(self, FileName):
+        "Parse annotations from a file (or a directory containing many results files)"
+        self.LinesReadCount = 0
+        Path = os.path.join(self.PTMSummaryDir, "PTMAnnotations.txt")
+        self.OutputAnnotationFile = open(Path, "wb")
+        self.AnnotationFileName = FileName
+        if not os.path.exists(FileName):
+            print "* Error in PTMChooser: Results file '%s' does not exist!"%FileName
+            return
+        if os.path.isdir(FileName):
+            print "Parsing results files from directory %s..."%FileName
+            SubFileNames = os.listdir(FileName)
+            for SubFileNameIndex in range(len(SubFileNames)):
+                SubFileName = SubFileNames[SubFileNameIndex]
+                print "File %s/%s: %s"%(SubFileNameIndex, len(SubFileNames), SubFileName)
+                Path = os.path.join(FileName, SubFileName)
+                if os.path.isdir(Path):
+                    print "Skip subdirectory %s"%Path
+                else:
+                    self.ReadPTMWitnessesFromFile(Path)
+                if MaxLineCount != None and self.LinesReadCount > MaxLineCount:
+                    break
+        else:
+            self.ReadPTMWitnessesFromFile(FileName)
+        self.OutputAnnotationFile.close()
+    def TweakIncorrectEndpoints(self, Peptide):
+        """
+        Some putative modifications can be explained away by altering the endpoints of
+        a peptide.  Examples include "K.AYGSTNPINIVR-71A.T" (right endpoint off by one),
+        and "S.D+87KFSTVEQQASYGVGR.Q" (left endpoint off by one).  If we can explain away
+        a modification by shifting the endpoint, then we'll do so, and in doing so get
+        rid of a major source of delta-correct annotations.
+        """
+        if not Peptide.Modifications.keys():
+            return Peptide 
+        if self.KnownPTMFileName:
+            # Check whether this peptide's PTMs match known PTMs. If so, tweak them
+            # if necessary, and return the fixed PTMs.
+            UnknownPTMFlag = 0
+            NewModifications = {}
+            for (Index, ModList) in Peptide.Modifications.items():
+                Terminus = ""
+                if Index == 0:
+                    Terminus = "N"
+                elif Index == len(Peptide.Aminos) - 1:
+                    Terminus = "C"
+                for Mod in ModList:
+                    Key = (Peptide.Aminos[Index], Mod.Mass, Terminus)
+                    if self.KnownPTMDict.has_key(Key):
+                        # Acceptable!
+                        if not NewModifications.has_key(Index):
+                            NewModifications[Index] = []
+                        NewModifications[Index].append(Mod)
+                        continue
+                    # Look for a nearby PTM:
+                    MinIndex = max(0, Index - 3)
+                    MaxIndex = min(len(Peptide.Aminos) - 1, Index + 4)
+                    FoundFlag = 0
+                    for NearIndex in range(MinIndex, MaxIndex):
+                        if FoundFlag:
+                            break
+                        for NearMass in range(Mod.Mass - 1, Mod.Mass + 2):
+                            NearTerminus = ""
+                            if NearIndex == 0:
+                                NearTerminus = "N"
+                            elif NearIndex == len(Peptide.Aminos) - 1:
+                                NearTerminus = "C"
+                            Key = (Peptide.Aminos[NearIndex], NearMass, NearTerminus)
+                            PTM = self.KnownPTMDict.get(Key, None)
+                            if PTM:
+                                # Aha!  This appears to be a delta-correct annotation.
+                                if not NewModifications.has_key(NearIndex):
+                                    NewModifications[NearIndex] = []
+                                NewModifications[NearIndex].append(PTM.PTMod)
+                                FoundFlag = 1
+                                break
+                    if not FoundFlag:
+                        UnknownPTMFlag = 1 # known PTMs don't explain this annotation!
+            # Loop is finished.  Did we see any with no explanation?
+            if UnknownPTMFlag == 0:
+                OldName = Peptide.GetFullModdedName()
+                Peptide.Modifications = NewModifications
+                NewName = Peptide.GetFullModdedName()
+                #print "MASSAGED:", OldName, NewName
+                return Peptide
+        Q17Mod = PTModClass("-17")
+        Q17Mod.Mass = -17
+        #print "Tweaking incorrect annotation endpoints."
+        EditThisAnnot = 0
+        Len = len(Peptide.Aminos)
+        if not Peptide.UnexplainedModList or len(Peptide.UnexplainedModList) != 1:
+            return Peptide
+        (AA, Mass, Pos, Terminus) = Peptide.UnexplainedModList[0]
+        Endpoint = Peptide.DBPos + len(Peptide.Aminos)
+        # Try moving endpoint left, to repair things like X.XXXX-57G.X:
+        if Pos >= len(Peptide.Aminos) - 3:
+            Diff = abs(Mass + Global.AminoMass[Peptide.Aminos[-1]])
+            if Diff < 1.1:
+                NewPeptide = GetPeptideFromModdedName(Peptide.Aminos[:-1])
+                NewPeptide.DBPos = Peptide.DBPos
+                NewPeptide.Prefix = Peptide.Prefix
+                NewPeptide.Suffix = self.DB[Peptide.DBPos + len(NewPeptide.Aminos)]
+                NewPeptide.UnexplainedModList = []
+                return NewPeptide
+        # Try moving the start point left by 1-3 residues, to repair things like A.X+71XXX.X
+        if (Pos < 3 and Mass > 0):
+            ExtraMass = 0
+            for AACount in range(1, 4):
+                DBPos = Peptide.DBPos - AACount
+                if (DBPos < 0):
+                    break
+                ExtraMass += Global.AminoMass.get(self.DB[DBPos], 9999)
+                Diff = abs(Mass - ExtraMass)
+                if Diff < 1.1:
+                    NewPeptide = GetPeptideFromModdedName(self.DB[DBPos:Endpoint])
+                    NewPeptide.DBPos = DBPos
+                    NewPeptide.Prefix = self.DB[DBPos-1]
+                    NewPeptide.Suffix = Peptide.Suffix
+                    NewPeptide.UnexplainedModList = []
+                    return NewPeptide
+                # Consider using Q-17 here, instead of a spurious +111:
+                if self.DB[DBPos] == "Q":
+                    Diff = abs(Mass - (ExtraMass - 17))
+                    if Diff < 1.1:
+                        NewPeptide = GetPeptideFromModdedName(self.DB[DBPos:Endpoint])
+                        NewPeptide.DBPos = DBPos
+                        NewPeptide.Prefix = self.DB[DBPos-1]
+                        NewPeptide.Suffix = Peptide.Suffix
+                        NewPeptide.Modifications[0] = [Q17Mod]
+                        NewPeptide.UnexplainedModList = [("Q", -17, 0, "N")]
+                        NewPeptide.ComputeMasses()
+                        return NewPeptide
+        # Try moving the endpoint right by 1-3 residues, to repair things like X.XXX+128.K
+        if (Pos > len(Peptide.Aminos) - 3 and Mass > 0):
+            ExtraMass = 0
+            for AACount in range(1, 4):
+                DBPos = Endpoint + AACount - 1
+                if (DBPos >= len(self.DB)):
+                    break
+                ExtraMass += Global.AminoMass.get(self.DB[DBPos], 9999)
+                Diff = abs(Mass - ExtraMass)
+                if Diff < 1.1:
+                    NewPeptide = GetPeptideFromModdedName(self.DB[Peptide.DBPos:DBPos + 1])
+                    NewPeptide.DBPos = Peptide.DBPos
+                    NewPeptide.Prefix = Peptide.Prefix
+                    NewPeptide.Suffix = self.DB[DBPos + 1]
+                    NewPeptide.UnexplainedModList = []
+                    return NewPeptide
+        return Peptide
+    def ReadPTMWitnessesFromFile(self, FileName):
+        """
+        Read annotations (again!) from the specified file.  This time around, we know
+        which PTM sites we accept, and we count how many spectra are present.
+        """
+        try:
+            File = open(FileName, "rb")
+        except:
+            print "* Error in PTMChooser: Cannot open results file '%s'!"%FileName
+            return
+        LineNumber = 0
+        AnnotationCount = 0
+        OldSpectrumName = None
+        AnnotatedFlag = 0
+        # (the results file was already opened above, in the try block)
+        for FileLine in File.xreadlines():
+            LineNumber += 1
+            self.LinesReadCount += 1
+            if LineNumber%10000 == 0:
+                print "Line#%s %s annotations accepted"%(LineNumber, AnnotationCount)
+                sys.stdout.flush()
+                if MaxLineCount != None and self.LinesReadCount > MaxLineCount:
+                    break
+            if FileLine[0]=="#":
+                continue # skip comment-line                
+            Bits = FileLine.strip().split("\t")
+            ##################################################################
+            # Skip invalid lines, or poor annotations:
+            if len(Bits)<16:
+                # This isn't a valid annotation line!
+                continue
+            try:
+                MQScore = float(Bits[5])
+                PValue = float(Bits[10])
+                DeltaScore = float(Bits[11])
+            except:
+                print "** Warning: Invalid annotation line in %s line %s"%(FileName, LineNumber)
+                continue
+            if PValue > self.PValueCutoff or DeltaScore < self.DeltaScoreCutoff or MQScore < self.MQScoreCutoff:
+                continue
+            SpectrumName = (Bits[0], Bits[1])
+            if SpectrumName != OldSpectrumName:
+                # It's a new spectrum; reset the AnnotatedFlag
+                AnnotatedFlag = 0
+            OldSpectrumName = SpectrumName
+            if AnnotatedFlag:
+                continue                
+            Peptide = GetPeptideFromModdedName(Bits[2][2:-2])
+            Peptide.UnexplainedModList = []
+            if len(Peptide.Aminos) < self.MinimumPeptideLength:
+                continue # Short peptides are too likely to be spurious!  (Length 1-4 is rubbish, 5 is marginal)
+            ##################################################################
+            Peptide.MQScore = MQScore
+            Peptide.DeltaScore = float(Bits[12])
+            Peptide.PValue = PValue
+            Peptide.SpectrumName = Bits[0].replace("/","\\").split("\\")[-1]
+            Peptide.SpectrumPath = Bits[0]
+            Peptide.ScanNumber = Bits[1]
+            Peptide.ProteinName = Bits[3]
+            Peptide.ScanByteOffset = Bits[15]
+            Peptide.DBPos = self.DB.find(Peptide.Aminos)
+            Peptide.Prefix = Bits[2][0]
+            Peptide.Suffix = Bits[2][-1]
+            if Peptide.DBPos == -1:
+                print "** Warning: Annotation '%s' for spectrum '%s' not found in database!"%(Peptide.Aminos, SpectrumName)
+                continue
+            # Accept modless peptides immediately:
+            if len(Peptide.Modifications.keys()) == 0:
+                self.AcceptPTMWitness(Peptide, Bits)
+                AnnotatedFlag = 1
+                AnnotationCount += 1
+                continue
+            # Fixup endpoints, and if we removed all mods, accept:
+            Peptide = self.TweakIncorrectEndpoints(Peptide)
+            if len(Peptide.Modifications.keys()) == 0:
+                self.AcceptPTMWitness(Peptide, Bits)
+                AnnotatedFlag = 1
+                AnnotationCount += 1
+                continue
+            # Check to see whether all the PTMs in the peptide are correct,
+            # or at least delta-correct.  Note that SiteDict contains
+            # "shadow" entries already!
+            OKSiteList = []
+            InvalidPTM = 0
+            for (Index, ModList) in Peptide.Modifications.items():
+                DBPos = Peptide.DBPos + Index
+                for Mod in ModList:
+                    Site = self.SiteDict.get((DBPos, Mod.Mass))
+                    # Check that Site.DBPos is in the range [Peptide.DBPos, Peptide.DBPos + len(Peptide.Aminos)),
+                    # because Site.DBPos must actually fall within the peptide!
+                    if Site and Site.DBPos >= Peptide.DBPos and Site.DBPos <= (Peptide.DBPos + len(Peptide.Aminos) - 1):
+                        OKSiteList.append((Site, Index))
+                        continue
+##                    # We didn't find anything at (DBPos, Mod.Mass), so consider shadows:
+##                    MinDBPos = DBPos - min(Index, 3)
+##                    MaxDBPos = DBPos + min(3, len(Peptide.Aminos) - Index - 1)
+##                    FoundFlag = 0
+##                    for NearMass in (Mod.Mass - 1, Mod.Mass, Mod.Mass + 2):
+##                        if FoundFlag:
+##                            break
+##                        for NearDBPos in range(MinDBPos, MaxDBPos):
+##                            Site = self.SiteDict.get((NearDBPos, NearMass))
+##                            if Site:
+##                                OKSiteList.append((Site, Index))
+##                                FoundFlag = 1
+##                                print "Accept close:", DBPos, Mod.Mass, Site.DBPos, Site.Mass, Peptide.GetFullModdedName()
+##                                break
+##                    if not FoundFlag:
+                    InvalidPTM = 1
+                    break
+            if not InvalidPTM:
+                for (Site, Index) in OKSiteList:
+                    self.AcceptPTMWitness(Peptide, Bits, Site)
+                AnnotationCount += 1
+                AnnotatedFlag = 1
+    def AcceptPTMWitness(self, Peptide, Bits, AnnotatedSite = None):
+        """
+        Second pass: We found a legal peptide annotation.  If AnnotatedSite is None,
+        the peptide is unmodified.  Otherwise AnnotatedSite is a legal PTM which this
+        peptide uses (although it may need tweaking!).
+        """
+        Bits = list(Bits)
+        # Add extra bits for the modification site:
+        if AnnotatedSite:
+            Bits.append(str(AnnotatedSite.Residue))
+            Bits.append(str(AnnotatedSite.Mass))
+        else:
+            Bits.append("")
+            Bits.append("")
+        # Add extra bit for the ORIGINAL annotation:
+        Bits.append(Bits[2])
+        # Tweak the peptide annotation, if necessary:
+        DBStart = Peptide.DBPos
+        DBEnd = Peptide.DBPos + len(Peptide.Aminos)
+        ScoredTuple = (Peptide.PValue, -Peptide.MQScore, Peptide)
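+        # Sorting these tuples puts the lowest p-value first, breaking ties by the highest MQScore.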
+        if AnnotatedSite == None:
+            # Note this unmodified spectrum overlapping modified sites:
+            for Site in self.SiteList:
+                if Site.DBPos >= DBStart and Site.DBPos < DBEnd:
+                    Site.BestModlessPeptides.append(ScoredTuple)
+                    Site.BestModlessPeptides.sort()
+                    Site.BestModlessPeptides = Site.BestModlessPeptides[:self.ReportPeptideCount]
+                    Site.ModlessAnnotationCount += 1
+        else:
+            # Tweak the peptide annotation if necessary:
+            TweakFlag = 0
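+            # A modification placed within 3 residues and ~1 Da of the accepted site
+            # is re-attributed to the site's exact position and mass.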
+            for (Index, ModList) in Peptide.Modifications.items():
+                if TweakFlag:
+                    break
+                for Mod in ModList:
+                    ModDBPos = Peptide.DBPos + Index
+                    if (ModDBPos != AnnotatedSite.DBPos or Mod.Mass != AnnotatedSite.Mass):
+                        # This isn't the same as the modification.  If it's CLOSE, then
+                        # tweak it:
+                        if abs(AnnotatedSite.DBPos - ModDBPos) <= 3 and abs(AnnotatedSite.Mass - Mod.Mass) < 1.2:
+                            ModList.remove(Mod)
+                            if not ModList:
+                                del Peptide.Modifications[Index]
+                            NewIndex = AnnotatedSite.DBPos - Peptide.DBPos
+                            if not Peptide.Modifications.has_key(NewIndex):
+                                Peptide.Modifications[NewIndex] = []
+                            NewMod = PTModClass("%+d"%AnnotatedSite.Mass)
+                            NewMod.Mass = AnnotatedSite.Mass
+                            Peptide.Modifications[NewIndex].append(NewMod)
+                            TweakFlag = 1
+                            break
+            # Note this spectrum:
+            AnnotatedSite.BestPeptides.append(ScoredTuple)
+            AnnotatedSite.BestPeptides.sort()
+            AnnotatedSite.BestPeptides = AnnotatedSite.BestPeptides[:self.ReportPeptideCount]
+            AnnotatedSite.AnnotationCount += 1
+            AnnotatedSite.ModdedSpecies[Peptide.Aminos] = AnnotatedSite.ModdedSpecies.get(Peptide.Aminos, 0) + 1
+            # Note the terminus:
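+            # TerminalCount[0]/[1]/[2] count spectra in which the site falls at the
+            # peptide's N-terminus, interior, or C-terminus, respectively.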
+            PeptidePos = AnnotatedSite.DBPos - Peptide.DBPos
+            if PeptidePos == 0:
+                AnnotatedSite.TerminalCount[0] += 1
+            elif PeptidePos == len(Peptide.Aminos) - 1:
+                AnnotatedSite.TerminalCount[2] += 1
+            else:
+                AnnotatedSite.TerminalCount[1] += 1
+            # And note this alternative modification for other sites:
+            for Site in self.SiteList:
+                if Site != AnnotatedSite and Site.DBPos >= DBStart and Site.DBPos < DBEnd:
+                    Site.BestOtherModPeptides.append(ScoredTuple)
+                    Site.BestOtherModPeptides.sort()
+                    Site.BestOtherModPeptides = Site.BestOtherModPeptides[:self.ReportPeptideCount]
+                    Site.OtherModAnnotationCount += 1
+        Bits[2] = Peptide.GetFullModdedName()
+        Str = string.join(Bits, "\t")
+        self.OutputAnnotationFile.write(Str + "\n")
+    def ReadSpectrumAnnotationsFromFile(self, FileName):
+        """
+        Parse annotations.  We've already verified that it's a file (not a directory) and it exists.
+        ASSUMPTION: All annotations for the same spectrum appear consecutively. 
+        """
+        try:
+            File = open(FileName, "rb")
+        except:
+            print "* Error in PTMChooser: Cannot open results file '%s'!"%FileName
+            return
+        LineNumber = 0
+        AnnotationCount = 0
+        OldSpectrumName = None
+        AnnotatedFlag = 0
+        MatrixEntryCount = 0
+        for FileLine in File.xreadlines():
+            LineNumber += 1
+            self.LinesReadCount += 1
+            if LineNumber%10000 == 0:
+                print "Line#%s %s modless %s matrix entries"%(LineNumber, AnnotationCount, MatrixEntryCount)
+                sys.stdout.flush()
+                if MaxLineCount != None and self.LinesReadCount > MaxLineCount:
+                    break
+            if FileLine[0]=="#":
+                continue # skip comment-line                
+            Bits = FileLine.strip().split("\t")
+            ##################################################################
+            # Skip invalid lines, or poor annotations:
+            if len(Bits)<16:
+                # This isn't a valid annotation line!
+                continue
+            try:
+                MQScore = float(Bits[5])
+                PValue = float(Bits[10])
+                DeltaScore = float(Bits[11])
+            except:
+                print "** Warning: Invalid annotation line in %s line %s"%(FileName, LineNumber)
+                continue
+            if PValue > self.PValueCutoff or DeltaScore < self.DeltaScoreCutoff:
+                ##print "%s Ignore match %s %s"%(LineNumber, PValue, DeltaScore) #%%%
+                continue
+            SpectrumName = (Bits[0], Bits[1])
+            if SpectrumName != OldSpectrumName:
+                # It's a new spectrum; reset the AnnotatedFlag
+                AnnotatedFlag = 0
+            OldSpectrumName = SpectrumName
+            Peptide = GetPeptideFromModdedName(Bits[2][2:-2])
+            Peptide.UnexplainedModList = None
+            if len(Peptide.Aminos) < self.MinimumPeptideLength:
+                ##print "%s Ignore short peptide %s"%(LineNumber, Bits[2]) #%%%
+                continue # Short peptides are too likely to be spurious!  (Length 1-4 is rubbish, 5 is marginal)
+            Peptide.Prefix = Bits[2][0]
+            Peptide.Suffix = Bits[2][-1]
+            ##################################################################
+            # If this peptide is unmodified, then ignore any further (lower-scoring) peptides for
+            # the same spectrum:
+            Keys = Peptide.Modifications.keys()
+            if len(Keys) == 0:
+                AnnotatedFlag = 1
+                AnnotationCount += 1
+                ##print "%s Accept modless %s"%(LineNumber, Bits[2]) #%%%
+                continue
+            if AnnotatedFlag:
+                continue
+            Peptide.DBPos = self.DB.find(Peptide.Aminos)
+            if Peptide.DBPos == -1:
+                print "** Warning: Annotation '%s' for spectrum '%s' not found in database!"%(Peptide.Aminos, SpectrumName)
+                continue
+            UnknownPTMSeen = 0
+            for (Index, ModList) in Peptide.Modifications.items():
+                for Mod in ModList:
+                    Terminus = None
+                    if Index == 0:
+                        Terminus = "N"
+                    if Index == len(Peptide.Aminos)-1:
+                        Terminus = "C"
+                    Key = (Peptide.Aminos[Index], Mod.Mass, Index, Terminus)
+                    if Peptide.UnexplainedModList == None:
+                        Peptide.UnexplainedModList = []
+                    Peptide.UnexplainedModList.append(Key)
+                    Key = (Peptide.Aminos[Index], Mod.Mass, Terminus)
+                    if not self.KnownPTMDict.has_key(Key):
+                        UnknownPTMSeen = 1
+            # Tweak any known mistakes in peptide annotation:
+            Peptide = self.TweakIncorrectEndpoints(Peptide)
+            # If it's modless now, note that and continue:
+            if len(Peptide.Modifications.keys()) == 0:
+                AnnotatedFlag = 1
+                AnnotationCount += 1
+                #print "%s Tweaked %s to %s"%(LineNumber, Bits[2], Peptide.GetFullModdedName()) #%%%
+                continue
+            # Accumulate entries in PTMFrequencyMatrix:
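+            # Matrix keys are (residue letter, mass) when building the global frequency
+            # matrix ("^" and "$" stand for the N- and C-terminus), and
+            # (database position, mass) when selecting site-specific PTMs.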
+            for (Index, ModList) in Peptide.Modifications.items():
+                for Mod in ModList:
+                    if self.BuildPTMFrequencyMatrix:
+                        Key = (Peptide.Aminos[Index], Mod.Mass)
+                        self.PTMFrequencyMatrix[Key] = self.PTMFrequencyMatrix.get(Key, 0) + 1
+                        self.PTMBestScoreMatrix[Key] = max(self.PTMBestScoreMatrix.get(Key, -999), MQScore)
+                        if Index == 0:
+                            Key = ("^", Mod.Mass)
+                            self.PTMFrequencyMatrix[Key] = self.PTMFrequencyMatrix.get(Key, 0) + 1
+                            self.PTMBestScoreMatrix[Key] = max(self.PTMBestScoreMatrix.get(Key, -999), MQScore)
+                        if Index == len(Peptide.Aminos) - 1:
+                            Key = ("$", Mod.Mass)
+                            self.PTMFrequencyMatrix[Key] = self.PTMFrequencyMatrix.get(Key, 0) + 1
+                            self.PTMBestScoreMatrix[Key] = max(self.PTMBestScoreMatrix.get(Key, -999), MQScore)
+                    else:
+                        Key = (Peptide.DBPos + Index, Mod.Mass)
+                        self.PTMFrequencyMatrix[Key] = self.PTMFrequencyMatrix.get(Key, 0) + 1
+                        self.PTMBestScoreMatrix[Key] = max(self.PTMBestScoreMatrix.get(Key, -999), MQScore)
+                        #print "%s Peptide %s %s key %s"%(LineNumber, Bits[2], Peptide.GetFullModdedName(), Key) #%%%
+                        MatrixEntryCount += 1
+            if not UnknownPTMSeen:
+                AnnotatedFlag = 1 # ignore all subsequent annotations
+        File.close()
+        return AnnotationCount
+    def SelectSites(self):
+        """
+        Iterate: Find the largest entry in self.PTMFrequencyMatrix.  Remove entries
+        from this cell and neighboring cells, and append a new SiteClass instance
+        to self.SiteList.  Stop when the next entry is too small, or when we have
+        already generated enough sites.
+        """
+        while (1):
+            BestCount = 0
+            BestMQScore = -999
+            BestKey = None
+            for (Key, Count) in self.PTMFrequencyMatrix.items():
+                (AA, Mass) = Key
+                MQScore = self.PTMBestScoreMatrix.get(Key, -999)
+                # Filter out +1, -1 here:
+                if abs(Mass) >= self.MinimumPTMSize:
+                    if (Count > BestCount) or (Count == BestCount and MQScore > BestMQScore):
+                        BestCount = Count
+                        BestMQScore = MQScore
+                        BestKey = Key
+                        #print BestCount, BestMQScore, Key
+            if not BestKey:
+                break
+            if BestCount < self.MinSpectraForSite:
+                print "Next PTM explains %s<%s spectra, stop now"%(BestCount, self.MinSpectraForSite)
+                break
+            (DBPos, Mass) = BestKey
+            Site = SiteClass()
+            Site.DBPos = DBPos
+            Site.Mass = Mass
+            (ProteinName, ProteinNumber, ResidueNumber) = self.GetProteinInfo(DBPos)
+            Site.ProteinName = ProteinName
+            Site.Residue = "%s%s"%(self.DB[DBPos], ResidueNumber)
+            Site.AA = self.DB[DBPos]
+            print "%s Accept PTM: %s on %s from %s"%(BestCount, Mass, Site.Residue, ProteinName[:40])
+            self.SiteList.append(Site)
+            if len(self.SiteList) >= self.MaxSiteCount:
+                print "Acquired %s sites - stop now"%self.MaxSiteCount
+                break
+            # Remove matrix entries:
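+            # Absorb nearby cells: positions within +/-3 at the same mass, and
+            # positions within +/-1 also at masses +/-1 Da, so close shadows of this
+            # site do not spawn duplicate sites.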
+            for NearPos in range(DBPos - 3, DBPos + 4):
+                if NearPos in (DBPos-1, DBPos, DBPos+1):
+                    Masses = (Mass-1, Mass, Mass+1)
+                else:
+                    Masses = (Mass,)
+                for NearMass in Masses:
+                    Key = (NearPos, NearMass)
+                    if self.PTMFrequencyMatrix.has_key(Key):
+                        print "Absorb adjacent entry:", Key
+                        del self.PTMFrequencyMatrix[Key]
+        # Keep a dictionary of the accepted sites, for easy lookup:
+        self.SiteDict = {}
+        for Site in self.SiteList:
+            # Register "shadow" entries at nearby positions and masses, so slightly
+            # misplaced annotations can still be matched to this site:
+            for NearPos in range(Site.DBPos - 3, Site.DBPos + 4):
+                for NearMass in range(Site.Mass - 1, Site.Mass + 2):
+                    Key = (NearPos, NearMass)
+                    if not self.SiteDict.has_key(Key):
+                        self.SiteDict[Key] = Site
+    def GetProteinInfo(self, DBPos):
+        "Return the protein# and the residue# for this file position."
+        for Index in range(1, len(self.ProteinNames)):
+            if self.ProteinStartPositions[Index] > DBPos:
+                ResidueNumber = DBPos - self.ProteinStartPositions[Index - 1] + 1
+                #return (self.ProteinNames[Index - 1], ResidueNumber)
+                return (self.ProteinNames[Index - 1], Index - 1, ResidueNumber)
+        # The match must come from the last protein:
+        ResidueNumber = DBPos - self.ProteinStartPositions[-1] + 1
+        return (self.ProteinNames[-1], len(self.ProteinNames) - 1, ResidueNumber)
+    def OutputResults(self):
+        # Remove existing files AllSiteSummary and AllSiteDetails, so we start them fresh:
+        Path = os.path.join(self.PTMSummaryDir, "AllSiteSummary.html")
+        print Path
+        if os.path.exists(Path):
+            os.remove(Path)
+        Path = os.path.join(self.PTMSummaryDir, "AllSiteDetails.html")
+        print Path
+        if os.path.exists(Path):
+            os.remove(Path)
+        # Sort the sites by annotation-count:
+        SortedSites = []
+        for Site in self.SiteList:
+            if Site.AnnotationCount:
+                SortedSites.append((Site.AnnotationCount, Site))
+        SortedSites.sort()
+        SortedSites.reverse()
+        self.TotalSpectraForPTM = {} # (AA, Mass) -> Count
+        self.SitesForPTM = {} # (AA, Mass) -> list of Site instances
+        self.TerminusForPTM = {} # (AA, Mass) -> terminus-tuple
+        for (Count, Site) in SortedSites:
+            # Note this site in the PTM lists:
+            Key = (Site.AA, Site.Mass)
+            if not self.SitesForPTM.has_key(Key):
+                self.SitesForPTM[Key] = []
+            self.SitesForPTM[Key].append(Site)
+            self.TotalSpectraForPTM[Key] = self.TotalSpectraForPTM.get(Key, 0) + Site.AnnotationCount
+            if not self.TerminusForPTM.has_key(Key):
+                self.TerminusForPTM[Key] = [0, 0, 0]
+            for X in range(3):
+                self.TerminusForPTM[Key][X] += Site.TerminalCount[X]
+            # Write a table summarizing this site to the PTM page, and to the
+            # overall details page:
+            HTML = self.WriteSiteSummary(Site, 1)
+            DetailsFilePath = os.path.join(self.PTMSummaryDir, "%s%sDetails.html"%(Site.AA, Site.Mass))
+            if len(self.SitesForPTM[Key]) == 1:
+                File = open(DetailsFilePath, "w")
+            else:
+                File = open(DetailsFilePath, "a")
+            File.write(HTML)
+            File.close()
+            File = open(os.path.join(self.PTMSummaryDir, "AllSiteDetails.html"), "a")
+            File.write(HTML)
+            File.close()
+            #
+        #######################################
+        # Write the index file, which summarizes things by PTM (possibly several
+        # different sites correspond to each row)
+        IndexFilePath = os.path.join(self.PTMSummaryDir, "index.html")
+        IndexFile = open(IndexFilePath, "w")
+        IndexFile.write("<h3>PTM Summary Report</h3>\n")
+        if not SortedSites:
+            IndexFile.write("<b> * * * No PTMs found * * *</b>\n")
+            return
+        IndexFile.write("<a href=\"AllSiteSummary.html\">Summary table for all sites</a>")
+        IndexFile.write("   <a href=\"AllSiteDetails.html\">Details for all sites</a><br><br>\n")
+        IndexFile.write("<table><tr><td><b>Terminus</b></td><td><b>AA</b></td><td><b>Mass<br>delta</b></td>")
+        IndexFile.write("<td><b>Spectra</b></td><td><b>Sites</b></td><td><b>Top-site<br>spectra</b></td>")
+        IndexFile.write("<td><b>Results</b></td><td><b>Possible explanations</b></td></tr>\n")
+        ############
+        SortedPTMs = []
+        for (Key, Count) in self.TotalSpectraForPTM.items():
+            SortedPTMs.append((Count, Key))
+        SortedPTMs.sort()
+        SortedPTMs.reverse()
+        for (Count, Key) in SortedPTMs:
+            (AA, Mass) = Key
+            if self.TotalSpectraForPTM.get(Key, 0) < 1:
+                continue # Skip this, we don't have a single spectrum for it!
+            print "Write summary for %s %s"%(AA, Mass)
+            # Decide whether we think it's terminal:
+            N = self.TerminusForPTM[Key][0]
+            Body = self.TerminusForPTM[Key][1]
+            C = self.TerminusForPTM[Key][2]
+            if N > Body:
+                Terminus = "N"
+            elif C > Body:
+                Terminus = "C"
+            else:
+                Terminus = ""
+            ######################################################################
+            # Write terse records for each site for this PTM:
+            HTML = self.WriteTerseSummary(self.SitesForPTM[Key])
+            File = open(os.path.join(self.PTMSummaryDir, "%s%sSummary.html"%(AA, Mass)), "w")
+            File.write(HTML)
+            File.close()
+            File = open(os.path.join(self.PTMSummaryDir, "AllSiteSummary.html"), "a")
+            File.write(HTML)
+            File.close()
+            ######################################################################
+            # Add links to the index page:
+            DetailLink = "%s%sDetails.html"%(AA, Mass)
+            SummaryLink = "%s%sSummary.html"%(AA, Mass)
+            ExplanationList = self.GetKnownPTMExplanation(AA, Mass, Terminus)
+            if AA == "C":
+                ExplanationList.extend(ExplainPTMs.GetExplanation(AA, Mass, Terminus, BasePTM = self.CysteineProtection))
+            else:
+                ExplanationList.extend(ExplainPTMs.GetExplanation(AA, Mass, Terminus))
+            if len(ExplanationList) == 0:
+                Explanations = "Unknown"
+            else:
+                Explanations = ""
+                for Entry in ExplanationList[:3]:
+                    Explanations += "%s, "%Entry.GetNameWithLink()
+                Explanations = Explanations[:-2] # remove trailing comma+space
+            IndexFile.write("<tr><td>%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td>"%(\
+                Terminus, AA, Mass, self.TotalSpectraForPTM[Key], len(self.SitesForPTM[Key]),
+                self.SitesForPTM[Key][0].AnnotationCount))
+            IndexFile.write("<td><a href=\"%s\">Details</a> <a href=\"%s\">Summary</a></td><td>%s</td></tr>\n"%(DetailLink, SummaryLink, Explanations))
+        IndexFile.close()
+    def GetKnownPTMExplanation(self, AA, Mass, Terminus):
+        """
+        Return a list of known PTMs that fit this description.  Mostly just so
+        that we can report their correct names.
+        """
+        ExplanationList = []
+        for PTM in self.KnownPTMs:
+            if PTM.Mass != Mass:
+                continue
+            if PTM.Terminus == Terminus and PTM.AA.has_key(AA):
+                ExplanationList.append(PTM)
+        return ExplanationList
+    def WriteTerseSummary(self, SiteList):
+        if not SiteList:
+            return ""
+        HTML = ""
+        AA = self.DB[SiteList[0].DBPos]
+        Mass = SiteList[0].Mass
+        HTML += "<h3>Sites for %+d on %s</h3>"%(Mass, AA)
+        TotalSpectra = 0
+        for Site in SiteList:
+            TotalSpectra += Site.AnnotationCount
+        HTML += "<b>%s spectra in all<br>\n"%TotalSpectra
+        HTML += "<table><tr><td><b>Protein</b></td><td><b>Residue</b></td><td><b>Spectra</b></td><td><b>Species</b></td><td><b>Unmodified</b></td></tr>\n"
+        for Site in SiteList:
+            (ProteinName, ProteinIndex, ResidueNumber) = self.GetProteinInfo(Site.DBPos)
+            Residue = "%s%s"%(AA, ResidueNumber)
+            HTML += "<tr><td>%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td></tr>\n"%(ProteinName, Residue, Site.AnnotationCount, len(Site.ModdedSpecies.keys()), Site.ModlessAnnotationCount)
+        HTML += "</table>"
+        return HTML
+            
+    def WriteSiteSummaryLine(self, Notes, Peptide):
+        Annotation = Peptide.GetFullModdedName()
+        #print "WriteSiteSummaryLine: Notes %s Pval %s score %s Pep %s"%(Notes, Peptide.PValue, Peptide.MQScore, Peptide.GetFullModdedName())
+        WroteLine = 0
+        HTML = ""
+        if self.GenerateSpectrumImagesFlag:
+            try:
+                ImageFileName = "%s%s.png"%(Peptide.SpectrumName, Peptide.ScanNumber)
+                ImagePath = os.path.join(self.PTMSummaryDir, "Images", ImageFileName)
+                SpecFilePath = self.GetSpectrumFilePath(Peptide.SpectrumPath)
+                FileName = "%s:%s"%(SpecFilePath, Peptide.ScanByteOffset)
+                LabeledSpectrum = Label.LabelDTAFile(Peptide, FileName, None)
+                Maker = MakeImage.MSImageMaker()
+                Maker.ConvertSpectrumToImage(LabeledSpectrum, ImagePath, Peptide)
+                HTML = "<tr><td>%s</td><td>%s</td><td>%s</td><td><a href=\"Images/%s\">%s</a></td>"%(Notes, Peptide.SpectrumName, Peptide.ScanNumber, ImageFileName, Annotation)
+                HTML += "<td>%s</td><td>%s</td><td>%s</td></tr>\n"%(Peptide.MQScore, Peptide.DeltaScore, Peptide.PValue)
+                WroteLine = 1
+            except:
+                # Error generating image - perhaps the file isn't available on disk?
+                print SpecFilePath, Peptide.ScanByteOffset, Peptide.SpectrumPath, Peptide.ScanNumber
+                traceback.print_exc()
+                #pass
+        if not WroteLine:
+            HTML = "<tr><td>%s</td><td>%s</td><td>%s</td>"%(Notes, Peptide.SpectrumName, Peptide.ScanNumber)
+            HTML += "<td>%s</td><td>%s</td><td>%s</td><td>%s</td></tr>\n"%(Annotation, Peptide.MQScore, Peptide.DeltaScore, Peptide.PValue)
+        return HTML
+    def WriteSiteSummary(self, Site, VerboseFlag):
+        """
+        Write out a verbose summary of this putative modification site to the specified file.
+        """
+        ModlessPeptides = []
+        OtherModPeptides = []
+        Site.SpectrumCount = 0
+        # Sort the peptide species:
+        SortedSpecies = []
+        for (Aminos, Count) in Site.ModdedSpecies.items():
+            SortedSpecies.append((Count, Aminos))
+        SortedSpecies.sort()
+        ###############################################################
+        # Report up to 10 annotations per species:
+        HTML = ""
+        (ProteinName, ProteinIndex, ResidueNumber) = self.GetProteinInfo(Site.DBPos)
+        SortedSpecies.sort()
+        HTML += "<h3>%+d on Residue %s of protein %s</h3>\n"%(Site.Mass, Site.Residue, ProteinName)
+        HTML += "<b>%s spectra</b> annotated this residue with %+d <br>\n"%(Site.AnnotationCount, Site.Mass)
+        HTML += "<b>%s spectra</b> cover this site without modification<br>\n"%Site.ModlessAnnotationCount
+        if Site.OtherModAnnotationCount:
+            HTML += "<b>%s spectra</b> containing different modifications cover this site<br>\n"%Site.OtherModAnnotationCount
+        HTML += "Details for the top-scoring spectra follow:<br>\n"
+        HTML += "<table><tr><td><b>Notes</b></td><td><b>Spectrum</b></td>"
+        HTML += "<td><b>Scan</b></td><td><b>Annotation</b></td>"
+        HTML += "<td><b>MQScore</b></td><td><b>Delta-score</b></td><td><b>p-value</b></td></tr>\n"
+        for (Dummy1, Dummy2, Peptide) in Site.BestPeptides:
+            HTML += self.WriteSiteSummaryLine("", Peptide)
+        ######################################################
+        # Without PTM:
+        ModlessPeptides.sort()
+        for (Dummy1, Dummy2, Peptide) in Site.BestModlessPeptides:
+            HTML += self.WriteSiteSummaryLine("No PTM", Peptide)
+        ######################################################
+        # Other PTMs:
+        OtherModPeptides.sort()
+        for (Dummy1, Dummy2, Peptide) in Site.BestOtherModPeptides:
+            HTML += self.WriteSiteSummaryLine("Other PTM", Peptide)
+        HTML += "</table><hr>"
+        return HTML                
+    def ReadDatabase(self, DBPath):
+        try:
+            File = open(DBPath, "rb")
+        except:
+            print "** Unable to open database file '%s'!"%DBPath
+            raise
+        self.DB = File.read()
+        File.close()
+        # Read the database index, if found:
+        self.ProteinStartPositions = []
+        self.ProteinNames = []
+        IndexPath = os.path.splitext(DBPath)[0] + ".index"
+        if os.path.exists(IndexPath):
+            File = open(IndexPath, "rb")
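+            # Each 92-byte index record unpacks as "<qi80s": an 8-byte field (unused here),
+            # the protein's start position in the .trie database, and an 80-character
+            # null-padded protein name.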
+            while (1):
+                Data = File.read(92)
+                if not Data:
+                    break
+                Tuple = struct.unpack("<qi80s", Data)
+                self.ProteinStartPositions.append(Tuple[1])
+                Name = Tuple[2]
+                NullPos = Name.find(chr(0))
+                if NullPos != -1:
+                    Name = Name[:NullPos]
+                self.ProteinNames.append(Name)
+            File.close()
+        else:
+            print "** Error: Database index file '%s' not found!"%IndexPath
+    def GetSpectrumFilePath(self, FileName):
+        # this can be overridden, if spectra are moved
+        #Bits = FileName.replace("/", "\\").split("\\")
+        #return os.path.join(r"E:\ms\OMICS04", Bits[-2], Bits[-1])
+        return FileName 
+    def PerformIterativeNSSSelection(self, MinExplanationCount):
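+        # Greedy selection: repeatedly take the most frequent (AA, mass) cell, fold its
+        # +/-1 Da shadow entries into it, zero it out, and record a PTM, until the best
+        # remaining count drops below MinExplanationCount.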
+        self.NSSPTMList = []
+        Matrix = self.PTMFrequencyMatrix.copy()
+        while (1):
+            SortedList = []
+            for (Key, Count) in Matrix.items():
+                if Key[0] in ("$","^"):
+                    continue
+                SortedList.append((Count, Key))
+            SortedList.sort()
+            SortedList.reverse()
+            if not SortedList:
+                return
+            (Count, Key) = SortedList[0]
+            if Count < MinExplanationCount:
+                return
+            # Grab nearby 'shadow' entries:
+            (AA, Mass) = Key
+            for NearMass in (Mass - 1, Mass + 1):
+                NearKey = (AA, NearMass)
+                Matrix[Key] += Matrix.get(NearKey, 0)
+                Matrix[NearKey] = 0
+            Matrix[Key] = 0 # selected already
+            PTM = PTMClass(Mass)
+            PTM.AA = AA
+            self.NSSPTMList.append(PTM)
+    def PerformNonSiteSpecificPTMSelection(self, ResultsFileName):
+        print "\n\nRead spectrum annotations:"
+        self.ReadSpectrumAnnotations(ResultsFileName)
+        # Output the PTM frequency matrix:
+        FileName = os.path.join(self.PTMSummaryDir, "NonSiteSpecific.html")
+        HTMLFile = open(FileName, "w")
+        self.WriteHTMLMatrix(self.PTMFrequencyMatrix, HTMLFile)
+        #####
+        FileName = os.path.join(self.PTMSummaryDir, "NonSiteSpecific.txt")
+        TextFile = open(FileName, "w")
+        self.WriteTextMatrix(self.PTMFrequencyMatrix, TextFile)
+        TextFile.close()
+        #######################################################
+        # Now, perform iterative PTM selection.  This will tidy up the matrix significantly.
+        # We'll stop selecting PTMs when the next one explains fewer entries than the
+        # light-shading cutoff computed by WriteHTMLMatrix:
+        OrderedEntries = []
+        for (Key, Count) in self.PTMFrequencyMatrix.items():
+            if Key[0] not in "^$":
+                OrderedEntries.append(Count)
+        OrderedEntries.sort()
+        MinExplanationCount = self.LightShadingCutoff #max(OrderedEntries[len(OrderedEntries)/2] * 4, 10)
+        self.PerformIterativeNSSSelection(MinExplanationCount)
+        #######################################################
+        # Write details on each PTM.
+        HTMLFile.write("<hr>")
+        #self.GeneratePTMFrequencyMatrix(1)
+        HTMLFile.write("<h3>Putative modifications</h3>\n")
+        # Get a list of PTMs, sorted by the number of spectra they explain:
+        SortedList = []
+        for NSSPTM in self.NSSPTMList:
+            if NSSPTM.AA:
+                Count = self.PTMFrequencyMatrix.get((NSSPTM.AA, NSSPTM.Mass), 0)
+            else:
+                if NSSPTM.Terminus == "N":
+                    Count = self.PTMFrequencyMatrix.get(("^", NSSPTM.Mass), 0)
+                elif NSSPTM.Terminus == "C":
+                    Count = self.PTMFrequencyMatrix.get(("$", NSSPTM.Mass), 0)
+            SortedList.append((Count, NSSPTM))
+        SortedList.sort()
+        SortedList.reverse()
+        for (Count, NSSPTM) in SortedList:
+            if Count < self.LightShadingCutoff:
+                continue # garbage PTM
+            ModStr = "%s%+d"%(NSSPTM.AA, NSSPTM.Mass)
+            HTMLFile.write("Modification %s applied to %s spectra<br>\n"%(ModStr, Count))
+            HTMLFile.write("   ")
+            if NSSPTM.AA == "C":
+                ExplanationList = ExplainPTMs.GetExplanation(NSSPTM.AA, NSSPTM.Mass, "", BasePTM = self.CysteineProtection)
+            else:
+                ExplanationList = ExplainPTMs.GetExplanation(NSSPTM.AA, NSSPTM.Mass, "")
+            if not ExplanationList:
+                HTMLFile.write("(unknown mass-delta)<br><br>\n")
+            else:
+                Str = ""
+                for Explanation in ExplanationList:
+                    Str += "%s, "%Explanation.GetNameWithLink()
+                Str = Str[:-2] # remove trailing comma
+                HTMLFile.write("Possible annotations: %s<br><br>\n"%Str)
+##        #######################################################
+##        # Write the cleaned-up matrix:
+##        #self.GeneratePTMFrequencyMatrix(0)
+##        TabbedMatrixFileName = os.path.join(self.PTMSummaryDir, "ProcessedMatrix.txt")
+##        TabbedMatrixFile = open(TabbedMatrixFileName, "w")
+##        self.WriteTextMatrix(self.PTMFrequencyMatrix, TabbedMatrixFile)
+##        TabbedMatrixFile.close()
+##        HTMLFile.write("<h3>Resultant PTM frequency matrix</h3>")
+##        self.WriteHTMLMatrix(self.Matrix, HTMLFile)
+##        # Finish and cleanup:
+##        Path = os.path.join(self.PTMSummaryDir, "NonSiteSpecificAnnotations.txt")
+##        self.OutputAnnotations(Path)
+##        HTMLFile.close()
+        
+    def WriteHTMLMatrix(self, Matrix, HTMLFile):
+        """
+        Write PTM frequency matrix to a webpage, with shading on well-filled cells.
+        """
+        AAList = "^ACDEFGHIKLMNPQRSTVWY$"
+        # First, let's decide what the cutoffs are for heavy, medium, and light shading.
+        EntryList = []
+        for (Key, Value) in Matrix.items():
+            if Key[0] not in ("^$"):
+                EntryList.append(Value)
+        EntryList.sort()
+        if not len(EntryList):
+            HTMLFile.write("<b>** Error - no entries, so no PTM matrix written<br>\n")
+            self.LightShadingCutoff = 1
+            return
+        MaximumEntry = EntryList[-1]
+        HeavyShadingCutoff = max(10, MaximumEntry / 2.0)
+        MediumShadingCutoff = max(5, MaximumEntry / 10.0)
+        self.LightShadingCutoff = max(2, MaximumEntry / 100.0)
+        MedianEntry = EntryList[len(EntryList) / 2]
+        Str = "Maximum entry %s, median entry %s.<br>\nRows containing an entry of <b>%d</b> or larger are displayed.<br>\n"
+        HTMLFile.write(Str%(MaximumEntry, MedianEntry, int(self.LightShadingCutoff + 1.0)))
+        HTMLFile.write("<table><tr>")
+        ####################
+        # Write the header:
+        HeaderRow = ""
+        HeaderRow += "<td><b>Mass</b></td>"
+        for AA in AAList:
+            if AA == "^":
+                AA = " (N)"
+            elif AA == "$":
+                AA = " (C)"
+            else:
+                AA = "   " + AA
+            HeaderRow += "<td><b>%s</b></td>"%AA
+        HeaderRow += "</tr>\n"
+        HTMLFile.write(HeaderRow)
+        # Get mass range:
+        MinimumMass = 999
+        MaximumMass = -999
+        for Key in Matrix.keys():
+            MinimumMass = min(MinimumMass, Key[1])
+            MaximumMass = max(MaximumMass, Key[1])
+        # Write out one row for each feasible mass:
+        RowsPrinted = 0
+        for Mass in range(MinimumMass, MaximumMass + 1):
+            # Get the total number of entries on this row.  If it's low, then skip the row!
+            EntriesForThisRow = 0
+            BestEntryThisRow = 0
+            for AA in AAList[1:-1]:
+                EntriesForThisRow += Matrix.get((AA, Mass), 0)
+                BestEntryThisRow = max(BestEntryThisRow, Matrix.get((AA, Mass), 0))
+            # Only display a row if its largest entry exceeds the light-shading cutoff:
+            if BestEntryThisRow <= self.LightShadingCutoff:
+                continue
+            HTMLFile.write("<tr><td>%s</td>"%Mass)
+            for AA in AAList:
+                Key = (AA, Mass)
+                Count = Matrix.get(Key, 0)
+                if Count < 10:
+                    CountStr = "   %s"%Count
+                elif Count < 100:
+                    CountStr = "  %s"%Count
+                elif Count < 1000:
+                    CountStr = " %s"%Count
+                else:
+                    CountStr = "%s"%Count
+                if Count > HeavyShadingCutoff:
+                    HTMLFile.write("<td bgcolor=\"#999999\">%s</td>"%CountStr)
+                elif Count > MediumShadingCutoff:
+                    HTMLFile.write("<td bgcolor=\"#bbbbbb\">%s</td>"%CountStr)
+                elif Count > self.LightShadingCutoff:
+                    HTMLFile.write("<td bgcolor=\"#dddddd\">%s</td>"%CountStr)
+                else:
+                    HTMLFile.write("<td>%s</td>"%CountStr)
+            HTMLFile.write("</tr>\n")
+            RowsPrinted += 1
+            if RowsPrinted%25 == 0:
+                HTMLFile.write(HeaderRow)
+        HTMLFile.write("</table>\n")
+    def WriteTextMatrix(self, Matrix, TabbedMatrixFile):
+        """
+        Write the PTM frequency matrix in tab-delimited format (for easy parsing).
+        WriteHTMLMatrix contains similar code, formatted for easy reading by eye.
+        """
+        AAList = "^ACDEFGHIKLMNPQRSTVWY$"
+        HeaderLine = "Mass\t"
+        for AA in AAList:
+            if AA == "^":
+                AA = "(N)"
+            if AA == "$":
+                AA = "(C)"            
+            HeaderLine += "%s\t"%AA
+        HeaderLine += "\n"
+        TabbedMatrixFile.write(HeaderLine)
+        MinimumMass = 999
+        MaximumMass = -999
+        for Key in Matrix.keys():
+            MinimumMass = min(MinimumMass, Key[1])
+            MaximumMass = max(MaximumMass, Key[1])
+        # Write out one row for each feasible mass:
+        for Mass in range(MinimumMass, MaximumMass + 1):
+            Str = "%s\t"%Mass
+            for AA in AAList:
+                Key = (AA, Mass)
+                Str += "%s\t"%Matrix.get(Key, 0)
+            Str += "\n"
+            TabbedMatrixFile.write(Str)
+        
+        
+def Main(PTMProcessor):
+    global MaxLineCount
+    if len(sys.argv) < 3:
+        print UsageInfo
+        sys.exit(1)
+    ResultsFileName = None
+    #PTMProcessor = Processor()
+    (Options, Args) = getopt.getopt(sys.argv[1:], "r:d:s:c:iv:w:t:l:m:k:p")
+    OptionsSeen = {}
+    for (Option, Value) in Options:
+        OptionsSeen[Option] = 1
+        if Option == "-r":
+            # -r results file(s)
+            ResultsFileName = Value
+        elif Option == "-c":
+            # -c Mass of cysteine protecting group (57 by default)
+            PTMProcessor.CysteineProtection = int(Value)
+        elif Option == "-k":
+            # -k File specifying known, non-site-specific PTMs
+            PTMProcessor.KnownPTMFileName = Value
+        elif Option == "-t":
+            # -t Max number of sites to report (1000 by default)
+            PTMProcessor.MaxSiteCount = int(Value)
+        elif Option == "-v":
+            # -v p-value cutoff (0.01 by default)
+            PTMProcessor.PValueReportCutoff = float(Value)
+            if PTMProcessor.PValueReportCutoff <= 0 or PTMProcessor.PValueReportCutoff > 1:
+                print "** Error: Invalid p-value cutoff '%s'"%Value
+                print UsageInfo
+                sys.exit(1)
+        elif Option == "-w":
+            # -w p-value cutoff for the spectra used to pick a ptm (same as -v by default)
+            PTMProcessor.PValueCutoff = float(Value)
+            if PTMProcessor.PValueCutoff <= 0 or PTMProcessor.PValueCutoff > 1:
+                print "** Error: Invalid p-value cutoff '%s'"%Value
+                print UsageInfo
+                sys.exit(1)
+        elif Option == "-d":
+            # -d database
+            print "Read database:", Value
+            Path = FixupPath(Value)
+            PTMProcessor.ReadDatabase(Path)
+        elif Option == "-s":
+            # -s SummaryDir
+            PTMProcessor.PTMSummaryDir = Value
+        elif Option == "-i":
+            # -i -> generate spectrum images
+            PTMProcessor.GenerateSpectrumImagesFlag = 1
+        elif Option == "-l":
+            # -l -> Maximum number of lines to read in
+            MaxLineCount = int(Value)
+        elif Option == "-m":
+            # -m -> Minimum PTM size (defaults to 2)
+            PTMProcessor.MinimumPTMSize = int(Value)
+        elif Option == "-p":
+            # -p -> Generate PTM frequency matrix
+            PTMProcessor.BuildPTMFrequencyMatrix = 1
+        else:
+            print "Option not understood: '%s' '%s'"%(Option, Value)
+    if not OptionsSeen.get("-r"):
+        print "** Please specify a search results file (-r)"
+        print UsageInfo
+        sys.exit(1)
+    if not OptionsSeen.get("-d"):
+        print "** Please specify a database file (-d)"
+        print UsageInfo
+        sys.exit(1)
+    if not OptionsSeen.get("-w"):
+        PTMProcessor.PValueCutoff = PTMProcessor.PValueReportCutoff
+    # Make necessary directories:
+    try:
+        os.makedirs(PTMProcessor.PTMSummaryDir)
+    except:
+        pass
+    try:
+        Dir = os.path.join(PTMProcessor.PTMSummaryDir, "Images")
+        os.makedirs(Dir)
+    except:
+        pass
+    if PTMProcessor.BuildPTMFrequencyMatrix:
+        PTMProcessor.PerformNonSiteSpecificPTMSelection(ResultsFileName)
+        return
+    PTMProcessor.ReadKnownPTMs()
+    # Read annotations, generate the PTM frequency matrix:
+    print "\n\nRead spectrum annotations:"
+    sys.stdout.flush()
+    PTMProcessor.ReadSpectrumAnnotations(ResultsFileName)
+    # Select sites by 'peak finding' among large matrix entries:
+    print "\n\nSelect sites:"
+    sys.stdout.flush()
+    PTMProcessor.SelectSites()
+    # Re-read annotations, keeping a few in memory:
+    print "\n\nRead PTM witnesses:"
+    sys.stdout.flush()    
+    PTMProcessor.ReadPTMWitnesses(ResultsFileName)
+    # Output our findings:
+    print "\n\nOutput results:"
+    sys.stdout.flush()    
+    PTMProcessor.OutputResults()
+    
+if __name__ == "__main__":
+    try:
+        import psyco
+        psyco.full()
+    except:
+        print "(psyco optimization system not loaded - running normally)"
+    Main(Processor())
diff --git a/PTMDatabase.txt b/PTMDatabase.txt
new file mode 100644
index 0000000..81e3ee3
--- /dev/null
+++ b/PTMDatabase.txt
@@ -0,0 +1,563 @@
+#Database	ID	Mass	Name	AA	Terminus
+"#UniMOD PTM reference, parsed from http://www.unimod.org on 2/20/6. "					
+UniMOD	1	42.0367	Acetylation		N-term
+UniMOD	1	42.0367	Acetylation	K	
+UniMOD	1	42.0367	Acetylation	C	
+UniMOD	1	42.0367	Acetylation	S	
+UniMOD	2	-0.9848	Amidation		C-term
+UniMOD	3	226.2954	Biotinylation		N-term
+UniMOD	3	226.2954	Biotinylation	K	
+UniMOD	4	57.0513	Iodoacetamide derivative	D	
+UniMOD	4	57.0513	Iodoacetamide derivative	H	
+UniMOD	4	57.0513	Iodoacetamide derivative		N-term
+UniMOD	4	57.0513	Iodoacetamide derivative	K	
+UniMOD	4	57.0513	Iodoacetamide derivative	C	
+UniMOD	4	57.0513	Iodoacetamide derivative	E	
+UniMOD	5	43.0247	Carbamylation	C	
+UniMOD	5	43.0247	Carbamylation	R	
+UniMOD	5	43.0247	Carbamylation		N-term
+UniMOD	5	43.0247	Carbamylation	K	
+UniMOD	6	58.0361	Iodoacetic acid derivative	C	
+UniMOD	7	0.9848	Deamidation	Q	
+UniMOD	7	0.9848	Deamidation	N	
+UniMOD	8	486.6253	Gygi ICAT(TM) d0	C	
+UniMOD	9	494.6746	Gygi ICAT(TM) d8	C	
+UniMOD	10	-30.0922	Homoserine	M	
+UniMOD	11	-48.1075	Homoserine lactone	M	
+UniMOD	12	450.6221	Applied Biosystems original ICAT(TM) d8	C	
+UniMOD	13	442.5728	Applied Biosystems original ICAT(TM) d0	C	
+UniMOD	14	14.0266	Methyl ester	T	
+UniMOD	14	14.0266	Methyl ester	S	
+UniMOD	14	14.0266	Methyl ester	E	
+UniMOD	14	14.0266	Methyl ester	D	
+UniMOD	14	14.0266	Methyl ester		C-term
+UniMOD	15	42.0367	N-Acetylation		N-term
+UniMOD	16	28.0101	N-Formylation		N-term
+UniMOD	17	99.1311	N-isopropylcarboxamidomethyl	C	
+UniMOD	18	1.9998	O18 label		C-term
+UniMOD	19	15.9994	Oxidation	W	
+UniMOD	19	15.9994	Oxidation	H	
+UniMOD	19	15.9994	Oxidation	M	
+UniMOD	20	414.5196	"Biotinyl-iodoacetamidyl-3,6-dioxaoctanediamine"	C	
+UniMOD	21	79.9799	Phosphorylation	Y	
+UniMOD	21	79.9799	Phosphorylation	D	
+UniMOD	21	79.9799	Phosphorylation	C	
+UniMOD	21	79.9799	Phosphorylation	H	
+UniMOD	21	79.9799	Phosphorylation	T	
+UniMOD	21	79.9799	Phosphorylation	S	
+UniMOD	21	79.9799	Phosphorylation	R	
+UniMOD	22	79.9799	Phosphorylation without neutral loss	T	
+UniMOD	22	79.9799	Phosphorylation without neutral loss	S	
+UniMOD	23	-18.0153	Phosphorylation with prompt loss of phosphate	S	
+UniMOD	23	-18.0153	Phosphorylation with prompt loss of phosphate	T	
+UniMOD	23	-18.0153	Phosphorylation with prompt loss of phosphate	Y	
+UniMOD	24	71.0779	Acrylamide adduct	C	
+UniMOD	25	119.1207	pyridylacetyl		N-term
+UniMOD	25	119.1207	pyridylacetyl	K	
+UniMOD	26	-17.0305	S-carbamoylmethylcysteine cyclization (N-terminus)	C	
+UniMOD	27	-18.0153	Pyro-glu from E	E	
+UniMOD	28	-17.0305	Pyro-glu from Q	Q	
+UniMOD	29	127.1412	N-Succinimidyl-3-morpholine acetate		N-term
+UniMOD	29	127.1412	N-Succinimidyl-3-morpholine acetate	K	
+UniMOD	30	21.9818	Sodium adduct	D	
+UniMOD	30	21.9818	Sodium adduct		C-term
+UniMOD	30	21.9818	Sodium adduct	E	
+UniMOD	31	105.1372	S-pyridylethylation	C	
+UniMOD	32	31.9988	Sulphone	M	
+UniMOD	33	0.9848	Citrullination	R	
+UniMOD	34	14.0266	Methylation	I	
+UniMOD	34	14.0266	Methylation	Q	
+UniMOD	34	14.0266	Methylation	R	
+UniMOD	34	14.0266	Methylation		N-term
+UniMOD	34	14.0266	Methylation	N	
+UniMOD	34	14.0266	Methylation	K	
+UniMOD	34	14.0266	Methylation	H	
+UniMOD	34	14.0266	Methylation	C	
+UniMOD	34	14.0266	Methylation	L	
+UniMOD	35	15.9994	Hydroxylation	R	
+UniMOD	35	15.9994	Hydroxylation	Y	
+UniMOD	35	15.9994	Hydroxylation	F	
+UniMOD	35	15.9994	Hydroxylation	P	
+UniMOD	35	15.9994	Hydroxylation	N	
+UniMOD	35	15.9994	Hydroxylation	K	
+UniMOD	35	15.9994	Hydroxylation	D	
+UniMOD	36	28.0532	di-Methylation		N-term
+UniMOD	36	28.0532	di-Methylation	R	
+UniMOD	36	28.0532	di-Methylation	K	
+UniMOD	36	28.0532	di-Methylation	N	
+UniMOD	37	42.0797	tri-Methylation	R	
+UniMOD	37	42.0797	tri-Methylation	K	
+UniMOD	38	44.0095	Gamma-carboxylation	E	
+UniMOD	38	44.0095	Gamma-carboxylation	D	
+UniMOD	39	46.0916	Beta-methylthiolation	D	
+UniMOD	39	46.0916	Beta-methylthiolation	N	
+UniMOD	40	80.0632	O-Sulfonation	Y	
+UniMOD	40	80.0632	O-Sulfonation	T	
+UniMOD	40	80.0632	O-Sulfonation	S	
+UniMOD	41	162.1406	Hexose	R	
+UniMOD	41	162.1406	Hexose	C	
+UniMOD	41	162.1406	Hexose	T	
+UniMOD	41	162.1406	Hexose	W	
+UniMOD	41	162.1406	Hexose	N	
+UniMOD	41	162.1406	Hexose		N-term
+UniMOD	41	162.1406	Hexose	K	
+UniMOD	41	162.1406	Hexose	Y	
+UniMOD	42	188.3103	Lipoyl	K	
+UniMOD	43	203.1925	N-Acetylhexosamine	T	
+UniMOD	43	203.1925	N-Acetylhexosamine	S	
+UniMOD	43	203.1925	N-Acetylhexosamine	N	
+UniMOD	44	204.3511	Farnesylation	C	
+UniMOD	45	210.3556	Myristoylation	K	
+UniMOD	45	210.3556	Myristoylation	G	
+UniMOD	45	210.3556	Myristoylation	C	
+UniMOD	46	229.1266	Pyridoxal phosphate	K	
+UniMOD	47	238.4088	Palmitoylation	T	
+UniMOD	47	238.4088	Palmitoylation	S	
+UniMOD	47	238.4088	Palmitoylation	K	
+UniMOD	47	238.4088	Palmitoylation	C	
+UniMOD	48	272.4681	Geranyl-geranyl	C	
+UniMOD	49	340.333	Phosphopantetheine	S	
+UniMOD	50	783.5339	Flavin adenine dinucleotide	Y	
+UniMOD	50	783.5339	Flavin adenine dinucleotide	H	
+UniMOD	50	783.5339	Flavin adenine dinucleotide	C	
+UniMOD	51	789.3049	N-acyl diglyceride cysteine	C	
+UniMOD	52	42.04	Guanidination	K	
+UniMOD	53	156.2221	4-hydroxynonenal (HNE)	C	
+UniMOD	53	156.2221	4-hydroxynonenal (HNE)	H	
+UniMOD	53	156.2221	4-hydroxynonenal (HNE)	K	
+UniMOD	54	176.1241	N-glucuronylation		N-term
+UniMOD	55	305.3076	glutathione disulfide	C	
+UniMOD	56	45.0552	"Acetate labeling reagent (N-term & K) (heavy form, +3amu)"		N-term
+UniMOD	56	45.0552	"Acetate labeling reagent (N-term & K) (heavy form, +3amu)"	K	
+UniMOD	57	42.0367	Acetate labeling reagent light form (N-term & K)		N-term
+UniMOD	57	42.0367	Acetate labeling reagent light form (N-term & K)	K	
+UniMOD	58	56.0633	Propionate labeling reagent light form (N-term & K)		N-term
+UniMOD	58	56.0633	Propionate labeling reagent light form (N-term & K)	K	
+UniMOD	59	59.0412	"Propionate labeling reagent heavy form (+3amu), N-term&K"		N-term
+UniMOD	59	59.0412	"Propionate labeling reagent heavy form (+3amu), N-term&K"	K	
+UniMOD	60	127.1842	Quaternary amine labeling reagent light form (N-term & K)		N-term
+UniMOD	60	127.1842	Quaternary amine labeling reagent light form (N-term & K)	K	
+UniMOD	61	130.2027	"Quaternary amine labeling reagent heavy (+3amu) form, N-term & K"		N-term
+UniMOD	61	130.2027	"Quaternary amine labeling reagent heavy (+3amu) form, N-term & K"	K	
+UniMOD	62	133.2212	"Quaternary amine labeling reagent heavy form (+6amu), N-term & K"		N-term
+UniMOD	62	133.2212	"Quaternary amine labeling reagent heavy form (+6amu), N-term & K"	K	
+UniMOD	63	136.2397	"Quaternary amine labeling reagent heavy form (+9amu), N-term & K"		N-term
+UniMOD	63	136.2397	"Quaternary amine labeling reagent heavy form (+9amu), N-term & K"	K	
+UniMOD	64	100.0728	Succinic anhydride labeling reagent light form (N-term & K)		N-term
+UniMOD	64	100.0728	Succinic anhydride labeling reagent light form (N-term & K)	K	
+UniMOD	65	104.0974	"Succinic anhydride labeling reagent, heavy form (+4amu, 4H2), N-term & K"		N-term
+UniMOD	65	104.0974	"Succinic anhydride labeling reagent, heavy form (+4amu, 4H2), N-term & K"	K	
+UniMOD	66	104.0434	"Succinic anhydride labeling reagent, heavy form (+4amu, 4C13), N-term & K"		N-term
+UniMOD	66	104.0434	"Succinic anhydride labeling reagent, heavy form (+4amu, 4C13), N-term & K"	K	
+UniMOD	89	225.3106	Iminobiotinylation		N-term
+UniMOD	89	225.3106	Iminobiotinylation	K	
+UniMOD	90	338.4682	ESP-Tag light d0		N-term
+UniMOD	90	338.4682	ESP-Tag light d0	K	
+UniMOD	91	348.5299	ESP-Tag heavy d10		N-term
+UniMOD	91	348.5299	ESP-Tag heavy d10	K	
+UniMOD	92	339.453	NHS-LC-Biotin		N-term
+UniMOD	92	339.453	NHS-LC-Biotin	K	
+UniMOD	93	601.8021	EDT-maleimide-PEO-biotin	T	
+UniMOD	93	601.8021	EDT-maleimide-PEO-biotin	S	
+UniMOD	94	68.0773	IMID d0	K	
+UniMOD	95	72.1019	IMID d4	K	
+UniMOD	97	74.0964	Acrylamide d3	C	
+UniMOD	105	227.2603	Applied Biosystems cleavable ICAT(TM) light	C	
+UniMOD	106	236.1942	Applied Biosystems cleavable ICAT(TM) heavy	C	
+UniMOD	107	160.2141	Addition of N-formyl met		N-term
+UniMOD	108	125.1253	N-ethylmaleimide on cysteines	C	
+UniMOD	112	354.4676	"Oxidized lysine biotinylated with biotin-LC-hydrazide, reduced"	K	
+UniMOD	113	352.4518	Oxidized lysine biotinylated with biotin-LC-hydrazide	K	
+UniMOD	114	371.4982	"Oxidized proline biotinylated with biotin-LC-hydrazide, reduced"	P	
+UniMOD	115	369.4823	Oxidized Proline biotinylated with biotin-LC-hydrazide	P	
+UniMOD	116	310.4118	Oxidized arginine biotinylated with biotin-LC-hydrazide	R	
+UniMOD	117	312.4277	"Oxidized arginine biotinylated with biotin-LC-hydrazide, reduced"	R	
+UniMOD	118	490.7034	EDT-iodo-PEO-biotin	T	
+UniMOD	118	490.7034	EDT-iodo-PEO-biotin	S	
+UniMOD	119	316.3759	Thio Ether Formation - BTP Adduct	C	
+UniMOD	121	114.1026	ubiquitinylation residue	K	
+UniMOD	122	28.0101	Formylation		N-term
+UniMOD	122	28.0101	Formylation	T	
+UniMOD	122	28.0101	Formylation	S	
+UniMOD	122	28.0101	Formylation	K	
+UniMOD	123	345.7754	"N-iodoacetyl, p-chlorobenzyl-12C6-glucamine"	C	
+UniMOD	124	351.7313	"N-iodoacetyl, p-chlorobenzyl-13C6-glucamine"	C	
+UniMOD	125	32.0778	reductive amination-D		N-term
+UniMOD	125	32.0778	reductive amination-D	K	
+UniMOD	126	88.1283	thioacylation of primary amines (N-term and Lys)		N-term
+UniMOD	126	88.1283	thioacylation of primary amines (N-term and Lys)	K	
+UniMOD	127	17.9905	fluorophenylalanine replacement of phenylalanine	F	
+UniMOD	128	388.3497	"5-Iodoacetamidofluorescein (Molecular Probe, Eugene, OR)"	C	
+UniMOD	129	125.8965	Iodination	H	
+UniMOD	129	125.8965	Iodination	Y	
+UniMOD	130	251.7931	di-Iodination	Y	
+UniMOD	131	377.6896	tri-Iodination	Y	
+UniMOD	134	208.3398	(cis-delta 5)-tetradecaenoyl	G	
+UniMOD	135	206.3239	"(cis,cis-delta 5, delta 8)-tetradecadienoyl"	G	
+UniMOD	136	104.1061	labeling reagent light form (N-term & K)		N-term
+UniMOD	136	104.1061	labeling reagent light form (N-term & K)	K	
+UniMOD	137	1217.088	N-linked glycan core	N	
+UniMOD	139	233.2862	5-dimethylaminonaphthalene-1-sulfonyl	K	
+UniMOD	139	233.2862	5-dimethylaminonaphthalene-1-sulfonyl		N-term
+UniMOD	140	-29.018	ISD a-series (C-Term)		C-term
+UniMOD	141	41.0519	amidination of lysines or N-terminal amines with methyl acetimidate	K	
+UniMOD	141	41.0519	amidination of lysines or N-terminal amines with methyl acetimidate		N-term
+UniMOD	142	349.3337	HexNAc1dHex1	N	
+UniMOD	143	406.385	HexNAc2	N	
+UniMOD	144	486.4218	Hex3	N	
+UniMOD	145	495.4749	HexNAc1dHex2	N	
+UniMOD	146	511.4743	Hex1HexNAc1dHex1	N
+UniMOD	147	552.5262	HexNAc2dHex1	N
+UniMOD	148	568.5256	Hex1HexNAc2	N
+UniMOD	149	656.5877	Hex1HexNAc1NeuAc1	N
+UniMOD	150	698.6674	HexNAc2dHex2	N
+UniMOD	151	700.6403	Hex1HexNAc2Pent1	N
+UniMOD	152	714.6668	Hex1HexNAc2dHex1	N
+UniMOD	153	730.6662	Hex2HexNAc2	N
+UniMOD	154	821.7289	Hex3HexNAc1Pent1	N
+UniMOD	155	846.7815	Hex1HexNAc2dHex1Pent1	N
+UniMOD	156	860.808	Hex1HexNAc2dHex2	N
+UniMOD	157	862.7809	Hex2HexNAc2Pent1	N
+UniMOD	158	876.8074	Hex2HexNAc2dHex1	N
+UniMOD	159	892.8068	Hex3HexNAc2	N
+UniMOD	160	947.8423	Hex1HexNAc1NeuAc2	N
+UniMOD	161	923.7806	Hex3HexNAc2P1	N
+UniMOD	162	46.895	Selenium replaces sulphur in Methionine	M
+UniMOD	170	3.0077	glycosylated asparagine 18O labeling	N
+UniMOD	171	159.1144	Shimadzu 13CNBS	W
+UniMOD	172	153.1585	Shimadzu 12CNBS	W
+UniMOD	176	218.3346	Michael addition of BHT quinone methide to Cysteine and Lysine	H
+UniMOD	176	218.3346	Michael addition of BHT quinone methide to Cysteine and Lysine	C
+UniMOD	176	218.3346	Michael addition of BHT quinone methide to Cysteine and Lysine	K
+UniMOD	178	87.1866	phosphorylation to amine thiol	T
+UniMOD	178	87.1866	phosphorylation to amine thiol	S
+UniMOD	179	-15.9994	Serine to Alanine	S
+UniMOD	182	-15.9994	Threonine to a-aminobutyrate	T
+UniMOD	184	8.9339	C13 label	Y
+UniMOD	185	88.9138	C13 label (Phosphotyrosine)	Y
+UniMOD	186	132.1162	Hydroxyphenylglyoxal arginine	R
+UniMOD	187	282.2476	2 Hydroxyphenylglyoxal arginine	R
+UniMOD	188	5.9559	C13 label	F
+UniMOD	188	5.9559	C13 label	L	
+UniMOD	188	5.9559	C13 label	K	
+UniMOD	188	5.9559	C13 label	R	
+UniMOD	193	3.9995	O18 label at both C-terminal oxygens		C-term
+UniMOD	194	170.1674	6-aminoquinolyl-N-hydroxysuccinimidyl carbamate		N-term
+UniMOD	194	170.1674	6-aminoquinolyl-N-hydroxysuccinimidyl carbamate	K	
+UniMOD	195	170.252	APTA-d0	C	
+UniMOD	196	174.2784	APTA d3	C	
+UniMOD	197	184.2786	EAPTA d0	C	
+UniMOD	198	189.3094	EAPTA d5	C	
+UniMOD	199	32.0778	DiMethyl-CHD2		N-term
+UniMOD	199	32.0778	DiMethyl-CHD2	K	
+UniMOD	200	76.1838	EDT	T	
+UniMOD	200	76.1838	EDT	S	
+UniMOD	202	170.252	APTA- d0 with no neutral loss	C	
+UniMOD	203	170.252	APTA-d0 with quaternary amine loss	C	
+UniMOD	205	94.1112	Acrolein addition +94	K
+UniMOD	206	56.0633	Acrolein addition +56	K
+UniMOD	206	56.0633	Acrolein addition +56	H
+UniMOD	206	56.0633	Acrolein addition +56	C
+UniMOD	207	38.048	Acrolein addition +38	K
+UniMOD	208	76.096	Acrolein addition +76	K
+UniMOD	209	112.1265	Acrolein addition +112	K
+UniMOD	211	85.1045	N-ethyl iodoacetamide-d0	Y
+UniMOD	211	85.1045	N-ethyl iodoacetamide-d0	C
+UniMOD	212	90.1353	N-ethyl iodoacetamide-d5	Y
+UniMOD	212	90.1353	N-ethyl iodoacetamide-d5	C
+UniMOD	213	541.3005	ADP  Ribose addition	S
+UniMOD	213	541.3005	ADP  Ribose addition	N
+UniMOD	213	541.3005	ADP  Ribose addition	C
+UniMOD	213	541.3005	ADP  Ribose addition	R
+UniMOD	213	541.3005	ADP  Ribose addition	E
+UniMOD	214	144.1544	Applied Biosystems iTRAQ(TM) multiplexed quantitation chemistry	Y	
+UniMOD	214	144.1544	Applied Biosystems iTRAQ(TM) multiplexed quantitation chemistry		N-term
+UniMOD	214	144.1544	Applied Biosystems iTRAQ(TM) multiplexed quantitation chemistry	K	
+UniMOD	215	0.9848	deglycosylated asparagine	N	
+UniMOD	243	297.1478	label Cysteine with IGBP reagent	C	
+UniMOD	253	70.0898	Crotonaldehyde	K	
+UniMOD	253	70.0898	Crotonaldehyde	H	
+UniMOD	253	70.0898	Crotonaldehyde	C	
+UniMOD	254	26.0373	Acetaldehyde +26	K	
+UniMOD	254	26.0373	Acetaldehyde +26	H	
+UniMOD	255	28.0532	Acetaldehyde +28	K	
+UniMOD	255	28.0532	Acetaldehyde +28	H	
+UniMOD	256	40.0639	Propionaldehyde +40	K	
+UniMOD	256	40.0639	Propionaldehyde +40	H	
+UniMOD	258	1.9998	"O18 Labeling of Serine, Threonine or Tyrosine"	Y	
+UniMOD	258	1.9998	"O18 Labeling of Serine, Threonine or Tyrosine"	T	
+UniMOD	258	1.9998	"O18 Labeling of Serine, Threonine or Tyrosine"	S	
+UniMOD	259	7.9427	C13 and N15 label	K	
+UniMOD	260	96.0455	Thiophosphorylation	Y	
+UniMOD	260	96.0455	Thiophosphorylation	T	
+UniMOD	260	96.0455	Thiophosphorylation	S	
+UniMOD	261	215.2495	4-sulfophenyl isothiocyanate	K	
+UniMOD	261	215.2495	4-sulfophenyl isothiocyanate		N-term
+UniMOD	262	3.0185	Trideuteration	L	
+UniMOD	264	121.2028	phosphorylation to pyridyl thiol	T	
+UniMOD	264	121.2028	phosphorylation to pyridyl thiol	S	
+UniMOD	267	9.9296	C13 and N15 label	R	
+UniMOD	268	5.9567	C13 and N15 label	V	
+UniMOD	269	9.9273	C13 and N15 label	F	
+UniMOD	270	362.3738	nucleophilic addtion to cytopiloyne	Y	
+UniMOD	270	362.3738	nucleophilic addtion to cytopiloyne	S	
+UniMOD	270	362.3738	nucleophilic addtion to cytopiloyne	R	
+UniMOD	270	362.3738	nucleophilic addtion to cytopiloyne	P	
+UniMOD	270	362.3738	nucleophilic addtion to cytopiloyne		N-term
+UniMOD	270	362.3738	nucleophilic addtion to cytopiloyne	K	
+UniMOD	270	362.3738	nucleophilic addtion to cytopiloyne	C	
+UniMOD	271	380.3891	nucleophilic addition to cytopiloyne+H2O	Y	
+UniMOD	271	380.3891	nucleophilic addition to cytopiloyne+H2O	T	
+UniMOD	271	380.3891	nucleophilic addition to cytopiloyne+H2O	S	
+UniMOD	271	380.3891	nucleophilic addition to cytopiloyne+H2O	R	
+UniMOD	271	380.3891	nucleophilic addition to cytopiloyne+H2O		N-term
+UniMOD	271	380.3891	nucleophilic addition to cytopiloyne+H2O	K	
+UniMOD	271	380.3891	nucleophilic addition to cytopiloyne+H2O	C	
+UniMOD	272	136.1265	sulfonation of N-terminus		N-term
+UniMOD	273	253.2512	covalent modification of lysine by cross-linking reagent	K	
+UniMOD	275	28.9982	S-nitrosylation	C	
+UniMOD	276	183.2276	Aminoethylbenzenesulfonylation		N-term
+UniMOD	276	183.2276	Aminoethylbenzenesulfonylation	K	
+UniMOD	276	183.2276	Aminoethylbenzenesulfonylation	S	
+UniMOD	276	183.2276	Aminoethylbenzenesulfonylation	H	
+UniMOD	276	183.2276	Aminoethylbenzenesulfonylation	Y	
+UniMOD	277	46.0916	Methyl methanethiosulfonate	C	
+UniMOD	278	44.0526	Ethanolation of Cys	C	
+UniMOD	279	15.9994	Cysteine sulfenic acid	C	
+UniMOD	280	28.0532	Ethylation		N-term
+UniMOD	280	28.0532	Ethylation	K	
+UniMOD	280	28.0532	Ethylation	E	
+UniMOD	281	765.5182	Cysteine modified Coenzyme A	C	
+UniMOD	282	14.0266	N-methylation		N-term
+UniMOD	283	28.0532	N-ethylation		N-term
+UniMOD	284	16.0389	Deuterium Methylation of Lysine	K	
+UniMOD	285	155.1744	Light Sulfanilic Acid (SA) C12	E	
+UniMOD	285	155.1744	Light Sulfanilic Acid (SA) C12	D	
+UniMOD	285	155.1744	Light Sulfanilic Acid (SA) C12		C-term
+UniMOD	286	161.1303	Heavy Sulfanilic Acid (SA) C13	E	
+UniMOD	286	161.1303	Heavy Sulfanilic Acid (SA) C13	D	
+UniMOD	286	161.1303	Heavy Sulfanilic Acid (SA) C13		C-term
+UniMOD	288	13.9835	Tryptophan oxidation to oxolactone	W	
+UniMOD	289	356.4835	Biotin polyethyleneoxide amine		C-term
+UniMOD	289	356.4835	Biotin polyethyleneoxide amine	D	
+UniMOD	289	356.4835	Biotin polyethyleneoxide amine	E	
+UniMOD	290	428.6124	Pierce EZ-Link Biotin-HPDP	C	
+UniMOD	291	200.59	Mercury Mercaptan	C	
+UniMOD	292	322.1654	"Cross-link of (Iodo)-uracil MP with W,F,Y"	W	
+UniMOD	292	322.1654	"Cross-link of (Iodo)-uracil MP with W,F,Y"	Y	
+UniMOD	292	322.1654	"Cross-link of (Iodo)-uracil MP with W,F,Y"	F	
+UniMOD	293	145.1796	3-(carbamidomethylthio)propanoyl	K	
+UniMOD	293	145.1796	3-(carbamidomethylthio)propanoyl		N-term
+UniMOD	294	326.4145	biotinoyl-iodoacetyl-ethylenediamine	C	
+UniMOD	295	146.1412	Fucose	T	
+UniMOD	295	146.1412	Fucose	S	
+UniMOD	298	17.0451	deuterated methyl ester	E	
+UniMOD	298	17.0451	deuterated methyl ester	D	
+UniMOD	298	17.0451	deuterated methyl ester		C-term
+UniMOD	299	44.0095	Carboxylation	K	
+UniMOD	299	44.0095	Carboxylation	W	
+UniMOD	299	44.0095	Carboxylation	D	
+UniMOD	300	58.0361	Hydroxylethanone	W	
+UniMOD	301	190.1986	Monobromobimane derivative	C	
+UniMOD	302	170.1641	Menadione derivative	K	
+UniMOD	302	170.1641	Menadione derivative	C	
+UniMOD	303	76.1176	Cysteine mercaptoethanol	C	
+UniMOD	305	1445.3331	Fucosylated biantennary (-2 galactose)	N	
+UniMOD	306	80.0632	Sulfitolysis	C	
+UniMOD	307	1607.4737	Fucosylated biantennary (-1 galactose)	N	
+UniMOD	308	1769.6143	Fucosylated biantennary	N	
+UniMOD	309	1299.1919	Biantennary (-2 galactose)	N
+UniMOD	310	1461.3325	Biantennary (-1 galactose)	N
+UniMOD	311	1623.4731	Biantennary	N
+UniMOD	312	120.1502	Cysteinylation	C
+UniMOD	313	-128.1723	C terminal -K from HC of MAb	K
+UniMOD	314	111.0987	N-methylmaleimide	C
+UniMOD	316	78.1118	"2,5-dimethypyrrole"	K
+UniMOD	317	-18.0153	D-Succinimide	D
+UniMOD	318	62.0694	MDA adduct +62	K
+UniMOD	319	54.0474	MDA adduct +54	K
+UniMOD	320	143.1406	N-ethylmaleimide hydrolysis	C
+UniMOD	321	-17.0073	N-Succinimide	N
+UniMOD	323	713.5626	bis-N-I-sulfonerahodamine	C
+UniMOD	324	87.1435	"dimethyl 3,3'-dithiobispropionimidate"	R
+UniMOD	324	87.1435	"dimethyl 3,3'-dithiobispropionimidate"	Q
+UniMOD	324	87.1435	"dimethyl 3,3'-dithiobispropionimidate"	N
+UniMOD	324	87.1435	"dimethyl 3,3'-dithiobispropionimidate"	K	
+UniMOD	324	87.1435	"dimethyl 3,3'-dithiobispropionimidate"		N-term
+UniMOD	325	573.7485	10-ethoxyphosphinyl-N-(biotinamidopentyl)decanamide	T	
+UniMOD	325	573.7485	10-ethoxyphosphinyl-N-(biotinamidopentyl)decanamide	Y	
+UniMOD	325	573.7485	10-ethoxyphosphinyl-N-(biotinamidopentyl)decanamide	S	
+UniMOD	327	44.1188	S-Ethylcystine from Serine	S	
+UniMOD	329	18.0377	monomethylated arginine	R	
+UniMOD	330	36.0754	dimethylated arginine	R	
+UniMOD	332	525.6658	thiophosphate labeled with biotin-HPDP	Y	
+UniMOD	332	525.6658	thiophosphate labeled with biotin-HPDP	T	
+UniMOD	332	525.6658	thiophosphate labeled with biotin-HPDP	S	
+UniMOD	333	448.5371	6-N-biotinylaminohexyl isopropyl phosphate	S	
+UniMOD	333	448.5371	6-N-biotinylaminohexyl isopropyl phosphate	Y	
+UniMOD	333	448.5371	6-N-biotinylaminohexyl isopropyl phosphate	T	
+UniMOD	334	146.1875	CAMthiopropanoyl of Lys	K	
+UniMOD	335	158.238	reduced 4-Hydroxynonenal	K	
+UniMOD	335	158.238	reduced 4-Hydroxynonenal	H
+UniMOD	335	158.238	reduced 4-Hydroxynonenal	C
+UniMOD	337	13.0418	Michael addition with methylamine	S
+UniMOD	337	13.0418	Michael addition with methylamine	T
+UniMOD	340	78.8961	bromination	F
+UniMOD	340	78.8961	bromination	H
+UniMOD	340	78.8961	bromination	W
+UniMOD	341	-2.0159	threonine oxidation to 2-amino-3-oxo-butanoic acid	T
+UniMOD	342	15.0146	Tyrosine oxidation to 2-aminotyrosine	Y
+UniMOD	343	199.27	oxidized Arginine biotinylated with biotin hydrazide	R
+UniMOD	344	-43.0711	Arginine oxidation to glutamic semialdehyde	R
+UniMOD	345	47.9982	cysteine oxidation to cysteic acid	C
+UniMOD	346	31.9988	phenylalanine oxidation to dihydroxyphenylalanine	F
+UniMOD	347	31.9988	tryptophan oxidation to formylkynurenin	W
+UniMOD	348	-23.0366	histidine oxidation to aspargine	H
+UniMOD	349	-22.0519	histidine oxidation to aspartic acid	H
+UniMOD	350	19.9881	tryptophan oxidation to hydroxykynurenin	W	
+UniMOD	351	3.9887	tryptophan oxidation to kynurenin	W	
+UniMOD	352	-1.0311	Lysine oxidation to aminoadipic semialdehyde	K	
+UniMOD	353	241.31	oxidized Lysine biotinylated with biotin hydrazide	K	
+UniMOD	354	44.9976	Oxidation to nitro	Y	
+UniMOD	354	44.9976	Oxidation to nitro	W	
+UniMOD	357	258.3405	oxidized proline biotinylated with biotin hydrazide	P	
+UniMOD	358	15.9994	Proline oxidation to glutamic semialdehyde	P	
+UniMOD	359	13.9835	proline oxidation to pyroglutamic acid	P	
+UniMOD	360	-30.026	Proline oxidation to pyrrolidinone	P	
+UniMOD	361	240.3252	oxidized Threonine biotinylated with biotin hydrazide	T	
+UniMOD	362	164.1394	Diisopropylphosphate	S	
+UniMOD	362	164.1394	Diisopropylphosphate	Y	
+UniMOD	363	122.0596	monoisopropyl phosphate	Y	
+UniMOD	363	122.0596	monoisopropyl phosphate	S	
+UniMOD	364	111.05	"Bruker Daltonics SERVA-ICPL(TM) quantification chemistry, heavy form"		N-term
+UniMOD	364	111.05	"Bruker Daltonics SERVA-ICPL(TM) quantification chemistry, heavy form"	K	
+UniMOD	365	105.0941	"Bruker Daltonics SERVA-ICPL(TM) quantification chemistry, light form"		N-term
+UniMOD	365	105.0941	"Bruker Daltonics SERVA-ICPL(TM) quantification chemistry, light form"	K	
+UniMOD	366	2.9845	Deamidation in presence of O18	Q	
+UniMOD	366	2.9845	Deamidation in presence of O18	N	
+UniMOD	367	-43.0711	Arginine oxidation to gamma-glutamyl semialdehyde	R	
+UniMOD	368	-34.0809	Dehydroalanine (from Cysteine)	C	
+UniMOD	369	-28.0101	Pyrrolidone from Proline	P	
+UniMOD	371	86.0892	Michael addition of hydroxymethylvinyl ketone to cysteine	C	
+UniMOD	372	-42.04	Ornithine from Arginine	R	
+UniMOD	373	46.895	Selenium replaces sulphur in cysteine	C	
+UniMOD	374	-1.0079	Half of a disulfide bridge	C	
+UniMOD	375	143.2068	Diphthamide	H	
+UniMOD	376	220.3505	hydroxyfarnesyl	C	
+UniMOD	377	576.9334	diacylglycerol	C	
+UniMOD	378	72.0627	carboxyethyl	K	
+UniMOD	379	87.1204	hypusine	K	
+UniMOD	380	266.4204	retinal	K	
+UniMOD	381	14.9683	alpha-amino adipic acid	K	
+UniMOD	382	-33.0961	pyruvic acid from N-term cys	C	
+UniMOD	383	-17.0305	pyruvic acid from N-term ser	S	
+UniMOD	384	0.9848	phenyllactyl from N-term Phe	F	N-term
+UniMOD	385	-17.0305	oxobutanoic acid from N term Thr	T	N-term
+UniMOD	386	100.0728	succinylated N-term Trp	W	N-term
+UniMOD	387	586.678	phycocyanobilin	C	
+UniMOD	388	588.6939	phycoerythrobilin	C	
+UniMOD	389	584.6621	phytochromobilin	C	
+UniMOD	390	616.4873	heme	H	
+UniMOD	390	616.4873	heme	C	
+UniMOD	391	520.2668	molybdopterin	C	
+UniMOD	392	29.9829	quinone	W	
+UniMOD	392	29.9829	quinone	Y	
+UniMOD	393	340.2806	glucosylgalactosyl hydroxylysine	K	
+UniMOD	394	123.0477	glycosylphosphatidylinositol		C-term
+UniMOD	395	881.6335	phosphoribosyl dephospho-coenzyme A	S	
+UniMOD	396	197.1262	glycerylphosphorylethanolamine	E	
+UniMOD	397	469.7849	triiodo	Y	
+UniMOD	398	595.6815	tetraiodo	Y	
+UniMOD	399	-18.0153	Dehydro	S	
+UniMOD	399	-18.0153	Dehydro	T	
+UniMOD	400	-94.1112	Dehydroalanine (from Tyrosine)	Y	
+UniMOD	401	-2.0159	didehydro	S	
+UniMOD	401	-2.0159	didehydro	Y	
+UniMOD	402	-18.0815	oxoalanine	C	
+UniMOD	403	-15.0146	lactic acid from N-term Ser		N-term
+UniMOD	405	329.2059	AMP binding site	T	
+UniMOD	405	329.2059	AMP binding site	K	
+UniMOD	405	329.2059	AMP binding site	Y	
+UniMOD	405	329.2059	AMP binding site	H	
+UniMOD	407	146.1427	hydroxycinnamyl	C	
+UniMOD	408	148.114	glycosyl-L-hydroxyproline	P	
+UniMOD	409	454.3279	flavin mononucleotide	H	
+UniMOD	409	454.3279	flavin mononucleotide	C	
+UniMOD	410	635.1417	S-diphytanylglycerol diether	C	
+UniMOD	411	119.1207	phenyl isocyanate		N-term
+UniMOD	412	124.1515	d5-phenyl isocyanate		N-term
+UniMOD	413	345.2053	phospho-guanosine	K	
+UniMOD	413	345.2053	phospho-guanosine	H	
+UniMOD	414	30.026	hydroxymethyl	N	
+UniMOD	415	1618.9096	L-selenocysteinyl molybdenum bis(molybdopterin guanine dinucleotide)	C	
+UniMOD	416	418.3973	dipyrrolylmethanemethyl	C	
+UniMOD	417	306.166	uridine phosphodiester	Y	
+UniMOD	417	306.166	uridine phosphodiester	H	
+UniMOD	418	31.9988	trihydroxyphenylalanine	Y	
+UniMOD	419	154.0584	glycerophospho	S
+UniMOD	420	16.0656	thiocarboxylic acid	G
+UniMOD	421	32.065	persulfide	C
+UniMOD	422	70.0468	N-pyruvic acid 2-iminyl	V
+UniMOD	422	70.0468	N-pyruvic acid 2-iminyl	C
+UniMOD	422	70.0468	N-pyruvic acid 2-iminyl	K
+UniMOD	423	78.96	selenyl	C
+UniMOD	424	1572.0146	molybdenum bis(molybdopterin guanine dinucleotide)	C
+UniMOD	424	1572.0146	molybdenum bis(molybdopterin guanine dinucleotide)	D
+UniMOD	425	31.9988	dihydroxy	R
+UniMOD	425	31.9988	dihydroxy	P
+UniMOD	425	31.9988	dihydroxy	K
+UniMOD	426	126.1962	octanoyl	S
+UniMOD	426	126.1962	octanoyl	T
+UniMOD	427	176.1241	glucuronosyl	S
+UniMOD	428	283.1724	N-acetylglucosamine-1-phosphoryl	S
+UniMOD	429	242.1205	phosphoglycosyl-D-mannose-1-phosphoryl	S	
+UniMOD	430	-18.0153	C-term blocking imide		C-term
+UniMOD	431	236.3929	palmitoleyl	C	
+UniMOD	432	368.6383	cholesterol ester		C-term
+UniMOD	433	264.4046	"3,4-didehydroretinylidene"	K	
+UniMOD	434	294.3859	"cis-14-hydroxy-10,13-dioxo-7-heptadecenoic ester"	D	
+UniMOD	435	109.1259	4-methyl-delta-1-pyrroline-5-carboxyl	K	
+UniMOD	436	614.4714	hydroxyheme	E	
+UniMOD	437	386.3003	(3-aminopropyl)(L-aspartyl-1-amino)phosphoryl-5-adenosine		C-term
+UniMOD	438	25.0095	cyano	C	
+UniMOD	439	342.876	hydrogenase diiron subcluster	C	
+UniMOD	440	42.04	amidino	C	
+UniMOD	441	238.4088	N-palmitoyl		N-term
+UniMOD	442	438.3285	O3-(riboflavin phosphoryl)	T	
+UniMOD	442	438.3285	O3-(riboflavin phosphoryl)	S	
+UniMOD	443	456.3438	S-(4a-FMN)	C	
+UniMOD	444	922.067	copper sulfido molybdopterin cytosine dinuncleotide	C	
+UniMOD	445	59.0871	"5-hydroxy-N6,N6,N6-trimethyl"	K	
+UniMOD	446	44.0095	N-carboxylation of Met		N-term
+UniMOD	447	-15.9994	reduction	D	
+UniMOD	448	831.6871	microcin E492 siderophore ester from serine		C-term
+UniMOD	449	154.2493	lipid	S	
+UniMOD	449	154.2493	lipid	T	
+UniMOD	450	129.114	monoglutamyl	E	
+UniMOD	450	129.114	monoglutamyl		C-term
+UniMOD	451	258.228	diglutamyl		C-term
+UniMOD	451	258.228	diglutamyl	E	
+UniMOD	452	387.3419	triglutamyl		C-term
+UniMOD	452	387.3419	triglutamyl	E	
+UniMOD	453	516.4559	tetraglutamyl		C-term
+UniMOD	453	516.4559	tetraglutamyl	E	
+UniMOD	454	161.1558	Hexosamine	W	
+UniMOD	454	161.1558	Hexosamine	T	
+UniMOD	454	161.1558	Hexosamine	N	
+UniMOD	454	161.1558	Hexosamine	K	
+UniMOD	455	154.2096	"One end of crosslink attached, one end free"		N-term
+UniMOD	455	154.2096	"One end of crosslink attached, one end free"	K	
+UniMOD	456	122.1677	Both ends of crosslink attached to same peptide		N-term
+UniMOD	456	122.1677	Both ends of crosslink attached to same peptide	K	
+UniMOD	457	175.1855	"naphthalene-2,3-dicarboxaldehyde"		N-term
+UniMOD	457	175.1855	"naphthalene-2,3-dicarboxaldehyde"	K	
+UniMOD	464	221.2054	4-sulfophenyl isothiocyanate (Heavy C13)		N-term
+UniMOD	464	221.2054	4-sulfophenyl isothiocyanate (Heavy C13)	K	
+UniMOD	465	32.0778	N-reductive amination-D		N-term
+UniMOD	472	59.1334	aminoethylcysteine	S	
+UniMOD	472	59.1334	aminoethylcysteine	T	
+UniMOD	475	136.1265	Sulfonation of Lysine	K	
+UniMOD	476	128.1922	4-trimethyllammoniumbutyryl-		N-term
+UniMOD	476	128.1922	4-trimethyllammoniumbutyryl-	K	
+UniMOD	477	137.2476	d9-4-trimethyllammoniumbutyryl-	K	
+UniMOD	477	137.2476	d9-4-trimethyllammoniumbutyryl-		N-term
diff --git a/PTMSearchBigDB.py b/PTMSearchBigDB.py
new file mode 100644
index 0000000..b3a0adb
--- /dev/null
+++ b/PTMSearchBigDB.py
@@ -0,0 +1,171 @@
+#Title:          PTMSearchBigDB.py
+#Author:         Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+#Created:        2005
+# Copyright 2007,2008,2009 The Regents of the University of California
+# All Rights Reserved
+#
+# Permission to use, copy, modify and distribute any part of this
+# program for educational, research and non-profit purposes, by non-profit
+# institutions only, without fee, and without a written agreement is hereby
+# granted, provided that the above copyright notice, this paragraph and
+# the following three paragraphs appear in all copies.
+#
+# Those desiring to incorporate this work into commercial
+# products or use for commercial purposes should contact the Technology
+# Transfer & Intellectual Property Services, University of California,
+# San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+# Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+#
+# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+# INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+# IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+# OF SUCH DAMAGE.
+#
+# THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+# OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+# ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+# REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+# EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+# THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+
+"""
+Context:
+  We've performed an unrestrictive search of many spectra against a small database.
+  We've found a collection of modified peptides, after taking a p-value threshold at
+  the spectrum level.  We want to better distinguish between VALID and INVALID annotations.
+
+Plan:
+  Let's take each modified peptide and search its consensus spectrum against
+  a large database (Swiss-Prot).  The resulting delta-score should be an informative feature
+  when it comes to distinguishing between the VALID and INVALID annotations.  The delta-score
+  will be zero (actually, slightly negative) if the consensus spectrum matches an unmodified
+  peptide (e.g. an unanticipated contaminant).
+"""
+
+import os
+import sys
+import string
+import getopt
+import MSSpectrum
+from Utils import *
+Initialize()
+from TrainPTMFeatures import FormatBits
+
+class PeptideFeatureBag:
+    pass
+
+class PTMSearcher:
+    def __init__(self):
+        self.HeaderLines = []
+        self.ConsensusSpectrumDir = "ptmscore\\LensLTQ-99-5\\spectra"
+        self.PeptideFeatureFileName = "PTMScore\\LensLTQ-99-5.txt"
+        self.FixedFeatureFileName = None
+        self.PeptideFeatureDir = None
+        self.ModifiedPeptides = []
+        self.InspectOut = None        
+    def ParsePeptideFeatureFile(self):
+        """
+        Parse the contents of the peptide feature-file.  We need to know the
+        path to the consensus spectrum file, the consensus annotation MQScore,
+        and the index.
+        """
+        File = open(self.PeptideFeatureFileName, "rb")
+        LineNumber = 0
+        for FileLine in File.xreadlines():
+            LineNumber +=1
+            if FileLine[0] == "#":
+                self.HeaderLines.append(FileLine)
+                continue
+            Bits = list(FileLine.replace("\r", "").replace("\n", "").split("\t"))
+            try:
+                ConsensusMQScore = float(Bits[FormatBits.ConsensusMQScore])
+            except:
+                print "** Error: Can't parse consensus MQScore from line %s!"%LineNumber
+                print Bits
+                continue
+            PeptideFeatures = PeptideFeatureBag()
+            PeptideFeatures.Bits = Bits
+            PeptideFeatures.ConsensusMQScore = ConsensusMQScore
+            NiceAnnotation = Bits[FormatBits.Peptide].replace("*", "-")
+            PeptideFeatures.Bits[FormatBits.Peptide] = NiceAnnotation
+            FirstResidue = NiceAnnotation[2]
+            Charge = Bits[FormatBits.Charge]
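+            # Consensus spectra are stored one file per (peptide, charge) as
+            # <ConsensusSpectrumDir>/<first peptide residue>/<annotation>.<charge>.dta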
+            PeptideFeatures.SpectrumPath = os.path.join(self.ConsensusSpectrumDir, FirstResidue, "%s.%s.dta"%(NiceAnnotation, Charge))
+            self.ModifiedPeptides.append(PeptideFeatures)
+        File.close()
+        print "Parsed %s modified peptides from %s file lines."%(len(self.ModifiedPeptides), LineNumber)
+    def ComputeDeltaScoreFeatureFile(self, FileName):
+        File = open(FileName, "rb")
+        OldSpectrum = None
+        for FileLine in File.xreadlines():
+            if FileLine[0] == "#":
+                continue
+            Bits = FileLine.split("\t")
+            Spectrum = (Bits[0], Bits[1])
+            if Spectrum == OldSpectrum:
+                continue
+            OldSpectrum = Spectrum
+            MQScore = float(Bits[5])
+            ScanNumber = int(Bits[1])
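+            # The scan number reported by the big-database search is the index of the
+            # consensus spectrum, and hence of the corresponding modified peptide, in
+            # self.ModifiedPeptides (assuming the consensus spectra were searched in
+            # feature-file order).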
+            PeptideFeatures = self.ModifiedPeptides[ScanNumber]
+            while len(PeptideFeatures.Bits) <= FormatBits.ConsensusDeltaBigDB:
+                  PeptideFeatures.Bits.append("")
+            PeptideFeatures.Bits[FormatBits.BigDBAnnotation] = Bits[2]
+            PeptideFeatures.Bits[FormatBits.BigDBMQScore] = Bits[5]
+            DeltaScore = float(PeptideFeatures.ConsensusMQScore - MQScore)
+            PeptideFeatures.Bits[FormatBits.ConsensusDeltaBigDB] = str(DeltaScore)
+        File.close()
+    def ComputeDeltaScoreFeature(self):
+        """
+        Parse annotations from the Inspect search.  Update the corresponding modified
+        peptides with their mod-less (big-database) annotation and delta-score.
+        """
+        # Iterate over just one result file, or a directory full of results-files:
+        if os.path.isdir(self.InspectOut):
+            for FileName in os.listdir(self.InspectOut):
+                Path = os.path.join(self.InspectOut, FileName)
+                self.ComputeDeltaScoreFeatureFile(Path)
+        else:
+            self.ComputeDeltaScoreFeatureFile(self.InspectOut)
+        # Write out the fixed feature-rows:
+        File = open(self.FixedFeatureFileName, "wb")
+        for HeaderLine in self.HeaderLines:
+            File.write(HeaderLine)
+        for Peptide in self.ModifiedPeptides:
+            FileLine = string.join(Peptide.Bits, "\t")
+            File.write(FileLine + "\n")
+        File.close()
+    def Main(self):
+        self.ParsePeptideFeatureFile()
+        self.ComputeDeltaScoreFeature()
+    def ParseCommandLine(self, Arguments):
+        (Options, Args) = getopt.getopt(Arguments, "d:w:r:")
+        OptionsSeen = {}
+        for (Option, Value) in Options:
+            OptionsSeen[Option] = 1
+            if Option == "-d":
+                self.PeptideFeatureDir = Value
+            elif Option == "-w":
+                self.FixedFeatureFileName = Value
+            elif Option == "-r":
+                self.InspectOut = Value
+        if not self.PeptideFeatureDir:
+            print UsageInfo
+            sys.exit(-1)
+        self.PeptideFeatureFileName = os.path.join(self.PeptideFeatureDir, "PTMFeatures.txt")
+        self.ConsensusSpectrumDir = os.path.join(self.PeptideFeatureDir, "Clusters")
+        
+UsageInfo = """
+PTMSearchBigDB arguments:
+  -d [DIR]: Peptide directory.  This directory should contain PTMFeatures.txt, as well
+     as the consensus spectra and clusters.
+  -w [FILE]: Output file, for peptides with delta-score included
+  -r [FILE]: Inspect output filename
+"""
+  
+if __name__ == "__main__":
+    Searcher = PTMSearcher()
+    Searcher.ParseCommandLine(sys.argv[1:])
+    Searcher.Main()
diff --git a/PTMods.txt b/PTMods.txt
new file mode 100644
index 0000000..a0884a7
--- /dev/null
+++ b/PTMods.txt
@@ -0,0 +1,105 @@
+#Name	Mass	Monoisotopic	Residues
+#Acetylation	42.0106	K	
+#Alkylation	14.01564	CKRHDENQ	
+#Amidation	-0.984		
+#S-archaeol	634.6628		
+	
+#Biotin	226.0776	K	
+#Bromination	77.9105	HFW	
+#Carbamylation	43.00581	K	
+#Cholesterol	368.3443		
+#plants	CHDH	294.39	D
+#Citrullination	0.9840276	R	
+#C-Mannosylation	162.052823		
+#Deamidation	0.984	NQ	
+#S-diacylglycerol cysteine	576.51171	
+#Dimethylation	28.0314	CKRHDENQ
+#FAD	783.1415	CH
+#Farnesylation	204.1878	C
+#Formylation	27.9949	
+#Geranyl-geranylation	272.2504	C
+#Gamma-carboxyglutamic acid	43.98983	E
+#O-GlcNAc	203.0794	ST
+#Glucosylation (glycation)	162.0528	NTK
+#Glutathionylation	305.0680814	C
+#Hydroxylation	15.9949	PKDN
+#Lipoyl	188.033	K
+#Myristoylation	210.1984	
+
+#n-Octanoate	126.1044	S
+#Omega-hydroxyceramide glutamate ester	760.73082	E	
+#Palmitoylation	238.2297	STCK	
+#yeast	PALE	236.39	C
+#Phosphatidylethanolamine amidated glycine	773.54443		
+# Generally phosphorylation only affects S, T, and Y.  It *can* affect CDH, but that's relatively rare.
+#Phosphorylation	79.9663	STY
+Phosphorylation	80	STY
+#Phosphorylation	79.9663	STYHCD
+Methylation	14.0157	CKRHDENQ	3
+krmethylation	14.0157	KR	2
+CMethylation	14.0157	C	3
+Biotin	339.16	K
+HelgeProbe	511.6	C
+Oxidation	16	M	2
+#MissingCarb	-57	C
+CSmall	-14	C
+CBig	103.143	C
+DoubleOxidation	32	M
+CysteineMod	57.0518	C
+MCysteineMod	71.067	C
+#Pyridoxal phosphate	229.014	K	
+#Phosphopantetheine	339.078	S	
+#Pyrrolidone carboxylic acid	-17.0266	Q	
+#Sulfation	79.9568	Y	
+#Trimethylation	42.0471	CKRHDENQ	
+Acetylation	42.0106	K
+Deamidation	-17	QC
+#Hydroxylation	15.9949	PKDN
+Hydroxylation	15.9949	PK
+Sulfation	80	Y
+S-Nitrosylation	28.99017	C
+sprobe	766.7	S
+cprobe	511.6	C
+Beta-methylthiolation	45.9877118	C
+1Cysteine sulfenic acid (-SOH)	15.9949146	C	
+2Cysteine sulfinic acid (-SO2H)	31.9898292	C
+OxoHist	16.0	C
+Desmosine	-58	K
+EpsImine	12	K
+Citruline	1	R
+terminal	14.01564	A
+cter	14.01564	A
+nomet	-14.02	A
+nt4	4.0	ACDEFGHIKLMNPQRSTVWY
+nt6	6.0	ACDEFGHIKLMNPQRSTVWY
+nt10	10.0	ACDEFGHIKLMNPQRSTVWY
+lys4	4.0	K
+lys6	6.0	K
+lys10	10.0	K
+#-57ikkb	-57.0	C
+
+#1lens	1.0	CK
+
+##-18ikkb	-18.0	DET
+##1ikkb	1.0	N
+##10ikkb	10.0	A
+##14ikkb	14.0	CK
+##16ikkb	16.0	MW
+##22ikkb	22.0	DE
+##25ikkb	25.0	L
+##28ikkb	28.01	K
+##32ikkb	32.01	MW
+##40ikkb	40.0	P
+##+43k	43.0	K
+##42lens	42.0	A
+##43lens	43.0	A
+##38lens	38.0	A
+##21lens	21.0	A
+cysl	-2	C
+cyst	12	C
+g14	14	G
+dehydration	-18	DES
+sodium	22	DE
+dopa	16	Y
+gtod	58	G
+atov	29	A
\ No newline at end of file
diff --git a/PValue.c b/PValue.c
new file mode 100644
index 0000000..f64de02
--- /dev/null
+++ b/PValue.c
@@ -0,0 +1,662 @@
+//Title:          PValue.c
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+#include "CMemLeak.h"
+#include <string.h>
+#include <stdlib.h>
+#include <math.h>
+#include "PValue.h"
+#include "Score.h"
+#include "Errors.h"
+
+double GammaCof[] = {76.18009172947146, -86.50532032941677,
+    24.01409824083091, -1.231739572450155, 
+    0.1208650973866179e-2, -0.5395239384952e-5};
+
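+// Gamma() evaluates the gamma function using the standard six-term Lanczos series
+// (the classic "gammln" approximation): it computes ln Gamma(Z) and returns its
+// exponential.  The coefficients above are the Lanczos coefficients.  Gamma() is
+// used below to normalize the gamma density that models false-match F-scores.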
+double Gamma(double Z)
+{
+    double X;
+    double Y;
+    double Temp;
+    double Ser;
+    int J;
+    //////////
+    X = Z;
+    Y = Z;
+    Temp = X + 5.5;
+    Temp -= (X + 0.5) * log(Temp);
+    Ser = 1.000000000190015;
+    for (J = 0; J < 6; J++)
+    {
+        Y += 1;
+        Ser += GammaCof[J] / Y;
+    }
+    Z = -Temp + log(2.5066282746310005 * Ser / X);
+    return exp(Z);
+}
+
+#define F_BIN_MAX 511
+#define F_BIN_OFFSET 40 // offset applied because F-scores can be negative, but array indexes cannot be
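+// An F-score F lands in bin floor(10 * F) + F_BIN_OFFSET, clamped to [0, F_BIN_MAX],
+// so each bin spans 0.1 F-score units and bin 0 corresponds to an F-score of -4.0.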
+
+void DebugPrintPValueCurve(char* FileName, int* FScoreHistogram, double* OddsTrue)
+{
+    FILE* PValueFile;
+    int FBin;
+    float X;
+    //
+    PValueFile = fopen(FileName, "wb");
+    if (!PValueFile)
+    {
+        printf("** Not debug-printing the p-value curve.\n");
+        return;
+    }
+    for (FBin = 0; FBin <= F_BIN_MAX; FBin++)
+    {
+        X = (FBin - F_BIN_OFFSET) / (float)10.0;
+        fprintf(PValueFile, "%d\t%d\t%.3f\t%.3f\t\n", FBin, FScoreHistogram[FBin], X, 1.0 - OddsTrue[FBin]);
+    }
+    fclose(PValueFile);
+}
+
+#define EM_CYCLE_COUNT 300
+#define SQRT_2_PI (float)2.5066282746310002
+
+#define MAX_BITS 30
+#define PROCESS_COMPUTE_MEAN_DELTA 0
+#define PROCESS_INITIALIZE_SCORE_HISTOGRAM 1
+#define PROCESS_WRITE_PVALUES 2
+
+#define BIT_INDEX_CHARGE 4
+#define BIT_INDEX_MQSCORE 5
+#define BIT_INDEX_PVALUE 13
+#define BIT_INDEX_FSCORE 14
+#define BIT_INDEX_DELTA_SCORE 16
+#define DEFAULT_MQ_SCORE_WEIGHT (float)0.3
+#define DEFAULT_DELTA_SCORE_WEIGHT (float)1.5
+#define BLIND_MQ_SCORE_WEIGHT (float)0.3
+#define BLIND_DELTA_SCORE_WEIGHT (float)2.25
+
+float MQScoreWeight;
+float DeltaScoreWeight;
+
+typedef struct PValueParseInfo
+{
+    FILE* OutputFile;
+    int TotalMatches;
+    float MeanDeltaScore;
+    int Action;
+    char CurrentSpectrum[512];
+} PValueParseInfo;
+
+typedef struct PValueInfo
+{
+    float MeanDeltaScore2;
+    float MeanDeltaScore3;
+    int TotalMatches2;
+    int TotalMatches3;
+    char* FinalOutputPath;
+    int FScoreHistogram2[F_BIN_MAX + 1];
+    double OddsTrue2[F_BIN_MAX + 1];
+    int FScoreHistogram3[F_BIN_MAX + 1];
+    double OddsTrue3[F_BIN_MAX + 1];
+    PValueParseInfo* ParseInfo;
+} PValueInfo;
+
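+// Parameters of the two-component F-score mixture: a normal distribution for true
+// matches (MeanTrue, VarianceTrue) and a gamma distribution for false matches
+// (parameterized from the false mean/variance as KFalse and ThetaFalse, shifted by
+// GammaOffset so its support stays positive), plus the mixing prior PriorProbabilityTrue.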
+typedef struct PValueModel
+{
+    double MeanTrue;
+    double VarianceTrue;
+    double MeanFalse;
+    double VarianceFalse;
+    double PriorProbabilityTrue;
+    double ThetaFalse;
+    double KFalse;
+    double GammaOffset;
+    double StdDevTrue;
+    double GammaDemonFalse;
+    double CountTrue;
+    double CountFalse;
+} PValueModel;
+
+// CustomTok is a variant of strtok: It returns once for every occurrence of a delimiter,
+// rather than once for every contiguous block of delimiters.  Why the difference?  
+// When processing tab-delimited files, we want to properly handle empty columns (corresponding to 
+// occurrences of \t\t in the text).
+static char* CustomTokEnd;
+static char* CustomTokNext;
+char* CustomTok(char* Buffer, char* Delimiters)
+{
+    char* CheckPos;
+    char* StringStart;
+    char* CheckDelimiter;
+    //
+    if (Buffer)
+    {
+        CustomTokEnd = Buffer + strlen(Buffer);
+        CheckPos = Buffer;
+        StringStart = Buffer;
+    }
+    else
+    {
+        CheckPos = CustomTokNext;
+        StringStart = CustomTokNext;
+    }
+
+    // If we're out of bits, then say so:
+    if (CheckPos >= CustomTokEnd)
+    {
+        return NULL;
+    }
+    // Scan forward until you see a delimiter, or until the end of the string:
+    for (; CheckPos < CustomTokEnd; CheckPos++)
+    {
+        for (CheckDelimiter = Delimiters; *CheckDelimiter; CheckDelimiter++)
+        {
+            if (*CheckPos == *CheckDelimiter)
+            {
+                *CheckPos = '\0';
+                CustomTokNext = CheckPos + 1;
+                return StringStart;
+            }
+        }
+    }
+    // We didn't see the delimiter.  
+    CustomTokNext = CustomTokEnd;
+    return StringStart;
+}
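+
+// Editor's usage sketch (not called anywhere in Inspect): unlike strtok, CustomTok
+// preserves empty columns, which matters for tab-delimited result rows.
+//
+//     char Row[] = "A\t\tB";
+//     char* Field = CustomTok(Row, "\t");
+//     while (Field)
+//     {
+//         printf("[%s] ", Field);        // prints: [A] [] [B]
+//         Field = CustomTok(NULL, "\t");
+//     }
+//
+// strtok(Row, "\t") on the same input would yield only "A" and "B".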
+
+int PValueProcessResultsFileLine(int LineNumber, int FilePos, char* LineBuffer, void* UserData)
+{
+    PValueInfo* Info;
+    PValueParseInfo* ParseInfo;
+    char* Bits[MAX_BITS];
+    int BitCount;
+    char* Bit;
+    char Spectrum[512];
+    int TopMatchFlag;
+    float MQScore;
+    float DeltaScore;
+    float FScore;
+    int FBin;
+    float PValue;
+    char PValueBuffer[256];
+    char FScoreBuffer[256];
+    int BitIndex;
+    int Charge;
+    
+    //
+    Info = (PValueInfo*)UserData;
+    ParseInfo = Info->ParseInfo;
+    if (ParseInfo->Action == PROCESS_WRITE_PVALUES)
+    {
+        INSPECT_ASSERT(ParseInfo->OutputFile);
+    }
+
+    // Handle comments:
+    if (LineBuffer[0] == '#')
+    {
+        // Comment lines are written out as-is:
+        if (ParseInfo->Action == PROCESS_WRITE_PVALUES)
+        {
+            fprintf(ParseInfo->OutputFile, "%s\n", LineBuffer);
+        }
+        return 1;
+    }
+    // Split the line into tab-delimited bits:
+    Bit = CustomTok(LineBuffer, "\t");
+    Bits[0] = Bit;
+    BitCount = 1;
+    while (1)
+    {
+        Bit = CustomTok(NULL, "\t");
+        if (!Bit)
+        {
+            break;
+        }
+        Bits[BitCount] = Bit;
+        BitCount++;
+        if (BitCount >= MAX_BITS)
+        {
+            break;
+        }
+    }
+
+    // If we don't have enough tab-bits, then this isn't a valid line, and we skip it:
+    if (BitCount < BIT_INDEX_PVALUE + 1)
+    {
+        return 1;
+    }
+
+    // Note whether this is the top-scoring match for the spectrum:
+    sprintf(Spectrum, "%256s%50s", Bits[0], Bits[1]);
+    if (strcmp(Spectrum, ParseInfo->CurrentSpectrum))
+    {
+        TopMatchFlag = 1;
+        strncpy(ParseInfo->CurrentSpectrum, Spectrum, 512);
+    }
+    else
+    {
+        TopMatchFlag = 0;
+    }
+    Charge = atoi(Bits[BIT_INDEX_CHARGE]);
+    
+    // Now take various actions:
+    switch (ParseInfo->Action)
+    {
+    case PROCESS_COMPUTE_MEAN_DELTA:
+        if (TopMatchFlag)
+        {
+            if (Charge < 3)
+            {
+                Info->MeanDeltaScore2 += (float)atof(Bits[BIT_INDEX_DELTA_SCORE]);
+                Info->TotalMatches2++;
+            }
+            else
+            {
+                Info->MeanDeltaScore3 += (float)atof(Bits[BIT_INDEX_DELTA_SCORE]);
+                Info->TotalMatches3++;
+            }
+        }
+        break;
+    case PROCESS_INITIALIZE_SCORE_HISTOGRAM:
+        if (TopMatchFlag)
+        {
+            MQScore = (float)atof(Bits[BIT_INDEX_MQSCORE]);
+            DeltaScore = (float)atof(Bits[BIT_INDEX_DELTA_SCORE]) / (Charge < 3 ? Info->MeanDeltaScore2 : Info->MeanDeltaScore3);
+            FBin = (int)floor((10 * (MQScoreWeight * MQScore + DeltaScoreWeight * DeltaScore)));
+            FBin = min(max(FBin + F_BIN_OFFSET, 0), F_BIN_MAX);
+            if (Charge < 3)
+            {
+                Info->FScoreHistogram2[FBin]++;
+            }
+            else
+            {
+                Info->FScoreHistogram3[FBin]++;
+            }
+        }
+        break;
+    case PROCESS_WRITE_PVALUES:
+        MQScore = (float)atof(Bits[BIT_INDEX_MQSCORE]);
+        DeltaScore = (float)atof(Bits[BIT_INDEX_DELTA_SCORE]) / (Charge < 3 ? Info->MeanDeltaScore2 : Info->MeanDeltaScore3);
+        FScore = (MQScoreWeight * MQScore) + (DeltaScoreWeight * DeltaScore);
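+        // The F-score is a fixed-weight combination of MQScore and the delta-score,
+        // where the delta-score has already been normalized by this run's mean
+        // delta-score (computed in the PROCESS_COMPUTE_MEAN_DELTA pass).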
+        sprintf(FScoreBuffer, "%.5f", FScore);
+        Bits[BIT_INDEX_FSCORE] = FScoreBuffer;
+        FBin = (int)(10 * FScore);
+        FBin = min(max(FBin + F_BIN_OFFSET, 0), F_BIN_MAX);
+        if (Charge < 3)
+        {
+            PValue = (float)(1.0 - Info->OddsTrue2[FBin]);
+        }
+        else
+        {
+            PValue = (float)(1.0 - Info->OddsTrue3[FBin]);
+        }
+        sprintf(PValueBuffer, "%.5f", PValue);
+        Bits[BIT_INDEX_PVALUE] = PValueBuffer;
+        for (BitIndex = 0; BitIndex < BitCount; BitIndex++)
+        {
+            fprintf(ParseInfo->OutputFile, "%s\t", Bits[BitIndex]);
+        }
+        fprintf(ParseInfo->OutputFile, "\n");
+        break;
+    default:
+        printf("** Error: Unknown action '%d' in ProcessResultsFile!\n", ParseInfo->Action);
+        return 0;
+    }
+    return 1;
+}
+
+void ProcessResultsFile(PValueInfo* Info, char* FilePath, int Action)
+{
+    FILE* File;
+    PValueParseInfo ParseInfo;
+    memset(&ParseInfo,0,sizeof(ParseInfo));
+    //
+    Info->ParseInfo = &ParseInfo;
+    File = fopen(FilePath, "rb");
+    if (!File)
+    {
+        REPORT_ERROR_S(8, FilePath);
+        return;
+    }
+    ParseInfo.Action = Action;
+    if (Action == PROCESS_WRITE_PVALUES)
+    {
+        ParseInfo.OutputFile = fopen(Info->FinalOutputPath, "wb");
+        if (!ParseInfo.OutputFile)
+        {
+            REPORT_ERROR_S(8, Info->FinalOutputPath);
+            return;
+        }
+    }
+    ParseFileByLines(File, PValueProcessResultsFileLine, Info, 1);
+    fclose(File);
+}
+
+#define DEFAULT_MEAN_TRUE 4.48
+#define DEFAULT_VARIANCE_TRUE 1.50
+#define DEFAULT_MEAN_FALSE 0.19
+#define DEFAULT_VARIANCE_FALSE 0.18
+#define DEFAULT_PRIOR_PROBABILITY 0.25
+
+PValueModel* InitPValueModel(int Charge3Flag)
+{
+    PValueModel* Model;
+    //
+    Model = (PValueModel*)calloc(1, sizeof(PValueModel));
+    Model->MeanTrue = DEFAULT_MEAN_TRUE;
+    Model->VarianceTrue = DEFAULT_VARIANCE_TRUE;
+    Model->MeanFalse = DEFAULT_MEAN_FALSE;
+    Model->VarianceFalse = DEFAULT_VARIANCE_FALSE;
+    Model->PriorProbabilityTrue = DEFAULT_PRIOR_PROBABILITY;
+    Model->GammaOffset = 0;
+    Model->GammaOffset = max(Model->GammaOffset, -Model->MeanFalse + 0.1);
+    Model->ThetaFalse = Model->VarianceFalse / (Model->MeanFalse + Model->GammaOffset);
+    Model->KFalse = (Model->MeanFalse + Model->GammaOffset) / Model->ThetaFalse;
+    Model->GammaDemonFalse = pow(Model->ThetaFalse, Model->KFalse) * Gamma(Model->KFalse);
+    Model->StdDevTrue = sqrt(Model->VarianceTrue);
+    return Model;
+}
+
+int FitPValueMixtureModel(PValueInfo* Info, PValueModel* Model, int Charge3Flag)
+{
+    int FBin;
+    int* FScoreHistogram;
+    double* OddsTrue;
+    int LowestFScoreBin = -1;
+    //double GammaOffset;
+    int TotalMatches;
+    int EMCycle;
+    double X;
+    int Count;
+    int MaxBinPopulated = 0;
+    float LowestFScore = 0;
+    int CurveFitComplete;
+    double Pow;
+    double GX;
+    double TrueNormal;
+    double FalseGamma;
+    //
+
+    if (Charge3Flag)
+    {
+        FScoreHistogram = Info->FScoreHistogram3;
+        OddsTrue = Info->OddsTrue3;
+        TotalMatches = Info->TotalMatches3;
+    }
+    else
+    {
+        FScoreHistogram = Info->FScoreHistogram2;
+        OddsTrue = Info->OddsTrue2;
+        TotalMatches = Info->TotalMatches2;
+    }
+
+    // Note the lowest and highest score-bins that have any entries at all:
+    for (FBin = 0; FBin <= F_BIN_MAX; FBin++)
+    {
+        if (FScoreHistogram[FBin] > 0 && LowestFScoreBin < 0)
+        {
+            LowestFScoreBin = FBin;
+        }
+        if (FScoreHistogram[FBin])
+        {
+            MaxBinPopulated = FBin;
+        }
+    }
+
+    // Convert the lowest F-score bin# into the corresponding score:
+    LowestFScore = (LowestFScoreBin - F_BIN_OFFSET) / (float)10.0;
+    Model->GammaOffset = 0.0;
+    if (LowestFScore <= 0)
+    {
+        Model->GammaOffset = max(Model->GammaOffset, -LowestFScore + 0.1);
+    }
+    if (Model->MeanFalse <= 0)
+    {
+        Model->GammaOffset = max(Model->GammaOffset, -Model->MeanFalse + 0.1);
+    }
+
+    ////////////////////////////////////////////////////////////////////
+    // Fit the mixture model, using a gamma distribution for false match f-scores and a 
+    // normal distribution for true match f-scores.
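+    //
+    // Editor's note: the E-step below computes, for each 0.1-wide F-score bin with
+    // center x, the posterior probability that a match in that bin is true:
+    //
+    //     P(true | x) = p * N(x; MeanTrue, VarianceTrue) /
+    //                   ( p * N(x; MeanTrue, VarianceTrue)
+    //                     + (1 - p) * G(x + GammaOffset; KFalse, ThetaFalse) )
+    //
+    // where p = PriorProbabilityTrue, N is a normal density and G a gamma density
+    // (GammaOffset shifts the scores so the gamma support stays positive).  The
+    // M-step then re-estimates the means, variances and p from the histogram counts
+    // weighted by these posteriors.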
+    Model->ThetaFalse = Model->VarianceFalse / (Model->MeanFalse + Model->GammaOffset);
+    Model->KFalse = (Model->MeanFalse + Model->GammaOffset) / Model->ThetaFalse;
+    Model->GammaDemonFalse = pow(Model->ThetaFalse, Model->KFalse) * Gamma(Model->KFalse);
+    Model->StdDevTrue = sqrt(Model->VarianceTrue);
+    if (TotalMatches < 200)
+    {
+        REPORT_WARNING_I(10, TotalMatches);
+        CurveFitComplete = 0;
+    }
+    else
+    {
+        for (EMCycle = 0; EMCycle < EM_CYCLE_COUNT; EMCycle++)
+        {
+            // For each bin, compute the probability that it's true:
+            for (FBin = 0; FBin <= F_BIN_MAX; FBin++)
+            {
+                // After the last histogram entry, just inherit the last true-probability.
+                if (FBin > MaxBinPopulated)
+                {
+                    OddsTrue[FBin] = OddsTrue[FBin - 1];
+                    continue;
+                }
+                X = (FBin - F_BIN_OFFSET) / 10.0;
+                Pow = (X - Model->MeanTrue);
+                Pow = - (Pow * Pow / (2 * Model->VarianceTrue));
+                TrueNormal = exp(Pow) / (Model->StdDevTrue * SQRT_2_PI);
+                GX = max(0.01, X + Model->GammaOffset);
+                FalseGamma = pow(GX, Model->KFalse - 1) * exp(-GX / Model->ThetaFalse) / Model->GammaDemonFalse;
+                // Avoid underflow:
+                if (TrueNormal < 0.00001)
+                {
+                    if (X > 5)
+                    {
+                        OddsTrue[FBin] = (float)0.99999;
+                    }
+                    else
+                    {
+                        OddsTrue[FBin] = (float)0.0;
+                    }
+                }
+                else
+                {
+                    OddsTrue[FBin] = (TrueNormal * Model->PriorProbabilityTrue) / (TrueNormal * Model->PriorProbabilityTrue + FalseGamma * (1 - Model->PriorProbabilityTrue));
+                }
+                
+                //printf("%.8f\t%.8f\t%.8f\t%.8f\n", X, TrueNormal, FalseGamma, OddsTrue[FBin]);
+                // Because the left tail of the normal distribution falls off slowly, the value of OddsTrue can be
+                // high for negative values.  Cap it.
+                if (FBin < F_BIN_OFFSET)
+                {
+                    OddsTrue[FBin] = min(OddsTrue[FBin], 1.0 / (F_BIN_OFFSET - FBin));
+                }
+            }
+            /////////////////////////////////////////////////
+            // Compute the mean of the true and the false distributions:
+            Model->CountTrue = 0;
+            Model->MeanTrue = 0;
+            Model->CountFalse = 0;
+            Model->MeanFalse = 0;
+            for (FBin = 0; FBin <= F_BIN_MAX; FBin++)
+            {
+                X = (FBin - F_BIN_OFFSET) / 10.0;
+                Count = FScoreHistogram[FBin];
+                Model->MeanTrue += X * OddsTrue[FBin] * Count;
+                Model->CountTrue += OddsTrue[FBin] * Count;
+                Model->MeanFalse += X * (1.0 - OddsTrue[FBin]) * Count;
+                Model->CountFalse += (1.0 - OddsTrue[FBin]) * Count;
+            }
+            Model->MeanTrue /= Model->CountTrue;
+            Model->MeanFalse /= Model->CountFalse;
+            Model->PriorProbabilityTrue = Model->CountTrue / (Model->CountTrue + Model->CountFalse);
+            /////////////////////////////////////////////////
+            // Compute the variance of the true and the false distributions:
+            Model->VarianceTrue = 0;
+            Model->VarianceFalse = 0;
+            for (FBin = 0; FBin <= F_BIN_MAX; FBin++)
+            {
+                X = (FBin - F_BIN_OFFSET) / 10.0;
+                Count = FScoreHistogram[FBin];
+                Model->VarianceTrue += (X - Model->MeanTrue) * (X - Model->MeanTrue) * Count * OddsTrue[FBin];
+                Model->VarianceFalse += (X - Model->MeanFalse) * (X - Model->MeanFalse) * Count * (1.0 - OddsTrue[FBin]);
+            }
+            Model->VarianceTrue /= Model->CountTrue;
+            Model->StdDevTrue = sqrt(Model->VarianceTrue);
+            Model->VarianceFalse /= Model->CountFalse;
+            // Recompute other distribution parameters:
+            Model->ThetaFalse = Model->VarianceFalse / (Model->MeanFalse + Model->GammaOffset);
+            Model->KFalse = (Model->MeanFalse + Model->GammaOffset) / Model->ThetaFalse;
+            Model->GammaDemonFalse = pow(Model->ThetaFalse, Model->KFalse) * Gamma(Model->KFalse);
+            //printf("Cycle %d:\n", EMCycle);
+            //printf("True: Count %.4f mean %.4f variance %.4f\n", CountTrue, MeanTrue, VarianceTrue);
+            //printf("False: Count %.4f mean %.4f variance %.4f\n", CountFalse, MeanFalse, VarianceFalse);
+            //printf("Prior probability true: %.4f\n", PriorProbabilityTrue);
+        } // E-M cycle loop
+        CurveFitComplete = 1;
+    }
+
+    ///////////////////////////////////////
+    // Check to make sure the distribution is sensible.  If curve-fitting failed
+    // due to underflow/overflow, then fall back to the default curve:
+    if (Model->GammaDemonFalse <= 0 || Model->KFalse <= 0)
+    {
+        printf("** Error fitting p-value distribution; using default.  Consider running PValue.py\n");
+        CurveFitComplete = 0;
+    }
+
+    if (!CurveFitComplete)
+    {
+        // COPY-PASTA: Fill in the OddsTrue array using all default parameters.
+        Model = InitPValueModel(Charge3Flag);
+
+        // For each bin, compute the probability that it's true:
+        for (FBin = 0; FBin <= F_BIN_MAX; FBin++)
+        {
+            // After the last histogram entry, just inherit the last true-probability.
+            if (FBin > MaxBinPopulated)
+            {
+                OddsTrue[FBin] = OddsTrue[FBin - 1];
+                continue;
+            }
+            X = (FBin - F_BIN_OFFSET) / 10.0;
+            Pow = (X - Model->MeanTrue);
+            Pow = - (Pow * Pow / (2 * Model->VarianceTrue));
+            TrueNormal = exp(Pow) / (Model->StdDevTrue * SQRT_2_PI);
+            GX = max(0.01, X + Model->GammaOffset);
+            FalseGamma = pow(GX, Model->KFalse - 1) * exp(-GX / Model->ThetaFalse) / Model->GammaDemonFalse;
+            // Avoid underflow:
+            if (TrueNormal < 0.00001)
+            {
+                if (X > 5)
+                {
+                    OddsTrue[FBin] = (float)0.99999;
+                }
+                else
+                {
+                    OddsTrue[FBin] = (float)0.0;
+                }
+            }
+            else
+            {
+                OddsTrue[FBin] = (TrueNormal * Model->PriorProbabilityTrue) / (TrueNormal * Model->PriorProbabilityTrue + FalseGamma * (1 - Model->PriorProbabilityTrue));
+            }
+            
+            //printf("%.8f\t%.8f\t%.8f\t%.8f\n", X, TrueNormal, FalseGamma, OddsTrue[FBin]);
+            // Because the left tail of the normal distribution falls off slowly, the value of OddsTrue can be
+            // high for negative values.  Cap it.
+            if (FBin < F_BIN_OFFSET)
+            {
+                OddsTrue[FBin] = (float)min(OddsTrue[FBin], 1.0 / (F_BIN_OFFSET - FBin));
+            }
+        }
+        // free the temp-model:
+        SafeFree(Model);
+    } // if curve fit didn't complete
+    return 1;
+}
+
+// Iterate over all the matches, and get the distribution
+// of F-scores; use those to derive p-values.  We compute
+// one distribution for charge 1 and 2 spectra, a second 
+// distribution for charge 3 spectra.
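+// The work is done in three passes over the results file: (1) compute the mean
+// delta-score for each charge group, (2) build the F-score histograms, and
+// (3) fit the mixture models and rewrite the results with F-scores and p-values.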
+void CalculatePValues(char* ResultsFilePath, char* FinalOutputPath)
+{
+    PValueModel* Model2;
+    PValueModel* Model3;
+    PValueInfo* Info;
+    //    
+    Model2 = InitPValueModel(0);
+    Model3 = InitPValueModel(1);
+    Info = (PValueInfo*)calloc(1, sizeof(PValueInfo));
+
+    if (GlobalOptions->RunMode & (RUN_MODE_MUTATION | RUN_MODE_BLIND))
+    {
+        MQScoreWeight = BLIND_MQ_SCORE_WEIGHT;
+        DeltaScoreWeight = BLIND_DELTA_SCORE_WEIGHT;
+    }
+    else
+    {
+        MQScoreWeight = DEFAULT_MQ_SCORE_WEIGHT;
+        DeltaScoreWeight = DEFAULT_DELTA_SCORE_WEIGHT;
+    }
+
+    //////////////////////////////////////////////////////////
+    // Compute mean delta-score:
+    ProcessResultsFile(Info, ResultsFilePath, PROCESS_COMPUTE_MEAN_DELTA);
+    Info->MeanDeltaScore2 /= max(1, Info->TotalMatches2);
+    Info->MeanDeltaScore2 = max(Info->MeanDeltaScore2, (float)0.01);
+    Info->MeanDeltaScore3 /= max(1, Info->TotalMatches3);
+    Info->MeanDeltaScore3 = max(Info->MeanDeltaScore3, (float)0.01);
+
+    //////////////////////////////////////////////////////////
+    // Initialize the F-score histograms:
+    memset(Info->FScoreHistogram2, 0, sizeof(int) * (F_BIN_MAX + 1));
+    memset(Info->FScoreHistogram3, 0, sizeof(int) * (F_BIN_MAX + 1));
+    ProcessResultsFile(Info, ResultsFilePath, PROCESS_INITIALIZE_SCORE_HISTOGRAM);
+
+    //////////////////////////////////////////////////////////
+    // Fit the mixture model, populating OddsTrue:
+    FitPValueMixtureModel(Info, Model2, 0);
+    FitPValueMixtureModel(Info, Model3, 1);
+
+    // Verbose output of the p-value curve:
+    // (Disabled in production, especially for the web server!)
+    //DebugPrintPValueCurve("PValueCurve2.txt", Info->FScoreHistogram2, Info->OddsTrue2);
+    //DebugPrintPValueCurve("PValueCurve3.txt", Info->FScoreHistogram3, Info->OddsTrue3);
+    
+    // Write the p-values to the final output file:
+    Info->FinalOutputPath = FinalOutputPath;
+    ProcessResultsFile(Info, ResultsFilePath, PROCESS_WRITE_PVALUES);
+    // Now we can erase the intermediate output file:
+    remove(GlobalOptions->OutputFileName);
+}
diff --git a/PValue.h b/PValue.h
new file mode 100644
index 0000000..07a5684
--- /dev/null
+++ b/PValue.h
@@ -0,0 +1,42 @@
+//Title:          PValue.h
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+#ifndef PVALUE_H
+#define PVALUE_H
+#include "Inspect.h"
+#include "Trie.h"
+
+// PValue.c derives p-values for spectrum annotation using a mixture model for the 
+// histogram of match scores.  The computation is based on the PeptideProphet paper.
+
+void CalculatePValues(char* ResultsFilePath, char* FinalOutputPath);
+
+#endif // PVALUE_H
diff --git a/ParentMass.c b/ParentMass.c
new file mode 100644
index 0000000..3f4b6f5
--- /dev/null
+++ b/ParentMass.c
@@ -0,0 +1,710 @@
+//Title:          ParentMass.c
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+// ParentMass.c: Routines for parent mass correction.  The precursor mass, as supplied,
+// may be off by up to 1 Da (or more, depending on the experiment).  Here we determine
+// which parent mass is correct by considering the spectrum's self-convolution: The
+// overlap between b and y peaks should be highest when the parent mass is exactly right.
+//
+// Our implementation: We construct a PMCSpectrumInfo object for the spectrum, which keeps
+// track of PMCInfo nodes.  We build one PMCInfo node for each mass we're testing.  We
+// compute self-convolutions for each PMCInfo node, and we also compare convolutions across
+// PMCInfo nodes and across mass offsets.  Finally, we feed these features into a model
+// which assigns each PMCInfo a score, and we keep the best PMCInfo.
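+//
+// Editor's note: the self-convolution works because complementary b- and y-fragments
+// from the same backbone cleavage sum to a constant (approximately the singly
+// protonated parent mass stored in Spectrum->ParentMass plus one extra proton), so
+// the b/y overlap is maximized only when the assumed parent mass is correct.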
+
+#include "CMemLeak.h"
+#include <stdio.h>
+#include <math.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#include "Utils.h"
+#include "ChargeState.h"
+#include "Spectrum.h"
+#include "Inspect.h"
+#include "SVM.h"
+#include "Errors.h"
+#include "LDA.h"
+
+#ifdef _WIN32
+#include <windows.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#else
+#include <dirent.h>
+#include <sys/stat.h>
+#endif
+
+#define EPSILON (float)0.000001
+
+// Models, for parent mass correction in various charge states:
+extern LDAModel* PMCCharge1LDA;
+extern LDAModel* PMCCharge2LDA;
+extern LDAModel* PMCCharge3LDA;
+SVMModel* PMCCharge1SVM;
+SVMModel* PMCCharge2SVM;
+SVMModel* PMCCharge3SVM;
+
+// For converting parts-per-million:
+#define ONE_MILLION 1000000
+
+///////////////////////////////////////////////////
+// Forward declarations:
+
+///////////////////////////////////////////////////
+// Functions:
+void CharacterizePhosphatePeaks(PMCInfo* Info, PMCSpectrumInfo* SpectrumInfo, int Offset, int FeatureIndex);
+
+// Free PMCSpectrumInfo, which is only kept around during 
+// parent mass and charge state correction.
+void FreePMCSpectrumInfo(PMCSpectrumInfo* SpectrumInfo)
+{
+    PMCInfo* Info;
+    PMCInfo* Prev;
+    SelfConvolutionNode* Node;
+    SelfConvolutionNode* PrevNode;
+    int HashIndex;
+    //
+    if (!SpectrumInfo)
+    {
+        return;
+    }
+    // Free PMCInfo list:
+    Prev = NULL;
+    for (Info = SpectrumInfo->Head; Info; Info = Info->Next)
+    {
+        SafeFree(Prev);
+        Prev = Info;
+    }
+    SafeFree(Prev);
+    // Free SelfConvolution list:
+    for (HashIndex = 0; HashIndex < SC_HASH_SIZE; HashIndex++)
+    {
+        PrevNode = NULL;
+        for (Node = SpectrumInfo->SCHash[HashIndex]; Node; Node = Node->Next)
+        {
+            SafeFree(PrevNode);
+            PrevNode = Node;
+        }
+        SafeFree(PrevNode);
+    }
+    // Free SelfConvolution2 list:
+    for (HashIndex = 0; HashIndex < SC_HASH_SIZE; HashIndex++)
+    {
+        PrevNode = NULL;
+        for (Node = SpectrumInfo->SC2Hash[HashIndex]; Node; Node = Node->Next)
+        {
+            SafeFree(PrevNode);
+            PrevNode = Node;
+        }
+        SafeFree(PrevNode);
+    }
+    // Free the parent:
+    SafeFree(SpectrumInfo);
+}
+
+// Build PMCInfo nodes for the masses we'll consider adjusting to.  We'll add one node
+// for the core mass, and we'll add some more nodes in the neighborhood (MinMass, MaxMass).
+// The PMCInfo nodes are children of SpectrumInfo.
+void AddPMCNodes(PMCSpectrumInfo* SpectrumInfo, int CoreMass, int MinMass, int MaxMass)
+{
+    PMCInfo* Info;
+    int MassChange;
+    int Mass;
+    //
+    // Iterate from the core mass downward.  When you reach the end, iterate
+    // from the core mass (+0.1Da) upward.  (Use the 'two-way iteration' instead
+    // of two loops)
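+    // (For example, with core mass M this visits M, M - 0.1Da, M - 0.2Da, ... down to MinMass,
+    // and then M + 0.1Da, M + 0.2Da, ... up to MaxMass.)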
+    Mass = CoreMass;
+    
+    MassChange = -DECI_DALTON;
+    while (1)
+    {
+        if (Mass < MinMass)
+        {
+            MassChange = DECI_DALTON;
+            Mass = CoreMass + MassChange;
+        }
+        if (Mass > MaxMass)
+        {
+            break;
+        }
+        Info = (PMCInfo*)calloc(1, sizeof(PMCInfo));
+        Info->Charge = SpectrumInfo->Charge;
+        Info->ParentMass = Mass;
+        if (!SpectrumInfo->Head)
+        {
+            SpectrumInfo->Head = Info;
+        }
+        else
+        {
+            SpectrumInfo->Tail->Next = Info;
+        }
+        SpectrumInfo->Tail = Info;
+        Mass += MassChange;
+    }
+}
+
+// Compute features for performing parent mass correction.
+// Assumes that the charge state is set!
+void ComputePMCFeatures(PMCSpectrumInfo* SpectrumInfo)
+{
+    int OffsetIndex;
+    int FeatureIndex;
+    int Charge;
+    int BestScoreIndex = 0;
+    int BestRunnerUpIndex = -1;
+    float PMRadius;
+    float AverageConvolution;
+    PMCInfo* Info;
+    MSSpectrum* Spectrum;
+    float MaxConvolution;
+    int InfoCount;
+    float Diff;
+    //
+ 
+
+    Spectrum = SpectrumInfo->Spectrum;
+
+    // Set the spectrum's mass:
+    Spectrum->ParentMass = Spectrum->MZ * SpectrumInfo->Charge - HYDROGEN_MASS * (SpectrumInfo->Charge - 1); // base (M+H) mass from the measured m/z and charge
+    Charge = min(3, SpectrumInfo->Charge);
+
+
+
+    ////////////////////////////////////////////////////////////
+    // Build PMCInfo structs for allowed masses.  We're always allowed a +1 or -1 isotope. 
+    // And we're allowed to move around by 0.1Da until our mass error (in PPM) becomes too large.
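+    // (Worked example: with ParentMassPPM = 100 and a 2000 Da parent mass, the radius is
+    // 2000 * 100 / 1,000,000 = 0.2 Da, so candidate masses span +-0.2 Da in 0.1 Da steps.)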
+    PMRadius = (float)Spectrum->ParentMass;
+    PMRadius *= GlobalOptions->ParentMassPPM / (float)ONE_MILLION;
+    AddPMCNodes(SpectrumInfo, Spectrum->ParentMass, 
+        (int)(Spectrum->ParentMass - PMRadius), (int)(Spectrum->ParentMass + PMRadius));
+
+    // We're always allowed a +1 and -1 shift:
+    if (PMRadius < DALTON)
+    {
+        AddPMCNodes(SpectrumInfo, Spectrum->ParentMass - DALTON,
+            (int)(Spectrum->ParentMass - DALTON - PMRadius), 
+		    (int)(min(Spectrum->ParentMass - DALTON + PMRadius, Spectrum->ParentMass - PMRadius)));
+        AddPMCNodes(SpectrumInfo, Spectrum->ParentMass + DALTON,
+		    (int)(max(Spectrum->ParentMass + DALTON - PMRadius, Spectrum->ParentMass + PMRadius)),
+            (int)(Spectrum->ParentMass + DALTON + PMRadius));
+    }
+    // OK, the PMCInfo nodes have now been created.
+    // Perform self-convolution at the various candidate parent masses; this populates Info->Convolve and Info->Convolve2.
+    // (The *average* and *maximum* self-convolutions are computed below, when the feature vectors are built.)
+    InfoCount = 0;
+    for (Info = SpectrumInfo->Head; Info; Info = Info->Next)
+    {
+        ConvolveMassCorrectedSpectrum(Info, SpectrumInfo);
+        InfoCount++;
+    }
+
+    // Use the self-convolution info to populate the feature-vector for each PMCInfo:
+    for (Info = SpectrumInfo->Head; Info; Info = Info->Next)
+    {
+        FeatureIndex = 0;
+
+        // First feature is derived from the mass offset:
+        if (SpectrumInfo->Charge == 1)
+        {
+            // Absolute Mass offset
+            Info->Features[FeatureIndex++] = (float)fabs((Spectrum->ParentMass - Info->ParentMass) / (float)MASS_SCALE);
+        }
+        else
+        {
+            // Mass offset:
+            Diff = (Spectrum->ParentMass - Info->ParentMass) / (float)MASS_SCALE;
+            Info->Features[FeatureIndex++] = Diff;
+            // Squared mass offset:
+            Info->Features[FeatureIndex++] = Diff * Diff;
+        }
+
+        ////////////////////////////////////////////////////////////////
+        // Convolution features:
+        // Find the average convolution for several masses:
+        AverageConvolution = 0;
+        MaxConvolution = 0;
+        for (OffsetIndex = 0; OffsetIndex < SELF_CONVOLVE_OFFSETS; OffsetIndex++)
+        {
+            AverageConvolution += Info->Convolve[OffsetIndex];
+            MaxConvolution = max(MaxConvolution, Info->Convolve[OffsetIndex]);
+        }
+        AverageConvolution /= (float)SELF_CONVOLVE_OFFSETS;
+        AverageConvolution = max(EPSILON, AverageConvolution);
+        MaxConvolution = max(EPSILON, MaxConvolution);
+        for (OffsetIndex = 0; OffsetIndex < SELF_CONVOLVE_OFFSETS; OffsetIndex++)
+        {
+            if (OffsetIndex < 4)
+            {
+                Info->Features[FeatureIndex++] = Info->Convolve[OffsetIndex];
+                Info->Features[FeatureIndex++] = Info->Convolve[OffsetIndex] / AverageConvolution;
+            }
+        }
+        // Convolutions of singly- and doubly-charged peaks.
+        // (These features aren't computed for charge 1!)
+        if (SpectrumInfo->Charge > 1) 
+        {
+            ////////////////////////////////////////////////////////////////
+            // Convolution2 features:
+            // Find the average convolution for several masses:
+            AverageConvolution = 0;
+            MaxConvolution = 0;
+            for (OffsetIndex = 0; OffsetIndex < SELF_CONVOLVE2_OFFSETS; OffsetIndex++)
+            {
+                AverageConvolution += Info->Convolve2[OffsetIndex];
+                MaxConvolution = max(MaxConvolution, Info->Convolve2[OffsetIndex]);
+            }
+            AverageConvolution /= (float)SELF_CONVOLVE2_OFFSETS;
+            AverageConvolution = max(EPSILON, AverageConvolution);
+            MaxConvolution = max(EPSILON, MaxConvolution);
+            for (OffsetIndex = 0; OffsetIndex < SELF_CONVOLVE2_OFFSETS; OffsetIndex++)
+            {
+                if (OffsetIndex < 3)
+                {
+                    Info->Features[FeatureIndex++] = Info->Convolve2[OffsetIndex];
+                    Info->Features[FeatureIndex++] = Info->Convolve2[OffsetIndex] / AverageConvolution;
+                }
+            }
+        }
+        if (GlobalOptions->PhosphorylationFlag)
+        {
+            // Sam's phosphorylation features: the simple sum of the M-p and M-p-H2O peak intensities and skews.
+            CharacterizePhosphatePeaks(Info, SpectrumInfo, PHOSPHATE_WATER_MASS / Info->Charge, 0);
+            CharacterizePhosphatePeaks(Info, SpectrumInfo, (PHOSPHATE_WATER_MASS + WATER_MASS) / Info->Charge, 1);
+            Info->Features[FeatureIndex++] = Info->IntensePeakIntensity[0] + Info->IntensePeakIntensity[1];
+            Info->Features[FeatureIndex++] = (float)(Info->IntensePeakSkew[0] + Info->IntensePeakSkew[1]);
+            // Save this information for the charge state model:
+            Info->IntensePeakIntensity[2] = Info->IntensePeakIntensity[0] + Info->IntensePeakIntensity[1];
+        }
+    }
+}
+
+// Get features for a possible phosphate-loss peak by looking for the most intense
+// peak within a small window around the expected mass.
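+// (For context: phosphopeptide precursors commonly show an intense neutral-loss peak about
+// 98 Da below the precursor mass - divided by the charge, since we work in m/z - from loss of
+// H3PO4, sometimes with a further water loss; the offsets passed in from ComputePMCFeatures
+// appear to correspond to these losses.)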
+void CharacterizePhosphatePeaks(PMCInfo* Info, PMCSpectrumInfo* SpectrumInfo, int Offset, int FeatureIndex)
+{
+    MSSpectrum* Spectrum;
+    int PeakIndex = -1;
+    int MZ; // m/z implied by this PMCInfo's parent-mass guess, not the value listed in the file.
+    int Epsilon = (int)(0.5 * DALTON);
+    int SavedPeakIndex = -1;
+    int Difference;
+    int Skew = 0;
+    float Intensity = 0;
+    float TotalIntensity = 0;
+    int ExpectedPeakMass;
+    //
+    Spectrum = SpectrumInfo->Spectrum;
+    MZ = (Info->ParentMass + (Info->Charge - 1) * HYDROGEN_MASS) / Info->Charge;
+    ExpectedPeakMass = MZ - Offset;
+    for (PeakIndex = 0; PeakIndex < Spectrum->PeakCount; PeakIndex++)
+    {
+        TotalIntensity += Spectrum->Peaks[PeakIndex].Intensity;
+        Difference = abs(Spectrum->Peaks[PeakIndex].Mass - ExpectedPeakMass);
+        if (Difference > Epsilon)
+        {
+            continue;
+        }
+        if (Spectrum->Peaks[PeakIndex].Intensity > Intensity)
+        {
+            Intensity = Spectrum->Peaks[PeakIndex].Intensity;
+            Skew = Difference;
+            SavedPeakIndex = PeakIndex;
+        }
+    }
+    if (SavedPeakIndex >= 0)
+    {
+        Info->IntensePeakIndex[FeatureIndex] = SavedPeakIndex;
+        Info->IntensePeakIntensity[FeatureIndex] = Intensity / TotalIntensity; // fraction of total intensity
+        Info->IntensePeakSkew[FeatureIndex] = Skew;
+    }
+    else
+    {
+        // Nothing found; save zeros.
+        Info->IntensePeakIndex[FeatureIndex] = 0;
+        Info->IntensePeakIntensity[FeatureIndex] = 0;
+        Info->IntensePeakSkew[FeatureIndex] = 0;
+    }
+}
+
+
+// Carry out parent mass correction on this spectrum.
+void PerformPMC(PMCSpectrumInfo* SpectrumInfo)
+{
+    PMCInfo* Info;
+    
+    //
+#ifdef PMC_USE_SVM
+    LoadPMCSVM(0);
+#else
+    LoadPMCLDA(0);
+#endif
+    ComputePMCFeatures(SpectrumInfo);
+    // If we don't have a model (yet), then give the FIRST mass the best score:
+
+
+    switch (SpectrumInfo->Head->Charge)
+    {
+    case 1:
+        if (!PMCCharge1LDA && !PMCCharge1SVM)
+        {
+            SpectrumInfo->BestInfo = SpectrumInfo->Head;
+            SpectrumInfo->Head->SVMScore = 1.0;
+            return;
+        }
+        break;
+    case 3:
+        if (!PMCCharge3LDA && !PMCCharge3SVM)
+        {
+            SpectrumInfo->BestInfo = SpectrumInfo->Head;
+            SpectrumInfo->Head->SVMScore = 1.0;
+            return;
+        }
+        break;
+    default:
+        if (!PMCCharge2LDA && !PMCCharge2SVM)
+        {
+            SpectrumInfo->BestInfo = SpectrumInfo->Head;
+            SpectrumInfo->Head->SVMScore = 1.0;
+            return;
+        }
+        break;
+    }
+
+    // Apply the machine learning model to each one:
+    for (Info = SpectrumInfo->Head; Info; Info = Info->Next)
+    {
+        if (Info->Charge == 1)
+        {
+#ifdef PMC_USE_SVM
+            Info->SVMScore = SVMClassify(PMCCharge1SVM, Info->Features, 0);
+#else
+            Info->SVMScore = ApplyLDAModel(PMCCharge1LDA, Info->Features);
+#endif
+        }
+        else if (Info->Charge == 2)
+        {
+#ifdef PMC_USE_SVM
+            Info->SVMScore = SVMClassify(PMCCharge2SVM, Info->Features, 0);
+#else
+            Info->SVMScore = ApplyLDAModel(PMCCharge2LDA, Info->Features);
+#endif
+        }
+        else
+        {
+#ifdef PMC_USE_SVM
+            Info->SVMScore = SVMClassify(PMCCharge3SVM, Info->Features, 0);
+#else
+            Info->SVMScore = ApplyLDAModel(PMCCharge3LDA, Info->Features);
+#endif
+        }
+    }
+    // Remember the best one:
+    for (Info = SpectrumInfo->Head; Info; Info = Info->Next)
+    {
+        if (!SpectrumInfo->BestInfo || Info->SVMScore > SpectrumInfo->BestInfo->SVMScore)
+        {
+            SpectrumInfo->BestInfo = Info;
+        }
+    }
+    // Remember the runner-up: the best-scoring node whose parent mass differs from the winner's by more than 400 (scaled) mass units:
+    for (Info = SpectrumInfo->Head; Info; Info = Info->Next)
+    {
+        //if (Info == SpectrumInfo->BestInfo)
+        //{
+        //    continue;
+        //}
+        if (abs(Info->ParentMass - SpectrumInfo->BestInfo->ParentMass) <= 400)
+        {
+            continue;
+        }
+
+        if (!SpectrumInfo->RunnerUpInfo || (Info->SVMScore > SpectrumInfo->RunnerUpInfo->SVMScore))
+        {
+            SpectrumInfo->RunnerUpInfo = Info;
+        }
+    }
+}
+
+// Load parent mass correction SVM models.
+int LoadPMCSVM()
+{
+    char FilePath[1024];
+    if (PMCCharge1SVM)
+    {
+        return 1;
+    }
+    sprintf(FilePath, "%s%s", GlobalOptions->ResourceDir, "PMCSVM1.model");
+    PMCCharge1SVM = ReadSVMModel(FilePath);
+    sprintf(FilePath, "%s%s", GlobalOptions->ResourceDir, "PMCSVM1.range");
+    ReadSVMScaling(PMCCharge1SVM, FilePath);
+    sprintf(FilePath, "%s%s", GlobalOptions->ResourceDir, "PMCSVM2.model");
+    PMCCharge2SVM = ReadSVMModel(FilePath);
+    sprintf(FilePath, "%s%s", GlobalOptions->ResourceDir, "PMCSVM2.range");
+    ReadSVMScaling(PMCCharge2SVM, FilePath);
+    sprintf(FilePath, "%s%s", GlobalOptions->ResourceDir, "PMCSVM3.model");
+    PMCCharge3SVM = ReadSVMModel(FilePath);
+    sprintf(FilePath, "%s%s", GlobalOptions->ResourceDir, "PMCSVM3.range");
+    ReadSVMScaling(PMCCharge3SVM, FilePath);
+    return 1;
+}
+
+// Build a PMCSpectrumInfo instance for the spectrum.  Assumes the charge state is set.
+PMCSpectrumInfo* GetPMCSpectrumInfo(MSSpectrum* Spectrum)
+{
+    PMCSpectrumInfo* SpectrumInfo;
+    float SelfConvolve;
+    int PeakIndex;
+    int Bin;
+    float Intensity;
+    //
+    SpectrumInfo = (PMCSpectrumInfo*)calloc(1, sizeof(PMCSpectrumInfo));
+    SpectrumInfo->Spectrum = Spectrum;
+    SpectrumInfo->Charge = Spectrum->Charge;
+    SpectrumInfo->Mass = (Spectrum->MZ * Spectrum->Charge) - ((Spectrum->Charge - 1) * HYDROGEN_MASS);
+
+
+    //printf("A2\n");
+    //fflush(stdout);
+
+    // Scale spectrum peaks to a TOTAL intensity of 100:
+    Intensity = 0;
+    for (PeakIndex = 0; PeakIndex < Spectrum->PeakCount; PeakIndex++)
+    {
+        Intensity += Spectrum->Peaks[PeakIndex].Intensity;
+    }
+    //printf("B2\n");
+    //fflush(stdout);
+    SpectrumInfo->PeakScalingFactor = 100 / Intensity;
+    SpectrumInfo->PeakScalingFactor *= SpectrumInfo->PeakScalingFactor;
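+    // (The factor is squared because it always multiplies a *product* of two peak intensities,
+    // here and in SpectrumGetSelfConvolution, so each intensity is effectively rescaled once.)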
+
+    // Compute the spectrum's baseline self-convolution (each peak against the binned intensity
+    // at its own mass); this is used later as a normalization denominator:
+    SelfConvolve = EPSILON;
+    for (PeakIndex = 0; PeakIndex < Spectrum->PeakCount; PeakIndex++)
+    {
+        Bin = (Spectrum->Peaks[PeakIndex].Mass + 50) / 100;
+        if (Bin >= 0 && Bin < Spectrum->IntensityBinCount)
+        {
+            Intensity = Spectrum->BinnedIntensitiesTight[Bin];
+            SelfConvolve += Spectrum->Peaks[PeakIndex].Intensity * Intensity * SpectrumInfo->PeakScalingFactor;
+        }
+    }
+    //printf("C2\n");
+    //fflush(stdout);
+    SpectrumInfo->SelfConvolution = SelfConvolve;
+    //printf("D2\n");
+    //fflush(stdout);
+    return SpectrumInfo;
+}
+
+float SpectrumGetSelfConvolution(MSSpectrum* Spectrum, PMCSpectrumInfo* SpectrumInfo, int Offset, int DoublyChargedFlag)
+{
+    SelfConvolutionNode* Node;
+    SelfConvolutionNode* OldNode;
+    
+    int PeakIndex;
+    int OtherMass;
+    int Bin;
+    float Product;
+    int VerboseFlag = 0;
+    float Convolution;
+    int HashIndex;
+    //
+
+    HashIndex = abs(Offset / 100) % SC_HASH_SIZE;
+    // If the self-convolution has already been computed, then we simply look it up:
+    if (DoublyChargedFlag)
+    {
+        Node = SpectrumInfo->SC2Hash[HashIndex];
+    }
+    else
+    {
+        Node = SpectrumInfo->SCHash[HashIndex];
+    }
+    for (; Node; Node = Node->Next)
+    {
+        if (Node->MassOffset == Offset)
+        {
+            //printf("SGSC%d: Return already-computed for offset %d\n", DoublyChargedFlag, Offset);
+            return Node->Value;
+        }
+    }
+
+    //printf("SGSC%d: Compute for offset %d\n", DoublyChargedFlag, Offset);
+    // Compute convolution value for these parameters:
+    Convolution = 0;
+    for (PeakIndex = 0; PeakIndex < Spectrum->PeakCount; PeakIndex++)
+    {
+        if (DoublyChargedFlag)
+        {
+            OtherMass = SpectrumInfo->Mass + 2 * HYDROGEN_MASS - (2 * Spectrum->Peaks[PeakIndex].Mass) + Offset;
+        }
+        else
+        {
+            OtherMass = SpectrumInfo->Mass + HYDROGEN_MASS - Spectrum->Peaks[PeakIndex].Mass + Offset;
+        }
+        Bin = ((OtherMass + 50) / 100);
+        if (Bin < 0 || Bin >= Spectrum->IntensityBinCount)
+        {
+            continue;
+        }
+        Product = Spectrum->Peaks[PeakIndex].Intensity * Spectrum->BinnedIntensitiesTight[Bin] * SpectrumInfo->PeakScalingFactor;
+        if (VerboseFlag && Product)
+        {
+            printf("Peak@%.2f and binned intensity %d (%.2f) -> %.5f\n", Spectrum->Peaks[PeakIndex].Mass / (float)DALTON,
+                Bin, OtherMass / (float)DALTON, Product);
+        }
+        Convolution += Product;
+    }
+    Node = (SelfConvolutionNode*)calloc(1, sizeof(SelfConvolutionNode));
+    Node->MassOffset = Offset;
+    Node->Value = Convolution / SpectrumInfo->SelfConvolution;
+    if (DoublyChargedFlag)
+    {
+        if (SpectrumInfo->SC2Hash[HashIndex])
+        {
+            OldNode = SpectrumInfo->SC2Hash[HashIndex];
+            while (OldNode->Next)
+            {
+                OldNode = OldNode->Next;
+            }
+            OldNode->Next = Node;
+        }
+        else
+        {
+            SpectrumInfo->SC2Hash[HashIndex] = Node;
+        }
+    }
+    else
+    {
+        if (SpectrumInfo->SCHash[HashIndex])
+        {
+            OldNode = SpectrumInfo->SCHash[HashIndex];
+            while (OldNode->Next)
+            {
+                OldNode = OldNode->Next;
+            }
+            OldNode->Next = Node;
+        }
+        else
+        {
+            SpectrumInfo->SCHash[HashIndex] = Node;
+        }
+    }
+    return Node->Value;
+}
+
+
+// ConvolveMassCorrectedSpectrum computes self-convolution for the given charge 
+// and parent mass.  
+void ConvolveMassCorrectedSpectrum(PMCInfo* Info, PMCSpectrumInfo* SpectrumInfo)
+{
+    MSSpectrum* Spectrum;
+    int OffsetIndex;
+    int VerboseFlag = 0;
+    int OverallOffset;
+
+    // The Offsets array consists of some masses where we expect a LARGE self-convolution, followed by others where we expect a SMALL
+    // self-convolution:
+    int Offsets[SELF_CONVOLVE_OFFSETS] = {-18 * DALTON, -17 * DALTON, 0 * DALTON, 1 * DALTON,
+        -1 * DALTON, (int)(0.5 * DALTON), (int)(-16.5 * DALTON)};
+    int Offsets2[SELF_CONVOLVE2_OFFSETS] = {(int)(0.4 * DALTON), (int)(1.2 * DALTON), (int)(-17.5 * DALTON),
+        -1 * DALTON, 4 * DALTON};
+
+    if (GlobalOptions->PhosphorylationFlag)
+    {
+        // For phosphopeptide searches, these offsets produce much better results.
+        Offsets2[0] = (int)(0.2 * DALTON);
+        Offsets2[2] = (int)(-18.0 * DALTON);
+    }
+
+    //
+    Spectrum = SpectrumInfo->Spectrum;
+    if (!Spectrum->BinnedIntensities) // move to caller!
+    {
+        REPORT_ERROR_S(4, "Error in ConvolveMassCorrectedSpectrum(): Spectrum binned intensities not set.\n");
+        return;
+    }
+    for (OffsetIndex = 0; OffsetIndex < SELF_CONVOLVE_OFFSETS; OffsetIndex++)
+    {
+        if (VerboseFlag)
+        {
+            printf("\n>>>Offset %d: %.2f\n", OffsetIndex, Offsets[OffsetIndex] / (float)DALTON);
+        }
+        OverallOffset = Offsets[OffsetIndex] + (Info->ParentMass - SpectrumInfo->Mass);
+        Info->Convolve[OffsetIndex] = SpectrumGetSelfConvolution(Spectrum, SpectrumInfo, OverallOffset, 0);
+
+        //// Compute convolution value for these parameters:
+        //Convolution = 0;
+        //for (PeakIndex = 0; PeakIndex < Spectrum->PeakCount; PeakIndex++)
+        //{
+        //    OtherMass = Info->ParentMass + HYDROGEN_MASS - Spectrum->Peaks[PeakIndex].Mass + Offsets[OffsetIndex];
+        //    Bin = ((OtherMass + 50) / 100);
+        //    if (Bin < 0 || Bin >= Spectrum->IntensityBinCount)
+        //    {
+        //        continue;
+        //    }
+        //    Product = Spectrum->Peaks[PeakIndex].Intensity * Spectrum->BinnedIntensitiesTight[Bin] * SpectrumInfo->PeakScalingFactor;
+        //    if (VerboseFlag && Product)
+        //    {
+        //        printf("Peak@%.2f and binned intensity %d (%.2f) -> %.5f\n", Spectrum->Peaks[PeakIndex].Mass / (float)DALTON,
+        //            Bin, OtherMass / (float)DALTON, Product);
+        //    }
+        //    Convolution += Product;
+        //}
+        //Info->Convolve[OffsetIndex] = Convolution / SpectrumInfo->SelfConvolution;
+        //if (VerboseFlag)
+        //{
+        //    printf(">>Convolve[%d] = %.4f\n", OffsetIndex, Convolution);
+        //}
+    }
+    
+    if (Spectrum->Charge > 1)
+    {
+        // These values are computed even for phosphopeptide charge-2 spectra, where the PMC model
+        // doesn't use them, because they still feed the charge-correction model.
+        // Compute convolution of charge-1 and charge-2 peaks:
+        for (OffsetIndex = 0; OffsetIndex < SELF_CONVOLVE2_OFFSETS; OffsetIndex++)
+        {
+            OverallOffset = Offsets2[OffsetIndex] + (Info->ParentMass - SpectrumInfo->Mass);
+            Info->Convolve2[OffsetIndex] = SpectrumGetSelfConvolution(Spectrum, SpectrumInfo, OverallOffset, 1);
+            //Convolution = 0;
+            //for (PeakIndex = 0; PeakIndex < Spectrum->PeakCount; PeakIndex++)
+            //{
+            //    OtherMass = Info->ParentMass + 2 * HYDROGEN_MASS - (2 * Spectrum->Peaks[PeakIndex].Mass) + Offsets2[OffsetIndex];
+            //    Bin = ((OtherMass + 50) / 100);
+            //    if (Bin < 0 || Bin >= Spectrum->IntensityBinCount)
+            //    {
+            //        continue;
+            //    }
+            //    Convolution += Spectrum->Peaks[PeakIndex].Intensity * Spectrum->BinnedIntensitiesTight[Bin] * SpectrumInfo->PeakScalingFactor;
+            //}
+            //Info->Convolve2[OffsetIndex] = Convolution / SpectrumInfo->SelfConvolution;
+        }
+    }
+}
diff --git a/ParentMass.h b/ParentMass.h
new file mode 100644
index 0000000..1e5e691
--- /dev/null
+++ b/ParentMass.h
@@ -0,0 +1,105 @@
+//Title:          ParentMass.h
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+#ifndef PARENT_MASS_H
+#define PARENT_MASS_H
+
+
+
+#include <stdio.h>
+#include <math.h>
+#include <stdlib.h>
+#include "Utils.h"
+#include "Inspect.h"
+#include "Spectrum.h"
+
+#define SELF_CONVOLVE_OFFSETS 7
+#define SELF_CONVOLVE2_OFFSETS 5
+
+// A linked list of self-convolutions for a spectrum.  
+// We keep this list in the PMCSpectrumInfo, because many
+// of the PMCInfo objects will re-use the same self-convolutions;
+// it's expensive to re-compute them.
+typedef struct SelfConvolutionNode
+{
+    int MassOffset;
+    float Value;
+    struct SelfConvolutionNode* Next;
+} SelfConvolutionNode;
+
+#define SC_HASH_SIZE 64
+// PMCSpectrumInfo is data used during parent mass correction; here we store
+// intermediate values which are general across the whole spectrum, so that
+// we needn't re-compute them for each PMCInfo
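+// (Self-convolution values are cached in the SCHash/SC2Hash tables, keyed on the mass offset,
+// so an offset shared by several PMCInfo nodes is only computed once.)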
+typedef struct PMCSpectrumInfo
+{ 
+    MSSpectrum* Spectrum;
+    int Charge;
+    float PeakScalingFactor;
+    float SelfConvolution;
+    struct PMCInfo* Head;
+    struct PMCInfo* Tail;
+    struct PMCInfo* BestInfo;
+    struct PMCInfo* RunnerUpInfo;
+    int Mass; // base mass, from the file
+    SelfConvolutionNode* SCHash[SC_HASH_SIZE];
+    //SelfConvolutionNode* SCTail;
+    //SelfConvolutionNode* SC2Head;
+    SelfConvolutionNode* SC2Hash[SC_HASH_SIZE];
+} PMCSpectrumInfo;
+
+// We allocate one PMCInfo struct for each candidate parent mass.  We store 
+// the SVM features here, along with the mass and other bookkeeping info.
+// The PMCInfo structs are kept in a linked list; in the end we keep only the best-scoring
+// PMCInfo (and a runner-up).
+typedef struct PMCInfo
+{
+    int Charge;
+    int ParentMass;
+    
+    float Features[64];
+    float Convolve[SELF_CONVOLVE_OFFSETS];
+    float Convolve2[SELF_CONVOLVE2_OFFSETS];
+    float SVMScore;
+    struct PMCInfo* Next;
+    int IntensePeakIndex[6];       // for tracking possible M-p related peaks, which are very intense
+    float IntensePeakIntensity[6]; // fraction of the total spectrum intensity
+    int IntensePeakSkew[6];        // indices: 0 = M-p, 1 = M-p-H2O, 2 = combined feature used
+} PMCInfo;
+
+void PerformPMC(PMCSpectrumInfo* SpectrumInfo);
+void FreePMCSpectrumInfo(PMCSpectrumInfo* SpectrumInfo);
+void ComputePMCFeatures(PMCSpectrumInfo* SpectrumInfo);
+PMCSpectrumInfo* GetPMCSpectrumInfo(MSSpectrum* Spectrum);
+void ConvolveMassCorrectedSpectrum(PMCInfo* Info, PMCSpectrumInfo* SpectrumInfo);
+
+#endif // PARENT_MASS_H
diff --git a/ParseInput.c b/ParseInput.c
new file mode 100644
index 0000000..0698661
--- /dev/null
+++ b/ParseInput.c
@@ -0,0 +1,1653 @@
+//Title:          ParseInput.c
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+// ParseInput.c is responsible for parsing the Inspect input file.
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <time.h>
+#include <string.h>
+#include <locale.h>
+#include "Trie.h"
+#include "Utils.h"
+#include "Spectrum.h"
+#include "Mods.h"
+#include "Score.h"
+#include "Tagger.h"
+#include "FreeMod.h"
+#include "CMemLeak.h"
+#include "SVM.h"
+#include "BN.h"
+#include "Run.h"
+#include "SNP.h"
+#include "SpliceDB.h"
+#include "ChargeState.h"
+#include "Scorpion.h"
+#include "ParseXML.h"
+#include "SpliceScan.h"
+#include "Errors.h"
+#include "IonScoring.h"
+#include "TagFile.h" //ARI_MOD
+//#include "SuffixArray.h"
+
+// If the input file specifies a directory full of spectra, we must iterate over the files in that directory.
+// That works a bit differently on Windows and on Unix.
+#ifdef _WIN32
+#include <windows.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#else
+#include <dirent.h>
+#include <sys/stat.h>
+#endif
+
+// Global variables:
+extern Options* GlobalOptions;
+extern MSSpectrum* Spectrum;
+
+// Array of spectra to be searched.  We put them into an array so that we can qsort
+// them.  (Not crucial, but it's nice to get output in order)
+SpectrumNode* g_BigNodeArray = NULL;
+
+extern StringNode* FirstTagCheckNode;
+extern StringNode* LastTagCheckNode;
+
+void AddDatabase(DatabaseFile* Database)
+{
+    if (!Database)
+    {
+        return;
+    }
+    if (!GlobalOptions->FirstDatabase)
+    {
+        GlobalOptions->FirstDatabase = Database;
+        GlobalOptions->LastDatabase = Database;
+    }
+    else
+    {
+        GlobalOptions->LastDatabase->Next = Database;
+        GlobalOptions->LastDatabase = Database;
+    }
+}
+
+// Parse a FASTA file, and convert it into a .trie file.  This is the same
+// thing as PrepDB.py.
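+// The .trie file written here is just the protein sequences concatenated, separated by '*';
+// the .index file holds one fixed-size record per protein: an 8-byte source-file offset,
+// a 4-byte offset into the .trie file, and the protein name padded to 80 bytes.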
+void PrepareSecondarySequenceFile(char* FileName)
+{
+    FILE* FastaFile;
+    FILE* DBFile;
+    FILE* IndexFile;
+    int Dummy = 0;
+    char Char;
+    int BytesRead;
+    int NameLength = 0;
+    int ReadingName = 0;
+    int TargetDBPos = 0;
+    int SourceFilePos = 0;
+    char* StarChar = "*";
+    char* NullChar = "\0";
+    char TempDBName[MAX_FILENAME_LEN + 1];
+    
+    DatabaseFile* Database;
+    //
+    FastaFile = fopen(FileName, "rb");
+    if (!FastaFile)
+    {
+        printf("Couldn't open %s\n", FileName);
+        // If what we got looks like a complete path, then keep the path:
+        if (FileName[0] == '/' || FileName[0] == '.' || FileName[1] == ':')
+        {
+            REPORT_ERROR_S(8, FileName);
+            return;
+        }
+        else
+        {
+            // Otherwise, go to $resourcedir\database
+            sprintf(TempDBName, "%sDatabase%c%s", GlobalOptions->ResourceDir, SEPARATOR, FileName);
+            FastaFile = fopen(TempDBName, "rb");
+            if (!FastaFile)
+            {
+                // If not in /Database, look in just the resource dir:
+                sprintf(TempDBName, "%s%s", GlobalOptions->ResourceDir, FileName);
+                FastaFile = fopen(TempDBName, "rb");
+            }
+        }
+        if (!FastaFile)
+        {
+            REPORT_ERROR_S(8, FileName);
+            return;
+        }
+    }
+
+    DBFile = fopen("AdditionalDB.trie", "wb");
+    IndexFile = fopen("AdditionalDB.index", "wb");
+    if (!DBFile || !IndexFile)
+    {
+        printf("Unable to write out processed secondary database!  Skipping.\n");
+        return;
+    }
+    while (1)
+    {
+        BytesRead = ReadBinary(&Char, sizeof(char), 1, FastaFile);
+        if (!BytesRead)
+        {
+            break;
+        }
+        if (Char == '>')
+        {
+            ReadingName = 1;
+            if (TargetDBPos)
+            {
+                WriteBinary(StarChar, sizeof(char), 1, DBFile);
+                TargetDBPos++;
+            }
+            WriteBinary(&SourceFilePos, sizeof(int), 1, IndexFile);
+            // Source file pos is a long long; assume we must write another 4 bytes:
+            WriteBinary(&Dummy, sizeof(int), 1, IndexFile);
+            WriteBinary(&TargetDBPos, sizeof(int), 1, IndexFile);
+            NameLength = 0;
+            continue;
+        }
+        if (Char == '\r' || Char == '\n')
+        {
+            if (ReadingName)
+            {
+                // Pad the protein name out:
+                while (NameLength < 80)
+                {
+                    WriteBinary(NullChar, sizeof(char), 1, IndexFile);
+                    NameLength++;
+                }
+
+            }
+            ReadingName = 0;
+            continue;
+        }
+        if (ReadingName)
+        {
+            if (NameLength < 79)
+            {
+                WriteBinary(&Char, sizeof(char), 1, IndexFile);
+                NameLength++;
+            }
+
+            continue;
+        }
+        if (Char == ' ' || Char == '\t' || Char == '*')
+        {
+            continue;
+        }
+        WriteBinary(&Char, sizeof(char), 1, DBFile);
+        TargetDBPos++;
+    }
+    fclose(DBFile);
+    fclose(IndexFile);
+    Database = (DatabaseFile*)calloc(1, sizeof(DatabaseFile));
+    strcpy(Database->FileName, "AdditionalDB.trie");
+    Database->Type = evDBTypeTrie;
+    AddDatabase(Database);
+}
+
+// Free the linked list of TagCheckNodes
+void FreeTagCheckNodes()
+{
+    StringNode* Node;
+    StringNode* Prev = NULL;
+    for (Node = FirstTagCheckNode; Node; Node = Node->Next)
+    {
+        if (Prev)
+        {
+            SafeFree(Prev->String);
+            SafeFree(Prev);
+        }
+        Prev = Node;
+    }
+    if (Prev)
+    {
+        SafeFree(Prev->String);
+        SafeFree(Prev);
+    }
+}
+
+void FreeInputFileNodes()
+{
+    InputFileNode* Node;
+    InputFileNode* Prev = NULL;
+
+    // At this point, freeing doesn't matter much, since this function's called
+    // just before program exit.
+    for (Node = GlobalOptions->FirstFile; Node; Node = Node->Next)
+    {
+        SafeFree(Prev);
+        Prev = Node;
+    }
+    SafeFree(Prev);
+}
+
+char* ProteaseNames[] = {"none", "trypsin", "chymotrypsin", "lysc", "aspn", "gluc"};
+
+typedef struct ParseSpectraFromFileInfo
+{
+    int FirstScan;
+    int LastScan;
+    int ScanNumber; // A user-defined number attached to each spectrum.  In mzXML files this is read
+                    // from the scan-number field; for other file types it is a 0-based index.
+    int SpecIndex;  // The 1-based index of the spectrum within the file.  For mzXML files, MS1 scans are not counted.
+    InputFileNode* InputFile;
+} ParseSpectraFromFileInfo;
+
+// Callback for ParseSpectraFromMS2File: Handle one line of an .ms2 spectrum file.
+int ParseSpectraFromMS2FileCallback(int LineNumber, int FilePos, char* LineBuffer, void* UserData)
+{
+    ParseSpectraFromFileInfo* Info;
+    //
+    Info = (ParseSpectraFromFileInfo*)UserData;
+    if (LineBuffer[0] == ':')
+    {
+        Info->InputFile->Format = SPECTRUM_FORMAT_MS2_COLONS;
+        Info->ScanNumber = atoi(LineBuffer + 1);
+        if (Info->ScanNumber >= Info->FirstScan && (Info->LastScan < 0 || Info->ScanNumber < Info->LastScan))
+        {
+            AddSpectrumToList(Info->InputFile, FilePos, Info->ScanNumber, Info->SpecIndex);
+        }
+        Info->SpecIndex += 1;
+    }
+    if (LineBuffer[0] == 'S' && (LineBuffer[1] == ' ' || LineBuffer[1] == '\t'))
+    {
+        // Start of a spectrum:
+        Info->ScanNumber = atoi(LineBuffer + 1);
+        Info->InputFile->Format = SPECTRUM_FORMAT_MS2;
+        if (Info->ScanNumber >= Info->FirstScan && (Info->LastScan < 0 || Info->ScanNumber < Info->LastScan))
+        {
+            AddSpectrumToList(Info->InputFile, FilePos, Info->ScanNumber, Info->SpecIndex);
+        }
+        Info->SpecIndex += 1;
+    }
+    if (LineBuffer[0] == 'Z' && (LineBuffer[1] == ' ' || LineBuffer[1] == '\t'))
+    {
+        //// This is the start of a spectrum:
+        //Info->InputFile->Format = SPECTRUM_FORMAT_MS2;
+        //if (Info->ScanNumber >= Info->FirstScan && (Info->LastScan < 0 || Info->ScanNumber < Info->LastScan))
+        //{
+        //    AddSpectrumToList(Info->InputFile, FilePos, Info->ScanNumber);
+        //}
+    }
+    return 1;
+}
+
+// Iterate over lines in the MS2 file.
+// A line starting with "S" (or, in the colon-delimited variant, with ":") marks the beginning of a spectrum record.
+// The spectrum-loading code knows that it should process the first "Z [charge] [mass]" line it sees, skip any others,
+// and then read peaks until it hits something that is not a peak.
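+// (For illustration: a colon-format header looks like ":1234", and an S-format header looks
+// roughly like "S  1234  1234  897.43"; only the scan number is read at this stage.)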
+void ParseSpectraFromMS2File(char* FileName, InputFileNode* InputFile, int FirstScan, int LastScan)
+{
+    FILE* MS2File;
+    ParseSpectraFromFileInfo Info;
+    //
+    MS2File = fopen(FileName, "rb");
+    if (!MS2File)
+    {
+        REPORT_ERROR_S(8, FileName);
+        return;
+    }
+    printf("Count spectra from '%s'...\n", FileName);
+    Info.FirstScan = FirstScan;
+    Info.LastScan = LastScan;
+    Info.InputFile = InputFile;
+    Info.SpecIndex = 1;
+    ParseFileByLines(MS2File, ParseSpectraFromMS2FileCallback, &Info, 0);
+    fclose(MS2File);
+}
+
+// Callback for ParseSpectraFromMGFFile: Handle one line of an .mgf spectrum file.
+int ParseSpectraFromMGFFileCallback(int LineNumber, int FilePos, char* LineBuffer, void* UserData)
+{
+    ParseSpectraFromFileInfo* Info;
+    //
+    Info = (ParseSpectraFromFileInfo*)UserData;
+    if (!strncmp(LineBuffer, "BEGIN IONS", 10))
+    {
+        if (Info->ScanNumber >= Info->FirstScan && (Info->LastScan < 0 || Info->ScanNumber <= Info->LastScan))
+        {
+            AddSpectrumToList(Info->InputFile, FilePos, Info->ScanNumber, Info->SpecIndex);
+        }
+        Info->SpecIndex++;
+        Info->ScanNumber++;
+    }
+    return 1;
+}
+
+//Callback for ParseSpectraFromCDTAFile: Handle one line of .cdta file
+//Assume: header begins with '=' and the second token after a '.' is the scan number
+int ParseSpectraFromCDTAFileCallback(int LineNumber, int FilePos, char* LineBuffer, void* UserData)
+{
+    ParseSpectraFromFileInfo* Info;
+    char* StrA;
+    char* StrB;
+    int ScanNumber;
+    //
+    Info = (ParseSpectraFromFileInfo*)UserData;
+    if (!strncmp(LineBuffer, "=", 1)) // This denotes the beginning of a new spectrum
+    {
+        StrA = strtok(LineBuffer, ".");
+        StrB = strtok(NULL, ".");
+        ScanNumber = atoi(StrB);
+        if (ScanNumber >= Info->FirstScan && (Info->LastScan < 0 || ScanNumber <= Info->LastScan))
+        {
+            AddSpectrumToList(Info->InputFile, FilePos, ScanNumber, Info->SpecIndex);
+        }
+        Info->SpecIndex++;
+    }
+    return 1;
+}
+
+// Callback for ParseSpectraFromPKLFile: Handle one line of a .pkl spectrum file.
+// Assume: If there are three numbers, then this line is the header of a spectrum.
+int ParseSpectraFromPKLFileCallback(int LineNumber, int FilePos, char* LineBuffer, void* UserData)
+{
+    ParseSpectraFromFileInfo* Info;
+    char* StrA;
+    char* StrB;
+    char* StrC;
+    float FloatValue;
+    int IntValue;
+    //
+    Info = (ParseSpectraFromFileInfo*)UserData;
+
+    // First, check to see that there are three fields on this line of the file:
+    StrA = strtok(LineBuffer, WHITESPACE);
+    StrB = strtok(NULL, WHITESPACE);
+    if (!StrB)
+    {
+        return 1;
+    }
+    StrC = strtok(NULL, WHITESPACE);
+    if (!StrC)
+    {
+        return 1;
+    }
+    // Now, check to see that the three fields are valid numbers:
+    FloatValue = (float)atof(StrA);
+    if (!FloatValue)
+    {
+        return 1;
+    }
+    FloatValue = (float)atof(StrB);
+    if (!FloatValue)
+    {
+        return 1;
+    }
+    IntValue = atoi(StrC);
+    
+    if (Info->ScanNumber >= Info->FirstScan && (Info->LastScan < 0 || Info->ScanNumber <= Info->LastScan))
+    {
+      AddSpectrumToList(Info->InputFile, FilePos, Info->ScanNumber, Info->SpecIndex);
+    }
+    Info->SpecIndex++;
+    Info->ScanNumber++;
+    return 1;
+}
+
+void ParseSpectraFromPKLFile(char* FileName, InputFileNode* InputFile,
+    int FirstScan, int LastScan)
+{
+    FILE* SpectrumFile;
+    ParseSpectraFromFileInfo Info;
+    //
+    SpectrumFile = fopen(FileName, "rb");
+    if (!SpectrumFile)
+    {
+        REPORT_ERROR_S(8, FileName);
+        return;
+    }
+    printf("Count spectra from '%s'...\n", FileName);
+    Info.FirstScan = FirstScan;
+    Info.LastScan = LastScan;
+    Info.InputFile = InputFile;
+    Info.ScanNumber = 0;
+    Info.SpecIndex = 1;
+    ParseFileByLines(SpectrumFile, ParseSpectraFromPKLFileCallback, &Info, 0);
+    fclose(SpectrumFile);
+}
+
+
+//Iterate over lines in the CDTA file.
+//This format is form of concatenated DTA file, where each DTA file is separated by a line 
+//beginning with =.  Scan numbers are attempted to be parsed from the DTA scan header
+//They are expected to be of the form =======FileName.StartScan.EndScan.Charge.dta==========
+//StartScan is taken to be the scan.
+void ParseSpectraFromCDTAFile(char* FileName, InputFileNode* InputFile, int FirstScan, int LastScan)
+{
+    FILE* CDTAFile;
+    ParseSpectraFromFileInfo Info;
+    //
+    CDTAFile = fopen(FileName, "rb");
+    if (!CDTAFile)
+    {
+        REPORT_ERROR_S(8, FileName);
+        return;
+    }
+    printf("Count spectra from '%s'...\n", FileName);
+    Info.FirstScan = FirstScan;
+    Info.LastScan = LastScan;
+    Info.InputFile = InputFile;
+    Info.ScanNumber = 0;
+    Info.SpecIndex = 1;
+    ParseFileByLines(CDTAFile, ParseSpectraFromCDTAFileCallback, &Info, 0);
+    fclose(CDTAFile);
+}
+
+// Iterate over lines in the MGF file.
+// If you reach a line of the form "BEGIN IONS", that starts a spectrum record.
+// The spectrum parse code knows that it should process a CHARGE line, a PEPMASS line, then peak lines.
+// Note: Scan numbers from the MGF file are *ignored*!  The first scan we see is number 0,
+// the next is number 1, etc.
+void ParseSpectraFromMGFFile(char* FileName, InputFileNode* InputFile,
+    int FirstScan, int LastScan)
+{
+    FILE* MGFFile;
+    ParseSpectraFromFileInfo Info;
+    //
+    MGFFile = fopen(FileName, "rb");
+    if (!MGFFile)
+    {
+        REPORT_ERROR_S(8, FileName);
+        return;
+    }
+    printf("Count spectra from '%s'...\n", FileName);
+    Info.FirstScan = FirstScan;
+    Info.LastScan = LastScan;
+    Info.InputFile = InputFile;
+    Info.ScanNumber = 0;
+    Info.SpecIndex = 1;
+    ParseFileByLines(MGFFile, ParseSpectraFromMGFFileCallback, &Info, 0);
+    fclose(MGFFile);
+}
+
+void AddSpectrumNodesForFile(char* FileName, InputFileNode* InputFile,
+    int FirstScan, int LastScan)
+{
+    int Format;
+    //
+    // Based upon the file extension, decide whether and how to parse the input file
+    Format = GuessSpectrumFormatFromExtension(FileName);
+    InputFile->Format = Format;
+    switch (Format)
+    {
+    case SPECTRUM_FORMAT_MS2:
+    case SPECTRUM_FORMAT_MS2_COLONS:
+        ParseSpectraFromMS2File(FileName, InputFile, FirstScan, LastScan);
+        break;
+    case SPECTRUM_FORMAT_MZXML:
+        ParseSpectraFromMZXML(FileName, InputFile, FirstScan, LastScan);
+        break;
+    case SPECTRUM_FORMAT_MZDATA:
+        ParseSpectraFromMZData(FileName, InputFile, FirstScan, LastScan);
+        break;
+    case SPECTRUM_FORMAT_MGF:
+        ParseSpectraFromMGFFile(FileName, InputFile, FirstScan, LastScan);
+        break;
+    case SPECTRUM_FORMAT_PKL:
+        ParseSpectraFromPKLFile(FileName, InputFile, FirstScan, LastScan);
+        break;
+    case SPECTRUM_FORMAT_DTA:
+        // Let's assume that we can treat it as a .dta file.
+      AddSpectrumToList(InputFile, 0, 0,1);
+        break;
+    case SPECTRUM_FORMAT_CDTA:
+    	//This is a special flavor of concatenated DTA file (ala PNNL)
+    	ParseSpectraFromCDTAFile(FileName,InputFile,FirstScan,LastScan);
+    	break;
+    default:
+        printf("Not parsing unknown spectrum file format:%s\n", FileName);
+        break;
+    }
+}
+
+// Add a spectrum file to our input list.  If the file contains multiple spectra,
+// then we'll create several SpectrumNode instances.  If FirstScan is set and >0,
+// skip all scans with scan number < FirstScan.  If LastScan is set and >-1, then skip
+// all scans with scan number > LastScan.  (INCLUSIVE ends)
+void AddSpectraToList(char* FileName, int FirstScan, int LastScan)
+{
+    InputFileNode* NewFile;
+    //
+    NewFile = (InputFileNode*)calloc(1, sizeof(InputFileNode));
+    strncpy(NewFile->FileName, FileName, MAX_FILENAME_LEN);
+    if (GlobalOptions->LastFile)
+    {
+        GlobalOptions->LastFile->Next = NewFile;
+    }
+    else
+    {
+        GlobalOptions->FirstFile = NewFile;
+    }
+    GlobalOptions->LastFile = NewFile;
+    AddSpectrumNodesForFile(FileName, NewFile, FirstScan, LastScan);
+    //strncpy(NewNode->FileName, FileName, MAX_FILENAME_LEN);
+
+}
+
+#ifdef _WIN32
+// The WINDOWS way to iterate over a directory:
+void ProcessInputCommandSpectra(char* FileName, int FirstScan, int LastScan)
+{
+    char DirBuffer[1024];
+    char StarBuffer[1024];
+    char FileNameBuffer[1024];
+    struct stat StatBuffer;
+    int StatResult;
+    int Len;
+    int Result;
+    int SkipFile;
+    if (!FileName || !FileName[0])
+    {
+        printf("* Error: null filename specified in 'spectra' command\n");
+        return;
+    }
+    StatResult = stat(FileName, &StatBuffer);
+    if (StatResult < 0)
+    {
+        REPORT_ERROR_S(8, FileName);
+        //printf("Unable to stat '%s' - skipping.\n", FileName);
+        return;
+    }
+    if (StatBuffer.st_mode & _S_IFDIR)
+    {
+        HANDLE hFindFile;
+        WIN32_FIND_DATA wFileFindData;
+        sprintf(DirBuffer, "%s", FileName);
+        Len = strlen(FileName);
+        if (DirBuffer[Len-1] != '\\')
+        {
+            strcat(DirBuffer, "\\");
+        }
+        sprintf(StarBuffer, "%s*.*", DirBuffer);
+        hFindFile = FindFirstFile(StarBuffer, &wFileFindData);
+        while (hFindFile != INVALID_HANDLE_VALUE)
+        {
+            SkipFile = 0;
+            if (wFileFindData.cFileName[0]=='\0')
+            {
+                SkipFile = 1;
+            }
+            if (wFileFindData.cFileName[0]=='.' && wFileFindData.cFileName[1]=='\0')
+            {
+                SkipFile = 1;
+            }
+            if (wFileFindData.cFileName[0]=='.' && wFileFindData.cFileName[1]=='.' && wFileFindData.cFileName[2]=='\0')
+            {
+                SkipFile = 1;
+            }
+            if (!SkipFile)
+            {
+                sprintf(FileNameBuffer, "%s%s", DirBuffer, wFileFindData.cFileName);
+                StatResult = stat(FileNameBuffer, &StatBuffer);
+                if (StatBuffer.st_mode & _S_IFREG)
+                {
+                    //printf("Adding file to list: '%s'\n", FileNameBuffer);
+                    AddSpectraToList(FileNameBuffer, FirstScan, LastScan);
+                }
+            }
+            Result = FindNextFile(hFindFile, &wFileFindData);
+            if (!Result)
+            {
+                break;
+            }
+        }
+    }
+    else
+    {
+        AddSpectraToList(FileName, FirstScan, LastScan);
+    }
+}
+#else
+// The UNIX way to iterate over a directory:
+void ProcessInputCommandSpectra(char* FileName, int FirstScan, int LastScan)
+{
+    char DirBuffer[1024];
+    char FileNameBuffer[1024];
+    struct stat StatBuffer;
+    DIR *dirp;
+    struct dirent *ep;
+    int StatResult;
+    int Len;
+    if (!FileName || !FileName[0])
+    {
+        printf("* Error: null filename specified in 'spectra' command\n");
+        return;
+    }
+
+    StatResult = stat(FileName, &StatBuffer);
+    if (StatResult < 0)
+    {
+        REPORT_ERROR_S(8, FileName);
+        return;
+    }
+    if (S_ISDIR(StatBuffer.st_mode))
+    {
+        sprintf(DirBuffer, "%s", FileName);
+        Len = strlen(FileName);
+        if (DirBuffer[Len-1] != '/')
+        {
+            strcat(DirBuffer, "/");
+        }
+        dirp = opendir(DirBuffer);
+        while ((ep = readdir(dirp)) != NULL)
+        {
+            Len = strlen(ep->d_name);
+            if (ep->d_name[0]=='\0')
+            {
+                continue;
+            }
+            if (ep->d_name[0]=='.' && ep->d_name[1]=='\0')
+            {
+                continue;
+            }
+            if (ep->d_name[0]=='.' && ep->d_name[1]=='.' && ep->d_name[2]=='\0')
+            {
+                continue;
+            }
+            sprintf(FileNameBuffer, "%s%s", DirBuffer, ep->d_name);
+            StatResult = stat(FileNameBuffer, &StatBuffer);
+            if (S_ISREG(StatBuffer.st_mode))
+            {
+                AddSpectraToList(FileNameBuffer, FirstScan, LastScan);
+            }
+        }
+        closedir(dirp);
+    }
+    else
+    {
+        AddSpectraToList(FileName, FirstScan, LastScan);
+    }
+}
+#endif
+
+#define REJECT_NULL_VALUE(name)\
+{\
+    if (!CommandValue || !CommandValue[0]) \
+    {\
+        printf("* Error: Null value for '%s'\n", #name);\
+        return;\
+    }\
+}
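+// (Note: this macro assumes a local variable named CommandValue is in scope at each expansion site.)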
+
+typedef int (*InputParameterParser)(char* CommandValue);
+
+#define INPUT_VALUE_TYPE_NONE 0
+#define INPUT_VALUE_TYPE_INT 1
+#define INPUT_VALUE_TYPE_STRING 2
+typedef struct InputParameter
+{
+    char* Name;
+    InputParameterParser ParseFunction;
+    int ValueType;
+} InputParameter;
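+// (Commands from the input file are dispatched through a table of these entries; for example,
+// an entry tying the "spectra" command to its parser would look something like
+// {"spectra", ParseInputSpectra, INPUT_VALUE_TYPE_STRING}.)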
+
+int ParseInputTagCheck(char* Value)
+{
+    StringNode* StrNode;
+    //
+    StrNode = (StringNode*)calloc(1, sizeof(StringNode));
+    StrNode->String = strdup(Value);
+    if (FirstTagCheckNode)
+    {
+        LastTagCheckNode->Next = StrNode;
+    }
+    else
+    {
+        FirstTagCheckNode = StrNode;
+    }
+    LastTagCheckNode = StrNode;
+    return 1;
+}
+
+int ParseInputTagsOnly(char* Value)
+{
+    GlobalOptions->RunMode |= RUN_MODE_TAGS_ONLY;
+    return 1;
+}
+
+int ParseInputExternalTagger(char* Value)
+{
+    GlobalOptions->ExternalTagger = 1;
+    ReadExternalTags(Value,1); //ARI_MOD
+    return 1;
+}
+
+int ParseInputSpectra(char* Value)
+{
+    char* ScanStr;
+    int FirstScan;
+    int LastScan;
+    // Spectrum file:
+    // Note: If the file is a directory, we will iterate over all the files in
+    // the directory.  We don't recurse into subdirectories.
+    //ScanStr = strtok(NULL, ",");
+    FirstScan = 0; //default
+    LastScan = -1; //default
+    ScanStr = strtok(NULL, ",");
+    if (ScanStr)
+    {
+        FirstScan = atoi(ScanStr);
+        ScanStr = strtok(NULL, ",");
+        if (ScanStr)
+        {
+            LastScan = atoi(ScanStr);
+            // LastScan of -1 means no upper limit...but otherwise, LastScan should
+            // not be below FirstScan!
+            if (LastScan < FirstScan && LastScan >= 0)
+            {
+                REPORT_WARNING_II(9, FirstScan, LastScan);
+                return 0;
+            }
+        }
+    }
+    ProcessInputCommandSpectra(Value, FirstScan, LastScan);
+    return 1;
+}
+
+int ParseInputInstrument(char* CommandValue)
+{
+    // Instrument name is ESI-ION-TRAP, QTOF, or FT-HYBRID.  If QTOF, use a different
+    // scoring model, and don't perform parent-mass correction.
+    if (!CompareStrings(CommandValue, "ESI-ION-TRAP"))
+    {
+        GlobalOptions->InstrumentType = INSTRUMENT_TYPE_LTQ;
+    }
+    else if (!CompareStrings(CommandValue, "QTOF"))
+    {
+        GlobalOptions->InstrumentType = INSTRUMENT_TYPE_QTOF;
+        GlobalOptions->ParentMassPPM = 100;
+    }
+    else if (!CompareStrings(CommandValue, "FT-HYBRID"))
+    {
+        GlobalOptions->InstrumentType = INSTRUMENT_TYPE_FT_HYBRID;
+        GlobalOptions->ParentMassPPM = 100;
+    }
+    else
+    {
+        printf("** Warning: unknown instrument type '%s'\n", CommandValue);
+        return 0;
+    }
+    return 1;
+}
+
+int ParseInputProtease(char* CommandValue)
+{
+    int ProteaseIndex;
+    for (ProteaseIndex = 0; ProteaseIndex < sizeof(ProteaseNames)/sizeof(char*); ProteaseIndex++)
+    {
+        if (!CompareStrings(ProteaseNames[ProteaseIndex], CommandValue))
+        {
+            GlobalOptions->DigestType = ProteaseIndex;
+            return 1;
+        }
+    }
+    printf("* Error: Protease '%s' not understood\n", CommandValue);
+    return 0;
+}
+
+int GuessDBTypeFromExtension(char* FileName)
+{
+    char* Extension;
+    Extension = FileName + strlen(FileName);
+    while (Extension > FileName)
+    {
+        Extension--;
+        if (*Extension == '.')
+        {
+            if (!CompareStrings(Extension, ".ms2db"))
+            {
+                return evDBTypeMS2DB;
+            }
+            if (!CompareStrings(Extension, ".dat"))
+            {
+                return evDBTypeSpliceDB;
+            }
+        }
+    }
+    return evDBTypeTrie; // default guess
+}
+
+int ParseInputDB(char* CommandValue)
+{
+    DatabaseFile* Database;
+    FILE* TempFile;
+    char DBFileName[MAX_FILENAME_LEN + 1];
+  
+    //printf("CommandValue: %s\n",CommandValue);
+    /// If what we got looks like a complete path, then keep the path:
+    if (CommandValue[0]=='/' || CommandValue[0]=='.' || CommandValue[1]==':')
+    {
+        strncpy(DBFileName, CommandValue, MAX_FILENAME_LEN);
+    }
+    else
+    {
+        // Otherwise, go to $resourcedir\database
+        sprintf(DBFileName, "%sDatabase%c%s", GlobalOptions->ResourceDir, SEPARATOR, CommandValue);
+        TempFile = fopen(DBFileName, "rb");
+        if (!TempFile)
+        {
+            // If not in /Database, look in just the resource dir:
+            sprintf(DBFileName, "%s%s", GlobalOptions->ResourceDir, CommandValue);
+        }
+        else
+        {
+            fclose(TempFile);
+        }
+    }
+    printf("DBFileName: %s\n",DBFileName);
+    //To-ju: Putting protein databases in a subfolder of the inspect executable will cause unnatural coupling.
+
+    //strncpy(DBFileName, CommandValue, MAX_FILENAME_LEN);
+
+    
+    TempFile = fopen(DBFileName, "rb");
+    if (!TempFile)
+    {
+        REPORT_ERROR_S(8, DBFileName);
+        return 0;
+    }
+    else
+    {
+        fclose(TempFile);
+    }
+    Database = (DatabaseFile*)calloc(1, sizeof(DatabaseFile));
+    strcpy(Database->FileName, DBFileName);
+    Database->Type = GuessDBTypeFromExtension(Database->FileName);
+    AddDatabase(Database);
+    return 1;
+}
+
+int ParseInputPMTolerance(char* CommandValue)
+{
+  GlobalOptions->ParentMassEpsilon = (int)(strtod(CommandValue,NULL) * MASS_SCALE);
+    return 1;
+}
+
+int ParseInputReportMatches(char* CommandValue)
+{
+    GlobalOptions->ReportMatchCount = atoi(CommandValue);
+    GlobalOptions->ReportMatchCount = min(100, max(1, GlobalOptions->ReportMatchCount));
+    return 1;
+}
+
+int ParseInputRequireTermini(char* CommandValue)
+{
+    int RequireTerminiCount;
+    //
+    RequireTerminiCount = atoi(CommandValue);
+    if (RequireTerminiCount < 0 || RequireTerminiCount > 2)
+    {
+        REPORT_ERROR_I(47, RequireTerminiCount);
+    }
+
+    GlobalOptions->RequireTermini = RequireTerminiCount;
+    return 1;
+}
+
+int ParseInputRequiredMod(char* CommandValue)
+{
+    strncpy(GlobalOptions->MandatoryModName, CommandValue, 256);
+    return 1;
+}
+int ParseInputTagCount(char* CommandValue)
+{
+    GlobalOptions->GenerateTagCount = atoi(CommandValue);
+    return 1;
+}
+
+int ParseInputTagLength(char* CommandValue)
+{
+    GlobalOptions->GenerateTagLength = atoi(CommandValue);
+    if (GlobalOptions->GenerateTagLength <= 0 || GlobalOptions->GenerateTagLength > 6)
+    {
+        REPORT_ERROR_I(38, GlobalOptions->GenerateTagLength);
+        GlobalOptions->GenerateTagLength = DEFAULT_TAG_LENGTH;
+        return 0;
+    }
+    return 1;
+}
+
+int ParseInputIonTolerance(char* CommandValue)
+{
+    //if (!CompareStrings(CommandName, "IonTolerance") || !CompareStrings(CommandName, "Ion_Tolerance"))
+  GlobalOptions->Epsilon = (int)(strtod(CommandValue,NULL) * MASS_SCALE);
+    return 1;
+}
+
+int ParseInputMods(char* CommandValue)
+{
+    GlobalOptions->MaxPTMods = atoi(CommandValue);
+    return 1;
+}
+
+int ParseInputFreeMods(char* CommandValue)
+{
+    char Path[MAX_FILENAME_LEN];
+
+    GlobalOptions->MaxPTMods = atoi(CommandValue);
+    // "freemods,1" or "freemods,2" allows mutations plus a rich PTM set.
+    if (GlobalOptions->MaxPTMods && !(GlobalOptions->RunMode & RUN_MODE_BLIND))
+    {
+        GlobalOptions->RunMode |= RUN_MODE_MUTATION;
+        GlobalOptions->PhosphorylationFlag = 1;
+        //sprintf(Path, "%s%s", GlobalOptions->ResourceDir, FILENAME_MASS_DELTAS);
+        //LoadMassDeltas(Path,  GlobalOptions->RunMode & RUN_MODE_MUTATION);
+    }
+    return 1;
+}
+
+int ParseInputLogOdds(char* CommandValue)
+{
+  float LogOdds = atof(CommandValue);
+  GlobalOptions->MinLogOddsForMutation = LogOdds;
+  return 1;
+}
+
+/*int ParseInputSuffixArrayBuild(char * CommandValue)
+{
+  
+  int ret;
+
+  ret = buildSuffixArray(CommandValue,NULL);
+  exit(ret);
+
+}
+*/
+int ParseInputMutationMode(char* CommandValue)
+{
+    AllPTModCount = 0;
+
+    //THIS IS FIXED, WE ONLY ALLOW 1 MUTATION PER PEPTIDE!
+    GlobalOptions->MaxPTMods = 1;
+
+
+    //printf("MaxPTMods: %d\n",GlobalOptions->MaxPTMods);
+    // "mutationMode,1 or mutationMode,2 allows 1 or 2 mutations per peptide
+    if (GlobalOptions->MaxPTMods && !(GlobalOptions->RunMode & RUN_MODE_BLIND))
+    {
+        GlobalOptions->RunMode |= RUN_MODE_TAG_MUTATION;
+	//printf("MaxPTMods: %d\n",GlobalOptions->MaxPTMods);
+        GlobalOptions->PhosphorylationFlag = 1;	
+    }
+    return 1;
+}
+
+int ParseInputPMCOnly(char* CommandValue)
+{
+    if (atoi(CommandValue))
+    {
+        GlobalOptions->RunMode |= RUN_MODE_PMC_ONLY;
+    }
+    return 1;
+}
+
+int ParseInputNoScoring(char* CommandValue)
+{
+    GlobalOptions->RunMode |= RUN_MODE_RAW_OUTPUT;
+    return 1;
+}
+int ParseInputTagless(char* CommandValue)
+{
+    GlobalOptions->TaglessSearchFlag = atoi(CommandValue);
+    return 1;
+}
+int ParseInputBlind(char* CommandValue)
+{
+    if (atoi(CommandValue))
+    {
+        GlobalOptions->RunMode |= RUN_MODE_BLIND;
+    }
+    return 1;
+}
+int ParseInputBlindTagging(char* CommandValue)
+{
+    if (atoi(CommandValue))
+    {
+        GlobalOptions->RunMode |= RUN_MODE_BLIND_TAG;
+    }
+    return 1;
+}
+
+// Maximum size, in daltons, of PTMs to consider.  (Blind search only)
+int ParseInputMaxPTMSize(char* CommandValue)
+{
+    GlobalOptions->MaxPTMDelta = atoi(CommandValue);
+    if (GlobalOptions->MaxPTMDelta < 1 || GlobalOptions->MaxPTMDelta >= 2000)
+    {
+        printf("** Error: Invalid maxptmsize '%s' - please select a value between 10 and 2000Da\n", CommandValue);
+        GlobalOptions->MaxPTMDelta = 200;
+        return 0;
+    }
+    GlobalOptions->DeltaBinCount = (GlobalOptions->MaxPTMDelta - GlobalOptions->MinPTMDelta) * 10 + 1;
+    GlobalOptions->DeltasPerAA = max(512, GlobalOptions->DeltaBinCount * 2);
+    return 1;
+}
+
+// Minimum size, in daltons, of PTMs to consider.  (Blind search only)
+int ParseInputMinPTMSize(char* CommandValue)
+{
+    GlobalOptions->MinPTMDelta = atoi(CommandValue);
+    if (GlobalOptions->MinPTMDelta < -2000 || GlobalOptions->MinPTMDelta > 2000)
+    {
+        printf("** Error: Invalid minptmsize '%s' - please select a value between -2000 and 2000Da\n", CommandValue);
+        GlobalOptions->MinPTMDelta = -200;
+        return 0;
+    }
+    GlobalOptions->DeltaBinCount = (GlobalOptions->MaxPTMDelta - GlobalOptions->MinPTMDelta) * 10 + 1;
+    GlobalOptions->DeltasPerAA = max(512, GlobalOptions->DeltaBinCount * 2);
+    return 1;
+}
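+
+// Worked example of the bin arithmetic above: with MinPTMDelta = -200 and
+// MaxPTMDelta = 250, DeltaBinCount = (250 - (-200)) * 10 + 1 = 4501, i.e. one
+// delta bin per 0.1 Da across the allowed PTM mass range (values illustrative).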
+
+// If multicharge flag is set, then ALWAYS try charge correction on spectra.  (Otherwise, do it only
+// if the source file provides no charge, or says the charge is zero)
+int ParseInputMultiCharge(char* CommandValue)
+{
+    GlobalOptions->MultiChargeMode = atoi(CommandValue);
+    return 1;
+}
+int ParseInputXMLStrict(char* CommandValue)
+{
+    GlobalOptions->XMLStrictFlag = atoi(CommandValue);
+    return 1;
+}
+
+void debugPrintPTMStuff()
+{
+  int index = 0;
+  int index2 = 0;
+  int dIndex = 0;
+  printf("AllKnownPTMods:\n");
+  for (index = 0; index < AllPTModCount; ++index)
+  {
+      printf(" [%d]: Name=%s, Mass=%d, Flags=%x\n", index, AllKnownPTMods[index].Name, AllKnownPTMods[index].Mass, AllKnownPTMods[index].Flags);
+      for (index2 = 0; index2 < TRIE_CHILD_COUNT; ++index2)
+      {
+          printf("  - Allowed on %c=%d\n", (char)(index2 + 'A'), AllKnownPTMods[index].Allowed[index2]);
+      }
+  }
+
+  printf("\nMassDeltas:\n");
+  for (index = 0; index < TRIE_CHILD_COUNT; ++index)
+  {
+      for (index2 = 0; index2 < GlobalOptions->DeltasPerAA; index2++)
+      {
+          if (!MassDeltas[index][index2].Flags)
+          {
+              continue;
+          }
+          printf("[%c][%d]: Delta=%d, RealDelta=%d, Name=%s, Index=%d\n", (char)(index + 'A'), index2, MassDeltas[index][index2].Delta, MassDeltas[index][index2].RealDelta, MassDeltas[index][index2].Name, MassDeltas[index][index2].Index);
+      }
+  }
+  /*
+  printf("\nMassDeltasByIndex:\n");
+  for(index = 0; index < AMINO_ACIDS; ++index)
+    {
+      for(index2 = 0; index2 < MAX_PT_MODTYPE; ++index2)
+	{
+	  dIndex = index*MAX_PT_MODTYPE+index2;
+	  printf("[%d] (AA:%c,index:%d) : Delta=%d,RealDelta=%d,Name=%s,Index=%d\n",dIndex, (char)(index+'A'),index2,MassDeltaByIndex[dIndex]->Delta,MassDeltaByIndex[dIndex]->RealDelta,MassDeltaByIndex[dIndex]->Name,MassDeltaByIndex[dIndex]->Index);
+	}
+	}*/
+  
+}
+
+int ParseInputPRMModel(char* CommandValue)
+{
+    char* StrCharge = NULL;
+    char* FileName;
+    int Charge;
+    //
+    StrCharge = CommandValue;
+    FileName = strtok(CommandValue, ",");
+    Charge = atoi(StrCharge);
+    if (Charge < 2 || Charge > 3)
+    {
+        REPORT_ERROR(46);
+        return 0;
+    }
+    return ReplacePRMScoringModel(Charge, FileName);
+}
+
+int ParseInputTAGModel(char* CommandValue)
+{
+    char* StrCharge = NULL;
+    char* FileName;
+    int Charge;
+    //
+    StrCharge = CommandValue;
+    FileName = strtok(CommandValue, ",");
+    Charge = atoi(StrCharge);
+    if (Charge < 2 || Charge > 3)
+    {
+        REPORT_ERROR(46);
+        return 0;
+    }
+    return ReplaceTAGScoringModel(Charge, FileName);
+}
+
+int ParseInputMod(char* CommandValue)
+{
+    int ModFlags;
+    char* StrMass = NULL;
+    char* StrAminos = NULL;
+    char* StrType = NULL;
+    char* StrName = NULL;
+    float MassDelta;
+    char* Amino;
+    int AminoIndex;
+    int AminoFoundFlag;
+    int Bin;
+    int ModIndex;
+    char ModNameBuffer[64];
+    //
+    if (!MassDeltas)
+    {
+        LoadMassDeltas(NULL, 0);
+    }
+    if (AllPTModCount == MAX_PT_MODTYPE)
+    {
+        // Too many!
+        REPORT_ERROR_S(35, CommandValue);
+        return 0;
+    }
+    ModFlags = DELTA_FLAG_VALID;
+    StrMass = CommandValue;
+    StrAminos = strtok(NULL, ","); // required, can be "*" for no specificity
+    if (!StrAminos || !*StrAminos)
+    {
+        printf("* Error: Modification must have amino acids specified!\n");
+        return 0;
+    }
+    StrType = strtok(NULL, ","); // optional: fix/opt/cterminal/nterminal
+    if (StrType)
+    {
+        StrName = strtok(NULL, ","); // optional: name
+    }
+    if (!StrMass || !StrAminos || !StrAminos[0])
+    {
+        printf("** Error: invalid modification in input file.  Skipping!\n");
+        return 0;
+    }
+    if (strstr(StrAminos, "*"))
+    {
+        StrAminos = "ACDEFGHIKLMNPQRSTVWY";
+    }
+    MassDelta = (float)atof(StrMass);
+    if (MassDelta == 0 || MassDelta > 1000 || MassDelta < -200)
+    {
+        printf("** Error: invalid modification in input file; mass is %.2f.  Skipping!\n", MassDelta);
+        return 0;
+    }
+    // Default modification type is OPTIONAL.
+    if (!StrType)
+    {
+        StrType = "opt";
+    }
+    // Default name is the mass (rounded to integer, with sign indicated)
+    if (!StrName)
+    {
+        if (MassDelta > 0)
+        {
+            sprintf(ModNameBuffer, "%+d", (int)(MassDelta + 0.5));
+        }
+        else
+        {
+            sprintf(ModNameBuffer, "%-d", (int)(MassDelta - 0.5));
+        }
+        StrName = ModNameBuffer;
+    }
+    // If it's a fixed modification, then adjust the amino acid mass:
+    if (!CompareStrings(StrType, "fix") || !CompareStrings(StrType, "fixed"))
+    {
+        for (Amino = StrAminos; *Amino; Amino++)
+        {
+            AminoIndex = *Amino - 'A';
+            if (AminoIndex >= 0 && AminoIndex < TRIE_CHILD_COUNT)
+            {
+                PeptideMass[Amino[0]] += (int)(MassDelta * MASS_SCALE);
+                // We haven't yet called PopulateJumpingHash(), so that's all we need to do
+            }
+        }
+        return 1;
+    }
+    else if (!CompareStrings(StrType, "cterminal") || !CompareStrings(StrType, "c-terminal"))
+    {
+        ModFlags |= DELTA_FLAG_C_TERMINAL;
+    }
+    else if (!CompareStrings(StrType, "nterminal") || !CompareStrings(StrType, "n-terminal"))
+    {
+        ModFlags |= DELTA_FLAG_N_TERMINAL;
+    }
+    else if (!CompareStrings(StrType, "opt") || !CompareStrings(StrType, "optional"))
+    {
+        ; // pass
+    }
+    else
+    {
+        REPORT_ERROR_S(36, StrType);
+    }
+
+    if (!CompareStrings(StrName, "phosphorylation"))
+    {
+        g_PhosphorylationMod = AllPTModCount;
+        GlobalOptions->PhosphorylationFlag = 1;
+        ModFlags |= DELTA_FLAG_PHOSPHORYLATION;
+    }
+    AllKnownPTMods[AllPTModCount].Flags = ModFlags;
+    strncpy(AllKnownPTMods[AllPTModCount].Name, StrName, 40);
+    // Add another modification to each amino acid's mod-array:
+    AminoFoundFlag = 0;
+    for (Amino = StrAminos; *Amino; Amino++)
+    {
+        AminoIndex = *Amino - 'A';
+        if (AminoIndex >= 0 && AminoIndex < TRIE_CHILD_COUNT)
+        {
+            AminoFoundFlag = 1;
+            AllKnownPTMods[AllPTModCount].Allowed[AminoIndex] = 1;
+            // Add to the first still-available slot:
+            for (ModIndex = 0; ModIndex < GlobalOptions->DeltasPerAA; ModIndex++)
+            {
+                if (!MassDeltas[AminoIndex][ModIndex].Flags)
+                {
+                    strncpy(MassDeltas[AminoIndex][ModIndex].Name, StrName, 40);
+                    MassDeltas[AminoIndex][ModIndex].RealDelta = (int)(MassDelta * MASS_SCALE);
+                    ROUND_MASS_TO_DELTA_BIN(MassDelta, Bin);
+                    MassDeltas[AminoIndex][ModIndex].Delta = Bin;
+                    MassDeltas[AminoIndex][ModIndex].Index = AllPTModCount;
+                    MassDeltaByIndex[AminoIndex * MAX_PT_MODTYPE + AllPTModCount] = &MassDeltas[AminoIndex][ModIndex];
+                    MassDeltaByIndex[MDBI_ALL_MODS * MAX_PT_MODTYPE + AllPTModCount] = &MassDeltas[AminoIndex][ModIndex];
+                    MassDeltas[AminoIndex][ModIndex].Flags = ModFlags;
+                    break;
+                }
+            }
+        }
+    }
+    if (!AminoFoundFlag)
+    {
+        REPORT_ERROR_S(37, StrAminos);
+        return 0;
+    }
+    AllKnownPTMods[AllPTModCount].Mass = (int)(MassDelta * MASS_SCALE);
+    g_PTMLimit[AllPTModCount] = 2; // allow 2 per peptide by default
+    // But only allow ONE N- or C-terminal modification per peptide:
+    if ((ModFlags & DELTA_FLAG_C_TERMINAL) || (ModFlags & DELTA_FLAG_N_TERMINAL))
+    {
+        g_PTMLimit[AllPTModCount] = 1;
+    }
+    AllPTModCount++;
+    return 1;
+}
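+
+// Illustrative "mod" input lines for the syntax parsed above
+// (mass, residues or "*", optional type fix/opt/cterminal/nterminal, optional name).
+// The masses and names are examples only, not a canonical modification list;
+// note that the name "phosphorylation" is recognized specially above:
+//     mod,57.02146,C,fix,Carbamidomethyl
+//     mod,79.9663,STY,opt,phosphorylation
+//     mod,42.0106,*,nterminal,Acetyl
+//     mod,16,M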
+int ParseInputPTM(char* CommandValue)
+{
+    printf("*** The 'ptm' input command is no longer supported - please use 'mod' instead.\n");
+    printf(" (Refer to the documentation for details)\n");
+    return 0;
+}
+int ParseInputSequenceFile(char* CommandValue)
+{
+    PrepareSecondarySequenceFile(CommandValue);
+    return 1;
+}
+
+int ParseInputReadGFF(char* CommandValue)
+{
+    FILE* GFFFile;
+    StringNode* Node;
+    // Check to be sure we can read the file:
+    GFFFile = fopen(CommandValue, "rb");
+    if (!GFFFile)
+    {
+        REPORT_ERROR_S(8, CommandValue);
+    }
+    else
+    {
+        // File is ok - add it to the GFF file list.
+        fclose(GFFFile);
+        Node = (StringNode*)calloc(1, sizeof(StringNode));
+        Node->String = strdup(CommandValue);
+        if (GlobalOptions->LastGFFFileName)
+        {
+            GlobalOptions->LastGFFFileName->Next = Node;
+        }
+        else
+        {
+            GlobalOptions->FirstGFFFileName = Node;
+        }
+        GlobalOptions->LastGFFFileName = Node;
+    }
+    GlobalOptions->RunMode = RUN_MODE_PREP_MS2DB;
+    return 1;
+}
+
+int ParseInputGenomeFile(char* CommandValue)
+{
+    strncpy(GlobalOptions->GenomeFileName, CommandValue, MAX_FILENAME_LEN);
+    GlobalOptions->RunMode = RUN_MODE_PREP_MS2DB;
+    return 1;
+}
+
+int ParseInputChromosomeName(char* CommandValue)
+{
+    strncpy(GlobalOptions->ChromosomeName, CommandValue, 256);
+    return 1;
+}
+
+int ParseInputParentPPM(char* ValueString)
+{
+    int CommandValue = atoi(ValueString);
+    if (CommandValue < 1 || CommandValue > 4000)
+    {
+        REPORT_ERROR_I(44, CommandValue);
+        return 0;
+    }
+    GlobalOptions->ParentMassPPM = CommandValue;
+    return 1;
+}
+
+int ParseInputPeakPPM(char* ValueString)
+{
+    int CommandValue = atoi(ValueString);
+    if (CommandValue < 1 || CommandValue > 1000)
+    {
+        REPORT_ERROR_I(44, CommandValue);
+        return 0;
+    }
+    GlobalOptions->PeakPPM = CommandValue;
+    return 1;
+}
+
+int ParseInputNewScoring(char* Value)
+{
+    GlobalOptions->NewScoring = 1;
+    return 1;
+}
+static const InputParameter InputParameters[] =
+{
+    {"Blind", ParseInputBlind, INPUT_VALUE_TYPE_INT},
+    {"Unrestrictive", ParseInputBlind, INPUT_VALUE_TYPE_INT},
+    {"BlindTagging", ParseInputBlindTagging, INPUT_VALUE_TYPE_INT},
+    {"Database", ParseInputDB, INPUT_VALUE_TYPE_STRING},
+    {"DB", ParseInputDB, INPUT_VALUE_TYPE_STRING},
+    //    {"ExternalTagger", ParseInputExternalTagger, INPUT_VALUE_TYPE_NONE}, //ARI_MOD
+    {"ExternalTagFile",ParseInputExternalTagger,INPUT_VALUE_TYPE_STRING}, //ARI_MOD
+    {"FreeMods", ParseInputFreeMods, INPUT_VALUE_TYPE_INT},
+    {"MutationMode",ParseInputMutationMode,INPUT_VALUE_TYPE_NONE},
+    {"Instrument", ParseInputInstrument, INPUT_VALUE_TYPE_STRING},
+    {"IonTolerance", ParseInputIonTolerance, INPUT_VALUE_TYPE_STRING},
+    {"MaxPTMSize", ParseInputMaxPTMSize, INPUT_VALUE_TYPE_INT},
+    {"MinPTMSize", ParseInputMinPTMSize, INPUT_VALUE_TYPE_INT},
+    {"Mod", ParseInputMod, INPUT_VALUE_TYPE_STRING},
+    {"Mods", ParseInputMods, INPUT_VALUE_TYPE_INT},
+    {"MultiCharge", ParseInputMultiCharge, INPUT_VALUE_TYPE_INT},
+    {"PMCOnly", ParseInputPMCOnly, INPUT_VALUE_TYPE_INT},
+    {"PMTolerance", ParseInputPMTolerance, INPUT_VALUE_TYPE_STRING},
+    {"PM_Tolerance", ParseInputPMTolerance, INPUT_VALUE_TYPE_STRING},  // deprecated
+    {"PRMModel", ParseInputPRMModel, INPUT_VALUE_TYPE_STRING},
+    {"Protease", ParseInputProtease, INPUT_VALUE_TYPE_STRING},
+    {"ReportMatches", ParseInputReportMatches, INPUT_VALUE_TYPE_INT},
+    {"RequireTermini", ParseInputRequireTermini, INPUT_VALUE_TYPE_INT},
+    {"RequiredMod", ParseInputRequiredMod, INPUT_VALUE_TYPE_STRING},
+    {"SequenceFile", ParseInputSequenceFile, INPUT_VALUE_TYPE_STRING},
+    {"Spectra", ParseInputSpectra, INPUT_VALUE_TYPE_STRING},
+    {"TagCheck", ParseInputTagCheck, INPUT_VALUE_TYPE_STRING},
+    {"TagCount", ParseInputTagCount, INPUT_VALUE_TYPE_INT},
+    {"TagCountB", ParseInputTagCount, INPUT_VALUE_TYPE_INT}, // deprecated
+    {"TagLength", ParseInputTagLength, INPUT_VALUE_TYPE_INT},
+    {"TAGModel", ParseInputTAGModel, INPUT_VALUE_TYPE_STRING},
+    {"Tagless", ParseInputTagless, INPUT_VALUE_TYPE_INT},
+    {"TagsOnly", ParseInputTagsOnly, INPUT_VALUE_TYPE_NONE},
+    {"XMLStrict", ParseInputXMLStrict, INPUT_VALUE_TYPE_INT},
+    {"NoScoring",ParseInputNoScoring,INPUT_VALUE_TYPE_NONE},
+
+    // Commands for preparing MS2DB files:
+    {"ReadGFF", ParseInputReadGFF, INPUT_VALUE_TYPE_STRING},
+    {"GenomeFile", ParseInputGenomeFile, INPUT_VALUE_TYPE_STRING},
+    {"ChromosomeName", ParseInputChromosomeName, INPUT_VALUE_TYPE_STRING},
+    {"ParentPPM", ParseInputParentPPM, INPUT_VALUE_TYPE_INT},
+    {"PeakPPM", ParseInputPeakPPM, INPUT_VALUE_TYPE_INT},
+    {"NewScoring",ParseInputNewScoring,INPUT_VALUE_TYPE_NONE},
+    {"MinMutationLogOdds",ParseInputLogOdds,INPUT_VALUE_TYPE_STRING},
+    //{"BuildSuffixArray",ParseInputSuffixArrayBuild,INPUT_VALUE_TYPE_STRING},
+    // Sentinel:
+    {NULL}
+};
+
+// Process one line from the inspect input file.  Lines have the form "command,value".
+int ProcessInputCommand(int LineNumber, int FilePos, char* LineBuffer, void* UserData)
+{
+    const InputParameter* Parameter;
+    int CommandMatched = 0;
+    int ValueOK = 1;
+    char* CheckChar;
+    char* CommandName;
+    char* Value;
+    //
+    CommandName = strtok(LineBuffer, ",");
+    Value = strtok(NULL, ",");
+    for (Parameter = InputParameters; Parameter->Name; Parameter++)
+    {
+        if (CompareStrings(CommandName, Parameter->Name))
+        {
+            continue;
+        }
+        CommandMatched = 1;
+        // Validate the value:
+        switch (Parameter->ValueType)
+        {
+        case INPUT_VALUE_TYPE_NONE:
+            if (Value && *Value)
+            {
+                REPORT_ERROR_S(39, CommandName);
+                ValueOK = 0;
+            }
+            break;
+        case INPUT_VALUE_TYPE_STRING:
+            if (!Value || !*Value)
+            {
+                REPORT_ERROR_S(40, CommandName);
+                ValueOK = 0;
+            }
+            break;
+        case INPUT_VALUE_TYPE_INT:
+            if (!Value || !*Value)
+            {
+                REPORT_ERROR_S(41, CommandName);
+                ValueOK = 0;
+                break;
+            }
+            for (CheckChar = Value; *CheckChar; CheckChar++)
+            {
+                // Allow a leading sign, since some integer options (e.g. MinPTMSize) can be negative:
+                if (CheckChar == Value && (*CheckChar == '-' || *CheckChar == '+'))
+                {
+                    continue;
+                }
+                if (!isdigit(*CheckChar))
+                {
+                    REPORT_ERROR_S(41, CommandName);
+                    ValueOK = 0;
+                    break;
+                }
+            }
+            break;
+        }
+        if (ValueOK)
+        {
+            Parameter->ParseFunction(Value);
+        }
+    }
+    if (!CommandMatched)
+    {
+        REPORT_ERROR_S(13, CommandName);
+    }
+    return 1;
+}
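+
+// Illustrative input file handled line-by-line by ProcessInputCommand above;
+// commands come from the InputParameters table, and the values here are placeholders:
+//     Spectra,MySpectra.mzXML
+//     DB,MyDatabase.trie
+//     Protease,Trypsin
+//     Mod,57.02146,C,fix
+//     Mods,1
+//     PMTolerance,2.5
+//     IonTolerance,0.5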
+
+// Parse the input file; return TRUE if successful.
+int ParseInputFile()
+{
+    FILE* InputFile;
+    int ModIndex;
+
+    ///////////////////
+    InputFile = fopen(GlobalOptions->InputFileName, "rb");
+    if (!InputFile)
+    {
+        REPORT_ERROR_S(8, GlobalOptions->InputFileName);
+        return 0;
+    }
+    ParseFileByLines(InputFile, ProcessInputCommand, NULL, 0);
+    fclose(InputFile);
+
+    // PTM processing:
+    if (AllPTModCount && !GlobalOptions->MaxPTMods)
+    {
+        // This is worrisome - the user has defined modifications, but matches are not
+        // permitted to USE modifications.  That is reasonable only under weird circumstances.
+        if (GlobalOptions->RunMode & RUN_MODE_TAGS_ONLY && GlobalOptions->ExternalTagger)
+        {
+            //
+        }
+        else
+        {
+            REPORT_ERROR(34);
+        }
+    }
+    for (ModIndex = 0; ModIndex < AllPTModCount; ModIndex++)
+    {
+        g_PTMLimit[ModIndex] = min(g_PTMLimit[ModIndex], GlobalOptions->MaxPTMods);
+    }
+    if (GlobalOptions->MaxPTMods > 2)
+    {
+        if (GlobalOptions->RunMode & (RUN_MODE_MUTATION | RUN_MODE_BLIND))
+        {
+            printf("** Warning: Unrestrictive search with more than two mods is NOT recommended.\n");
+        }
+    }
+    // Set the flanking mass tolerance: Equal to parent mass tolerance plus ion tolerance
+    // plus 0.1
+    GlobalOptions->FlankingMassEpsilon = GlobalOptions->ParentMassEpsilon + GlobalOptions->Epsilon + 10;
+    //debugPrintPTMStuff();
+    if (GlobalOptions->ErrorCount)
+    {
+        return 0;
+    }
+    else
+    {
+        return 1;
+    }
+}
+
+int CompareSpectrumNodes(const SpectrumNode* NodeA, const SpectrumNode* NodeB)
+{
+    int NameResult;
+    NameResult = strcmp(NodeA->InputFile->FileName, NodeB->InputFile->FileName);
+    if (NameResult)
+    {
+        return NameResult;
+    }
+    //return (strcmp(NodeA->FileName, NodeB->FileName));
+    return (NodeA->FilePosition - NodeB->FilePosition);
+}
+
+// Sort spectra by filename.
+void SortSpectra()
+{
+    SpectrumNode* Node;
+    SpectrumNode* Prev;
+    int NodeIndex;
+    int NodeCount;
+    //
+    if (!GlobalOptions->FirstSpectrum)
+    {
+        return;
+    }
+    g_BigNodeArray = (SpectrumNode*)calloc(GlobalOptions->SpectrumCount, sizeof(SpectrumNode));
+    NodeIndex = 0;
+    for (Node = GlobalOptions->FirstSpectrum; Node; Node = Node->Next)
+    {
+        memcpy(g_BigNodeArray + NodeIndex, Node, sizeof(SpectrumNode));
+        NodeIndex++;
+    }
+    NodeCount = NodeIndex;
+
+    // Free old list:
+    Prev = NULL;
+    for (Node = GlobalOptions->FirstSpectrum; Node; Node = Node->Next)
+    {
+        SafeFree(Prev);
+        Prev = Node;
+    }
+    SafeFree(Prev);
+
+    // Sort array:
+    qsort(g_BigNodeArray, NodeCount, sizeof(SpectrumNode), (QSortCompare)CompareSpectrumNodes);
+    for (NodeIndex = 0; NodeIndex < NodeCount; NodeIndex++)
+    {
+        if (NodeIndex < NodeCount-1)
+        {
+            g_BigNodeArray[NodeIndex].Next = g_BigNodeArray + NodeIndex + 1;
+        }
+        else
+        {
+            g_BigNodeArray[NodeIndex].Next = NULL;
+        }
+    }
+    GlobalOptions->FirstSpectrum = g_BigNodeArray;
+    GlobalOptions->LastSpectrum = g_BigNodeArray + (NodeCount - 1);
+}
+
+
diff --git a/ParseInput.h b/ParseInput.h
new file mode 100644
index 0000000..983e5a6
--- /dev/null
+++ b/ParseInput.h
@@ -0,0 +1,44 @@
+//Title:          ParseInput.h
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+#ifndef PARSE_INPUT_H
+#define PARSE_INPUT_H
+
+
+
+int ParseInputFile();
+void FreeTagCheckNodes();
+void FreeInputFileNodes();
+void SortSpectra();
+int ProcessInputCommand(int LineNumber, int FilePos, char* LineBuffer, void* UserData);
+
+#endif //PARSE_INPUT_H
+
diff --git a/ParseXML.c b/ParseXML.c
new file mode 100644
index 0000000..b910b55
--- /dev/null
+++ b/ParseXML.c
@@ -0,0 +1,1239 @@
+//Title:          ParseXML.c
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+#include "CMemLeak.h"
+#include "Spectrum.h"
+#include "Utils.h"
+#include "Inspect.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <memory.h>
+#include <string.h>
+#include <math.h>
+#include "CMemLeak.h"
+#include "Tagger.h"
+#include "base64.h"
+#include "Errors.h"
+#include "expat.h"
+
+#define MZXML_BUFFER_SIZE 102400
+
+typedef enum MZXMLScanState
+{
+    evMZXMLNone = 0,
+    evMZXMLPrecursorMZ,
+    evMZXMLPeaks,
+} MZXMLScanState;
+
+typedef enum MZDataScanState
+{
+    evMZDataNone = 0,
+    evMZDataIonSelection,
+    evMZDataMZArray,
+    evMZDataMZArrayBody,
+    evMZDataIntensityArray,
+    evMZDataIntensityArrayBody
+} MZDataScanState;
+
+#define MZXML_PARSE_LIST_SPECTRA 0
+#define MZXML_PARSE_OBTAIN_PEAKS 1
+
+// The MZXMLParseCursor is used for parsing MZXML and MZDATA formats.
+// It holds the expat Parser object, and a limited amount of parse state
+// (i.e. the current tag).
+typedef struct MZXMLParseCursor
+{
+    int FirstScan;
+    int LastScan;
+    int ScanNumber;
+    int SpecIndex;
+    int ErrorFlag;
+    int Charge; // NEC: mzXML files may contain precursorCharge
+    XML_Parser Parser;
+    int PeakCountAllocation;
+    int PeakBufferSize;
+    int PeakBufferPos;
+    char* PeakBuffer;
+    char* DecodedPeakBuffer;
+    float* Peaks;
+    char PrecursorMZBuffer[256];
+    InputFileNode* InputFile;
+    int State;
+    MSSpectrum* Spectrum;
+    int PeakCount;
+    int ByteOrderLittle;
+    int SpectrumPeaksCompleteFlag;
+    void* XMLBuffer;
+    int Mode;
+    int MSLevel;
+} MZXMLParseCursor;
+
+typedef struct MZDataParseCursor
+{
+    int FirstScan;
+    int LastScan;
+    int ScanNumber;
+    int SpecIndex;
+    int ErrorFlag;
+    XML_Parser Parser;
+    int PeakCountAllocation;
+    int PeakBufferSize;
+    int PeakBufferPos;
+    char* PeakBuffer;
+    char* DecodedPeakBuffer;
+    float* MZBuffer;
+    float* IntensityBuffer;
+    InputFileNode* InputFile;
+    int State;
+    MSSpectrum* Spectrum;
+    int PeakCount;
+    int ByteOrderLittle;
+    int SpectrumPeaksCompleteFlag;
+    void* XMLBuffer;
+    int Mode;
+    float* Peaks;
+    int PrecursorMZ;
+    int SpectrumStartFilePos;
+    int MSLevel;
+    int Precision;
+} MZDataParseCursor;
+
+// We build a single MZXMLCursor when needed, and free it when cleaning up.
+MZXMLParseCursor* g_MZXMLParseCursor = NULL;
+MZDataParseCursor* g_MZDataParseCursor = NULL;
+
+void EndianByteSwap(char* Buffer, int EntrySize)
+{
+    char ByteSwap;
+    int Pos;
+
+    for (Pos = 0; Pos < (EntrySize / 2); Pos++)
+    {
+        ByteSwap = Buffer[Pos];
+        Buffer[Pos] = Buffer[EntrySize - Pos - 1];
+        Buffer[EntrySize - Pos - 1] = ByteSwap;
+    }
+}
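+
+// For example, EndianByteSwap applied to the 4-byte buffer {0x3F, 0x80, 0x00, 0x00}
+// (1.0f in big-endian/network order) reverses it in place to {0x00, 0x00, 0x80, 0x3F},
+// the little-endian layout of the same float; the swap is its own inverse.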
+// expat callback: Handle character data in the body of a tag. 
+// The only mzxml body we care about is <peaks>
+void MZXMLCharacterDataHandler(void* UserData, const XML_Char* String, int Length)
+{
+    MZXMLParseCursor* Cursor;
+    int PeakCopySize;
+    //
+    Cursor = (MZXMLParseCursor*)UserData;
+    if (Cursor->ErrorFlag)
+    {
+        return;
+    }
+    switch (Cursor->State)
+    {
+    case evMZXMLPrecursorMZ:
+        strncat(Cursor->PrecursorMZBuffer, String, min(Length, 255));
+        break;
+    case evMZXMLPeaks:
+        PeakCopySize = Length;
+        if (Cursor->PeakBufferPos + PeakCopySize >= Cursor->PeakBufferSize)
+        {
+            REPORT_ERROR(0);
+            PeakCopySize = max(0, Cursor->PeakBufferSize - Cursor->PeakBufferPos - 1);
+        }
+        memcpy(Cursor->PeakBuffer + Cursor->PeakBufferPos, String, PeakCopySize);
+        Cursor->PeakBufferPos += PeakCopySize;
+        Cursor->PeakBuffer[Cursor->PeakBufferPos] = '\0';
+        break;
+    // Default behavior is to ignore text:
+    default:
+        break;
+    }
+}
+
+void MZXMLStartScan(MZXMLParseCursor* Cursor, const char** Attributes)
+{
+    int AttributeIndex;
+    const char* Name;
+    const char* Value;
+    int MSLevel = 1;
+    int ScanNumber = -1;
+    int PeakCount = 0;
+    int FilePos;
+    //
+    if (Cursor->ErrorFlag)
+    {
+        return;
+    }
+    for (AttributeIndex = 0; Attributes[AttributeIndex]; AttributeIndex += 2)
+    {
+        Name = Attributes[AttributeIndex];
+        Value = Attributes[AttributeIndex + 1];
+        if (!CompareStrings(Name, "msLevel"))
+        {
+            MSLevel = atoi(Value);
+        }
+        else if (!CompareStrings(Name, "peaksCount"))
+        {
+            PeakCount = atoi(Value);
+        }
+        else if (!CompareStrings(Name, "num"))
+        {
+            ScanNumber = atoi(Value);
+        }
+    }
+    
+    Cursor->ScanNumber = -1;
+    Cursor->PeakBufferPos = 0;
+    Cursor->PeakBuffer[0] = '\0';
+    Cursor->PrecursorMZBuffer[0] = '\0';
+    Cursor->PeakCount = PeakCount;
+    Cursor->MSLevel = MSLevel;
+
+
+    // If it's an MS2-or-higher scan with a non-trivial peak count, then we should parse it:
+    if (MSLevel >= 2 && PeakCount >= 10)
+    {
+        if (ScanNumber >= Cursor->FirstScan && (Cursor->LastScan < 0 || ScanNumber <= Cursor->LastScan))
+        {
+            FilePos = XML_GetCurrentByteIndex(Cursor->Parser);
+            if (Cursor->Mode == MZXML_PARSE_LIST_SPECTRA)
+            {
+                AddSpectrumToList(Cursor->InputFile, FilePos, ScanNumber, Cursor->SpecIndex);
+            }
+            Cursor->ScanNumber = ScanNumber;
+            // Allocate peak buffer, if necessary:
+            if (PeakCount >= Cursor->PeakCountAllocation)
+            {
+                Cursor->PeakCountAllocation = PeakCount * 2;
+                Cursor->PeakBufferSize = sizeof(double) * 4 * Cursor->PeakCountAllocation;
+                SafeFree(Cursor->PeakBuffer);
+                Cursor->PeakBuffer = (char*)malloc(Cursor->PeakBufferSize);
+                SafeFree(Cursor->Peaks);
+                Cursor->Peaks = (float*)malloc(sizeof(float) * 2 * Cursor->PeakCountAllocation);
+                SafeFree(Cursor->DecodedPeakBuffer);
+                Cursor->DecodedPeakBuffer = (char*)malloc(Cursor->PeakBufferSize);
+            }
+        }
+    }
+    if (MSLevel >= 2)
+    {
+        Cursor->SpecIndex++;
+    }
+}
+
+// Callback for reaching </peaks> in an mzXML parser - decode the peak array!
+void MZXMLFinishPeaks(MZXMLParseCursor* Cursor, MSSpectrum* Spectrum)
+{
+    int Trail;
+    int FloatIndex;
+    int PeakCount;
+    int PeakIndex;
+    float Value;
+    float RawMass;
+    //
+
+    PeakCount = Cursor->PeakCount;
+
+    Trail = (PeakCount % 3);
+    if (!(PeakCount % 3))
+    {
+        Cursor->PeakBuffer[PeakCount * 32/3] = '\0';
+    }
+    else
+    {
+        Cursor->PeakBuffer[(PeakCount * 32/3) + Trail + 1] = '\0';
+    }
+    b64_decode_mio(Cursor->PeakBuffer, Cursor->DecodedPeakBuffer);
+    for (FloatIndex = 0; FloatIndex < (2 * PeakCount); FloatIndex++)
+    {
+#ifdef BYTEORDER_LITTLE_ENDIAN
+        if (!Cursor->ByteOrderLittle)
+        {
+            EndianByteSwap(Cursor->DecodedPeakBuffer + (FloatIndex * 4), 4);
+        }
+#else
+        if (Cursor->ByteOrderLittle)
+        {
+            EndianByteSwap(Cursor->DecodedPeakBuffer + (FloatIndex * 4), 4);
+        }
+#endif
+        memcpy(Cursor->Peaks + FloatIndex, Cursor->DecodedPeakBuffer + FloatIndex * 4, 4);
+    }
+
+    Spectrum->PeakCount = PeakCount;
+    Spectrum->PeakAllocation = PeakCount;
+    Spectrum->Peaks = (SpectralPeak*)calloc(sizeof(SpectralPeak), PeakCount);
+    if (!Spectrum->Peaks)
+    {
+        REPORT_ERROR_I(49, sizeof(SpectralPeak) * PeakCount);
+    }
+
+    for (PeakIndex = 0; PeakIndex < PeakCount; PeakIndex++)
+    {
+        Value = Cursor->Peaks[PeakIndex * 2];
+        RawMass = Value;
+        Spectrum->Peaks[PeakIndex].Mass = (int)(Value * MASS_SCALE + 0.5);
+
+        Value = Cursor->Peaks[PeakIndex * 2 + 1];
+        Spectrum->Peaks[PeakIndex].Intensity = Value;
+    }
+    if (Spectrum->Peaks[0].Mass < -1 || Spectrum->Peaks[0].Mass > (GlobalOptions->DynamicRangeMax + GlobalOptions->Epsilon))
+    {
+        REPORT_WARNING_SII(45, Spectrum->Node->InputFile->FileName, Spectrum->Node->ScanNumber,
+            Spectrum->Peaks[0].Mass / MASS_SCALE);
+    }
+    if (Spectrum->Peaks[Spectrum->PeakCount - 1].Mass < -1 || Spectrum->Peaks[Spectrum->PeakCount - 1].Mass > (GlobalOptions->DynamicRangeMax + GlobalOptions->Epsilon))
+    {
+        REPORT_WARNING_SII(45, Spectrum->Node->InputFile->FileName, Spectrum->Node->ScanNumber,
+            Spectrum->Peaks[Spectrum->PeakCount - 1].Mass / MASS_SCALE);
+    }
+    if (Spectrum->Peaks[0].Intensity < 0)
+    {
+        REPORT_WARNING_SIF(45, Spectrum->Node->InputFile->FileName, Spectrum->Node->ScanNumber,
+            Spectrum->Peaks[0].Intensity);
+    }
+    if (Spectrum->Peaks[Spectrum->PeakCount - 1].Intensity < 0)
+    {
+        REPORT_WARNING_SIF(45, Spectrum->Node->InputFile->FileName, Spectrum->Node->ScanNumber,
+            Spectrum->Peaks[Spectrum->PeakCount - 1].Intensity);
+    }
+
+    Cursor->State = evMZXMLNone;
+    Cursor->SpectrumPeaksCompleteFlag = 1;
+    // After the closing </peaks> tag, this scan ends.
+    // Nuke the handlers, so we can finish off the buffer in peace.
+    XML_SetElementHandler(Cursor->Parser, NULL, NULL);
+    XML_SetCharacterDataHandler(Cursor->Parser, NULL);
+    XML_SetProcessingInstructionHandler(Cursor->Parser, NULL);
+}
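+
+// Note: the decoder above assumes the usual mzXML <peaks> layout - a base64
+// string holding PeakCount (m/z, intensity) pairs of 32-bit floats in the byte
+// order named by the "byteOrder" attribute (network/big-endian by default).
+// For example, two peaks are 4 floats = 16 bytes, i.e. 24 base64 characters
+// including padding.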
+
+// expat callback: End a tag.
+void MZXMLEndElement(void* UserData, const char* Tag)
+{
+    MZXMLParseCursor* Cursor;
+    MSSpectrum* Spectrum;
+    //
+    Cursor = (MZXMLParseCursor*)UserData;
+    Spectrum = Cursor->Spectrum;
+    //printf("End '%s'\n", Tag); 
+    // Set the precursor m/z, if appropriate:
+    if (Cursor->State == evMZXMLPrecursorMZ)
+    {
+        if (Spectrum)
+        {
+            Spectrum->MZ = (int)(MASS_SCALE * strtod(Cursor->PrecursorMZBuffer, NULL));
+            Spectrum->FileMZ = (int)(MASS_SCALE * strtod(Cursor->PrecursorMZBuffer, NULL));
+            if (Cursor->Charge != 0 && Cursor->Charge < 6)
+            {
+                Spectrum->FileCharge[Cursor->Charge] = 1;
+                Spectrum->FileChargeFlag = 1;
+            }
+            Spectrum->Charge = Cursor->Charge;
+            Spectrum->ParentMass = (Spectrum->MZ * Spectrum->Charge) - (Spectrum->Charge - 1) * HYDROGEN_MASS;
+        }
+    }
+
+    // If we just finished <peaks>, and we have a spectrum, then set the peaks.
+    if (Cursor->State != evMZXMLPeaks || !Spectrum)
+    {
+        Cursor->State = evMZXMLNone;
+        return;
+    }
+    MZXMLFinishPeaks(Cursor, Spectrum);
+}
+
+void MZXMLStartPeaks(MZXMLParseCursor* Cursor, const char** Attributes)
+{
+    const char* Name;
+    const char* Value;
+    int ScanNumber = -1;
+    int PeakCount = 0;
+    int AttributeIndex;
+    //
+    if (Cursor->ErrorFlag)
+    {
+        return;
+    }
+    if (Cursor->MSLevel < 2)
+    {
+        return; // we don't care about peaks at level 1
+    }
+    
+    Cursor->State = evMZXMLPeaks;
+    Cursor->PeakBuffer[0] = '\0';
+    Cursor->PeakBufferPos = 0;
+    for (AttributeIndex = 0; Attributes[AttributeIndex]; AttributeIndex += 2)
+    {
+        Name = Attributes[AttributeIndex];
+        Value = Attributes[AttributeIndex + 1];
+        if (!CompareStrings(Name, "byteOrder"))
+        {
+            // Parse the byte ordering:
+            if (!CompareStrings(Value, "network"))
+            {
+                Cursor->ByteOrderLittle = 0;
+            }
+            else if (!CompareStrings(Value, "little"))
+            {
+                Cursor->ByteOrderLittle = 1;
+            }
+            else if (!CompareStrings(Value, "big"))
+            {
+                Cursor->ByteOrderLittle = 0;
+            }
+        }
+    }
+}
+
+// expat callback: Handle a tag and its attributes.
+void MZXMLStartElement(void* UserData, const char* Tag, const char** Attributes)
+{
+    MZXMLParseCursor* Cursor;
+    int ExpectedTag = 0;
+
+    //NEC_Added for precursorCharge parsing
+    int AttributeIndex = 0; 
+    const char* Name;
+    const char* Value;
+    //
+    Cursor = (MZXMLParseCursor*)UserData;
+    if (Cursor->ErrorFlag)
+    {
+        return;
+    }
+    //printf("Start '%s'\n", Tag);
+    // Switch on our current state, and handle the tags we expect to see in our current state.
+    // Tags we don't expect are ignored (i.e. new tags can be added without breaking the parser)
+    switch (Cursor->State)
+    {
+    default:
+        // If we encounter <scan>, start the new scan:
+        if (!strcmp(Tag, "scan"))
+        {
+            MZXMLStartScan(Cursor, Attributes);
+            return;
+        }
+        if (!strcmp(Tag, "precursorMz"))
+        {
+            Cursor->State = evMZXMLPrecursorMZ;
+            Cursor->Charge = 0;
+            for (AttributeIndex = 0; Attributes[AttributeIndex]; AttributeIndex += 2)
+            {
+                Name = Attributes[AttributeIndex];
+                Value = Attributes[AttributeIndex + 1];
+                if (!CompareStrings(Name, "precursorCharge"))
+                {
+                    Cursor->Charge = atoi(Value);
+                }
+            }
+
+            Cursor->PrecursorMZBuffer[0] = '\0';
+            return;
+        }
+        if (!strcmp(Tag, "peaks"))
+        {
+            MZXMLStartPeaks(Cursor, Attributes);
+            return;
+        }
+        break;
+    }
+}
+
+void MZDataParseMSLevel(MZDataParseCursor* Cursor, const char** Attributes)
+{
+    const char* Name;
+    const char* Value;
+    int AttributeIndex;
+    for (AttributeIndex = 0; Attributes[AttributeIndex]; AttributeIndex += 2)
+    {
+        Name = Attributes[AttributeIndex];
+        Value = Attributes[AttributeIndex + 1];
+        if (!strcmp(Name, "msLevel"))
+        {
+            Cursor->MSLevel = atoi(Value);
+        }
+    }
+}
+
+void MZDataGetPrecursorMZ(MZDataParseCursor* Cursor, const char** Attributes)
+{
+    const char* Name;
+    const char* Value;
+    int AttributeIndex;
+    int MassChargeRatioFlag = 0;
+    double FloatValue;
+    for (AttributeIndex = 0; Attributes[AttributeIndex]; AttributeIndex += 2)
+    {
+        Name = Attributes[AttributeIndex];
+        Value = Attributes[AttributeIndex + 1];
+        if (!strcmp(Name, "name"))
+        {
+            if (!strcmp(Value, "MassToChargeRatio") || !strcmp(Value, "mz"))
+            {
+                MassChargeRatioFlag = 1;
+            }
+            continue;
+        }
+        if (!strcmp(Name, "value"))
+        {
+	  FloatValue = strtod(Value,NULL);
+        }
+    }
+    if (MassChargeRatioFlag)
+    {
+        Cursor->PrecursorMZ = (int)(MASS_SCALE * FloatValue);
+    }
+}
+
+// Look up the number of peaks.  Re-allocate buffers if necessary.
+void MZDataGetPeakCount(MZDataParseCursor* Cursor, const char** Attributes)
+{
+    const char* Name;
+    const char* Value;
+    int AttributeIndex;
+    for (AttributeIndex = 0; Attributes[AttributeIndex]; AttributeIndex += 2)
+    {
+        Name = Attributes[AttributeIndex];
+        Value = Attributes[AttributeIndex + 1];
+        if (!strcmp(Name, "precision"))
+        {
+            Cursor->Precision = atoi(Value);
+        }
+        else if (!strcmp(Name, "endian"))
+        {
+            Cursor->ByteOrderLittle = 0; // default
+            if (!strcmp(Value, "little"))
+            {
+                Cursor->ByteOrderLittle = 1;
+            }
+            else if (!strcmp(Value, "network"))
+            {
+                Cursor->ByteOrderLittle = 0;
+            }
+            else if (!strcmp(Value, "big"))
+            {
+                Cursor->ByteOrderLittle = 0;
+            }
+            continue;
+        }
+        if (!strcmp(Name, "length"))
+        {
+            Cursor->PeakCount = atoi(Value);
+            // Is this more peaks than we can currently handle?
+            if (Cursor->PeakCount >= Cursor->PeakCountAllocation)
+            {
+                Cursor->PeakCountAllocation = Cursor->PeakCount * 2;
+                SafeFree(Cursor->PeakBuffer);
+                Cursor->PeakBufferSize = sizeof(double) * 4 * Cursor->PeakCountAllocation;
+                Cursor->PeakBuffer = (char*)malloc(Cursor->PeakBufferSize);
+                SafeFree(Cursor->DecodedPeakBuffer);
+                Cursor->DecodedPeakBuffer = (char*)malloc(sizeof(float) * Cursor->PeakCountAllocation);
+                SafeFree(Cursor->IntensityBuffer);
+                Cursor->IntensityBuffer = (float*)malloc(sizeof(float) * Cursor->PeakCountAllocation);
+                SafeFree(Cursor->MZBuffer);
+                Cursor->MZBuffer = (float*)malloc(sizeof(float) * Cursor->PeakCountAllocation);
+            }
+            continue;
+        }
+    }
+}
+
+// expat callback: Handle a tag and its attributes.
+void MZDataStartElement(void* UserData, const char* Tag, const char** Attributes)
+{
+    MZDataParseCursor* Cursor;
+    int ExpectedTag = 0;
+    //
+    Cursor = (MZDataParseCursor*)UserData;
+    if (Cursor->ErrorFlag)
+    {
+        return;
+    }
+
+    // Switch on our current state, and handle the tags we expect to see in our current state.
+    // Tags we don't expect are ignored (i.e. new tags can be added without breaking the parser)
+    // If we encounter <spectrum>, start the new scan:
+    if (!strcmp(Tag, "spectrum"))
+    {
+        //MZDataStartScan(Cursor, Attributes);
+        Cursor->SpectrumStartFilePos = XML_GetCurrentByteIndex(Cursor->Parser);
+        Cursor->ScanNumber = atoi(Attributes[1]);
+        return;
+    }
+    // If we encounter <ionSelection>, update our state:
+    if (!strcmp(Tag, "ionSelection"))
+    {
+        Cursor->State = evMZDataIonSelection;
+        return;
+    }
+    // If we encounter <cvParam> within ionSelection, set precursor m/z if possible:
+    if (!strcmp(Tag, "cvParam") && Cursor->State == evMZDataIonSelection)
+    {
+        MZDataGetPrecursorMZ(Cursor, Attributes);
+        return;
+    }
+    
+    if (!strcmp(Tag, "data"))
+    {
+        switch (Cursor->State)
+        {
+        case evMZDataMZArray:
+            Cursor->State = evMZDataMZArrayBody;
+            Cursor->PeakBufferPos = 0;
+            MZDataGetPeakCount(Cursor, Attributes);
+            break;
+        case evMZDataIntensityArray:
+            Cursor->State = evMZDataIntensityArrayBody;
+            Cursor->PeakBufferPos = 0;
+            MZDataGetPeakCount(Cursor, Attributes);
+            break;
+        default:
+            REPORT_ERROR(0);
+            break;
+        }
+        return;
+    }
+    if (!strcmp(Tag, "mzArrayBinary"))   
+    {
+        Cursor->State = evMZDataMZArray;
+        return;
+    }
+    if (!strcmp(Tag, "intenArrayBinary"))   
+    {
+        Cursor->State = evMZDataIntensityArray;
+        return;
+    }
+    if (!strcmp(Tag, "precursor"))
+    {
+        MZDataParseMSLevel(Cursor, Attributes);
+        return;
+    }
+}
+
+void MZDataCompleteSpectrum(MZDataParseCursor* Cursor, MSSpectrum* Spectrum)
+{
+    int PeakIndex;
+
+    if (Cursor->Mode == MZXML_PARSE_LIST_SPECTRA)
+    {
+        if (Cursor->PeakCount >= 10 && Cursor->MSLevel > 1)
+        {
+            AddSpectrumToList(Cursor->InputFile, Cursor->SpectrumStartFilePos, Cursor->ScanNumber, Cursor->SpecIndex);
+            Cursor->SpecIndex++;
+        }
+        return;
+    }
+    
+    Spectrum->PeakCount = Cursor->PeakCount;
+    Spectrum->PeakAllocation = Cursor->PeakCount;
+    Spectrum->Peaks = (SpectralPeak*)calloc(sizeof(SpectralPeak), Cursor->PeakCount);
+    Spectrum->MZ = Cursor->PrecursorMZ;
+    Spectrum->FileMZ = Cursor->PrecursorMZ;
+
+    for (PeakIndex = 0; PeakIndex < Cursor->PeakCount; PeakIndex++)
+    {
+        Spectrum->Peaks[PeakIndex].Mass = (int)(Cursor->MZBuffer[PeakIndex] * MASS_SCALE + 0.5);
+        Spectrum->Peaks[PeakIndex].Intensity = Cursor->IntensityBuffer[PeakIndex];
+    }
+    if (Spectrum->Peaks[0].Mass < -1 || Spectrum->Peaks[0].Mass > (GlobalOptions->DynamicRangeMax + GlobalOptions->Epsilon))
+    {
+        REPORT_WARNING_SII(45, Spectrum->Node->InputFile->FileName, Spectrum->Node->ScanNumber,
+            Spectrum->Peaks[0].Mass / MASS_SCALE);
+    }
+    if (Spectrum->Peaks[Spectrum->PeakCount - 1].Mass < -1 || Spectrum->Peaks[Spectrum->PeakCount - 1].Mass > (GlobalOptions->DynamicRangeMax + GlobalOptions->Epsilon))
+    {
+        REPORT_WARNING_SII(45, Spectrum->Node->InputFile->FileName, Spectrum->Node->ScanNumber,
+            Spectrum->Peaks[Spectrum->PeakCount - 1].Mass / MASS_SCALE);
+    }
+    if (Spectrum->Peaks[0].Intensity < 0)
+    {
+        REPORT_WARNING_SIF(45, Spectrum->Node->InputFile->FileName, Spectrum->Node->ScanNumber,
+            Spectrum->Peaks[0].Intensity);
+    }
+    if (Spectrum->Peaks[Spectrum->PeakCount - 1].Intensity < 0)
+    {
+        REPORT_WARNING_SIF(45, Spectrum->Node->InputFile->FileName, Spectrum->Node->ScanNumber,
+            Spectrum->Peaks[Spectrum->PeakCount - 1].Intensity);
+    }
+
+    Cursor->State = evMZDataNone;
+    Cursor->SpectrumPeaksCompleteFlag = 1;
+    // After the closing </spectrum> tag, this scan ends.
+    // Nuke the handlers, so we can finish off the buffer in peace.
+    XML_SetElementHandler(Cursor->Parser, NULL, NULL);
+    XML_SetCharacterDataHandler(Cursor->Parser, NULL);
+    XML_SetProcessingInstructionHandler(Cursor->Parser, NULL);
+
+}
+
+// MZData callback for end </data> tag:
+// - Decode the base64-encoded float array
+// - Store the floats in the MZ or Intensity array
+void MZDataProcessEncodedPeakData(MZDataParseCursor* Cursor, MSSpectrum* Spectrum)
+{
+    int PeakCount;
+    int Trail;
+    int FloatIndex;
+    int EncodedRecordSize;
+    //
+    if (Cursor->State == evMZDataIntensityArrayBody)
+    {
+        Cursor->State = evMZDataIntensityArray;
+    }
+    else if (Cursor->State == evMZDataMZArrayBody)
+    {
+        Cursor->State = evMZDataMZArray;
+    }
+    else
+    {
+        REPORT_ERROR(0);
+    }
+    if (!Spectrum)
+    {
+        return;
+    }
+    PeakCount = Cursor->PeakCount;
+    Trail = (PeakCount % 3);
+    if (!(PeakCount % 3))
+    {
+        Cursor->PeakBuffer[PeakCount * 32/3] = '\0';
+    }
+    else
+    {
+        Cursor->PeakBuffer[(PeakCount * 32/3) + Trail + 1] = '\0';
+    }
+    b64_decode_mio(Cursor->PeakBuffer, Cursor->DecodedPeakBuffer);
+    if (Cursor->Precision == 32)
+    {
+        EncodedRecordSize = 4;
+    }
+    else if (Cursor->Precision == 64)
+    {
+        EncodedRecordSize = 8;
+    }
+    else
+    {
+        // Default to 32bit:
+        EncodedRecordSize = 4;
+    }
+    for (FloatIndex = 0; FloatIndex < PeakCount; FloatIndex++)
+    {
+#ifdef BYTEORDER_LITTLE_ENDIAN
+        if (!Cursor->ByteOrderLittle)
+        {
+            EndianByteSwap(Cursor->DecodedPeakBuffer + (FloatIndex * EncodedRecordSize), EncodedRecordSize);
+            //ByteSwap = Cursor->DecodedPeakBuffer[FloatIndex*4];
+            //Cursor->DecodedPeakBuffer[FloatIndex*4] = Cursor->DecodedPeakBuffer[FloatIndex*4 + 3];
+            //Cursor->DecodedPeakBuffer[FloatIndex*4 + 3] = ByteSwap;
+            //ByteSwap = Cursor->DecodedPeakBuffer[FloatIndex*4 + 1];
+            //Cursor->DecodedPeakBuffer[FloatIndex*4 + 1] = Cursor->DecodedPeakBuffer[FloatIndex*4 + 2];
+            //Cursor->DecodedPeakBuffer[FloatIndex*4 + 2] = ByteSwap;
+        }
+#else
+        if (Cursor->ByteOrderLittle)
+        {
+            EndianByteSwap(Cursor->DecodedPeakBuffer + (FloatIndex * EncodedRecordSize), EncodedRecordSize);
+            //ByteSwap = Cursor->DecodedPeakBuffer[FloatIndex*4];
+            //Cursor->DecodedPeakBuffer[FloatIndex*4] = Cursor->DecodedPeakBuffer[FloatIndex*4 + 3];
+            //Cursor->DecodedPeakBuffer[FloatIndex*4 + 3] = ByteSwap;
+            //ByteSwap = Cursor->DecodedPeakBuffer[FloatIndex*4 + 1];
+            //Cursor->DecodedPeakBuffer[FloatIndex*4 + 1] = Cursor->DecodedPeakBuffer[FloatIndex*4 + 2];
+            //Cursor->DecodedPeakBuffer[FloatIndex*4 + 2] = ByteSwap;
+        }
+#endif
+        if (Cursor->State == evMZDataMZArrayBody || Cursor->State == evMZDataMZArray)
+        {
+            Cursor->MZBuffer[FloatIndex] = *((float*)(Cursor->DecodedPeakBuffer + FloatIndex * EncodedRecordSize));
+            //memcpy(Cursor->MZBuffer + FloatIndex, Cursor->DecodedPeakBuffer + FloatIndex * 4, 4);
+        }
+        else
+        {
+            //memcpy(Cursor->IntensityBuffer + FloatIndex, Cursor->DecodedPeakBuffer + FloatIndex * 4, 4);
+            Cursor->IntensityBuffer[FloatIndex] = *((float*)(Cursor->DecodedPeakBuffer + FloatIndex * EncodedRecordSize));
+        }
+    }
+}
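+
+// Note: unlike mzXML above, mzData carries m/z and intensity as two separate
+// base64 <data> arrays; with precision="64" each encoded value occupies 8 bytes,
+// with precision="32" (also the fallback assumed above) 4 bytes.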
+
+// expat callback: End a tag.
+void MZDataEndElement(void* UserData, const char* Tag)
+{
+    MZDataParseCursor* Cursor;
+    MSSpectrum* Spectrum;
+    //
+    Cursor = (MZDataParseCursor*)UserData;
+    Spectrum = Cursor->Spectrum;
+
+    if (!strcmp(Tag, "spectrum"))
+    {
+        MZDataCompleteSpectrum(Cursor, Spectrum);
+        Cursor->SpectrumPeaksCompleteFlag = 1;
+        return;
+    }
+    if (!strcmp(Tag, "data"))
+    {
+        MZDataProcessEncodedPeakData(Cursor, Spectrum);
+        return;
+    }
+    if (!strcmp(Tag, "intenArrayBinary"))
+    {
+        Cursor->State = evMZDataNone;
+        return;
+    }
+    if (!strcmp(Tag, "mzArrayBinary"))
+    {
+        Cursor->State = evMZDataNone;
+        return;
+    }
+    if (!strcmp(Tag, "ionSelection"))
+    {
+        Cursor->State = evMZDataNone;
+        return;
+    }
+}
+
+// expat callback: Handle character data in the body of a tag. 
+// The only mzdata body we care about is <data>
+void MZDataCharacterDataHandler(void* UserData, const XML_Char* String, int Length)
+{
+    MZDataParseCursor* Cursor;
+    int PeakCopySize;
+    //
+    Cursor = (MZDataParseCursor*)UserData;
+    if (Cursor->ErrorFlag)
+    {
+        return;
+    }
+    switch (Cursor->State)
+    {
+    case evMZDataMZArrayBody:
+    case evMZDataIntensityArrayBody: // deliberate fallthrough
+        PeakCopySize = Length;
+        if (Cursor->PeakBufferPos + PeakCopySize >= Cursor->PeakBufferSize)
+        {
+            REPORT_ERROR(0);
+            PeakCopySize = max(0, Cursor->PeakBufferSize - Cursor->PeakBufferPos - 1);
+        }
+        memcpy(Cursor->PeakBuffer + Cursor->PeakBufferPos, String, PeakCopySize);
+        Cursor->PeakBufferPos += PeakCopySize;
+        Cursor->PeakBuffer[Cursor->PeakBufferPos] = '\0';
+        break;
+    // Default behavior is to ignore text:
+    default:
+        break;
+    }
+}
+
+MZDataParseCursor* GetMZDataParseCursor()
+{
+    if (g_MZDataParseCursor)
+    {
+        return g_MZDataParseCursor;
+    }
+    g_MZDataParseCursor = (MZDataParseCursor*)calloc(1, sizeof(MZDataParseCursor));
+    g_MZDataParseCursor->PeakCountAllocation = 1024;
+    g_MZDataParseCursor->PeakBufferSize = sizeof(double) * 4 * g_MZDataParseCursor->PeakCountAllocation;
+    g_MZDataParseCursor->PeakBuffer = (char*)malloc(g_MZDataParseCursor->PeakBufferSize);
+    g_MZDataParseCursor->DecodedPeakBuffer = (char*)malloc(g_MZDataParseCursor->PeakCountAllocation * sizeof(float));
+    g_MZDataParseCursor->MZBuffer = (float*)malloc(g_MZDataParseCursor->PeakCountAllocation * sizeof(float));
+    g_MZDataParseCursor->IntensityBuffer = (float*)malloc(g_MZDataParseCursor->PeakCountAllocation * sizeof(float));
+    g_MZDataParseCursor->Parser = XML_ParserCreate(NULL);
+    //g_MZDataParseCursor->XMLBuffer = XML_GetBuffer(g_MZDataParseCursor->Parser, sizeof(char) * MZXML_BUFFER_SIZE);
+    //if (!g_MZDataParseCursor->XMLBuffer)
+    //{
+    //    printf("* Error: Unable to get XML buffer of size %d\n", MZXML_BUFFER_SIZE);
+    //}
+    return g_MZDataParseCursor;
+}
+
+MZXMLParseCursor* GetMZXMLParseCursor()
+{
+    if (g_MZXMLParseCursor)
+    {
+        return g_MZXMLParseCursor;
+    }
+    g_MZXMLParseCursor = (MZXMLParseCursor*)calloc(1, sizeof(MZXMLParseCursor));
+    g_MZXMLParseCursor->PeakCountAllocation = 1024;
+    g_MZXMLParseCursor->PeakBufferSize = sizeof(double) * 4 * g_MZXMLParseCursor->PeakCountAllocation;
+    g_MZXMLParseCursor->PeakBuffer = (char*)malloc(g_MZXMLParseCursor->PeakBufferSize);
+    g_MZXMLParseCursor->DecodedPeakBuffer = (char*)malloc(g_MZXMLParseCursor->PeakBufferSize);
+    g_MZXMLParseCursor->Peaks = (float*)malloc(sizeof(float) * 2 * g_MZXMLParseCursor->PeakCountAllocation);
+    g_MZXMLParseCursor->Parser = XML_ParserCreate(NULL);
+    //g_MZXMLParseCursor->XMLBuffer = XML_GetBuffer(g_MZXMLParseCursor->Parser, sizeof(char) * MZXML_BUFFER_SIZE);
+    //if (!g_MZXMLParseCursor->XMLBuffer)
+    //{
+    //    printf("* Error: Unable to get XML buffer of size %d\n", MZXML_BUFFER_SIZE);
+    //}
+
+    return g_MZXMLParseCursor;
+}
+
+void FreeMZXMLParseCursor()
+{
+    if (!g_MZXMLParseCursor)
+    {
+        return;
+    }
+    SafeFree(g_MZXMLParseCursor->PeakBuffer);
+    SafeFree(g_MZXMLParseCursor->Peaks);
+    SafeFree(g_MZXMLParseCursor->DecodedPeakBuffer);
+    if (g_MZXMLParseCursor->Parser)
+    {
+        XML_ParserFree(g_MZXMLParseCursor->Parser);
+    }
+    SafeFree(g_MZXMLParseCursor);
+    g_MZXMLParseCursor = NULL;
+}
+
+void FreeMZDataParseCursor()
+{
+    if (!g_MZDataParseCursor)
+    {
+        return;
+    }
+    SafeFree(g_MZDataParseCursor->PeakBuffer);
+    XML_ParserFree(g_MZDataParseCursor->Parser);
+    SafeFree(g_MZDataParseCursor);
+    g_MZDataParseCursor = NULL;
+}
+
+// Parse through an MZXML file to get a list of spectra and their byte offsets.
+void ParseSpectraFromMZXML(char* FileName, InputFileNode* InputFile, int FirstScan, int LastScan)
+{
+    FILE* MZXMLFile;
+    MZXMLParseCursor* Cursor;
+    int FilePos = 0;
+    int DoneFlag = 0;
+    //void* XMLBuffer;
+    int BytesRead;
+    int XMLParseResult;
+    int Error;
+    //
+
+    MZXMLFile = fopen(FileName, "rb");
+    if (!MZXMLFile)
+    {
+        REPORT_ERROR_S(8, FileName);
+        return;
+    }
+    printf("Parse spectra from '%s'...\n", FileName);
+    Cursor = GetMZXMLParseCursor();
+    Cursor->FirstScan = FirstScan;
+    Cursor->LastScan = LastScan;
+    Cursor->InputFile = InputFile;
+    Cursor->ErrorFlag = 0;
+    Cursor->Spectrum = NULL;
+    Cursor->SpecIndex = 1;
+    Cursor->Mode = MZXML_PARSE_LIST_SPECTRA;
+    XML_SetUserData(Cursor->Parser, Cursor);
+    XML_SetElementHandler(Cursor->Parser, MZXMLStartElement, MZXMLEndElement);
+    XML_SetCharacterDataHandler(Cursor->Parser, MZXMLCharacterDataHandler);
+    while (!DoneFlag)
+    {
+        // Get a buffer (parser handles the memory):
+        Cursor->XMLBuffer = XML_GetBuffer(Cursor->Parser, sizeof(char) * MZXML_BUFFER_SIZE);
+        if (!Cursor->XMLBuffer)
+        {
+            printf("* ParseSpectraFromMZXML Error: Unable to get XML buffer of size %d\n", MZXML_BUFFER_SIZE);
+            break;
+        }
+
+        // Read into the buffer:
+        BytesRead = ReadBinary(Cursor->XMLBuffer, sizeof(char), MZXML_BUFFER_SIZE, MZXMLFile);
+        if (!BytesRead)
+        {
+            // We'll call XML_Parse once more, this time with DoneFlag set to 1. 
+            DoneFlag = 1;
+        }
+
+        // Parse this block o' text:
+        XMLParseResult = XML_Parse(Cursor->Parser, Cursor->XMLBuffer, BytesRead, DoneFlag);
+        if (!XMLParseResult)
+        {
+            printf("XML Parse error - file position ~%d\n", XML_GetCurrentByteIndex(Cursor->Parser));
+            Error = XML_GetErrorCode(Cursor->Parser);
+            printf("Error code %d description '%s'\n", Error, XML_ErrorString(Error));
+        }
+
+        // If Cursor->ErrorFlag is set, then the file isn't valid!  Error out
+        // now, since recovery could be difficult.
+        if (Cursor->ErrorFlag)
+        {
+            break;
+        }
+        FilePos += BytesRead;
+    }
+    
+    // Close file, free memory:
+    fclose(MZXMLFile);
+    FreeMZXMLParseCursor();
+}
+
+// Parse ONE spectrum from the file.  Return true on success.
+int SpectrumLoadMZXML(MSSpectrum* Spectrum, FILE* MZXMLFile)
+{
+    MZXMLParseCursor* Cursor;
+    int FilePos = 0;
+    int DoneFlag = 0;
+    //void* XMLBuffer;
+    int BytesRead;
+    int XMLParseResult;
+    int ReturnResult = 1;
+    //
+
+    Cursor = GetMZXMLParseCursor();
+    Cursor->Spectrum = Spectrum;
+    Cursor->Mode = MZXML_PARSE_OBTAIN_PEAKS;
+    Cursor->ErrorFlag = 0;
+    Cursor->FirstScan = 0;
+    Cursor->LastScan = -1;
+    XML_ParserReset(Cursor->Parser, NULL);
+    XML_SetUserData(Cursor->Parser, Cursor);
+    XML_SetElementHandler(Cursor->Parser, MZXMLStartElement, MZXMLEndElement);
+    XML_SetCharacterDataHandler(Cursor->Parser, MZXMLCharacterDataHandler);
+
+    while (!DoneFlag)
+    {
+        // Get a buffer (parser handles the memory):
+        Cursor->XMLBuffer = XML_GetBuffer(Cursor->Parser, sizeof(char) * MZXML_BUFFER_SIZE);
+        if (!Cursor->XMLBuffer)
+        {
+            printf("* SpectrumLoadMZXML Error: Unable to get XML buffer of size %d\n", MZXML_BUFFER_SIZE);
+            break;
+        }
+
+        // Read into the buffer:
+        BytesRead = ReadBinary(Cursor->XMLBuffer, sizeof(char), MZXML_BUFFER_SIZE, MZXMLFile);
+        if (!BytesRead)
+        {
+            // We'll call XML_Parse once more, this time with DoneFlag set to 1. 
+            DoneFlag = 1;
+        }
+
+        // Parse this block o' text:
+        XMLParseResult = XML_Parse(Cursor->Parser, Cursor->XMLBuffer, BytesRead, DoneFlag);
+        if (!XMLParseResult)
+        {
+            Cursor->ErrorFlag = 1;
+            // If we already have peaks, do NOT report a warning: we are parsing a
+            // sub-document, so the parser will run off the end and complain about
+            // well-formedness.  Newer expat versions can abort parsing once the
+            // closing </scan> tag is reached.
+            if (!Cursor->Spectrum->PeakCount)
+            {
+                ReturnResult = 0;
+            }
+        }
+
+        // If Cursor->ErrorFlag is set, then the file isn't valid!  Error out
+        // now, since recovery could be difficult.
+        if (Cursor->ErrorFlag)
+        {
+            break;
+        }
+        if (Cursor->SpectrumPeaksCompleteFlag)
+        {
+            break;
+        }
+
+        FilePos += BytesRead;
+    }
+    // Sanity check: We must have a precursor m/z!
+    if (!Cursor->Spectrum->MZ)
+    {
+        ReturnResult = 0;
+    }
+    if (Cursor->Spectrum->Charge && (Cursor->Spectrum->Charge <= 0 || Cursor->Spectrum->Charge >= 6))
+    {
+        ReturnResult = 0;
+    }
+
+    // Other checks for decent peaks:
+    if (Cursor->Spectrum->PeakCount <= 0)
+    {
+        return 0;
+    }
+    if (Cursor->Spectrum->Peaks[0].Mass < -1 || Cursor->Spectrum->Peaks[0].Mass > (GlobalOptions->DynamicRangeMax + GlobalOptions->Epsilon))
+    {
+        ReturnResult = 0;
+    }
+    if (Cursor->Spectrum->Peaks[Cursor->Spectrum->PeakCount - 1].Mass < -1 || Cursor->Spectrum->Peaks[Cursor->Spectrum->PeakCount - 1].Mass > (GlobalOptions->DynamicRangeMax + GlobalOptions->Epsilon))
+    {
+        ReturnResult = 0;
+    }
+    if (Cursor->Spectrum->Peaks[0].Intensity < 0)
+    {
+        ReturnResult = 0;
+    }
+    if (Cursor->Spectrum->Peaks[Cursor->Spectrum->PeakCount - 1].Intensity < 0)
+    {
+        ReturnResult = 0;
+    }
+    return ReturnResult;    
+}
+
+// Parse through an mzData file to get a list of spectra and their byte offsets.
+void ParseSpectraFromMZData(char* FileName, InputFileNode* InputFile, int FirstScan, int LastScan)
+{
+    FILE* MZXMLFile;
+    MZDataParseCursor* Cursor;
+    int FilePos = 0;
+    int DoneFlag = 0;
+    //void* XMLBuffer;
+    int BytesRead;
+    int XMLParseResult;
+    int Error;
+    //
+
+    MZXMLFile = fopen(FileName, "rb");
+    if (!MZXMLFile)
+    {
+        REPORT_ERROR_S(8, FileName);
+        return;
+    }
+    printf("Parse spectra from '%s'...\n", FileName);
+    Cursor = GetMZDataParseCursor();
+    Cursor->FirstScan = FirstScan;
+    Cursor->LastScan = LastScan;
+    Cursor->InputFile = InputFile;
+    Cursor->ErrorFlag = 0;
+    Cursor->SpecIndex = 1;
+    Cursor->Spectrum = NULL;
+    Cursor->Mode = MZXML_PARSE_LIST_SPECTRA;
+    XML_SetUserData(Cursor->Parser, Cursor);
+    XML_SetElementHandler(Cursor->Parser, MZDataStartElement, MZDataEndElement);
+    XML_SetCharacterDataHandler(Cursor->Parser, MZDataCharacterDataHandler);
+    //XMLBuffer = Cursor->XMLBuffer;
+
+    while (!DoneFlag)
+    {
+        // Get a buffer (parser handles the memory):
+        Cursor->XMLBuffer = XML_GetBuffer(Cursor->Parser, sizeof(char) * MZXML_BUFFER_SIZE);
+        if (!Cursor->XMLBuffer)
+        {
+            printf("* Error: Unable to get XML buffer of size %d\n", MZXML_BUFFER_SIZE);
+            break;
+        }
+
+        // Read into the buffer:
+        BytesRead = ReadBinary(Cursor->XMLBuffer, sizeof(char), MZXML_BUFFER_SIZE, MZXMLFile);
+        if (!BytesRead)
+        {
+            // We'll call XML_Parse once more, this time with DoneFlag set to 1. 
+            DoneFlag = 1;
+        }
+
+        // Parse this block o' text:
+        XMLParseResult = XML_Parse(Cursor->Parser, Cursor->XMLBuffer, BytesRead, DoneFlag);
+        if (!XMLParseResult)
+        {
+            printf("XML Parse error - file position ~%d\n", XML_GetCurrentByteIndex(Cursor->Parser));
+            Error = XML_GetErrorCode(Cursor->Parser);
+            printf("Error code %d description '%s'\n", Error, XML_ErrorString(Error));
+        }
+
+        // If Cursor->ErrorFlag is set, then the file isn't valid!  Error out
+        // now, since recovery could be difficult.
+        if (Cursor->ErrorFlag)
+        {
+            break;
+        }
+        FilePos += BytesRead;
+    }
+    
+    // Close file, free memory:
+    fclose(MZXMLFile);
+    FreeMZDataParseCursor();
+}
+
+
+// Parse ONE spectrum from the file.  Return true on success.
+int SpectrumLoadMZData(MSSpectrum* Spectrum, FILE* MZXMLFile)
+{
+    MZDataParseCursor* Cursor;
+    int FilePos = 0;
+    int DoneFlag = 0;
+    //void* XMLBuffer;
+    int BytesRead;
+    int XMLParseResult;
+    //
+
+    Cursor = GetMZDataParseCursor();
+    Cursor->Spectrum = Spectrum;
+    Cursor->Mode = MZXML_PARSE_OBTAIN_PEAKS;
+    Cursor->ErrorFlag = 0;
+    XML_ParserReset(Cursor->Parser, NULL);
+    XML_SetUserData(Cursor->Parser, Cursor);
+    XML_SetElementHandler(Cursor->Parser, MZDataStartElement, MZDataEndElement);
+    XML_SetCharacterDataHandler(Cursor->Parser, MZDataCharacterDataHandler);
+    while (!DoneFlag)
+    {
+        // Get a buffer (parser handles the memory):
+        Cursor->XMLBuffer = XML_GetBuffer(Cursor->Parser, sizeof(char) * MZXML_BUFFER_SIZE);
+        if (!Cursor->XMLBuffer)
+        {
+            printf("* Error: Unable to get XML buffer of size %d\n", MZXML_BUFFER_SIZE);
+            break;
+        }
+
+        // Read into the buffer:
+        BytesRead = ReadBinary(Cursor->XMLBuffer, sizeof(char), MZXML_BUFFER_SIZE, MZXMLFile);
+        if (!BytesRead)
+        {
+            // We'll call XML_Parse once more, this time with DoneFlag set to 1. 
+            DoneFlag = 1;
+        }
+
+        // Parse this block o' text:
+        XMLParseResult = XML_Parse(Cursor->Parser, Cursor->XMLBuffer, BytesRead, DoneFlag);
+        if (!XMLParseResult)
+        {
+            // Do NOT report a warning here: we are parsing a sub-document, so the
+            // parser will run off the end and complain about well-formedness.
+            // Newer expat versions can abort parsing once the closing tag of the
+            // spectrum element is reached.
+        }
+
+        // If Cursor->ErrorFlag is set, then the file isn't valid!  Error out
+        // now, since recovery could be difficult.
+        if (Cursor->ErrorFlag)
+        {
+            break;
+        }
+        if (Cursor->SpectrumPeaksCompleteFlag)
+        {
+            break;
+        }
+
+        FilePos += BytesRead;
+    }
+
+    // The caller owns MZXMLFile, so we do not close it here.
+    return Cursor->ErrorFlag ? 0 : 1;
+}
+
diff --git a/ParseXML.h b/ParseXML.h
new file mode 100644
index 0000000..46ea08a
--- /dev/null
+++ b/ParseXML.h
@@ -0,0 +1,46 @@
+//Title:          ParseXML.h
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+#ifndef PARSE_XML_H
+#define PARSE_XML_H
+
+
+
+int SpectrumLoadMZXML(MSSpectrum* Spectrum, FILE* MZXMLFile);
+void ParseSpectraFromMZXML(char* FileName, InputFileNode* InputFile, int FirstScan, int LastScan);
+void ParseSpectraFromMZData(char* FileName, InputFileNode* InputFile, int FirstScan, int LastScan);
+int SpectrumLoadMZData(MSSpectrum* Spectrum, FILE* MZDataFile);
+void FreeMZXMLParseCursor();
+void FreeMZDataParseCursor();
+
+#endif // PARSE_XML_H
+
diff --git a/ParseXML.py b/ParseXML.py
new file mode 100644
index 0000000..fc9dae6
--- /dev/null
+++ b/ParseXML.py
@@ -0,0 +1,281 @@
+#Title:          ParseXML.py
+#Author:         Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+#Created:        2005
+# Copyright 2007,2008,2009 The Regents of the University of California
+# All Rights Reserved
+#
+# Permission to use, copy, modify and distribute any part of this
+# program for educational, research and non-profit purposes, by non-profit
+# institutions only, without fee, and without a written agreement is hereby
+# granted, provided that the above copyright notice, this paragraph and
+# the following three paragraphs appear in all copies.
+#
+# Those desiring to incorporate this work into commercial
+# products or use for commercial purposes should contact the Technology
+# Transfer & Intellectual Property Services, University of California,
+# San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+# Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+#
+# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+# INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+# IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+# OF SUCH DAMAGE.
+#
+# THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+# OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+# ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+# REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+# EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+# THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+
+"""
+MZXML and mzData peak parsing
+"""
+
+import os
+import sys
+import struct
+import xml.sax
+import xml.sax.handler
+import base64
+import MSSpectrum
+
+if hasattr(base64, "b64decode"):
+    B64Decode = base64.b64decode
+    B64Encode = base64.b64encode
+else:
+    B64Decode = base64.decodestring
+    B64Encode = base64.encodestring
+
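+# The parser below assumes mzXML <peaks> data: consecutive (m/z, intensity) pairs of
+# 32-bit floats, base64-encoded, with "network" byte order meaning big-endian.  This
+# helper is a minimal sketch of the matching encode side; nothing below calls it.
+def EncodeSpectrumPeaksMZXML(Peaks, ByteOrder = "big"):
+    "Pack a list of (Mass, Intensity) pairs the way the parser below expects them."
+    if ByteOrder == "little":
+        FloatFormat = "<f"
+    else:
+        FloatFormat = ">f"
+    Packed = ""
+    for (Mass, Intensity) in Peaks:
+        Packed += struct.pack(FloatFormat, Mass) + struct.pack(FloatFormat, Intensity)
+    return B64Encode(Packed)
+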
+def GetSpectrumPeaksMZXML(Spectrum, File):
+    Spectrum.Peaks = []
+    SAXParser = xml.sax.make_parser()
+    Handler = MZXMLPeakParser(Spectrum)
+    Handler.Parser = SAXParser
+    SAXParser.setContentHandler(Handler)
+    try:
+        SAXParser.parse(File)
+    except xml.sax.SAXException, XMLException:
+        Message = XMLException.getMessage()
+        # If no peaks were parsed, re-raise the exception:
+        if not len(Spectrum.Peaks):
+            raise
+        # If we did succeed in getting peaks, then the error likely arose
+        # after the end of the peaks tag.
+        if Message == "junk after document element":
+            pass
+        elif Message == "not well-formed (invalid token)":
+            pass
+        else:
+            raise
+
+def GetSpectrumPeaksMZData(Spectrum, File):
+    Spectrum.Peaks = []
+    SAXParser = xml.sax.make_parser()
+    Handler = MZDataPeakParser(Spectrum)
+    Handler.Parser = SAXParser
+    SAXParser.setContentHandler(Handler)
+    try:
+        SAXParser.parse(File)
+    except xml.sax.SAXException, XMLException:
+        Message = XMLException.getMessage()
+        if Message == "junk after document element":
+            pass
+        elif Message == "not well-formed (invalid token)":
+            pass
+        else:
+            raise
+
+class MZXMLParseStates:
+    SpectrumComplete = -1
+    Skipping = 0
+    Peaks = 1
+    PrecursorMZ = 2
+
+class XMLDictionaryHandler(xml.sax.handler.ContentHandler):
+    """
+    A simple wrapper for the skeletal ContentHandler class.  It maps the lowercase SAX
+    callback names onto this module's capitalized method names, and supports the use of
+    "triage dictionaries" self.StartHandlers and self.EndHandlers to find the handlers for tags.
+    """
+    def __init__(self):
+        # Repair names:
+        self.startElement = self.StartElement
+        self.endElement = self.EndElement
+        self.characters = self.HandleCharacters
+        #
+        self.VerboseFlag = 0
+    def StartElement(self, Name, Attributes):
+        #print "Start <%s>@%s"%(Name, self.Parser._parser.CurrentByteIndex)
+        Handler = self.StartHandlers.get(Name, None)
+        if self.VerboseFlag:
+            print "<%s> %s"%(Name, Handler)
+        if Handler:
+            Handler(Attributes)
+    def EndElement(self, Name):
+        #print "  End <%s>"%Name
+        Handler = self.EndHandlers.get(Name, None)
+        if self.VerboseFlag:
+            print "</%s> %s"%(Name, Handler)
+        if Handler:
+            Handler()
+    def HandleCharacters(self, String):
+        pass
+    
+class MZXMLPeakParser(XMLDictionaryHandler):
+    def __init__(self, Spectrum):
+        self.State = MZXMLParseStates.Skipping
+        self.StartHandlers = {"peaks":self.StartPeaks,
+                              "precursorMz":self.StartPrecursorMZ
+                              }
+        self.EndHandlers = {"peaks":self.EndPeaks,
+                            "scan":self.EndScan,
+                            "precursorMz":self.EndPrecursorMZ
+                            }
+        self.Spectrum = Spectrum
+        self.PeakBuffer = ""
+        XMLDictionaryHandler.__init__(self)
+    def HandleCharacters(self, String):
+        if self.State == MZXMLParseStates.PrecursorMZ:
+            self.PrecursorMZBuffer += String
+            return
+        if self.State == MZXMLParseStates.Peaks:
+            self.PeakBuffer += String
+            return
+    def StartPrecursorMZ(self, Attributes):
+        if self.State == MZXMLParseStates.SpectrumComplete:
+            return
+        self.State = MZXMLParseStates.PrecursorMZ
+        self.PrecursorMZBuffer = ""
+    def EndPrecursorMZ(self):
+        if self.State == MZXMLParseStates.SpectrumComplete:
+            return
+        #print "Precursor MZ -> %s"%self.PrecursorMZBuffer
+        self.Spectrum.PrecursorMZ = float(self.PrecursorMZBuffer)
+        self.State = MZXMLParseStates.Skipping
+    def StartPeaks(self, Attributes):
+        if self.State == MZXMLParseStates.SpectrumComplete:
+            return
+        self.State = MZXMLParseStates.Peaks
+        self.PeakBuffer = ""
+        ByteOrder = Attributes.get("byteOrder", "network")
+        if ByteOrder == "little" or ByteOrder == "little-endian":
+            self.ByteOrder = "little"
+        else:
+            self.ByteOrder = "big"
+    def EndScan(self):
+        self.State = MZXMLParseStates.SpectrumComplete
+    def EndPeaks(self):
+        if self.State == MZXMLParseStates.SpectrumComplete:
+            return
+        DecodedPeaks = B64Decode(self.PeakBuffer)
+        # Peaks are packed as consecutive (m/z, intensity) pairs of 32-bit floats,
+        # in the byte order declared by the byteOrder attribute:
+        if self.ByteOrder == "little":
+            FloatFormat = "<f"
+        else:
+            FloatFormat = ">f"
+        StringPos = 0
+        while StringPos < len(DecodedPeaks):
+            Mass = struct.unpack(FloatFormat, DecodedPeaks[StringPos:StringPos + 4])[0]
+            Intensity = struct.unpack(FloatFormat, DecodedPeaks[StringPos + 4:StringPos + 8])[0]
+            Peak = MSSpectrum.PeakClass(Mass, Intensity)
+            StringPos += 8
+            self.Spectrum.Peaks.append(Peak)
+
+class MZDataParseStates:
+    SpectrumComplete = -1
+    Skipping = 0
+    MZArray = 1
+    MZArrayData = 2
+    IntensityArray = 3
+    IntensityArrayData = 4
+
+class MZDataPeakParser(XMLDictionaryHandler):
+    def __init__(self, Spectrum):
+        self.State = MZDataParseStates.Skipping
+        self.StartHandlers = {"data":self.StartData,
+                              "mzArrayBinary":self.StartMZArrayBinary,
+                              "intenArrayBinary":self.StartIntensityArrayBinary,
+                              "cvParam":self.StartCVParam,
+                              }
+        self.EndHandlers = {"data":self.EndData,
+                            "spectrum":self.EndSpectrum,
+                            }
+        self.Spectrum = Spectrum
+        self.PeakBuffer = ""
+        XMLDictionaryHandler.__init__(self)
+    def StartCVParam(self, Attributes):
+        Name = Attributes.get("name", None)
+        Value = Attributes.get("value", None)
+        if Name == "mz":
+            self.Spectrum.PrecursorMZ = float(Value)
+    def StartMZArrayBinary(self, Attributes):
+        if self.State == MZDataParseStates.SpectrumComplete:
+            return
+        self.State = MZDataParseStates.MZArray
+    def StartIntensityArrayBinary(self, Attributes):
+        if self.State == MZDataParseStates.SpectrumComplete:
+            return
+        self.State = MZDataParseStates.IntensityArray
+    def HandleCharacters(self, String):
+        if self.State in (MZDataParseStates.MZArrayData, MZDataParseStates.IntensityArrayData):
+            self.PeakBuffer += String
+    def EndData(self):
+        if self.State in (MZDataParseStates.MZArrayData, MZDataParseStates.IntensityArrayData):
+            # Parse the float array.  The struct format follows the precision and
+            # endianness declared on the <data> element:
+            if self.Precision == 64:
+                ValueFormat = "d"
+                ValueSize = 8
+            else:
+                ValueFormat = "f"
+                ValueSize = 4
+            if self.ByteOrder == "little":
+                ValueFormat = "<" + ValueFormat
+            else:
+                ValueFormat = ">" + ValueFormat
+            FloatList = []
+            DecodedPeaks = B64Decode(self.PeakBuffer)
+            StringPos = 0
+            while StringPos < len(DecodedPeaks):
+                Value = struct.unpack(ValueFormat, DecodedPeaks[StringPos:StringPos + ValueSize])[0]
+                FloatList.append(Value)
+                StringPos += ValueSize
+            if self.State == MZDataParseStates.MZArrayData:
+                self.MZList = FloatList
+            else:
+                self.IntensityList = FloatList
+            self.State = MZDataParseStates.Skipping
+    def StartData(self, Attributes):
+        if self.State == MZDataParseStates.SpectrumComplete:
+            return
+        self.Precision = int(Attributes.get("precision", "32"))
+        ByteOrder = Attributes.get("endian", "network")
+        if ByteOrder == "little" or ByteOrder == "little-endian":
+            self.ByteOrder = "little"
+        else:
+            self.ByteOrder = "big"
+        if self.State == MZDataParseStates.MZArray:
+            self.State = MZDataParseStates.MZArrayData
+            self.PeakBuffer = ""
+            return
+        if self.State == MZDataParseStates.IntensityArray:
+            self.State = MZDataParseStates.IntensityArrayData
+            self.PeakBuffer = ""
+    def EndSpectrum(self):
+        if self.State != MZDataParseStates.SpectrumComplete:
+            self.State = MZDataParseStates.SpectrumComplete
+            for PeakIndex in range(len(self.MZList)):
+                Mass = self.MZList[PeakIndex]
+                Intensity = self.IntensityList[PeakIndex]
+                Peak = MSSpectrum.PeakClass(Mass, Intensity)
+                self.Spectrum.Peaks.append(Peak)
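+
+# The mzData parser above assumes two separate base64 arrays (m/z and intensity),
+# each tagged with a precision (32 or 64 bit) and an endianness on its <data>
+# element.  This helper is a minimal sketch of the matching encode side; nothing
+# in this module calls it.
+def EncodeMZDataArray(Values, Precision = 32, ByteOrder = "big"):
+    "Pack a list of floats the way the mzData parser above expects a <data> element."
+    if Precision == 64:
+        ValueFormat = "d"
+    else:
+        ValueFormat = "f"
+    if ByteOrder == "little":
+        ValueFormat = "<" + ValueFormat
+    else:
+        ValueFormat = ">" + ValueFormat
+    Packed = ""
+    for Value in Values:
+        Packed += struct.pack(ValueFormat, Value)
+    return B64Encode(Packed)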
diff --git a/PhosCut2.bn b/PhosCut2.bn
new file mode 100644
index 0000000..35c9bea
Binary files /dev/null and b/PhosCut2.bn differ
diff --git a/PhosCut3.bn b/PhosCut3.bn
new file mode 100644
index 0000000..25eaef8
Binary files /dev/null and b/PhosCut3.bn differ
diff --git a/PhosphateLocalization.py b/PhosphateLocalization.py
new file mode 100644
index 0000000..3e3dbfc
--- /dev/null
+++ b/PhosphateLocalization.py
@@ -0,0 +1,324 @@
+#Title:          PhosphateLocalization.py
+#Author:         Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+#Created:        2005
+# Copyright 2007,2008,2009 The Regents of the University of California
+# All Rights Reserved
+#
+# Permission to use, copy, modify and distribute any part of this
+# program for educational, research and non-profit purposes, by non-profit
+# institutions only, without fee, and without a written agreement is hereby
+# granted, provided that the above copyright notice, this paragraph and
+# the following three paragraphs appear in all copies.
+#
+# Those desiring to incorporate this work into commercial
+# products or use for commercial purposes should contact the Technology
+# Transfer & Intellectual Property Services, University of California,
+# San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+# Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+#
+# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+# INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+# IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+# OF SUCH DAMAGE.
+#
+# THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+# OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+# ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+# REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+# EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+# THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+
+"""PhosphateLocalization.py
+
+This script is a glorified wrapper for Label.py. It calls label and
+calculates the PLS score for each spectral annotation in the input set.
+
+1. read in input data.  If it is not in native inspect format, then we
+send it to the GetByteOffset part, so that we can use it like Inspect.
+
+2. Label all possible annotations of the string and get their peptide score (think binomial)
+
+3. Find the difference between the top two scores, report. print.
+The results are reported by appending two extra columns to the data from the input
+file.  These correspond to the top annotation, and it's PLS.
+"""
+
+UsageInfo = """PhosphateLocalization.py
+Calculates the Phosphate Localization Score (PLS) for each spectral
+annotation in the input file.  Make sure to read the tutorial so
+that you understand how to use it correctly.
+
+Required Options:
+ -r [FileName] File of formatted annotations
+ -m [Directory] Directory containing spectra files (not filename)
+ -w [FileName] Output of this program
+
+Additional Options:
+ -d [Directory] Directory for the images and annotated peak lists
+      created during the label process.  Default "LabelSpewage"
+
+"""
+
+import os
+import sys
+import getopt
+import ResultsParser
+import GetByteOffset
+import string
+import Label
+
+class DetectiveClass(ResultsParser.ResultsParser):
+    def __init__(self):
+        self.InputFilePath = None
+        self.OutputFilePath = None
+        self.LabeledAnnotationsDir = "LabelSpewage" # for Labeled output
+        self.MZXMLDir = None
+        self.InspectFormat = 0
+        self.ScanOffset = {} # potentially large dictionary for storing the byte offset of each spectrum
+        self.OldInspectResults = {} #(file, scan) => (MQScore, Annotation); file name only, not the path
+        self.PLSDict = {} # self.PLSDict[(SpectrumFile, Scan)] = (PLS, NewPeptide)
+        self.Columns = ResultsParser.Columns()
+        ResultsParser.ResultsParser.__init__(self)
+        
+    def Main(self):
+        self.CheckInputFormat(self.InputFilePath)
+        if not self.InspectFormat:
+            self.GetByteOffsetsForSpectra()
+        MakeDirectory(self.LabeledAnnotationsDir)
+        self.LabelMe()
+        self.MakeOutput()
+
+
+    def MakeOutput(self):
+        """The results of Label.py have been put into a folder, and we now have to parse
+        those and put them back into the file that people gave us.
+        """
+        ## get all the stuff from Label
+        self.ProcessResultsFiles(self.LabeledAnnotationsDir, self.ParseLabelSpewage)
+        # start putting it into the output
+        Handle = open(self.InputFilePath, "rb")
+        OutHandle = open(self.OutputFilePath, "wb")
+        for Line in Handle.xreadlines():
+            if not Line.strip():
+                continue
+            if Line[0] == "#":
+                #header
+                OutHandle.write("%s\tBetterAnnotation\tPLS\n"%Line.strip())
+                continue
+            Bits = Line.strip().split("\t")
+            SpectrumFullPath = Bits[self.Columns.getIndex("SpectrumFile")]
+            SpectrumFile = os.path.split(SpectrumFullPath)[1]
+            Scan = Bits[self.Columns.getIndex("Scan#")]
+            Annotation = Bits[self.Columns.getIndex("Annotation")]
+            Tuple = (SpectrumFile, Scan)
+            if not self.PLSDict.has_key(Tuple):
+                print "NO KEY, %s, %s"%(SpectrumFile, Scan)
+                continue
+            (PLS, NewPeptideAnnotation) = self.PLSDict[(SpectrumFile, Scan)]
+            #now write stuff out
+            Bits.append("%s"%NewPeptideAnnotation)
+            Bits.append("%s"%PLS)
+            String = "\t".join(Bits)
+            OutHandle.write("%s\n"%String)
+        OutHandle.close()
+
+    def ParseLabelSpewage(self, FilePath):
+        """In each file I am going to grep out
+        filename, scan number, PLS, better peptide if such exists
+        """
+        
+        ##in the filename are the scan number and mzxml filename
+        if not FilePath[-3:] == "txt":
+            return #skip png images
+        (Path, FileName) = os.path.split(FilePath)
+        Pos = FileName.find("mzXML") + 5
+        SpectrumFile = FileName[:Pos]
+        Dot = FileName.find(".", Pos+1)
+        Scan = FileName[Pos+1:Dot] # string value, not int
+        NewPeptide = None
+        Handle= open(FilePath, "rb")
+        PLS = "N/A" #default, shoudl get overridden for every file
+        for Line in Handle.xreadlines():
+            Line = Line.strip()
+            #hard coded magic
+            if Line[:10] == "Phosphate ":
+                #Phosphate Localization Score: 52.2
+                Colon = Line.find(":")
+                PLS = Line[Colon + 1:]
+                #print Line
+                #print "I parsed out %s"%PLS
+            if Line[:7] == "WARNING":
+                #parse out new peptide
+                ToSplit = Line.replace("WARNING: Better annotation than input.", "")
+                (BetterMQScore, NewPeptide) = ToSplit.split(",")
+                NewPeptide = NewPeptide.strip()
+            if Line[:2] == "b2":
+                #this means we've started to get into the rest of the verbose output
+                # and past what we care about
+                break
+        Handle.close()
+        Tuple = (SpectrumFile, Scan)
+        self.PLSDict[Tuple] = (PLS, NewPeptide)
+
+
+    def GetByteOffsetsForSpectra(self):
+        "Read mzXML from either a single file, or directory, creating the self.ScanOffset dictionary"
+        Abacus = GetByteOffset.Abacus()
+        if os.path.isdir(self.MZXMLDir):
+            for FileName in os.listdir(self.MZXMLDir):
+                (Stub, Extension) = os.path.splitext(FileName)
+                if Extension.lower() == ".mzxml":
+                    Path = os.path.join(self.MZXMLDir, FileName)
+                    ScanOffsetSingleFile = Abacus.GetByteOffset(Path)
+                    for (ScanNumber, ScanOffset) in ScanOffsetSingleFile.items():
+                        self.ScanOffset[(FileName, ScanNumber)] = (Path, ScanOffset)
+        else:
+            ScanOffsetSingleFile = Abacus.GetByteOffset(self.MZXMLDir)
+            FileName = os.path.split(self.MZXMLDir)[1]
+            for (ScanNumber, ScanOffset) in ScanOffsetSingleFile.items():
+                self.ScanOffset[(FileName, ScanNumber)] = (self.MZXMLDir, ScanOffset)
+                #print "Storing value (%s,%s) with key (%s, %s)"%(self.MZXMLDir, ScanOffset, FileName, ScanNumber)
+
+    def LabelMe(self):
+        Handle = open(self.InputFilePath, "rb")
+        Dymo = Label.LabelClass()
+        Count = 0
+        GoodScoreCount = 0
+        WrongChargeCount = 0
+        ScoredWorseCount = 0
+        for Line in Handle.xreadlines():
+            if Line[0] == "#":
+                self.Columns.initializeHeaders(Line)
+                continue
+            if not Line.strip():
+                continue
+            Bits = list(Line.strip().split("\t"))
+            #Charge = int (Bits[self.Columns.Charge])  I don't think I need this anymore
+            Count +=1
+            Annotation = Bits[self.Columns.getIndex("Annotation")]
+            #print "Annotation :%s:"%Annotation
+            FileName = Bits[self.Columns.getIndex("SpectrumFile")]
+            Scan = int(Bits[self.Columns.getIndex("Scan#")])
+            if not self.InspectFormat:
+                FileNameMinusPath = os.path.split(FileName)[1]
+                (FullPathDummy, ByteOffset) = self.ScanOffset[(FileNameMinusPath, Scan)]
+                #print (FullPathDummy, ByteOffset)
+                #continue
+            else:
+                ByteOffset = int(Bits[self.Columns.getIndex("SpecFilePos")])
+            (Path,File) = os.path.split(FileName)
+            FileName = os.path.join(self.MZXMLDir, File)
+            VerboseFileName = "%s.%s.%s.verbose.txt"%(File, Scan, Annotation[2:-2])
+            ImageFileName = "%s.%s.%s.png"%(File, Scan, Annotation[2:-2])
+            VerboseFilePath = os.path.join(self.LabeledAnnotationsDir, VerboseFileName)
+            ImageFilePath = os.path.join(self.LabeledAnnotationsDir, ImageFileName)
+            ## as we've got a single Dymo object, we must be passing in full args list
+            ## -p to suppress the image popup, and -P for the PLS score
+            Args = " -r %s -b %d -a %s -v %s -w %s -p -P"%(FileName, ByteOffset, Annotation, VerboseFilePath, ImageFilePath)
+            ArgsList = Args.split()
+            #print "Parsing Results for %s, scan %s, charge %s"%(FileName, Scan, Charge)
+            #print "Args: %s"%Args
+            Dymo.ParseCommandLine(ArgsList)
+            Dymo.Main()
+
+        Handle.close()
+        
+    def CheckInputFormat(self, FileName):
+        """This method serves to catch input files that are not in the
+        proper Inspect format.  If this is the case, then we must convert the
+        files to Inspect format.  This basically means that we put a byte offset at the
+        end.
+        Expected format. (tab delimited, 3 columns)
+        Spectrum File          Spectrum Number (int)            Annotation (string, no! numbers!)
+        """
+        Handle = open (self.InputFilePath, "rb")
+        ## 1. get the first line and see if it's already in Inspect Format
+        Line = Handle.readline()
+        try:
+            Bits = Line.strip().split("\t")
+        except:
+            print "####################################################"
+            print "Input file in improper format. Please read tutorial."
+            sys.exit(1)
+        #if not len(Bits) < self.Columns.getIndex("SpecFilePos"):
+        #    self.InspectFormat = 1
+        #    return # in inspect format.  it's okay
+        ## 2. Check to see if each line of the input file has the proper format
+        Reject = 0
+        for Line in Handle.xreadlines():
+            if Line[0] == "#":
+                self.Columns.initializeHeaders(Line)
+                continue
+            try:
+                Bits = Line.strip().split("\t")
+            except:
+                print "####################################################"
+                print "Input file in improper format. Please read tutorial."
+                sys.exit(1)
+            #now check to see if column 1 is a number, and 2 is a string (with no brackets)
+            try:
+                SpectrumNumber = int(Bits[self.Columns.getIndex("Scan#")])
+            except:
+                Reject = 1
+                print "Second column must be a integer representing the spectrum number"
+            Annotation = Bits[self.Columns.getIndex("Annotation")]
+            AcceptArray = string.ascii_letters
+            AcceptArray += "."  #for delimiting the prefix/suffix
+            AcceptArray += "*"  # for the beginning/end of a protein. should only be in prefix/suffix
+            AcceptArray += string.digits
+            for Index in range(len(Annotation)):
+                if not Annotation[Index] in AcceptArray:
+                    print "This annotation is in an improper format %s"%Annotation
+                    Reject = 1
+                    break
+            if Reject:
+                print "####################################################"
+                print "There were formatting problems with the input file"
+                print "We cannot proceed.  Please read the tutorial."
+                sys.exit(1)
+        print "Input file %s received in the correct format"%FileName
+    def ParseCommandLine(self,Arguments):
+        (Options, Args) = getopt.getopt(Arguments, "r:w:m:d:")
+        OptionsSeen = {}
+        for (Option, Value) in Options:
+            OptionsSeen[Option] = 1
+            if Option == "-r":
+                # -r results file(s)
+                if not os.path.exists(Value):
+                    print "** Error: couldn't find results file '%s'\n\n"%Value
+                    print UsageInfo
+                    sys.exit(1)
+                self.InputFilePath = Value
+            if Option == "-d":
+                self.LabeledAnnotationsDir = Value
+            if Option == "-w":
+                self.OutputFilePath = Value
+            if Option == "-m":
+                self.MZXMLDir = Value
+        if not OptionsSeen.has_key("-r") or not OptionsSeen.has_key("-m"):
+            print UsageInfo
+            sys.exit(1)
+
+
+def MakeDirectory(Dir):
+    if os.path.exists(Dir):
+        return
+    os.makedirs(Dir)
+
+
+
+if __name__ == "__main__":
+    try:
+        import psyco
+        psyco.full()
+    except:
+        print "(psyco not found - running in non-optimized mode)"
+    MacGyver = DetectiveClass()
+    MacGyver.ParseCommandLine(sys.argv[1:])
+    MacGyver.Main()
diff --git a/PrepDB.py b/PrepDB.py
new file mode 100644
index 0000000..01eb755
--- /dev/null
+++ b/PrepDB.py
@@ -0,0 +1,283 @@
+#Title:          PrepDB.py
+#Author:         Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+#Created:        2005
+# Copyright 2007,2008,2009 The Regents of the University of California
+# All Rights Reserved
+#
+# Permission to use, copy, modify and distribute any part of this
+# program for educational, research and non-profit purposes, by non-profit
+# institutions only, without fee, and without a written agreement is hereby
+# granted, provided that the above copyright notice, this paragraph and
+# the following three paragraphs appear in all copies.
+#
+# Those desiring to incorporate this work into commercial
+# products or use for commercial purposes should contact the Technology
+# Transfer & Intellectual Property Services, University of California,
+# San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+# Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+#
+# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+# INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+# IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+# OF SUCH DAMAGE.
+#
+# THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+# OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+# ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+# REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+# EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+# THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+"""
+Translate a protein database to a good format for trie-based searching.
+The source database should be in either FASTA format or in swiss-prot format.
+The output database will be in "concatenated format" - peptide strings with
+asterisks delimiting the peptides, no whitespace.
+We also save a binary file indexing into the concatenated DB.
+
+Index file format is record-based, with one record per peptide:
+- original DB position (int); the START of a record (>)
+- concatenated DB file position (int); the START of a record (first peptide)
+- Peptide ID (string, 80 chars)
+"""
+import sys
+import struct
+import traceback
+import os
+import string
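+
+# Illustrative helper (a minimal sketch; nothing in this script calls it): read one
+# record back from an index file written with the "<qi80s" layout used below.
+def ReadIndexRecord(IndexHandle):
+    RecordSize = struct.calcsize("<qi80s")  # 8 + 4 + 80 = 92 bytes per record
+    Block = IndexHandle.read(RecordSize)
+    if len(Block) < RecordSize:
+        return None  # end of the index file
+    (SourceFilePos, SquishedFilePos, RawID) = struct.unpack("<qi80s", Block)
+    ID = RawID.rstrip("\0")  # the ID field is null-padded to 80 characters
+    return (SourceFilePos, SquishedFilePos, ID)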
+
+class SwissCompressor:
+    """
+    Convert a protein database into concatenated format.
+    Processes the SwissProt database format.
+    """
+    def __init__(self, SourceFileName, SquishedFileName, IndexFileName, Species = None):
+        self.SourceFile = open(SourceFileName,"rb")
+        self.SquishedFile = open(SquishedFileName,"wb")
+        self.IndexFile = open(IndexFileName,"wb")
+        self.FASTA = 0
+        self.Species = Species
+    def Compress(self):
+        """
+        The parts of swiss-prot we care about look like this:
+SQ   SEQUENCE   296 AA;  34077 MW;  B0D7CD175C7A3625 CRC64;
+     FNSNMLRGSV CEEDVSLMTS IDNMIEEIDF YEKEIYKGSH SGGVIKGMDY DLEDDENDED
+     EMTEQMVEEV ADHITQDMID EVAHHVLDNI THDMAHMEEI VHGLSGDVTQ IKEIVQKVNV
+     AVEKVKHIVE TEETQKTVEP EQIEETQNTV EPEQTEETQK TVEPEQTEET QNTVEPEQIE
+     ETQKTVEPEQ TEEAQKTVEP EQTEETQKTV EPEQTEETQK TVEPEQTEET QKTVEPEQTE
+     ETQKTVEPEQ TEETQKTVEP EQTEETQKTV EPEQTEETQN TVEPEPTQET QNTVEP
+//
+        """        
+        self.InSequence = 0
+        RecordNumber = 0
+        LineNumber = 0
+        CorrectSpecies = 0
+        while (1):
+            LineNumber += 1
+            SourceFilePos = self.SourceFile.tell()
+            RawFileLine  = self.SourceFile.readline()
+            if not RawFileLine:
+                break # end o' file!
+            FileLine = RawFileLine.strip()
+            if self.InSequence:
+                # // marks end of sequence; anything else is sequence data.
+                # ...but in some cases, the // marker isn't present, so we
+                # stop when we see the "ID" tag from the next record.
+                #if FileLine[:2] == "//":
+                #print self.InSequence, FileLine
+                if RawFileLine[:2] != "  ":
+                    self.InSequence = 0
+                    if self.FASTA:
+                        pass
+                    else:
+                        self.SquishedFile.write("*")
+                    RecordNumber += 1
+                else:
+                    Stripped = FileLine.replace(" ","")
+                    self.SquishedFile.write(Stripped)
+            else:
+                if FileLine[:3] == "OS ":
+                    if self.Species == None or FileLine.lower().find(self.Species)!=-1:
+                        CorrectSpecies = 1
+                    else:
+                        CorrectSpecies = 0
+                if FileLine[:3] == "ID ":
+                    SourceFileRecordStart = SourceFilePos
+                    ID = FileLine.split()[1]
+                    ID = ID[:80]
+                    if self.FASTA:
+                        self.SquishedFile.write("\n>%s\n"%ID)
+                if FileLine[:3] == "SQ ":
+                    if CorrectSpecies:
+                        self.InSequence = 1
+                        SquishedFilePos = self.SquishedFile.tell()
+                        Str = struct.pack("<qi80s", SourceFileRecordStart, SquishedFilePos, ID)
+                        self.IndexFile.write(Str)
+            if LineNumber%1000 == 0:
+                print "Processed line %d."%LineNumber
+                #self.SquishedFile.flush()
+                #self.IndexFile.flush()
+                #sys.stdin.readline()
+        print "Total records seen:", RecordNumber
+
+class FASTACompressor:
+    """
+    Convert a protein database into concatenated format.
+    Processes FASTA format.
+    """
+    def __init__(self, SourceFileName, SquishedFileName, IndexFileName, Species = None):
+        self.SourceFile = open(SourceFileName,"rb")
+        self.SquishedFile = open(SquishedFileName,"wb")
+        self.IndexFile = open(IndexFileName,"wb")
+        self.SquishedFileName = SquishedFileName
+        self.IndexFileName = IndexFileName
+    def Compress(self):
+        RecordNumber = 0
+        LineNumber = 0
+        FirstRecord = 1
+        LineNumberWarnings = 0
+        DummyTable = string.maketrans("", "")
+        while (1):
+            LineNumber += 1
+            SourceFilePos = self.SourceFile.tell()
+            FileLine  = self.SourceFile.readline()
+            if not FileLine:
+                break # end o' file!
+            FileLine = FileLine.strip()
+            if not FileLine:
+                continue # empty lines (whitespace only) are skipped
+            if FileLine[0] == ">":
+                RecordNumber += 1
+                if not FirstRecord:
+                    self.SquishedFile.write("*")                
+                ID = FileLine[1:81].strip()
+                # Fix weird characters in the ID:
+                ID = ID.replace("\t", " ")
+                # Note: Important to call tell() *after* writing the asterisk!  (Fixed a bug 1/20/5)
+                SquishedFilePos = self.SquishedFile.tell() 
+                Str = struct.pack("<qi80s", SourceFilePos, SquishedFilePos, ID)
+                self.IndexFile.write(Str)
+                FirstRecord = 0
+            else:
+                WarnFlag = 0
+                FileLine = string.translate(FileLine, DummyTable, " \r\n\t*")
+                FileLine = FileLine.upper()
+                Str = ""
+                for Char in FileLine:
+                    if Char not in "ABCDEFGHIJKLMNOPQRSTUVWXYZ":
+                        WarnFlag = 1
+                    else:
+                        Str += Char
+                #FileLine = FileLine.replace("*","")
+                if WarnFlag and LineNumberWarnings < 10:
+                    print "* Warning: line %s contains non-amino-acid characters:"%LineNumber
+                    print FileLine
+                    LineNumberWarnings += 1
+                    if LineNumberWarnings >= 10:
+                        print "(omitting further warnings)"
+                self.SquishedFile.write(Str)
+        print "Converted %s protein sequences (%s lines) to .trie format."%(RecordNumber + 1, LineNumber)
+        print "Created database file '%s'"%self.SquishedFileName
+
+class MS2DBCompressor:
+    """
+    Creates the index file for a splice-graph (MS2DB) database; no modification is made to the original database.
+    """
+    def __init__(self, SourceFileName, SquishedFileName, IndexFileName, Species = None):
+        self.SourceFile = open(SourceFileName,"rb")
+        
+        self.IndexFile = open(IndexFileName,"wb")
+        self.IndexFileName = IndexFileName
+    def Compress(self):
+        RecordNumber = 0
+        LineNumber = 0
+        FirstRecord = 1
+        LineNumberWarnings = 0
+        DummyTable = string.maketrans("", "")
+        while (1):
+            LineNumber += 1
+            SourceFilePos = self.SourceFile.tell()
+            FileLine  = self.SourceFile.readline()
+            if not FileLine:
+                break # end o' file!
+            FileLine = FileLine.strip()
+            if not FileLine:
+                continue # empty lines (whitespace only) are skipped
+            if FileLine[0:6] == "<Gene ":
+                RecordNumber += 1
+                
+                ID = ""
+                Bits = FileLine[6:].split(" ")
+                for B in Bits:
+                    (Item,Value) = B.split("=")
+                    if Item == "Name":
+                        ID = Value[1:-1]
+                        break
+
+                if ID == "":
+                    print "No valid ID found in %s"%FileLine
+                    raw_input()
+                
+                # Note: No concatenated DB is written for MS2DB, so the source position is stored twice.
+                Str = struct.pack("<qi80s", SourceFilePos, SourceFilePos, ID)
+                self.IndexFile.write(Str)
+                FirstRecord = 0
+            
+        print "Converted %s protein sequences (%s lines) to .ms2index format."%(RecordNumber + 1, LineNumber)
+        print "Created index file '%s'"%self.IndexFileName
+
+
+def PrintUsage():
+    print "Please supply a database filename."
+    print "Usage: PrepDB.py <format> <OriginalDB> [NewDB] [IndexFile]"
+    print "Example: Prepdb.py FASTA Drome.fasta"
+    print "  The source format can be either FASTA or SWISS or MS2DB"
+    print "  New DB file name defaults to original filename with .trie appended (no new file is created for MS2DB)"
+    print "  Index file name defaults to original filename with .index appended (or .ms2index for MS2DB)"
+
+if __name__ == "__main__":    
+    if len(sys.argv)<3:
+        PrintUsage()
+        sys.exit()
+    try:
+        import psyco
+        psyco.full()
+    except:
+        print "(psyco not found - running in non-optimized mode)"
+    # First argument: Original database file format
+    Format = sys.argv[1].lower()
+    if Format == "fasta":
+        CompressorClass = FASTACompressor
+    elif Format == "swiss":
+        CompressorClass = SwissCompressor
+    elif Format == "ms2db":
+        CompressorClass = MS2DBCompressor
+    else:
+        print "Unknown source database format '%s'"%Format
+        PrintUsage()
+        sys.exit()
+    # Second argument: Original database file
+    SourceFileName = sys.argv[2]
+    # Optional third argument: New database file name
+    if len(sys.argv) > 3:
+        SquishedFileName = sys.argv[3]
+    elif Format == "ms2db":
+        SquishedFileName = None
+    else:
+        SquishedFileName = "%s.trie"%os.path.splitext(SourceFileName)[0]
+    # Optional fourth argument: Index file name
+    if len(sys.argv) > 4:
+        IndexFileName = sys.argv[4]
+    elif Format == "ms2db":
+        IndexFileName = "%s.ms2index"%os.path.splitext(SourceFileName)[0]
+    else:
+        IndexFileName = "%s.index"%os.path.splitext(SourceFileName)[0]
+    # Use FASTACompressor for FASTA format, SwissCompressor for the weird swiss-prot format
+    # If "species" is a string, then the Swiss-prot reader will filter out any records
+    # that don't contain that string.  For example, set Species = "sapiens" to grab only
+    # human proteins.
+    Species = None
+    Squasher = CompressorClass(SourceFileName, SquishedFileName, IndexFileName, Species)
+    Squasher.Compress()
diff --git a/ProteinGrouper.py b/ProteinGrouper.py
new file mode 100644
index 0000000..5250d36
--- /dev/null
+++ b/ProteinGrouper.py
@@ -0,0 +1,471 @@
+#Title:          ProteinGrouper.py
+#Author:         Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+#Created:        2005
+# Copyright 2007,2008,2009 The Regents of the University of California
+# All Rights Reserved
+#
+# Permission to use, copy, modify and distribute any part of this
+# program for educational, research and non-profit purposes, by non-profit
+# institutions only, without fee, and without a written agreement is hereby
+# granted, provided that the above copyright notice, this paragraph and
+# the following three paragraphs appear in all copies.
+#
+# Those desiring to incorporate this work into commercial
+# products or use for commercial purposes should contact the Technology
+# Transfer & Intellectual Property Services, University of California,
+# San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+# Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+#
+# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+# INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+# IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+# OF SUCH DAMAGE.
+#
+# THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+# OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+# ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+# REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+# EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+# THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+#Updated 1-3-2012 to allow column header based lookup (NEC)
+
+import os
+import sys
+import ResultsParser
+import TrieUtils
+import Utils
+import getopt
+
+UsageInfo = """ProteinGrouper.py version 2012.01.03
+ProteinGrouper updates the 'Protein' field for Inspect annotations, replacing
+the single protein name with a '!' delimited list of protein names.  For each 
+Inspect results file specified, a new file is created with the updated 'Protein'
+field.  
+[REQUIRED]
+-r [File or Dir] File or directory containing Inspect annotations
+-t [File] Trie file used to search spectra, assumes an index file exists of the same name
+-w [Dir] Directory where updated Inspect annotations are written
+
+[OPTIONAL]:
+-p Assign peptides to a parsimonious set of proteins.
+-a Assign peptides to a parsimonious set of proteins, and list the assigned
+protein first in each peptide's '!' delimited list of proteins.
+"""
+DELIM = "!"
+
+class ProteinGrouper(ResultsParser.ResultsParser):
+    def __init__(self):
+        ResultsParser.ResultsParser.__init__(self)
+        self.Columns = ResultsParser.Columns()
+        Utils.Initialize()
+        self.TrieFiles = []
+        self.IndexFiles = []
+        self.DoParsimony = 0
+        self.DoParsimonyAndGroup = 0
+        self.TUtils = TrieUtils.TrieUtils()
+
+
+        self.Peptide2ProteinID = {} #Peptide sequence -> (TrieIndex,ProteinIDS)
+        self.Peptide2SpectralCount = {} #Peptide sequence -> Spectral Count
+        self.Protein2Peptides = {} # (TrieIndex,ProteinID) -> Peptide sequences
+        #self.ProteinNames = {} #(TrieIndex,ProteinID) -> ProteinName
+
+        #Populated only if parsimony
+        self.SelectedProteins = {}
+        self.HeaderLine = None
+        
+
+    def Main(self):
+
+        #Load peptides
+        for FileName in self.InputFiles:
+            print "Loading peptides from %s"%FileName
+            self.LoadPeptides(FileName)
+        if self.DoParsimony == 1 or self.DoParsimonyAndGroup == 1:
+            self.ChoosePeptides()
+        for FileName in self.InputFiles:
+            self.WritePeptides(FileName)
+            
+
+    #We assume that if multiple peptides are listed for a spectrum, then they appear together in the file and in decreasing order of
+    #confidence
+    def LoadPeptides(self,FileName):
+        #print "Loading peptides!!"
+        #raw_input()
+        LocalDebug = 0
+
+        RawFileName = os.path.basename(FileName)
+        
+        File = open(FileName,'r')
+        #OutputFile = os.path.join(self.OutputDir,RawFileName)
+        #OutFile = open(OutputFile ,'w')
+        PeptideList = {}
+        
+        PrevSpectrum = None
+        
+        
+        for Line in File:
+            Line = Line.strip()
+            if Line == "":
+                continue
+            if Line[0] == "#":
+                self.HeaderLine = Line
+                continue
+            
+            if LocalDebug:
+                print Line
+            
+            Bits = Line.split("\t")
+            ModdedPeptide = Bits[self.Columns.getIndex("Annotation")]
+            Peptide = Utils.GetPeptideFromModdedName(ModdedPeptide).Aminos
+
+            #if Peptide == "YGPLLDLPELPFPELER":
+            #    LocalDebug = 1
+            #else:
+            #    LocalDebug = 0
+            CurrSpectrum = (Bits[0],Bits[1])
+            if CurrSpectrum == PrevSpectrum:
+                continue
+            PrevSpectrum = CurrSpectrum
+
+            #Update the spectral count for this peptide
+            if not self.Peptide2SpectralCount.has_key(Peptide):
+                self.Peptide2SpectralCount[Peptide] = 0
+            self.Peptide2SpectralCount[Peptide] += 1
+
+            if LocalDebug:
+                print self.Peptide2SpectralCount[Peptide]
+                #raw_input()
+
+            if self.Peptide2ProteinID.has_key(Peptide):
+                if LocalDebug:
+                    print "Already searched for peptide %s"%Peptide
+                    raw_input()
+                continue
+                
+            else:
+                PeptideList[Peptide] = 1
+                if LocalDebug:
+                    print "Searching %s for the first time!"%Peptide
+
+
+                #See if we've reached enough peptides to search
+                if len(PeptideList.keys()) >= TrieUtils.MIN_TRIE_SEARCH_SIZE:
+                    if LocalDebug:
+                        print "Reached %s peptides to search"%(len(PeptideList.keys()))
+                        #raw_input()
+                    
+                    #Loop through each trie file that we have
+                    for TrieIndex in range(0,len(self.TrieFiles)):
+                        if LocalDebug:
+                            print "Searching trieDB: %s"%(self.TrieFiles[TrieIndex])
+                        Locations = self.TUtils.GetAllLocations(PeptideList.keys(),self.TrieFiles[TrieIndex])
+                        if LocalDebug:
+                            print "Finished searching"
+                        #For each peptide that we searched, add its locations and proteins
+                        for Pep in PeptideList.keys():
+                            #if Pep == "AAGARPLTSPESLSR" or Pep == "GFFDPNTHENLTYLQLLR" or Pep == "EMAVPDVHLPDVQLPK" or Pep == "YGPLLDLPELPFPELER":
+                            #    LocalDebug = 1
+                            #else:
+                            #    LocalDebug = 0
+                            if LocalDebug:
+                                print Pep
+                                print "Total Locations: %s"%(len(Locations[Pep]))
+                            if len(Locations[Pep]) == 0:
+                                print "No locations found for %s"%Pep
+                                continue
+                            if not self.Peptide2ProteinID.has_key(Pep):
+                                self.Peptide2ProteinID[Pep] = []
+                            for (ID,Res) in Locations[Pep]:
+
+                                if LocalDebug:
+                                    print "%s appear in %s at pos %s"%(Pep,ID,Res)
+                                #Get the protein name
+                                #if not self.ProteinNames.has_key((TrieIndex,ID)):
+                                ProteinName = self.TUtils.GetProteinName(self.IndexFiles[TrieIndex],ID)
+                                    
+                                #else:
+                                #    ProteinName = self.ProteinNames[(TrieIndex,ID)]
+                                if LocalDebug:
+                                    print "Hit to protine %s"%ProteinName
+                                if ProteinName[0:3] == "XXX": #Skip hits to reverse DB
+                                    continue
+                                if ProteinName.find(DELIM) >= 0:
+                                    print "ERROR: Protein %s contains delim %s"%(ProteinName,DELIM)
+                                    sys.exit(0)
+                                #self.ProteinNames[(TrieIndex,ID)] = ProteinName
+
+                                if self.Peptide2ProteinID[Pep].count((TrieIndex,ID)) == 0:
+                                    self.Peptide2ProteinID[Pep].append((TrieIndex,ID))
+                               
+                                if not self.Protein2Peptides.has_key((TrieIndex,ID)):
+                                    self.Protein2Peptides[(TrieIndex,ID)] = []
+                                if self.Protein2Peptides[(TrieIndex,ID)].count(Pep) == 0:
+                                    self.Protein2Peptides[(TrieIndex,ID)].append(Pep)
+
+                    PeptideList = {}
+        if len(PeptideList.keys()) > 0:
+            if LocalDebug:
+                print "Reached %s peptides to search"%(len(PeptideList.keys()))
+                
+            for TrieIndex in range(0,len(self.TrieFiles)):
+                Locations = self.TUtils.GetAllLocations(PeptideList.keys(),self.TrieFiles[TrieIndex])
+                    
+                for Pep in PeptideList.keys():
+                    if LocalDebug:
+                        print Pep
+                        
+                    if len(Locations[Pep]) == 0:
+                        print "No locations found for %s"%Pep
+                        continue
+                    if not self.Peptide2ProteinID.has_key(Pep):
+                        self.Peptide2ProteinID[Pep] = []
+
+                    for (ID,Res) in Locations[Pep]:
+
+                        #Get the protein name
+                        #if not self.ProteinNames.has_key((TrieIndex,ID)):
+                        ProteinName = self.TUtils.GetProteinName(self.IndexFiles[TrieIndex],ID)
+
+                        #else:
+                        #    ProteinName = self.ProteinNames[(TrieIndex,ID)]
+
+                        if ProteinName[0:3] == "XXX": #Skip hits to reverse DB
+                            continue
+                        if ProteinName.find(DELIM) >= 0:
+                            print "ERROR: Protein %s contains delim %s"%(ProteinName,DELIM)
+                            sys.exit(0)
+                        #self.ProteinNames[(TrieIndex,ID)] = ProteinName
+
+                        if self.Peptide2ProteinID[Pep].count((TrieIndex,ID)) == 0:
+                            self.Peptide2ProteinID[Pep].append((TrieIndex,ID))
+                        
+                        if not self.Protein2Peptides.has_key((TrieIndex,ID)):
+                            self.Protein2Peptides[(TrieIndex,ID)] = []
+                        if self.Protein2Peptides[(TrieIndex,ID)].count(Pep) == 0:
+                            self.Protein2Peptides[(TrieIndex,ID)].append(Pep)
+                        
+
+                        
+                   
+            
+        File.close()
+        
+
+    def ChoosePeptides(self):
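+        #Greedy protein parsimony (a summary of the loop below): repeatedly accept the
+        #protein that covers the most not-yet-claimed peptides (ties broken by spectral
+        #count, then by protein name), claim its peptides, and subtract those peptides
+        #and their spectra from every other protein that shares them.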
+        
+        LocalDebug = 1
+        if LocalDebug:
+            print "Total peptides: %s"%(len(self.Peptide2ProteinID.keys()))
+            print "Total protiens: %s"%(len(self.Protein2Peptides.keys()))
+
+
+        ProteinCounts = {}
+        for (TrieIndex,ID) in self.Protein2Peptides.keys():
+            SpecCount = 0
+            for Pep in self.Protein2Peptides[(TrieIndex,ID)]:
+                SpecCount += self.Peptide2SpectralCount[Pep]
+            ProteinCounts[(TrieIndex,ID)] = (len(self.Protein2Peptides[(TrieIndex,ID)]),SpecCount)
+
+        self.SelectedProteins = {}
+        self.FinalPeptideProteins = {} #peptide -> final protein selection
+        
+        while (1):
+            BestCandidate = None
+            BestScore = None
+            BestProteinName = None
+            #Find the next best protein (best = most peptides)
+            for (TrieIndex,ProteinID) in ProteinCounts.keys():
+
+                #We've already added this guy
+                if self.SelectedProteins.has_key((TrieIndex,ProteinID)):
+                    continue
+                Score = ProteinCounts[(TrieIndex,ProteinID)]
+                CurrProteinName = self.TUtils.GetProteinName(self.IndexFiles[TrieIndex],ProteinID)
+
+                if BestScore == None or Score > BestScore or (Score == BestScore and CurrProteinName < BestProteinName):
+                    BestScore = Score
+                    BestCandidate = (TrieIndex,ProteinID)
+                    BestProteinName = CurrProteinName
+                    #print "New Best %s, score %s"%(ProteinID,BestScore)
+            if not BestScore:
+                break
+            (PeptideCount, SpectrumCount) = BestScore
+            if PeptideCount == 0:
+                break
+            #%%%
+            ProteinName = BestProteinName
+            print "Accept protein %s (%s)\n  Gets %s peptides, %s spectra"%(BestCandidate, ProteinName, PeptideCount, SpectrumCount)
+            self.SelectedProteins[BestCandidate] = BestScore
+            # Lay claim to all the (not-yet-claimed) peptides:
+            for Peptide in self.Protein2Peptides[BestCandidate]:
+                if LocalDebug:
+                    print "  Grab %s spectra from peptide %s"%(self.Peptide2SpectralCount[Peptide], Peptide)
+                self.FinalPeptideProteins[Peptide] = BestCandidate
+                # Other proteins (if not already accepted) lose a peptide, and some spectra:
+                for (OtherTrieIndex,OtherID) in self.Peptide2ProteinID[Peptide]:
+                    if self.SelectedProteins.has_key((OtherTrieIndex,OtherID)):
+                        continue
+                    #if LocalDebug:
+                    #    print "Removing spectra from other Protein %s/%s (%s)"%(OtherTrieIndex,OtherID,self.ProteinNames[(OtherTrieIndex,OtherID)])
+                    (pCount,sCount) = ProteinCounts[(OtherTrieIndex,OtherID)]
+
+                    if LocalDebug:
+                        print "Old counts: %s peptides %s spectra"%(pCount,sCount)
+                    self.Protein2Peptides[(OtherTrieIndex,OtherID)].remove(Peptide)
+                    pCount -= 1
+                    sCount -= self.Peptide2SpectralCount[Peptide]
+                    if LocalDebug:
+                        print "New counts: %s peptides %s spectra"%(pCount,sCount)
+                    ProteinCounts[(OtherTrieIndex, OtherID)] = (pCount,sCount)
+        # Sanity check - the selected proteins have peptides, the unselected proteins have 0
+        for Protein in self.Protein2Peptides.keys():
+            #ProteinName = self.ProteinNames[Protein]
+            PeptideCount = len(self.Protein2Peptides[Protein])
+            SpectrumCount = ProteinCounts.get(Protein, (0, 0))[1]
+            if self.SelectedProteins.has_key(Protein) and PeptideCount <= 0:
+                print "** Warning: Selected protein %s has %s peptides!"%(Protein, PeptideCount)
+            if not self.SelectedProteins.has_key(Protein) and PeptideCount != 0:
+                print "** Warning: Unelected protein %s has %s peptides!"%(Protein, PeptideCount)
+            
+        
+
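+    #Rewrite each input line, replacing the Protein column according to the selected
+    #parsimony mode, and write the result to a file of the same name in the output directory.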
+    def WritePeptides(self,FileName):
+        RawFileName = os.path.basename(FileName)
+        InputFile = open(FileName,'r')
+        OutputFile = os.path.join(self.OutputDir,RawFileName)
+        OutFile = open(OutputFile ,'w')
+        
+        MissCount = 0
+        LineCount = 0
+        for Line in InputFile:
+            Line = Line.strip()
+            if Line == "":
+                continue
+            if Line[0] == "#":
+                OutFile.write(Line + "\n")
+                continue
+            Bits = Line.split("\t")
+            ModdedPeptide = Bits[self.Columns.getIndex("Annotation")]
+            Peptide = Utils.GetPeptideFromModdedName(ModdedPeptide).Aminos
+
+
+            #See if we are doing parsimony
+            if self.DoParsimonyAndGroup == 1:
+                if not self.Peptide2ProteinID.has_key(Peptide) or len(self.Peptide2ProteinID[Peptide]) == 0:
+                    print "ERROR: Peptide %s of %s has no locations!!!"%(Peptide,Line)
+                    MissCount += 1
+                    continue
+
+                if not self.FinalPeptideProteins.has_key(Peptide):
+                    print "ERROR: Peptide %s of %s has no selected protein!!!"%(Peptide,Line)
+                    print "Formerly found in:"
+                    for (TrieIndex,ID) in self.Peptide2ProteinID[Peptide]:
+                        print "(%s,%s)"%(TrieIndex,ID)
+                    MissCount += 1
+                    continue
+                Protein = self.FinalPeptideProteins[Peptide]
+                (TrieIndex,ProtID) = Protein
+                
+                #Write the name of the selected protein first, then append every other
+                #protein that contains this peptide, separated by DELIM
+                Locations = self.Peptide2ProteinID[Peptide]
+                LocStr = self.TUtils.GetProteinName(self.IndexFiles[TrieIndex],ProtID)
+                for Prot in Locations:
+                    if Prot != Protein:
+                        (TIndex,PID) = Prot
+                        LocStr += DELIM + self.TUtils.GetProteinName(self.IndexFiles[TIndex],PID)
+                Bits[self.Columns.getIndex("Protein")] = LocStr
+
+            elif self.DoParsimony == 1:
+                if not self.FinalPeptideProteins.has_key(Peptide):
+                    print "ERROR: Peptide %s of %s has no selected protein!!!"%(Peptide,Line)
+                    MissCount += 1
+                    continue
+                Protein = self.FinalPeptideProteins[Peptide]
+                (TrieIndex,ProtID) = Protein
+                Bits[self.Columns.getIndex("Protein")] = self.TUtils.GetProteinName(self.IndexFiles[TrieIndex],ProtID) 
+                Bits[self.Columns.getIndex("RecordNumber")] = str(Protein[1])
+            else:
+                if not self.Peptide2ProteinID.has_key(Peptide) or len(self.Peptide2ProteinID[Peptide]) == 0:
+                    print "ERROR: Peptide %s of %s has no locations!!!"%(Peptide,Line)
+                    MissCount += 1
+                    continue
+
+                Locations = self.Peptide2ProteinID[Peptide]
+                LocStr = ""
+                for Prot in Locations:
+                    (TIndex,PID) = Prot
+                    LocStr += self.TUtils.GetProteinName(self.IndexFiles[TIndex],PID) + DELIM
+                LocStr = LocStr[0:-1*len(DELIM)]
+                
+                Bits[self.Columns.getIndex("Protein")] = LocStr
+            Str = "\t".join(Bits)
+            OutFile.write("%s\n"%Str)
+            #print Str
+            LineCount += 1
+
+        print "Total peptides omitted: %s"%MissCount
+        print "Wrote %s lines to %s"%(LineCount,OutputFile)
+        OutFile.close()
+        InputFile.close()
+                    
+            
+
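+    #Command-line options handled below:
+    #  -r  Inspect results file, or a directory of results files
+    #  -w  output directory (created if it does not exist)
+    #  -t  .trie database file (a matching .index file must exist; may be repeated)
+    #  -p  report only the selected (parsimonious) protein for each peptide
+    #  -a  parsimony plus grouping: the selected protein first, other matching proteins appended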
+    def ParseCommandLine(self,Arguments):
+        (Options,Args) = getopt.getopt(Arguments,"r:w:t:pa")
+        OptionsSeen = {}
+        for (Option,Value) in Options:
+            OptionsSeen[Option] = 1
+            
+            if Option == "-r":
+                if not os.path.exists(Value):
+                    print "ERROR: %s is not a valid file or directory"%Value
+                    sys.exit(0)
+                
+                if not os.path.isdir(Value):
+                    self.InputFiles = [Value]
+                
+                else:
+                    Files = os.listdir(Value)
+                    self.InputFiles = []
+                    for FileName in Files:
+                        self.InputFiles.append(os.path.join(Value,FileName))
+
+            elif Option == "-w":
+                if not os.path.exists(Value):
+                    os.makedirs(Value)
+                self.OutputDir = Value
+            elif Option == "-t":
+                if not os.path.isfile(Value):
+                    print "ERROR: %s is not a valid database file"%Value
+                    sys.exit(0)
+
+                IndexFileName = os.path.splitext(Value)[0] + ".index"
+                if not os.path.isfile(IndexFileName):
+                    print "ERROR: Unable to find index file %s for trie file %s"%(IndexFileName,Value)
+                    sys.exit(0)
+                self.TrieFiles.append(Value)
+                self.IndexFiles.append(IndexFileName)
+            elif Option == "-p":
+                self.DoParsimony = 1
+            elif Option == "-a":
+                self.DoParsimonyAndGroup = 1
+
+            else:
+                print "ERROR %s is not a valid argument"%Option
+        
+        if not OptionsSeen.has_key("-r") or not OptionsSeen.has_key("-w") or not OptionsSeen.has_key("-t"):
+            print "ERROR: Missing arguments"
+            print UsageInfo
+            sys.exit(0)
+
+
+if __name__ == "__main__":
+    Grouper = ProteinGrouper()
+    Grouper.ParseCommandLine(sys.argv[1:])
+    Grouper.Main()
diff --git a/PyInspect.pyd b/PyInspect.pyd
new file mode 100644
index 0000000..bfe8d19
Binary files /dev/null and b/PyInspect.pyd differ
diff --git a/PyInspect/PyInspect.c b/PyInspect/PyInspect.c
new file mode 100644
index 0000000..e05aa65
--- /dev/null
+++ b/PyInspect/PyInspect.c
@@ -0,0 +1,661 @@
+//Title:          PyInspect.c
+//Authors:         Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+#include "CMemLeak.h"
+#include "Python.h"
+#include "Trie.h"
+#include "Score.h"
+#include "PySpectrum.h"
+#include "BN.h"
+#include "PyUtils.h"
+#include "Errors.h"
+#include "FreeMod.h"
+#include "IonScoring.h"
+#include "SVM.h"
+#include "LDA.h"
+#include "ParseInput.h"
+#include "ParseXML.h"
+#include "TagFile.h"
+
+PyObject* InspectError;
+PRMBayesianModel* InteractiveBN = NULL;
+
+PyObject* PyResetIonScoring(PyObject* self, PyObject* args)
+{
+    int IntensityScheme;
+    float IntensityRadius;
+    int CutFlag = 0;
+    int NoiseModelFlag = 0;
+    //
+    if (!PyArg_ParseTuple(args, "if|ii", &IntensityScheme, &IntensityRadius, &CutFlag, &NoiseModelFlag))
+    {
+        return NULL;
+    }
+    FreePRMBayesianModel(InteractiveBN);
+    InteractiveBN = (PRMBayesianModel*)calloc(1, sizeof(PRMBayesianModel));
+    InteractiveBN->NoiseModel = NoiseModelFlag;
+    InteractiveBN->IntensityScheme = IntensityScheme;
+    switch (InteractiveBN->IntensityScheme)
+    {
+    case 0:
+    case 1:
+    case 4:
+        InteractiveBN->MinIntensityLevel = 3; 
+        break;
+    case 2:
+    case 3:
+        InteractiveBN->MinIntensityLevel = 2; 
+        break;
+    default:
+        REPORT_ERROR(0);
+        break;
+    }
+    
+    InteractiveBN->IntensityRadius = (int)(IntensityRadius * DALTON);
+    InteractiveBN->HalfIntensityRadius = InteractiveBN->IntensityRadius / 2;
+    InteractiveBN->CutFlag = CutFlag;
+    Py_INCREF(Py_None);
+    return Py_None;
+}
+
+PyObject* PyAddIonScoringNode(PyObject* self, PyObject* args)
+{
+    int NodeType;
+    int NodeInfoA;
+    char* Name;
+    int FragmentType = evFragmentTypeNone;
+    float NodeMassOffset;
+    if (!PyArg_ParseTuple(args, "sii|fi", &Name, &NodeType, &NodeInfoA, &NodeMassOffset, &FragmentType))
+    {
+        return NULL;
+    }
+    // Add a node:
+    AddPRMBayesianNode(InteractiveBN, Name, NodeType, NodeInfoA, NodeMassOffset, FragmentType);
+    // Return the node's index:
+    return PyInt_FromLong(InteractiveBN->NodeCount - 1);
+}
+
+PyObject* PySetIonScoringNodeParents(PyObject* self, PyObject* args)
+{
+    int NodeIndex;
+    PyObject* ParentIndexList;
+    PRMBayesianNode* Node;
+    PRMBayesianNode* Parent;
+    int OverallBlockSize;
+    int ParentIndex;
+    int OtherParentIndex;
+    //
+    if (!PyArg_ParseTuple(args, "iO", &NodeIndex, &ParentIndexList))
+    {
+        return NULL;
+    }
+    // Validate input:
+    if (NodeIndex < 0 || NodeIndex >= InteractiveBN->NodeCount)
+    {
+        sprintf(PythonErrorString, "Illegal node index %d in SetIonScoringNodeParents", NodeIndex);
+        ReportPythonError();
+        return NULL;
+    }
+    Node = InteractiveBN->Nodes[NodeIndex];
+    
+    // Free the OLD parents, if any:
+    SafeFree(Node->Parents);
+    SafeFree(Node->ParentBlocks);
+
+    // Set the parents of this node:
+    Node->ParentCount = PyList_Size(ParentIndexList);
+    if (Node->ParentCount)
+    {
+        Node->Parents = (PRMBayesianNode**)calloc(sizeof(PRMBayesianNode*), Node->ParentCount);
+        Node->ParentBlocks = (int*)calloc(sizeof(int), Node->ParentCount);
+    }
+    for (ParentIndex = 0; ParentIndex < Node->ParentCount; ParentIndex++)
+    {
+        Parent = InteractiveBN->Nodes[PyInt_AsLong(PyList_GetItem(ParentIndexList, ParentIndex))];
+        Node->Parents[ParentIndex] = Parent;
+    }
+    // Set the parent block sizes.  Node->ParentBlocks[n] is the number of table entries
+    // spanned by one value of parent n: the number of value combinations of the node itself
+    // and of parents n+1 and beyond (just Node->ValueCount when n is ParentCount - 1).
+    // When indexing into the probability tables, we use these blocks.
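+    // Illustrative example (values assumed): a node with ValueCount 3 and two parents with
+    // ValueCounts 2 and 4 gets ParentBlocks = {12, 3} and TableSize = 2 * 12 = 24; the table
+    // entry index is ParentValue0 * 12 + ParentValue1 * 3 + NodeValue.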
+    for (ParentIndex = 0; ParentIndex < Node->ParentCount; ParentIndex++)
+    {
+        OverallBlockSize = Node->ValueCount;
+        for (OtherParentIndex = ParentIndex + 1; OtherParentIndex < Node->ParentCount; OtherParentIndex++)
+        {
+            OverallBlockSize *= Node->Parents[OtherParentIndex]->ValueCount;
+        }
+        Node->ParentBlocks[ParentIndex] = OverallBlockSize;
+        Parent = Node->Parents[ParentIndex];
+        OverallBlockSize /= Parent->ValueCount;
+    }
+    // Allocate probability tables:
+    Node->TableSize = Node->ValueCount;
+    if (Node->ParentCount)
+    {
+        Node->TableSize = (Node->Parents[0]->ValueCount * Node->ParentBlocks[0]);
+    }
+    SafeFree(Node->CountTable);
+    Node->CountTable = (int*)calloc(Node->TableSize, sizeof(int));
+    SafeFree(Node->ProbTable);
+    Node->ProbTable = (float*)calloc(Node->TableSize, sizeof(float));
+    Py_INCREF(Py_None);
+    return Py_None;
+}
+
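+// Accumulate a background (noise) distribution of binned intensity levels by sampling
+// twenty random intensity bins from the spectrum.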
+void TrainNoiseModelRandomMasses(PRMBayesianModel* Model, MSSpectrum* Spectrum)
+{
+    int Bin;
+    int BinIndex;
+    //
+    for (BinIndex = 0; BinIndex < 20; BinIndex++)
+    {
+        Bin = rand() % Spectrum->IntensityBinCount;
+        Model->RandomIntensityCounts[Spectrum->BinnedIntensityLevels[Bin]]++;
+    }
+}
+
+PyObject* PyTrainBNOnSpectrum(PyObject* self, PyObject* args)
+{
+    PySpectrum* SpectrumObject;
+    char* PeptideAnnotation;
+    Peptide* Match;
+    int PRM = 0;
+    int AminoCount;
+    int AminoIndex;
+    int ModIndex;
+    int NodeIndex;
+    int TableIndex;
+    int ParentIndex;
+    MSSpectrum* Spectrum;
+    PRMBayesianNode* Node;
+    //
+    if (!PyArg_ParseTuple(args, "Os", &SpectrumObject, &PeptideAnnotation))
+    {
+        return NULL;
+    }
+    Match = GetPeptideFromAnnotation(PeptideAnnotation);
+    if (!Match)
+    {
+        REPORT_ERROR(0);
+        return NULL;
+    }
+    Spectrum = SpectrumObject->Spectrum;
+    // Force the spectrum's parent mass to match the right parent mass:
+    Spectrum->ParentMass = Match->ParentMass;
+    PrepareSpectrumForIonScoring(InteractiveBN, Spectrum, 1);
+    AminoCount = strlen(Match->Bases);
+    for (NodeIndex = 0, Node = InteractiveBN->Head; Node; NodeIndex++, Node = Node->Next)
+    {
+        PRM = 0;
+        for (AminoIndex = 0; AminoIndex <= AminoCount; AminoIndex++)
+        {
+            ///////////////////////////////////////////////////////////////////////////////////////
+            // Set values, and accumulate table entries:
+            Node->Values[AminoIndex] = IonScoringGetNodeValue(InteractiveBN, Node, Spectrum, PRM, Match, AminoIndex);
+            ///////////////////////////////////////////////////////////////////////////////////////
+            // Accumulate PRM from the prefix so far:
+            if (AminoIndex == AminoCount)
+            {
+                break;
+            }
+            PRM += PeptideMass[Match->Bases[AminoIndex]];
+            for (ModIndex = 0; ModIndex < MAX_PT_MODS; ModIndex++)
+            {
+                if (Match->AminoIndex[ModIndex] == AminoIndex)
+                {
+                    PRM += Match->ModType[ModIndex]->RealDelta;
+                }
+            }
+        } // Amino loop
+    } // NodeIndex loop
+
+    // Iterate over the values arrays, accumulate counts in the frequency tables:
+    for (NodeIndex = 0; NodeIndex < InteractiveBN->NodeCount; NodeIndex++)
+    {
+        for (AminoIndex = 0; AminoIndex <= AminoCount; AminoIndex++)
+        {
+            Node = InteractiveBN->Nodes[NodeIndex];
+            TableIndex = 0;
+            for (ParentIndex = 0; ParentIndex < Node->ParentCount; ParentIndex++)
+            {
+                TableIndex += Node->Parents[ParentIndex]->Values[AminoIndex] * Node->ParentBlocks[ParentIndex];
+            }
+            TableIndex += Node->Values[AminoIndex];
+            if (TableIndex >= Node->TableSize)
+            {
+                // Panic!
+                REPORT_ERROR(0);
+                TableIndex = 0;
+            }
+            Node->CountTable[TableIndex]++;
+        }
+    }
+
+    // And, count how frequent the various intensity levels are for a random mass:
+    TrainNoiseModelRandomMasses(InteractiveBN, Spectrum);
+
+    // Cleanup:
+    FreePeptideNode(Match);
+
+    // Return:
+    Py_INCREF(Py_None);
+    return Py_None;
+}
+
+PyObject* PyDebugPrintPRMBayesianModel(PyObject* self, PyObject* args)
+{
+    if (!PyArg_ParseTuple(args, ""))
+    {
+        return NULL;
+    }
+
+    DebugPrintPRMBayesianModel(InteractiveBN);
+    Py_INCREF(Py_None);
+    return Py_None;
+}
+
+// Save InteractiveBN to disk:
+PyObject* PySaveBNModel(PyObject* self, PyObject* args)
+{
+    char* FileName;
+    if (!PyArg_ParseTuple(args, "s", &FileName))
+    {
+        return NULL;
+    }
+
+    SavePRMBayesianModel(InteractiveBN, FileName);
+    Py_INCREF(Py_None);
+    return Py_None;
+}
+
+// Load InteractiveBN from file; return node count
+PyObject* PyLoadBNModel(PyObject* self, PyObject* args)
+{
+    char* FileName;
+    if (!PyArg_ParseTuple(args, "s", &FileName))
+    {
+        return NULL;
+    }
+
+    FreePRMBayesianModel(InteractiveBN);
+    InteractiveBN = LoadPRMBayesianModel(FileName);
+    if (!InteractiveBN)
+    {
+        return PyInt_FromLong(-1);
+    }
+    return PyInt_FromLong(InteractiveBN->NodeCount);
+}
+
+// Convert the COUNT tables of the bayesian network into PROBABILITY tables.
+PyObject* PyComputeBNProbabilityTables(PyObject* self, PyObject* args)
+{
+    if (!PyArg_ParseTuple(args, ""))
+    {
+        return NULL;
+    }
+    ComputePRMBayesianModelProbabilityTables(InteractiveBN, 1);
+    Py_INCREF(Py_None);
+    return Py_None;
+}
+
+PyObject* PyComputeMutualInformation(PyObject* self, PyObject* args)
+{
+    PyObject* ReturnList;
+    PyObject* NodeEntropyList;
+    PRMBayesianNode* Node;
+    PRMBayesianNode* Parent;
+    float EntropySum[256];
+    int TableIndex;
+    float Entropy;
+    float JointEntropy;
+    int ParentIndex;
+    int TempIndex;
+    int ParentValue;
+    float Probability;
+    int Value;
+    int ValueIndex;
+    int NodeIndex;
+    float NodeEntropy[512];
+    float MutualInformation;
+    int FullTableCount;
+    if (!PyArg_ParseTuple(args, ""))
+    {
+        return NULL;
+    }
+
+    ReturnList = PyList_New(0);
+
+    // Compute the entropy of each node:
+    for (NodeIndex = 0, Node = InteractiveBN->Head; Node; Node = Node->Next, NodeIndex++)
+    {
+        ////////////////////////////////////////////////////////////////
+        // Compute the node's entropy:
+        memset(EntropySum, 0, sizeof(float) * 256);
+        FullTableCount = 0;
+        for (TableIndex = 0; TableIndex < Node->TableSize; TableIndex++)
+        {
+            FullTableCount += Node->CountTable[TableIndex];
+        }
+        for (TableIndex = 0; TableIndex < Node->TableSize; TableIndex++)
+        {
+            Probability = Node->CountTable[TableIndex] / (float)FullTableCount;
+            Value = TableIndex % Node->ValueCount;
+            EntropySum[Value] += Probability; 
+        }
+        Entropy = 0;
+        for (ValueIndex = 0; ValueIndex < Node->ValueCount; ValueIndex++)
+        {
+            printf("Node %d %s value %d: Odds %.6f\n", NodeIndex, Node->Name, ValueIndex, EntropySum[ValueIndex]);
+            if (EntropySum[ValueIndex] > 0.0)
+            {
+                Entropy -= EntropySum[ValueIndex] * (float)log(EntropySum[ValueIndex]);
+            }
+        }
+        NodeEntropy[NodeIndex] = Entropy;
+    }
+    for (NodeIndex = 0, Node = InteractiveBN->Head; Node; Node = Node->Next, NodeIndex++)
+    {
+        NodeEntropyList = PyList_New(0);
+        PyList_Append(ReturnList, NodeEntropyList);
+        PyList_Append(NodeEntropyList, PyInt_FromLong(NodeIndex));
+        PyList_Append(NodeEntropyList, PyString_FromString(Node->Name));
+        PyList_Append(NodeEntropyList, PyFloat_FromDouble(NodeEntropy[NodeIndex]));
+        ////////////////////////////////////////////////////////////////
+        // Compute the node's joint entropy with each parent.  First recompute the node's
+        // total count, since FullTableCount still holds the value left over from the last
+        // node processed in the entropy loop above:
+        FullTableCount = 0;
+        for (TableIndex = 0; TableIndex < Node->TableSize; TableIndex++)
+        {
+            FullTableCount += Node->CountTable[TableIndex];
+        }
+        for (ParentIndex = 0; ParentIndex < Node->ParentCount; ParentIndex++)
+        {
+            // EntropySum[ParentValue*Block+Value] = probability that parent has ParentValue
+            // and node has Value
+            Parent = Node->Parents[ParentIndex];
+            memset(EntropySum, 0, sizeof(float) * 256);
+            for (TableIndex = 0; TableIndex < Node->TableSize; TableIndex++)
+            {
+                TempIndex = TableIndex;
+                if (ParentIndex)
+                {
+                    TempIndex = TempIndex % Node->ParentBlocks[ParentIndex - 1];
+                }
+                ParentValue = TempIndex / Node->ParentBlocks[ParentIndex];
+                Probability = Node->CountTable[TableIndex] / (float)FullTableCount;
+                //Probability = (float)exp(Node->ProbTable[TableIndex]);
+                Value = TableIndex % Node->ValueCount;
+                EntropySum[ParentValue * Node->ValueCount + Value] += Probability; 
+            }
+            JointEntropy = 0;
+            for (ValueIndex = 0; ValueIndex < (Node->ValueCount * Parent->ValueCount); ValueIndex++)
+            {
+                ParentValue = ValueIndex / Node->ValueCount;
+                printf("Node %d %s value %d and parent (%d %s) has value %d: Odds %.6f\n", NodeIndex, 
+                    Node->Name, 
+                    ValueIndex % Node->ValueCount, Parent->Index, Parent->Name, ParentValue, EntropySum[ValueIndex]);
+                if (EntropySum[ValueIndex] > 0.0)
+                {
+                    JointEntropy -= EntropySum[ValueIndex] * (float)log(EntropySum[ValueIndex]);
+                }
+            }
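+            // Mutual information: I(Node;Parent) = H(Node) + H(Parent) - H(Node,Parent)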
+            MutualInformation = (NodeEntropy[NodeIndex] + NodeEntropy[Parent->Index] - JointEntropy);
+            printf("Node %d(%s) parent %d(%s):\n", NodeIndex, Node->Name, Parent->Index, Parent->Name);
+            printf("  Child entropy %.6f, parent entropy %.6f\n", NodeEntropy[NodeIndex], NodeEntropy[Parent->Index]);
+            printf("  Joint entropy: %.6f\n", JointEntropy);
+            printf("  Mutual information: %.6f\n", MutualInformation);
+            printf("  Conditional entropy (Child|Parent): %.6f\n", JointEntropy - NodeEntropy[Parent->Index]);
+            printf("  Normalized MI: %.6f\n", MutualInformation / NodeEntropy[NodeIndex]);
+            PyList_Append(NodeEntropyList, PyFloat_FromDouble(MutualInformation / NodeEntropy[NodeIndex]));
+        }
+        Py_DECREF(NodeEntropyList);
+    }
+    return ReturnList;
+}
+
+PyObject* PyGetBNFeatureNames(PyObject* self, PyObject* args)
+{
+    PyObject* ReturnList;
+    PRMBayesianNode* Node;
+    //
+    if (!PyArg_ParseTuple(args, ""))
+    {
+        return NULL;
+    }
+    ReturnList = PyList_New(0);
+    for (Node = InteractiveBN->Head; Node; Node = Node->Next)
+    {
+        PyList_Append(ReturnList, PyString_FromString(Node->Name));
+    }
+    return ReturnList;
+}
+
+PyObject* PyComputeBNValuesForSpectrum(PyObject* self, PyObject* args)
+{
+    PySpectrum* SpectrumObject;
+    char* PeptideAnnotation;
+    Peptide* Match;
+    int PRM = 0;
+    int AminoCount;
+    int AminoIndex;
+    int ModIndex;
+    int NodeIndex;
+    MSSpectrum* Spectrum;
+    PRMBayesianNode* Node;
+    PyObject* ReturnList;
+    PyObject* NodeValueList;
+    //
+    if (!PyArg_ParseTuple(args, "Os", &SpectrumObject, &PeptideAnnotation))
+    {
+        return NULL;
+    }
+    Match = GetPeptideFromAnnotation(PeptideAnnotation);
+    if (!Match)
+    {
+        REPORT_ERROR(0);
+        return NULL;
+    }
+    Spectrum = SpectrumObject->Spectrum;
+    // Force the spectrum's parent mass to match the right parent mass:
+    Spectrum->ParentMass = Match->ParentMass;
+    PrepareSpectrumForIonScoring(InteractiveBN, Spectrum, 1);
+    AminoCount = strlen(Match->Bases);
+    for (NodeIndex = 0, Node = InteractiveBN->Head; Node; NodeIndex++, Node = Node->Next)
+    {
+        PRM = 0;
+        for (AminoIndex = 0; AminoIndex <= AminoCount; AminoIndex++)
+        {
+            ///////////////////////////////////////////////////////////////////////////////////////
+            // Set values, and accumulate table entries:
+            Node->Values[AminoIndex] = IonScoringGetNodeValue(InteractiveBN, Node, Spectrum, PRM, Match, AminoIndex);
+            ///////////////////////////////////////////////////////////////////////////////////////
+            // Add to PRM:
+            if (AminoIndex == AminoCount)
+            {
+                break;
+            }
+            PRM += PeptideMass[Match->Bases[AminoIndex]];
+            for (ModIndex = 0; ModIndex < MAX_PT_MODS; ModIndex++)
+            {
+                if (Match->AminoIndex[ModIndex] == AminoIndex)
+                {
+                    PRM += Match->ModType[ModIndex]->RealDelta;
+                }
+            }
+        } // Amino loop
+    } // NodeIndex loop
+
+    // Iterate over the values arrays, building the return-list.
+    ReturnList = PyList_New(0);
+    for (AminoIndex = 0; AminoIndex <= AminoCount; AminoIndex++)
+    {
+        NodeValueList = PyList_New(0);
+        PyList_Append(ReturnList, NodeValueList);
+        for (NodeIndex = 0; NodeIndex < InteractiveBN->NodeCount; NodeIndex++)
+        {
+            Node = InteractiveBN->Nodes[NodeIndex];
+            PyList_Append(NodeValueList, PyInt_FromLong(Node->Values[AminoIndex]));
+            //Py_DECREF(NodeValueList);
+        }
+    }
+
+    // Cleanup:
+    FreePeptideNode(Match);
+
+    // Return:
+    return ReturnList;
+}
+
+PyObject* PyFinishIonScoringNetwork(PyObject* self, PyObject* args)
+{
+    if (!PyArg_ParseTuple(args, ""))
+    {
+        return NULL;
+    }
+
+    // Perform any activities necessary to finalizing InteractiveBN:
+    BuildModelFlankList(InteractiveBN);
+    Py_INCREF(Py_None);
+    return Py_None;
+}
+
+PyObject* PyReloadPMC(PyObject* self, PyObject* args)
+{
+    //
+    if (!PyArg_ParseTuple(args, "|"))
+    {
+        return NULL;
+    }
+
+    // Reload parent mass correction and charge-correction models:
+
+#ifdef PMC_USE_SVM
+    LoadPMCSVM(1);
+#else
+    LoadPMCLDA(1);
+#endif    
+
+#ifdef CC_USE_SVM
+    LoadCCModelSVM(1);
+#else
+    LoadCCModelLDA(1);
+#endif
+    Py_INCREF(Py_None);
+    return Py_None;
+}
+
+static PyMethodDef PyInspectMethods[] = 
+{
+    {"ResetIonScoring", PyResetIonScoring, 1, "Reset the ion scoring network"},
+    {"AddIonScoringNode", PyAddIonScoringNode, 1, "Add a node to the ion scoring network"},
+    {"SetIonScoringNodeParents", PySetIonScoringNodeParents, 1, "Set the parent(s) of an ion scoring node"},
+    {"FinishIonScoringNetwork", PyFinishIonScoringNetwork, 1, "Finalize an ion scoring network"},
+    {"TrainBNOnSpectrum", PyTrainBNOnSpectrum, 1, "Accumulate counts for network nodes, given a spectrum and peptide"},
+    {"DebugPrintBNModel", PyDebugPrintPRMBayesianModel, 1, "Debug print"},
+    {"SaveBNModel", PySaveBNModel, 1, "Save model to a binary file"},
+    {"LoadBNModel", PyLoadBNModel, 1, "Load from binary file (as written by SaveBNModel)"},
+    {"ComputeBNProbabilityTables", PyComputeBNProbabilityTables, 1, "Compute probability tables for a BNModel"},
+    {"ComputeBNValuesForSpectrum", PyComputeBNValuesForSpectrum, 1, "Compute values for nodes in the BNModel"},
+    {"GetBNFeatureNames", PyGetBNFeatureNames, 1, "Return a list of names of nodes in the bayesian network"},
+    {"ComputeMutualInformation", PyComputeMutualInformation, 1, "Compute MutualInformation for nodes and their parents"},
+    {"ReloadPMC", PyReloadPMC, 1, "Reset PMC / CC models"},
+    //{"erf", PyErrorFunction, METH_VARARGS, "return the error function erf(x)"},
+    //{"GammaIncomplete", PyGammaIncomplete, METH_VARARGS, "return the incomplete gamma function g(a, x)"},
+    //{"foo", ex_foo, 1, "foo() doc string"},
+    {NULL, NULL}
+};
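+
+// Illustrative use of this module from Python 2 (file names, annotations, and argument
+// values below are placeholders, not recommended settings):
+//   import PyInspect
+//   PyInspect.ResetIonScoring(IntensityScheme, IntensityRadius)
+//   NodeIndex = PyInspect.AddIonScoringNode(Name, NodeType, NodeInfoA, MassOffset)
+//   PyInspect.SetIonScoringNodeParents(NodeIndex, [ParentNodeIndex])
+//   PyInspect.FinishIonScoringNetwork()
+//   Spectrum = PyInspect.Spectrum("Spectra/Example.mzXML", 0)
+//   PyInspect.TrainBNOnSpectrum(Spectrum, "K.TESTPEPTIDER.A")
+//   PyInspect.ComputeBNProbabilityTables()
+//   PyInspect.SaveBNModel("Example.bn")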
+
+// Cleanup, called by Python when unloading.  Deallocate memory:
+void PyInspectCleanup(void)
+{
+    FreeMassDeltaByMass();
+    FreeMassDeltas();
+    FreeIsSubDecoration();
+    //FreeTaggingModel();
+    FreeJumpingHash();
+    FreeSVMModels();
+    FreeBayesianModels();
+    FreeLDAModels();
+    FreePRMBayesianModel(InteractiveBN);
+    InteractiveBN = NULL;
+    SafeFree(GlobalOptions);
+    GlobalOptions = NULL;
+    FreeCCModelSVM();
+    FreeTagSkewScores();
+    SafeFree(MassDeltaByIndex);
+    MassDeltaByIndex = NULL;
+    FreeMZXMLParseCursor();
+    FreeMZDataParseCursor();
+
+}
+
+PyMODINIT_FUNC initPyInspect(void)
+{
+    PyObject* Module;
+    ////////////////////
+    PySpectrumType.tp_new = PyType_GenericNew;
+    if (PyType_Ready(&PySpectrumType) < 0)
+    {
+        return;
+    }
+    Module = Py_InitModule("PyInspect", PyInspectMethods);
+
+    // Add the Error object:
+    InspectError = PyErr_NewException("Inspect.error", NULL, NULL);
+    Py_INCREF(InspectError);
+    PyModule_AddObject(Module, "error", InspectError);
+    InitErrors();
+
+    // Add the Spectrum object:
+    PyModule_AddObject(Module, "Spectrum", (PyObject *)&PySpectrumType);
+
+    // Create an ion scoring network, for interactive use:
+    InteractiveBN = (PRMBayesianModel*)calloc(1, sizeof(PRMBayesianModel));
+
+    // Perform some standard loading here, like amino acid masses.
+    AllocMassDeltaByIndex();
+    InitOptions();
+    sprintf(GlobalOptions->ResourceDir, ".%c", SEPARATOR);
+    LoadPeptideMasses(NULL);
+    PeptideMass['C'] += 57000; // ASSUMED: All cysteines carry the +57 modification.
+    LoadMassDeltas(NULL, 0);
+    InitBayesianModels();
+    SetTagSkewScores();
+    //LoadFlankingAminoEffects();
+    //LoadCCModel();
+#ifdef MQSCORE_USE_SVM
+    InitPValueSVM();
+#else
+    InitPValueLDA();
+#endif
+    PopulateJumpingHash();
+    
+    // Set the blind-flag to TRUE so that modified peptides 
+    // incur a score-penalty:
+    GlobalOptions->RunMode |= RUN_MODE_BLIND;
+    // Register our cleanup function to run at exit:
+    Py_AtExit(PyInspectCleanup);
+}
diff --git a/PyInspect/PySpectrum.c b/PyInspect/PySpectrum.c
new file mode 100644
index 0000000..6e650a8
--- /dev/null
+++ b/PyInspect/PySpectrum.c
@@ -0,0 +1,1265 @@
+//Title:          PySpectrum.c
+//Authors:         Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+// PySpectrum: Python wrapper for an MSSpectrum object.
+#include "CMemLeak.h"
+#include "PySpectrum.h"
+#include "PyUtils.h"
+#include "Trie.h"
+#include "Tagger.h"
+#include "Score.h"
+#include "Mods.h"
+#include "CMemLeak.h"
+#include "FreeMod.h"
+#include "BN.h"
+#include "ChargeState.h"
+#include "Scorpion.h"
+#include "SVM.h"
+#include "IonScoring.h"
+#include "Errors.h"
+#include "TagFile.h"
+
+// Important note: These type objects MUST be defined here in the .c file, not in the header! 
+// Otherwise, a copy is built for each including file, and these copies are not 
+// updated during module initialization.  (The result is that MSPeak objects instantiated
+// from code don't have their members set right, so their attributes can't be accessed)
+PyTypeObject PySpectrumType = 
+{
+    PyObject_HEAD_INIT(NULL)
+    0, //ob_size
+    "PyInspect.PySpectrum",  //tp_name
+    sizeof(PySpectrum),  //tp_basicsize
+    0,                         //tp_itemsize
+    PySpectrumDealloc,                         //tp_dealloc
+    0,                         //tp_print
+    0,                         //tp_getattr
+    0,                         //tp_setattr
+    0,                         //tp_compare
+    0,                         //tp_repr
+    0,                         //tp_as_number
+    0,                         //tp_as_sequence
+    0,                         //tp_as_mapping
+    0,                         //tp_hash 
+    0,                         //tp_call
+    0,                         //tp_str
+    0,                         //tp_getattro
+    0,                         //tp_setattro
+    0,                         //tp_as_buffer
+    Py_TPFLAGS_DEFAULT,        //tp_flags
+    "MS spectrum",           // tp_doc 
+    0,                       // tp_traverse 
+    0,                       // tp_clear 
+    0,                       // tp_richcompare 
+    0,                       // tp_weaklistoffset 
+    0,                       // tp_iter 
+    0,                       // tp_iternext 
+    PySpectrumMethods,             // tp_methods 
+    PySpectrumMembers,             // tp_members 
+    PySpectrumGetSet,           // tp_getset 
+    0,                         // tp_base 
+    0,                         // tp_dict 
+    0,                         // tp_descr_get 
+    0,                         // tp_descr_set 
+    0,                         // tp_dictoffset 
+    (initproc)PySpectrumInit,      // tp_init 
+    0,                         // tp_alloc 
+    PySpectrumNew,                 // tp_new 
+};
+
+extern PRMBayesianModel* InteractiveBN; // lives in PyInspect.c
+
+TrieTag* TagGraphGenerateTags(TagGraph* Graph, MSSpectrum* Spectrum, int* TagCount, 
+    int MaximumTagCount, SpectrumTweak* Tweak, float TagEdgeScoreMultiplier,
+    PRMBayesianModel* Model);
+
+// __new__ method of PySpectrum; call this in C code to create new PySpectrum objects.
+// (It's expected that PySpectrumInit gets called too)
+PyObject* PySpectrumNew(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+    PySpectrum* self;
+    //
+    self = (PySpectrum*)type->tp_alloc(type, 0);
+    if (self != NULL) 
+    {
+        // Perform non-parameterized initialization here. 
+        //memset(self->NumberToMSIndex, -1, MAX_MS_SCAN);
+    }
+    return (PyObject*)self;
+}
+
+// Called when Inspect.Spectrum() is instantiated from Python code.
+// Parse the specified spectrum file!
+PyObject* PySpectrumInit(PySpectrum* self, PyObject* args, PyObject* kwds)
+{
+    char* FilePath;
+    int FilePosition = 0; // Default: byte offset 0
+    SpectrumNode* Node;
+    FILE* SpectrumFile;
+    int LoadResult;
+
+
+    //
+    // Constructor argument: the path to a mass-spec run output file
+    if (!PyArg_ParseTuple(args, "s|i", &FilePath, &FilePosition))
+    {
+        return (PyObject*)-1;
+    }
+    Node = (SpectrumNode*)calloc(1, sizeof(SpectrumNode));
+
+    Node->FilePosition = FilePosition;
+
+    Node->ScanNumber = 0;
+    Node->InputFile = (InputFileNode*)calloc(1, sizeof(InputFileNode));
+    strncpy(Node->InputFile->FileName, FilePath, MAX_FILENAME_LEN);
+    strncpy(self->FileName, FilePath, MAX_FILENAME_LEN);
+    // Guess the file format:
+    Node->InputFile->Format = GuessSpectrumFormatFromExtension(FilePath);
+    
+    Node->Spectrum = (MSSpectrum*)calloc(1, sizeof(MSSpectrum));
+    Node->Spectrum->Node = Node;
+
+    // Special case: If it's a .ms2 extension, it could be "colon format" or standard MS2:
+    if (Node->InputFile->Format == SPECTRUM_FORMAT_MS2_COLONS)
+    {
+        Node->InputFile->Format = GuessSpectrumFormatFromHeader(FilePath, Node->Spectrum);
+    }
+    SpectrumFile = fopen(FilePath, "rb");
+    if (!SpectrumFile)
+    {
+        sprintf(PythonErrorString, "** Error: Unable to open spectrum file '%s'\n", FilePath);
+        ReportPythonError();
+        // In an Init function, we must return -1 to indicate that an object can't
+        // be created.  (Normally we return NULL for failure!)
+        return (PyObject*)-1;
+    }
+
+    fseek(SpectrumFile, Node->FilePosition, 0);
+    LoadResult = SpectrumLoadFromFile(Node->Spectrum, SpectrumFile);
+    fclose(SpectrumFile);
+    if (!LoadResult)
+    {
+        sprintf(PythonErrorString, "** Error: Unable to parse spectrum from %s:%d\n", FilePath, Node->FilePosition);
+        ReportPythonError();
+        // In an Init function, we must return -1 to indicate that an object can't
+        // be created.  (Normally we return NULL for failure!)
+        return (PyObject*)-1;
+    }
+
+    WindowFilterPeaks(Node->Spectrum, 0, 0);
+
+    IntensityRankPeaks(Node->Spectrum);
+
+    self->Spectrum = Node->Spectrum;
+    TweakSpectrum(Node);
+
+    return 0;
+}
+
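+// Pick the precursor charge (1..4) whose deconvoluted mass, MZ * Charge - (Charge - 1) * HYDROGEN_MASS,
+// lies closest to the theoretical mass of the matched peptide.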
+int GuessCharge(SpectrumNode* Node, int MatchMass)
+{
+    int BestDiff = -1;
+    int BestCharge;
+    int Charge;
+    int Mass;
+    int Diff;
+    //
+    for (Charge = 1; Charge < 5; Charge++)
+    {
+        Mass = Node->Spectrum->MZ * Charge - (Charge - 1) * HYDROGEN_MASS;
+        Diff = abs(MatchMass - Mass);
+        if (BestDiff < 0 || Diff < BestDiff)
+        {
+            BestDiff = Diff;
+            BestCharge = Charge;
+        }
+    }
+    return BestCharge;
+}
+
+// Helper function for both LabelPeaks() and Score(); the code paths overlap
+PyObject* ScoreHelper(PySpectrum* self, char* PeptideString, int Charge, int VerboseFlag, int LabelPeaksFlag, int ReturnScoreDetails)
+{
+    Peptide* Match;
+    float MQScore;
+    SpectrumNode* Node;
+    int PeakIndex;
+    SpectralPeak* Peak;
+    PyObject* LabeledPeakList;
+    PyObject* LabeledPeakTuple;
+    PyListObject* List;
+    int FeatureIndex;
+    //
+
+    Node = self->Spectrum->Node;
+    Match = GetPeptideFromAnnotation(PeptideString);
+    if (!Match)
+    {
+        sprintf(PythonErrorString, "** Error: Unable to parse peptide annotation '%s'\n", PeptideString);
+        ReportPythonError();
+        Py_INCREF(Py_None);
+        return Py_None;
+    }
+    if (!Charge)
+    {
+        Charge = GuessCharge(Node, Match->ParentMass);    
+    }
+    Node->Tweaks[0].ParentMass = Match->ParentMass;
+    Node->Tweaks[0].Charge = Charge;
+    Match->Tweak = Node->Tweaks;
+    Node->Spectrum->Charge = Charge;
+    GlobalOptions->DigestType = DIGEST_TYPE_TRYPSIN;
+    ComputeMQScoreFeatures(Node->Spectrum, Match, Match->ScoreFeatures, VerboseFlag);
+
+#ifdef MQSCORE_USE_SVM
+    MQScore = SVMComputeMQScore(Node->Spectrum, Match, Match->ScoreFeatures);
+#else
+    MQScore = LDAComputeMQScore(Node->Spectrum, Match, Match->ScoreFeatures);
+#endif
+
+    if (VerboseFlag)
+    {
+        // Print out the ion-type categorization of all peaks:
+        printf("\n");
+        printf("Score %s on spectrum %s:%d\n", PeptideString, Node->InputFile->FileName, Node->FilePosition);
+        for (PeakIndex = 0; PeakIndex < Node->Spectrum->PeakCount; PeakIndex++)
+        {
+            Peak = Node->Spectrum->Peaks + PeakIndex;
+            printf("%.2f\t%.2f\t%s\t%d\t\n", Peak->Mass / (float)MASS_SCALE, Peak->Intensity, GetFragmentTypeName(Peak->IonType), Peak->AminoIndex);
+        }
+    }
+    
+    if (LabelPeaksFlag)
+    {
+        // Return a list of peaks.
+        LabeledPeakList = PyList_New(0);
+        for (PeakIndex = 0; PeakIndex < Node->Spectrum->PeakCount; PeakIndex++)
+        {
+            Peak = Node->Spectrum->Peaks + PeakIndex;
+            LabeledPeakTuple = Py_BuildValue("ffsi", Peak->Mass / (float)MASS_SCALE, Peak->Intensity, GetFragmentTypeName(Peak->IonType), Peak->AminoIndex);
+            PyList_Append(LabeledPeakList, LabeledPeakTuple);
+            // The call to PyList_Append has incremented the refcount of the 
+            // tuple to 2.  We're abandoning our reference to the tuple now, 
+            // so we decrease its reference count:
+            Py_DECREF(LabeledPeakTuple);
+            LabeledPeakTuple = NULL; // just to be explicit about it!
+        }
+        FreePeptideNode(Match);
+        return LabeledPeakList;
+    }
+    else
+    {
+        if (ReturnScoreDetails)
+        {
+            List = (PyListObject*)PyList_New(0);
+            PyList_Append((PyObject*)List, PyFloat_FromDouble(MQScore));
+            for (FeatureIndex = 0; FeatureIndex < 7; FeatureIndex++)
+            {
+                PyList_Append((PyObject*)List, PyFloat_FromDouble(Match->ScoreFeatures[FeatureIndex]));
+            }
+            FreePeptideNode(Match);
+            //Py_DECREF(List); // Important to do this!
+            return (PyObject*)List;
+        }
+        else
+        {
+            FreePeptideNode(Match);
+            return PyFloat_FromDouble(MQScore);
+        }
+    }
+}
+
+PyObject* PySpectrumLabelPeaks(PySpectrum* self, PyObject* args, PyObject* kwargs)
+{
+    char* PeptideString;
+    int Charge = 0;
+    int VerboseFlag = 0;
+    // 
+    if (!PyArg_ParseTuple(args, "s|ii", &PeptideString, &Charge, &VerboseFlag))
+    {
+        return NULL;
+    }
+    return ScoreHelper(self, PeptideString, Charge, VerboseFlag, 1, 0);
+}
+
+PyObject* PySpectrumScore(PySpectrum* self, PyObject* args, PyObject* kwargs)
+{
+    char* PeptideString;
+    int Charge = 0;
+    int VerboseFlag = 0;
+    if (!PyArg_ParseTuple(args, "s|ii", &PeptideString, &Charge, &VerboseFlag))
+    {
+        return NULL;
+    }
+    return ScoreHelper(self, PeptideString, Charge, VerboseFlag, 0, 0);
+}
+
+PyObject* PySpectrumScoreDetailed(PySpectrum* self, PyObject* args, PyObject* kwargs)
+{
+    char* PeptideString;
+    int Charge = 0;
+    int VerboseFlag = 0;
+    if (!PyArg_ParseTuple(args, "s|ii", &PeptideString, &Charge, &VerboseFlag))
+    {
+        return NULL;
+    }
+    return ScoreHelper(self, PeptideString, Charge, VerboseFlag, 0, 1);
+}
+
+
+// Deallocate an PySpectrum object.  
+void PySpectrumDealloc(PyObject* selfobject)
+{
+    PySpectrum* self = (PySpectrum*)selfobject;
+    if (self)
+    {
+        if (self->Spectrum)
+        {
+            // The PySpectrum object wraps a Spectrum object, but also a SpectrumNode and an InputFileNode.  
+            // So, free those as well:
+            if (self->Spectrum->Node->InputFile)
+            {
+                free(self->Spectrum->Node->InputFile);
+                self->Spectrum->Node->InputFile = NULL;
+            }
+            if (self->Spectrum->Node)
+            {
+                FreeSpectrumNode(self->Spectrum->Node);
+            }
+            else
+            {
+                FreeSpectrum(self->Spectrum);
+            }
+            if (self->MatchList)
+            {
+                Py_DECREF(self->MatchList);
+            }
+        }
+        self->ob_type->tp_free((PyObject*)self);
+    }
+}
+
+PyObject* PySpectrumGetPeakCount(PySpectrum* self, PyObject* args, PyObject* kwargs)
+{
+    if (!PyArg_ParseTuple(args, ""))
+    {
+        return NULL;
+    }
+    return Py_BuildValue("i", self->Spectrum->PeakCount);
+}
+
+//PyObject* PySpectrumGetCharge(PySpectrum* self, PyObject* args, PyObject* kwargs)
+//{
+//    if (!PyArg_ParseTuple(args, ""))
+//    {
+//        return NULL;
+//    }
+//    return Py_BuildValue("i", self->Spectrum->Charge);
+//}
+
+
+// Run parent mass correction.  Return a list of tuples of the 
+// form (Mass, ModelScore).
+PyObject* PySpectrumCorrectParentMass(PySpectrum* self, PyObject* args, PyObject* kwargs)
+{
+    int CorrectChargeTemp = -1;
+    int PMToleranceTemp = -1;
+    int VerboseFlag = 0;
+    PMCSpectrumInfo* SpectrumInfo;
+    PyObject* FeatureTupleList;
+    PyObject* FeatureTuple;
+    PMCInfo* Info;
+    MSSpectrum* Spectrum = self->Spectrum;
+
+    //
+    if (!PyArg_ParseTuple(args, "|ii", &PMToleranceTemp, &CorrectChargeTemp))
+    {
+        return NULL;
+    }
+
+    SpectrumInfo = GetPMCSpectrumInfo(Spectrum);
+    PerformPMC(SpectrumInfo);
+    
+    FeatureTupleList = PyList_New(0);
+    for (Info = SpectrumInfo->Head; Info; Info = Info->Next)
+    {
+        FeatureTuple = PyTuple_New(2);
+        PyTuple_SetItem(FeatureTuple, 0, PyFloat_FromDouble(Info->ParentMass / (float)MASS_SCALE));
+        PyTuple_SetItem(FeatureTuple, 1, PyFloat_FromDouble(Info->SVMScore));
+        PyList_Append(FeatureTupleList, FeatureTuple);
+    }
+
+    //return ScoreHelper(self, PeptideString, Charge, VerboseFlag, 0, 0);
+    FreePMCSpectrumInfo(SpectrumInfo);
+    return FeatureTupleList;
+}
+
+// Function to assist in training and testing parent mass correction.  Given a spectrum,
+// compute its self-convolution features, and return them as a list of tuples.
+PyObject* PySpectrumGetPMCFeatures(PySpectrum* self, PyObject* args, PyObject* kwargs)
+{
+    PMCSpectrumInfo* SpectrumInfo;
+    PMCInfo* Info;
+    int PMCCount;
+    PyObject* FeatureTupleList;
+    PyObject* FeatureTuple;
+    int FeatureIndex;
+    int PMCFeatureCount = 64;
+    //
+
+    SpectrumInfo = GetPMCSpectrumInfo(self->Spectrum);
+    ComputePMCFeatures(SpectrumInfo);
+
+    // Count the PMCInfo nodes:
+    PMCCount = 0;
+    for (Info = SpectrumInfo->Head; Info; Info = Info->Next)
+    {
+        PMCCount++;
+    }
+    // Return a list of features.
+    FeatureTupleList = PyList_New(0);
+    for (Info = SpectrumInfo->Head; Info; Info = Info->Next)
+    {
+        FeatureTuple = PyTuple_New(PMCFeatureCount + 1);
+        PyTuple_SetItem(FeatureTuple, 0, PyFloat_FromDouble(Info->ParentMass / (float)MASS_SCALE));
+        for (FeatureIndex = 0; FeatureIndex < PMCFeatureCount; FeatureIndex++)
+        {
+            PyTuple_SetItem(FeatureTuple, FeatureIndex + 1, PyFloat_FromDouble(Info->Features[FeatureIndex]));
+        }
+        PyList_Append(FeatureTupleList, FeatureTuple);
+        Py_DECREF(FeatureTuple); // Important!
+    }
+    FreePMCSpectrumInfo(SpectrumInfo);
+    return FeatureTupleList;
+}
+
+// Function to assist in training and testing charge correction.  Given a spectrum,
+// computes and return the charge-correction features.
+PyObject* PySpectrumGetCCFeatures(PySpectrum* self, PyObject* args, PyObject* kwargs)
+{
+    int Charge1Flag = 0;
+    PMCSpectrumInfo* SpectrumInfo1;
+    PMCSpectrumInfo* SpectrumInfo2;
+    PMCSpectrumInfo* SpectrumInfo3;
+    float CCFeatures[64];
+    int FeatureIndex;
+    PyObject* FeatureTuple;
+    //
+    if (!PyArg_ParseTuple(args, "i", &Charge1Flag))
+    {
+        return NULL;
+    }
+    memset(CCFeatures, 0, sizeof(float) * 64);
+    /////////////////////////////////
+    // Charge 1 PMC:
+    self->Spectrum->Charge = 1;
+    self->Spectrum->ParentMass = (self->Spectrum->MZ * 1);
+    SpectrumInfo1 = GetPMCSpectrumInfo(self->Spectrum);
+    PerformPMC(SpectrumInfo1);
+    /////////////////////////////////
+    // Charge 2 PMC:
+    self->Spectrum->Charge = 2;
+    self->Spectrum->ParentMass = (self->Spectrum->MZ * 2) - HYDROGEN_MASS;
+    SpectrumInfo2 = GetPMCSpectrumInfo(self->Spectrum);
+    PerformPMC(SpectrumInfo2);
+    /////////////////////////////////
+    // Charge 3 PMC:
+    self->Spectrum->Charge = 3;
+    self->Spectrum->ParentMass = (self->Spectrum->MZ * 3) - 2 * HYDROGEN_MASS;
+    SpectrumInfo3 = GetPMCSpectrumInfo(self->Spectrum);
+    PerformPMC(SpectrumInfo3);
+    if (Charge1Flag == 1)
+    {
+        //////////////////////////////////
+        // Get features:
+        GetChargeCorrectionFeatures1(SpectrumInfo1, SpectrumInfo2, SpectrumInfo3, CCFeatures);
+    }
+    else
+    {
+        GetChargeCorrectionFeatures2(SpectrumInfo2, SpectrumInfo3, CCFeatures);
+    }
+    FeatureTuple = PyTuple_New(64);
+    for (FeatureIndex = 0; FeatureIndex < 64; FeatureIndex++)
+    {
+        PyTuple_SetItem(FeatureTuple, FeatureIndex, PyFloat_FromDouble(CCFeatures[FeatureIndex]));
+    }
+
+    FreePMCSpectrumInfo(SpectrumInfo1);
+    FreePMCSpectrumInfo(SpectrumInfo2);
+    FreePMCSpectrumInfo(SpectrumInfo3);
+    return FeatureTuple;
+}
+
+
+PyObject* PySpectrumSetCharge(PySpectrum* self, PyObject* args, PyObject* kwargs)
+{
+    int NewCharge;
+    if (!PyArg_ParseTuple(args, "i", &NewCharge))
+    {
+        return NULL;
+    }
+    self->Spectrum->Charge = NewCharge;
+    // Reset the parent mass, based upon the M/Z:
+    self->Spectrum->ParentMass = (self->Spectrum->MZ * NewCharge) - (NewCharge - 1) * HYDROGEN_MASS;
+    // Set tweaks:
+    TweakSpectrum(self->Spectrum->Node); // note (sam): comment this call out for phosphopeptide PMC
+    Py_INCREF(Py_None);
+    return Py_None;
+}
+
+PyObject* PySpectrumGetMZ(PySpectrum* self, PyObject* args, PyObject* kwargs)
+{
+    float MZ;
+    //
+    MZ = self->Spectrum->MZ / (float)MASS_SCALE;
+    return PyFloat_FromDouble(MZ);
+}
+
+PyObject* PySpectrumGetParentMass(PySpectrum* self, PyObject* args, PyObject* kwargs)
+{
+    float Mass;
+    //
+    Mass = self->Spectrum->ParentMass / (float)MASS_SCALE;
+    return PyFloat_FromDouble(Mass);
+}
+
+PyObject* PySpectrumSetParentMass(PySpectrum* self, PyObject* args, PyObject* kwargs)
+{
+    float NewParentMass;
+    int Charge = 0;
+    MSSpectrum* Spectrum = self->Spectrum;
+    //
+    if (!PyArg_ParseTuple(args, "f|i", &NewParentMass, &Charge))
+    {
+        return NULL;
+    }
+    Spectrum->ParentMass = (int)(NewParentMass * MASS_SCALE + 0.5);
+    if (Charge)
+    {
+        Spectrum->Charge = Charge;
+    }
+    if (Spectrum->Charge)
+    {
+        Spectrum->MZ = (Spectrum->ParentMass + (Spectrum->Charge - 1) * HYDROGEN_MASS) / Spectrum->Charge;
+    }
+    Py_INCREF(Py_None);
+    return Py_None;
+}
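+
+// SetCharge and SetParentMass above are two directions of the same relation between
+// the stored (scaled, integer) parent mass M and the observed m/z at charge z:
+// M = z * (m/z) - (z - 1) * HYDROGEN_MASS, so for charge 1 the parent mass equals
+// the m/z.  A minimal sketch of the round trip, with illustrative helper names:
+static int SketchParentMassFromMZ(int MZ, int Charge)
+{
+    return (MZ * Charge) - (Charge - 1) * HYDROGEN_MASS;
+}
+
+static int SketchMZFromParentMass(int ParentMass, int Charge)
+{
+    return (ParentMass + (Charge - 1) * HYDROGEN_MASS) / Charge;
+}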
+
+PyObject* PySpectrumBYConvolve(PySpectrum* self, PyObject* args, PyObject* kwargs)
+{
+    // If TriplyChargedFlag is true, then we're looking for pairs with one singly-charged
+    // peak and one doubly-charged peak.  Otherwise we're looking for two singly-charged peaks.
+    int TriplyChargedFlag = 0;
+    int Offset;
+    float FloatOffset = 1.0;
+    float Convolution = 0;
+    int PeakIndex;
+    int OtherMass;
+    int Bin;
+    float Intensity;
+    //
+    MSSpectrum* Spectrum = self->Spectrum;
+    //
+    if (!PyArg_ParseTuple(args, "|fi", &FloatOffset, &TriplyChargedFlag))
+    {
+        return NULL;
+    }
+
+    // Special case: If TriplyChargedFlag is -1, then compute direct self-convolution!
+    // The direct self-convolution (dot product with self) is useful for scaling the
+    // b,y convolutions.
+    if (TriplyChargedFlag < 0)
+    {
+        Convolution = 0;
+        for (PeakIndex = 0; PeakIndex < Spectrum->PeakCount; PeakIndex++)
+        {
+            Bin = (Spectrum->Peaks[PeakIndex].Mass + 50) / 100;
+            if (Bin >= 0 && Bin < Spectrum->IntensityBinCount)
+            {
+                Intensity = Spectrum->BinnedIntensitiesTight[Bin];
+                Convolution += Spectrum->Peaks[PeakIndex].Intensity * Intensity; // * PeakScalingFactor;
+            }
+        }
+        return PyFloat_FromDouble(Convolution);
+    }
+    Offset = (int)(FloatOffset * MASS_SCALE + 0.5);
+
+    for (PeakIndex = 0; PeakIndex < Spectrum->PeakCount; PeakIndex++)
+    {
+        if (TriplyChargedFlag)
+        {
+            OtherMass = Spectrum->ParentMass + 2 * HYDROGEN_MASS - (2 * Spectrum->Peaks[PeakIndex].Mass) + Offset;
+        }
+        else
+        {
+            OtherMass = Spectrum->ParentMass + HYDROGEN_MASS - Spectrum->Peaks[PeakIndex].Mass + Offset;
+        }
+        Bin = ((OtherMass + 50) / 100);
+        if (Bin < 0 || Bin >= Spectrum->IntensityBinCount)
+        {
+            continue;
+        }
+        Convolution += Spectrum->Peaks[PeakIndex].Intensity * Spectrum->BinnedIntensitiesTight[Bin];
+    }
+    return PyFloat_FromDouble(Convolution);
+}
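+
+// In the convolution above, the partner of a singly-charged peak at mass m is expected
+// near ParentMass + HYDROGEN_MASS - m (b and y fragments from the same cut sum to the
+// singly-protonated parent mass plus one hydrogen), shifted by the requested offset.
+// Candidate masses are looked up in BinnedIntensitiesTight, whose bins are 100 scaled
+// mass units wide.  A sketch of the shared mass-to-bin mapping (illustrative helper):
+static int SketchMassToTightBin(int ScaledMass, int BinCount)
+{
+    int Bin = (ScaledMass + 50) / 100; // round to the nearest 100-unit bin
+    if (Bin < 0 || Bin >= BinCount)
+    {
+        return -1; // out of range; callers skip such peaks
+    }
+    return Bin;
+}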
+
+PyObject* PySpectrumCorrectCharge(PySpectrum* self, PyObject* args, PyObject* kwargs)
+{
+    // Run charge correction on this spectrum.  If ReturnScoresFlag is set, return the
+    // two charge-correction model scores; otherwise return the corrected charge.
+    int TriplyChargedFlag = 0;
+    float FloatOffset = 1.0;
+    float Convolution = 0;
+    int Result;
+    int ReturnScoresFlag = 0;
+    float Model1Score;
+    float Model2Score;
+    //
+    MSSpectrum* Spectrum = self->Spectrum;
+    //
+    if (!PyArg_ParseTuple(args, "|i", &ReturnScoresFlag))
+    {
+        return NULL;
+    }
+    Result = ChargeCorrectSpectrum(self->Spectrum->Node, &Model1Score, &Model2Score);
+    if (ReturnScoresFlag)
+    {
+        return Py_BuildValue("ff", Model1Score, Model2Score);
+    }
+    else
+    {
+        return PyInt_FromLong(Result);
+    }
+}
+
+void PySpectrumReportTagsFromTrie(PyObject* TagList, TrieNode* Root)
+{
+    TrieTagHanger* Hanger;
+    TrieTag* Tag;
+    int ChildIndex;
+    TrieNode* Node;
+    PyObject* TagTuple;
+    //
+    for (Hanger = Root->FirstTag; Hanger; Hanger = Hanger->Next)
+    {
+        Tag = Hanger->Tag;
+        TagTuple = Py_BuildValue("fsf", Tag->PrefixMass / (float)DALTON,
+            Tag->Tag, Tag->SuffixMass / (float)DALTON);
+        PyList_Append(TagList, TagTuple);
+    }
+
+    for (ChildIndex = 0; ChildIndex < AMINO_ACIDS; ChildIndex++)
+    {
+        // I and Q are treated as equivalent to the (near-)isobaric residues L and K,
+        // so their children are skipped here.
+        if (ChildIndex == 'I' - 'A' || ChildIndex == 'Q' - 'A')
+        {
+            continue;
+        }
+        Node = Root->Children[ChildIndex];
+        if (Node)
+        {
+            PySpectrumReportTagsFromTrie(TagList, Node);
+        }
+    }
+}
+
+int WriteTagsToList(TrieNode* Root, TrieTag* Tags, int MaxCount, int CurrentCount)
+{
+    //int RunningTotal;
+    int ChildIndex;
+    TrieNode* Child;
+    TrieTagHanger* Hanger;
+    //
+    if (CurrentCount >= MaxCount)
+    {
+        return CurrentCount;
+    }
+
+    for (ChildIndex = 0; ChildIndex < AMINO_ACIDS; ChildIndex++)
+    {
+        if (ChildIndex == 'I' - 'A' || ChildIndex == 'Q' - 'A')
+        {
+            continue; 
+        }
+
+        Child = Root->Children[ChildIndex];
+        if (Child)
+        {
+            CurrentCount = WriteTagsToList(Child, Tags, MaxCount, CurrentCount);
+        }
+    }
+    for (Hanger = Root->FirstTag; Hanger; Hanger = Hanger->Next)
+    {
+        memcpy(Tags + CurrentCount, Hanger->Tag, sizeof(TrieTag));
+        CurrentCount++;
+        if (CurrentCount >= MaxCount)
+        {
+            return CurrentCount;
+        }
+    }
+    return CurrentCount;
+}
+
+// Generate tags for this spectrum.  Optionally, use the new PRM scoring model
+// to do so.
+PyObject* PySpectrumGenerateTags(PySpectrum* self, PyObject* args, PyObject* kwargs)
+{
+    int CustomScoringModelFlag = 0;
+    int Charge;
+    MSSpectrum* Spectrum;
+    SpectrumTweak* Tweak;
+    TrieTag* Tags;
+    PyObject* TagList;
+    PyObject* TagTuple;
+    int TagCount;
+    PRMBayesianModel* Model;
+    int MaximumTagCount = 200;
+    int TagIndex;
+    TrieTag* Tag;
+    TrieNode* Root = NULL;
+    float TagEdgeScoreMultiplier = 1.0;
+    //static TrieTag* SortedFilteredTagList = NULL;
+    //
+    if (!PyArg_ParseTuple(args, "i|if", &Charge, &CustomScoringModelFlag, &TagEdgeScoreMultiplier))
+    {
+        return NULL;
+    }
+    Spectrum = self->Spectrum;
+    Spectrum->Charge = Charge;
+    TweakSpectrum(Spectrum->Node);
+    Tweak = Spectrum->Node->Tweaks + (Charge - 1) * 2;
+    if (Spectrum->Graph)
+    {
+        FreeTagGraph(Spectrum->Graph);
+    }
+    Spectrum->Graph = ConstructTagGraph(Spectrum);
+    TagGraphAddNodes(Spectrum->Graph, Spectrum);
+    // Look up the correct PRM scoring model:
+    if (CustomScoringModelFlag)
+    {
+        Model = InteractiveBN;
+    }
+    else
+    {
+        // Use the current production model to score the nodes:
+        if (Spectrum->Charge > 2)
+        {
+            Model = TAGModelCharge3;
+        }
+        else
+        {
+            Model = TAGModelCharge2;
+        }
+    }
+    PrepareSpectrumForIonScoring(Model, Spectrum, 1);
+    // Score PRMs using this model:
+    TagGraphScorePRMNodes(Model, Spectrum->Graph, Spectrum, Tweak);
+    
+    TagGraphPopulateEdges(Spectrum->Graph);
+    Tags = TagGraphGenerateTags(Spectrum->Graph, Spectrum, &TagCount, MaximumTagCount, Tweak, TagEdgeScoreMultiplier, InteractiveBN);
+    
+    // Note: This array of tags may have some duplicates.  Let's just build a trie and
+    // use THAT for our output!
+    Root = BuildTrieFromTags(Tags, TagCount, Root, MaximumTagCount);
+    //if (!SortedFilteredTagList)
+    //{
+    //    SortedFilteredTagList = (TrieTag*)calloc(500, sizeof(TrieTag));
+    //}
+    TagCount = WriteTagsToList(Root, Tags, 500, 0);
+    qsort(Tags, TagCount, sizeof(TrieTag), (QSortCompare)CompareTagScores); 
+    TagList = PyList_New(0);
+    for (TagIndex = 0; TagIndex < TagCount; TagIndex++)
+    {
+        Tag = Tags + TagIndex;
+        TagTuple = Py_BuildValue("fsffii", Tag->PrefixMass / (float)DALTON,
+            Tag->Tag, Tag->SuffixMass / (float)DALTON, Tag->Score, 
+            Tag->TotalSkew, Tag->TotalAbsSkew);
+        PyList_Append(TagList, TagTuple);
+    }
+
+    //PySpectrumReportTagsFromTrie(TagList, Root);
+    FreeTrieNode(Root);
+    
+    return TagList;
+}
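+
+// The qsort call above relies on a comparator over TrieTag records.  The real
+// CompareTagScores is defined elsewhere in the code base and may consider more than
+// the score field; a sketch of a comparator that simply orders tags by descending
+// score looks like this:
+static int SketchCompareTagsByScore(const void* A, const void* B)
+{
+    const TrieTag* TagA = (const TrieTag*)A;
+    const TrieTag* TagB = (const TrieTag*)B;
+    if (TagA->Score > TagB->Score)
+    {
+        return -1; // higher-scoring tag sorts first
+    }
+    if (TagA->Score < TagB->Score)
+    {
+        return 1;
+    }
+    return 0;
+}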
+
+PyObject* PySpectrumGetPRMScore(PySpectrum* self, PyObject* args, PyObject* kwargs)
+{
+    float Mass;
+    int CustomScoringModelFlag = 0;
+    PRMBayesianModel* Model;
+    SpectrumTweak* Tweak;
+    SpectrumNode* SpecNode;
+    int IntMass;
+    float Score;
+    int VerboseFlag = 0;
+    //
+    if (!PyArg_ParseTuple(args, "f|ii", &Mass, &CustomScoringModelFlag, &VerboseFlag))
+    {
+        return NULL;
+    }
+    if (CustomScoringModelFlag)
+    {
+        Model = InteractiveBN;
+    }
+    else
+    {
+        if (self->Spectrum->Charge < 3)
+        {
+            Model = PRMModelCharge2;
+        }
+        else
+        {
+            Model = PRMModelCharge3;
+        }
+    }
+    PrepareSpectrumForIonScoring(Model, self->Spectrum, 1);
+    IntMass = (int)(Mass * MASS_SCALE);
+    SpecNode = self->Spectrum->Node;
+    if (SpecNode->Tweaks[2].Charge)
+    {
+        Tweak = SpecNode->Tweaks + 2;
+    }
+    else if (SpecNode->Tweaks[4].Charge)
+    {
+        Tweak = SpecNode->Tweaks + 4;
+    }
+    else
+    {
+        Tweak = SpecNode->Tweaks;
+    }
+    Score = GetIonPRMFeatures(self->Spectrum, Tweak, Model, IntMass, VerboseFlag);
+    return PyFloat_FromDouble(Score);
+    //GetIonPRMFeatures(self->Spectrum, Tweak, Model, Mass, 1);
+}
+
+PyObject* PySpectrumPlotPRMScores(PySpectrum* self, PyObject* args, PyObject* kwargs)
+{
+    char* FileName;
+    FILE* PlotFile;
+    int PRM;
+    float Score;
+    SpectrumTweak* Tweak;
+    PRMBayesianModel* Model;
+    int UseCustomModelFlag = 0;
+    //
+    if (!PyArg_ParseTuple(args, "s|i", &FileName, &UseCustomModelFlag))
+    {
+        return NULL;
+    }
+    if (UseCustomModelFlag)
+    {
+        Model = InteractiveBN; 
+    }
+    else
+    {
+        if (self->Spectrum->Charge < 3)
+        {
+            Model = PRMModelCharge2;
+        }
+        else
+        {
+            Model = PRMModelCharge3;
+        }
+    }
+
+    PlotFile = fopen(FileName, "wb");
+    if (!PlotFile)
+    {
+        sprintf(PythonErrorString, "Unable to open '%s'", FileName);
+        ReportPythonError();
+        return NULL;
+    }
+    
+    PrepareSpectrumForIonScoring(Model, self->Spectrum, 1);
+    Tweak = self->Spectrum->Node->Tweaks + (self->Spectrum->Charge * 2) - 2;
+    for (PRM = 0; PRM < self->Spectrum->ParentMass; PRM += (DALTON / 10))
+    {
+        Score = GetIonPRMFeatures(self->Spectrum, Tweak, Model, PRM, 0);
+        fprintf(PlotFile, "%.2f\t%.2f\t\n", PRM / (float)DALTON, Score);
+    }
+    fclose(PlotFile);
+    Py_INCREF(Py_None);
+    return Py_None;
+}
+
+void VerboseReportTopTag(TrieTag* Tag, Peptide* Match, MSSpectrum* Spectrum)
+{
+    int PRM;
+    int AminoIndex;
+    float PRMScore;
+    int ValidFlag;
+    int TagNodeIndex;
+    TagGraphNode* Node;
+    int Diff;
+    int PeptidePRM;
+    int AminoCount;
+    int ModIndex;
+
+    PRM = Tag->PrefixMass;
+    AminoCount = strlen(Match->Bases);
+    printf("  Tag %s %.2f (prefix %.2f, suffix %.2f)\n", Tag->Tag, Tag->Score, Tag->PrefixMass / (float)DALTON, Tag->SuffixMass / (float)DALTON);
+    for (TagNodeIndex = 0; TagNodeIndex < 4; TagNodeIndex++)
+    {
+        // PRM is our node's mass.  
+        // First question: Is it correct?
+        ValidFlag = 0;
+        PeptidePRM = 0;
+        for (AminoIndex = 0; AminoIndex < AminoCount; AminoIndex++)
+        {
+            Diff = abs(PeptidePRM - PRM);
+            if (Diff < GlobalOptions->Epsilon)
+            {
+                ValidFlag = 1;
+                break;
+            }
+            
+            PeptidePRM += PeptideMass[Match->Bases[AminoIndex]];
+            for (ModIndex = 0; ModIndex < MAX_PT_MODS; ModIndex++)
+            {
+                if (Match->AminoIndex[ModIndex] == AminoIndex)
+                {
+                    PeptidePRM += Match->ModType[ModIndex]->RealDelta;
+                }
+            }
+        }
+        // Second question: What's the score of the node?
+        Node = Tag->Nodes[TagNodeIndex];
+        PRMScore = Node->Score;
+        if (ValidFlag)
+        {
+            printf("    [RIGHT]");
+        }
+        else
+        {
+            printf("    [wrong]");
+        }
+        printf(" %.2f %.2f", PRM / (float)DALTON, PRMScore);
+        if (TagNodeIndex < 3)
+        {
+            printf(" -%c- ", Tag->Tag[TagNodeIndex]);
+        }
+        printf("\n");
+        // Increment the PRM.
+        PRM += PeptideMass[Tag->Tag[TagNodeIndex]];
+        
+    }
+}
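+
+// VerboseReportTopTag above and VerboseReportTrueTagPRMs below both walk the same
+// prefix-residue-mass ladder: the PRM before residue i is the sum of the residue
+// masses of bases 0..i-1 plus the deltas of any modifications attached to them.
+// A minimal sketch of that accumulation (illustrative helper):
+static int SketchPrefixResidueMass(Peptide* Match, int CutIndex)
+{
+    int PRM = 0;
+    int AminoIndex;
+    int ModIndex;
+    for (AminoIndex = 0; AminoIndex < CutIndex; AminoIndex++)
+    {
+        PRM += PeptideMass[Match->Bases[AminoIndex]];
+        for (ModIndex = 0; ModIndex < MAX_PT_MODS; ModIndex++)
+        {
+            if (Match->AminoIndex[ModIndex] == AminoIndex)
+            {
+                PRM += Match->ModType[ModIndex]->RealDelta;
+            }
+        }
+    }
+    return PRM;
+}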
+
+void VerboseReportTrueTagPRMs(Peptide* Match, MSSpectrum* Spectrum)
+{
+    int TruePRM;
+    int PrevPRM;
+    int AminoIndex;
+    int AminoCount;
+    int ModIndex;
+    int Diff;
+    TagGraphNode* BackNode;
+    TagGraphNode* Node;
+    TagGraphNode* BestNode;
+    TagGraphNode* FirstNode;
+    TagGraphNode* OldFirstNode = NULL;
+    TagGraphEdge* BestEdge;
+    float BestEdgeScore;
+    TagGraphEdge* Edge;
+    int BestDiff;
+    float Score;
+    //
+    TruePRM = 0;
+    AminoCount = strlen(Match->Bases);
+    for (AminoIndex = 0; AminoIndex <= AminoCount; AminoIndex++)
+    {
+        BestNode = NULL;
+        FirstNode = NULL;
+
+        // Look for a close node:
+        for (Node = Spectrum->Graph->FirstNode; Node; Node = Node->Next)
+        {
+            Diff = abs(Node->Mass - TruePRM);
+            if (Diff < GlobalOptions->Epsilon)
+            {
+                if (!FirstNode)
+                {
+                    FirstNode = Node;
+                }
+                if (!BestNode || Node->Score > BestNode->Score)
+                {
+                    BestNode = Node;
+                    BestDiff = Diff;
+                }
+            }
+        }
+
+        // Also, is there an edge to the PREVIOUS node?
+        if (TruePRM)
+        {
+            BestEdge = NULL;
+            BestEdgeScore = -9999;
+            for (BackNode = OldFirstNode; BackNode; BackNode = BackNode->Next)
+            {
+                if (BackNode->Mass > PrevPRM + GlobalOptions->Epsilon)
+                {
+                    break;
+                }
+                for (Edge = BackNode->FirstEdge; Edge; Edge = Edge->Next)
+                {
+                    Diff = abs(Edge->ToNode->Mass - TruePRM);
+                    if (Diff < GlobalOptions->Epsilon)
+                    {
+                        Score = Edge->FromNode->Score + Edge->ToNode->Score;
+                        if (Score > BestEdgeScore)
+                        {
+                            BestEdge = Edge;
+                            BestEdgeScore = Score;
+                        }
+                    }
+                }
+            }
+            if (BestEdge)
+            {
+                printf("-Edge: From %.2f (%.2f) to %.2f (%.2f) via %c (skew %.2f)\n", BestEdge->FromNode->Mass / (float)DALTON,
+                    BestEdge->FromNode->Score, BestEdge->ToNode->Mass / (float)DALTON, BestEdge->ToNode->Score,
+                    BestEdge->Jump->Amino, BestEdge->Skew / (float)DALTON);
+            }
+            else
+            {
+                printf("-(no edge)\n");
+            }
+        }
+        if (BestNode)
+        {
+            printf("PRM %.2f: Best node score PRM %.2f (diff %.2f) score %.2f\n",
+                TruePRM / (float)DALTON, BestNode->Mass / (float)DALTON, BestDiff / (float)DALTON, BestNode->Score);
+        }
+        else
+        {
+            printf("PRM %.2f: (no node)\n", TruePRM / (float)DALTON);
+        }
+        // Add mass for this aa:
+        OldFirstNode = FirstNode;
+        PrevPRM = TruePRM;
+        TruePRM += PeptideMass[Match->Bases[AminoIndex]];
+        for (ModIndex = 0; ModIndex < MAX_PT_MODS; ModIndex++)
+        {
+            if (Match->AminoIndex[ModIndex] == AminoIndex)
+            {
+                TruePRM += Match->ModType[ModIndex]->RealDelta;
+            }
+        }
+        
+    }
+}
+
+PyObject* PySpectrumCheckTagging(PySpectrum* self, PyObject* args, PyObject* kwargs)
+{
+    char* Annotation;
+    Peptide* Match;
+    MSSpectrum* Spectrum;
+    PRMBayesianModel* Model;
+    SpectrumTweak* Tweak;
+    int Charge;
+    int TagCount;
+    int MaximumTagCount = 500;
+    TrieTag* Tags;
+    float TagEdgeScoreMultiplier = 10.0;
+    int TagIndex;
+    //
+    if (!PyArg_ParseTuple(args, "s", &Annotation))
+    {
+        return NULL;
+    }
+    Match = GetPeptideFromAnnotation(Annotation);
+    // Bail out if the match isn't valid:
+    if (!Match)
+    {
+        Py_INCREF(Py_None);
+        return Py_None;
+    }
+    Spectrum = self->Spectrum;
+    Charge = self->Spectrum->Charge;
+    // Look up the correct PRM scoring model:
+    Model = InteractiveBN;
+    PrepareSpectrumForIonScoring(Model, Spectrum, 1);
+    Tweak = Spectrum->Node->Tweaks + (Charge - 1) * 2;
+    // Generate some tags:
+    if (Spectrum->Graph)
+    {
+        FreeTagGraph(Spectrum->Graph);
+    }
+    Spectrum->Graph = ConstructTagGraph(Spectrum);
+    TagGraphAddNodes(Spectrum->Graph, Spectrum);
+    // Score PRMs using this model:
+    TagGraphScorePRMNodes(Model, Spectrum->Graph, Spectrum, Tweak);
+    TagGraphPopulateEdges(Spectrum->Graph);
+    Tags = TagGraphGenerateTags(Spectrum->Graph, Spectrum, &TagCount, MaximumTagCount, Tweak, TagEdgeScoreMultiplier, Model);
+    printf("\nCheck tagging: Peptide %s\n", Annotation);
+    printf("%s:%d\n", self->FileName, Spectrum->Node->FilePosition);
+    ///////////////////////////////////////////////////////////////////////
+    // Report on the tag graph node scores which correspond to the true PRMs.  Are
+    // we missing a PRM entirely?  Missing an edge entirely?  Did we miss a tag
+    // simply because the scores were mediocre?
+    VerboseReportTrueTagPRMs(Match, Spectrum);
+
+    ///////////////////////////////////////////////////////////////////////
+    // Report the top 10 tags.  Are they correct?  Partially correct?
+    //printf("Top 10 tags (true peptide %s):\n", Annotation);
+    for (TagIndex = 0; TagIndex < min(10, TagCount); TagIndex++)
+    {
+        VerboseReportTopTag(Tags + TagIndex, Match, Spectrum);
+    }
+    
+    // Cleanup:
+    FreePeptideNode(Match);
+    Py_INCREF(Py_None);
+    return Py_None;
+}
+
+PyObject* PySpectrumGetCutScores(PySpectrum* self, PyObject* args, PyObject* kwargs)
+{
+    Peptide* Match;
+    char* Annotation;
+    MSSpectrum* Spectrum;
+    int NodeIndex;
+    int AminoIndex;
+    int PRM;
+    float Score;
+    int AminoCount;
+    int ModIndex;
+    PyObject* ReturnList;
+    PRMBayesianNode* Node;
+    //
+    if (!PyArg_ParseTuple(args, "s", &Annotation))
+    {
+        return NULL;
+    }
+    Spectrum = self->Spectrum;
+    Match = GetPeptideFromAnnotation(Annotation);
+    // Force the spectrum's parent mass to match the annotation's parent mass:
+    Spectrum->ParentMass = Match->ParentMass;
+    PrepareSpectrumForIonScoring(InteractiveBN, Spectrum, 1);
+    AminoCount = strlen(Match->Bases);
+    for (NodeIndex = 0, Node = InteractiveBN->Head; Node; NodeIndex++, Node = Node->Next)
+    {
+        PRM = 0;
+        for (AminoIndex = 0; AminoIndex <= AminoCount; AminoIndex++)
+        {
+            ///////////////////////////////////////////////////////////////////////////////////////
+            // Set values, and accumulate table entries:
+            Node->Values[AminoIndex] = IonScoringGetNodeValue(InteractiveBN, Node, Spectrum, PRM, Match, AminoIndex);
+            ///////////////////////////////////////////////////////////////////////////////////////
+            // Add to PRM:
+            if (AminoIndex == AminoCount)
+            {
+                break;
+            }
+            PRM += PeptideMass[Match->Bases[AminoIndex]];
+            for (ModIndex = 0; ModIndex < MAX_PT_MODS; ModIndex++)
+            {
+                if (Match->AminoIndex[ModIndex] == AminoIndex)
+                {
+                    PRM += Match->ModType[ModIndex]->RealDelta;
+                }
+            }
+        } // Amino loop
+    } // NodeIndex loop
+
+    // Iterate over the values arrays, building the return-list.
+    ReturnList = PyList_New(0);
+    for (AminoIndex = 0; AminoIndex <= AminoCount; AminoIndex++)
+    {
+        Score = PRMBNGetCutScore(Spectrum, InteractiveBN, AminoIndex);
+        PyList_Append(ReturnList, PyFloat_FromDouble(Score));
+    }
+
+    // Cleanup:
+    FreePeptideNode(Match);
+
+    // Return:
+    return ReturnList;
+}
+
+PyObject* PySpectrumGetMatchFeatures(PySpectrum* self, PyObject* args, PyObject* kwargs)
+{
+    PyObject* ReturnList;
+    char* Annotation;
+    Peptide* Match;
+    MSSpectrum* Spectrum;
+    float MatchFeatures[256];
+    int FeatureIndex;
+    int FeatureCount;
+    //
+    if (!PyArg_ParseTuple(args, "s", &Annotation))
+    {
+        return NULL;
+    }
+    Spectrum = self->Spectrum;
+    Match = GetPeptideFromAnnotation(Annotation);
+    FeatureCount = GetPeptideMatchFeaturesFull(Spectrum, Match, MatchFeatures);
+    FreePeptideNode(Match);
+    ReturnList = PyList_New(0);
+    for (FeatureIndex = 0; FeatureIndex < FeatureCount; FeatureIndex++)
+    {
+        PyList_Append(ReturnList, PyFloat_FromDouble(MatchFeatures[FeatureIndex]));
+    }
+    
+    return ReturnList;
+}
+
+PyObject* PySpectrumPrepareIonScoring(PySpectrum* self, PyObject* args, PyObject* kwargs)
+{
+    int CustomModelFlag = 0;
+    PRMBayesianModel* Model;
+    MSSpectrum* Spectrum = self->Spectrum;
+    if (!PyArg_ParseTuple(args, "i", &CustomModelFlag))
+    {
+        return NULL;
+    }
+    if (CustomModelFlag)
+    {
+        Model = InteractiveBN;
+    }
+    else
+    {
+        if (self->Spectrum->Charge < 3)
+        {
+            Model = PRMModelCharge2;
+        }
+        else
+        {
+            Model = PRMModelCharge3;
+        }
+    }
+    PrepareSpectrumForIonScoring(Model, Spectrum, 1);
+    Py_INCREF(Py_None);
+    return Py_None;
+}
+
diff --git a/PyInspect/PySpectrum.h b/PyInspect/PySpectrum.h
new file mode 100644
index 0000000..52bf906
--- /dev/null
+++ b/PyInspect/PySpectrum.h
@@ -0,0 +1,145 @@
+//Title:          PySpectrum.h
+//Authors:         Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+#ifndef PY_SPECTRUM_H
+#define PY_SPECTRUM_H
+// PySpectrum: Python wrapper for a Spectrum object.
+#include "Python.h"
+#include "structmember.h"
+#include "Utils.h"
+#include "Spectrum.h"
+#include "TagFile.h"
+
+typedef struct
+{
+    PyObject_HEAD
+    MSSpectrum* Spectrum;
+    char FileName[MAX_FILENAME_LEN];
+    PyObject* MatchList; // list of PyPeptide instances
+    int PrevMass;
+    //struct Peptide* FirstMatch; // list of Peptide instances for matches
+    //struct Peptide* LastMatch;
+} PySpectrum;
+
+extern PyTypeObject PySpectrumType;
+
+void SpectrumSetCharge(MSSpectrum* Spectrum, int Charge);
+PyObject* PySpectrumGetPeakCount(PySpectrum* self, PyObject* args, PyObject* kwargs);
+PyObject* PySpectrumInit(PySpectrum* self, PyObject *args, PyObject *kwds);
+PyObject* PySpectrumNew(PyTypeObject *type, PyObject *args, PyObject *kwds);
+PyObject* PySpectrumScore(PySpectrum* self, PyObject* args, PyObject* kwargs);
+PyObject* PySpectrumScoreDetailed(PySpectrum* self, PyObject* args, PyObject* kwargs);
+PyObject* PySpectrumLabelPeaks(PySpectrum* self, PyObject* args, PyObject* kwargs);
+PyObject* PySpectrumCorrectParentMass(PySpectrum* self, PyObject* args, PyObject* kwargs);
+PyObject* PySpectrumGetPMCFeatures(PySpectrum* self, PyObject* args, PyObject* kwargs);
+PyObject* PySpectrumSetCharge(PySpectrum* self, PyObject* args, PyObject* kwargs);
+PyObject* PySpectrumBYConvolve(PySpectrum* self, PyObject* args, PyObject* kwargs);
+PyObject* PySpectrumSetParentMass(PySpectrum* self, PyObject* args, PyObject* kwargs);
+PyObject* PySpectrumGetParentMass(PySpectrum* self, PyObject* args, PyObject* kwargs);
+PyObject* PySpectrumGetCCFeatures(PySpectrum* self, PyObject* args, PyObject* kwargs);
+PyObject* PySpectrumCorrectCharge(PySpectrum* self, PyObject* args, PyObject* kwargs);
+PyObject* PySpectrumGenerateTags(PySpectrum* self, PyObject* args, PyObject* kwargs);
+PyObject* PySpectrumGetPRMScore(PySpectrum* self, PyObject* args, PyObject* kwargs);
+PyObject* PySpectrumPlotPRMScores(PySpectrum* self, PyObject* args, PyObject* kwargs);
+PyObject* PySpectrumCheckTagging(PySpectrum* self, PyObject* args, PyObject* kwargs);
+PyObject* PySpectrumGetCutScores(PySpectrum* self, PyObject* args, PyObject* kwargs);
+PyObject* PySpectrumGetMatchFeatures(PySpectrum* self, PyObject* args, PyObject* kwargs);
+PyObject* PySpectrumPrepareIonScoring(PySpectrum* self, PyObject* args, PyObject* kwargs);
+PyObject* PySpectrumGetMZ(PySpectrum* self, PyObject* args, PyObject* kwargs);
+
+void PySpectrumDealloc(PyObject* selfobject);
+
+// Methods of the PySpectrum class
+static PyMethodDef PySpectrumMethods[] = 
+{
+    {"GetPeakCount", (PyCFunction)PySpectrumGetPeakCount, METH_VARARGS | METH_KEYWORDS, 
+      "Return peak count for a scan"},
+    {"ScorePeptide", (PyCFunction)PySpectrumScore, METH_VARARGS | METH_KEYWORDS, 
+      "Score a peptide match for this spectrum"},
+    {"ScorePeptideDetailed", (PyCFunction)PySpectrumScoreDetailed, METH_VARARGS | METH_KEYWORDS, 
+      "Score a peptide match for this spectrum; return all the scoring features"},
+    {"LabelPeaks", (PyCFunction)PySpectrumLabelPeaks, METH_VARARGS | METH_KEYWORDS, 
+      "Label spectrum peaks using a peptide annotation"},
+    {"CorrectParentMass", (PyCFunction)PySpectrumCorrectParentMass, METH_VARARGS | METH_KEYWORDS, 
+      "Select correct charge and parent mass for the spectrum"},
+    {"GetPMCFeatures", (PyCFunction)PySpectrumGetPMCFeatures, METH_VARARGS | METH_KEYWORDS, 
+      "Compute parent-mass-correction features for the spectrum"},
+    {"SetCharge", (PyCFunction)PySpectrumSetCharge, METH_VARARGS | METH_KEYWORDS, 
+      "Adjust the spectrum's charge"},
+    {"BYConvolve", (PyCFunction)PySpectrumBYConvolve, METH_VARARGS | METH_KEYWORDS, 
+      "Perform b/y peak convolution"},
+    {"SetParentMass", (PyCFunction)PySpectrumSetParentMass, METH_VARARGS | METH_KEYWORDS, 
+      "Set the parent mass"},
+    {"GetParentMass", (PyCFunction)PySpectrumGetParentMass, METH_VARARGS | METH_KEYWORDS, 
+      "Returns the parent mass"},
+    {"GetCCFeatures", (PyCFunction)PySpectrumGetCCFeatures, METH_VARARGS | METH_KEYWORDS, 
+      "Compute charge correction features for the spectrum"},
+    {"CorrectCharge", (PyCFunction)PySpectrumCorrectCharge, METH_VARARGS | METH_KEYWORDS, 
+      "Get the corrected charge for this spectrum"},
+    {"GenerateTags", (PyCFunction)PySpectrumGenerateTags, METH_VARARGS | METH_KEYWORDS, 
+      "Generate tags for this spectrum"},
+    {"GetPRMScore", (PyCFunction)PySpectrumGetPRMScore, METH_VARARGS | METH_KEYWORDS, 
+      "Get the score for a prefix residue mass (PRM)"},
+    {"PlotPRMScores", (PyCFunction)PySpectrumPlotPRMScores, METH_VARARGS | METH_KEYWORDS, 
+      "Output a plot of PRM scores for this spectrum"},
+    {"CheckTagging", (PyCFunction)PySpectrumCheckTagging, METH_VARARGS | METH_KEYWORDS, 
+      "Test whether this spectrum can generate a tag for a specified peptide"},
+    {"GetCutScores", (PyCFunction)PySpectrumGetCutScores, METH_VARARGS | METH_KEYWORDS, 
+      "Compute cut-point scores for the specified peptide annotation"},
+    {"GetMatchFeatures", (PyCFunction)PySpectrumGetMatchFeatures, METH_VARARGS | METH_KEYWORDS, 
+      "Get features for scoring a peptide match"},
+    {"PrepareIonScoring", (PyCFunction)PySpectrumPrepareIonScoring, METH_VARARGS | METH_KEYWORDS, 
+      "Force a call to PrepareSpectrumForIonScoring"},
+    {"GetMZ", (PyCFunction)PySpectrumGetMZ, METH_VARARGS | METH_KEYWORDS,
+      "Get the m/z for this spectrum"},
+      
+
+    {NULL},
+};
+
+
+// Members (currently none) of the PySpectrum class
+static PyMemberDef PySpectrumMembers[] =
+{
+    {NULL},
+};
+
+// Getters and setters for the PySpectrum class.  (Should be used
+// if Python code will be modifying the spectrum dynamically)
+static PyGetSetDef PySpectrumGetSet[] = 
+{
+    {NULL}  // Sentinel 
+};
+
+
+
+#endif // PY_SPECTRUM_H
diff --git a/PyInspect/PyUtils.c b/PyInspect/PyUtils.c
new file mode 100644
index 0000000..0c9b8a2
--- /dev/null
+++ b/PyInspect/PyUtils.c
@@ -0,0 +1,49 @@
+//Title:          PyUtils.c
+//Authors:         Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+#include "CMemLeak.h"
+#include "Python.h"
+#include <stdarg.h>
+#include "CMemLeak.h"
+#include "TagFile.h"
+
+char PythonErrorString[2048];
+extern PyObject* InspectError; // defined in PyInspect.c
+
+FILE* LogFile = NULL;
+
+// Simple interface for error-reporting to Python callers: 
+// Print an error to PythonErrorString, then call ReportPythonError().
+void ReportPythonError() 
+{
+    PyErr_SetString(InspectError, PythonErrorString);
+}
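+
+// A minimal sketch of the intended call pattern from a wrapper method (the failing
+// file open here is purely illustrative; the PySpectrum methods use the same idiom):
+static PyObject* SketchReportFailure(const char* FileName)
+{
+    sprintf(PythonErrorString, "Unable to open '%s'", FileName);
+    ReportPythonError();
+    return NULL; // NULL tells the interpreter that a Python exception has been set
+}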
diff --git a/PyInspect/PyUtils.h b/PyInspect/PyUtils.h
new file mode 100644
index 0000000..584696f
--- /dev/null
+++ b/PyInspect/PyUtils.h
@@ -0,0 +1,39 @@
+//Title:          PyUtils.h
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+#ifndef PY_UTILS_H
+#define PY_UTILS_H
+
+void ReportPythonError();
+extern char PythonErrorString[2048];
+
+#endif // PY_UTILS_H
diff --git a/PySVM.pyd b/PySVM.pyd
new file mode 100644
index 0000000..d01abd7
Binary files /dev/null and b/PySVM.pyd differ
diff --git a/PySVM/PySVM.c b/PySVM/PySVM.c
new file mode 100644
index 0000000..6790aff
--- /dev/null
+++ b/PySVM/PySVM.c
@@ -0,0 +1,327 @@
+//Title:          PySVM.c
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+#include "Python.h"
+#include "svm.h"
+//#include "TagFile.h"
+
+#define MAX_LINE_LENGTH 2048
+
+struct svm_model* Model;
+double DecisionValues[10]; // hacky
+struct svm_node* SVMFeatures;
+int SVMFeatureAllocation;
+
+// Assume no more than 128 features!
+double MinFeatureValue[128];
+double MaxFeatureValue[128];
+
+static PyObject* ex_foo(PyObject* self, PyObject* args)
+{
+    printf("Hello, world\n");
+    Py_INCREF(Py_None);
+    return Py_None;
+}
+
+// Copy one line (up to a \r or \n character) from a source buffer to a target buffer.
+// Optionally, strip out spaces.  Return the position just AFTER the end of the line.
+// (If a line ends in \r\n, we'll end up processing the line, and then one empty line; that's okay)
+// If a line is very long, we stop copying, and skip over the rest of it.
+int CopyBufferLine(char* Source, int BufferPos, int BufferEnd, char* LineBuffer, int StripWhitespace)
+{
+    int LinePos = 0;
+    int LineComplete = 0;
+    int Chars = 0;
+    int Skipping = 0;
+    //
+    while (!LineComplete)
+    {
+        if (BufferPos > BufferEnd)
+        {
+            // Our line extends off the edge of the buffer.  That's probably a Bad Thing.
+            printf("** Warning: Ran off the edge of the buffer in CopyBufferLine.  Line too long?\n");
+            LineBuffer[LinePos] = '\0';
+            return BufferPos;
+        }
+        switch (Source[BufferPos])
+        {
+        case ' ':
+            if (StripWhitespace)
+            {
+                BufferPos++;
+            }
+            else
+            {
+                if (!Skipping)
+                {
+                    LineBuffer[LinePos++] = Source[BufferPos];
+                }
+                BufferPos++;
+                Chars++;
+            }
+            break;
+        case '\r':
+        case '\n':
+            LineBuffer[LinePos] = '\0';
+            BufferPos++;
+            LineComplete = 1;
+            break;
+        case '\0':
+            LineBuffer[LinePos] = '\0';
+            LineComplete = 1;
+            break;
+        default:
+            if (!Skipping)
+            {
+                LineBuffer[LinePos++] = Source[BufferPos];
+            }
+            BufferPos++;
+            Chars++;
+            break;
+        }
+        if (Chars == MAX_LINE_LENGTH - 1)
+        {
+            printf("** Error: Line too long!  Truncating line.\n");
+            // Read the rest of the chars, but don't write them:
+            Chars = 0;
+            Skipping = 1;
+        }
+    }
+    return BufferPos;
+}
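+
+// A minimal sketch of the calling convention (the same loop PyLoadScaling uses below):
+// keep a cursor into the buffer, let CopyBufferLine advance it past each line, and
+// stop at the terminating null.  The helper name is illustrative only.
+static void SketchIterateBufferLines(char* Buffer, int BufferEnd)
+{
+    char LineBuffer[MAX_LINE_LENGTH];
+    int BufferPos = 0;
+    while (Buffer[BufferPos])
+    {
+        BufferPos = CopyBufferLine(Buffer, BufferPos, BufferEnd, LineBuffer, 0);
+        printf("line: '%s'\n", LineBuffer);
+    }
+}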
+
+static PyObject* PyLoadScaling(PyObject* self, PyObject* args)
+{
+    char* FilePath;
+    char* FileText;
+    char LineBuffer[MAX_LINE_LENGTH];
+    FILE* ScaleFile;
+    int BufferPos; 
+    int BufferEnd;
+    int FeatureNumber;
+    char* StrValue;
+    double MinValue;
+    double MaxValue;
+    //
+    if (!PyArg_ParseTuple(args, "s", &FilePath))
+    {
+        return NULL; // PyArg_ParseTuple has already set the exception
+    }
+    ScaleFile = fopen(FilePath, "rb");
+    if (!ScaleFile)
+    {
+        printf("** Error: Can't open file '%s'\n", FilePath);
+        PyErr_SetString(PyExc_IOError, "can't open SVM scaling file");
+        return NULL;
+    }
+    FileText = (char*)calloc(sizeof(char), 10240);
+    BufferEnd = fread(FileText, sizeof(char), 10240, ScaleFile);
+    BufferPos = 0;
+    while (1)
+    {
+        if (!FileText[BufferPos])
+        {
+            break;
+        }
+        BufferPos = CopyBufferLine(FileText, BufferPos, BufferEnd, LineBuffer, 0);
+        //printf("Line parsed: '%s'\n", LineBuffer);
+        StrValue = strtok(LineBuffer, " \t");
+        if (!StrValue)
+        {
+            continue;
+        }
+        FeatureNumber = atoi(StrValue);
+        if (FeatureNumber <= 0)
+        {
+            continue;
+        }
+        StrValue = strtok(NULL, " \t");
+        if (!StrValue)
+        {
+            continue;
+        }
+        MinValue = atof(StrValue);
+        StrValue = strtok(NULL, " \t");
+        if (!StrValue)
+        {
+            continue;
+        }
+        MaxValue = atof(StrValue);
+        MinFeatureValue[FeatureNumber - 1] = MinValue;
+        MaxFeatureValue[FeatureNumber - 1] = MaxValue;
+        //printf("Feature %d: Range %f...%f\n", FeatureNumber, MinValue, MaxValue);
+    }
+    fclose(ScaleFile);
+    free(FileText);
+    Py_INCREF(Py_None);
+    return Py_None;
+}
+
+
+void ScaleSVMFeatures(struct svm_node* Features, int FeatureCount)
+{
+    int FeatureIndex;
+    double Value;
+    double Range;
+    //
+    for (FeatureIndex = 0; FeatureIndex < FeatureCount; FeatureIndex++)
+    {
+        Value = Features[FeatureIndex].value;
+        Range = MaxFeatureValue[FeatureIndex] - MinFeatureValue[FeatureIndex];
+        if (Value <= MinFeatureValue[FeatureIndex])
+        {
+            SVMFeatures[FeatureIndex].value = -1.0;
+            continue;
+        }
+        if (Value >= MaxFeatureValue[FeatureIndex])
+        {
+            Features[FeatureIndex].value = 1.0;
+            continue;
+        }
+        Features[FeatureIndex].value = -1.0 + 2.0 * (Value - MinFeatureValue[FeatureIndex]) / Range;
+    }
+}
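+
+// The scaling above is plain min/max normalization onto [-1, +1] using the per-feature
+// ranges loaded by LoadScaling: values at or below the training minimum map to -1,
+// values at or above the maximum map to +1, and everything between is linear.  The
+// same rule for one value, as a sketch:
+static double SketchScaleOneFeature(double Value, double MinValue, double MaxValue)
+{
+    if (Value <= MinValue)
+    {
+        return -1.0;
+    }
+    if (Value >= MaxValue)
+    {
+        return 1.0;
+    }
+    return -1.0 + 2.0 * (Value - MinValue) / (MaxValue - MinValue);
+}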
+
+static PyObject* PyLoadModel(PyObject* self, PyObject* args)
+{
+    char* FilePath;
+    if (!PyArg_ParseTuple(args, "s", &FilePath))
+    {
+        return NULL; // PyArg_ParseTuple has already set the exception
+    }
+
+    // Free the old model, if any:
+    if (Model)
+    {
+        svm_destroy_model(Model);
+        Model = NULL;
+    }
+
+    // Load model from specified file:
+    Model = svm_load_model(FilePath);
+
+    Py_INCREF(Py_None);
+    return Py_None;
+}
+
+PyObject* PyScoreHelper(PyObject* FeatureList, int ScaleFlag)
+{
+    int FeatureIndex;
+    int FeatureCount;
+    int SequenceType;
+    //
+    if (PyList_Check(FeatureList))
+    {
+        SequenceType = 1;
+        FeatureCount = PyList_Size(FeatureList);
+    }
+    else if (PyTuple_Check(FeatureList))
+    {
+        SequenceType = 2;
+        FeatureCount = PyTuple_Size(FeatureList);
+    }
+    else
+    {
+        printf("** Error in PyScoreHelper: Illegal argument (not a list or tuple)\n");
+        PyErr_SetString(PyExc_TypeError, "feature vector must be a list or tuple");
+        return NULL;
+    }
+
+    // Allocate SVMFeatures, if necessary:
+    if (FeatureCount >= SVMFeatureAllocation)
+    {
+        if (SVMFeatures)
+        {
+            free(SVMFeatures);
+        }
+        SVMFeatures = (struct svm_node*)malloc((FeatureCount + 1)  * sizeof(struct svm_node));
+        SVMFeatureAllocation = FeatureCount + 1;
+    }
+
+    // Populate SVMFeatures:
+    for (FeatureIndex = 0; FeatureIndex < FeatureCount; FeatureIndex++)
+    {
+        SVMFeatures[FeatureIndex].index = FeatureIndex + 1;
+        if (SequenceType == 1)
+        {
+            SVMFeatures[FeatureIndex].value = PyFloat_AsDouble(PyList_GetItem(FeatureList, FeatureIndex));
+        }
+        else
+        {
+            SVMFeatures[FeatureIndex].value = PyFloat_AsDouble(PyTuple_GetItem(FeatureList, FeatureIndex));
+        }
+    }
+    if (ScaleFlag)
+    {
+        ScaleSVMFeatures(SVMFeatures, FeatureCount);
+    }
+    SVMFeatures[FeatureCount].index = -1;
+    // Predict, and return:
+    svm_predict_values(Model, SVMFeatures, DecisionValues);
+    return PyFloat_FromDouble(DecisionValues[0]);
+}
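+
+// libsvm expects a feature vector as an array of svm_node entries with 1-based index
+// fields and a final sentinel whose index is -1; that is exactly what the loop above
+// builds before calling svm_predict_values.  A sketch for a hypothetical three-feature
+// vector, assuming a model has already been loaded into the Model global:
+static double SketchPredictThreeFeatures(double F1, double F2, double F3)
+{
+    struct svm_node Nodes[4];
+    double Decision[10];
+    Nodes[0].index = 1; Nodes[0].value = F1;
+    Nodes[1].index = 2; Nodes[1].value = F2;
+    Nodes[2].index = 3; Nodes[2].value = F3;
+    Nodes[3].index = -1; // sentinel: end of the sparse vector
+    svm_predict_values(Model, Nodes, Decision);
+    return Decision[0]; // a two-class model fills only the first decision value
+}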
+
+static PyObject* PyScoreVector(PyObject* self, PyObject* args)
+{
+    PyObject* FeatureList;
+    //
+    if (!PyArg_ParseTuple(args, "O", &FeatureList))
+    {
+        return NULL; // PyArg_ParseTuple has already set the exception
+    }
+    return PyScoreHelper(FeatureList, 0);
+}
+
+static PyObject* PyScaleAndScoreVector(PyObject* self, PyObject* args)
+{
+    PyObject* FeatureList;
+    //
+    if (!PyArg_ParseTuple(args, "O", &FeatureList))
+    {
+        return NULL; // PyArg_ParseTuple has already set the exception
+    }
+    return PyScoreHelper(FeatureList, 1);
+}
+
+static PyMethodDef PySVM_methods[] = {
+    {"foo", ex_foo, METH_VARARGS, "foo() doc string"},
+    {"LoadModel", PyLoadModel, METH_VARARGS, "Load an SVM model from disk"},
+    {"Score", PyScoreVector, METH_VARARGS, "Score a (pre-scaled) vector"},
+    {"LoadScaling", PyLoadScaling, METH_VARARGS, "Load feature scaling parameters from a file"},
+    {"ScaleAndScore", PyScaleAndScoreVector, METH_VARARGS, "Scale and score a feature-vector"},
+    {NULL, NULL}
+};
+
+PyMODINIT_FUNC initPySVM(void)
+{
+    Py_InitModule("PySVM", PySVM_methods);
+    Model = NULL;
+    SVMFeatures = NULL;
+    SVMFeatureAllocation = 0;
+}
diff --git a/PySVM/PySVM.sln b/PySVM/PySVM.sln
new file mode 100644
index 0000000..28e4a22
--- /dev/null
+++ b/PySVM/PySVM.sln
@@ -0,0 +1,21 @@
+Microsoft Visual Studio Solution File, Format Version 8.00
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "PySVM", "PySVM.vcproj", "{A0608D6F-84ED-44AE-A2A6-A3CC7F4A4030}"
+	ProjectSection(ProjectDependencies) = postProject
+	EndProjectSection
+EndProject
+Global
+	GlobalSection(SolutionConfiguration) = preSolution
+		Debug = Debug
+		Release = Release
+	EndGlobalSection
+	GlobalSection(ProjectConfiguration) = postSolution
+		{A0608D6F-84ED-44AE-A2A6-A3CC7F4A4030}.Debug.ActiveCfg = Debug|Win32
+		{A0608D6F-84ED-44AE-A2A6-A3CC7F4A4030}.Debug.Build.0 = Debug|Win32
+		{A0608D6F-84ED-44AE-A2A6-A3CC7F4A4030}.Release.ActiveCfg = Release|Win32
+		{A0608D6F-84ED-44AE-A2A6-A3CC7F4A4030}.Release.Build.0 = Release|Win32
+	EndGlobalSection
+	GlobalSection(ExtensibilityGlobals) = postSolution
+	EndGlobalSection
+	GlobalSection(ExtensibilityAddIns) = postSolution
+	EndGlobalSection
+EndGlobal
diff --git a/PySVM/PySVM.vcproj b/PySVM/PySVM.vcproj
new file mode 100644
index 0000000..26567a8
--- /dev/null
+++ b/PySVM/PySVM.vcproj
@@ -0,0 +1,198 @@
+<?xml version="1.0" encoding="windows-1250"?>
+<VisualStudioProject
+	ProjectType="Visual C++"
+	Version="7.10"
+	Name="PySVM"
+	SccProjectName=""
+	SccLocalPath="">
+	<Platforms>
+		<Platform
+			Name="Win32"/>
+	</Platforms>
+	<Configurations>
+		<Configuration
+			Name="Release|Win32"
+			OutputDirectory=".\Release"
+			IntermediateDirectory=".\Release"
+			ConfigurationType="2"
+			UseOfMFC="0"
+			ATLMinimizesCRunTimeLibraryUsage="FALSE">
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="2"
+				InlineFunctionExpansion="1"
+				AdditionalIncludeDirectories="d:\python-2.5\Include,d:\python-2.5\PC"
+				PreprocessorDefinitions="WIN32;NDEBUG;_WINDOWS"
+				StringPooling="TRUE"
+				RuntimeLibrary="2"
+				EnableFunctionLevelLinking="TRUE"
+				UsePrecompiledHeader="2"
+				PrecompiledHeaderFile=".\Release/PySVM.pch"
+				AssemblerListingLocation=".\Release/"
+				ObjectFile=".\Release/"
+				ProgramDataBaseFileName=".\Release/"
+				WarningLevel="3"
+				SuppressStartupBanner="TRUE"
+				CompileAs="0"/>
+			<Tool
+				Name="VCCustomBuildTool"/>
+			<Tool
+				Name="VCLinkerTool"
+				AdditionalOptions="/export:initPySVM"
+				AdditionalDependencies="odbc32.lib odbccp32.lib python25.lib"
+				OutputFile="d:\research\inspect\PySVM.pyd"
+				LinkIncremental="1"
+				SuppressStartupBanner="TRUE"
+				AdditionalLibraryDirectories="d:\python-2.5\PCbuild"
+				ModuleDefinitionFile=""
+				ProgramDatabaseFile=".\Release/PySVM.pdb"
+				SubSystem="2"
+				ImportLibrary=".\Release/PySVM.lib"
+				TargetMachine="1"/>
+			<Tool
+				Name="VCMIDLTool"
+				PreprocessorDefinitions="NDEBUG"
+				MkTypLibCompatible="TRUE"
+				SuppressStartupBanner="TRUE"
+				TargetEnvironment="1"
+				TypeLibraryName=".\Release/PySVM.tlb"
+				HeaderFileName=""/>
+			<Tool
+				Name="VCPostBuildEventTool"/>
+			<Tool
+				Name="VCPreBuildEventTool"/>
+			<Tool
+				Name="VCPreLinkEventTool"/>
+			<Tool
+				Name="VCResourceCompilerTool"
+				PreprocessorDefinitions="NDEBUG"
+				Culture="1033"/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"/>
+			<Tool
+				Name="VCWebDeploymentTool"/>
+			<Tool
+				Name="VCManagedWrapperGeneratorTool"/>
+			<Tool
+				Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
+		</Configuration>
+		<Configuration
+			Name="Debug|Win32"
+			OutputDirectory=".\Debug"
+			IntermediateDirectory=".\Debug"
+			ConfigurationType="2"
+			UseOfMFC="0"
+			ATLMinimizesCRunTimeLibraryUsage="FALSE">
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="0"
+				AdditionalIncludeDirectories="d:\python-2.5\Include,d:\python-2.5\PC"
+				PreprocessorDefinitions="WIN32;_DEBUG;_WINDOWS"
+				RuntimeLibrary="3"
+				UsePrecompiledHeader="2"
+				PrecompiledHeaderFile=".\Debug/PySVM.pch"
+				AssemblerListingLocation=".\Debug/"
+				ObjectFile=".\Debug/"
+				ProgramDataBaseFileName=".\Debug/"
+				WarningLevel="3"
+				SuppressStartupBanner="TRUE"
+				DebugInformationFormat="4"
+				CompileAs="0"/>
+			<Tool
+				Name="VCCustomBuildTool"/>
+			<Tool
+				Name="VCLinkerTool"
+				AdditionalOptions="/export:initPySVM"
+				AdditionalDependencies="odbc32.lib odbccp32.lib python25_d.lib"
+				OutputFile="d:\research\inspect\PySVM_d.pyd"
+				LinkIncremental="1"
+				SuppressStartupBanner="TRUE"
+				AdditionalLibraryDirectories="d:\python-2.5\PCbuild"
+				ModuleDefinitionFile=""
+				GenerateDebugInformation="TRUE"
+				ProgramDatabaseFile=".\Debug/PySVM_d.pdb"
+				SubSystem="2"
+				ImportLibrary=".\Debug/PySVM_d.lib"
+				TargetMachine="1"/>
+			<Tool
+				Name="VCMIDLTool"
+				PreprocessorDefinitions="_DEBUG"
+				MkTypLibCompatible="TRUE"
+				SuppressStartupBanner="TRUE"
+				TargetEnvironment="1"
+				TypeLibraryName=".\Debug/PySVM.tlb"
+				HeaderFileName=""/>
+			<Tool
+				Name="VCPostBuildEventTool"/>
+			<Tool
+				Name="VCPreBuildEventTool"/>
+			<Tool
+				Name="VCPreLinkEventTool"/>
+			<Tool
+				Name="VCResourceCompilerTool"
+				PreprocessorDefinitions="_DEBUG"
+				Culture="1033"/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"/>
+			<Tool
+				Name="VCWebDeploymentTool"/>
+			<Tool
+				Name="VCManagedWrapperGeneratorTool"/>
+			<Tool
+				Name="VCAuxiliaryManagedWrapperGeneratorTool"/>
+		</Configuration>
+	</Configurations>
+	<References>
+	</References>
+	<Files>
+		<Filter
+			Name="Source Files"
+			Filter="cpp;c;cxx;rc;def;r;odl;hpj;bat;for;f90">
+			<File
+				RelativePath="PySVM.c">
+				<FileConfiguration
+					Name="Release|Win32">
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="2"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions="WIN32;NDEBUG;_WINDOWS;$(NoInherit)"/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug|Win32">
+					<Tool
+						Name="VCCLCompilerTool"
+						Optimization="0"
+						AdditionalIncludeDirectories=""
+						PreprocessorDefinitions="WIN32;_DEBUG;_WINDOWS;$(NoInherit)"/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath=".\svm-predict.c">
+			</File>
+			<File
+				RelativePath=".\svm.cpp">
+			</File>
+		</Filter>
+		<Filter
+			Name="Header Files"
+			Filter="h;hpp;hxx;hm;inl;fi;fd">
+			<File
+				RelativePath=".\svm.h">
+			</File>
+		</Filter>
+		<Filter
+			Name="Resource Files"
+			Filter="ico;cur;bmp;dlg;rc2;rct;bin;cnt;rtf;gif;jpg;jpeg;jpe">
+		</Filter>
+		<File
+			RelativePath="readme.txt">
+		</File>
+	</Files>
+	<Globals>
+	</Globals>
+</VisualStudioProject>
diff --git a/PySVM/svm-predict.c b/PySVM/svm-predict.c
new file mode 100644
index 0000000..11db985
--- /dev/null
+++ b/PySVM/svm-predict.c
@@ -0,0 +1,202 @@
+#include <stdio.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
+#include "svm.h"
+
+char* line;
+int max_line_len = 1024;
+struct svm_node *x;
+int max_nr_attr = 64;
+
+struct svm_model* model;
+int predict_probability=0;
+
+void predict(FILE *input, FILE *output)
+{
+    int correct = 0;
+    int total = 0;
+    double error = 0;
+    double sumv = 0, sumy = 0, sumvv = 0, sumyy = 0, sumvy = 0;
+    double dec_values[10]; // hacky
+    int svm_type=svm_get_svm_type(model);
+    int nr_class=svm_get_nr_class(model);
+    int* labels = (int*)malloc(nr_class*sizeof(int));
+    double* prob_estimates=NULL;
+    int j;
+
+    if (predict_probability)
+    {
+        if (svm_type==NU_SVR || svm_type==EPSILON_SVR)
+        {
+            printf("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma=%g\n",svm_get_svr_probability(model));
+        }
+        else
+        {
+            svm_get_labels(model,labels);
+            prob_estimates = (double *) malloc(nr_class*sizeof(double));
+            fprintf(output,"labels");        
+            for(j = 0; j < nr_class; j++)
+            {
+                fprintf(output," %d",labels[j]);
+            }
+            fprintf(output,"\n");
+        }
+    }
+    while(1)
+    {
+        int i = 0;
+        int c;
+        double target;
+        double v;
+
+        if (fscanf(input,"%lf",&target)==EOF)
+        {
+            break;
+        }
+
+        while(1)
+        {
+            if (i>=max_nr_attr-1)    // need one more for index = -1
+            {
+                max_nr_attr *= 2;
+                x = (struct svm_node *) realloc(x,max_nr_attr*sizeof(struct svm_node));
+            }
+
+            do
+            {
+                c = getc(input);
+                if (c=='\n' || c==EOF) goto out2;
+            } while(isspace(c));
+            ungetc(c,input);
+            fscanf(input,"%d:%lf",&x[i].index,&x[i].value);
+            ++i;
+        }    
+
+out2:
+        x[i++].index = -1;
+
+        if (predict_probability && (svm_type==C_SVC || svm_type==NU_SVC))
+        {
+            v = svm_predict_probability(model,x,prob_estimates);
+            fprintf(output,"%g ",v);
+            for(j=0;j<nr_class;j++)
+            {
+                fprintf(output,"%g ",prob_estimates[j]);
+            }
+            fprintf(output,"\n");
+        }
+        else
+        {
+            //v = svm_predict(model,x);           
+            //fprintf(output,"%g\n",v);
+            svm_predict_values(model, x, dec_values);
+            //printf("%.2f %.2f\n", v, dec_values[0]);
+            fprintf(output, "%g\n", dec_values[0]); // high values should correspond to +1, that's easier to remember.
+            if (dec_values[0] < 0)
+            {
+                v = 1;
+            }
+            else
+            {
+                v = -1;
+            }
+        }
+
+        if (v == target)
+        {
+            ++correct;
+        }
+        error += (v-target)*(v-target);
+        sumv += v;
+        sumy += target;
+        sumvv += v*v;
+        sumyy += target*target;
+        sumvy += v*target;
+        ++total;
+    }
+    printf("Accuracy = %g%% (%d/%d) (classification)\n",
+        (double)correct/total*100,correct,total);
+    printf("Mean squared error = %g (regression)\n",error/total);
+    printf("Squared correlation coefficient = %g (regression)\n",
+        ((total*sumvy-sumv*sumy)*(total*sumvy-sumv*sumy))/
+        ((total*sumvv-sumv*sumv)*(total*sumyy-sumy*sumy))
+        );
+    if (predict_probability)
+    {
+        free(prob_estimates);
+        free(labels);
+    }
+}
+
+void exit_with_help()
+{
+    printf(
+    "Usage: svm-predict [options] test_file model_file output_file\n"
+    "options:\n"
+    "-b probability_estimates: whether to predict probability estimates, 0 or 1 (default 0); one-class SVM not supported yet\n"
+    );
+    exit(1);
+}
+
+int svpredict_main(int argc, char **argv)
+{
+    FILE *input, *output;
+    int i;
+
+    // parse options
+    for(i=1;i<argc;i++)
+    {
+        if (argv[i][0] != '-') break;
+        ++i;
+        switch(argv[i-1][1])
+        {
+            case 'b':
+                predict_probability = atoi(argv[i]);
+                break;
+            default:
+                fprintf(stderr,"unknown option\n");
+                exit_with_help();
+        }
+    }
+    if (i>=argc)
+        exit_with_help();
+    
+    input = fopen(argv[i],"r");
+    if (input == NULL)
+    {
+        fprintf(stderr,"can't open input file %s\n",argv[i]);
+        exit(1);
+    }
+
+    output = fopen(argv[i+2],"w");
+    if (output == NULL)
+    {
+        fprintf(stderr,"can't open output file %s\n",argv[i+2]);
+        exit(1);
+    }
+
+    if ((model=svm_load_model(argv[i+1]))==0)
+    {
+        fprintf(stderr,"can't open model file %s\n",argv[i+1]);
+        exit(1);
+    }
+    
+    line = (char *) malloc(max_line_len*sizeof(char));
+    x = (struct svm_node *) malloc(max_nr_attr*sizeof(struct svm_node));
+    if (predict_probability)
+    {
+        if (svm_check_probability_model(model)==0)
+        {
+            fprintf(stderr,"model does not support probabiliy estimates\n");
+            predict_probability=0;
+        }
+    }
+    predict(input,output);
+    svm_destroy_model(model);
+    free(line);
+    free(x);
+    fclose(input);
+    fclose(output);
+    return 0;
+}
diff --git a/PySVM/svm.cpp b/PySVM/svm.cpp
new file mode 100644
index 0000000..aaf2fa8
--- /dev/null
+++ b/PySVM/svm.cpp
@@ -0,0 +1,3087 @@
+
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <float.h>
+#include <string.h>
+#include <stdarg.h>
+#include "svm.h"
+typedef float Qfloat;
+typedef signed char schar;
+#ifndef min
+template <class T> inline T min(T x,T y) { return (x<y)?x:y; }
+#endif
+#ifndef max
+template <class T> inline T max(T x,T y) { return (x>y)?x:y; }
+#endif
+template <class T> inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
+template <class S, class T> inline void clone(T*& dst, S* src, int n)
+{
+	dst = new T[n];
+	memcpy((void *)dst,(void *)src,sizeof(T)*n);
+}
+#define INF HUGE_VAL
+#define TAU 1e-12
+#define Malloc(type,n) (type *)malloc((n)*sizeof(type))
+#if 1
+void info(char *fmt,...)
+{
+	va_list ap;
+	va_start(ap,fmt);
+	vprintf(fmt,ap);
+	va_end(ap);
+}
+void info_flush()
+{
+	fflush(stdout);
+}
+#else
+void info(char *fmt,...) {}
+void info_flush() {}
+#endif
+
+//
+// Kernel Cache
+//
+// l is the number of total data items
+// size is the cache size limit in bytes
+//
+class Cache
+{
+public:
+	Cache(int l,int size);
+	~Cache();
+
+	// request data [0,len)
+	// return some position p where [p,len) need to be filled
+	// (p >= len if nothing needs to be filled)
+	int get_data(const int index, Qfloat **data, int len);
+	void swap_index(int i, int j);	// future_option
+private:
+	int l;
+	int size;
+	struct head_t
+	{
+		head_t *prev, *next;	// a circular list
+		Qfloat *data;
+		int len;		// data[0,len) is cached in this entry
+	};
+
+	head_t *head;
+	head_t lru_head;
+	void lru_delete(head_t *h);
+	void lru_insert(head_t *h);
+};
+
+Cache::Cache(int l_,int size_):l(l_),size(size_)
+{
+	head = (head_t *)calloc(l,sizeof(head_t));	// initialized to 0
+	size /= sizeof(Qfloat);
+	size -= l * sizeof(head_t) / sizeof(Qfloat);
+	size = max(size, 2*l);	// cache must be large enough for two columns
+	lru_head.next = lru_head.prev = &lru_head;
+}
+
+Cache::~Cache()
+{
+	for (head_t *h = lru_head.next; h != &lru_head; h=h->next)
+		free(h->data);
+	free(head);
+}
+
+void Cache::lru_delete(head_t *h)
+{
+	// delete from current location
+	h->prev->next = h->next;
+	h->next->prev = h->prev;
+}
+
+void Cache::lru_insert(head_t *h)
+{
+	// insert to last position
+	h->next = &lru_head;
+	h->prev = lru_head.prev;
+	h->prev->next = h;
+	h->next->prev = h;
+}
+
+int Cache::get_data(const int index, Qfloat **data, int len)
+{
+	head_t *h = &head[index];
+	if (h->len) lru_delete(h);
+	int more = len - h->len;
+
+	if (more > 0)
+	{
+		// free old space
+		while(size < more)
+		{
+			head_t *old = lru_head.next;
+			lru_delete(old);
+			free(old->data);
+			size += old->len;
+			old->data = 0;
+			old->len = 0;
+		}
+
+		// allocate new space
+		h->data = (Qfloat *)realloc(h->data,sizeof(Qfloat)*len);
+		size -= more;
+		swap(h->len,len);
+	}
+
+	lru_insert(h);
+	*data = h->data;
+	return len;
+}
+
+void Cache::swap_index(int i, int j)
+{
+	if (i==j) return;
+
+	if (head[i].len) lru_delete(&head[i]);
+	if (head[j].len) lru_delete(&head[j]);
+	swap(head[i].data,head[j].data);
+	swap(head[i].len,head[j].len);
+	if (head[i].len) lru_insert(&head[i]);
+	if (head[j].len) lru_insert(&head[j]);
+
+	if (i>j) swap(i,j);
+	for (head_t *h = lru_head.next; h!=&lru_head; h=h->next)
+	{
+		if (h->len > i)
+		{
+			if (h->len > j)
+				swap(h->data[i],h->data[j]);
+			else
+			{
+				// give up
+				lru_delete(h);
+				free(h->data);
+				size += h->len;
+				h->data = 0;
+				h->len = 0;
+			}
+		}
+	}
+}
+
+//
+// Kernel evaluation
+//
+// the static method k_function is for doing single kernel evaluation
+// the constructor of Kernel prepares to calculate the l*l kernel matrix
+// the member function get_Q is for getting one column from the Q Matrix
+//
+class QMatrix {
+public:
+	virtual Qfloat *get_Q(int column, int len) const = 0;
+	virtual Qfloat *get_QD() const = 0;
+	virtual void swap_index(int i, int j) const = 0;
+};
+
+class Kernel: public QMatrix {
+public:
+	Kernel(int l, svm_node * const * x, const svm_parameter& param);
+	virtual ~Kernel();
+
+	static double k_function(const svm_node *x, const svm_node *y,
+				 const svm_parameter& param);
+	virtual Qfloat *get_Q(int column, int len) const = 0;
+	virtual Qfloat *get_QD() const = 0;
+	virtual void swap_index(int i, int j) const	// not so const...
+	{
+		swap(x[i],x[j]);
+		if (x_square) swap(x_square[i],x_square[j]);
+	}
+protected:
+
+	double (Kernel::*kernel_function)(int i, int j) const;
+
+private:
+	const svm_node **x;
+	double *x_square;
+
+	// svm_parameter
+	const int kernel_type;
+	const double degree;
+	const double gamma;
+	const double coef0;
+
+	static double dot(const svm_node *px, const svm_node *py);
+	double kernel_linear(int i, int j) const
+	{
+		return dot(x[i],x[j]);
+	}
+	double kernel_poly(int i, int j) const
+	{
+		return pow(gamma*dot(x[i],x[j])+coef0,degree);
+	}
+	double kernel_rbf(int i, int j) const
+	{
+		return exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j])));
+	}
+	double kernel_sigmoid(int i, int j) const
+	{
+		return tanh(gamma*dot(x[i],x[j])+coef0);
+	}
+};
+
+Kernel::Kernel(int l, svm_node * const * x_, const svm_parameter& param)
+:kernel_type(param.kernel_type), degree(param.degree),
+ gamma(param.gamma), coef0(param.coef0)
+{
+	switch(kernel_type)
+	{
+		case LINEAR:
+			kernel_function = &Kernel::kernel_linear;
+			break;
+		case POLY:
+			kernel_function = &Kernel::kernel_poly;
+			break;
+		case RBF:
+			kernel_function = &Kernel::kernel_rbf;
+			break;
+		case SIGMOID:
+			kernel_function = &Kernel::kernel_sigmoid;
+			break;
+	}
+
+	clone(x,x_,l);
+
+	if (kernel_type == RBF)
+	{
+		x_square = new double[l];
+		for (int i=0;i<l;i++)
+			x_square[i] = dot(x[i],x[i]);
+	}
+	else
+		x_square = 0;
+}
+
+Kernel::~Kernel()
+{
+	delete[] x;
+	delete[] x_square;
+}
+
+double Kernel::dot(const svm_node *px, const svm_node *py)
+{
+	double sum = 0;
+	while(px->index != -1 && py->index != -1)
+	{
+		if (px->index == py->index)
+		{
+			sum += px->value * py->value;
+			++px;
+			++py;
+		}
+		else
+		{
+			if (px->index > py->index)
+				++py;
+			else
+				++px;
+		}			
+	}
+	return sum;
+}
+
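+// Evaluate a single kernel value k(x,y) between two sparse vectors using the
+// kernel type and parameters in param; used at prediction time, when no
+// precomputed x_square values or kernel cache are available.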
+double Kernel::k_function(const svm_node *x, const svm_node *y,
+			  const svm_parameter& param)
+{
+	switch(param.kernel_type)
+	{
+		case LINEAR:
+			return dot(x,y);
+		case POLY:
+			return pow(param.gamma*dot(x,y)+param.coef0,param.degree);
+		case RBF:
+		{
+			double sum = 0;
+			while(x->index != -1 && y->index !=-1)
+			{
+				if (x->index == y->index)
+				{
+					double d = x->value - y->value;
+					sum += d*d;
+					++x;
+					++y;
+				}
+				else
+				{
+					if (x->index > y->index)
+					{	
+						sum += y->value * y->value;
+						++y;
+					}
+					else
+					{
+						sum += x->value * x->value;
+						++x;
+					}
+				}
+			}
+
+			while(x->index != -1)
+			{
+				sum += x->value * x->value;
+				++x;
+			}
+
+			while(y->index != -1)
+			{
+				sum += y->value * y->value;
+				++y;
+			}
+			
+			return exp(-param.gamma*sum);
+		}
+		case SIGMOID:
+			return tanh(param.gamma*dot(x,y)+param.coef0);
+		default:
+			return 0;	/* Unreachable */
+	}
+}
+
+// Generalized SMO+SVMlight algorithm
+// Solves:
+//
+//	min 0.5(\alpha^T Q \alpha) + b^T \alpha
+//
+//		y^T \alpha = \delta
+//		y_i = +1 or -1
+//		0 <= alpha_i <= Cp for y_i = 1
+//		0 <= alpha_i <= Cn for y_i = -1
+//
+// Given:
+//
+//	Q, b, y, Cp, Cn, and an initial feasible point \alpha
+//	l is the size of vectors and matrices
+//	eps is the stopping criterion
+//
+// solution will be put in \alpha, objective value will be put in obj
+//
+class Solver {
+public:
+	Solver() {};
+	virtual ~Solver() {};
+
+	struct SolutionInfo {
+		double obj;
+		double rho;
+		double upper_bound_p;
+		double upper_bound_n;
+		double r;	// for Solver_NU
+	};
+
+	void Solve(int l, const QMatrix& Q, const double *b_, const schar *y_,
+		   double *alpha_, double Cp, double Cn, double eps,
+		   SolutionInfo* si, int shrinking);
+protected:
+	int active_size;
+	schar *y;
+	double *G;		// gradient of objective function
+	enum { LOWER_BOUND, UPPER_BOUND, FREE };
+	char *alpha_status;	// LOWER_BOUND, UPPER_BOUND, FREE
+	double *alpha;
+	const QMatrix *Q;
+	const Qfloat *QD;
+	double eps;
+	double Cp,Cn;
+	double *b;
+	int *active_set;
+	double *G_bar;		// gradient, if we treat free variables as 0
+	int l;
+	bool unshrinked;	// XXX
+
+	double get_C(int i)
+	{
+		return (y[i] > 0)? Cp : Cn;
+	}
+	void update_alpha_status(int i)
+	{
+		if (alpha[i] >= get_C(i))
+			alpha_status[i] = UPPER_BOUND;
+		else if (alpha[i] <= 0)
+			alpha_status[i] = LOWER_BOUND;
+		else alpha_status[i] = FREE;
+	}
+	bool is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; }
+	bool is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; }
+	bool is_free(int i) { return alpha_status[i] == FREE; }
+	void swap_index(int i, int j);
+	void reconstruct_gradient();
+	virtual int select_working_set(int &i, int &j);
+	virtual int max_violating_pair(int &i, int &j);
+	virtual double calculate_rho();
+	virtual void do_shrinking();
+};
+
+void Solver::swap_index(int i, int j)
+{
+	Q->swap_index(i,j);
+	swap(y[i],y[j]);
+	swap(G[i],G[j]);
+	swap(alpha_status[i],alpha_status[j]);
+	swap(alpha[i],alpha[j]);
+	swap(b[i],b[j]);
+	swap(active_set[i],active_set[j]);
+	swap(G_bar[i],G_bar[j]);
+}
+
+void Solver::reconstruct_gradient()
+{
+	// reconstruct inactive elements of G from G_bar and free variables
+
+	if (active_size == l) return;
+
+	int i;
+	for (i=active_size;i<l;i++)
+		G[i] = G_bar[i] + b[i];
+	
+	for (i=0;i<active_size;i++)
+		if (is_free(i))
+		{
+			const Qfloat *Q_i = Q->get_Q(i,l);
+			double alpha_i = alpha[i];
+			for (int j=active_size;j<l;j++)
+				G[j] += alpha_i * Q_i[j];
+		}
+}
+
+void Solver::Solve(int l, const QMatrix& Q, const double *b_, const schar *y_,
+		   double *alpha_, double Cp, double Cn, double eps,
+		   SolutionInfo* si, int shrinking)
+{
+	this->l = l;
+	this->Q = &Q;
+	QD=Q.get_QD();
+	clone(b, b_,l);
+	clone(y, y_,l);
+	clone(alpha,alpha_,l);
+	this->Cp = Cp;
+	this->Cn = Cn;
+	this->eps = eps;
+	unshrinked = false;
+
+	// initialize alpha_status
+	{
+		alpha_status = new char[l];
+		for (int i=0;i<l;i++)
+			update_alpha_status(i);
+	}
+
+	// initialize active set (for shrinking)
+	{
+		active_set = new int[l];
+		for (int i=0;i<l;i++)
+			active_set[i] = i;
+		active_size = l;
+	}
+
+	// initialize gradient
+	{
+		G = new double[l];
+		G_bar = new double[l];
+		int i;
+		for (i=0;i<l;i++)
+		{
+			G[i] = b[i];
+			G_bar[i] = 0;
+		}
+		for (i=0;i<l;i++)
+			if (!is_lower_bound(i))
+			{
+				const Qfloat *Q_i = Q.get_Q(i,l);
+				double alpha_i = alpha[i];
+				int j;
+				for (j=0;j<l;j++)
+					G[j] += alpha_i*Q_i[j];
+				if (is_upper_bound(i))
+					for (j=0;j<l;j++)
+						G_bar[j] += get_C(i) * Q_i[j];
+			}
+	}
+
+	// optimization step
+
+	int iter = 0;
+	int counter = min(l,1000)+1;
+
+	while(1)
+	{
+		// show progress and do shrinking
+
+		if (--counter == 0)
+		{
+			counter = min(l,1000);
+			if (shrinking) do_shrinking();
+			info("."); info_flush();
+		}
+
+		int i,j;
+		if (select_working_set(i,j)!=0)
+		{
+			// reconstruct the whole gradient
+			reconstruct_gradient();
+			// reset active set size and check
+			active_size = l;
+			info("*"); info_flush();
+			if (select_working_set(i,j)!=0)
+				break;
+			else
+				counter = 1;	// do shrinking next iteration
+		}
+		
+		++iter;
+
+		// update alpha[i] and alpha[j], handle bounds carefully
+		
+		const Qfloat *Q_i = Q.get_Q(i,active_size);
+		const Qfloat *Q_j = Q.get_Q(j,active_size);
+
+		double C_i = get_C(i);
+		double C_j = get_C(j);
+
+		double old_alpha_i = alpha[i];
+		double old_alpha_j = alpha[j];
+
+		if (y[i]!=y[j])
+		{
+			double quad_coef = Q_i[i]+Q_j[j]+2*Q_i[j];
+			if (quad_coef <= 0)
+				quad_coef = TAU;
+			double delta = (-G[i]-G[j])/quad_coef;
+			double diff = alpha[i] - alpha[j];
+			alpha[i] += delta;
+			alpha[j] += delta;
+			
+			if (diff > 0)
+			{
+				if (alpha[j] < 0)
+				{
+					alpha[j] = 0;
+					alpha[i] = diff;
+				}
+			}
+			else
+			{
+				if (alpha[i] < 0)
+				{
+					alpha[i] = 0;
+					alpha[j] = -diff;
+				}
+			}
+			if (diff > C_i - C_j)
+			{
+				if (alpha[i] > C_i)
+				{
+					alpha[i] = C_i;
+					alpha[j] = C_i - diff;
+				}
+			}
+			else
+			{
+				if (alpha[j] > C_j)
+				{
+					alpha[j] = C_j;
+					alpha[i] = C_j + diff;
+				}
+			}
+		}
+		else
+		{
+			double quad_coef = Q_i[i]+Q_j[j]-2*Q_i[j];
+			if (quad_coef <= 0)
+				quad_coef = TAU;
+			double delta = (G[i]-G[j])/quad_coef;
+			double sum = alpha[i] + alpha[j];
+			alpha[i] -= delta;
+			alpha[j] += delta;
+
+			if (sum > C_i)
+			{
+				if (alpha[i] > C_i)
+				{
+					alpha[i] = C_i;
+					alpha[j] = sum - C_i;
+				}
+			}
+			else
+			{
+				if (alpha[j] < 0)
+				{
+					alpha[j] = 0;
+					alpha[i] = sum;
+				}
+			}
+			if (sum > C_j)
+			{
+				if (alpha[j] > C_j)
+				{
+					alpha[j] = C_j;
+					alpha[i] = sum - C_j;
+				}
+			}
+			else
+			{
+				if (alpha[i] < 0)
+				{
+					alpha[i] = 0;
+					alpha[j] = sum;
+				}
+			}
+		}
+
+		// update G
+
+		double delta_alpha_i = alpha[i] - old_alpha_i;
+		double delta_alpha_j = alpha[j] - old_alpha_j;
+		
+		for (int k=0;k<active_size;k++)
+		{
+			G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
+		}
+
+		// update alpha_status and G_bar
+
+		{
+			bool ui = is_upper_bound(i);
+			bool uj = is_upper_bound(j);
+			update_alpha_status(i);
+			update_alpha_status(j);
+			int k;
+			if (ui != is_upper_bound(i))
+			{
+				Q_i = Q.get_Q(i,l);
+				if (ui)
+					for (k=0;k<l;k++)
+						G_bar[k] -= C_i * Q_i[k];
+				else
+					for (k=0;k<l;k++)
+						G_bar[k] += C_i * Q_i[k];
+			}
+
+			if (uj != is_upper_bound(j))
+			{
+				Q_j = Q.get_Q(j,l);
+				if (uj)
+					for (k=0;k<l;k++)
+						G_bar[k] -= C_j * Q_j[k];
+				else
+					for (k=0;k<l;k++)
+						G_bar[k] += C_j * Q_j[k];
+			}
+		}
+	}
+
+	// calculate rho
+
+	si->rho = calculate_rho();
+
+	// calculate objective value
+	{
+		double v = 0;
+		int i;
+		for (i=0;i<l;i++)
+			v += alpha[i] * (G[i] + b[i]);
+
+		si->obj = v/2;
+	}
+
+	// put back the solution
+	{
+		for (int i=0;i<l;i++)
+			alpha_[active_set[i]] = alpha[i];
+	}
+
+	// juggle everything back
+	/*{
+		for (int i=0;i<l;i++)
+			while(active_set[i] != i)
+				swap_index(i,active_set[i]);
+				// or Q.swap_index(i,active_set[i]);
+	}*/
+
+	si->upper_bound_p = Cp;
+	si->upper_bound_n = Cn;
+
+	info("\noptimization finished, #iter = %d\n",iter);
+
+	delete[] b;
+	delete[] y;
+	delete[] alpha;
+	delete[] alpha_status;
+	delete[] active_set;
+	delete[] G;
+	delete[] G_bar;
+}
+
+// return 1 if already optimal, return 0 otherwise
+int Solver::select_working_set(int &out_i, int &out_j)
+{
+	// return i,j such that
+	// i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
+	// j: minimizes the decrease of obj value
+	//    (if quadratic coefficient <= 0, replace it with tau)
+	//    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
+	
+	double Gmax = -INF;
+	int Gmax_idx = -1;
+	int Gmin_idx = -1;
+	double obj_diff_min = INF;
+
+	for (int t=0;t<active_size;t++)
+		if (y[t]==+1)	
+		{
+			if (!is_upper_bound(t))
+				if (-G[t] >= Gmax)
+				{
+					Gmax = -G[t];
+					Gmax_idx = t;
+				}
+		}
+		else
+		{
+			if (!is_lower_bound(t))
+				if (G[t] >= Gmax)
+				{
+					Gmax = G[t];
+					Gmax_idx = t;
+				}
+		}
+
+	int i = Gmax_idx;
+	const Qfloat *Q_i = NULL;
+	if (i != -1) // NULL Q_i not accessed: Gmax=-INF if i=-1
+		Q_i = Q->get_Q(i,active_size);
+
+	for (int j=0;j<active_size;j++)
+	{
+		if (y[j]==+1)
+		{
+			if (!is_lower_bound(j))
+			{
+				double grad_diff=Gmax+G[j];
+				if (grad_diff >= eps)
+				{
+					double obj_diff; 
+					double quad_coef=Q_i[i]+QD[j]-2*y[i]*Q_i[j];
+					if (quad_coef > 0)
+						obj_diff = -(grad_diff*grad_diff)/quad_coef;
+					else
+						obj_diff = -(grad_diff*grad_diff)/TAU;
+
+					if (obj_diff <= obj_diff_min)
+					{
+						Gmin_idx=j;
+						obj_diff_min = obj_diff;
+					}
+				}
+			}
+		}
+		else
+		{
+			if (!is_upper_bound(j))
+			{
+				double grad_diff= Gmax-G[j];
+				if (grad_diff >= eps)
+				{
+					double obj_diff; 
+					double quad_coef=Q_i[i]+QD[j]+2*y[i]*Q_i[j];
+					if (quad_coef > 0)
+						obj_diff = -(grad_diff*grad_diff)/quad_coef;
+					else
+						obj_diff = -(grad_diff*grad_diff)/TAU;
+
+					if (obj_diff <= obj_diff_min)
+					{
+						Gmin_idx=j;
+						obj_diff_min = obj_diff;
+					}
+				}
+			}
+		}
+	}
+
+	if (Gmin_idx == -1)
+ 		return 1;
+
+	out_i = Gmax_idx;
+	out_j = Gmin_idx;
+	return 0;
+}
+
+// return 1 if already optimal, return 0 otherwise
+int Solver::max_violating_pair(int &out_i, int &out_j)
+{
+	// return i,j: maximal violating pair
+
+	double Gmax1 = -INF;		// max { -y_i * grad(f)_i | i in I_up(\alpha) }
+	int Gmax1_idx = -1;
+
+	double Gmax2 = -INF;		// max { y_i * grad(f)_i | i in I_low(\alpha) }
+	int Gmax2_idx = -1;
+
+	for (int i=0;i<active_size;i++)
+	{
+		if (y[i]==+1)	// y = +1
+		{
+			if (!is_upper_bound(i))	// d = +1
+			{
+				if (-G[i] >= Gmax1)
+				{
+					Gmax1 = -G[i];
+					Gmax1_idx = i;
+				}
+			}
+			if (!is_lower_bound(i))	// d = -1
+			{
+				if (G[i] >= Gmax2)
+				{
+					Gmax2 = G[i];
+					Gmax2_idx = i;
+				}
+			}
+		}
+		else		// y = -1
+		{
+			if (!is_upper_bound(i))	// d = +1
+			{
+				if (-G[i] >= Gmax2)
+				{
+					Gmax2 = -G[i];
+					Gmax2_idx = i;
+				}
+			}
+			if (!is_lower_bound(i))	// d = -1
+			{
+				if (G[i] >= Gmax1)
+				{
+					Gmax1 = G[i];
+					Gmax1_idx = i;
+				}
+			}
+		}
+	}
+
+	if (Gmax1+Gmax2 < eps)
+ 		return 1;
+
+	out_i = Gmax1_idx;
+	out_j = Gmax2_idx;
+	return 0;
+}
+
+void Solver::do_shrinking()
+{
+	int i,j,k;
+	if (max_violating_pair(i,j)!=0) return;
+	double Gm1 = -y[j]*G[j];
+	double Gm2 = y[i]*G[i];
+
+	// shrink
+	
+	for (k=0;k<active_size;k++)
+	{
+		if (is_lower_bound(k))
+		{
+			if (y[k]==+1)
+			{
+				if (-G[k] >= Gm1) continue;
+			}
+			else	if (-G[k] >= Gm2) continue;
+		}
+		else if (is_upper_bound(k))
+		{
+			if (y[k]==+1)
+			{
+				if (G[k] >= Gm2) continue;
+			}
+			else	if (G[k] >= Gm1) continue;
+		}
+		else continue;
+
+		--active_size;
+		swap_index(k,active_size);
+		--k;	// look at the newcomer
+	}
+
+	// unshrink, check all variables again before final iterations
+
+	if (unshrinked || -(Gm1 + Gm2) > eps*10) return;
+	
+	unshrinked = true;
+	reconstruct_gradient();
+
+	for (k=l-1;k>=active_size;k--)
+	{
+		if (is_lower_bound(k))
+		{
+			if (y[k]==+1)
+			{
+				if (-G[k] < Gm1) continue;
+			}
+			else	if (-G[k] < Gm2) continue;
+		}
+		else if (is_upper_bound(k))
+		{
+			if (y[k]==+1)
+			{
+				if (G[k] < Gm2) continue;
+			}
+			else	if (G[k] < Gm1) continue;
+		}
+		else continue;
+
+		swap_index(k,active_size);
+		active_size++;
+		++k;	// look at the newcomer
+	}
+}
+
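+// Compute rho, the threshold of the decision function, from the KKT
+// conditions: average y_i*G[i] over the free variables when any exist,
+// otherwise take the midpoint of the feasible interval [lb,ub].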
+double Solver::calculate_rho()
+{
+	double r;
+	int nr_free = 0;
+	double ub = INF, lb = -INF, sum_free = 0;
+	for (int i=0;i<active_size;i++)
+	{
+		double yG = y[i]*G[i];
+
+		if (is_lower_bound(i))
+		{
+			if (y[i] > 0)
+				ub = min(ub,yG);
+			else
+				lb = max(lb,yG);
+		}
+		else if (is_upper_bound(i))
+		{
+			if (y[i] < 0)
+				ub = min(ub,yG);
+			else
+				lb = max(lb,yG);
+		}
+		else
+		{
+			++nr_free;
+			sum_free += yG;
+		}
+	}
+
+	if (nr_free>0)
+		r = sum_free/nr_free;
+	else
+		r = (ub+lb)/2;
+
+	return r;
+}
+
+//
+// Solver for nu-svm classification and regression
+//
+// additional constraint: e^T \alpha = constant
+//
+class Solver_NU : public Solver
+{
+public:
+	Solver_NU() {}
+	void Solve(int l, const QMatrix& Q, const double *b, const schar *y,
+		   double *alpha, double Cp, double Cn, double eps,
+		   SolutionInfo* si, int shrinking)
+	{
+		this->si = si;
+		Solver::Solve(l,Q,b,y,alpha,Cp,Cn,eps,si,shrinking);
+	}
+private:
+	SolutionInfo *si;
+	int select_working_set(int &i, int &j);
+	double calculate_rho();
+	void do_shrinking();
+};
+
+// return 1 if already optimal, return 0 otherwise
+int Solver_NU::select_working_set(int &out_i, int &out_j)
+{
+	// return i,j such that y_i = y_j and
+	// i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
+	// j: minimizes the decrease of obj value
+	//    (if quadratic coefficient <= 0, replace it with tau)
+	//    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
+
+	double Gmaxp = -INF;
+	int Gmaxp_idx = -1;
+
+	double Gmaxn = -INF;
+	int Gmaxn_idx = -1;
+
+	int Gmin_idx = -1;
+	double obj_diff_min = INF;
+
+	for (int t=0;t<active_size;t++)
+		if (y[t]==+1)
+		{
+			if (!is_upper_bound(t))
+				if (-G[t] >= Gmaxp)
+				{
+					Gmaxp = -G[t];
+					Gmaxp_idx = t;
+				}
+		}
+		else
+		{
+			if (!is_lower_bound(t))
+				if (G[t] >= Gmaxn)
+				{
+					Gmaxn = G[t];
+					Gmaxn_idx = t;
+				}
+		}
+
+	int ip = Gmaxp_idx;
+	int in = Gmaxn_idx;
+	const Qfloat *Q_ip = NULL;
+	const Qfloat *Q_in = NULL;
+	if (ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1
+		Q_ip = Q->get_Q(ip,active_size);
+	if (in != -1)
+		Q_in = Q->get_Q(in,active_size);
+
+	for (int j=0;j<active_size;j++)
+	{
+		if (y[j]==+1)
+		{
+			if (!is_lower_bound(j))	
+			{
+				double grad_diff=Gmaxp+G[j];
+				if (grad_diff >= eps)
+				{
+					double obj_diff; 
+					double quad_coef = Q_ip[ip]+QD[j]-2*Q_ip[j];
+					if (quad_coef > 0)
+						obj_diff = -(grad_diff*grad_diff)/quad_coef;
+					else
+						obj_diff = -(grad_diff*grad_diff)/TAU;
+
+					if (obj_diff <= obj_diff_min)
+					{
+						Gmin_idx=j;
+						obj_diff_min = obj_diff;
+					}
+				}
+			}
+		}
+		else
+		{
+			if (!is_upper_bound(j))
+			{
+				double grad_diff=Gmaxn-G[j];
+				if (grad_diff >= eps)
+				{
+					double obj_diff; 
+					double quad_coef = Q_in[in]+QD[j]-2*Q_in[j];
+					if (quad_coef > 0)
+						obj_diff = -(grad_diff*grad_diff)/quad_coef;
+					else
+						obj_diff = -(grad_diff*grad_diff)/TAU;
+
+					if (obj_diff <= obj_diff_min)
+					{
+						Gmin_idx=j;
+						obj_diff_min = obj_diff;
+					}
+				}
+			}
+		}
+	}
+
+	if (Gmin_idx == -1)
+ 		return 1;
+
+	if (y[Gmin_idx] == +1)
+		out_i = Gmaxp_idx;
+	else
+		out_i = Gmaxn_idx;
+	out_j = Gmin_idx;
+
+	return 0;
+}
+
+void Solver_NU::do_shrinking()
+{
+	double Gmax1 = -INF;	// max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) }
+	double Gmax2 = -INF;	// max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) }
+	double Gmax3 = -INF;	// max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) }
+	double Gmax4 = -INF;	// max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) }
+
+	// find maximal violating pair first
+	int k;
+	for (k=0;k<active_size;k++)
+	{
+		if (!is_upper_bound(k))
+		{
+			if (y[k]==+1)
+			{
+				if (-G[k] > Gmax1) Gmax1 = -G[k];
+			}
+			else	if (-G[k] > Gmax3) Gmax3 = -G[k];
+		}
+		if (!is_lower_bound(k))
+		{
+			if (y[k]==+1)
+			{	
+				if (G[k] > Gmax2) Gmax2 = G[k];
+			}
+			else	if (G[k] > Gmax4) Gmax4 = G[k];
+		}
+	}
+
+	// shrinking
+
+	double Gm1 = -Gmax2;
+	double Gm2 = -Gmax1;
+	double Gm3 = -Gmax4;
+	double Gm4 = -Gmax3;
+
+	for (k=0;k<active_size;k++)
+	{
+		if (is_lower_bound(k))
+		{
+			if (y[k]==+1)
+			{
+				if (-G[k] >= Gm1) continue;
+			}
+			else	if (-G[k] >= Gm3) continue;
+		}
+		else if (is_upper_bound(k))
+		{
+			if (y[k]==+1)
+			{
+				if (G[k] >= Gm2) continue;
+			}
+			else	if (G[k] >= Gm4) continue;
+		}
+		else continue;
+
+		--active_size;
+		swap_index(k,active_size);
+		--k;	// look at the newcomer
+	}
+
+	// unshrink, check all variables again before final iterations
+
+	if (unshrinked || max(-(Gm1+Gm2),-(Gm3+Gm4)) > eps*10) return;
+	
+	unshrinked = true;
+	reconstruct_gradient();
+
+	for (k=l-1;k>=active_size;k--)
+	{
+		if (is_lower_bound(k))
+		{
+			if (y[k]==+1)
+			{
+				if (-G[k] < Gm1) continue;
+			}
+			else	if (-G[k] < Gm3) continue;
+		}
+		else if (is_upper_bound(k))
+		{
+			if (y[k]==+1)
+			{
+				if (G[k] < Gm2) continue;
+			}
+			else	if (G[k] < Gm4) continue;
+		}
+		else continue;
+
+		swap_index(k,active_size);
+		active_size++;
+		++k;	// look at the newcomer
+	}
+}
+
+double Solver_NU::calculate_rho()
+{
+	int nr_free1 = 0,nr_free2 = 0;
+	double ub1 = INF, ub2 = INF;
+	double lb1 = -INF, lb2 = -INF;
+	double sum_free1 = 0, sum_free2 = 0;
+
+	for (int i=0;i<active_size;i++)
+	{
+		if (y[i]==+1)
+		{
+			if (is_lower_bound(i))
+				ub1 = min(ub1,G[i]);
+			else if (is_upper_bound(i))
+				lb1 = max(lb1,G[i]);
+			else
+			{
+				++nr_free1;
+				sum_free1 += G[i];
+			}
+		}
+		else
+		{
+			if (is_lower_bound(i))
+				ub2 = min(ub2,G[i]);
+			else if (is_upper_bound(i))
+				lb2 = max(lb2,G[i]);
+			else
+			{
+				++nr_free2;
+				sum_free2 += G[i];
+			}
+		}
+	}
+
+	double r1,r2;
+	if (nr_free1 > 0)
+		r1 = sum_free1/nr_free1;
+	else
+		r1 = (ub1+lb1)/2;
+	
+	if (nr_free2 > 0)
+		r2 = sum_free2/nr_free2;
+	else
+		r2 = (ub2+lb2)/2;
+	
+	si->r = (r1+r2)/2;
+	return (r1-r2)/2;
+}
+
+//
+// Q matrices for various formulations
+//
+class SVC_Q: public Kernel
+{ 
+public:
+	SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_)
+	:Kernel(prob.l, prob.x, param)
+	{
+		clone(y,y_,prob.l);
+		cache = new Cache(prob.l,(int)(param.cache_size*(1<<20)));
+		QD = new Qfloat[prob.l];
+		for (int i=0;i<prob.l;i++)
+			QD[i]= (Qfloat)(this->*kernel_function)(i,i);
+	}
+	
+	Qfloat *get_Q(int i, int len) const
+	{
+		Qfloat *data;
+		int start;
+		if ((start = cache->get_data(i,&data,len)) < len)
+		{
+			for (int j=start;j<len;j++)
+				data[j] = (Qfloat)(y[i]*y[j]*(this->*kernel_function)(i,j));
+		}
+		return data;
+	}
+
+	Qfloat *get_QD() const
+	{
+		return QD;
+	}
+
+	void swap_index(int i, int j) const
+	{
+		cache->swap_index(i,j);
+		Kernel::swap_index(i,j);
+		swap(y[i],y[j]);
+		swap(QD[i],QD[j]);
+	}
+
+	~SVC_Q()
+	{
+		delete[] y;
+		delete cache;
+		delete[] QD;
+	}
+private:
+	schar *y;
+	Cache *cache;
+	Qfloat *QD;
+};
+
+class ONE_CLASS_Q: public Kernel
+{
+public:
+	ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param)
+	:Kernel(prob.l, prob.x, param)
+	{
+		cache = new Cache(prob.l,(int)(param.cache_size*(1<<20)));
+		QD = new Qfloat[prob.l];
+		for (int i=0;i<prob.l;i++)
+			QD[i]= (Qfloat)(this->*kernel_function)(i,i);
+	}
+	
+	Qfloat *get_Q(int i, int len) const
+	{
+		Qfloat *data;
+		int start;
+		if ((start = cache->get_data(i,&data,len)) < len)
+		{
+			for (int j=start;j<len;j++)
+				data[j] = (Qfloat)(this->*kernel_function)(i,j);
+		}
+		return data;
+	}
+
+	Qfloat *get_QD() const
+	{
+		return QD;
+	}
+
+	void swap_index(int i, int j) const
+	{
+		cache->swap_index(i,j);
+		Kernel::swap_index(i,j);
+		swap(QD[i],QD[j]);
+	}
+
+	~ONE_CLASS_Q()
+	{
+		delete cache;
+		delete[] QD;
+	}
+private:
+	Cache *cache;
+	Qfloat *QD;
+};
+
+class SVR_Q: public Kernel
+{ 
+public:
+	SVR_Q(const svm_problem& prob, const svm_parameter& param)
+	:Kernel(prob.l, prob.x, param)
+	{
+		l = prob.l;
+		cache = new Cache(l,(int)(param.cache_size*(1<<20)));
+		QD = new Qfloat[2*l];
+		sign = new schar[2*l];
+		index = new int[2*l];
+		for (int k=0;k<l;k++)
+		{
+			sign[k] = 1;
+			sign[k+l] = -1;
+			index[k] = k;
+			index[k+l] = k;
+			QD[k]= (Qfloat)(this->*kernel_function)(k,k);
+			QD[k+l]=QD[k];
+		}
+		buffer[0] = new Qfloat[2*l];
+		buffer[1] = new Qfloat[2*l];
+		next_buffer = 0;
+	}
+
+	void swap_index(int i, int j) const
+	{
+		swap(sign[i],sign[j]);
+		swap(index[i],index[j]);
+		swap(QD[i],QD[j]);
+	}
+	
+	Qfloat *get_Q(int i, int len) const
+	{
+		Qfloat *data;
+		int real_i = index[i];
+		if (cache->get_data(real_i,&data,l) < l)
+		{
+			for (int j=0;j<l;j++)
+				data[j] = (Qfloat)(this->*kernel_function)(real_i,j);
+		}
+
+		// reorder and copy
+		Qfloat *buf = buffer[next_buffer];
+		next_buffer = 1 - next_buffer;
+		schar si = sign[i];
+		for (int j=0;j<len;j++)
+			buf[j] = si * sign[j] * data[index[j]];
+		return buf;
+	}
+
+	Qfloat *get_QD() const
+	{
+		return QD;
+	}
+
+	~SVR_Q()
+	{
+		delete cache;
+		delete[] sign;
+		delete[] index;
+		delete[] buffer[0];
+		delete[] buffer[1];
+		delete[] QD;
+	}
+private:
+	int l;
+	Cache *cache;
+	schar *sign;
+	int *index;
+	mutable int next_buffer;
+	Qfloat *buffer[2];
+	Qfloat *QD;
+};
+
+//
+// construct and solve various formulations
+//
+static void solve_c_svc(
+	const svm_problem *prob, const svm_parameter* param,
+	double *alpha, Solver::SolutionInfo* si, double Cp, double Cn)
+{
+	int l = prob->l;
+	double *minus_ones = new double[l];
+	schar *y = new schar[l];
+
+	int i;
+
+	for (i=0;i<l;i++)
+	{
+		alpha[i] = 0;
+		minus_ones[i] = -1;
+		if (prob->y[i] > 0) y[i] = +1; else y[i]=-1;
+	}
+
+	Solver s;
+	s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y,
+		alpha, Cp, Cn, param->eps, si, param->shrinking);
+
+	double sum_alpha=0;
+	for (i=0;i<l;i++)
+		sum_alpha += alpha[i];
+
+	if (Cp==Cn)
+		info("nu = %f\n", sum_alpha/(Cp*prob->l));
+
+	for (i=0;i<l;i++)
+		alpha[i] *= y[i];
+
+	delete[] minus_ones;
+	delete[] y;
+}
+
+static void solve_nu_svc(
+	const svm_problem *prob, const svm_parameter *param,
+	double *alpha, Solver::SolutionInfo* si)
+{
+	int i;
+	int l = prob->l;
+	double nu = param->nu;
+
+	schar *y = new schar[l];
+
+	for (i=0;i<l;i++)
+		if (prob->y[i]>0)
+			y[i] = +1;
+		else
+			y[i] = -1;
+
+	double sum_pos = nu*l/2;
+	double sum_neg = nu*l/2;
+
+	for (i=0;i<l;i++)
+		if (y[i] == +1)
+		{
+			alpha[i] = min(1.0,sum_pos);
+			sum_pos -= alpha[i];
+		}
+		else
+		{
+			alpha[i] = min(1.0,sum_neg);
+			sum_neg -= alpha[i];
+		}
+
+	double *zeros = new double[l];
+
+	for (i=0;i<l;i++)
+		zeros[i] = 0;
+
+	Solver_NU s;
+	s.Solve(l, SVC_Q(*prob,*param,y), zeros, y,
+		alpha, 1.0, 1.0, param->eps, si,  param->shrinking);
+	double r = si->r;
+
+	info("C = %f\n",1/r);
+
+	for (i=0;i<l;i++)
+		alpha[i] *= y[i]/r;
+
+	si->rho /= r;
+	si->obj /= (r*r);
+	si->upper_bound_p = 1/r;
+	si->upper_bound_n = 1/r;
+
+	delete[] y;
+	delete[] zeros;
+}
+
+static void solve_one_class(
+	const svm_problem *prob, const svm_parameter *param,
+	double *alpha, Solver::SolutionInfo* si)
+{
+	int l = prob->l;
+	double *zeros = new double[l];
+	schar *ones = new schar[l];
+	int i;
+
+	int n = (int)(param->nu*prob->l);	// # of alpha's at upper bound
+
+	for (i=0;i<n;i++)
+		alpha[i] = 1;
+	if (n<prob->l)
+		alpha[n] = param->nu * prob->l - n;
+	for (i=n+1;i<l;i++)
+		alpha[i] = 0;
+
+	for (i=0;i<l;i++)
+	{
+		zeros[i] = 0;
+		ones[i] = 1;
+	}
+
+	Solver s;
+	s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones,
+		alpha, 1.0, 1.0, param->eps, si, param->shrinking);
+
+	delete[] zeros;
+	delete[] ones;
+}
+
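+// epsilon-SVR is rewritten as a problem in 2l variables: entries 0..l-1 hold
+// the alpha+ part (sign +1) and entries l..2l-1 the alpha- part (sign -1);
+// the returned alpha[i] is their difference.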
+static void solve_epsilon_svr(
+	const svm_problem *prob, const svm_parameter *param,
+	double *alpha, Solver::SolutionInfo* si)
+{
+	int l = prob->l;
+	double *alpha2 = new double[2*l];
+	double *linear_term = new double[2*l];
+	schar *y = new schar[2*l];
+	int i;
+
+	for (i=0;i<l;i++)
+	{
+		alpha2[i] = 0;
+		linear_term[i] = param->p - prob->y[i];
+		y[i] = 1;
+
+		alpha2[i+l] = 0;
+		linear_term[i+l] = param->p + prob->y[i];
+		y[i+l] = -1;
+	}
+
+	Solver s;
+	s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
+		alpha2, param->C, param->C, param->eps, si, param->shrinking);
+
+	double sum_alpha = 0;
+	for (i=0;i<l;i++)
+	{
+		alpha[i] = alpha2[i] - alpha2[i+l];
+		sum_alpha += fabs(alpha[i]);
+	}
+	info("nu = %f\n",sum_alpha/(param->C*l));
+
+	delete[] alpha2;
+	delete[] linear_term;
+	delete[] y;
+}
+
+static void solve_nu_svr(
+	const svm_problem *prob, const svm_parameter *param,
+	double *alpha, Solver::SolutionInfo* si)
+{
+	int l = prob->l;
+	double C = param->C;
+	double *alpha2 = new double[2*l];
+	double *linear_term = new double[2*l];
+	schar *y = new schar[2*l];
+	int i;
+
+	double sum = C * param->nu * l / 2;
+	for (i=0;i<l;i++)
+	{
+		alpha2[i] = alpha2[i+l] = min(sum,C);
+		sum -= alpha2[i];
+
+		linear_term[i] = - prob->y[i];
+		y[i] = 1;
+
+		linear_term[i+l] = prob->y[i];
+		y[i+l] = -1;
+	}
+
+	Solver_NU s;
+	s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
+		alpha2, C, C, param->eps, si, param->shrinking);
+
+	info("epsilon = %f\n",-si->r);
+
+	for (i=0;i<l;i++)
+		alpha[i] = alpha2[i] - alpha2[i+l];
+
+	delete[] alpha2;
+	delete[] linear_term;
+	delete[] y;
+}
+
+//
+// decision_function
+//
+struct decision_function
+{
+	double *alpha;
+	double rho;	
+};
+
+decision_function svm_train_one
+(
+	const svm_problem *prob, const svm_parameter *param,
+	double Cp, double Cn)
+{
+	double *alpha = Malloc(double,prob->l);
+	Solver::SolutionInfo si;
+	switch(param->svm_type)
+	{
+		case C_SVC:
+			solve_c_svc(prob,param,alpha,&si,Cp,Cn);
+			break;
+		case NU_SVC:
+			solve_nu_svc(prob,param,alpha,&si);
+			break;
+		case ONE_CLASS:
+			solve_one_class(prob,param,alpha,&si);
+			break;
+		case EPSILON_SVR:
+			solve_epsilon_svr(prob,param,alpha,&si);
+			break;
+		case NU_SVR:
+			solve_nu_svr(prob,param,alpha,&si);
+			break;
+	}
+
+	info("obj = %f, rho = %f\n",si.obj,si.rho);
+
+	// output SVs
+
+	int nSV = 0;
+	int nBSV = 0;
+	for (int i=0;i<prob->l;i++)
+	{
+		if (fabs(alpha[i]) > 0)
+		{
+			++nSV;
+			if (prob->y[i] > 0)
+			{
+				if (fabs(alpha[i]) >= si.upper_bound_p)
+					++nBSV;
+			}
+			else
+			{
+				if (fabs(alpha[i]) >= si.upper_bound_n)
+					++nBSV;
+			}
+		}
+	}
+
+	info("nSV = %d, nBSV = %d\n",nSV,nBSV);
+
+	decision_function f;
+	f.alpha = alpha;
+	f.rho = si.rho;
+	return f;
+}
+
+//
+// svm_model
+//
+struct svm_model
+{
+	svm_parameter param;	// parameter
+	int nr_class;		// number of classes, = 2 in regression/one class svm
+	int l;			// total #SV
+	svm_node **SV;		// SVs (SV[l])
+	double **sv_coef;	// coefficients for SVs in decision functions (sv_coef[n-1][l])
+	double *rho;		// constants in decision functions (rho[n*(n-1)/2])
+	double *probA;          // pairwise probability information
+	double *probB;
+
+	// for classification only
+
+	int *label;		// label of each class (label[n])
+	int *nSV;		// number of SVs for each class (nSV[n])
+				// nSV[0] + nSV[1] + ... + nSV[n-1] = l
+	// XXX
+	int free_sv;		// 1 if svm_model is created by svm_load_model
+				// 0 if svm_model is created by svm_train
+};
+
+// Platt's binary SVM Probabilistic Output: an improvement from Lin et al.
+void sigmoid_train(
+	int l, const double *dec_values, const double *labels, 
+	double& A, double& B)
+{
+	double prior1=0, prior0 = 0;
+	int i;
+
+	for (i=0;i<l;i++)
+		if (labels[i] > 0) prior1+=1;
+		else prior0+=1;
+	
+	int max_iter=100; 	// Maximal number of iterations
+	double min_step=1e-10;	// Minimal step taken in line search
+	double sigma=1e-3;	// For numerically strict PD of Hessian
+	double eps=1e-5;
+	double hiTarget=(prior1+1.0)/(prior1+2.0);
+	double loTarget=1/(prior0+2.0);
+	double *t=Malloc(double,l);
+	double fApB,p,q,h11,h22,h21,g1,g2,det,dA,dB,gd,stepsize;
+	double newA,newB,newf,d1,d2;
+	int iter; 
+	
+	// Initial point and initial function value
+	A=0.0; B=log((prior0+1.0)/(prior1+1.0));
+	double fval = 0.0;
+
+	for (i=0;i<l;i++)
+	{
+		if (labels[i]>0) t[i]=hiTarget;
+		else t[i]=loTarget;
+		fApB = dec_values[i]*A+B;
+		if (fApB>=0)
+			fval += t[i]*fApB + log(1+exp(-fApB));
+		else
+			fval += (t[i] - 1)*fApB +log(1+exp(fApB));
+	}
+	for (iter=0;iter<max_iter;iter++)
+	{
+		// Update Gradient and Hessian (use H' = H + sigma I)
+		h11=sigma; // numerically ensures strict PD
+		h22=sigma;
+		h21=0.0;g1=0.0;g2=0.0;
+		for (i=0;i<l;i++)
+		{
+			fApB = dec_values[i]*A+B;
+			if (fApB >= 0)
+			{
+				p=exp(-fApB)/(1.0+exp(-fApB));
+				q=1.0/(1.0+exp(-fApB));
+			}
+			else
+			{
+				p=1.0/(1.0+exp(fApB));
+				q=exp(fApB)/(1.0+exp(fApB));
+			}
+			d2=p*q;
+			h11+=dec_values[i]*dec_values[i]*d2;
+			h22+=d2;
+			h21+=dec_values[i]*d2;
+			d1=t[i]-p;
+			g1+=dec_values[i]*d1;
+			g2+=d1;
+		}
+
+		// Stopping Criteria
+		if (fabs(g1)<eps && fabs(g2)<eps)
+			break;
+
+		// Finding Newton direction: -inv(H') * g
+		det=h11*h22-h21*h21;
+		dA=-(h22*g1 - h21 * g2) / det;
+		dB=-(-h21*g1+ h11 * g2) / det;
+		gd=g1*dA+g2*dB;
+
+
+		stepsize = 1; 		// Line Search
+		while (stepsize >= min_step)
+		{
+			newA = A + stepsize * dA;
+			newB = B + stepsize * dB;
+
+			// New function value
+			newf = 0.0;
+			for (i=0;i<l;i++)
+			{
+				fApB = dec_values[i]*newA+newB;
+				if (fApB >= 0)
+					newf += t[i]*fApB + log(1+exp(-fApB));
+				else
+					newf += (t[i] - 1)*fApB +log(1+exp(fApB));
+			}
+			// Check sufficient decrease
+			if (newf<fval+0.0001*stepsize*gd)
+			{
+				A=newA;B=newB;fval=newf;
+				break;
+			}
+			else
+				stepsize = stepsize / 2.0;
+		}
+
+		if (stepsize < min_step)
+		{
+			info("Line search fails in two-class probability estimates\n");
+			break;
+		}
+	}
+
+	if (iter>=max_iter)
+		info("Reaching maximal iterations in two-class probability estimates\n");
+	free(t);
+}
+
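+// Map a decision value to a probability with the fitted sigmoid
+// 1/(1+exp(A*f+B)); the two branches are algebraically identical and only
+// avoid overflowing exp() for large |A*f+B|.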
+double sigmoid_predict(double decision_value, double A, double B)
+{
+	double fApB = decision_value*A+B;
+	if (fApB >= 0)
+		return exp(-fApB)/(1.0+exp(-fApB));
+	else
+		return 1.0/(1+exp(fApB)) ;
+}
+
+// Method 2 from the multiclass_prob paper by Wu, Lin, and Weng
+void multiclass_probability(int k, double **r, double *p)
+{
+	int t,j;
+	int iter = 0, max_iter=100;
+	double **Q=Malloc(double *,k);
+	double *Qp=Malloc(double,k);
+	double pQp, eps=0.005/k;
+	
+	for (t=0;t<k;t++)
+	{
+		p[t]=1.0/k;  // Valid if k = 1
+		Q[t]=Malloc(double,k);
+		Q[t][t]=0;
+		for (j=0;j<t;j++)
+		{
+			Q[t][t]+=r[j][t]*r[j][t];
+			Q[t][j]=Q[j][t];
+		}
+		for (j=t+1;j<k;j++)
+		{
+			Q[t][t]+=r[j][t]*r[j][t];
+			Q[t][j]=-r[j][t]*r[t][j];
+		}
+	}
+	for (iter=0;iter<max_iter;iter++)
+	{
+		// stopping condition: recalculate Qp, pQp for numerical accuracy
+		pQp=0;
+		for (t=0;t<k;t++)
+		{
+			Qp[t]=0;
+			for (j=0;j<k;j++)
+				Qp[t]+=Q[t][j]*p[j];
+			pQp+=p[t]*Qp[t];
+		}
+		double max_error=0;
+		for (t=0;t<k;t++)
+		{
+			double error=fabs(Qp[t]-pQp);
+			if (error>max_error)
+				max_error=error;
+		}
+		if (max_error<eps) break;
+		
+		for (t=0;t<k;t++)
+		{
+			double diff=(-Qp[t]+pQp)/Q[t][t];
+			p[t]+=diff;
+			pQp=(pQp+diff*(diff*Q[t][t]+2*Qp[t]))/(1+diff)/(1+diff);
+			for (j=0;j<k;j++)
+			{
+				Qp[j]=(Qp[j]+diff*Q[t][j])/(1+diff);
+				p[j]/=(1+diff);
+			}
+		}
+	}
+	if (iter>=max_iter)
+		info("Exceeds max_iter in multiclass_prob\n");
+	for (t=0;t<k;t++) free(Q[t]);
+	free(Q);
+	free(Qp);
+}
+
+// Cross-validation decision values for probability estimates
+void svm_binary_svc_probability(
+	const svm_problem *prob, const svm_parameter *param,
+	double Cp, double Cn, double& probA, double& probB)
+{
+	int i;
+	int nr_fold = 5;
+	int *perm = Malloc(int,prob->l);
+	double *dec_values = Malloc(double,prob->l);
+
+	// random shuffle
+	for (i=0;i<prob->l;i++) perm[i]=i;
+	for (i=0;i<prob->l;i++)
+	{
+		int j = i+rand()%(prob->l-i);
+		swap(perm[i],perm[j]);
+	}
+	for (i=0;i<nr_fold;i++)
+	{
+		int begin = i*prob->l/nr_fold;
+		int end = (i+1)*prob->l/nr_fold;
+		int j,k;
+		struct svm_problem subprob;
+
+		subprob.l = prob->l-(end-begin);
+		subprob.x = Malloc(struct svm_node*,subprob.l);
+		subprob.y = Malloc(double,subprob.l);
+			
+		k=0;
+		for (j=0;j<begin;j++)
+		{
+			subprob.x[k] = prob->x[perm[j]];
+			subprob.y[k] = prob->y[perm[j]];
+			++k;
+		}
+		for (j=end;j<prob->l;j++)
+		{
+			subprob.x[k] = prob->x[perm[j]];
+			subprob.y[k] = prob->y[perm[j]];
+			++k;
+		}
+		int p_count=0,n_count=0;
+		for (j=0;j<k;j++)
+			if (subprob.y[j]>0)
+				p_count++;
+			else
+				n_count++;
+
+		if (p_count==0 && n_count==0)
+			for (j=begin;j<end;j++)
+				dec_values[perm[j]] = 0;
+		else if (p_count > 0 && n_count == 0)
+			for (j=begin;j<end;j++)
+				dec_values[perm[j]] = 1;
+		else if (p_count == 0 && n_count > 0)
+			for (j=begin;j<end;j++)
+				dec_values[perm[j]] = -1;
+		else
+		{
+			svm_parameter subparam = *param;
+			subparam.probability=0;
+			subparam.C=1.0;
+			subparam.nr_weight=2;
+			subparam.weight_label = Malloc(int,2);
+			subparam.weight = Malloc(double,2);
+			subparam.weight_label[0]=+1;
+			subparam.weight_label[1]=-1;
+			subparam.weight[0]=Cp;
+			subparam.weight[1]=Cn;
+			struct svm_model *submodel = svm_train(&subprob,&subparam);
+			for (j=begin;j<end;j++)
+			{
+				svm_predict_values(submodel,prob->x[perm[j]],&(dec_values[perm[j]])); 
+				// ensure +1, -1 order; this is why the CV subroutine is not reused here
+				dec_values[perm[j]] *= submodel->label[0];
+			}		
+			svm_destroy_model(submodel);
+			svm_destroy_param(&subparam);
+			free(subprob.x);
+			free(subprob.y);
+		}
+	}		
+	sigmoid_train(prob->l,dec_values,prob->y,probA,probB);
+	free(dec_values);
+	free(perm);
+}
+
+// Return parameter of a Laplace distribution 
+double svm_svr_probability(
+	const svm_problem *prob, const svm_parameter *param)
+{
+	int i;
+	int nr_fold = 5;
+	double *ymv = Malloc(double,prob->l);
+	double mae = 0;
+
+	svm_parameter newparam = *param;
+	newparam.probability = 0;
+	svm_cross_validation(prob,&newparam,nr_fold,ymv);
+	for (i=0;i<prob->l;i++)
+	{
+		ymv[i]=prob->y[i]-ymv[i];
+		mae += fabs(ymv[i]);
+	}		
+	mae /= prob->l;
+	double std=sqrt(2*mae*mae);
+	int count=0;
+	mae=0;
+	for (i=0;i<prob->l;i++)
+		if (fabs(ymv[i]) > 5*std)
+			count=count+1;
+		else
+			mae+=fabs(ymv[i]);
+	mae /= (prob->l-count);
+	info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma= %g\n",mae);
+	free(ymv);
+	return mae;
+}
+
+
+// label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
+// perm, length l, must be allocated before calling this subroutine
+void svm_group_classes(const svm_problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
+{
+	int l = prob->l;
+	int max_nr_class = 16;
+	int nr_class = 0;
+	int *label = Malloc(int,max_nr_class);
+	int *count = Malloc(int,max_nr_class);
+	int *data_label = Malloc(int,l);	
+	int i;
+
+	for (i=0;i<l;i++)
+	{
+		int this_label = (int)prob->y[i];
+		int j;
+		for (j=0;j<nr_class;j++)
+		{
+			if (this_label == label[j])
+			{
+				++count[j];
+				break;
+			}
+		}
+		data_label[i] = j;
+		if (j == nr_class)
+		{
+			if (nr_class == max_nr_class)
+			{
+				max_nr_class *= 2;
+				label = (int *)realloc(label,max_nr_class*sizeof(int));
+				count = (int *)realloc(count,max_nr_class*sizeof(int));
+			}
+			label[nr_class] = this_label;
+			count[nr_class] = 1;
+			++nr_class;
+		}
+	}
+
+	int *start = Malloc(int,nr_class);
+	start[0] = 0;
+	for (i=1;i<nr_class;i++)
+		start[i] = start[i-1]+count[i-1];
+	for (i=0;i<l;i++)
+	{
+		perm[start[data_label[i]]] = i;
+		++start[data_label[i]];
+	}
+	start[0] = 0;
+	for (i=1;i<nr_class;i++)
+		start[i] = start[i-1]+count[i-1];
+
+	*nr_class_ret = nr_class;
+	*label_ret = label;
+	*start_ret = start;
+	*count_ret = count;
+	free(data_label);
+}
+
+//
+// Interface functions
+//
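+// Train a model. Regression and one-class problems are solved directly;
+// classification trains nr_class*(nr_class-1)/2 one-vs-one binary problems
+// and collects their support vectors and coefficients into one model.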
+svm_model* svm_train(const svm_problem *prob, const svm_parameter *param)
+{
+	svm_model *model = Malloc(svm_model,1);
+	model->param = *param;
+	model->free_sv = 0;	// XXX
+
+	if (param->svm_type == ONE_CLASS || param->svm_type == EPSILON_SVR || param->svm_type == NU_SVR)
+	{
+		// regression or one-class-svm
+		model->nr_class = 2;
+		model->label = NULL;
+		model->nSV = NULL;
+		model->probA = NULL; model->probB = NULL;
+		model->sv_coef = Malloc(double *,1);
+
+		if (param->probability && 
+		   (param->svm_type == EPSILON_SVR ||
+		    param->svm_type == NU_SVR))
+		{
+			model->probA = Malloc(double,1);
+			model->probA[0] = svm_svr_probability(prob,param);
+		}
+
+		decision_function f = svm_train_one(prob,param,0,0);
+		model->rho = Malloc(double,1);
+		model->rho[0] = f.rho;
+
+		int nSV = 0;
+		int i;
+		for (i=0;i<prob->l;i++)
+        {
+			if (fabs(f.alpha[i]) > 0) 
+            {
+                ++nSV;
+            }
+        }
+		model->l = nSV;
+		model->SV = Malloc(svm_node*, nSV);
+		model->sv_coef[0] = Malloc(double,nSV);
+		int j = 0;
+		for (i=0;i<prob->l;i++)
+        {
+			if (fabs(f.alpha[i]) > 0)
+			{
+				model->SV[j] = prob->x[i];
+				model->sv_coef[0][j] = f.alpha[i];
+				++j;
+			}		
+        }
+		free(f.alpha);
+	}
+	else
+	{
+		// classification
+		int l = prob->l;
+		int nr_class;
+		int *label = NULL;
+		int *start = NULL;
+		int *count = NULL;
+		int *perm = Malloc(int,l);
+
+		// group training data of the same class
+		svm_group_classes(prob,&nr_class,&label,&start,&count,perm);		
+		svm_node** x = Malloc(svm_node *,l);
+		int i;
+		for (i=0;i<l;i++)
+        {
+			x[i] = prob->x[perm[i]];
+        }
+
+		// calculate weighted C
+
+		double *weighted_C = Malloc(double, nr_class);
+		for (i=0;i<nr_class;i++)
+        {
+			weighted_C[i] = param->C;
+        }
+		for (i=0;i<param->nr_weight;i++)
+		{	
+			int j;
+			for (j=0;j<nr_class;j++)
+				if (param->weight_label[i] == label[j])
+					break;
+			if (j == nr_class)
+            {
+				fprintf(stderr,"warning: class label %d specified in weight is not found\n", param->weight_label[i]);
+            }
+			else
+            {
+				weighted_C[j] *= param->weight[i];
+            }
+		}
+
+		// train k*(k-1)/2 models
+		
+		bool *nonzero = Malloc(bool,l);
+		for (i=0;i<l;i++)
+        {
+			nonzero[i] = false;
+        }
+		decision_function *f = Malloc(decision_function,nr_class*(nr_class-1)/2);
+
+		double *probA=NULL,*probB=NULL;
+		if (param->probability)
+		{
+			probA=Malloc(double,nr_class*(nr_class-1)/2);
+			probB=Malloc(double,nr_class*(nr_class-1)/2);
+		}
+
+		int p = 0;
+		for (i=0;i<nr_class;i++)
+			for (int j=i+1;j<nr_class;j++)
+			{
+				svm_problem sub_prob;
+				int si = start[i], sj = start[j];
+				int ci = count[i], cj = count[j];
+				sub_prob.l = ci+cj;
+				sub_prob.x = Malloc(svm_node *,sub_prob.l);
+				sub_prob.y = Malloc(double,sub_prob.l);
+				int k;
+				for (k=0;k<ci;k++)
+				{
+					sub_prob.x[k] = x[si+k];
+					sub_prob.y[k] = +1;
+				}
+				for (k=0;k<cj;k++)
+				{
+					sub_prob.x[ci+k] = x[sj+k];
+					sub_prob.y[ci+k] = -1;
+				}
+
+				if (param->probability)
+					svm_binary_svc_probability(&sub_prob,param,weighted_C[i],weighted_C[j],probA[p],probB[p]);
+
+				f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]);
+				for (k=0;k<ci;k++)
+					if (!nonzero[si+k] && fabs(f[p].alpha[k]) > 0)
+						nonzero[si+k] = true;
+				for (k=0;k<cj;k++)
+					if (!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0)
+						nonzero[sj+k] = true;
+				free(sub_prob.x);
+				free(sub_prob.y);
+				++p;
+			}
+
+		// build output
+
+		model->nr_class = nr_class;
+		
+		model->label = Malloc(int,nr_class);
+		for (i=0; i<nr_class; i++)
+        {
+			model->label[i] = label[i];
+        }
+		
+		model->rho = Malloc(double,nr_class*(nr_class-1)/2);
+		for (i=0;i<nr_class*(nr_class-1)/2;i++)
+        {
+			model->rho[i] = f[i].rho;
+        }
+
+		if (param->probability)
+		{
+			model->probA = Malloc(double,nr_class*(nr_class-1)/2);
+			model->probB = Malloc(double,nr_class*(nr_class-1)/2);
+			for (i=0;i<nr_class*(nr_class-1)/2;i++)
+			{
+				model->probA[i] = probA[i];
+				model->probB[i] = probB[i];
+			}
+		}
+		else
+		{
+			model->probA=NULL;
+			model->probB=NULL;
+		}
+
+		int total_sv = 0;
+		int* nz_count = Malloc(int,nr_class);
+		model->nSV = Malloc(int,nr_class);
+		for (i=0;i<nr_class;i++)
+		{
+			int nSV = 0;
+			for (int j=0;j<count[i];j++)
+            {
+				if (nonzero[start[i]+j])
+				{	
+					++nSV;
+					++total_sv;
+				}
+            }
+			model->nSV[i] = nSV;
+			nz_count[i] = nSV;
+		}
+		
+		info("Total nSV = %d\n",total_sv);
+
+		model->l = total_sv;
+		model->SV = Malloc(svm_node *,total_sv);
+		p = 0;
+		for (i=0;i<l;i++)
+			if (nonzero[i]) model->SV[p++] = x[i];
+
+		int *nz_start = Malloc(int,nr_class);
+		nz_start[0] = 0;
+		for (i=1;i<nr_class;i++)
+			nz_start[i] = nz_start[i-1]+nz_count[i-1];
+
+		model->sv_coef = Malloc(double *,nr_class-1);
+		for (i=0;i<nr_class-1;i++)
+			model->sv_coef[i] = Malloc(double,total_sv);
+
+		p = 0;
+		for (i=0;i<nr_class;i++)
+			for (int j=i+1;j<nr_class;j++)
+			{
+				// classifier (i,j): coefficients with
+				// i are in sv_coef[j-1][nz_start[i]...],
+				// j are in sv_coef[i][nz_start[j]...]
+
+				int si = start[i];
+				int sj = start[j];
+				int ci = count[i];
+				int cj = count[j];
+				
+				int q = nz_start[i];
+				int k;
+				for (k=0;k<ci;k++)
+					if (nonzero[si+k])
+						model->sv_coef[j-1][q++] = f[p].alpha[k];
+				q = nz_start[j];
+				for (k=0;k<cj;k++)
+					if (nonzero[sj+k])
+						model->sv_coef[i][q++] = f[p].alpha[ci+k];
+				++p;
+			}
+		
+		free(label);
+		free(probA);
+		free(probB);
+		free(count);
+		free(perm);
+		free(start);
+		free(x);
+		free(weighted_C);
+		free(nonzero);
+		for (i=0;i<nr_class*(nr_class-1)/2;i++)
+			free(f[i].alpha);
+		free(f);
+		free(nz_count);
+		free(nz_start);
+	}
+	return model;
+}
+
+// Stratified cross validation
+void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold, double *target)
+{
+	int i;
+	int *fold_start = Malloc(int,nr_fold+1);
+	int l = prob->l;
+	int *perm = Malloc(int,l);
+	int nr_class;
+
+	// stratified cv may not give the leave-one-out rate
+	// (splitting each class into l folds can leave some folds with zero elements)
+	if ((param->svm_type == C_SVC ||
+	    param->svm_type == NU_SVC) && nr_fold < l)
+	{
+		int *start = NULL;
+		int *label = NULL;
+		int *count = NULL;
+		svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
+
+		// random shuffle and then data grouped by fold using the array perm
+		int *fold_count = Malloc(int,nr_fold);
+		int c;
+		int *index = Malloc(int,l);
+		for (i=0;i<l;i++)
+			index[i]=perm[i];
+		for (c=0; c<nr_class; c++) 
+			for (i=0;i<count[c];i++)
+			{
+				int j = i+rand()%(count[c]-i);
+				swap(index[start[c]+j],index[start[c]+i]);
+			}
+		for (i=0;i<nr_fold;i++)
+		{
+			fold_count[i] = 0;
+			for (c=0; c<nr_class;c++)
+				fold_count[i]+=(i+1)*count[c]/nr_fold-i*count[c]/nr_fold;
+		}
+		fold_start[0]=0;
+		for (i=1;i<=nr_fold;i++)
+			fold_start[i] = fold_start[i-1]+fold_count[i-1];
+		for (c=0; c<nr_class;c++)
+			for (i=0;i<nr_fold;i++)
+			{
+				int begin = start[c]+i*count[c]/nr_fold;
+				int end = start[c]+(i+1)*count[c]/nr_fold;
+				for (int j=begin;j<end;j++)
+				{
+					perm[fold_start[i]] = index[j];
+					fold_start[i]++;
+				}
+			}
+		fold_start[0]=0;
+		for (i=1;i<=nr_fold;i++)
+			fold_start[i] = fold_start[i-1]+fold_count[i-1];
+		free(start);	
+		free(label);
+		free(count);	
+		free(index);
+		free(fold_count);
+	}
+	else
+	{
+		for (i=0;i<l;i++) perm[i]=i;
+		for (i=0;i<l;i++)
+		{
+			int j = i+rand()%(l-i);
+			swap(perm[i],perm[j]);
+		}
+		for (i=0;i<=nr_fold;i++)
+			fold_start[i]=i*l/nr_fold;
+	}
+
+	for (i=0;i<nr_fold;i++)
+	{
+		int begin = fold_start[i];
+		int end = fold_start[i+1];
+		int j,k;
+		struct svm_problem subprob;
+
+		subprob.l = l-(end-begin);
+		subprob.x = Malloc(struct svm_node*,subprob.l);
+		subprob.y = Malloc(double,subprob.l);
+			
+		k=0;
+		for (j=0;j<begin;j++)
+		{
+			subprob.x[k] = prob->x[perm[j]];
+			subprob.y[k] = prob->y[perm[j]];
+			++k;
+		}
+		for (j=end;j<l;j++)
+		{
+			subprob.x[k] = prob->x[perm[j]];
+			subprob.y[k] = prob->y[perm[j]];
+			++k;
+		}
+		struct svm_model *submodel = svm_train(&subprob,param);
+		if (param->probability && 
+		   (param->svm_type == C_SVC || param->svm_type == NU_SVC))
+		{
+			double *prob_estimates=Malloc(double,svm_get_nr_class(submodel));
+			for (j=begin;j<end;j++)
+				target[perm[j]] = svm_predict_probability(submodel,prob->x[perm[j]],prob_estimates);
+			free(prob_estimates);			
+		}
+		else
+			for (j=begin;j<end;j++)
+				target[perm[j]] = svm_predict(submodel,prob->x[perm[j]]);
+		svm_destroy_model(submodel);
+		free(subprob.x);
+		free(subprob.y);
+	}		
+	free(fold_start);
+	free(perm);	
+}
+
+
+int svm_get_svm_type(const svm_model *model)
+{
+	return model->param.svm_type;
+}
+
+int svm_get_nr_class(const svm_model *model)
+{
+	return model->nr_class;
+}
+
+void svm_get_labels(const svm_model *model, int* label)
+{
+	if (model->label != NULL)
+    {
+		for (int i=0;i<model->nr_class;i++)
+        {
+			label[i] = model->label[i];
+        }
+    }
+}
+
+double svm_get_svr_probability(const svm_model *model)
+{
+	if ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
+	    model->probA!=NULL)
+		return model->probA[0];
+	else
+	{
+		info("Model doesn't contain information for SVR probability inference\n");
+		return 0;
+	}
+}
+
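+// Compute raw decision values for x: a single value (sum_i coef_i*K(x,SV_i) - rho)
+// for one-class and regression models, or one value per class pair for
+// classification, in the same order as the pairwise loop in svm_train.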
+void svm_predict_values(const svm_model *model, const svm_node *x, double* dec_values)
+{
+	if (model->param.svm_type == ONE_CLASS ||
+	   model->param.svm_type == EPSILON_SVR ||
+	   model->param.svm_type == NU_SVR)
+	{
+		double *sv_coef = model->sv_coef[0];
+		double sum = 0;
+		for (int i=0;i<model->l;i++)
+			sum += sv_coef[i] * Kernel::k_function(x,model->SV[i],model->param);
+		sum -= model->rho[0];
+		*dec_values = sum;
+	}
+	else
+	{
+		int i;
+		int nr_class = model->nr_class;
+		int l = model->l;
+		
+		double *kvalue = Malloc(double,l);
+		for (i=0;i<l;i++)
+			kvalue[i] = Kernel::k_function(x,model->SV[i],model->param);
+
+		int *start = Malloc(int,nr_class);
+		start[0] = 0;
+		for (i=1;i<nr_class;i++)
+			start[i] = start[i-1]+model->nSV[i-1];
+
+		int p=0;
+		int pos=0;
+		for (i=0;i<nr_class;i++)
+			for (int j=i+1;j<nr_class;j++)
+			{
+				double sum = 0;
+				int si = start[i];
+				int sj = start[j];
+				int ci = model->nSV[i];
+				int cj = model->nSV[j];
+				
+				int k;
+				double *coef1 = model->sv_coef[j-1];
+				double *coef2 = model->sv_coef[i];
+				for (k=0;k<ci;k++)
+					sum += coef1[si+k] * kvalue[si+k];
+				for (k=0;k<cj;k++)
+					sum += coef2[sj+k] * kvalue[sj+k];
+				sum -= model->rho[p++];
+				dec_values[pos++] = sum;
+			}
+
+		free(kvalue);
+		free(start);
+	}
+}
+
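+// Predict the label of x: the sign of the decision value for one-class
+// models, the decision value itself for regression, and a majority vote
+// over the pairwise decision values for classification.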
+double svm_predict(const svm_model *model, const svm_node *x)
+{
+	if (model->param.svm_type == ONE_CLASS ||
+	   model->param.svm_type == EPSILON_SVR ||
+	   model->param.svm_type == NU_SVR)
+	{
+		double res;
+		svm_predict_values(model, x, &res);
+		
+		if (model->param.svm_type == ONE_CLASS)
+			return (res>0)?1:-1;
+		else
+			return res;
+	}
+	else
+	{
+		int i;
+		int nr_class = model->nr_class;
+		double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
+		svm_predict_values(model, x, dec_values);
+
+		int *vote = Malloc(int,nr_class);
+		for (i=0;i<nr_class;i++)
+			vote[i] = 0;
+		int pos=0;
+		for (i=0;i<nr_class;i++)
+			for (int j=i+1;j<nr_class;j++)
+			{
+				if (dec_values[pos++] > 0)
+					++vote[i];
+				else
+					++vote[j];
+			}
+
+		int vote_max_idx = 0;
+		for (i=1;i<nr_class;i++)
+			if (vote[i] > vote[vote_max_idx])
+				vote_max_idx = i;
+		free(vote);
+		free(dec_values);
+		return model->label[vote_max_idx];
+	}
+}
+
+double svm_predict_probability(
+	const svm_model *model, const svm_node *x, double *prob_estimates)
+{
+	if ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
+	    model->probA!=NULL && model->probB!=NULL)
+	{
+		int i;
+		int nr_class = model->nr_class;
+		double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
+		svm_predict_values(model, x, dec_values);
+
+		double min_prob=1e-7;
+		double **pairwise_prob=Malloc(double *,nr_class);
+		for (i=0;i<nr_class;i++)
+			pairwise_prob[i]=Malloc(double,nr_class);
+		int k=0;
+		for (i=0;i<nr_class;i++)
+			for (int j=i+1;j<nr_class;j++)
+			{
+				pairwise_prob[i][j]=min(max(sigmoid_predict(dec_values[k],model->probA[k],model->probB[k]),min_prob),1-min_prob);
+				pairwise_prob[j][i]=1-pairwise_prob[i][j];
+				k++;
+			}
+		multiclass_probability(nr_class,pairwise_prob,prob_estimates);
+
+		int prob_max_idx = 0;
+		for (i=1;i<nr_class;i++)
+			if (prob_estimates[i] > prob_estimates[prob_max_idx])
+				prob_max_idx = i;
+		for (i=0;i<nr_class;i++)
+			free(pairwise_prob[i]);
+		free(dec_values);
+		free(pairwise_prob);
+		return model->label[prob_max_idx];
+	}
+	else 
+		return svm_predict(model, x);
+}
+
+const char *svm_type_table[] =
+{
+	"c_svc","nu_svc","one_class","epsilon_svr","nu_svr",NULL
+};
+
+const char *kernel_type_table[]=
+{
+	"linear","polynomial","rbf","sigmoid",NULL
+};
+
+int svm_save_model(const char *model_file_name, const svm_model *model)
+{
+	FILE *fp = fopen(model_file_name,"w");
+	if (fp==NULL) return -1;
+
+	const svm_parameter& param = model->param;
+
+	fprintf(fp,"svm_type %s\n", svm_type_table[param.svm_type]);
+	fprintf(fp,"kernel_type %s\n", kernel_type_table[param.kernel_type]);
+
+	if (param.kernel_type == POLY)
+		fprintf(fp,"degree %g\n", param.degree);
+
+	if (param.kernel_type == POLY || param.kernel_type == RBF || param.kernel_type == SIGMOID)
+		fprintf(fp,"gamma %g\n", param.gamma);
+
+	if (param.kernel_type == POLY || param.kernel_type == SIGMOID)
+		fprintf(fp,"coef0 %g\n", param.coef0);
+
+	int nr_class = model->nr_class;
+	int l = model->l;
+	fprintf(fp, "nr_class %d\n", nr_class);
+	fprintf(fp, "total_sv %d\n",l);
+	
+	{
+		fprintf(fp, "rho");
+		for (int i=0;i<nr_class*(nr_class-1)/2;i++)
+			fprintf(fp," %g",model->rho[i]);
+		fprintf(fp, "\n");
+	}
+	
+	if (model->label)
+	{
+		fprintf(fp, "label");
+		for (int i=0;i<nr_class;i++)
+			fprintf(fp," %d",model->label[i]);
+		fprintf(fp, "\n");
+	}
+
+	if (model->probA) // regression has probA only
+	{
+		fprintf(fp, "probA");
+		for (int i=0;i<nr_class*(nr_class-1)/2;i++)
+			fprintf(fp," %g",model->probA[i]);
+		fprintf(fp, "\n");
+	}
+	if (model->probB)
+	{
+		fprintf(fp, "probB");
+		for (int i=0;i<nr_class*(nr_class-1)/2;i++)
+			fprintf(fp," %g",model->probB[i]);
+		fprintf(fp, "\n");
+	}
+
+	if (model->nSV)
+	{
+		fprintf(fp, "nr_sv");
+		for (int i=0;i<nr_class;i++)
+			fprintf(fp," %d",model->nSV[i]);
+		fprintf(fp, "\n");
+	}
+
+	fprintf(fp, "SV\n");
+	const double * const *sv_coef = model->sv_coef;
+	const svm_node * const *SV = model->SV;
+
+	for (int i=0;i<l;i++)
+	{
+		for (int j=0;j<nr_class-1;j++)
+			fprintf(fp, "%.16g ",sv_coef[j][i]);
+
+		const svm_node *p = SV[i];
+		while(p->index != -1)
+		{
+			fprintf(fp,"%d:%.8g ",p->index,p->value);
+			p++;
+		}
+		fprintf(fp, "\n");
+	}
+
+	fclose(fp);
+	return 0;
+}
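(With made-up numbers purely to illustrate the layout, a file written by svm_save_model for a two-class RBF classifier without probability information looks like:

    svm_type c_svc
    kernel_type rbf
    gamma 0.5
    nr_class 2
    total_sv 2
    rho 0.123
    label 1 -1
    nr_sv 1 1
    SV
    0.75 1:0.2 3:1.5
    -0.75 1:0.8 2:0.4

Each support-vector row carries nr_class-1 coefficients followed by sparse index:value pairs, which is exactly what svm_load_model below expects to read back.)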
+
+svm_model *svm_load_model(const char *model_file_name)
+{
+	FILE *fp = fopen(model_file_name,"rb");
+	if (fp==NULL) return NULL;
+	
+	// read parameters
+
+	svm_model *model = Malloc(svm_model,1);
+	svm_parameter& param = model->param;
+	model->rho = NULL;
+	model->probA = NULL;
+	model->probB = NULL;
+	model->label = NULL;
+	model->nSV = NULL;
+
+	char cmd[81];
+	while(1)
+	{
+		fscanf(fp,"%80s",cmd);
+
+		if (strcmp(cmd,"svm_type")==0)
+		{
+			fscanf(fp,"%80s",cmd);
+			int i;
+			for (i=0;svm_type_table[i];i++)
+			{
+				if (strcmp(svm_type_table[i],cmd)==0)
+				{
+					param.svm_type=i;
+					break;
+				}
+			}
+			if (svm_type_table[i] == NULL)
+			{
+				fprintf(stderr,"unknown svm type.\n");
+				free(model->rho);
+				free(model->label);
+				free(model->nSV);
+				free(model);
+				return NULL;
+			}
+		}
+		else if (strcmp(cmd,"kernel_type")==0)
+		{		
+			fscanf(fp,"%80s",cmd);
+			int i;
+			for (i=0;kernel_type_table[i];i++)
+			{
+				if (strcmp(kernel_type_table[i],cmd)==0)
+				{
+					param.kernel_type=i;
+					break;
+				}
+			}
+			if (kernel_type_table[i] == NULL)
+			{
+				fprintf(stderr,"unknown kernel function.\n");
+				free(model->rho);
+				free(model->label);
+				free(model->nSV);
+				free(model);
+				return NULL;
+			}
+		}
+		else if (strcmp(cmd,"degree")==0)
+			fscanf(fp,"%lf",&param.degree);
+		else if (strcmp(cmd,"gamma")==0)
+			fscanf(fp,"%lf",&param.gamma);
+		else if (strcmp(cmd,"coef0")==0)
+			fscanf(fp,"%lf",&param.coef0);
+		else if (strcmp(cmd,"nr_class")==0)
+			fscanf(fp,"%d",&model->nr_class);
+		else if (strcmp(cmd,"total_sv")==0)
+			fscanf(fp,"%d",&model->l);
+		else if (strcmp(cmd,"rho")==0)
+		{
+			int n = model->nr_class * (model->nr_class-1)/2;
+			model->rho = Malloc(double,n);
+			for (int i=0;i<n;i++)
+				fscanf(fp,"%lf",&model->rho[i]);
+		}
+		else if (strcmp(cmd,"label")==0)
+		{
+			int n = model->nr_class;
+			model->label = Malloc(int,n);
+			for (int i=0;i<n;i++)
+				fscanf(fp,"%d",&model->label[i]);
+		}
+		else if (strcmp(cmd,"probA")==0)
+		{
+			int n = model->nr_class * (model->nr_class-1)/2;
+			model->probA = Malloc(double,n);
+			for (int i=0;i<n;i++)
+				fscanf(fp,"%lf",&model->probA[i]);
+		}
+		else if (strcmp(cmd,"probB")==0)
+		{
+			int n = model->nr_class * (model->nr_class-1)/2;
+			model->probB = Malloc(double,n);
+			for (int i=0;i<n;i++)
+				fscanf(fp,"%lf",&model->probB[i]);
+		}
+		else if (strcmp(cmd,"nr_sv")==0)
+		{
+			int n = model->nr_class;
+			model->nSV = Malloc(int,n);
+			for (int i=0;i<n;i++)
+				fscanf(fp,"%d",&model->nSV[i]);
+		}
+		else if (strcmp(cmd,"SV")==0)
+		{
+			while(1)
+			{
+				int c = getc(fp);
+				if (c==EOF || c=='\n') break;	
+			}
+			break;
+		}
+		else
+		{
+			fprintf(stderr,"unknown text in model file\n");
+			free(model->rho);
+			free(model->label);
+			free(model->nSV);
+			free(model);
+			return NULL;
+		}
+	}
+
+	// read sv_coef and SV
+
+	int elements = 0;
+	long pos = ftell(fp);
+
+	while(1)
+	{
+		int c = fgetc(fp);
+		switch(c)
+		{
+			case '\n':
+				// count the '-1' element
+			case ':':
+				++elements;
+				break;
+			case EOF:
+				goto out;
+			default:
+				;
+		}
+	}
+out:
+	fseek(fp,pos,SEEK_SET);
+
+	int m = model->nr_class - 1;
+	int l = model->l;
+	model->sv_coef = Malloc(double *,m);
+	int i;
+	for (i=0;i<m;i++)
+		model->sv_coef[i] = Malloc(double,l);
+	model->SV = Malloc(svm_node*,l);
+	svm_node *x_space=NULL;
+	if (l>0) x_space = Malloc(svm_node,elements);
+
+	int j=0;
+	for (i=0;i<l;i++)
+	{
+		model->SV[i] = &x_space[j];
+		for (int k=0;k<m;k++)
+			fscanf(fp,"%lf",&model->sv_coef[k][i]);
+		while(1)
+		{
+			int c;
+			do {
+				c = getc(fp);
+				if (c=='\n') goto out2;
+			} while(isspace(c));
+			ungetc(c,fp);
+			fscanf(fp,"%d:%lf",&(x_space[j].index),&(x_space[j].value));
+			++j;
+		}	
+out2:
+		x_space[j++].index = -1;
+	}
+
+	fclose(fp);
+
+	model->free_sv = 1;	// XXX
+	return model;
+}
+
+void svm_destroy_model(svm_model* model)
+{
+	if (model->free_sv && model->l > 0)
+		free((void *)(model->SV[0]));
+	for (int i=0;i<model->nr_class-1;i++)
+		free(model->sv_coef[i]);
+	free(model->SV);
+	free(model->sv_coef);
+	free(model->rho);
+	free(model->label);
+	free(model->probA);
+	free(model->probB);
+	free(model->nSV);
+	free(model);
+}
+
+void svm_destroy_param(svm_parameter* param)
+{
+	free(param->weight_label);
+	free(param->weight);
+}
+
+const char* svm_check_parameter(const svm_problem *prob, const svm_parameter *param)
+{
+	// svm_type
+
+	int svm_type = param->svm_type;
+	if (svm_type != C_SVC &&
+	   svm_type != NU_SVC &&
+	   svm_type != ONE_CLASS &&
+	   svm_type != EPSILON_SVR &&
+	   svm_type != NU_SVR)
+    {
+		return "unknown svm type";
+    }
+	
+	// kernel_type
+	
+	int kernel_type = param->kernel_type;
+	if (kernel_type != LINEAR &&
+	   kernel_type != POLY &&
+	   kernel_type != RBF &&
+	   kernel_type != SIGMOID)
+    {
+		return "unknown kernel type";
+    }
+
+	// cache_size,eps,C,nu,p,shrinking
+
+	if (param->cache_size <= 0)
+    {
+		return "cache_size <= 0";
+    }
+
+	if (param->eps <= 0)
+    {
+		return "eps <= 0";
+    }
+
+	if (svm_type == C_SVC ||
+	   svm_type == EPSILON_SVR ||
+	   svm_type == NU_SVR)
+    {
+		if (param->C <= 0)
+        {
+			return "C <= 0";
+        }
+    }
+
+	if (svm_type == NU_SVC ||
+	   svm_type == ONE_CLASS ||
+	   svm_type == NU_SVR)
+    {
+		if (param->nu < 0 || param->nu > 1)
+			return "nu < 0 or nu > 1";
+    }
+
+	if (svm_type == EPSILON_SVR)
+    {
+		if (param->p < 0)
+        {
+            return "p < 0";
+        }
+    }
+
+	if (param->shrinking != 0 &&
+	   param->shrinking != 1)
+    {
+		return "shrinking != 0 and shrinking != 1";
+    }
+
+	if (param->probability != 0 &&
+	   param->probability != 1)
+    {
+		return "probability != 0 and probability != 1";
+    }
+
+	if (param->probability == 1 &&
+	   svm_type == ONE_CLASS)
+    {
+		return "one-class SVM probability output not supported yet";
+    }
+
+
+	// check whether nu-svc is feasible
+	
+	if (svm_type == NU_SVC)
+	{
+		int l = prob->l;
+		int max_nr_class = 16;
+		int nr_class = 0;
+		int *label = Malloc(int,max_nr_class);
+		int *count = Malloc(int,max_nr_class);
+
+		int i;
+		for (i=0; i<l; i++)
+		{
+			int this_label = (int)prob->y[i];
+			int j;
+			for (j=0;j<nr_class;j++)
+				if (this_label == label[j])
+				{
+					++count[j];
+					break;
+				}
+			if (j == nr_class)
+			{
+				if (nr_class == max_nr_class)
+				{
+					max_nr_class *= 2;
+					label = (int *)realloc(label,max_nr_class*sizeof(int));
+					count = (int *)realloc(count,max_nr_class*sizeof(int));
+				}
+				label[nr_class] = this_label;
+				count[nr_class] = 1;
+				++nr_class;
+			}
+		}
+	
+		for (i=0;i<nr_class;i++)
+		{
+			int n1 = count[i];
+			for (int j=i+1;j<nr_class;j++)
+			{
+				int n2 = count[j];
+				if (param->nu*(n1+n2)/2 > min(n1,n2))
+				{
+					free(label);
+					free(count);
+					return "specified nu is infeasible";
+				}
+			}
+		}
+		free(label);
+		free(count);
+	}
+
+	return NULL;
+}
+
+int svm_check_probability_model(const svm_model *model)
+{
+	return ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
+		model->probA!=NULL && model->probB!=NULL) ||
+		((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
+		 model->probA!=NULL);
+}
diff --git a/PySVM/svm.h b/PySVM/svm.h
new file mode 100644
index 0000000..e613473
--- /dev/null
+++ b/PySVM/svm.h
@@ -0,0 +1,72 @@
+
+
+#ifndef _LIBSVM_H
+#define _LIBSVM_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct svm_node
+{
+	int index;
+	double value;
+};
+
+struct svm_problem
+{
+	int l;
+	double *y;
+	struct svm_node **x;
+};
+
+enum { C_SVC, NU_SVC, ONE_CLASS, EPSILON_SVR, NU_SVR };	/* svm_type */
+enum { LINEAR, POLY, RBF, SIGMOID };	/* kernel_type */
+
+struct svm_parameter
+{
+	int svm_type;
+	int kernel_type;
+	double degree;	/* for poly */
+	double gamma;	/* for poly/rbf/sigmoid */
+	double coef0;	/* for poly/sigmoid */
+
+	/* these are for training only */
+	double cache_size; /* in MB */
+	double eps;	/* stopping criteria */
+	double C;	/* for C_SVC, EPSILON_SVR and NU_SVR */
+	int nr_weight;		/* for C_SVC */
+	int *weight_label;	/* for C_SVC */
+	double* weight;		/* for C_SVC */
+	double nu;	/* for NU_SVC, ONE_CLASS, and NU_SVR */
+	double p;	/* for EPSILON_SVR */
+	int shrinking;	/* use the shrinking heuristics */
+	int probability; /* do probability estimates */
+};
+
+struct svm_model *svm_train(const struct svm_problem *prob, const struct svm_parameter *param);
+void svm_cross_validation(const struct svm_problem *prob, const struct svm_parameter *param, int nr_fold, double *target);
+
+int svm_save_model(const char *model_file_name, const struct svm_model *model);
+struct svm_model *svm_load_model(const char *model_file_name);
+
+int svm_get_svm_type(const struct svm_model *model);
+int svm_get_nr_class(const struct svm_model *model);
+void svm_get_labels(const struct svm_model *model, int *label);
+double svm_get_svr_probability(const struct svm_model *model);
+
+void svm_predict_values(const struct svm_model *model, const struct svm_node *x, double* dec_values);
+double svm_predict(const struct svm_model *model, const struct svm_node *x);
+double svm_predict_probability(const struct svm_model *model, const struct svm_node *x, double* prob_estimates);
+
+void svm_destroy_model(struct svm_model *model);
+void svm_destroy_param(struct svm_parameter *param);
+
+const char *svm_check_parameter(const struct svm_problem *prob, const struct svm_parameter *param);
+int svm_check_probability_model(const struct svm_model *model);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LIBSVM_H */
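(A minimal sketch, not upstream code, of driving this C API from Python via ctypes. It assumes svm.cpp has been compiled into a shared library named libsvm.so; that library name, the model path "some.model", and the feature values are illustrative assumptions, not artifacts produced by this build.

    import ctypes

    class svm_node(ctypes.Structure):
        _fields_ = [("index", ctypes.c_int), ("value", ctypes.c_double)]

    svm = ctypes.CDLL("./libsvm.so")  # assumed build of svm.cpp
    svm.svm_load_model.restype = ctypes.c_void_p
    svm.svm_load_model.argtypes = [ctypes.c_char_p]
    svm.svm_predict.restype = ctypes.c_double
    svm.svm_predict.argtypes = [ctypes.c_void_p, ctypes.POINTER(svm_node)]

    model = svm.svm_load_model("some.model")
    # Sparse feature vector; the array must end with index == -1,
    # the terminator convention used by svm_save_model/svm_load_model.
    features = [(1, 0.5), (2, -0.25), (7, 1.0), (-1, 0.0)]
    x = (svm_node * len(features))(*[svm_node(i, v) for (i, v) in features])
    print svm.svm_predict(model, x)
)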
diff --git a/ReleaseFiles.txt b/ReleaseFiles.txt
new file mode 100644
index 0000000..5890795
--- /dev/null
+++ b/ReleaseFiles.txt
@@ -0,0 +1,234 @@
+# This document lists all the files to be included in the
+# standard release of Inspect.  The script Release.py parses
+# this file, copies the listed files as needed, and zips up
+# the release; a sketch of that parse step follows this list.
+
+####################################
+# Inspect source code (to be kept in synch with Makefile):
+base64.c
+BN.c
+BuildMS2DB.c
+ChargeState.c
+CMemLeak.c
+Errors.c
+ExonGraphAlign.c
+FreeMod.c
+main.c
+Mods.c
+MS2DB.c
+ParseInput.c
+ParseXML.c
+PValue.c
+Run.c
+Score.c
+Scorpion.c
+SNP.c
+Spectrum.c
+Spliced.c
+SpliceDB.c
+SpliceScan.c
+SVM.c
+Tagger.c
+Trie.c
+TagFile.c
+Utils.c
+base64.h
+BN.h
+BuildMS2DB.h
+ChargeState.h
+CMemLeak.h
+Errors.h
+ExonGraphAlign.h
+FreeMod.h
+Inspect.h
+Mods.h
+MS2DB.h
+ParseInput.h
+ParseXML.h
+PValue.h
+Run.h
+Score.h
+Scorpion.h
+SNP.h
+Spectrum.h
+Spliced.h
+SpliceDB.h
+SpliceScan.h
+SVM.h
+Tagger.h
+Trie.h
+TagFile.h
+Utils.h
+LDA.c
+LDA.h
+ParentMass.c
+ParentMass.h
+IonScoring.c
+IonScoring.h
+
+# Other build-specific files:
+Inspect.sln
+Inspect.vcproj
+Makefile
+ReleaseFiles.txt
+BuildInspect.py
+
+####################################
+# Executables and dlls:
+#Inspect.exe
+#libexpat.dll
+
+####################################
+# PyInspect stuff:
+PyInspect.pyd
+PyInspect/PyInspect.c
+PyInspect/PySpectrum.c
+PyInspect/PySpectrum.h
+PyInspect/PyUtils.c
+PyInspect/PyUtils.h
+ReleasePyInspect.py
+
+####################################
+# PySVM stuff:
+PySVM.pyd
+PySVM/PySVM.c
+PySVM/PySVM.sln
+PySVM/PySVM.vcproj
+PySVM/svm-predict.c
+PySVM/svm.cpp
+PySVM/svm.h
+ReleasePySVM.py
+
+####################################
+# Inspect data files:
+AminoAcidMasses.txt
+InVivoModifications.txt
+InVitroModifications.txt
+CCSVM1.model
+CCSVM1.range
+CCSVM2.model
+CCSVM2.range
+Ch2BNPEP.dat
+Ch2BNPEPQ.dat
+Ch3BNPEP.dat
+Ch3BNPEPQ.dat
+IsotopePatterns.txt
+PMCLDA1.model
+PMCLDA2.model
+PMCLDA3.model
+PRM2.dat
+PRM3.dat
+PRMQ2.dat
+PRMQ3.dat
+PTMods.txt
+ScoringModel.dat
+Database/CommonContaminants.fasta
+TagSkewScores.dat
+PRM2.bn
+PRM3.bn
+TAG2.bn
+TAG3.bn
+MQScoreSVM2.model
+MQScoreSVM2.range
+MQScoreSVM3.model
+MQScoreSVM3.range
+MQScoreLDA2.model
+MQScoreLDA3.model
+PhosCut2.bn
+PhosCut3.bn
+CCSVM2Phos.model
+CCSVM2Phos.range
+PMCLDA2Phos.model
+PMCLDA3Phos.model
+
+####################################
+# Documentation:
+docs/Analysis.html
+docs/Copyright.html
+docs/Database.html
+docs/index.html
+docs/Installation.html
+docs/MS2DB.html
+docs/Searching.html
+docs/UnrestrictedSearchTutorial.pdf
+docs/RunningInspectOnTheFWGrid.pdf
+docs/InspectTutorial.pdf
+docs/PLSTutorial.pdf
+
+####################################
+# Analysis scripts:
+BasicStats.py
+Global.py
+Label.py
+LDA.py
+Learning.py
+MakeImage.py
+MSSpectrum.py
+PrepDB.py
+PLSUtils.py
+FDRUtils.py
+ResultsParser.py
+Score.py
+SelectProteins.py
+ShuffleDB.py
+Summary.py
+SystemTest.py
+Utils.py
+ParseXML.py
+InspectToPepXML.py
+ProteinGrouper.py
+TrieUtils.py
+ComputeFScore.py
+ComputeFDR.jar
+BuildMS2DB.jar
+MS2DBShuffler.jar
+
+####################################
+# Old PTM analysis scripts:
+ExplainPTMs.py
+PTMChooserLM.py
+PTMDatabase.txt
+
+####################################
+# New PTM analysis scripts:
+ComputePTMFeatures.py
+CombinePTMFeatures.py
+BuildMGF.py
+PTMSearchBigDB.py
+TrainPTMFeatures.py
+AdjustPTM.py
+BuildConsensusSpectrum.py
+PTMAnalysis.py
+SpectralSimilarity.py
+RunPySVM.py
+CompareHEKPTM.py
+GetByteOffset.py
+StripPTM.py
+PhosphateLocalization.py
+
+####################################
+# System tests:
+SystemTest/TestInput.txt
+SystemTest/TestInputMod.txt
+SystemTest/TestPMC.txt
+SystemTest/TestSpectrum.dta
+SystemTest/TestSpectra.pkl
+SystemTest/TestInputTag1.txt
+SystemTest/TestInputTag3.txt
+SystemTest/SimpleChromosome.trie
+SystemTest/SimpleGenes.gff
+SystemTest/BuildSimpleChromosome.txt
+SystemTest/TestMS2.txt
+SystemTest/Yeast.ms2
+SystemTest/YeastSmall.fasta
+Database/TestDatabase.index
+Database/TestDatabase.trie
+SystemTest/Shew_Short.fasta
+SystemTest/TestCDTA.txt
+SystemTest/Shew_dta.txt
+
+
+# Not for production use:
+#TestSuite.py
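(Release.py itself is not part of this import, so the following is only a minimal sketch, under the assumption, based on the comment at the top of this file, that the parse step simply collects the non-blank, non-comment lines.

    def ParseReleaseFileList(Path):
        "Return the paths listed in ReleaseFiles.txt, skipping blank lines and # comments."
        FileNames = []
        for Line in open(Path, "r"):
            Line = Line.strip()
            if not Line or Line.startswith("#"):
                continue
            FileNames.append(Line)
        return FileNames

    print len(ParseReleaseFileList("ReleaseFiles.txt")), "files to release"
)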
diff --git a/ReleasePyInspect.py b/ReleasePyInspect.py
new file mode 100644
index 0000000..de81fb9
--- /dev/null
+++ b/ReleasePyInspect.py
@@ -0,0 +1,67 @@
+#Title:          ReleasePyInspect.py
+#Author:         Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+#Created:        2005
+# Copyright 2007,2008,2009 The Regents of the University of California
+# All Rights Reserved
+#
+# Permission to use, copy, modify and distribute any part of this
+# program for educational, research and non-profit purposes, by non-profit
+# institutions only, without fee, and without a written agreement is hereby
+# granted, provided that the above copyright notice, this paragraph and
+# the following three paragraphs appear in all copies.
+#
+# Those desiring to incorporate this work into commercial
+# products or use for commercial purposes should contact the Technology
+# Transfer & Intellectual Property Services, University of California,
+# San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+# Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+#
+# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+# INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+# IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+# OF SUCH DAMAGE.
+#
+# THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+# OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+# ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+# REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+# EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+# THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+"""
+Script to build PyInspect
+"""
+import sys
+import os
+
+import distutils.core
+
+PyInspectFileNames = [
+    "PyInspect/PyInspect.c", "PyInspect/PySpectrum.c", "PyInspect/PyUtils.c",
+    "base64.c", "BN.c", "BuildMS2DB.c", "ChargeState.c", "CMemLeak.c",
+    "Errors.c", "ExonGraphAlign.c", "FreeMod.c", "IonScoring.c", "LDA.c",
+    "Mods.c", "MS2DB.c", "ParentMass.c", "ParseInput.c", "ParseXML.c", "PValue.c",
+    "Run.c", "Score.c", "Scorpion.c", "SNP.c",
+    "Spectrum.c", "Spliced.c", "SpliceDB.c",
+    "SpliceScan.c", "SVM.c", "Tagger.c", "Trie.c", "Utils.c","TagFile.c"]
+
+def Main(Arguments):
+    print "Prepping PyInspect..."
+    if sys.platform == "win32":
+        LibraryList = ["libexpat"]
+    else:
+        LibraryList = ["expat"]
+        
+    PyInspectExtension = distutils.core.Extension('PyInspect',
+        sources = PyInspectFileNames,
+        include_dirs = [".", "expat/lib"],
+        library_dirs = ["expat/lib/release","pdk_wrapper"], 
+        libraries = LibraryList)
+
+    distutils.core.setup(name = 'PyInspect', version = '1.0', ext_modules=[PyInspectExtension],
+        script_args = Arguments)
+
+if __name__ == "__main__":
+    Main(sys.argv[1:])
diff --git a/ReleasePySVM.py b/ReleasePySVM.py
new file mode 100644
index 0000000..577eaab
--- /dev/null
+++ b/ReleasePySVM.py
@@ -0,0 +1,48 @@
+#Title:          ReleasePySVM.py
+#Author:         Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+#Created:        2005
+# Copyright 2007,2008,2009 The Regents of the University of California
+# All Rights Reserved
+#
+# Permission to use, copy, modify and distribute any part of this
+# program for educational, research and non-profit purposes, by non-profit
+# institutions only, without fee, and without a written agreement is hereby
+# granted, provided that the above copyright notice, this paragraph and
+# the following three paragraphs appear in all copies.
+#
+# Those desiring to incorporate this work into commercial
+# products or use for commercial purposes should contact the Technology
+# Transfer & Intellectual Property Services, University of California,
+# San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+# Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+#
+# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+# INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+# IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+# OF SUCH DAMAGE.
+#
+# THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+# OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+# ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+# REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+# EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+# THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+"""
+Script to build PySVM
+"""
+import sys
+import distutils.core
+
+def Main(Arguments):
+    print "Prepping PySVM..."
+    PySVMFileNames = ['PySVM/PySVM.c',"PySVM/svm-predict.c", "PySVM/svm.cpp",]
+    PySVMExtension = distutils.core.Extension('PySVM', sources = PySVMFileNames)
+    distutils.core.setup(name = 'PySVM', version = '1.0', ext_modules = [PySVMExtension],
+          script_args = Arguments)
+
+if __name__ == "__main__":
+    Main(sys.argv[1:])
+    
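(Both release helpers forward their command-line arguments unchanged to distutils through script_args, so, assuming a compiler and, for PyInspect, the expat headers are available, a build is typically driven with "python ReleasePyInspect.py build" or "python ReleasePySVM.py build"; on Windows the resulting extension modules are the PyInspect.pyd and PySVM.pyd binaries listed in ReleaseFiles.txt.)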
diff --git a/ResultsParser.py b/ResultsParser.py
new file mode 100644
index 0000000..4a08097
--- /dev/null
+++ b/ResultsParser.py
@@ -0,0 +1,152 @@
+#Title:          ResultsParser.py
+#Author:         Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+#Created:        2005
+# Copyright 2007,2008,2009 The Regents of the University of California
+# All Rights Reserved
+#
+# Permission to use, copy, modify and distribute any part of this
+# program for educational, research and non-profit purposes, by non-profit
+# institutions only, without fee, and without a written agreement is hereby
+# granted, provided that the above copyright notice, this paragraph and
+# the following three paragraphs appear in all copies.
+#
+# Those desiring to incorporate this work into commercial
+# products or use for commercial purposes should contact the Technology
+# Transfer & Intellectual Property Services, University of California,
+# San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+# Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+#
+# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+# INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+# IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+# OF SUCH DAMAGE.
+#
+# THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+# OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+# ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+# REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+# EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+# THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+"""
+Constants and methods for parsing (Inspect) search results
+"""
+import os
+import random
+class Columns:
+
+    DefaultInspectHeader = "#SpectrumFile\tScan#\tAnnotation\tProtein\tCharge\tMQScore\tLength\tTotalPRMScore\tMedianPRMScore\tFractionY\tFractionB\tIntensity\tNTT\tInspectFDR\tF-Score\tDeltaScore\tDeltaScoreOther\tRecordNumber\tDBFilePos\tSpecFilePos\tPrecursorMZ\tPrecursorMZError\tSpecIndex"
+
+
+    def __init__(self):
+        self.header = self.initializeHeaders(self.DefaultInspectHeader)
+
+    def initializeHeaders(self, Header):
+        if Header[0] == '#':
+            Header = Header[1:]
+
+        self.headers = Header.lower().split("\t")
+        return self.headers
+        
+    def getIndex(self,headerVal):
+        
+        for i in range(0,len(self.headers)):
+            if headerVal.lower() == self.headers[i]:
+                return i
+
+        return -1
+        
+
+#    "Constants for which columns contain which data"
+#    SpectrumFile = 0
+#    ScanNumber = 1
+#    Annotation = 2
+#    ProteinName = 3
+#    Charge = 4
+#    MQScore = 5
+#    Length = 6
+#    NTT = 12
+#    PValue = 13
+#    FScore = 14
+#    DeltaScoreAny = 15
+#    DeltaScore = 16
+#    ProteinID = 17
+#    DBPos = 18
+#    FileOffset = 19 #Spectrum File pos
+#    ParentMZ = 20 #Corrected, associated with tweak
+#    MZError = 21
+
+#    #More columns for splicing
+#    Chromosome = 22
+#    Strand = 23
+#    GenomicPost = 24
+#    SplicedSequence = 25
+#    Splices = 26
+#    SearchDB = 27
+
+
+
+class SpectrumOracleMixin:
+    def __init__(self):
+        self.SpectrumOracle = {}
+    def FixSpectrumPath(self, Path):
+        FileName = os.path.split(Path)[-1]
+        Stub = os.path.splitext(FileName)[0]
+        return self.SpectrumOracle.get(Stub, Path)
+    def PopulateSpectrumOracle(self, RootDirectory):
+        """
+        Used when mzxml files are spread over multiple subdirectories.
+        MZXMLOracle[Stub] = full path to the corresponding MZXML file
+        Used with -M option (not with -s option)
+        """
+        if not RootDirectory or not os.path.exists(RootDirectory):
+            return
+        print "Populate oracle from %s..."%RootDirectory
+        for SubFileName in os.listdir(RootDirectory):
+            # Avoid expensive iteration through results directories:
+            if SubFileName[:7] == "Results":
+                continue
+            SubFilePath = os.path.join(RootDirectory, SubFileName)
+            if os.path.isdir(SubFilePath):
+                self.PopulateSpectrumOracle(SubFilePath)
+                continue
+            (Stub, Extension) = os.path.splitext(SubFileName)
+            Extension = Extension.lower()
+            if Extension == ".mzxml":
+                self.SpectrumOracle[Stub] = os.path.join(RootDirectory, SubFileName)
+            elif Extension == ".mgf":
+                self.SpectrumOracle[Stub] = os.path.join(RootDirectory, SubFileName)
+            elif Extension == ".ms2":
+                self.SpectrumOracle[Stub] = os.path.join(RootDirectory, SubFileName)
+                
+class ResultsParser:
+    def __init__(self, *args, **kw):
+        #self.Columns = Columns
+        self.Running = 1
+    def ProcessResultsFiles(self, FilePath, Callback, MaxFilesToParse = None, QuietFlag = 0):
+        """
+        Function for applying a Callback function to one search-results file, or to every
+        search-results file in a directory.
+        """
+        print "ResultsParser:%s"%FilePath
+        FileCount = 0
+        if os.path.isdir(FilePath):
+            FileNames = os.listdir(FilePath)
+            random.shuffle(FileNames)
+            for FileNameIndex in range(len(FileNames)):
+                FileName = FileNames[FileNameIndex]
+                if not QuietFlag:
+                    print "(%s/%s) %s"%(FileNameIndex, len(FileNames), FileName)
+                (Stub, Extension) = os.path.splitext(FileName)
+                if Extension.lower() not in (".txt", ".filtered", ".res", ".csv", ".out"):
+                    continue
+                FileCount += 1
+                SubFilePath = os.path.join(FilePath, FileName)
+                apply(Callback, (SubFilePath,))
+                # Don't parse every single file; that would take too long.
+                if MaxFilesToParse is not None and FileCount >= MaxFilesToParse:
+                    break 
+        else:
+            apply(Callback, (FilePath,))
+    
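(A minimal usage sketch, not upstream code and with a hypothetical results path: subclass ResultsParser, use Columns to locate fields by header name, and let ProcessResultsFiles apply a callback to a single results file or to every results file in a directory.

    import ResultsParser

    class MQScoreReader(ResultsParser.ResultsParser):
        def __init__(self):
            ResultsParser.ResultsParser.__init__(self)
            self.Columns = ResultsParser.Columns()
            self.AnnotationIndex = self.Columns.getIndex("Annotation")
            self.MQScoreIndex = self.Columns.getIndex("MQScore")
        def ParseFile(self, FilePath):
            for Line in open(FilePath, "r"):
                if not Line.strip() or Line[0] == "#":
                    continue
                Bits = Line.rstrip("\r\n").split("\t")
                if len(Bits) <= self.MQScoreIndex:
                    continue
                print Bits[self.AnnotationIndex], Bits[self.MQScoreIndex]

    Reader = MQScoreReader()
    Reader.ProcessResultsFiles("TestResults.txt", Reader.ParseFile)
)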
diff --git a/Run.c b/Run.c
new file mode 100644
index 0000000..35f9949
--- /dev/null
+++ b/Run.c
@@ -0,0 +1,1492 @@
+//Title:          Run.c
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+#include "CMemLeak.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <time.h>
+#include <string.h>
+#include <math.h>
+#include "Trie.h"
+#include "Utils.h"
+#include "Run.h"
+#include "Tagger.h"
+#include "Score.h"
+#include "FreeMod.h"
+#include "Spliced.h"
+#include "Errors.h"
+#include "BN.h"
+#include "SVM.h"
+#include "Scorpion.h"
+#include "ChargeState.h"
+#include "PValue.h"
+#include "MS2DB.h"
+#include "IonScoring.h"
+#include "TagFile.h" //ARI_MOD
+
+
+extern float g_CutScores[];
+extern PRMBayesianModel* PRMModelCharge2;
+
+// Forward Declaration
+void DebugPrintBlindTagExtensions(SearchInfo* Info);
+void AttemptParentMassPeakRemoval(MSSpectrum* Spectrum);
+void RestoreParentMassPeakRemoval(MSSpectrum* Spectrum);
+
+
+TrieTag* TagGraphGenerateTags(TagGraph* Graph, MSSpectrum* Spectrum, int* TagCount, 
+    int MaximumTagCount, SpectrumTweak* Tweak, float TagEdgeScoreMultiplier,
+    PRMBayesianModel* Model);
+
+void OutputMatchesForSpectrum(SpectrumNode* Node, FILE* OutputFile)
+{
+    char MatchedPeptideVerbose[256];
+    char PeptideName[256];
+    Peptide* Match;
+    //Peptide* NextMatch;
+    int MatchNumber = 0;
+    int FeatureIndex;
+    static int FirstCall = 1;
+    PeptideSpliceNode* SpliceNode;
+
+    double FileMass;
+    double PeptideMass;
+    double PeptideMZ;
+
+    int bytecount = 0;
+    
+    //
+    if (!OutputFile)
+    {
+        return;
+    }
+    PeptideName[0] = '\0'; 
+    SetMatchDeltaCN(Node);
+
+    //SetMatchDeltaCN(Node);
+    Match = Node->FirstMatch;
+    while (Match)
+    {
+     
+      GetProteinID(Match->RecordNumber, Match->DB, PeptideName);
+
+	
+        // Write a header line:
+        if (FirstCall)
+        {
+            FirstCall = 0;
+            if(fprintf(OutputFile, "#SpectrumFile\tScan#\tAnnotation\tProtein\tCharge\t")<0)
+	      {
+		REPORT_ERROR_S(50,GlobalOptions->OutputFileName);
+		exit(50);
+	      }
+            if(fprintf(OutputFile, "MQScore\tLength\tTotalPRMScore\tMedianPRMScore\tFractionY\tFractionB\tIntensity\tNTT\t") < 0)
+	      {
+		REPORT_ERROR_S(50,GlobalOptions->OutputFileName);
+		exit(50);
+	      }
+            if(fprintf(OutputFile, "InspectFDR\tF-Score\t") < 0)
+	      {
+		REPORT_ERROR_S(50,GlobalOptions->OutputFileName);
+		exit(50);
+	      }
+            if(fprintf(OutputFile, "DeltaScore\tDeltaScoreOther\tRecordNumber\tDBFilePos\tSpecFilePos\tPrecursorMZ\tPrecursorMZError") < 0)
+	      {
+		REPORT_ERROR_S(50,GlobalOptions->OutputFileName);
+		exit(50);
+	      }
+            if (GlobalOptions->FirstDatabase->Type != evDBTypeTrie)
+            {
+	      if(fprintf(OutputFile, "\tChromosome\tStrand\tGenomicPos\tSplicedSequence\tSplices\tSearchedDB") < 0)
+		{
+		  REPORT_ERROR_S(50,GlobalOptions->OutputFileName);
+		  exit(50);
+		}
+            }
+	    if(fprintf(OutputFile,"\tSpecIndex") < 0)
+	      {
+		REPORT_ERROR_S(50,GlobalOptions->OutputFileName);
+		exit(50);
+	      }
+	    if(fprintf(OutputFile, "\n") < 0)
+	      {
+		REPORT_ERROR_S(50,GlobalOptions->OutputFileName);
+		exit(50);
+	      }
+	    fflush(OutputFile);
+	    }
+        
+        //GetProteinID(Match->RecordNumber, IndexFile, PeptideName, 1);
+        WriteMatchToString(Match, MatchedPeptideVerbose, 1);
+
+	//bytecount = fprintf(OutputFile, "TEST");
+	//fprintf(OutputFile, "bytecount:%d",bytecount);
+	//printf("TESTEST\tTEST\n");
+	//fflush(OutputFile);
+	//fprintf(OutputFile, "XX%dXX\t", Node->ScanNumber);
+
+        // Which spectrum?
+	fprintf(OutputFile, "%s\t%d\t", Node->InputFile->FileName, Node->ScanNumber);
+        // What's the match?
+        fprintf(OutputFile, "%s\t%s\t%d\t%.3f\t", MatchedPeptideVerbose, PeptideName, Match->Tweak->Charge, Match->MatchQualityScore);
+        // How good is the match?
+        for (FeatureIndex = 0; FeatureIndex < MQ_FEATURE_COUNT; FeatureIndex++)
+        {
+            fprintf(OutputFile, "%.3f\t", Match->ScoreFeatures[FeatureIndex]);
+        }
+        //fprintf(OutputFile, "%.3f\t", Match->InitialScore / 1000.0);
+        fprintf(OutputFile, "%.5f\t", Match->FScore);
+        fprintf(OutputFile, "%.5f\t", Match->PValue);
+        fprintf(OutputFile, "%.3f\t", Match->DeltaCN);
+        fprintf(OutputFile, "%.3f\t", Match->DeltaCNOther);
+        //fprintf(OutputFile, "%.3f\t", Match->ParentMassError / 100.0); // Temp: Parent mass error (for FT)
+        // Extra fields, for debugging:
+        fprintf(OutputFile, "%d\t%d\t%d\t", Match->RecordNumber, Match->FilePos, Node->FilePosition);
+
+	//FileMass = ((float)Node->Spectrum->MZ) * Match->Tweak->Charge - (Match->Tweak->Charge-1)*1007.8;
+	fprintf(OutputFile,"%.3f\t",(double)Node->Spectrum->FileMZ/MASS_SCALE);
+
+	PeptideMass = (double)GetPeptideParentMass(Match);
+	PeptideMZ = (double)(PeptideMass + (Match->Tweak->Charge-1)*1007.8)/Match->Tweak->Charge;
+	PeptideMZ = PeptideMZ/MASS_SCALE;
+
+	fprintf(OutputFile,"%.3f",((double)Node->Spectrum->FileMZ/MASS_SCALE - PeptideMZ));
+	
+	//NEC_DEBUG
+	//printf("%s\t%d\t%s\t",Node->InputFile->FileName, Node->ScanNumber,MatchedPeptideVerbose);
+	//printf("%.3f\t%.3f\t%d\n",Match->MatchQualityScore,PeptideMass,Match->Tweak->ParentMass);
+	//fprintf(OutputFile,"\t%.3f",(double)PeptideMass);
+	//fprintf(OutputFile,"\t%d\t%d",Match->Tweak->Charge, Match->Tweak->ParentMass);
+	
+        ////////////////////////////////////////////////////////////
+        // If it's a splice-tolerant search, then output some information about the match:
+	if (Match->DB->Type != evDBTypeTrie)
+        {
+            if (Match->ChromosomeNumber >= 0)
+            {
+                fprintf(OutputFile, "\t%d", Match->ChromosomeNumber);
+                fprintf(OutputFile, "\t%d", Match->ChromosomeForwardFlag);
+            }
+            else
+            {
+                fprintf(OutputFile, "\t");
+                fprintf(OutputFile, "\t");
+            }
+            if (Match->GenomicLocationStart >= 0)
+            {
+                fprintf(OutputFile, "\t%d-%d", 
+                    min(Match->GenomicLocationStart, Match->GenomicLocationEnd),
+                    max(Match->GenomicLocationStart, Match->GenomicLocationEnd));
+            }
+            else
+            {
+                fprintf(OutputFile, "\t");
+            }
+            fprintf(OutputFile, "\t%s", Match->SplicedBases);
+	    if(Match->SpliceHead)
+	      {
+		for (SpliceNode = Match->SpliceHead; SpliceNode; SpliceNode = SpliceNode->Next)
+		  {
+		    fprintf(OutputFile, "\t%d-%d ",  SpliceNode->DonorPos, SpliceNode->AcceptorPos);
+		  }
+	      }
+	    else
+	      fprintf(OutputFile,"\t");
+
+	    fprintf(OutputFile, "\t%s", Match->DB->FileName);
+	}
+	fprintf(OutputFile,"\t%d",Node->SpecIndex);
+        fprintf(OutputFile, "\n");
+        Match = Match->Next;
+        MatchNumber++;
+        if (MatchNumber >= GlobalOptions->ReportMatchCount)
+        {
+            break;
+        }
+    }
+    //printf("Wrote out %d matches for '%s'.\n", MatchNumber, Node->FileName);
+    fflush(OutputFile);
+}
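(For a standard trie-database search the columns written here line up with the DefaultInspectHeader constant in ResultsParser.py. The final PrecursorMZError field is the observed precursor m/z, FileMZ/MASS_SCALE, minus the m/z recomputed from the matched peptide, (PeptideMass + (Charge-1)*1007.8) / (Charge*MASS_SCALE), where 1007.8 appears to be the proton mass expressed in the engine's scaled integer mass units; values near zero indicate the chosen parent-mass tweak is consistent with the spectrum.)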
+
+#define vprintf(x) if (VerboseFlag) printf(x)
+
+void MutationModeSearch(SearchInfo* Info)
+{
+    Peptide* FirstMatch = NULL;
+    Peptide* LastMatch = NULL;
+    Peptide* NextOldMatchNode;
+    Peptide* OldMatchNode;
+    Peptide* MatchNode;
+    Peptide* NextMatchNode;
+    Peptide* FreeNode;
+    Peptide* FreePrev;
+    int MatchCount = 0;
+    int VerboseFlag = 0;
+    int TweakIndex;
+    MSSpectrum* Spectrum = Info->Spectrum;
+    SpectrumNode* Node = Info->Spectrum->Node;
+
+    
+    if (Spectrum->PeakCount < 10) // Demand AT LEAST ten peaks (even that many is a bit silly; 50 is more like it)
+    {
+        return;
+    }
+
+    for (TweakIndex = 0; TweakIndex < TWEAK_COUNT; TweakIndex++)
+    {
+        if (!Node->Tweaks[TweakIndex].Charge)
+        {
+            continue;
+        }
+	
+        fseek(Info->DB->DBFile, 0, 0);
+        // *** PRM scores now *** 
+        Spectrum->Charge = Node->Tweaks[TweakIndex].Charge;
+        Spectrum->ParentMass = Node->Tweaks[TweakIndex].ParentMass;
+        //vprintf("[V] Assign isotope neighbors\n");
+        //SpectrumAssignIsotopeNeighbors(Node->Spectrum);
+        //vprintf("[V] Find isotopic peaks\n");
+        //SpectrumFindIsotopicPeaks(Node->Spectrum);
+        FreeTagGraph(Node->Spectrum->Graph);
+        vprintf("[V] Construct tag graph\n");
+        Node->Spectrum->Graph = ConstructTagGraph(Node->Spectrum);
+        vprintf("[V] Add nodes\n");
+        TagGraphAddNodes(Node->Spectrum->Graph, Node->Spectrum);
+        vprintf("[V] Score nodes\n");
+        TagGraphScorePRMNodes(NULL, Node->Spectrum->Graph, Node->Spectrum, Node->Tweaks + TweakIndex);
+        vprintf("[V] Populate back edges\n");
+        if (GlobalOptions->MaxPTMods > 1)
+        {
+            TagGraphPopulateBackEdges(Node->Spectrum->Graph);
+        }
+        vprintf("[V] Set PRM scores\n");
+        SetSpectrumPRMScores(Node->Spectrum, Node->Tweaks + TweakIndex); 
+        vprintf("[V] Tagless search 1:\n");
+	
+        SearchDatabaseTagless(Info, GlobalOptions->MaxPTMods, VerboseFlag, Node->Tweaks + TweakIndex);
+        ////////////
+        vprintf("[V] Score matches:\n");
+        vprintf("[V] merge multi-charge list:\n");
+        OldMatchNode = FirstMatch;
+        if (FirstMatch)
+        {
+            NextOldMatchNode = FirstMatch->Next;
+        }
+        else
+        {
+            NextOldMatchNode = NULL;
+        }
+        MatchNode = Node->FirstMatch;
+        if (MatchNode)
+        {
+            NextMatchNode = MatchNode->Next;
+        }
+        else
+        {
+            NextMatchNode = NULL;
+        }
+        MatchCount = 0;
+        FirstMatch = NULL;
+        LastMatch = NULL;
+        while (MatchNode || OldMatchNode)
+        {
+            if (!MatchNode || (OldMatchNode && MatchNode->InitialScore < OldMatchNode->InitialScore))
+            {
+                // Add one of the old matches to the master-list:
+                if (FirstMatch)
+                {
+                    LastMatch->Next = OldMatchNode;
+                    OldMatchNode->Prev = LastMatch;
+                    LastMatch = OldMatchNode;
+                    LastMatch->Next = NULL;
+                }
+                else
+                {
+                    FirstMatch = OldMatchNode;
+                    LastMatch = OldMatchNode;
+                    OldMatchNode->Prev = NULL;
+                    OldMatchNode->Next = NULL;
+                }
+                OldMatchNode = NextOldMatchNode;
+                if (OldMatchNode)
+                {
+                    NextOldMatchNode = OldMatchNode->Next;
+                }
+                else
+                {
+                    NextOldMatchNode = NULL;
+                }
+
+            }
+            else
+            {
+                // Add one of the new matches to the master-list:
+                if (FirstMatch)
+                {
+                    LastMatch->Next = MatchNode;
+                    MatchNode->Prev = LastMatch;
+                    LastMatch = MatchNode;
+                    LastMatch->Next = NULL;
+                }
+                else
+                {
+                    FirstMatch = MatchNode;
+                    LastMatch = MatchNode;
+                    MatchNode->Prev = NULL;
+                    MatchNode->Next = NULL;
+                }
+                MatchNode = NextMatchNode;
+                if (MatchNode)
+                {
+                    NextMatchNode = MatchNode->Next;
+                }
+                else
+                {
+                    NextMatchNode = NULL;
+                }
+            }
+            MatchCount++;
+            if (MatchCount >= GlobalOptions->StoreMatchCount)
+            {
+                break;
+            }
+        }
+        // Now we can free any remaining matches from these lists:
+        FreeNode = MatchNode;
+        FreePrev = NULL;
+        while (FreeNode)
+        {
+            if (FreePrev)
+            {
+                FreePeptideNode(FreePrev);
+            }
+            FreePrev = FreeNode;
+            FreeNode = FreeNode->Next;
+        }
+        if (FreePrev)
+        {
+            FreePeptideNode(FreePrev);
+        }
+        FreeNode = OldMatchNode;
+        FreePrev = NULL;
+        while (FreeNode)
+        {
+            if (FreePrev)
+            {
+                FreePeptideNode(FreePrev);
+            }
+            FreePrev = FreeNode;
+            FreeNode = FreeNode->Next;
+        }
+        if (FreePrev)
+        {
+            FreePeptideNode(FreePrev);
+        }
+        Node->FirstMatch = NULL;
+        Node->LastMatch = NULL;
+        Node->MatchCount = 0;
+        // Check the master-list for duplicates:
+        for (MatchNode = FirstMatch; MatchNode; MatchNode = MatchNode->Next)
+        {
+            for (OldMatchNode = MatchNode->Next; OldMatchNode; OldMatchNode = OldMatchNode->Next)
+            {
+                if (OldMatchNode->RecordNumber == MatchNode->RecordNumber && !strcmp(OldMatchNode->Bases, MatchNode->Bases) && 
+                    !memcmp(MatchNode->AminoIndex, OldMatchNode->AminoIndex, sizeof(int) * MAX_PT_MODS) && 
+                    !memcmp(MatchNode->ModType, OldMatchNode->ModType, sizeof(int) * MAX_PT_MODS))
+                {
+                    // Free OldMatchNode, it's a duplicate!
+                    if (OldMatchNode->Prev)
+                    {
+                        OldMatchNode->Prev->Next = OldMatchNode->Next;
+                    }
+                    if (OldMatchNode->Next)
+                    {
+                        OldMatchNode->Next->Prev = OldMatchNode->Prev;
+                    }
+                    if (LastMatch == OldMatchNode)
+                    {
+                        LastMatch = OldMatchNode->Prev;
+                    }
+                    FreePeptideNode(OldMatchNode);
+                    OldMatchNode = MatchNode->Next;
+                    if (!OldMatchNode)
+                    {
+                        break;
+                    }
+                }
+            }
+        }
+    } // tweak loop
+    Node->FirstMatch = FirstMatch;
+    Node->LastMatch = LastMatch;
+    Node->MatchCount = MatchCount;
+    vprintf("[V] Complete.\n");
+}
+
+TrieNode* ConstructTagsForSpectrum(TrieNode* Root, SpectrumNode* Node, int TagCount)
+{
+    int TweakIndex;
+    MSSpectrum* Spectrum;
+    //
+    Spectrum = Node->Spectrum;
+    for (TweakIndex = 0; TweakIndex < TWEAK_COUNT; TweakIndex++)
+    {
+        if (!Node->Tweaks[TweakIndex].Charge)
+        {
+            continue;
+        }
+	//printf("Constructing tags for %d tweak %d\n",Node->ScanNumber, TweakIndex);
+        Spectrum->Charge = Node->Tweaks[TweakIndex].Charge;
+        Spectrum->ParentMass = Node->Tweaks[TweakIndex].ParentMass;
+        //SpectrumAssignIsotopeNeighbors(Spectrum);
+        //SpectrumFindIsotopicPeaks(Spectrum);
+		//sam Temp Insert
+	AttemptParentMassPeakRemoval( Spectrum);
+	//printf("PeakRemoved: %d\n",Spectrum->RemovedPeakIndex);
+        Root = GenerateTagsFromSpectrum(Spectrum, Root, TagCount, Node->Tweaks + TweakIndex);
+		//Sam Temp Insert
+		RestoreParentMassPeakRemoval(Spectrum);
+
+    }
+
+    
+    return Root;
+}
+
+void OutputTagsToFile(FILE* OutputFile, char* SpectrumFileName, int ScanNumber, int SpectrumFilePos, TrieTag* TagArray, int TagCount)
+{
+    int TagIndex;
+    //TrieTagHanger* Tag;
+    TrieTag* Tag;
+    TagCount = min(TagCount, GlobalOptions->GenerateTagCount);
+    for (TagIndex = 0; TagIndex < TagCount; TagIndex++)
+    {
+        Tag = TagArray + TagIndex;
+        fprintf(OutputFile, "%s\t", SpectrumFileName);
+        fprintf(OutputFile, "%d\t", ScanNumber);
+        fprintf(OutputFile, "%d\t", SpectrumFilePos);
+        fprintf(OutputFile, "%d\t", TagIndex);
+        fprintf(OutputFile, "%.2f\t", Tag->PrefixMass / (float)MASS_SCALE);
+        fprintf(OutputFile, "%s\t", Tag->Tag);
+        fprintf(OutputFile, "%.2f\t", Tag->SuffixMass / (float)MASS_SCALE);
+        fprintf(OutputFile, "%.2f\t", Tag->Score);
+        fprintf(OutputFile, "\n");
+    }
+}
+
+int MergeIdenticalTags(TrieTag* TagArray, int TagCount)
+{
+    int TagIndexA;
+    int TagIndexB;
+    TrieTag* TagA;
+    TrieTag* TagB;
+    int Diff;
+    //
+    for (TagIndexA = 0; TagIndexA < TagCount; TagIndexA++)
+    {
+        TagA = TagArray + TagIndexA;
+        TagIndexB = TagIndexA + 1;
+        while (TagIndexB < TagCount)
+        {
+            TagB = TagArray + TagIndexB;
+            if (strcmp(TagA->Tag, TagB->Tag))
+            {
+                TagIndexB++;
+                continue;
+            }
+            Diff = abs(TagA->PrefixMass - TagB->PrefixMass);
+            if (Diff > GlobalOptions->Epsilon)
+            {
+                TagIndexB++;
+                continue;
+            }
+            Diff = abs(TagA->SuffixMass - TagB->SuffixMass);
+            if (Diff > GlobalOptions->Epsilon)
+            {
+                TagIndexB++;
+                continue;
+            }
+            // These tags are essentially identical!  Remove B.
+            memmove(TagArray + TagIndexB, TagArray + TagIndexB + 1, sizeof(TrieTag) * (TagCount - TagIndexB - 1));
+            TagCount--;
+            // TagIndexB is unchanged.
+        }
+    }
+    return TagCount;
+}
+
+static TrieTag* _TagsOnlyTagList = NULL;
+
+// Perform ONLY tag generation...and output the resulting tags.
+void PerformTagGeneration(void)
+{
+    SpectrumNode* SNode;
+    FILE* SpectrumFile;
+    int Result;
+    int TagCount = GlobalOptions->GenerateTagCount;
+    int TweakIndex;
+    int TotalTagCount;
+    int TagIndex;
+    int TagsGenerated;
+    SpectrumTweak* Tweak;
+    TrieTag* Tags;
+    //
+    // Write a HEADER to the output file:
+    fprintf(GlobalOptions->OutputFile, "#File\tScan\tFilePos\tTagIndex\tPrefix\tTag\tSuffix\tTagscore\t\n");
+    if (!_TagsOnlyTagList)
+    {
+        _TagsOnlyTagList = (TrieTag*)calloc(TWEAK_COUNT * TagCount + 1, sizeof(TrieTag));
+    }
+    
+    
+    BuildDecorations();
+    for (SNode = GlobalOptions->FirstSpectrum; SNode; SNode = SNode->Next)
+    {
+        SpectrumFile = fopen(SNode->InputFile->FileName, "rb");
+        if (SpectrumFile)
+        {
+            SNode->Spectrum = (MSSpectrum*)calloc(1, sizeof(MSSpectrum));
+            SNode->Spectrum->Node = SNode;
+            fseek(SpectrumFile, SNode->FilePosition, 0);
+            Result = SpectrumLoadFromFile(SNode->Spectrum, SpectrumFile);
+            fclose(SpectrumFile);
+            if (!Result)
+            {
+                SafeFree(SNode->Spectrum);
+                SNode->Spectrum = NULL;
+                continue;
+            }
+            else
+            {
+                WindowFilterPeaks(SNode->Spectrum, 0, 0);
+                IntensityRankPeaks(SNode->Spectrum);
+            }
+            if (!SNode->PMCFlag)
+            {
+                TweakSpectrum(SNode);
+            }
+            TotalTagCount = 0;
+            for (TweakIndex = 0; TweakIndex < TWEAK_COUNT; TweakIndex++)
+            {
+                if (!SNode->Tweaks[TweakIndex].Charge)
+                {
+                    continue;
+                }
+                Tweak = SNode->Tweaks + TweakIndex;
+                SNode->Spectrum->Charge = Tweak->Charge;
+                SNode->Spectrum->ParentMass = Tweak->ParentMass;
+                //SpectrumAssignIsotopeNeighbors(SNode->Spectrum);
+                //SpectrumFindIsotopicPeaks(SNode->Spectrum);
+                SNode->Spectrum->Graph = ConstructTagGraph(SNode->Spectrum);
+                TagGraphAddNodes(SNode->Spectrum->Graph, SNode->Spectrum);
+                TagGraphScorePRMNodes(NULL, SNode->Spectrum->Graph, SNode->Spectrum, Tweak);
+                TagGraphPopulateEdges(SNode->Spectrum->Graph);
+                Tags = TagGraphGenerateTags(SNode->Spectrum->Graph, SNode->Spectrum, &TagsGenerated, TagCount, Tweak, TAG_EDGE_SCORE_MULTIPLIER, NULL);
+
+                for (TagIndex = 0; TagIndex < min(TagCount, TagsGenerated); TagIndex++)
+                {
+                    memcpy(_TagsOnlyTagList + TotalTagCount, Tags + TagIndex, sizeof(TrieTag));
+                    TotalTagCount++;
+                }
+                FreeTagGraph(SNode->Spectrum->Graph);
+                SNode->Spectrum->Graph = NULL;
+            } // Tweak list
+            qsort(_TagsOnlyTagList, TotalTagCount, sizeof(TrieTag), (QSortCompare)CompareTagScores);
+            TotalTagCount = MergeIdenticalTags(_TagsOnlyTagList, TotalTagCount);
+            OutputTagsToFile(GlobalOptions->OutputFile, SNode->InputFile->FileName, SNode->ScanNumber, SNode->FilePosition, _TagsOnlyTagList, TotalTagCount);
+            // Clean up the spectrum:
+            FreeSpectrum(SNode->Spectrum);
+            SNode->Spectrum = NULL;
+        }        
+    }
+    SafeFree(_TagsOnlyTagList);
+    _TagsOnlyTagList = NULL;
+}
+
+#define TEMP_TAGGING_INPUT "TempTagging.dta"
+#define TEMP_TAGGING_OUTPUT "TempTags.txt"
+
+// Call upon PepNovo to generate some tags for us.
+// To do so, we need to write out a temporary .dta file!
+TrieNode* ConstructTagsExternalTagger(TrieNode* Root, SpectrumNode* Node, int TagCount)
+{
+    FILE* TempDTAFile;
+    FILE* TempTagOutputFile;
+    int TweakIndex;
+    int PeakIndex;
+    SpectralPeak* Peak;
+    char CommandLine[2048];
+    char LineBuffer[MAX_LINE_LENGTH];
+    int BytesToRead;
+    int BufferPos = 0;
+    int BytesRead;
+    int BufferEnd = 0;
+    int LineNumber = 0;
+    int PrevLineFilePos = 0;
+    int LineFilePos = 0;
+    char TextBuffer[BUFFER_SIZE * 2];
+    char* BitA;
+    char* BitB;
+    char* BitC;
+    int WithinTagsFlag = 0;
+    float PrefixMass;
+    float Probability;
+    int DuplicateFlag;
+    TrieTag* NewTag;
+    char* TempAA;
+    char AnnotationBuffer[256];
+    char ModBuffer[256];
+    int TagIndex = 0;
+    int AminoIndex;
+    MassDelta* Delta;
+    int ModBufferPos;
+    int ModIndex;
+    int TotalTagCount = 0;
+    //
+    if (!Root)
+    {
+        Root = NewTrieNode();
+        Root->FailureNode = Root;
+    }
+    // Initialization for tags-only:
+    if (GlobalOptions->RunMode & RUN_MODE_TAGS_ONLY)
+    {
+        if (!_TagsOnlyTagList)
+        {
+            _TagsOnlyTagList = (TrieTag*)calloc(TWEAK_COUNT * TagCount + 1, sizeof(TrieTag));
+        }
+    }
+
+    for (TweakIndex = 0; TweakIndex < TWEAK_COUNT; TweakIndex++)
+    {
+        // Skip this mass-tweak, if it's not a valid charge/mass combo
+        if (!Node->Tweaks[TweakIndex].Charge)
+        {
+            continue;
+        }
+        unlink(TEMP_TAGGING_INPUT);
+        TempDTAFile = fopen(TEMP_TAGGING_INPUT, "wb");
+        if (!TempDTAFile)
+        {
+            printf("** Error opening tag input file %s for writing!", TEMP_TAGGING_INPUT);
+            return Root;
+        }
+        fprintf(TempDTAFile, "%.3f %d\n", Node->Tweaks[TweakIndex].ParentMass / (float)MASS_SCALE, Node->Tweaks[TweakIndex].Charge);
+        for (PeakIndex = 0; PeakIndex < Node->Spectrum->PeakCount; PeakIndex++)
+        {
+            Peak = Node->Spectrum->Peaks + PeakIndex;
+            fprintf(TempDTAFile, "%.3f %.3f\n", Peak->Mass / (float)MASS_SCALE, Peak->Intensity);
+        }
+        fclose(TempDTAFile);
+        // Call out to pepnovo:
+        unlink(TEMP_TAGGING_OUTPUT);
+        sprintf(CommandLine, "pepnovo.exe -dta %s -model tryp_model.txt -num_tags %d > %s", TEMP_TAGGING_INPUT, TagCount, TEMP_TAGGING_OUTPUT);
+        system(CommandLine);
+        TempTagOutputFile = fopen(TEMP_TAGGING_OUTPUT, "rb");
+        if (!TempTagOutputFile)
+        {
+            printf("** Error: Unable to open tag output file '%s'\n", TEMP_TAGGING_OUTPUT);
+            return Root;
+        }
+        WithinTagsFlag = 0;
+        while (1)
+        {
+            BytesToRead = BUFFER_SIZE - BufferEnd;
+            BytesRead = ReadBinary(TextBuffer + BufferEnd, sizeof(char), BytesToRead, TempTagOutputFile);
+            BufferEnd += BytesRead;
+            TextBuffer[BufferEnd] = '\0';
+            if (BufferPos == BufferEnd)
+            { 
+                // We're done!
+                break;
+            }
+            // Copy a line of text to the line buffer.  Skip spaces, and stop at carriage return or newline.
+            
+            BufferPos = CopyBufferLine(TextBuffer, BufferPos, BufferEnd, LineBuffer, 0);
+            LineNumber += 1;
+            PrevLineFilePos = LineFilePos;
+            LineFilePos += BufferPos;
+            //printf("Line %d starts at %d\n", LineNumber, LineFilePos);
+            // Now, move the remaining text to the start of the buffer:
+            memmove(TextBuffer, TextBuffer + BufferPos, BufferEnd - BufferPos);
+            BufferEnd -= BufferPos;
+            BufferPos = 0;
+            // Now, process this line of text!
+            if (!LineBuffer[0])
+            {
+                continue;
+            }
+            BitA = strtok(LineBuffer, "\t\r\n");
+            BitB = strtok(NULL, "\t\r\n");
+            if (!BitB)
+            {
+                continue;
+            }
+            BitC = strtok(NULL, "\t\r\n");
+            if (!BitC)
+            {
+                continue;
+            }
+            if (!strcmp(BitC, "Probability:") && !strcmp(BitB, "Tag"))
+            {
+                WithinTagsFlag = 1;
+            }
+            if (WithinTagsFlag)
+            {
+                PrefixMass = (float)atof(BitA);
+                Probability = (float)atof(BitC);
+                if (Probability < (float)0.1)
+                {
+                    continue;
+                }
+                NewTag = _TagsOnlyTagList + TotalTagCount;
+                memset(NewTag, 0, sizeof(TrieTag));
+                // Special code:
+                // PepNovo may include MODIFICATIONS in its tags - so, we must parse them.
+                // We assume that (a) modifications are written in the form %+d, and (b) we
+                // know of the modification type from the inspect input file.
+                TempAA = BitB;
+                AminoIndex = 0;
+                ModBufferPos = 0;
+                
+                while (*TempAA)
+                {
+                    if (*TempAA >= 'A' && *TempAA <= 'Z')
+                    {
+                        // an amino acid - so, finish the modification-in-progress, if there is one.
+                        if (ModBufferPos && AminoIndex)
+                        {
+                            if (NewTag->ModsUsed == MAX_PT_MODS)
+                            {
+                                printf("** Error tagging scan %d from file %s: Too many PTMs!\n", Node->ScanNumber, Node->InputFile->FileName);
+                                break;
+                            }
+                            ModBuffer[ModBufferPos] = '\0';
+                            Delta = FindPTModByName(NewTag->Tag[AminoIndex - 1], ModBuffer);
+                            if (Delta)
+                            {
+                                NewTag->AminoIndex[NewTag->ModsUsed] = AminoIndex - 1;
+                                NewTag->ModType[NewTag->ModsUsed] = Delta;
+                                NewTag->ModsUsed++;
+                            }
+                            else
+                            {
+                                printf("** Error tagging scan %d from file %s: Modification %s not understood!\n", Node->ScanNumber, Node->InputFile->FileName, ModBuffer);
+                            }
+                        }
+                        ModBufferPos = 0;
+                        // Add the AA:
+                        NewTag->Tag[AminoIndex++] = *TempAA;
+                    }// aa
+                    else
+                    {
+                        ModBuffer[ModBufferPos++] = *TempAA;
+                    } // not aa
+                    TempAA++;
+                }
+                NewTag->Tag[AminoIndex] = '\0';
+                // Finish any pending mod (duplicated from the loop body above).
+                // (No 'break' here: breaking out would abandon the rest of the PepNovo
+                // output for this spectrum; we simply skip the extra modification.)
+                if (ModBufferPos && AminoIndex)
+                {
+                    if (NewTag->ModsUsed == MAX_PT_MODS)
+                    {
+                        printf("** Error tagging scan %d from file %s: Too many PTMs!\n", Node->ScanNumber, Node->InputFile->FileName);
+                    }
+                    else
+                    {
+                        ModBuffer[ModBufferPos] = '\0';
+                        Delta = FindPTModByName(NewTag->Tag[AminoIndex - 1], ModBuffer);
+                        if (Delta)
+                        {
+                            NewTag->AminoIndex[NewTag->ModsUsed] = AminoIndex - 1;
+                            NewTag->ModType[NewTag->ModsUsed] = Delta;
+                            NewTag->ModsUsed++;
+                        }
+                        else
+                        {
+                            printf("** Error tagging scan %d from file %s: Modification %s not understood!\n", Node->ScanNumber, Node->InputFile->FileName, ModBuffer);
+                        }
+                    }
+                }
+
+                //strncpy(NewTag->Tag, BitB, MAX_TAG_LENGTH);
+                NewTag->Charge = Node->Tweaks[TweakIndex].Charge;
+                NewTag->ParentMass = Node->Tweaks[TweakIndex].ParentMass;
+                NewTag->PSpectrum = Node->Spectrum;
+                NewTag->Tweak = Node->Tweaks + TweakIndex; // point at the tweak this tag was generated from
+                NewTag->PrefixMass = (int)(PrefixMass * MASS_SCALE + 0.5);
+                NewTag->SuffixMass = Node->Spectrum->ParentMass - NewTag->PrefixMass;
+                NewTag->Score = Probability;
+                for (TempAA = NewTag->Tag; *TempAA; TempAA++)
+                {
+                    NewTag->SuffixMass -= PeptideMass[(*TempAA)];
+                }
+                for (ModIndex = 0; ModIndex < MAX_PT_MODS; ModIndex++)
+                {
+                    if (NewTag->AminoIndex[ModIndex] >= 0 && NewTag->ModType[ModIndex])
+                    {
+                        NewTag->SuffixMass -= NewTag->ModType[ModIndex]->RealDelta;
+                    }
+                }
+                TotalTagCount++;
+            } // Handle a line AFTER the tag header
+        } // Loop over file lines
+        fclose(TempTagOutputFile);
+    } // Loop over tweaks
+    qsort(_TagsOnlyTagList, TotalTagCount, sizeof(TrieTag), (QSortCompare)CompareTagScores);
+    TotalTagCount = MergeIdenticalTags(_TagsOnlyTagList, TotalTagCount);
+    TotalTagCount = min(TotalTagCount, GlobalOptions->GenerateTagCount);
+    for (TagIndex = 0; TagIndex < TotalTagCount; TagIndex++)
+    {
+        NewTag = _TagsOnlyTagList + TagIndex;
+        if (GlobalOptions->RunMode & RUN_MODE_TAGS_ONLY)
+        {
+            fprintf(GlobalOptions->OutputFile, "%s\t", Node->InputFile->FileName);
+            fprintf(GlobalOptions->OutputFile, "%d\t", Node->ScanNumber);
+            fprintf(GlobalOptions->OutputFile, "%d\t", Node->FilePosition);
+            fprintf(GlobalOptions->OutputFile, "%d\t", TagIndex);
+            fprintf(GlobalOptions->OutputFile, "%.2f\t", NewTag->PrefixMass / (float)MASS_SCALE);
+            WriteTagToString(NewTag, AnnotationBuffer, 1);
+            fprintf(GlobalOptions->OutputFile, "%s\t", AnnotationBuffer);
+            fprintf(GlobalOptions->OutputFile, "%.2f\t", NewTag->SuffixMass / (float)MASS_SCALE);
+            fprintf(GlobalOptions->OutputFile, "%.2f\t", NewTag->Score);
+            fprintf(GlobalOptions->OutputFile, "\n");
+        }
+        else
+        {
+            Root = AddTagToTrie(Root, NewTag, &DuplicateFlag);
+        }
+    }
+    return Root;
+}
+
+int SearchSpectrumBlockMSAlignment(SearchInfo* Info, SpectrumNode* FirstBlockSpectrum, 
+    SpectrumNode* LastBlockSpectrum, DatabaseFile* DB)
+{
+    char MatchedPeptideVerbose[256];
+    SpectrumNode* BlockSpectrum;
+    int SpectraSearched = 0;
+    int TweakIndex;
+    Peptide* Match;
+    MSSpectrum* Spectrum;
+    //
+    for (BlockSpectrum = FirstBlockSpectrum; BlockSpectrum != LastBlockSpectrum; BlockSpectrum = BlockSpectrum->Next)
+    {
+        if (!BlockSpectrum->Spectrum)
+        {
+            continue;
+        }
+        WindowFilterPeaks(BlockSpectrum->Spectrum, 0, 0);
+        IntensityRankPeaks(BlockSpectrum->Spectrum);
+        if (!BlockSpectrum->PMCFlag)
+        {
+            TweakSpectrum(BlockSpectrum);
+        }
+
+
+        //fflush(stdout);
+
+        if (!BlockSpectrum->Spectrum)
+        {
+            continue;
+        }
+        Info->Spectrum = BlockSpectrum->Spectrum;
+        MutationModeSearch(Info);
+        if (!(GlobalOptions->RunMode & RUN_MODE_RAW_OUTPUT))
+        {
+            MQScoreSpectralMatches(BlockSpectrum);
+        }
+        else
+        {
+            Spectrum = BlockSpectrum->Spectrum;
+            Match = BlockSpectrum->FirstMatch;
+            while (Match)
+            {
+                WriteMatchToString(Match, MatchedPeptideVerbose, 1);
+                fprintf(GlobalOptions->OutputFile, "%s\t%d\t%s\t%d\t%d\t%d\n", Spectrum->Node->InputFile->FileName, Spectrum->Node->ScanNumber, MatchedPeptideVerbose, Match->InitialScore, Match->FilePos, Match->Tweak->ParentMass);
+                Match = Match->Next;
+            }
+        }
+        //OutputMatchesForSpectrum(BlockSpectrum, GlobalOptions->OutputFile);
+        //FreeMatchList(BlockSpectrum);
+        // Free PRM scores:
+        for (TweakIndex = 0; TweakIndex < TWEAK_COUNT; TweakIndex++)
+        {
+            if (BlockSpectrum->Tweaks[TweakIndex].PRMScores)
+            {
+                SafeFree(BlockSpectrum->Tweaks[TweakIndex].PRMScores);
+                BlockSpectrum->Tweaks[TweakIndex].PRMScores = NULL;
+            }
+        }
+        if (BlockSpectrum->Spectrum->Graph)
+        {
+            FreeTagGraph(BlockSpectrum->Spectrum->Graph);
+            BlockSpectrum->Spectrum->Graph = NULL;
+        }
+        SpectraSearched++;
+    }
+    return SpectraSearched;
+}
+
+int SearchSpectrumBlockTrie(SearchInfo* Info, SpectrumNode* FirstBlockSpectrum, SpectrumNode* LastBlockSpectrum, DatabaseFile* DB)
+{
+    int TagCount;
+    SpectrumNode* BlockSpectrum;
+    char TagBuffer[256];
+    int SpectraSearched = 0;
+    //
+    // Construct tags for these spectra, and scan with trie:
+
+
+    TagCount = GlobalOptions->GenerateTagCount;
+    for (BlockSpectrum = FirstBlockSpectrum; BlockSpectrum != LastBlockSpectrum; BlockSpectrum = BlockSpectrum->Next)
+    {
+        if (!BlockSpectrum->Spectrum)
+        {
+            continue;
+        }
+        SpectraSearched++;
+        
+	//ARI_MOD - move tag generation below
+	// We perform peak filtering AFTER calling the external tagger. 
+        //if (GlobalOptions->ExternalTagger)
+        //{
+        //    Info->Root = ConstructTagsExternalTagger(Info->Root, BlockSpectrum, TagCount);
+        //}
+
+        WindowFilterPeaks(BlockSpectrum->Spectrum, 0, 0);
+        IntensityRankPeaks(BlockSpectrum->Spectrum);
+
+        if (!GlobalOptions->ExternalTagger && !BlockSpectrum->PMCFlag) //ARI_MOD - no tweaking if using external tags
+        {
+            TweakSpectrum(BlockSpectrum);
+        }
+
+        if (!GlobalOptions->ExternalTagger)
+        {
+            Info->Root = ConstructTagsForSpectrum(Info->Root, BlockSpectrum, TagCount);
+        }
+        else //ARI_MOD - get tags from the TagHolder and add them to the trie,
+             //then prepare the spectrum for scoring (the part of TweakSpectrum() that is still needed here)
+        {
+            Info->Root = AddExternalTags(Info->Root, BlockSpectrum);
+            PrepareSpectrumForIonScoring(PRMModelCharge2, BlockSpectrum->Spectrum, 0);
+        }
+    }
+    // Special case for external tagger, tags only:
+    if (GlobalOptions->ExternalTagger && (GlobalOptions->RunMode & RUN_MODE_TAGS_ONLY))
+    {
+        //
+    }
+    else
+    {
+
+        memset(TagBuffer, 0, sizeof(char)*256);
+        InitializeTrieFailureNodes(Info->Root, Info->Root, TagBuffer);
+        //printf("Scan file with trie...\n");
+        //fflush(stdout);
+        fseek(Info->DB->DBFile, 0, 0);
+        switch (DB->Type)
+        {
+        case evDBTypeMS2DB:
+	    SearchMS2DB(Info);
+            break;
+        case evDBTypeSpliceDB:
+	    SearchSplicableGenes(Info);
+            break;
+        case evDBTypeTrie:
+            ScanFileWithTrie(Info);
+            break;
+        default:
+            break;
+        }
+    }
+    return SpectraSearched;
+}
+// Return number of spectra searched
+int SearchSpectrumBlockAgainstDB(SpectrumNode* FirstBlockSpectrum, SpectrumNode* LastBlockSpectrum, DatabaseFile* DB)
+{
+    SearchInfo* Info;
+    int SpectraSearched;
+    //
+    Info = (SearchInfo*)calloc(1, sizeof(SearchInfo));
+    Info->DB = DB;
+
+    // MutationMode search is 'unrestricted, but not blind' mode.
+    if (GlobalOptions->RunMode & (RUN_MODE_MUTATION | RUN_MODE_BLIND))
+    {
+      
+        SpectraSearched = SearchSpectrumBlockMSAlignment(Info, FirstBlockSpectrum, LastBlockSpectrum, DB);
+    }
+    else
+    {
+
+        SpectraSearched = SearchSpectrumBlockTrie(Info, FirstBlockSpectrum, LastBlockSpectrum, DB);
+        //if (GlobalOptions->RunMode & RUN_MODE_BLINDTAG)
+        //{
+	//   DebugPrintBlindTagExtensions(Info);
+        //}
+    }
+    
+    FreeTrieNode(Info->Root);
+    Info->Root = NULL;
+    free(Info);
+    return SpectraSearched;
+}
+
+// I want to see some basic information about the onesided tag extension
+// like the number of DB hits/tag and the number of onesided extends/tag
+//SpectrumFileName-ScanCount-prefixMass-prefixExtends-Tag-TagHits-SuffixMass-SuffixExtends-score
+void DebugPrintBlindTagExtensions(SearchInfo* Info)
+{
+    TrieNode* Root = Info->Root;
+    TrieNode* L1 = NULL;
+    TrieNode* L2 = NULL;
+    TrieNode* L3 = NULL;
+    TrieTagHanger* Hanger = NULL;
+    TrieTag* Tag = NULL;
+    int LevelOneKids;
+    int LevelTwoKids;
+    int LevelThreeKids;
+    SpectrumNode* SNode = NULL;
+    FILE* OutputFile;
+    
+    printf("I GOT TO THE DEBUG\n");
+    OutputFile = fopen("BlindTaggingInfo.txt", "wb");
+    //fprintf(OutputFile, "SpectrumFileName\tScanCount\tPrefixMass\tPrefixExtends\t");
+    //fprintf(OutputFile, "Tag\tTagHits\tSuffixMass\tSuffixExtends\tScore\n");
+    if (!OutputFile)
+    {
+        printf("Unable to upen the output file. BlindTaggingInfo\n");
+        return;
+    }
+    for (LevelOneKids = 0; LevelOneKids < TRIE_CHILD_COUNT; LevelOneKids++)
+    {
+        //every node here is of depth 1, and has a single letter word
+        if (LevelOneKids == ('I'-'A') || LevelOneKids == ('Q'-'A'))
+        { //don't print out both nodes for I and L, or for Q and K
+            continue;
+        }
+        L1 = Root->Children[LevelOneKids];
+        if (L1 != NULL)
+        {
+            for (LevelTwoKids = 0; LevelTwoKids < TRIE_CHILD_COUNT; LevelTwoKids++)
+            {
+                if (LevelTwoKids == ('I'-'A') || LevelTwoKids == ('Q'-'A'))
+                {
+                    continue;
+                }
+                L2 = L1->Children[LevelTwoKids];
+                if(L2 != NULL)
+                {
+                    for (LevelThreeKids = 0; LevelThreeKids < TRIE_CHILD_COUNT; LevelThreeKids++)
+                    {
+                        if (LevelThreeKids == ('I'-'A') || LevelThreeKids == ('Q'-'A'))
+                        {
+                            continue;
+                        }
+                        L3 = L2->Children[LevelThreeKids];
+                        if (L3 != NULL)
+                        {
+                            //Level three kids should be a tripeptide, with a hanger and tags
+                            //printf("My depth is %d\n",L3->Depth);
+                            Hanger = L3->FirstTag;
+                            while (Hanger) // != NULL; Go through all the hangers on a tag
+                            {
+                                Tag = Hanger->Tag;
+                                SNode = Tag->PSpectrum->Node;
+                                fprintf(OutputFile, "%s\t",SNode->InputFile->FileName);
+                                fprintf(OutputFile, "%d\t",SNode->ScanNumber);
+                                fprintf(OutputFile, "%.2f\t", Tag->PrefixMass / (float)MASS_SCALE);
+                                fprintf(OutputFile, "%d\t",Tag->PrefixExtends);
+                                fprintf(OutputFile, "%s\t", Tag->Tag);
+                                fprintf(OutputFile, "%d\t", Tag->DBTagMatches);
+                                fprintf(OutputFile, "%.2f\t", Tag->SuffixMass / (float)MASS_SCALE);
+                                fprintf(OutputFile, "%d\t",Tag->SuffixExtends);
+                                fprintf(OutputFile, "%.2f\t", Tag->Score);
+                                fprintf(OutputFile, "\n");
+                                fflush(OutputFile);
+
+                                Hanger = Hanger->Next;
+                            }// while
+                        }
+                    } //Level three kids
+                }
+            }// level 2 kids
+        }
+    }// level one kids
+    fclose(OutputFile);
+}
+
+char* FindExtension(char* FileName)
+{
+    char* ExtensionString;
+    //
+    ExtensionString = FileName + strlen(FileName);
+    while (ExtensionString > FileName)
+    {
+        ExtensionString--;
+        if (*ExtensionString == '.')
+        {
+            return ExtensionString;
+        }
+    }
+    return FileName + strlen(FileName);
+}
+
+// Set DB->IndexFileName, based upon the FileName and database type.
+void FindDatabaseIndexFile(DatabaseFile* DB)
+{
+    char* Extension;
+    
+    strcpy(DB->IndexFileName, DB->FileName);
+    Extension = FindExtension(DB->IndexFileName);
+    sprintf(Extension, ".index\0");
+    DB->IndexFile = fopen(DB->IndexFileName, "rb");
+    if (DB->IndexFile)
+    {
+      
+        return;
+    }
+    
+    sprintf(Extension, ".ms2index\0");
+    
+   
+    DB->IndexFile = fopen(DB->IndexFileName, "rb");
+    if (DB->IndexFile)
+    {
+        return;
+    }
+    
+    // No index file; that's ok.
+    return;
+}
+
+// Search all our SpectrumNodes, one block at a time.
+// Once the search is complete, compute p-values and output search results.
+void RunSearch(void) 
+{
+    int BlockSize;
+    SpectrumNode* FirstBlockSpectrum;
+    SpectrumNode* LastBlockSpectrum;
+    SpectrumNode* BlockSpectrum;
+    FILE* SpectrumFile;
+    int Result;
+    TrieNode* Root = NULL;
+    int SpectraSearched = 0;
+    int ThisBlockSpectraSearched;
+    DatabaseFile* DB;
+
+    // Find index filenames:
+    for (DB = GlobalOptions->FirstDatabase; DB; DB = DB->Next)
+    {
+        FindDatabaseIndexFile(DB);
+    }
+    // Open database files:
+    for (DB = GlobalOptions->FirstDatabase; DB; DB = DB->Next)
+    {
+        if (!DB->DBFile)
+        {
+            DB->DBFile = fopen(DB->FileName, "rb");
+        }
+        if (!DB->IndexFile)
+        {
+            DB->IndexFile = fopen(DB->IndexFileName, "rb");
+        }
+	//printf("DBFile: %s\n", DB->FileName);
+	//getchar();
+    }
+
+    if (GlobalOptions->RunMode & (RUN_MODE_MUTATION | RUN_MODE_BLIND))
+    {
+        GlobalOptions->TrieBlockSize = 100;  
+        GlobalOptions->StoreMatchCount = 100; 
+        GlobalOptions->ReportMatchCount = 10; // in production, cap the number of reported matches even in blind mode
+    }
+
+    //printf("About to PopulatePTMListWIthMutations...\n");
+    //if (GlobalOptions->RunMode & (RUN_MODE_TAG_MUTATION))
+    //   PopulatePTMListWithMutations();
+      //
+    
+    //printf("Building decorations...\n");
+    BuildDecorations();
+    //printf("Done building decorations...n");
+
+
+    FirstBlockSpectrum = GlobalOptions->FirstSpectrum;
+    while (FirstBlockSpectrum)
+    {
+        fflush(stdout);
+	
+
+        // Load one block of spectrum objects:
+        BlockSize = 0;
+        LastBlockSpectrum = FirstBlockSpectrum;
+        for (BlockSize = 0; BlockSize < GlobalOptions->TrieBlockSize; BlockSize++)
+        {
+            fflush(stdout);
+    
+            SpectrumFile = fopen(LastBlockSpectrum->InputFile->FileName, "rb");
+            
+	    if (SpectrumFile)
+            {
+                LastBlockSpectrum->Spectrum = (MSSpectrum*)calloc(1, sizeof(MSSpectrum));
+		LastBlockSpectrum->Spectrum->PeakAllocation = 0;
+                LastBlockSpectrum->Spectrum->Node = LastBlockSpectrum;
+                fseek(SpectrumFile, LastBlockSpectrum->FilePosition, 0);
+		
+                Result = SpectrumLoadFromFile(LastBlockSpectrum->Spectrum, SpectrumFile);
+                //printf("Load from '%s' result %d\n", LastBlockSpectrum->InputFile->FileName, Result);
+                fclose(SpectrumFile);
+                if (!Result)
+                {
+                    SafeFree(LastBlockSpectrum->Spectrum);
+                    LastBlockSpectrum->Spectrum = NULL;
+                }
+            }
+            LastBlockSpectrum = LastBlockSpectrum->Next;
+            if (!LastBlockSpectrum)
+            {
+                BlockSize++;
+                break;
+            }
+        }
+        printf("Search block of %d spectra starting with %s:%d\n", BlockSize, FirstBlockSpectrum->InputFile->FileName, FirstBlockSpectrum->ScanNumber);
+        fflush(stdout);
+        ThisBlockSpectraSearched = 0;
+        for (DB = GlobalOptions->FirstDatabase; DB; DB = DB->Next)
+        {
+            ThisBlockSpectraSearched = SearchSpectrumBlockAgainstDB(FirstBlockSpectrum, LastBlockSpectrum, DB);
+        }
+        SpectraSearched += ThisBlockSpectraSearched;
+        printf("Search progress: %d / %d (%.2f%%)\n", SpectraSearched, GlobalOptions->SpectrumCount, 100 * SpectraSearched / (float)max(1, GlobalOptions->SpectrumCount));
+        fflush(stdout);
+
+        // Clean up this block, and move to the next:
+        fflush(stdout);
+        for (BlockSpectrum = FirstBlockSpectrum; BlockSpectrum != LastBlockSpectrum; BlockSpectrum = BlockSpectrum->Next)
+        {
+            if (BlockSpectrum->Spectrum)
+            {
+                if (!(GlobalOptions->RunMode & RUN_MODE_RAW_OUTPUT))
+                {
+                    OutputMatchesForSpectrum(BlockSpectrum, GlobalOptions->OutputFile);
+                }
+                FreeSpectrum(BlockSpectrum->Spectrum);
+                FreeMatchList(BlockSpectrum);
+                BlockSpectrum->Spectrum = NULL;
+            }
+        }
+        fflush(stdout);
+
+        FreeTrieNode(Root);
+        Root = NULL;
+	
+        FirstBlockSpectrum = LastBlockSpectrum;
+        fflush(stdout);
+
+    }
+    ///////////////////////////////////////////////////////////
+    // After searching, we compute p-values and output matches:
+    
+    if (fclose(GlobalOptions->OutputFile))
+    {
+        REPORT_ERROR_S(50, GlobalOptions->OutputFileName);
+        return;
+    }
+    GlobalOptions->OutputFile = NULL;
+
+    /// Compute p-values, write them to final output file:
+    if (GlobalOptions->ExternalTagger && (GlobalOptions->RunMode & RUN_MODE_TAGS_ONLY))
+    {
+        // do nothing
+    }
+    else if (GlobalOptions->RunMode & RUN_MODE_RAW_OUTPUT)
+    {
+        // do nothing
+    }
+    else
+    {
+        CalculatePValues(GlobalOptions->OutputFileName, GlobalOptions->FinalOutputFileName);
+    }
+
+    // Close database files:
+    for (DB = GlobalOptions->FirstDatabase; DB; DB = DB->Next)
+    {
+        if (DB->DBFile)
+        {
+            fclose(DB->DBFile);
+            DB->DBFile = NULL;
+        }
+        if (DB->IndexFile)
+        {
+            fclose(DB->IndexFile);
+            DB->IndexFile = NULL;
+        }
+    }
+}
+
+// Special run mode: Perform parent mass correction on our input spectra.  Output the
+// parent masses and charge states.
+void PerformSpectrumTweakage(void)
+{
+    SpectrumNode* Node;
+    FILE* SpectrumFile;
+    int TweakIndex;
+    int Result;
+    //
+    for (Node = GlobalOptions->FirstSpectrum; Node; Node = Node->Next)
+    {
+        SpectrumFile = fopen(Node->InputFile->FileName, "rb");
+        fseek(SpectrumFile, Node->FilePosition, 0);
+        if (SpectrumFile)
+        {
+            Node->Spectrum = (MSSpectrum*)calloc(1, sizeof(MSSpectrum));
+            Node->Spectrum->Node = Node;
+            Result = SpectrumLoadFromFile(Node->Spectrum, SpectrumFile);
+            fclose(SpectrumFile);
+            if (!Result)
+            {
+                FreeSpectrum(Node->Spectrum);
+                Node->Spectrum = NULL;
+            }
+            else
+            {
+                WindowFilterPeaks(Node->Spectrum, 0, 0);
+                IntensityRankPeaks(Node->Spectrum);
+                PrepareSpectrumForIonScoring(PRMModelCharge2, Node->Spectrum, 0);
+                //SpectrumComputeBinnedIntensities(Node);
+                Node->Spectrum->Node = Node;
+                TweakSpectrum(Node);
+                
+                fprintf(GlobalOptions->OutputFile, "%s\t", Node->InputFile->FileName);
+                fprintf(GlobalOptions->OutputFile, "%d\t", Node->ScanNumber);
+                fprintf(GlobalOptions->OutputFile, "%d\t", Node->FilePosition);
+                for (TweakIndex = 0; TweakIndex < TWEAK_COUNT; TweakIndex++)
+                {
+                    if (Node->Tweaks[TweakIndex].Charge)
+                    {
+                        fprintf(GlobalOptions->OutputFile, "%.2f\t%d\t", Node->Tweaks[TweakIndex].ParentMass / (float)DALTON, Node->Tweaks[TweakIndex].Charge);
+                    }
+                }
+                fprintf(GlobalOptions->OutputFile, "\n");
+            }
+            FreeSpectrum(Node->Spectrum);
+            Node->Spectrum = NULL;
+        }
+        
+    }
+}
+
+//For phosphorylated spectra, the superprominent M-p (phosphate neutral loss) peak can
+//throw off the charge state guessing and the tagging, so we remove it.
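+//For example (rough numbers, assuming PHOSPHATE_WATER_MASS is the ~98 Da H3PO4 neutral loss):
+//a 2+ phosphopeptide precursor at m/z 800 shows this M-p peak near m/z 800 - 98/2 = 751,
+//which is what the ExpectedDiff test below looks for (to within half a Dalton).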
+void AttemptParentMassPeakRemoval(MSSpectrum* Spectrum)
+{
+    int MostIntensePeakIndex = 0; //NEC: Added to get rid of possible use when uninitialized warning
+    int MostIntenseMass = 0; //NEC: Added to get rid of possible use when uninitialized warning
+    int PeakIndex;
+    float MostIntense = 0.0;
+    float NextMostIntense = 0.0;
+    int Diff;
+    int ExpectedDiff;
+    int ExpectedDiff2;
+    int Epsilon = HALF_DALTON;
+	int CalculatedMZ;
+    //
+    for (PeakIndex = 0; PeakIndex < Spectrum->PeakCount; PeakIndex++)
+    {
+        if (Spectrum->Peaks[PeakIndex].Intensity > MostIntense)
+        {
+            NextMostIntense = MostIntense;
+            MostIntense = Spectrum->Peaks[PeakIndex].Intensity;
+            MostIntensePeakIndex = PeakIndex;
+            MostIntenseMass = Spectrum->Peaks[PeakIndex].Mass;
+        }
+        else if(Spectrum->Peaks[PeakIndex].Intensity > NextMostIntense)
+        {
+            NextMostIntense = Spectrum->Peaks[PeakIndex].Intensity;
+        }
+    }
+    //printf("Most intense %f, next %f\n",MostIntense, NextMostIntense);
+    //If more than twice as intense as the runner-up peak, and in the right place, remove the peak.
+  //  if (MostIntense < 2 * NextMostIntense)
+  //  {
+		//Spectrum->RemovedPeakIndex = -1;//dummy holder
+  //      return;
+  //  }
+    //printf ("MZ of %d, charge %d\n", Spectrum->MZ, Spectrum->Charge);
+	//Set m/z with the new parentmass and charge that was just assigned in ConstructTags
+    CalculatedMZ = (Spectrum->ParentMass + (Spectrum->Charge - 1) * HYDROGEN_MASS) / Spectrum->Charge;
+    Diff = abs(CalculatedMZ - MostIntenseMass);
+    ExpectedDiff = PHOSPHATE_WATER_MASS / Spectrum->Charge;
+    ExpectedDiff2 = (PHOSPHATE_WATER_MASS + WATER_MASS) / Spectrum->Charge;
+    if (abs (Diff - ExpectedDiff) < Epsilon)
+    { //remove peak
+        Spectrum->RemovedPeakIndex = MostIntensePeakIndex;
+        Spectrum->RemovedPeakIntensity = Spectrum->Peaks[MostIntensePeakIndex].Intensity;
+        Spectrum->Peaks[MostIntensePeakIndex].Intensity = 1.0; //cut to ground
+    }
+    else if (abs(Diff - ExpectedDiff2) < Epsilon)
+    { //remove peak
+        Spectrum->RemovedPeakIndex = MostIntensePeakIndex;
+        Spectrum->RemovedPeakIntensity = Spectrum->Peaks[MostIntensePeakIndex].Intensity;
+        Spectrum->Peaks[MostIntensePeakIndex].Intensity = 1.0; //cut to ground
+    }
+	else
+	{
+		Spectrum->RemovedPeakIndex = -1;//dummy holder
+	}
+}
+
+void RestoreParentMassPeakRemoval(MSSpectrum* Spectrum)
+{
+	if (Spectrum->RemovedPeakIndex == -1)
+	{
+		return;
+	}
+	Spectrum->Peaks[Spectrum->RemovedPeakIndex].Intensity = Spectrum->RemovedPeakIntensity;
+}
diff --git a/Run.h b/Run.h
new file mode 100644
index 0000000..7a090eb
--- /dev/null
+++ b/Run.h
@@ -0,0 +1,41 @@
+//Title:          Run.h
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+#ifndef RUN_H
+#define RUN_H
+
+void RunSearch();
+void PerformSpectrumTweakage();
+void PerformTagGeneration();
+
+#endif // RUN_H
+
diff --git a/RunPySVM.py b/RunPySVM.py
new file mode 100644
index 0000000..6bc8741
--- /dev/null
+++ b/RunPySVM.py
@@ -0,0 +1,67 @@
+#Title:          RunPySVM.py
+#Author:         Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+#Created:        2005
+# Copyright 2007,2008,2009 The Regents of the University of California
+# All Rights Reserved
+#
+# Permission to use, copy, modify and distribute any part of this
+# program for educational, research and non-profit purposes, by non-profit
+# institutions only, without fee, and without a written agreement is hereby
+# granted, provided that the above copyright notice, this paragraph and
+# the following three paragraphs appear in all copies.
+#
+# Those desiring to incorporate this work into commercial
+# products or use for commercial purposes should contact the Technology
+# Transfer & Intellectual Property Services, University of California,
+# San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+# Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+#
+# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+# INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+# IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+# OF SUCH DAMAGE.
+#
+# THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+# OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+# ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+# REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+# EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+# THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+
+"""
+Wrapper for PySVM
+"""
+import os
+import sys
+import traceback
+try:
+    import PySVM
+except:
+    print "(Warning: PySVM not imported - SVM training not available)"
+
+def Predict(FeaturePath, ModelPath, OutputPath):
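+    # Each input line is in libsvm-style sparse format: a label followed by
+    # one-based "index:value" feature pairs; indices missing from a line are
+    # treated as zero-valued features below.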
+    PySVM.LoadModel(ModelPath)
+    InputFile = open(FeaturePath, "rb")
+    OutputFile = open(OutputPath, "wb")
+    for FileLine in InputFile.xreadlines():
+        Bits = FileLine.split()
+        FeatureVector = []
+        for Bit in Bits[1:]:
+            ColonPos = Bit.find(":")
+            if ColonPos == -1:
+                continue
+            FeatureIndex = int(Bit[:ColonPos]) - 1
+            while len(FeatureVector) <= FeatureIndex:
+                FeatureVector.append(0)
+            FeatureVector[FeatureIndex] = float(Bit[ColonPos + 1:])
+        Score = PySVM.Score(FeatureVector)
+        OutputFile.write("%s\n"%Score)
+    InputFile.close()
+    OutputFile.close()
+    
+
+if __name__ == "__main__":
+    Predict("TestFeatures.SVMScaled.txt", "SVM.model", "SVMPrediction.pytxt")
diff --git a/SNP.c b/SNP.c
new file mode 100644
index 0000000..d4bfc8f
--- /dev/null
+++ b/SNP.c
@@ -0,0 +1,244 @@
+//Title:          SNP.c
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+#include "CMemLeak.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <time.h>
+#include <string.h>
+#include "Trie.h"
+#include "Utils.h"
+#include "Run.h"
+#include "Tagger.h"
+#include "Score.h"
+#include "FreeMod.h"
+#include "Spliced.h"
+#include "BN.h"
+#include "SVM.h"
+#include "Scorpion.h"
+#include "ChargeState.h"
+#include "SNP.h"
+
+// Code to support the inclusion of POLYMORPHISMS, particularly SNPs, in a protein database.
+// The motivation: If a protein has several polymorphic sites, we can include all of them in 
+// a string-table proteomic database only by including multiple isoforms.  Since we're already
+// using a DAG data-structure to capture alternative splicing, let's include SNPs in the DAG,
+// and capture polymorphic variability as well as splicing variability.  
+
+// We use PolyNodes during database construction (not needed during search) to keep track of all
+// polymorphisms.  The polynodes are read from a binary file (currently written out by 
+// ParseSNPDatabase.py while parsing snp.txt from the ucsc genome browser).  They're ordered 
+// by genomic position.
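+// As read by ParsePolyNodes() below, each binary record appears to be laid out as:
+//   int32 GenomicPosition, one Type byte (0, 1, or 2), then 2, 3, or 4 allele characters.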
+
+PolyNode* g_FirstPolyNode = NULL;
+PolyNode* g_LastPolyNode = NULL;
+
+Polymorphism* g_Polymorphisms = NULL; // array
+int g_PolymorphismCount;
+
+// Search for the FIRST polymorphism that lies within the given interval.
+// Return its index.  Return -1 if there's no polymorphism in that interval.
+// Simple binary search.
+int FindPolyInInterval(int Start, int End)
+{
+    int Low;
+    int High;
+    int Mid;
+    int Pos;
+    //
+    if (!g_PolymorphismCount)
+    {
+        return -1;
+    }
+    Low = 0;
+    High = g_PolymorphismCount - 1;
+    while (1)
+    {
+        // If we're down to a minimally-sized poly-interval, check it and return:
+        if (Low + 1 >= High)
+        {
+            Pos = g_Polymorphisms[Low].Pos;
+            if (Pos >= Start && Pos < End)
+            {
+                return Low;
+            }
+            Pos = g_Polymorphisms[High].Pos;
+            if (Pos >= Start && Pos < End)
+            {
+                return High;
+            }
+            return -1;
+        }
+
+        Mid = (Low + High) / 2;
+        Pos = g_Polymorphisms[Mid].Pos;
+        if (Pos < Start)
+        {
+            Low = Mid;
+            continue;
+        }
+        if (Pos >= End)
+        {
+            High = Mid;
+            continue;
+        }
+        // We found one!  Make sure we have the FIRST one.
+        for (Low = Mid; Low >= 0; Low--)
+        {
+            if (g_Polymorphisms[Low].Pos < Start)
+            {
+                return (Low + 1);
+            }
+        }
+        return 0;
+    }
+}
+
+// Free the full linked list of poly nodes. 
+void FreePolyNodes()
+{
+    PolyNode* Node;
+    PolyNode* Prev = NULL;
+    //
+    if (!g_FirstPolyNode)
+    {
+        return;
+    }
+    for (Node = g_FirstPolyNode; Node; Node = Node->Next)
+    {
+        SafeFree(Prev);
+        Prev = Node;
+    }
+    SafeFree(Prev);
+    g_FirstPolyNode = NULL;
+    g_LastPolyNode = NULL;
+}
+
+// Parse polymorphism nodes for the current chromosome.
+void ParsePolyNodes(char* FileName)
+{
+    FILE* File;
+    PolyNode* Node;
+    int BytesRead;
+    int GenomicPosition;
+    int RecordNumber;
+    //
+    File = fopen(FileName, "rb");
+    if (!File)
+    {
+        printf("** Error: Unable to open polymorphism database '%s'\n", FileName);
+        return;
+    }
+    RecordNumber = 0;
+    while (1)
+    {
+        BytesRead = ReadBinary(&GenomicPosition, sizeof(int), 1, File);
+        if (!BytesRead)
+        {
+            break;
+        }
+        Node = (PolyNode*)calloc(sizeof(PolyNode), 1);
+        Node->Pos = GenomicPosition;
+        ReadBinary(&Node->Type, sizeof(char), 1, File);
+        switch (Node->Type)
+        {
+        case 0:
+            ReadBinary(Node->SNP, sizeof(char), 2, File);
+            break;
+        case 1:
+            ReadBinary(Node->SNP, sizeof(char), 3, File);
+            break;
+        case 2:
+            ReadBinary(Node->SNP, sizeof(char), 4, File);
+            break;
+        default:
+            printf("** Error: Unable to parse polymorphism node %d type '%d'\n", RecordNumber, Node->Type);
+            break;
+        }
+        if (g_LastPolyNode)
+        {
+            g_LastPolyNode->Next = Node;
+            // Sanity check: These nodes MUST come in order.
+            if (g_LastPolyNode->Pos >= Node->Pos)
+            {
+                printf("** Error parsing polymorphism data: Record %d is out of order!  (Start %d vs %d)\n", RecordNumber, Node->Pos, g_LastPolyNode->Pos);
+            }
+        }
+        else
+        {
+            g_FirstPolyNode = Node;
+        }
+        g_LastPolyNode = Node;
+        RecordNumber++;
+    }
+    fclose(File);
+    ////////////////////////////////////////////////////////////////////////
+    // Now, put all those nodes into an array:
+    g_PolymorphismCount = RecordNumber;
+    g_Polymorphisms = (Polymorphism*)calloc(g_PolymorphismCount, sizeof(Polymorphism));
+    RecordNumber = 0;
+    for (Node = g_FirstPolyNode; Node; Node = Node->Next)
+    {
+        g_Polymorphisms[RecordNumber].Pos = Node->Pos;
+        memcpy(g_Polymorphisms[RecordNumber].SNP, Node->SNP, sizeof(char) * 4);
+        RecordNumber++;
+    }
+    FreePolyNodes();
+}
+
+// For debugging: Print out all the polymorphism nodes.
+void DebugPrintPolyNodes(int FirstRecord, int LastRecord)
+{
+    PolyNode* Node;
+    int RecordNumber;
+    //
+    RecordNumber = 0;
+    for (Node = g_FirstPolyNode; Node; Node = Node->Next)
+    {
+        if (FirstRecord >= 0 && RecordNumber < FirstRecord)
+        {
+            RecordNumber++; // keep counting, or we would never reach FirstRecord
+            continue;
+        }
+        if (LastRecord >= 0 && RecordNumber > LastRecord)
+        {
+            break;
+        }
+        printf("SNP record %d: Pos %d can be %c or %c\n", RecordNumber, Node->Pos, Node->SNP[0], Node->SNP[1]);
+        RecordNumber++;
+    }
+}
+
+void SNPTestMain()
+{
+    ParsePolyNodes("SNP\\1.snp");
+    DebugPrintPolyNodes(-1, -1);
+}
diff --git a/SNP.h b/SNP.h
new file mode 100644
index 0000000..15e791a
--- /dev/null
+++ b/SNP.h
@@ -0,0 +1,63 @@
+//Title:          SNP.h
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <time.h>
+#include <string.h>
+#include "Trie.h"
+
+typedef struct PolyNode
+{
+    struct PolyNode* Next;
+    int Pos;
+    int Type; // placeholder; for now always 0 for SNP
+    char SNP[4];
+} PolyNode;
+
+// Polymorphism: For now, assume it's always a SNP.
+// Usually only the first two SNP characters are set; SNP[2] and SNP[3] stay null.
+typedef struct Polymorphism
+{
+    int Pos;
+    char SNP[4];
+} Polymorphism;
+
+extern PolyNode* g_FirstPolyNode;
+extern PolyNode* g_LastPolyNode;
+extern int g_PolymorphismCount;
+extern Polymorphism* g_Polymorphisms;
+
+void ParsePolyNodes(char* FileName);
+void FreePolyNodes();
+int FindPolyInInterval(int Start, int End);
diff --git a/SVM.c b/SVM.c
new file mode 100644
index 0000000..e418b5c
--- /dev/null
+++ b/SVM.c
@@ -0,0 +1,644 @@
+//Title:          SVM.c
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+// SVM support functions.
+// We employ SVMs to distinguish between (1) true and false peptide classifications,
+// and (2) true and false mutation assignments. 
+// We also can use SVMs for charge state determination (+2 versus +3, currently; +1 is easy and for +4 and beyond we
+// have no data) and parent mass correction.
+#include "CMemLeak.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+//#include <malloc.h>
+#include "SVM.h"
+#include "Utils.h"
+#include "Inspect.h"
+#include "Spectrum.h"
+#include "Trie.h"
+#include "Scorpion.h"
+#include "BN.h"
+#include "Score.h"
+#include "Errors.h"
+#include "IonScoring.h"
+
+// Forward declarations:
+float LDAClassify(float* Features);
+
+// Global variables:
+extern PRMBayesianModel* PRMModelCharge2;
+
+SVMModel* PValueSVM = NULL;
+float g_SVMToPValueMin;
+int g_PValueBinCount;
+float* g_SVMToPValue;
+
+SVMModel* CCModel1SVM = NULL;
+SVMModel* CCModel2SVM = NULL;
+
+SVMModel* MQModel2SVM = NULL;
+SVMModel* MQModel3SVM = NULL;
+
+
+//SVMModel* PValueSVM = NULL;
+extern float g_CutScores[];
+extern float g_BAbsSkew[];
+extern float g_YAbsSkew[];
+extern float g_BSkew[];
+extern float g_YSkew[];
+extern float g_BIntensity[];
+extern float g_YIntensity[];
+
+float GetPValue(float MQScore) 
+{
+    int Bin;
+    //
+    Bin = (int)((MQScore - g_SVMToPValueMin)*10 + 0.5);
+    Bin = max(Bin, 0);
+    Bin = min(Bin, g_PValueBinCount - 1);
+    return g_SVMToPValue[Bin];
+}
+
+// Given a model and an array of feature-values, perform SVM classification.  
+float SVMClassify(SVMModel* Model, float* Coords, int PreScaled)
+{
+    SupportVector* Vector;
+    double Total = 0;
+    double InnerProduct;
+    int CoordIndex;
+    double Diff;
+    double ScaledCoords[64];
+    //
+
+    if (PreScaled)
+    {
+        for (CoordIndex = 0; CoordIndex < Model->Coords; CoordIndex++)
+        {
+            ScaledCoords[CoordIndex] = Coords[CoordIndex];
+        }
+    }
+    else
+    {
+        // Scale coordinates to the range [-1, 1] based upon the extrema in the model:
+        for (CoordIndex = 0; CoordIndex < Model->Coords; CoordIndex++)
+        {
+            ScaledCoords[CoordIndex] = (Coords[CoordIndex] - Model->ScaleMin[CoordIndex]) / Model->ScaleSize[CoordIndex] - 1.0;
+            ScaledCoords[CoordIndex] = min(1, max(-1, ScaledCoords[CoordIndex]));
+        }
+    }
+
+    // Compute the decision value as a weighted sum of RBF kernel evaluations against the
+    // support vectors ('border points'): f(x) = sum_i Weight_i * exp(-Gamma * ||x - sv_i||^2) - Rho.
+    for (Vector = Model->FirstVector; Vector; Vector = Vector->Next)
+    {
+        InnerProduct = 0;
+        for (CoordIndex = 0; CoordIndex < Model->Coords; CoordIndex++)
+        {
+            Diff = (ScaledCoords[CoordIndex] - Vector->Coords[CoordIndex]);
+            InnerProduct += Diff * Diff;
+        }
+        InnerProduct = exp(-Model->Gamma * InnerProduct);
+        Total += Vector->Weight * InnerProduct;
+    }
+    Total -= Model->Rho;
+    return (float)Total;
+}
+
+// Free an SVMModel instance, including its list of vectors.
+void FreeSVMModel(SVMModel* Model)
+{
+    SupportVector* Vector;
+    SupportVector* Prev = NULL;
+    //printf("Free SVM model.\n");
+    if (Model)
+    {
+        for (Vector = Model->FirstVector; Vector; Vector = Vector->Next)
+        {
+            SafeFree(Prev);
+            Prev = Vector;
+        }
+        SafeFree(Prev);
+        SafeFree(Model);
+    }
+}
+
+// Free all loaded SVM models.
+void FreeSVMModels()
+{
+    FreeSVMModel(PValueSVM);
+    FreeSVMModel(MQModel2SVM);
+    FreeSVMModel(MQModel3SVM);
+}
+
+void InitPValueSVM()
+{
+    char FilePath[2048];
+
+    // NEW models:
+    if (!MQModel2SVM)
+    {
+        sprintf(FilePath, "%s%s.model", GlobalOptions->ResourceDir, "MQScoreSVM2");
+        MQModel2SVM = ReadSVMModel(FilePath);
+        sprintf(FilePath, "%s%s.range", GlobalOptions->ResourceDir, "MQScoreSVM2");
+        ReadSVMScaling(MQModel2SVM, FilePath);
+    }
+
+    if (!MQModel3SVM)
+    {
+        sprintf(FilePath, "%s%s.model", GlobalOptions->ResourceDir, "MQScoreSVM3");
+        MQModel3SVM = ReadSVMModel(FilePath);
+        sprintf(FilePath, "%s%s.range", GlobalOptions->ResourceDir, "MQScoreSVM3");
+        ReadSVMScaling(MQModel3SVM, FilePath);
+    }
+}
+
+float SVMComputeMQScore(MSSpectrum* Spectrum, Peptide* Match, float* MQFeatures)
+{
+    SVMModel* Model;
+    float Score;
+
+    if (Spectrum->Charge < 3)
+    {
+        Model = MQModel2SVM;
+    }
+    else
+    {
+        Model = MQModel3SVM;
+    }
+    if (!Model)
+    {
+        return 0.0;
+    }
+    Score = SVMClassify(Model, MQFeatures, 0);
+    Score = GetPenalizedScore(Spectrum, Match, Score);
+    return Score;
+}
+
+int ReadSVMScalingCallback(int LineNumber, int FilePos, char* LineBuffer, void* UserData)
+{
+    SVMModel* Model;
+    char* Value;
+    int CoordIndex;
+    float ScaleMin;
+    float ScaleMax;
+    //
+    Model = (SVMModel*)UserData;
+    //CoordIndex = LineNumber - 3;
+    Value = strtok(LineBuffer, " \r\n\t\0");
+    if (!Value)
+    {
+        return 1;
+    }
+    CoordIndex = atoi(Value) - 1;
+    if (CoordIndex < 0)
+    {
+        return 1;
+    }
+    Value = strtok(NULL, " \r\n\t\0");
+    if (!Value)
+    {
+        return 1;
+    }
+    ScaleMin = (float)atof(Value);
+    Value = strtok(NULL, " \r\n\t\0");
+    if (!Value)
+    {
+        return 1;
+    }
+    ScaleMax = (float)atof(Value);
+    if (ScaleMax <= ScaleMin)
+    {
+        REPORT_ERROR(0);
+    }
+    Model->ScaleMin[CoordIndex] = ScaleMin;
+    Model->ScaleMax[CoordIndex] = ScaleMax;
+    Model->ScaleSize[CoordIndex] = (Model->ScaleMax[CoordIndex] - Model->ScaleMin[CoordIndex]) / 2.0;
+    return 1;
+}
+
+// Read feature extrema (for scaling) for an SVM model.
+void ReadSVMScaling(SVMModel* Model, char* ScaleFileName)
+{
+    FILE* File;
+    //
+    File = fopen(ScaleFileName, "r");
+    if (!File)
+    {
+        REPORT_ERROR_S(8, ScaleFileName);
+        return;
+    }
+    ParseFileByLines(File, ReadSVMScalingCallback, Model, 0);
+    fclose(File);
+}
+
+typedef struct SVMParseInfo
+{
+    SVMModel* Model;
+    int InVectors;
+} SVMParseInfo;
+
+
+int ReadSVMModelCallback(int LineNumber, int FilePos, char* LineBuffer, void* UserData)
+{
+    SVMModel* Model;
+    SVMParseInfo* Info;
+    SupportVector* Vector;
+    char* Command;
+    char* Value;
+    int CoordIndex;
+    char* CoordIndexStr;
+
+    //
+    Info = (SVMParseInfo*)UserData;
+    Model = Info->Model;
+    // Process either a support vector line, or a header command.
+    if (Info->InVectors)
+    {
+        Vector = (SupportVector*)calloc(sizeof(SupportVector), 1);
+        // Weight, then a list of values.
+        Value = strtok(LineBuffer, " \r\n\t");
+        if (!Value)
+        {
+            printf("* Critical error: strtok failed in ReadSVMModel\n");
+            return 0;
+        }
+        Vector->Weight = atof(Value);
+        CoordIndex = 0;
+        while (1)
+        {
+            Value = strtok(NULL, " \r\n\t");
+            if (!Value)
+            {
+                break;
+            }
+            CoordIndexStr = Value;
+            while (*Value != ':' && *Value)
+            {
+                Value++;
+            }
+            *Value = '\0';
+            CoordIndex = atoi(CoordIndexStr) - 1;
+            Value++;
+            if (CoordIndex >= SUPPORT_VECTOR_LENGTH)
+            {
+                printf("* Error: SVM vector too long!\n");
+                break;
+            }
+            Vector->Coords[CoordIndex] = atof(Value);
+            CoordIndex++;
+        }
+        Model->Coords = CoordIndex;
+        if (Model->LastVector)
+        {
+            Model->LastVector->Next = Vector;
+            Model->LastVector = Vector;
+        }
+        else
+        {
+            Model->FirstVector = Vector;
+            Model->LastVector = Vector;
+        }
+    }
+    else
+    {
+        // A header line.  We pay attention to parameters "gamma" and "rho".  The line "sv" marks the end of 
+        // the header, and the start of the support vectors.
+        Command = strtok(LineBuffer, " ");
+        Value = strtok(NULL, " ");
+        // First, handle commands that take no arguments:
+        if (!CompareStrings(Command, "sv"))
+        {
+            Info->InVectors = 1;
+        }
+        else
+        {
+            // The remaining commands have a mandatory argument:
+            if (!Value)
+            {
+                printf("* Invalid command line in ReadSVMModel\n");
+                return 0;
+            }
+            if (!CompareStrings(Command, "gamma"))
+            {
+                Model->Gamma = atof(Value);
+            }
+            if (!CompareStrings(Command, "rho"))
+            {
+                Model->Rho = atof(Value);
+            }
+        }
+    }
+    return 1;
+}
+
+// Read an SVM model from a .model file.
+SVMModel* ReadSVMModel(char* ModelFileName)
+{
+    SVMModel* Model;
+    FILE* File;
+    SVMParseInfo Info;
+    //
+    //printf("Reading SVM model.\n");
+    Model = (SVMModel*)calloc(sizeof(SVMModel), 1);
+    File = fopen(ModelFileName, "r");
+    if (!File)
+    {
+        REPORT_ERROR_S(8, ModelFileName);
+        return NULL;
+    }
+    Info.Model = Model;
+    Info.InVectors = 0;
+    ParseFileByLines(File, ReadSVMModelCallback, &Info, 0);
+    fclose(File);
+    return Model;
+}
+
+void TestPValue(char* FeatureVectorFileName)
+{
+    FILE* FeatureVectorFile;
+    int* HistogramFalse;
+    int* HistogramTrue;
+    float Coords[32];
+    char* ValueString;
+    int TrueFlag;
+    int FeatureIndex;
+    float Result;
+    int HistogramBin;
+    FILE* OutputFile;
+    int FalseCount = 0;
+    int TrueCount = 0;
+    int TrueCumulative = 0;
+    int FalseCumulative = 0;
+    int BufferEnd = 0;
+    int BufferPos = 0;
+    int BytesRead;
+    int BytesToRead;
+    char* Buffer;
+    char* LineBuffer;
+    int LineNumber = 0;
+    char* FieldString;
+    char TextBuffer[BUFFER_SIZE * 2];
+    //char* ValueString;
+    //
+    FeatureVectorFile = fopen(FeatureVectorFileName, "r");
+    if (!FeatureVectorFile)
+    {
+        REPORT_ERROR_S(8, FeatureVectorFileName);
+        return;
+    }
+    Buffer = (char*)malloc(sizeof(char) * 10240);
+    LineBuffer = (char*)malloc(sizeof(char) * MAX_LINE_LENGTH);
+
+    HistogramFalse = (int*)calloc(sizeof(int), 1000);
+    HistogramTrue = (int*)calloc(sizeof(int), 1000);
+    OutputFile = fopen("PValueTest.txt", "w");
+    InitPValueSVM();
+    while (1)
+    {
+        BytesToRead = BUFFER_SIZE - BufferEnd;
+        BytesRead = ReadBinary(TextBuffer + BufferEnd, sizeof(char), BytesToRead, FeatureVectorFile);
+        BufferEnd += BytesRead;
+        TextBuffer[BufferEnd] = '\0';
+        if (BufferPos == BufferEnd)
+        { 
+            // We're done!
+            break;
+        }
+
+        // Copy a line of text to the line buffer.  Skip spaces, and stop at carriage return or newline.
+        BufferPos = CopyBufferLine(TextBuffer, BufferPos, BufferEnd, LineBuffer, 0);
+        LineNumber += 1;
+
+        // Now, move the remaining text to the start of the buffer:
+        memmove(TextBuffer, TextBuffer + BufferPos, BufferEnd - BufferPos);
+        BufferEnd -= BufferPos;
+        BufferPos = 0;
+
+        // Now, process this line of text!
+        // Skip empty lines:
+        if (!LineBuffer[0])
+        {
+            continue;
+        }
+        if (LineBuffer[0] == '#')
+        {
+            continue;
+        }
+        // Ok, it's a feature line.  Split into pieces...
+        memset(Coords, 0, sizeof(float)*32);
+        ValueString = strtok(LineBuffer, WHITESPACE);
+        TrueFlag = atoi(ValueString);
+        fprintf(OutputFile, "%d\t", TrueFlag);
+        if (TrueFlag < 0)
+        {
+            TrueFlag = 0;
+        }
+        FeatureIndex = 0;
+        while (1)
+        {
+            FieldString = strtok(NULL, WHITESPACE);
+            ValueString = FieldString;
+            if (!ValueString)
+            {
+                break;
+            }
+            while (*ValueString!=':')
+            {
+                ValueString++;
+            }
+            *ValueString = '\0';
+            FeatureIndex = atoi(FieldString) - 1;
+            ValueString++;
+            Coords[FeatureIndex++] = (float)atof(ValueString);
+            fprintf(OutputFile, "%s\t", ValueString);
+        }
+        Result = SVMClassify(PValueSVM, Coords, 1);
+        fprintf(OutputFile, "%.4f\n", Result);
+        HistogramBin = (int)(Result*10 + 0.5) + 300;
+        HistogramBin = max(0, min(999, HistogramBin));
+        if (TrueFlag)
+        {
+            HistogramTrue[HistogramBin]++;
+            TrueCount++;
+        }
+        else
+        {
+            HistogramFalse[HistogramBin]++;
+            FalseCount++;
+        }
+    }
+    FalseCount = max(FalseCount, 1); // avoid dividing by zero
+    TrueCount = max(TrueCount, 1); // avoid dividing by zero
+    
+    for (HistogramBin = 0; HistogramBin < 1000; HistogramBin++)
+    {
+        TrueCumulative += HistogramTrue[HistogramBin];
+        FalseCumulative += HistogramFalse[HistogramBin];
+        fprintf(OutputFile, "%d\t%.2f\t%.2f\t%.2f\t\n",
+            HistogramBin, (HistogramBin - 300) / 10.0,
+            100*TrueCumulative/(float)TrueCount,
+            100*FalseCumulative/(float)FalseCount);
+    }
+    fclose(FeatureVectorFile);
+    fclose(OutputFile);
+    SafeFree(Buffer);
+    SafeFree(LineBuffer);
+    SafeFree(HistogramFalse);
+    SafeFree(HistogramTrue);
+}
+
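+// Linear discriminant scoring of the match features: after scaling into [-1, 1], compute the two
+// class scores MeanVectorTrue * CovInv * x + SubProdTrue and MeanVectorFalse * CovInv * x + SubProdFalse
+// (the SubProd constants presumably fold in the -0.5 * mu' * CovInv * mu terms), and return their
+// difference; a positive value favors the true-match class.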
+float LDAClassify(float* Features)
+{
+    float ScaledFeatures[6];
+    double FeatureMin[] = {-1.88, 0, 0, 0};
+    double FeatureMax[] = {3.81, 1, 1, 2};
+    float HalfRange;
+    int X;
+    int Y;
+    int FeatureCount;
+    static double* CovInv[6];
+    double* MeanVectorTrue;
+    double* MeanVectorFalse;
+    double SubProdTrue;
+    double SubProdFalse;
+    float ProdTemp[6];
+    float ProdTrue;
+    float ProdFalse;
+
+    // Constants for TRYPTIC scoring:
+    double TCovInvA[] = {6.037,-8.996,-7.351,-0.283};
+    double TCovInvB[] = {-8.996,51.379,-3.536,2.428};
+    double TCovInvC[] = {-7.351,-3.536,28.577,-0.271};
+    double TCovInvD[] = {-0.283,2.428,-0.271,2.382};
+    double TMeanVectorTrue[] = {2.048,0.022,0.187,0.622};
+    double TMeanVectorFalse[] = {-0.352,-0.668,-0.629,0.102};
+    double TSubProdTrue = (float)-10.052;
+    double TSubProdFalse = (float)-12.146;
+
+    // Constants for NON-TRYPTIC scoring:
+    double NTCovInvA[] = {6.003,-8.708,-7.383};
+    double NTCovInvB[] = {-8.708,48.904,-3.259};
+    double NTCovInvC[] = {-7.383,-3.259,28.546};
+    double NTMeanVectorTrue[] = {2.048,0.022,0.187};
+    double NTMeanVectorFalse[] = {-0.352,-0.668,-0.629};
+    double NTSubProdTrue = (float)-9.880;
+    double NTSubProdFalse = (float)-11.888;
+
+    // Choose the feature-set by digest type.
+    if (GlobalOptions->DigestType == DIGEST_TYPE_TRYPSIN)
+    {
+        MeanVectorTrue = TMeanVectorTrue;
+        MeanVectorFalse = TMeanVectorFalse;
+        SubProdTrue = TSubProdTrue;
+        SubProdFalse = TSubProdFalse;
+        CovInv[0] = TCovInvA;
+        CovInv[1] = TCovInvB;
+        CovInv[2] = TCovInvC;
+        CovInv[3] = TCovInvD;
+        FeatureCount = 4;
+    }
+    else
+    {
+        MeanVectorTrue = NTMeanVectorTrue;
+        MeanVectorFalse = NTMeanVectorFalse;
+        SubProdTrue = NTSubProdTrue;
+        SubProdFalse = NTSubProdFalse;
+        CovInv[0] = NTCovInvA;
+        CovInv[1] = NTCovInvB;
+        CovInv[2] = NTCovInvC;
+        FeatureCount = 3;
+    }
+    // Scale the features into [-1, 1]:
+    for (X = 0; X < FeatureCount; X++)
+    {
+        HalfRange = (float)((FeatureMax[X] - FeatureMin[X]) / 2.0);
+        ScaledFeatures[X] = (float)((Features[X] - FeatureMin[X]) / HalfRange - 1.0);
+    }
+    // Compute the product of the inverse covariance matrix with our feature vector:
+    for (X = 0; X < FeatureCount; X++)
+    {
+        ProdTemp[X] = 0;
+        for (Y = 0; Y < FeatureCount; Y++)
+        {
+            ProdTemp[X] += (float)(ScaledFeatures[Y] * CovInv[X][Y]);
+        }
+    }
+    // Compute u_true * C^-1 * x and u_false * C^-1 * x (mean vectors dotted with the product above):
+    ProdTrue = 0;
+    ProdFalse = 0;
+    for (X = 0; X < FeatureCount; X++)
+    {
+        ProdFalse += (float)(MeanVectorFalse[X] * ProdTemp[X]);
+        ProdTrue += (float)(MeanVectorTrue[X] * ProdTemp[X]);
+    }
+    ProdTrue += (float)SubProdTrue;
+    ProdFalse += (float)SubProdFalse;
+    //printf("%.2f\t%.2f\t%.2f\t\n", (ProdTrue - ProdFalse), ProdTrue, ProdFalse);
+    return (ProdTrue - ProdFalse);
+}
+
+void LoadCCModelSVM(int ForceRefresh)
+{
+    char FilePath[2048];
+    if (CCModel1SVM)
+    {
+        if (ForceRefresh)
+        {
+            FreeSVMModel(CCModel1SVM);
+            FreeSVMModel(CCModel2SVM);
+        }
+        else
+        {
+            return;
+        }
+    }
+
+    if (GlobalOptions->PhosphorylationFlag)
+    {
+        // Use a phosphorylation-specific model for charge 2 only; there is not enough
+        // training data for a charge-1 phosphorylation model.
+        sprintf(FilePath, "%s%s", GlobalOptions->ResourceDir, "CCSVM1.model");
+        CCModel1SVM = ReadSVMModel(FilePath);
+        sprintf(FilePath, "%s%s", GlobalOptions->ResourceDir, "CCSVM1.range");
+        ReadSVMScaling(CCModel1SVM, FilePath);
+        sprintf(FilePath, "%s%s", GlobalOptions->ResourceDir, "CCSVM2Phos.model");
+        CCModel2SVM = ReadSVMModel(FilePath);
+        sprintf(FilePath, "%s%s", GlobalOptions->ResourceDir, "CCSVM2Phos.range");
+        ReadSVMScaling(CCModel2SVM, FilePath);
+    }
+    else
+    {
+        sprintf(FilePath, "%s%s", GlobalOptions->ResourceDir, "CCSVM1.model");
+        CCModel1SVM = ReadSVMModel(FilePath);
+        sprintf(FilePath, "%s%s", GlobalOptions->ResourceDir, "CCSVM1.range");
+        ReadSVMScaling(CCModel1SVM, FilePath);
+        sprintf(FilePath, "%s%s", GlobalOptions->ResourceDir, "CCSVM2.model");
+        CCModel2SVM = ReadSVMModel(FilePath);
+        sprintf(FilePath, "%s%s", GlobalOptions->ResourceDir, "CCSVM2.range");
+        ReadSVMScaling(CCModel2SVM, FilePath);
+    }
+}
+
+void FreeCCModelSVM()
+{
+    FreeSVMModel(CCModel1SVM);
+    CCModel1SVM = NULL;
+    FreeSVMModel(CCModel2SVM);
+    CCModel2SVM = NULL;
+
+}
diff --git a/SVM.h b/SVM.h
new file mode 100644
index 0000000..3e32429
--- /dev/null
+++ b/SVM.h
@@ -0,0 +1,81 @@
+//Title:          SVM.h
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+#ifndef SVM_H
+#define SVM_H
+// Structs to support use of SVMs:
+#include "Utils.h"
+#include "Inspect.h"
+#include "Spectrum.h"
+#include "Trie.h"
+
+// Support vectors are of this length (or shorter)
+#define SUPPORT_VECTOR_LENGTH 32
+
+typedef struct SupportVector
+{
+    //int Classification; // +1 or -1
+    double Weight;
+    double Coords[SUPPORT_VECTOR_LENGTH];
+    struct SupportVector* Next;
+} SupportVector;
+
+typedef struct SVMModel
+{
+    SupportVector* FirstVector;
+    SupportVector* LastVector;
+    int Coords;
+    double ScaleMin[SUPPORT_VECTOR_LENGTH];
+    double ScaleMax[SUPPORT_VECTOR_LENGTH];
+    double ScaleSize[SUPPORT_VECTOR_LENGTH];
+    double Beta[SUPPORT_VECTOR_LENGTH]; // for computing classifier values
+    double Beta0;
+    double Rho;
+    double Gamma; // for RBF kernel
+} SVMModel;
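+
+// A rough sketch (for orientation only) of the decision function such a model encodes,
+// assuming a libsvm-style RBF classifier: for an input vector x,
+//     f(x) = sum over support vectors v of  v->Weight * exp(-Gamma * ||x - v->Coords||^2)  -  Rho
+// SVMClassify (in SVM.c) performs the actual evaluation; the ScaleMin/ScaleSize fields
+// rescale each input coordinate first, and the PreScaled flag presumably indicates that
+// the caller has already done so.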
+
+extern SVMModel* PValueSVMModel;
+
+float SVMComputeMQScore(MSSpectrum* Spectrum, Peptide* Match, float* MQFeatures);
+float SVMClassify(SVMModel* Model, float* Coords, int PreScaled);
+void FreeSVMModels();
+SVMModel* ReadSVMModel(char* FileName);
+void ReadSVMScaling(SVMModel* Model, char* ScaleFileName);
+float GetPValue(float MQScore);
+float LDAClassify(float* Features);
+void TestPValue(char* FeatureVectorFileName);
+void LoadCCModelSVM(int ForceRefresh);
+void FreeCCModelSVM();
+void InitPValueSVM();
+
+#endif // SVM_H
+
diff --git a/Score.c b/Score.c
new file mode 100644
index 0000000..baba739
--- /dev/null
+++ b/Score.c
@@ -0,0 +1,862 @@
+//Title:          Score.c
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+#include "CMemLeak.h"
+#include <string.h>
+#include <math.h> 
+#include <stdlib.h>
+#include "Mods.h"
+#include "Score.h"
+#include "Spectrum.h"
+#include "Inspect.h"
+#include "Tagger.h"
+#include "SVM.h"
+#include "IonScoring.h"
+#include "ParentMass.h"
+
+////////////////////////////////////////////////////////////////////////////////////////
+// Score.c and Score.h support an 'alignment-based' scoring method which has been 
+// discarded in favor of SVM-based scoring.  The two methods have similar performance, but
+// the SVM was slightly better and slightly faster.  This code is kept around for reference,
+// but isn't executed in practice.
+
+// The max length permitted for any peptide match:
+#define MAX_PEPTIDE_LENGTH 256
+
+// Set SHOW_DP_TABLE to enable verbose printout of the d.p. table from scoring.
+//#define SHOW_DP_TABLE 1
+
+// TheoPeak has a mass, an ion type, and a score.  It's a peak
+// in the *theoretical fragmentation spectrum*.  Some peaks (e.g.
+// an a-ion peak late in the peptide) have bad scores; others get very 
+// good scores.  The scores are log-odds scores.  Scores are based on the
+// scores in the ScoringModel.
+typedef struct TheoPeak
+{
+    int Mass;
+    int IonType; 
+    int LossType;
+    int Score;
+    int AntiScore;
+    // CutIndex is the number of amino acids in this fragment.
+    // For instance, in EAMAPK, b1 and y5 are the same cut point.
+    // CutIndex is always at least 1, since a fragment has at least one amino acid.
+    int CutIndex; 
+    // TrueCutIndex is the index of the cut.  (So, b and y fragments with same TrueCutIndex represent
+    // breakage of the same peptide bond)
+    int TrueCutIndex; 
+    // AssignedPeak is built during spectral scoring, when backtracking along the DP table
+    SpectralPeak* AssignedPeak;
+} TheoPeak;
+
+// Our scoring model uses several type of cuts, and computes probabilities for each.
+// Here's a diagram of the cut points for peptide "SPECTRUM":
+// LeftEdge   L1   L2   Mid   Mid   Mid   R2   R1   RightEdge 
+//          S    P    E     C     T     R    U    M
+typedef enum CutPointType
+{
+    CutPointTypeL1 = 0,
+    CutPointTypeL2,
+    CutPointTypeR1,
+    CutPointTypeR2,
+    CutPointTypeMid,
+    CutPointTypeLeftEdge,
+    CutPointTypeRightEdge,
+    CutPointTypeCount
+} CutPointType;
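+
+// A minimal sketch (illustration only) of how a cut position maps onto the cut-point
+// types diagrammed above, assuming the cut position runs from 0 (LeftEdge) to
+// PeptideLength (RightEdge).  The trained scores keyed by these types live in the
+// ScoringModel struct below.
+static CutPointType ClassifyCutPointSketch(int CutIndex, int PeptideLength)
+{
+    if (CutIndex <= 0)
+        return CutPointTypeLeftEdge;
+    if (CutIndex >= PeptideLength)
+        return CutPointTypeRightEdge;
+    if (CutIndex == 1)
+        return CutPointTypeL1;
+    if (CutIndex == 2)
+        return CutPointTypeL2;
+    if (CutIndex == PeptideLength - 1)
+        return CutPointTypeR1;
+    if (CutIndex == PeptideLength - 2)
+        return CutPointTypeR2;
+    return CutPointTypeMid;
+}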
+
+// Skew odds are modeled by bins (0 to 0.05 Da, 0.05 to 0.1, up to a bin for 0.5+).
+#define SKEW_BIN_COUNT 10
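+
+// A minimal sketch (illustration only) of the binning described above, assuming a
+// 0.05 Da bin width with the final bin absorbing everything from the last edge up;
+// the trained per-bin scores live in ScoringModel.SkewScore.
+static int SkewBinSketch(float SkewDaltons)
+{
+    int Bin = (int)(fabs(SkewDaltons) / 0.05);
+    return min(Bin, SKEW_BIN_COUNT - 1);
+}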
+
+#define SECTOR_COUNT 3
+
+// Scoring model, built by DPTrainer.py
+typedef struct ScoringModel
+{
+    int BScore[CutPointTypeCount];
+    int YScore[CutPointTypeCount];
+    int BH2OBoostScore[CutPointTypeCount];
+    int BH2OScore[CutPointTypeCount];
+    int BNH3BoostScore[CutPointTypeCount];
+    int BNH3Score[CutPointTypeCount];
+    int ABoostScore[CutPointTypeCount];
+    int AScore[CutPointTypeCount];
+    int YH2OBoostScore[CutPointTypeCount];
+    int YH2OScore[CutPointTypeCount];
+    int YNH3Score[CutPointTypeCount];
+    int B2BoostScore[CutPointTypeCount];
+    int B2Score[CutPointTypeCount];
+    int Y2Score[CutPointTypeCount];
+    int AH2OScore[CutPointTypeCount];
+    int ANH3Score[CutPointTypeCount];
+    int NoisePenalty[21];
+    int PresenceScore[SECTOR_COUNT];
+    int AbsenceScore[SECTOR_COUNT];
+    int InexplicableScore[SECTOR_COUNT];
+    int SkewScore[SKEW_BIN_COUNT];
+} ScoringModel;
+
+// We have an array of models - one for each charge state (1, 2, 3+)
+ScoringModel* Models;
+
+// P-values are assigned based on a histogram of Match Quality Scores for false matches.
+// Histogram bin-count (and edges) are hard-coded.
+#define PVALUE_BIN_COUNT 300
+#define PVALUE_BIN_BOOST 100
+float g_MatchPValueShort[PVALUE_BIN_COUNT];
+float g_MatchPValueMedium[PVALUE_BIN_COUNT];
+float g_MatchPValueLong[PVALUE_BIN_COUNT];
+float g_MatchPValueLongLong[PVALUE_BIN_COUNT];
+
+int InitPValue(char* FileName)
+{
+    FILE* File;
+    //
+    File = fopen(FileName, "rb");
+    if (!File)
+    {
+        return 0;
+    }
+    ReadBinary(g_MatchPValueShort, sizeof(float), PVALUE_BIN_COUNT, File);
+    ReadBinary(g_MatchPValueMedium, sizeof(float), PVALUE_BIN_COUNT, File);
+    ReadBinary(g_MatchPValueLong, sizeof(float), PVALUE_BIN_COUNT, File);
+    ReadBinary(g_MatchPValueLongLong, sizeof(float), PVALUE_BIN_COUNT, File);
+    fclose(File);
+    return 1;
+}
+
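+// A minimal sketch (illustration only) of how these tables might be consulted once
+// InitPValue has filled them.  Both the bin formula (score * 10 + PVALUE_BIN_BOOST,
+// mirroring the histogramming in TestPValue in SVM.c) and the length cutoffs below
+// are assumptions made for illustration; the shipped lookup is GetPValue in SVM.c.
+static float LookupMatchPValueSketch(float MQScore, int PeptideLength)
+{
+    float* Table;
+    int Bin;
+    //
+    if (PeptideLength < 10)
+        Table = g_MatchPValueShort;
+    else if (PeptideLength < 15)
+        Table = g_MatchPValueMedium;
+    else if (PeptideLength < 20)
+        Table = g_MatchPValueLong;
+    else
+        Table = g_MatchPValueLongLong;
+    Bin = (int)(MQScore * 10 + 0.5) + PVALUE_BIN_BOOST;
+    Bin = max(0, min(PVALUE_BIN_COUNT - 1, Bin));
+    return Table[Bin];
+}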
+
+// For debug output: Return a description of an ion type code.
+char* GetIonTypeName(int IonType)
+{
+    switch (IonType)
+    {
+    case evIonTypeNone:
+        return "None";
+    case evIonTypeB:
+        return "B";
+    case evIonTypeY:
+        return "Y";
+    case evIonTypeA:
+        return "A";
+    case evIonTypeBH2O:
+        return "B-H2O";
+    case evIonTypeAH2O:
+        return "A-H2O";
+    case evIonTypeBNH3:
+        return "B-NH3";
+    case evIonTypeANH3:
+        return "A-NH3";
+    case evIonTypeYH2O:
+        return "Y-H2O";
+    case evIonTypeYNH3:
+        return "Y-NH3";
+    case evIonTypeB2:
+        return "B2";
+    case evIonTypeY2:
+        return "Y2";
+    case evIonTypeNoise:
+        return "<noise>";
+    case evIonTypeBPhos:
+        return "b-p";
+    case evIonTypeYPhos:
+        return "y-p";
+    default:
+        return "BROKEN****";
+    }
+}
+
+// For debug output: Return a description of an ion type code.
+char* GetShortIonTypeName(int IonType)
+{
+    switch (IonType)
+    {
+    case evIonTypeNone:
+        return "-";
+    case evIonTypeB:
+        return "b";
+    case evIonTypeY:
+        return "y";
+    case evIonTypeA:
+        return "a";
+    case evIonTypeBH2O:
+        return "b-H2O";
+    case evIonTypeAH2O:
+        return "a-H2O";
+    case evIonTypeBNH3:
+        return "b-NH3";
+    case evIonTypeANH3:
+        return "a-NH3";
+    case evIonTypeYH2O:
+        return "y-H2O";
+    case evIonTypeYNH3:
+        return "y-NH3";
+    case evIonTypeB2:
+        return "b2";
+    case evIonTypeY2:
+        return "y2";
+    case evIonTypeNoise:
+        return "<noise>";
+    case evIonTypeBPhos:
+        return "b-p";
+    case evIonTypeYPhos:
+        return "y-p";
+    default:
+        return "*";
+    }
+}
+
+// For debugging: Print a theoretical fragmentation spectrum.  
+void DebugPrintPeaks(TheoPeak* Peaks, int PeakCount)
+{
+    int PeakIndex;
+    //
+    printf("\n-----Peak list:-----\n");
+    for (PeakIndex = 0; PeakIndex < PeakCount; PeakIndex++)
+    {
+        printf("%d: m/z %.2f %s %d %d\n", PeakIndex, Peaks[PeakIndex].Mass / (float)MASS_SCALE, GetIonTypeName(Peaks[PeakIndex].IonType),
+            Peaks[PeakIndex].CutIndex, Peaks[PeakIndex].Score);
+    }
+}
+
+// Theoretical spectrum builder - simple struct for remembering sector edges
+// and the current proline bonus.
+typedef struct TheoPeakBuilder
+{
+    int SectorEdgeA;
+    int SectorEdgeB;
+    int BProlineBonus;
+    int YProlineBonus;
+    int MatchLength;
+} TheoPeakBuilder;
+
+// Add a new theoretical peak to Peaks.
+void AddTheoPeak(TheoPeakBuilder* Theo, TheoPeak* Peaks, int PeakCount, int IonType, int LossType, int Mass,
+                 int CutIndex, int PrefixFlag, ScoringModel* Model, int Score)
+{
+    int SectorNumber;
+    //
+    Peaks[PeakCount].IonType = IonType;
+    Peaks[PeakCount].LossType = LossType;
+    Peaks[PeakCount].Mass = Mass;
+    Peaks[PeakCount].CutIndex = CutIndex;
+    Peaks[PeakCount].Score = Score;
+    if (PrefixFlag)
+    {
+        Peaks[PeakCount].TrueCutIndex = CutIndex;
+        Peaks[PeakCount].Score = min(0, Peaks[PeakCount].Score + Theo->BProlineBonus);
+    }
+    else
+    {
+        Peaks[PeakCount].TrueCutIndex = Theo->MatchLength - CutIndex;
+        Peaks[PeakCount].Score = min(0, Peaks[PeakCount].Score + Theo->YProlineBonus);
+    }
+    if (Mass > Theo->SectorEdgeB)
+    {
+        SectorNumber = 2;
+    }
+    else if (Mass > Theo->SectorEdgeA)
+    {
+        SectorNumber = 1;
+    }
+    else
+    {
+        SectorNumber = 0;
+    }
+    // Compare against the null model right here:
+    Peaks[PeakCount].Score -= Model->PresenceScore[SectorNumber];
+}
+
+// Give a bonus for peptides that seem plausible given the cleavage type.
+// For instance: If our sample was subjected to trypsin digest, then *most* fragments will
+// end in K or R (and be preceded by a K or R), so give a bonus to such peptides.
+// The DEFAULT behavior is to assume no digest and give no points.
+// This code is NO LONGER USED in production; instead, number of tryptic termini (NTT) is used
+// as one feature for LDA.
+int ApplyDigestBonus(Peptide* Match)
+{
+    int Score = 0;
+    int AminoCount;
+    int AminoIndex;
+    // SWT 3/9/5: Use a somewhat HEAVIER penalty for broken endpoints.  And, penalize two bad endpoints
+    // superadditively
+    int MissedCleavagePenalty = 100;
+    int BrokenSuffixPenalty = 550;
+    int BrokenPrefixPenalty = 550;
+    int BrokenBothPenalty = 400; // extra penalty if both endpoints broken
+    char MutantBases[256];
+    int BrokenTermini = 0;
+    int ModIndex;
+    // Write MutantBases string.  This contains the *real* amino acids of the match, with any mutations
+    // applied.  For instance, if Match->Bases is "EAMAPK" but match has an M->Q mutation in position 2,
+    // then MutantBases will be "EAQAPK".  
+    strcpy(MutantBases, Match->Bases);
+    if (!GlobalOptions->TaglessSearchFlag)
+    {
+        for (ModIndex = 0; ModIndex < MAX_PT_MODS; ModIndex++)
+        {
+            if (!Match->ModType[ModIndex])
+            {
+                break;
+            }
+            if (Match->ModType[ModIndex]->Amino)
+            {
+                MutantBases[Match->AminoIndex[ModIndex]] = Match->ModType[ModIndex]->Amino;
+            }
+        }
+    }
+    switch (GlobalOptions->DigestType)
+    {
+    case 0:
+        // No digest (or unknown digest), so no points
+        return 0;
+    case 1:
+        // A tryptic peptide gets a minor bonus, and 
+        // missed cleavages get a minor penalty.
+        AminoCount = strlen(MutantBases);
+        for (AminoIndex = 1; AminoIndex < AminoCount - 1; AminoIndex++)
+        {
+            if ((MutantBases[AminoIndex] == 'K' || MutantBases[AminoIndex] == 'R') && (MutantBases[AminoIndex + 1]!='P'))
+                Score -= MissedCleavagePenalty;
+        }
+        if (MutantBases[AminoCount - 1] != 'K' && MutantBases[AminoCount - 1] != 'R')
+        {
+            Score -= BrokenSuffixPenalty;
+            BrokenTermini++;
+        }
+        if (Match->PrefixAmino && (Match->PrefixAmino!='K' && Match->PrefixAmino!='R'))
+        {
+            Score -= BrokenPrefixPenalty;
+            BrokenTermini++;
+        }
+        if (BrokenTermini==2)
+        {
+            Score -= BrokenBothPenalty;
+        }
+        return Score;
+    case 2:
+        // Chymotrypsin: Cleaves C-terminal side of FYWL (if not followed by P)
+        AminoCount = strlen(MutantBases);
+        for (AminoIndex = 1; AminoIndex < AminoCount - 1; AminoIndex++)
+        {
+            if ((MutantBases[AminoIndex] == 'F' || MutantBases[AminoIndex] == 'Y' ||
+                MutantBases[AminoIndex] == 'W' || MutantBases[AminoIndex] == 'L') && (MutantBases[AminoIndex + 1]!='P'))
+                Score -= MissedCleavagePenalty;
+        }
+        if (MutantBases[AminoCount - 1] != 'F' && MutantBases[AminoCount - 1] != 'Y' &&
+            MutantBases[AminoCount - 1] != 'W' && MutantBases[AminoCount - 1] != 'L')
+        {
+            BrokenTermini++;
+            Score -= BrokenSuffixPenalty;
+        }
+        if (Match->PrefixAmino && (Match->PrefixAmino != 'F' && Match->PrefixAmino != 'Y' &&
+            Match->PrefixAmino != 'W' && Match->PrefixAmino != 'L'))
+        {
+            BrokenTermini++;
+            Score -= BrokenPrefixPenalty;
+        }
+        if (BrokenTermini==2)
+        {
+            Score -= BrokenBothPenalty;
+        }
+
+        return Score;
+    case 3:
+        // Lys-C - similar to trypsin.  Cleaves after K if not before P.
+        // missed cleavages get a minor penalty.
+        AminoCount = strlen(MutantBases);
+        for (AminoIndex = 1; AminoIndex < AminoCount - 1; AminoIndex++)
+        {
+            if ((MutantBases[AminoIndex] == 'K') && (MutantBases[AminoIndex+1]!='P'))
+                Score -= MissedCleavagePenalty;
+        }
+        if (MutantBases[AminoCount - 1] != 'K')
+        {
+            Score -= BrokenSuffixPenalty;
+            BrokenTermini++;
+        }
+        if (Match->PrefixAmino && (Match->PrefixAmino!='K'))
+        {
+            Score -= BrokenPrefixPenalty;
+            BrokenTermini++;
+        }
+        if (BrokenTermini==2)
+        {
+            Score -= BrokenBothPenalty;
+        }
+
+        return Score;
+    case 4:
+        // Asp-N - Cleaves before (on N-terminal side of) DE
+        AminoCount = strlen(MutantBases);
+        // Penalty for missed cleavages:
+        for (AminoIndex = 1; AminoIndex < AminoCount - 1; AminoIndex++)
+        {
+            if ((MutantBases[AminoIndex] == 'D') || (MutantBases[AminoIndex]=='E'))
+                Score -= MissedCleavagePenalty;
+        }
+        if (Match->SuffixAmino && (Match->SuffixAmino != 'D' && Match->SuffixAmino != 'E'))
+        {
+            Score -= BrokenSuffixPenalty;
+            BrokenTermini++;
+        }
+        if (MutantBases[0]!='D' && MutantBases[0]!='E')
+        {
+            Score -= BrokenPrefixPenalty;
+            BrokenTermini++;
+        }
+        if (BrokenTermini==2)
+        {
+            Score -= BrokenBothPenalty;
+        }
+
+        return Score;
+    case 5:
+        // GluC cleaves c-terminal of E
+        AminoCount = strlen(MutantBases);
+        for (AminoIndex = 1; AminoIndex < AminoCount - 1; AminoIndex++)
+        {
+            if (MutantBases[AminoIndex] == 'E')
+            {
+                Score -= MissedCleavagePenalty;
+            }
+        }
+        if (MutantBases[AminoCount - 1] != 'E')
+        {
+            Score -= BrokenSuffixPenalty;
+            BrokenTermini++;
+        }
+        if (Match->PrefixAmino && (Match->PrefixAmino!='E'))
+        {
+            Score -= BrokenPrefixPenalty;
+            BrokenTermini++;
+        }
+        if (BrokenTermini==2)
+        {
+            Score -= BrokenBothPenalty;
+        }
+        return Score;
+    default:
+        printf("Unknown digest type '%d' encountered, no scoring adjustment applied.\n", GlobalOptions->DigestType);
+        return 0;
+    }
+}
+
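+// The NTT (number of tryptic termini) feature mentioned above is computed by
+// CountTrypticTermini, defined elsewhere.  A simplified sketch of the idea, assuming
+// a terminus counts as tryptic when the preceding residue is K or R (or the peptide
+// sits at the protein's terminus) and, at the C-terminus, when the peptide itself
+// ends in K or R; the real helper may also handle details such as proline suppression.
+static int CountTrypticTerminiSketch(Peptide* Match)
+{
+    int NTT = 0;
+    int Length;
+    //
+    Length = strlen(Match->Bases);
+    // N-terminus: tryptic if the peptide starts the protein, or follows K/R.
+    if (!Match->PrefixAmino || Match->PrefixAmino == 'K' || Match->PrefixAmino == 'R')
+        NTT++;
+    // C-terminus: tryptic if the peptide ends the protein, or ends in K/R.
+    if (!Match->SuffixAmino || (Length && (Match->Bases[Length - 1] == 'K' || Match->Bases[Length - 1] == 'R')))
+        NTT++;
+    return NTT;
+}
+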
+int DiffPeptides(char* AA1, char* AA2)
+{
+    int DiffCount = 0;
+    while (*AA1 && *AA2)
+    {
+        if (*AA1 != *AA2)
+        {
+            DiffCount++;
+        }
+        AA1++;
+        AA2++;
+    }
+    return DiffCount;
+}
+
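+// Set DeltaCN and DeltaCNOther for every match of this spectrum.  Roughly: DeltaCN is
+// the score gap to the top-ranked match (for the top match itself, its margin over the
+// runner-up), and DeltaCNOther is the same margin measured only against matches that
+// look like genuinely different peptides; near-duplicate matches are grouped under a
+// FamilyLeader and inherit its margin.  Illustration: if the best match scores 1.20 and
+// the best clearly-different competitor scores 0.95, the best match's DeltaCNOther is 0.25.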
+void SetMatchDeltaCN(SpectrumNode* Spectrum)
+{
+    Peptide* Match;
+    Peptide* OtherMatch;
+    int MatchNumber = 0;
+    int IsSame;
+
+    // Init DeltaCN and DeltaCNOther:
+    for (Match = Spectrum->FirstMatch; Match; Match = Match->Next)
+    {
+        Match->DeltaCN = (float)FORBIDDEN_PATH;
+        Match->DeltaCNOther = (float)FORBIDDEN_PATH;
+    }
+
+    // Properly set DeltaCN and DeltaCNOther:
+    for (Match = Spectrum->FirstMatch; Match; Match = Match->Next)
+    {
+        MatchNumber++;
+        if (Match != Spectrum->FirstMatch)
+        {
+            Match->DeltaCN = Match->MatchQualityScore - Spectrum->FirstMatch->MatchQualityScore;
+        }
+        else
+        {
+            if (Match->Next)
+            {
+                Match->DeltaCN = Match->MatchQualityScore - Match->Next->MatchQualityScore;
+            }
+            else
+            {
+                Match->DeltaCN = max(0, Match->MatchQualityScore);
+            }
+        }
+        // If this match is already dissimilar to a higher-scoring one, stop now:
+        if (Match->DeltaCNOther != FORBIDDEN_PATH)
+        {
+            continue;
+        }
+        if (Match->FamilyLeader)
+        {
+            Match->DeltaCNOther = Match->FamilyLeader->DeltaCNOther + (Match->MatchQualityScore - Match->FamilyLeader->MatchQualityScore);
+            continue;
+        }
+        if (MatchNumber > GlobalOptions->ReportMatchCount)
+        {
+            // We won't bother computing DeltaCNOther for any poorer matches, because we'll drop them anyway.
+            break;
+        }
+        for (OtherMatch = Match->Next; OtherMatch; OtherMatch = OtherMatch->Next)
+        {
+            IsSame = 0;
+            if (abs(Match->FilePos - OtherMatch->FilePos) < 3)
+            {
+                IsSame = 1;
+            }
+            if (DiffPeptides(Match->Bases, OtherMatch->Bases) < 2)
+            {
+                IsSame = 1;
+            }
+            if (DiffPeptides(Match->Bases, OtherMatch->Bases + 1) < 2)
+            {
+                IsSame = 1;
+            }
+            if (DiffPeptides(Match->Bases + 1, OtherMatch->Bases) < 2)
+            {
+                IsSame = 1;
+            }
+            if (IsSame)
+            {
+                OtherMatch->FamilyLeader = Match;
+            }
+            else
+            {
+                OtherMatch->DeltaCNOther = OtherMatch->MatchQualityScore - Match->MatchQualityScore;
+                if (Match->DeltaCNOther == FORBIDDEN_PATH)
+                {
+                    Match->DeltaCNOther = Match->MatchQualityScore - OtherMatch->MatchQualityScore;
+                }
+            }
+        }
+        if (Match->DeltaCNOther == FORBIDDEN_PATH)
+        {
+            if (Match == Spectrum->LastMatch)
+            {
+                Match->DeltaCNOther = max(Match->MatchQualityScore, 0);
+            }
+            else
+            {
+                Match->DeltaCNOther = Match->MatchQualityScore - Spectrum->LastMatch->MatchQualityScore;
+            }
+        }
+    }
+}
+
+
+// Get PeptideMatchFeatures having to do with cut scores (mean, median...)
+int GetCutScorePeptideMatchFeatures(MSSpectrum* Spectrum, Peptide* Match, float* FeatureArray, PRMBayesianModel* Model)
+{
+    int FeatureIndex = 0;
+    float CutScores[256];
+    int PRMCount;
+    int AminoIndex;
+    float ScoreTotal;
+    int PeptideLength;
+    //
+    PeptideLength = strlen(Match->Bases);
+    //for (NodeIndex = 0, Node = Model->Head; Node; NodeIndex++, Node = Node->Next)
+    //{
+    //    PRM = 0;
+    //    for (AminoIndex = 0; AminoIndex <= PeptideLength; AminoIndex++)
+    //    {
+    //        ///////////////////////////////////////////////////////////////////////////////////////
+    //        // Set values, and accumulate table entries:
+    //        Node->Values[AminoIndex] = IonScoringGetNodeValue(Model, Node, Spectrum, PRM, Match, AminoIndex);
+    //        ///////////////////////////////////////////////////////////////////////////////////////
+    //        // Add to PRM:
+    //        if (AminoIndex == PeptideLength)
+    //        {
+    //            break;
+    //        }
+    //        PRM += PeptideMass[Match->Bases[AminoIndex]];
+    //        for (ModIndex = 0; ModIndex < MAX_PT_MODS; ModIndex++)
+    //        {
+    //            if (Match->AminoIndex[ModIndex] == AminoIndex)
+    //            {
+    //                PRM += Match->ModType[ModIndex]->RealDelta;
+    //            }
+    //        }
+    //    } // Amino loop
+    //} // NodeIndex loop
+
+    //// Populate the CutScores array:
+    //for (AminoIndex = 0; AminoIndex <= PeptideLength; AminoIndex++)
+    //{
+    //    CutScores[AminoIndex] = PRMBNGetCutScore(Spectrum, Model, AminoIndex);
+    //}
+    PopulateCutScores(Model, Spectrum, Match, CutScores);
+
+    // Compute features based upon cut scores:
+    // Total/mean for ALL cut scores:
+    ScoreTotal = 0;
+    PRMCount = 0;
+    for (AminoIndex = 0; AminoIndex <= PeptideLength; AminoIndex++)
+    {
+        ScoreTotal += CutScores[AminoIndex];
+        PRMCount++;
+    }
+    FeatureArray[FeatureIndex++] = ScoreTotal;
+    FeatureArray[FeatureIndex++] = ScoreTotal / (float)PRMCount;
+
+    // Total/mean for CENTRAL cut scores (excluding the two terminal cut points):
+    ScoreTotal = 0;
+    PRMCount = 0;
+    for (AminoIndex = 1; AminoIndex < PeptideLength; AminoIndex++)
+    {
+        ScoreTotal += CutScores[AminoIndex];
+        PRMCount++;
+    }
+    FeatureArray[FeatureIndex++] = ScoreTotal;
+    FeatureArray[FeatureIndex++] = ScoreTotal / (float)max(1, PRMCount);
+
+    // Total/mean for INNER cut scores (excluding the two outermost cut points at each end):
+    ScoreTotal = 0;
+    PRMCount = 0;
+    for (AminoIndex = 2; AminoIndex < (PeptideLength - 1); AminoIndex++)
+    {
+        ScoreTotal += CutScores[AminoIndex];
+        PRMCount++;
+    }
+    FeatureArray[FeatureIndex++] = ScoreTotal;
+    FeatureArray[FeatureIndex++] = ScoreTotal / (float)max(1, PRMCount);
+
+    // Median cut score:
+    PRMCount = PeptideLength + 1;
+    FeatureArray[FeatureIndex++] = GetMedian(CutScores + 2, PRMCount - 4);
+    FeatureArray[FeatureIndex++] = GetMedian(CutScores + 1, PRMCount - 2);
+    FeatureArray[FeatureIndex++] = GetMedian(CutScores, PRMCount);
+
+    return FeatureIndex;
+}
+
+// Helper for GetPeptideMatchFeaturesFull: Compute features having to do with the percentage of peaks
+// and peak intensity explained by the match.
+int GetExplainedPeakPeptideMatchFeatures(MSSpectrum* Spectrum, Peptide* Match, float* FeatureArray)
+{
+    int PeakIndex;
+    float IntensityB = 0;
+    float IntensityY = 0;
+    float IntensityBSeries = 0;
+    float IntensityYSeries = 0;
+    float TotalIntensity = 0;
+    int PeakCountB = 0;
+    int PeakCountY = 0;
+    int StrongPeakCountB = 0;
+    int StrongPeakCountY = 0;
+    float WeightedPeakCountTotal = 0;
+    float WeightedPeakCountB = 0;
+    float WeightedPeakCountY = 0;
+    int StrongPeakCount;
+    int FeatureIndex = 0;
+    int FragmentType;
+    float PeakIntensity;
+    float WeightedPeakIndex;
+    int BFlag[256];
+    int YFlag[256];
+    int PeptideLength;
+    int PresentCount;
+    int AminoIndex;
+    //
+    PeptideLength = strlen(Match->Bases);
+    memset(BFlag, 0, sizeof(int) * (PeptideLength + 1));
+    memset(YFlag, 0, sizeof(int) * (PeptideLength + 1));
+    StrongPeakCount = PeptideLength * 2;
+    for (PeakIndex = 0; PeakIndex < Spectrum->PeakCount; PeakIndex++)
+    {
+        FragmentType = Spectrum->Peaks[PeakIndex].IonType;
+        PeakIntensity = Spectrum->Peaks[PeakIndex].Intensity;
+        TotalIntensity += PeakIntensity;
+        WeightedPeakIndex = (float)1.0 / (Spectrum->Peaks[PeakIndex].IntensityRank + 1);
+        WeightedPeakCountTotal += WeightedPeakIndex;
+        switch (FragmentType)
+        {
+            case evFragmentY:
+                PeakCountY++;
+                IntensityY += PeakIntensity;
+                IntensityYSeries += PeakIntensity;
+                WeightedPeakCountY += WeightedPeakIndex;
+                YFlag[Spectrum->Peaks[PeakIndex].AminoIndex] = 1;
+                if (Spectrum->Peaks[PeakIndex].IntensityRank < StrongPeakCount)
+                {
+                    StrongPeakCountY++;
+                }
+                break;
+            case evFragmentYLoss:
+                IntensityYSeries += PeakIntensity;
+                break;
+            case evFragmentB:
+                PeakCountB++;
+                IntensityB += PeakIntensity;
+                IntensityBSeries += PeakIntensity;
+                WeightedPeakCountB += WeightedPeakIndex;
+                BFlag[Spectrum->Peaks[PeakIndex].AminoIndex] = 1;
+                if (Spectrum->Peaks[PeakIndex].IntensityRank < StrongPeakCount)
+                {
+                    StrongPeakCountB++;
+                }
+                break;
+            case evFragmentBLoss:
+                IntensityBSeries += PeakIntensity;
+                break;
+        }
+    }
+    // Fraction of B, Y present:
+    PresentCount = 0;
+    for (AminoIndex = 0; AminoIndex <= PeptideLength; AminoIndex++)
+    {
+        PresentCount += YFlag[AminoIndex];
+    }
+    FeatureArray[FeatureIndex++] = PresentCount / (float)(PeptideLength + 1);
+    PresentCount = 0;
+    for (AminoIndex = 0; AminoIndex <= PeptideLength; AminoIndex++)
+    {
+        PresentCount += BFlag[AminoIndex];
+    }
+    FeatureArray[FeatureIndex++] = PresentCount / (float)(PeptideLength + 1);
+    PresentCount = 0;
+    for (AminoIndex = 1; AminoIndex < PeptideLength; AminoIndex++)
+    {
+        PresentCount += YFlag[AminoIndex];
+    }
+    FeatureArray[FeatureIndex++] = PresentCount / (float)(PeptideLength - 1);
+    PresentCount = 0;
+    for (AminoIndex = 1; AminoIndex < PeptideLength; AminoIndex++)
+    {
+        PresentCount += BFlag[AminoIndex];
+    }
+    FeatureArray[FeatureIndex++] = PresentCount / (float)(PeptideLength - 1);
+
+    // Fraction of top peaks:
+    FeatureArray[FeatureIndex++] = (StrongPeakCountY + StrongPeakCountB) / (float)StrongPeakCount;
+    FeatureArray[FeatureIndex++] = StrongPeakCountY / (float)StrongPeakCount;
+    FeatureArray[FeatureIndex++] = StrongPeakCountB / (float)StrongPeakCount;
+
+    FeatureArray[FeatureIndex++] = (WeightedPeakCountY + WeightedPeakCountB) / WeightedPeakCountTotal;
+    FeatureArray[FeatureIndex++] = WeightedPeakCountY / WeightedPeakCountTotal;
+    FeatureArray[FeatureIndex++] = WeightedPeakCountB / WeightedPeakCountTotal;
+    
+    // Fraction of intensity:
+    FeatureArray[FeatureIndex++] = (IntensityY + IntensityB) / TotalIntensity;
+    FeatureArray[FeatureIndex++] = IntensityY / TotalIntensity;
+    FeatureArray[FeatureIndex++] = IntensityB / TotalIntensity;
+
+    // Fraction of intensity, counting the full b/y series (neutral-loss peaks included):
+    FeatureArray[FeatureIndex++] = (IntensityYSeries + IntensityBSeries) / TotalIntensity;
+    FeatureArray[FeatureIndex++] = IntensityYSeries / TotalIntensity;
+    FeatureArray[FeatureIndex++] = IntensityBSeries / TotalIntensity;
+
+    return FeatureIndex;
+}
+
+// Compute features rating the quality of this annotation for the spectrum.
+// Set feature values in FeatureArray, return the number of features set.
+int GetPeptideMatchFeaturesFull(MSSpectrum* Spectrum, Peptide* Match, float* FeatureArray)
+{
+    int FeatureIndex = 0;
+    int PeptideLength;
+    PRMBayesianModel* Model;
+    PMCSpectrumInfo* SpectrumInfo;
+    PMCInfo* Info;
+    //
+
+    // Length:
+    PeptideLength = strlen(Match->Bases);
+    FeatureArray[FeatureIndex++] = (float)PeptideLength;
+    
+    if (Spectrum->Charge < 3)
+    {
+        Model = TAGModelCharge2;
+    }
+    else
+    {
+        Model = TAGModelCharge3;
+    }
+
+    Spectrum->ParentMass = GetPeptideParentMass(Match);
+
+    // Compute cut scores:
+    FeatureIndex += GetCutScorePeptideMatchFeatures(Spectrum, Match, FeatureArray + FeatureIndex, Model);
+
+    // Compute features based on the fraction of top peaks / intensity explained:
+    FeatureIndex += GetExplainedPeakPeptideMatchFeatures(Spectrum, Match, FeatureArray + FeatureIndex);
+
+    ///////////////////////////////
+    // Spectral convolution:
+    SpectrumInfo = GetPMCSpectrumInfo(Spectrum);
+    Info = (PMCInfo*)calloc(1, sizeof(PMCInfo));
+    Info->Charge = SpectrumInfo->Charge;
+    Info->ParentMass = Spectrum->ParentMass;
+    SpectrumInfo->Head = Info;
+    SpectrumInfo->Tail = Info;
+    ConvolveMassCorrectedSpectrum(Info, SpectrumInfo);
+    FeatureArray[FeatureIndex++] = Info->Convolve[2];
+    FeatureArray[FeatureIndex++] = Info->Convolve2[0];
+    FreePMCSpectrumInfo(SpectrumInfo);
+
+    /////////////////////////////////
+    // Number of tryptic termini:
+    FeatureArray[FeatureIndex++] = (float)CountTrypticTermini(Match);
+
+    ////////////////////////////////
+    // Fancy length feature:
+    FeatureArray[FeatureIndex++] = (float)log(max(1, PeptideLength - 5));
+    FeatureArray[FeatureIndex++] = (float)log(max(1, PeptideLength - 4));
+    FeatureArray[FeatureIndex++] = (float)log(max(1, PeptideLength - 3));
+
+    return FeatureIndex;
+}
+
+float GetPenalizedScore(MSSpectrum* Spectrum, Peptide* Match, float Score)
+{
+    int ModIndex;
+    if (strlen(Match->Bases) < MIN_VALID_PEPTIDE_LENGTH)
+    {
+        Score -= 1.0;
+    }
+    for (ModIndex = 0; ModIndex < MAX_PT_MODS; ModIndex++)
+    {
+        if (Match->ModType[ModIndex])
+        {
+            Score -= 0.25;
+        }
+    }
+    return Score;
+}
diff --git a/Score.h b/Score.h
new file mode 100644
index 0000000..22a08b6
--- /dev/null
+++ b/Score.h
@@ -0,0 +1,85 @@
+//Title:          Score.h
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+#ifndef SCORE_H
+#define SCORE_H
+#include "Inspect.h"
+#include "Trie.h"
+
+// For marking forbidden paths in d.p. tables: 
+#define FORBIDDEN_PATH -99999999
+
+typedef enum evIonType
+{
+    evIonTypeNone = 0,
+    evIonTypeB, 
+    evIonTypeY,
+    evIonTypeA,
+    evIonTypeBH2O,
+    evIonTypeAH2O,
+    evIonTypeBNH3,
+    evIonTypeANH3,
+    evIonTypeYH2O,
+    evIonTypeYNH3,
+    evIonTypeB2,
+    evIonTypeY2,
+    evIonTypeNoise,
+    evIonTypeBPhos, // B minus a phosphorylation
+    evIonTypeYPhos, // Y minus a phosphorylation
+    evIonTypeCount,
+} evIonType;
+
+typedef enum evLossType
+{
+    evLossNone = 0,
+    evLossB,
+    evLossY,
+} evLossType;
+
+// Workhorse function of Score.c: Compare the theoretical fragmentation pattern of a peptide to
+// a spectrum, and assign a score.  
+int ScoreMatch(MSSpectrum* Spectrum, Peptide* Match, int VerboseFlag);
+
+// Apply a penalty if the peptide doesn't match GlobalOptions->DigestType.  We have a 
+// special scoring model for tryptic peptides, but other less specific proteases - like 
+// GluC - also should affect scoring based on endpoints.
+int ApplyDigestBonus(Peptide* Match);
+
+// Compute the p-value for a match, based upon explained intensity and explained peaks and b/y ladder
+// and match score.
+//void ComputeMatchConfidenceLevel(MSSpectrum* Spectrum, Peptide* Match);
+void ScoreMatchTest(int VerboseFlag);
+int InitPValue(char* FileName);
+void SetMatchDeltaCN(SpectrumNode* Spectrum);
+int GetPeptideMatchFeaturesFull(MSSpectrum* Spectrum, Peptide* Match, float* FeatureArray);
+float GetPenalizedScore(MSSpectrum* Spectrum, Peptide* Match, float Score);
+#endif // SCORE_H
diff --git a/Score.py b/Score.py
new file mode 100644
index 0000000..797e2df
--- /dev/null
+++ b/Score.py
@@ -0,0 +1,61 @@
+#Title:          Score.py
+#Author:         Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+#Created:        2005
+# Copyright 2007,2008,2009 The Regents of the University of California
+# All Rights Reserved
+#
+# Permission to use, copy, modify and distribute any part of this
+# program for educational, research and non-profit purposes, by non-profit
+# institutions only, without fee, and without a written agreement is hereby
+# granted, provided that the above copyright notice, this paragraph and
+# the following three paragraphs appear in all copies.
+#
+# Those desiring to incorporate this work into commercial
+# products or use for commercial purposes should contact the Technology
+# Transfer & Intellectual Property Services, University of California,
+# San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+# Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+#
+# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+# INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+# IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+# OF SUCH DAMAGE.
+#
+# THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+# OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+# ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+# REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+# EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+# THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+
+"""
+Score.py: Simple command-line wrapper for Inspect scoring (via PyInspect)
+"""
+import sys
+import string
+import PyInspect
+
+def FormatTuple(Tuple):
+    Str = "("
+    for Entry in Tuple:
+        Str += "%.4g, "%Entry
+    Str = Str[:-2]
+    Str += ")"
+    return Str
+
+ColonBits = sys.argv[1].split(":")
+try:
+    FileOffset = int(ColonBits[-1])
+    FileName = string.join(ColonBits[:-1], ":")
+except:
+    FileName = sys.argv[1]
+    FileOffset = 0
+    
+Spectrum = PyInspect.Spectrum(FileName, FileOffset)
+Result = Spectrum.ScorePeptideDetailed(sys.argv[2])
+Str = "MQ %.4f %s"%(Result[0], FormatTuple(Result[1:]))
+print Str
diff --git a/ScoringModel.dat b/ScoringModel.dat
new file mode 100644
index 0000000..99e6bdc
Binary files /dev/null and b/ScoringModel.dat differ
diff --git a/Scorpion.c b/Scorpion.c
new file mode 100644
index 0000000..71d499a
--- /dev/null
+++ b/Scorpion.c
@@ -0,0 +1,1304 @@
+//Title:          Scorpion.c
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+#include "CMemLeak.h"
+#include <string.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <assert.h>
+#include <math.h> 
+#include "Mods.h"
+#include "Scorpion.h"
+#include "Spectrum.h"
+#include "Inspect.h"
+#include "Tagger.h"
+#include "SVM.h"
+#include "BN.h"
+#include "FreeMod.h"
+#include "IonScoring.h"
+
+#define PRM_FEATURE_COUNT 32
+#define MAX_PEPTIDE_LENGTH 256
+#define INTENSITY_LEVEL_COUNT 4
+
+int g_CutFeatures[MAX_PEPTIDE_LENGTH * CUT_FEATURE_COUNT];
+float g_VerboseCutFeatures[MAX_PEPTIDE_LENGTH * CUT_FEATURE_COUNT];
+int g_PRMFeatures[PRM_FEATURE_COUNT];
+//float g_PRMBScore; // hax
+//float g_PRMYScore; // hax
+float g_CutScores[MAX_PEPTIDE_LENGTH];
+extern PRMBayesianModel* PRMModelCharge2;
+
+int SeizePeaks(MSSpectrum* Spectrum, int TargetMass, int IonType, int AminoIndex, float* pIntensity, float* pSkew, float* pAbsSkew);
+
+FILE* g_ScorpionScoringFile = NULL;
+
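+// Return the fraction of spectral peaks explained by the current annotation: peaks
+// labeled as b/y-series ions (including isotope and doubly-charged variants) always
+// count, other annotated ion types count only when BYOnly is zero, and unannotated
+// peaks never count.  If PeakCount > 0, only the PeakCount most intense peaks are
+// considered and the fraction is taken over that count.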
+float GetExplainedPeakPercent(MSSpectrum* Spectrum, int PeakCount, int BYOnly)
+{
+    int PeaksSeen = 0;
+    int AnnotatedCount = 0;
+    int PeakIndex;
+    SpectralPeak* Peak;
+    int VerboseFlag = 0;
+    //
+    for (PeakIndex = 0; PeakIndex < Spectrum->PeakCount; PeakIndex++)
+    {
+        Peak = Spectrum->Peaks + PeakIndex;
+        if (PeakCount>0 && Peak->IntensityRank >= PeakCount)
+        {
+            continue;
+        }
+        PeaksSeen++;
+        switch (Peak->IonType)
+        {
+        case IonB:
+        case IonB2:
+        case IonBI:
+        case IonY:
+        case IonY2:
+        case IonYI:
+            AnnotatedCount++;
+            if (VerboseFlag)
+            {
+                printf("Peak index %d at %.2f: %d\n", Peak->IntensityRank, Peak->Mass / (float)MASS_SCALE, Peak->IonType);
+            }
+
+            break;
+        case 0:
+            if (VerboseFlag)
+            {
+                printf("* Peak index %d at %.2f NOT annotated\n", Peak->IntensityRank, Peak->Mass / (float)MASS_SCALE);
+            }
+            break; // No annotated intensity for you!
+        default:
+            if (VerboseFlag)
+            {
+                printf("Peak index %d at %.2f: %d\n", Peak->IntensityRank, Peak->Mass / (float)MASS_SCALE, Peak->IonType);
+            }
+
+            if (!BYOnly)
+            {
+                AnnotatedCount++;
+            }
+        }
+    }
+    if (!PeaksSeen)
+    {
+        return 0;
+    }
+    if (PeakCount > 0)
+    {
+        return AnnotatedCount / (float)PeakCount;
+    }
+    return AnnotatedCount / (float)PeaksSeen;
+}
+
+
+float GetExplainedIntensityPercent(MSSpectrum* Spectrum, int PeakCount, int BYOnly)
+{
+    float PeakIntensity = 0;
+    float AnnotatedIntensity = 0;
+    int PeakIndex;
+    SpectralPeak* Peak;
+    //
+    for (PeakIndex = 0; PeakIndex < Spectrum->PeakCount; PeakIndex++)
+    {
+        Peak = Spectrum->Peaks + PeakIndex;
+        if (PeakCount>0 && Peak->IntensityRank >= PeakCount)
+        {
+            continue;
+        }
+        
+        PeakIntensity += Peak->Intensity;
+        //printf("%.2f\t%.2f\t%d\n", Peak->Mass / (float)MASS_SCALE, Peak->Intensity, Peak->IonType);
+        switch (Peak->IonType)
+        {
+        case IonB:
+        case IonB2:
+        case IonBI:
+        case IonY:
+        case IonY2:
+        case IonYI:
+            AnnotatedIntensity += Peak->Intensity;
+            break;
+        case 0:
+            break; // No annotated intensity for you!
+        default:
+            if (!BYOnly)
+            {
+                AnnotatedIntensity += Peak->Intensity;
+            }
+        }
+    }
+    if (PeakIntensity == 0)
+    {
+        return 0;
+    }
+    return AnnotatedIntensity / PeakIntensity;
+}
+
+int GetFlankBFeature(char Left, char Right)
+{
+    // H on right: Strong suppression
+    if (Right == 'H')
+    {
+        return 0;
+    }
+    // G or P on left: Strong suppression
+    if (Left == 'G' || Left == 'P')
+    {
+        return 1;
+    }
+    // K or R on left: Augmentation
+    if (Left == 'K' || Left == 'R')
+    {
+        return 2;
+    }
+    // P on right: Augmentation
+    if (Right == 'P')
+    {
+        return 3;
+    }
+
+    return 4;
+}
+
+int GetFlankYFeature(char Left, char Right)
+{
+    // K or R on right: Strong suppression
+    if (Right == 'K' || Right == 'R')
+    {
+        return 0;
+    }
+    // G or P on left: Strong suppression
+    if (Left == 'G' || Left == 'P')
+    {
+        return 1;
+    }
+    // K or R on left: Augmentation
+    if (Left == 'K' || Left == 'R')
+    {
+        return 2;
+    }
+    // P on right: Augmentation
+    if (Right == 'P')
+    {
+        return 3;
+    }
+
+    return 4;
+}
+
+// SECTOR_COUNT
+#define GET_SECTOR(Mass) \
+if (Mass > GlobalOptions->DynamicRangeMax || Mass < GlobalOptions->DynamicRangeMin) \
+{\
+    Sector = -1;\
+}\
+else if (Mass > SectorCutoffA) \
+{\
+    Sector = 1;\
+}\
+else\
+{\
+    Sector = 0;\
+}
+
+// SECTOR_COUNT
+#define GET_CUT_SECTOR(Mass) \
+if (Mass > GlobalOptions->DynamicRangeMax || Mass < GlobalOptions->DynamicRangeMin) \
+{\
+    Sector = -1;\
+}\
+else if (Mass > SectorCutoffA) \
+{\
+    Sector = 1;\
+}\
+else\
+{\
+    Sector = 0;\
+}
+
+// Helper macro for GetPRMFeatures.  NOT applicable in peptide context, only in tagging context
+#define GET_BIN_INTENSITY(Mass) \
+    Bin = (Mass + 50) / 100;\
+    if (Bin >= 0 && Bin < Spectrum->IntensityBinCount) \
+    { \
+    IntensityLevel = Spectrum->BinnedIntensityLevels[Bin]; \
+    } \
+    else \
+    { \
+        IntensityLevel = 0; \
+    } 
+
+// Given a putative prefix residue mass for an unannotated spectrum, compute features for scoring its quality.
+// Used in building a score array for blind search, and in quick-scoring tagged search.
+// This code has SIGNIFICANT OVERLAP with the code in GetCutFeatures()
+// Here, PRM is a mass (in thousandths-of-a-dalton).
+float GetPRMFeatures(MSSpectrum* Spectrum, SpectrumTweak* Tweak, BayesianModel* Model, int PRM, int VerboseFlag)
+{
+    int ParentMass;
+    int MassB;
+    int MassY;
+    int Mass;
+    int IntensityLevel;
+    int Sector;
+    int SectorCutoffA;
+    float Score = 0;
+    int Bin;
+    //Spectrum->Charge = Tweak->Charge;
+    //Spectrum->ParentMass = Tweak->ParentMass;
+    ParentMass = Tweak->ParentMass;
+    SectorCutoffA = (int)(ParentMass * 0.5 + 0.5);
+    // SECTOR_COUNT
+    if (PRM > SectorCutoffA)
+    {
+        g_PRMFeatures[SISector] = 1;
+    }
+    else
+    {
+        g_PRMFeatures[SISector] = 0;
+    }
+    MassB = PRM + DALTON;
+    MassY = ParentMass - PRM;
+    // Compute the vector of features.  Compute parent features BEFORE computing children.
+    g_PRMFeatures[SICharge] = Spectrum->Charge;
+
+    // Alterations to PRM scoring in context of a phosphopeptide search:
+    // - Don't try to use a phosphate-loss peak as a b or y peak
+    // - Give a bonus for phosphate loss peaks, maybe
+
+    // Find the intensity level for the y peak, and store it in the feature-vector:
+    GET_BIN_INTENSITY(MassY);
+    //IntensityLevel = SeizePeaks(Spectrum, MassY, 0);
+    g_PRMFeatures[IonY] = IntensityLevel;
+
+    // If the y peak is outside dynamic range, then don't adjust the score.
+    // If it's in range: Add the y node's log-probability, and subtract the null model's log-probability.
+    GET_SECTOR(MassY);
+    if (Model && Sector >= 0)
+    {
+        Score += ComputeBNProbability(Model->Nodes + IonY, IonY, g_PRMFeatures);
+        Score -= Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel];
+        if (VerboseFlag)
+        {
+            printf("Y\t%.1f\t\t%d\t%.2f\t%.2f\n", MassY / (float)MASS_SCALE, IntensityLevel, 
+                ComputeBNProbability(Model->Nodes + IonY, IonY, g_PRMFeatures),
+                Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel]);
+        }
+    }
+
+    // FOR PHOSPHOPEPTIDE TAGGING:
+    if (GlobalOptions->PhosphorylationFlag)
+    {
+        GET_BIN_INTENSITY(MassY - PHOSPHATE_WATER_MASS);
+        if (IntensityLevel)
+        {
+            Score += 0.5;
+        }
+    }
+
+    // b peak:
+    //IntensityLevel = SeizePeaks(Spectrum, MassB, 0);
+    GET_BIN_INTENSITY(MassB);
+    g_PRMFeatures[IonB] = IntensityLevel;
+
+    GET_SECTOR(MassB);
+    if (Model && Sector >= 0)
+    {
+        Score += ComputeBNProbability(Model->Nodes + IonB, IonB, g_PRMFeatures);
+        Score -= Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel];
+        if (VerboseFlag)
+        {
+            printf("B\t%.1f\t\t%d\t%.2f\t%.2f\n", MassB / (float)MASS_SCALE, IntensityLevel, 
+                ComputeBNProbability(Model->Nodes + IonB, IonB, g_PRMFeatures),
+                Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel]);
+        }
+    }
+
+    // FOR PHOSPHOPEPTIDE TAGGING:
+    if (GlobalOptions->PhosphorylationFlag)
+    {
+        GET_BIN_INTENSITY(MassB - PHOSPHATE_WATER_MASS);
+        if (IntensityLevel)
+        {
+            Score += 0.5;
+        }
+    }
+
+    // y isotopic peak:
+    Mass = MassY + DALTON;
+    //IntensityLevel = SeizePeaks(Spectrum, Mass, 0);
+    GET_BIN_INTENSITY(Mass);
+    g_PRMFeatures[IonYI] = IntensityLevel;
+    GET_SECTOR(Mass);
+    if (Model && Sector >= 0)
+    {
+        Score += ComputeBNProbability(Model->Nodes + IonYI, IonYI, g_PRMFeatures);
+        Score -= Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel];
+        if (VerboseFlag)
+        {
+            printf("YI\t%.1f\t\t%d\t%.2f\t%.2f\n", Mass / (float)MASS_SCALE, IntensityLevel, 
+                ComputeBNProbability(Model->Nodes + IonYI, IonYI, g_PRMFeatures),
+                Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel]);
+        }
+    }
+
+    // b isotopic peak:
+    Mass = MassB + DALTON;
+    //IntensityLevel = SeizePeaks(Spectrum, Mass, 0);
+    GET_BIN_INTENSITY(Mass);
+    g_PRMFeatures[IonBI] = IntensityLevel;
+    GET_SECTOR(Mass);
+    if (Model && Sector >= 0)
+    {
+        Score += ComputeBNProbability(Model->Nodes + IonBI, IonBI, g_PRMFeatures);
+        Score -= Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel];
+        if (VerboseFlag)
+        {
+            printf("BI\t%.1f\t\t%d\t%.2f\t%.2f\n", Mass / (float)MASS_SCALE, IntensityLevel, 
+                ComputeBNProbability(Model->Nodes + IonBI, IonBI, g_PRMFeatures),
+                Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel]);
+        }
+    }
+
+    // doubly-charged y:
+    Mass = (int)((MassY + HYDROGEN_MASS)/2 + 0.5);
+    //IntensityLevel = SeizePeaks(Spectrum, Mass, 0);
+    GET_BIN_INTENSITY(Mass);
+    g_PRMFeatures[IonY2] = IntensityLevel;
+    GET_SECTOR(Mass);
+    if (Model && Sector >= 0)
+    {
+        Score += ComputeBNProbability(Model->Nodes + IonY2, IonY2, g_PRMFeatures);
+        Score -= Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel];
+        if (VerboseFlag)
+        {
+            printf("Y2\t%.1f\t\t%d\t%.2f\t%.2f\n", Mass / (float)MASS_SCALE, IntensityLevel, 
+                ComputeBNProbability(Model->Nodes + IonY2, IonY2, g_PRMFeatures),
+                Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel]);
+        }
+    }
+
+    // doubly-charged b:
+    Mass = (int)((MassB + HYDROGEN_MASS)/2 + 0.5);
+    //IntensityLevel = SeizePeaks(Spectrum, Mass, 0);
+    GET_BIN_INTENSITY(Mass);
+    g_PRMFeatures[IonB2] = IntensityLevel;
+    GET_SECTOR(Mass);
+    if (Model && Sector >= 0)
+    {
+        Score += ComputeBNProbability(Model->Nodes + IonB2, IonB2, g_PRMFeatures);
+        Score -= Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel];
+        if (VerboseFlag)
+        {
+            printf("B2\t%.1f\t\t%d\t%.2f\t%.2f\n", Mass / (float)MASS_SCALE, IntensityLevel, 
+                ComputeBNProbability(Model->Nodes + IonB2, IonB2, g_PRMFeatures),
+                Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel]);
+        }
+    }
+
+    // Y-H2O:
+    Mass = MassY - WATER_MASS;
+    //IntensityLevel = SeizePeaks(Spectrum, Mass, 0);
+    GET_BIN_INTENSITY(Mass);
+    g_PRMFeatures[IonYH2O] = IntensityLevel;
+    GET_SECTOR(Mass);
+    if (Model && Sector >= 0)
+    {
+        Score += ComputeBNProbability(Model->Nodes + IonYH2O, IonYH2O, g_PRMFeatures);
+        Score -= Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel];
+        if (VerboseFlag)
+        {
+            printf("Y-h2o\t%.1f\t\t%d\t%.2f\t%.2f\n", Mass / (float)MASS_SCALE, IntensityLevel, 
+                ComputeBNProbability(Model->Nodes + IonYH2O, IonYH2O, g_PRMFeatures),
+                Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel]);
+        }
+    }
+
+    // a (b minus CO):
+    Mass = MassB - 27000;
+    //IntensityLevel = SeizePeaks(Spectrum, Mass, IonA);
+    GET_BIN_INTENSITY(Mass);
+    g_PRMFeatures[IonA] = IntensityLevel;
+    GET_SECTOR(Mass);
+    if (Model && Sector >= 0)
+    {
+        Score += ComputeBNProbability(Model->Nodes + IonA, IonA, g_PRMFeatures);
+        Score -= Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel];
+        if (VerboseFlag)
+        {
+            printf("a\t%.1f\t\t%d\t%.2f\t%.2f\n", Mass / (float)MASS_SCALE, IntensityLevel, 
+                ComputeBNProbability(Model->Nodes + IonA, IonA, g_PRMFeatures),
+                Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel]);
+        }
+    }
+
+    // b-H2O:
+    Mass = MassB - WATER_MASS;
+    //IntensityLevel = SeizePeaks(Spectrum, Mass, IonBH2O);
+    GET_BIN_INTENSITY(Mass);
+    g_PRMFeatures[IonBH2O] = IntensityLevel;
+    GET_SECTOR(Mass);
+    if (Model && Sector >= 0)
+    {
+        Score += ComputeBNProbability(Model->Nodes + IonBH2O, IonBH2O, g_PRMFeatures);
+        Score -= Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel];
+        if (VerboseFlag)
+        {
+            printf("b-h2o\t%.1f\t\t%d\t%.2f\t%.2f\n", Mass / (float)MASS_SCALE, IntensityLevel, 
+                ComputeBNProbability(Model->Nodes + IonBH2O, IonBH2O, g_PRMFeatures),
+                Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel]);
+        }
+    }
+
+    // y-NH3:
+    Mass = MassY - 17000;
+    //IntensityLevel = SeizePeaks(Spectrum, Mass, IonYNH3);
+    GET_BIN_INTENSITY(Mass);
+    g_PRMFeatures[IonYNH3] = IntensityLevel;
+    GET_SECTOR(Mass);
+    if (Model && Sector >= 0)
+    {
+        Score += ComputeBNProbability(Model->Nodes + IonYNH3, IonYNH3, g_PRMFeatures);
+        Score -= Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel];
+        if (VerboseFlag)
+        {
+            printf("y-nh3\t%.1f\t\t%d\t%.2f\t%.2f\n", Mass / (float)MASS_SCALE, IntensityLevel, 
+                ComputeBNProbability(Model->Nodes + IonYNH3, IonYNH3, g_PRMFeatures),
+                Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel]);
+        }
+    }
+
+    // b-NH3:
+    Mass = MassB - 17000;
+    //IntensityLevel = SeizePeaks(Spectrum, Mass, IonBNH3);
+    GET_BIN_INTENSITY(Mass);
+    g_PRMFeatures[IonBNH3] = IntensityLevel;
+    GET_SECTOR(Mass);
+    if (Model && Sector >= 0)
+    {
+        Score += ComputeBNProbability(Model->Nodes + IonBNH3, IonBNH3, g_PRMFeatures);
+        Score -= Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel];
+        if (VerboseFlag)
+        {
+            printf("b-nh3\t%.1f\t\t%d\t%.2f\t%.2f\n", Mass / (float)MASS_SCALE, IntensityLevel, 
+                ComputeBNProbability(Model->Nodes + IonBNH3, IonBNH3, g_PRMFeatures),
+                Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel]);
+        }
+    }
+
+    if (GlobalOptions->PhosphorylationFlag)
+    {
+        Mass = (Spectrum->ParentMass + HYDROGEN_MASS * (Tweak->Charge - 1) - PHOSPHATE_WATER_MASS) / 2;
+        if (abs(PRM - Mass) < 1000)
+        {
+            Score = min(Score, (float)2.0);
+        }
+    }
+
+    return Score;
+}
+
+float g_BIntensity[MAX_PEPTIDE_LENGTH];
+float g_YIntensity[MAX_PEPTIDE_LENGTH];
+float g_BSkew[MAX_PEPTIDE_LENGTH];
+float g_YSkew[MAX_PEPTIDE_LENGTH];
+float g_BAbsSkew[MAX_PEPTIDE_LENGTH];
+float g_YAbsSkew[MAX_PEPTIDE_LENGTH];
+
+#define FRAGMENTATION_NORMAL 0
+#define FRAGMENTATION_PHOSPHO 1
+
+// Given a spectrum and a peptide, generate the feature-vector for each cut-point along the backbone.
+// This is separate from GetPRMFeatures, which is done in the context of a spectrum WITHOUT a peptide.
+// (The difference: Here we know the flanking peptide, and we have 5 possible sectors rather than 3)
+//
+// If Model is NULL, we only populate the feature arrays (so the caller can write the
+// feature-vectors out for training); if Model is non-NULL, we also store the per-cut-point
+// scores in g_CutScores.
+void GetCutFeatures(MSSpectrum* Spectrum, SpectrumTweak* Tweak, Peptide* Match, 
+    BayesianModel* Model)
+{
+    int Mass;
+    int PRM;
+    int AminoIndex;
+    int Length;
+    int CutMasses[MAX_PEPTIDE_LENGTH];
+    //int BaseFlags[MAX_PEPTIDE_LENGTH];
+    //int AcidFlags[MAX_PEPTIDE_LENGTH];
+    int ModIndex;
+    int MassB;
+    int MassY;
+    int SectorCutoffA;
+    //int SectorCutoffB;
+    //int SectorCutoffC;
+    int ParentMass;
+    int FeatureValue;
+    int Sector;
+    int IntensityLevel;
+    int VerboseFlag = 0;
+    int AminoIndexY;
+    int AminoIndexB;
+    int CutFeaturesBaseIndex;
+    //
+    Spectrum->Charge = Tweak->Charge;
+    Spectrum->ParentMass = Tweak->ParentMass;
+
+    // Check whether we're using special fragmentation models.
+    // Use phosphopeptide fragmentation rules if this is Sphos or Tphos (but not for Yphos)
+    for (ModIndex = 0; ModIndex < MAX_PT_MODS; ModIndex++)
+    {
+        if (!Match->ModType[ModIndex])
+        {
+            break;
+        }
+        if (Match->ModType[ModIndex]->Flags & DELTA_FLAG_PHOSPHORYLATION && Match->Bases[Match->AminoIndex[ModIndex]]!='Y')
+        {
+            Match->SpecialFragmentation = FRAGMENTATION_PHOSPHO;
+            Match->SpecialModPosition = Match->AminoIndex[ModIndex];
+        }
+    }
+    Length = strlen(Match->Bases);
+    Mass = 0;
+    //memset(BaseFlags, 0, sizeof(int)*MAX_PEPTIDE_LENGTH);
+    //memset(AcidFlags, 0, sizeof(int)*MAX_PEPTIDE_LENGTH);
+    // Get the array of masses:
+    for (AminoIndex = 0; AminoIndex < Length; AminoIndex++)
+    {
+        Mass += PeptideMass[Match->Bases[AminoIndex]];
+        
+        for (ModIndex = 0; ModIndex < MAX_PT_MODS; ModIndex++)
+        {
+            if (Match->AminoIndex[ModIndex] != AminoIndex)
+            {
+                continue;
+            }
+            if (!Match->ModType[ModIndex])
+            {
+                break;
+            }
+            Mass += Match->ModType[ModIndex]->RealDelta;
+        }
+        CutMasses[AminoIndex] = Mass;
+    }
+    ParentMass = Mass + PARENT_MASS_BOOST;
+    // Set dynamic-range feature:
+    for (AminoIndex = 0; AminoIndex < Length-1; AminoIndex++)
+    {
+        Mass = CutMasses[AminoIndex];
+        MassB = Mass + DALTON;
+        MassY = ParentMass - Mass;
+        if (MassB < GlobalOptions->DynamicRangeMin || MassB > GlobalOptions->DynamicRangeMax)
+        {
+            // B is out.
+            if (MassY < GlobalOptions->DynamicRangeMin || MassY > GlobalOptions->DynamicRangeMax)
+            {
+                // Both out:
+                g_CutFeatures[AminoIndex * CUT_FEATURE_COUNT] = 0;
+            }
+            else
+            {
+                // Y, no B:
+                g_CutFeatures[AminoIndex * CUT_FEATURE_COUNT] = 2;
+            }
+
+        }
+        else
+        {
+            if (MassY < GlobalOptions->DynamicRangeMin || MassY > GlobalOptions->DynamicRangeMax)
+            {
+                // B, no Y:
+                g_CutFeatures[AminoIndex*CUT_FEATURE_COUNT] = 1;
+            }
+            else
+            {
+                // Both lie inside dynamic range
+                g_CutFeatures[AminoIndex*CUT_FEATURE_COUNT] = 3;
+            }
+        }
+    }
+
+    SectorCutoffA = (int)(ParentMass * 0.5 + 0.5);
+    //SectorCutoffB = (int)(ParentMass * 0.667 + 0.5);
+    //SectorCutoffC = (int)(ParentMass * 0.667 + 0.5); // SECTOR_COUNT
+    memset(g_VerboseCutFeatures, 0, sizeof(int) * MAX_PEPTIDE_LENGTH * CUT_FEATURE_COUNT);
+    memset(g_BSkew, 0, sizeof(float) * MAX_PEPTIDE_LENGTH);
+    memset(g_YSkew, 0, sizeof(float) * MAX_PEPTIDE_LENGTH);
+    memset(g_BAbsSkew, 0, sizeof(float) * MAX_PEPTIDE_LENGTH);
+    memset(g_YAbsSkew, 0, sizeof(float) * MAX_PEPTIDE_LENGTH);
+
+    // Annotate the M-P (precursor minus phosphate) peak, if it's a phosphopeptide:
+    if (Match->SpecialFragmentation == FRAGMENTATION_PHOSPHO)
+    {
+        Mass = (ParentMass + (Tweak->Charge - 1) * HYDROGEN_MASS - PHOSPHATE_WATER_MASS) / Tweak->Charge;
+        SeizePeaks(Spectrum, Mass, IonParentLoss, -1, NULL, NULL, NULL);
+    }
+
+    // Capture ions:
+    for (AminoIndex = 0; AminoIndex < Length; AminoIndex++)
+    {
+        // We number the CUTS from 0 up through length-1.  We set AminoIndexY and AminoIndexB
+        // to the lengths (in amino acids) of the y and b fragments, respectively, for easy
+        // reading by humans.
+        AminoIndexY = Length - AminoIndex - 1;
+        AminoIndexB = AminoIndex + 1;
+        CutFeaturesBaseIndex = AminoIndex * CUT_FEATURE_COUNT;
+        PRM = CutMasses[AminoIndex];
+        g_CutScores[AminoIndex] = 0;
+        MassB = PRM + DALTON;
+        MassY = ParentMass - PRM;
+        // Compute the vector of features.  Compute parent features BEFORE computing children.
+        g_CutFeatures[CutFeaturesBaseIndex + SICharge] = Spectrum->Charge;
+        g_CutFeatures[CutFeaturesBaseIndex + SIFlankB] = GetFlankBFeature(Match->Bases[AminoIndex], Match->Bases[AminoIndex + 1]);
+        g_CutFeatures[CutFeaturesBaseIndex + SIFlankY] = GetFlankYFeature(Match->Bases[AminoIndex], Match->Bases[AminoIndex + 1]);
+        // Sector feature: 0 if this cut lies in the first half of the parent mass, 1 otherwise.
+        
+        if (PRM > ParentMass * 0.5)
+        {
+            FeatureValue = 1;
+        }
+        else
+        {
+            FeatureValue = 0;
+        }
+        g_CutFeatures[CutFeaturesBaseIndex + SISector] = FeatureValue;
+
+        // Find the intensity level for the y peak, and store it in the feature-vector:
+        IntensityLevel = SeizePeaks(Spectrum, MassY, IonY, AminoIndexY, g_YIntensity + AminoIndex, g_YSkew + AminoIndex, g_YAbsSkew + AminoIndex);
+        if (Match->SpecialFragmentation && AminoIndex < Match->SpecialModPosition)
+        {
+            IntensityLevel = max(IntensityLevel, SeizePeaks(Spectrum, MassY - PHOSPHATE_WATER_MASS, IonY, AminoIndexY, g_YIntensity + AminoIndex, g_YSkew + AminoIndex, g_YAbsSkew + AminoIndex));
+        }
+        g_CutFeatures[CutFeaturesBaseIndex + IonY] = IntensityLevel;
+        // If the y peak is outside dynamic range, then don't adjust the score.
+        // If it's in range: Add the y node's log-probability, and subtract the null model's log-probability.
+        GET_SECTOR(MassY);
+        if (Model && Sector >= 0)
+        {
+            g_CutScores[AminoIndex] += ComputeBNProbability(Model->Nodes + IonY, IonY, g_CutFeatures + CutFeaturesBaseIndex);
+            g_CutScores[AminoIndex] -= Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel];
+            g_VerboseCutFeatures[AminoIndex*CUT_FEATURE_COUNT + IonY] = ComputeBNProbability(Model->Nodes + IonY, IonY, g_CutFeatures + CutFeaturesBaseIndex) - Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel];
+            if (VerboseFlag)
+            {
+                printf("%s\t%s\t%d\t%.1f\tY\t%d\t%.2f\t%.2f\t\n", Spectrum->Node->InputFile->FileName,
+                    Match->Bases, AminoIndex, MassY / (float)MASS_SCALE, IntensityLevel, 
+                    ComputeBNProbability(Model->Nodes + IonY, IonY, g_CutFeatures + CutFeaturesBaseIndex),
+                    Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel]);
+            }
+        }
+        // b peak:
+        IntensityLevel = SeizePeaks(Spectrum, MassB, IonB, AminoIndexB, g_BIntensity + AminoIndex, g_BSkew + AminoIndex, g_BAbsSkew + AminoIndex);
+        if (Match->SpecialFragmentation && AminoIndex >= Match->SpecialModPosition)
+        {
+            IntensityLevel = max(IntensityLevel, SeizePeaks(Spectrum, MassB - PHOSPHATE_WATER_MASS, IonB, AminoIndexB, g_BIntensity + AminoIndex, g_BSkew + AminoIndex, g_BAbsSkew + AminoIndex));
+        }
+
+        g_CutFeatures[CutFeaturesBaseIndex + IonB] = IntensityLevel;
+        GET_SECTOR(MassB);
+        if (Model && Sector >= 0)
+        {
+            g_CutScores[AminoIndex] += ComputeBNProbability(Model->Nodes + IonB, IonB, g_CutFeatures + CutFeaturesBaseIndex);
+            g_CutScores[AminoIndex] -= Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel];
+            g_VerboseCutFeatures[AminoIndex*CUT_FEATURE_COUNT + IonB] = ComputeBNProbability(Model->Nodes + IonB, IonB, g_CutFeatures + CutFeaturesBaseIndex) - Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel];
+            if (VerboseFlag)
+            {
+                printf("%s\t%s\t%d\t%.1f\tB\t%d\t%.2f\t%.2f\t\n", Spectrum->Node->InputFile->FileName,
+                    Match->Bases, AminoIndex, MassB / (float)MASS_SCALE, IntensityLevel, 
+                    ComputeBNProbability(Model->Nodes + IonB, IonB, g_CutFeatures + CutFeaturesBaseIndex),
+                    Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel]);
+            }
+        }
+        // y isotopic peak:
+        Mass = MassY + DALTON;
+        IntensityLevel = SeizePeaks(Spectrum, Mass, IonYI, AminoIndexY, 0, 0, 0);
+        g_CutFeatures[CutFeaturesBaseIndex + IonYI] = IntensityLevel;
+        GET_SECTOR(Mass);
+        if (Model && Sector >= 0)
+        {
+            g_CutScores[AminoIndex] += ComputeBNProbability(Model->Nodes + IonYI, IonYI, g_CutFeatures + CutFeaturesBaseIndex);
+            g_CutScores[AminoIndex] -= Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel];
+            g_VerboseCutFeatures[AminoIndex*CUT_FEATURE_COUNT + IonYI] = ComputeBNProbability(Model->Nodes + IonYI, IonYI, g_CutFeatures + CutFeaturesBaseIndex) - Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel];
+            if (VerboseFlag)
+            {
+                printf("%s\t%s\t%d\t%.1f\tYI\t%d\t%.2f\t%.2f\t\n", Spectrum->Node->InputFile->FileName,
+                    Match->Bases, AminoIndex, Mass / (float)MASS_SCALE, IntensityLevel, 
+                    ComputeBNProbability(Model->Nodes + IonYI, IonYI, g_CutFeatures + CutFeaturesBaseIndex),
+                    Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel]);
+            }
+        }
+
+        // b isotopic peak:
+        Mass = MassB + DALTON;
+        IntensityLevel = SeizePeaks(Spectrum, Mass, IonBI, AminoIndexB, 0, 0, 0);
+        g_CutFeatures[CutFeaturesBaseIndex + IonBI] = IntensityLevel;
+        GET_SECTOR(Mass);
+        if (Model && Sector >= 0)
+        {
+            g_CutScores[AminoIndex] += ComputeBNProbability(Model->Nodes + IonBI, IonBI, g_CutFeatures + CutFeaturesBaseIndex);
+            g_CutScores[AminoIndex] -= Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel];
+            g_VerboseCutFeatures[AminoIndex*CUT_FEATURE_COUNT + IonBI] = ComputeBNProbability(Model->Nodes + IonBI, IonBI, g_CutFeatures + CutFeaturesBaseIndex) - Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel];
+            if (VerboseFlag)
+            {
+                printf("%s\t%s\t%d\t%.1f\tBI\t%d\t%.2f\t%.2f\t\n", Spectrum->Node->InputFile->FileName,
+                    Match->Bases, AminoIndex, Mass / (float)MASS_SCALE, IntensityLevel, 
+                    ComputeBNProbability(Model->Nodes + IonBI, IonBI, g_CutFeatures + CutFeaturesBaseIndex),
+                    Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel]);
+            }
+        }
+
+        // doubly-charged y:
+        Mass = (int)((MassY + HYDROGEN_MASS)/2 + 0.5);
+        IntensityLevel = SeizePeaks(Spectrum, Mass, IonY2, AminoIndexY, 0, 0, 0);
+        g_CutFeatures[CutFeaturesBaseIndex + IonY2] = IntensityLevel;
+        GET_SECTOR(Mass);
+        if (Model && Sector >= 0)
+        {
+            g_CutScores[AminoIndex] += ComputeBNProbability(Model->Nodes + IonY2, IonY2, g_CutFeatures + CutFeaturesBaseIndex);
+            g_CutScores[AminoIndex] -= Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel];
+            g_VerboseCutFeatures[CutFeaturesBaseIndex + IonY2] = ComputeBNProbability(Model->Nodes + IonY2, IonY2, g_CutFeatures + CutFeaturesBaseIndex) - Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel];
+            if (VerboseFlag)
+            {
+                printf("%s\t%s\t%d\t%.1f\tY2\t%d\t%.2f\t%.2f\t\n", Spectrum->Node->InputFile->FileName,
+                    Match->Bases, AminoIndex, Mass / (float)MASS_SCALE, IntensityLevel, 
+                    ComputeBNProbability(Model->Nodes + IonY2, IonY2, g_CutFeatures + CutFeaturesBaseIndex),
+                    Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel]);
+            }
+        }
+
+        Mass = (int)((MassB + HYDROGEN_MASS)/2 + 0.5);
+        IntensityLevel = SeizePeaks(Spectrum, Mass, IonB2, AminoIndexB, 0, 0, 0);
+        g_CutFeatures[CutFeaturesBaseIndex + IonB2] = IntensityLevel;
+        GET_SECTOR(Mass);
+        if (Model && Sector >= 0)
+        {
+            g_CutScores[AminoIndex] += ComputeBNProbability(Model->Nodes + IonB2, IonB2, g_CutFeatures + CutFeaturesBaseIndex);
+            g_CutScores[AminoIndex] -= Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel];
+            g_VerboseCutFeatures[AminoIndex*CUT_FEATURE_COUNT + IonB2] = ComputeBNProbability(Model->Nodes + IonB2, IonB2, g_CutFeatures + CutFeaturesBaseIndex) - Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel];
+        }
+
+        // Y-H2O:
+        Mass = MassY - WATER_MASS;
+        IntensityLevel = SeizePeaks(Spectrum, Mass, IonYH2O, AminoIndexY, 0, 0, 0);
+        g_CutFeatures[CutFeaturesBaseIndex + IonYH2O] = IntensityLevel;
+        GET_SECTOR(Mass);
+        if (Model && Sector >= 0)
+        {
+            g_CutScores[AminoIndex] += ComputeBNProbability(Model->Nodes + IonYH2O, IonYH2O, g_CutFeatures + CutFeaturesBaseIndex);
+            g_CutScores[AminoIndex] -= Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel];
+            g_VerboseCutFeatures[AminoIndex*CUT_FEATURE_COUNT + IonYH2O] = ComputeBNProbability(Model->Nodes + IonYH2O, IonYH2O, g_CutFeatures + CutFeaturesBaseIndex) - Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel];
+        }
+
+        // a (b minus CO):
+        Mass = MassB - 27000;
+        IntensityLevel = SeizePeaks(Spectrum, Mass, IonA, AminoIndexB, 0, 0, 0);
+        g_CutFeatures[CutFeaturesBaseIndex + IonA] = IntensityLevel;
+        GET_SECTOR(Mass);
+        if (Model && Sector >= 0)
+        {
+            g_CutScores[AminoIndex] += ComputeBNProbability(Model->Nodes + IonA, IonA, g_CutFeatures + CutFeaturesBaseIndex);
+            g_CutScores[AminoIndex] -= Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel];
+            g_VerboseCutFeatures[AminoIndex*CUT_FEATURE_COUNT + IonA] = ComputeBNProbability(Model->Nodes + IonA, IonA, g_CutFeatures + CutFeaturesBaseIndex) - Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel];
+        }
+
+        // b-H2O:
+        Mass = MassB - WATER_MASS;
+        IntensityLevel = SeizePeaks(Spectrum, Mass, IonBH2O, AminoIndexB, 0, 0, 0);
+        g_CutFeatures[CutFeaturesBaseIndex + IonBH2O] = IntensityLevel;
+        GET_SECTOR(Mass);
+        if (Model && Sector >= 0)
+        {
+            g_CutScores[AminoIndex] += ComputeBNProbability(Model->Nodes + IonBH2O, IonBH2O, g_CutFeatures + CutFeaturesBaseIndex);
+            g_CutScores[AminoIndex] -= Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel];
+            g_VerboseCutFeatures[AminoIndex*CUT_FEATURE_COUNT + IonBH2O] = ComputeBNProbability(Model->Nodes + IonBH2O, IonBH2O, g_CutFeatures + CutFeaturesBaseIndex) - Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel];
+        }
+
+        // y-NH3:
+        Mass = MassY - 17000;
+        IntensityLevel = SeizePeaks(Spectrum, Mass, IonYNH3, AminoIndexY, 0, 0, 0);
+        g_CutFeatures[CutFeaturesBaseIndex + IonYNH3] = IntensityLevel;
+        GET_SECTOR(Mass);
+        if (Model && Sector >= 0)
+        {
+            g_CutScores[AminoIndex] += ComputeBNProbability(Model->Nodes + IonYNH3, IonYNH3, g_CutFeatures + CutFeaturesBaseIndex);
+            g_CutScores[AminoIndex] -= Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel];
+            g_VerboseCutFeatures[AminoIndex*CUT_FEATURE_COUNT + IonYNH3] = ComputeBNProbability(Model->Nodes + IonYNH3, IonYNH3, g_CutFeatures + CutFeaturesBaseIndex) - Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel];
+        }
+
+        // b-NH3:
+        Mass = MassB - 17000;
+        IntensityLevel = SeizePeaks(Spectrum, Mass, IonBNH3, AminoIndexB, 0, 0, 0);
+        g_CutFeatures[CutFeaturesBaseIndex + IonBNH3] = IntensityLevel;
+        GET_SECTOR(Mass);
+        if (Model && Sector >= 0)
+        {
+            g_CutScores[AminoIndex] += ComputeBNProbability(Model->Nodes + IonBNH3, IonBNH3, g_CutFeatures + CutFeaturesBaseIndex);
+            g_CutScores[AminoIndex] -= Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel];
+            g_VerboseCutFeatures[AminoIndex*CUT_FEATURE_COUNT + IonBNH3] = ComputeBNProbability(Model->Nodes + IonBNH3, IonBNH3, g_CutFeatures + CutFeaturesBaseIndex) - Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel];
+        }
+
+    }
+}
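+
+// Illustrative sketch, not part of the original upstream file: the per-ion score update
+// that the blocks above apply for every predicted fragment.  Each observed intensity
+// level adds the ion's Bayesian-network log-probability and subtracts the null model's
+// log-probability of that intensity level in the peak's sector.  The helper name is
+// hypothetical; ComputeBNProbability, Tweak->Intensities and INTENSITY_LEVEL_COUNT are
+// the same ones used above.
+static float CutScoreDelta(BayesianModel* Model, int IonTypeIndex, int* Features,
+    SpectrumTweak* Tweak, int Sector, int IntensityLevel)
+{
+    // Log-odds of this peak under the ion model versus the sector's noise model:
+    return ComputeBNProbability(Model->Nodes + IonTypeIndex, IonTypeIndex, Features)
+        - Tweak->Intensities[Sector * INTENSITY_LEVEL_COUNT + IntensityLevel];
+}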
+
+// Take all peaks within a small radius of the target m/z, annotate them with this ion type,
+// and return the binned cumulative intensity level (0 = absent, up to 3 = strong).
+int SeizePeaks(MSSpectrum* Spectrum, int TargetMass, int IonType, int AminoIndex, float* pIntensity, float* pSkew, float *pAbsSkew)
+{
+    int PeakIndex;
+    int MaxMass;
+    float Intensity = 0;
+    int Bin;
+    float WeightedSkew = 0;
+    float AbsWeightedSkew = 0;
+    //
+    Bin = (TargetMass + 50) / 100;
+    MaxMass = TargetMass + INTENSITY_BIN_RADIUS;
+
+    // If the mass is off the scale, then you get no peaks:
+    if (Bin >= Spectrum->IntensityBinCount || Bin < 0)
+    {
+        return 0;
+    }
+    PeakIndex = Spectrum->BinPeakIndex[Bin];
+    if (PeakIndex >= 0)
+    {
+        for ( ; PeakIndex < Spectrum->PeakCount; PeakIndex++)
+        {
+            if (Spectrum->Peaks[PeakIndex].Mass > MaxMass)
+            {
+                break;
+            }
+            Intensity += Spectrum->Peaks[PeakIndex].Intensity;
+            Spectrum->Peaks[PeakIndex].IonType = IonType;
+            Spectrum->Peaks[PeakIndex].AminoIndex = AminoIndex;
+            WeightedSkew += Spectrum->Peaks[PeakIndex].Intensity * (Spectrum->Peaks[PeakIndex].Mass - TargetMass);
+            AbsWeightedSkew += Spectrum->Peaks[PeakIndex].Intensity * abs(Spectrum->Peaks[PeakIndex].Mass - TargetMass);
+        }
+    }
+    if (pIntensity)
+    {
+        *pIntensity = Intensity;
+        *pSkew = WeightedSkew;
+        *pAbsSkew = AbsWeightedSkew;
+    }
+
+    if (Intensity < Spectrum->IntensityCutoffLow)
+    {
+        return 0;
+    }
+    if (Intensity < Spectrum->IntensityCutoffMedium)
+    {
+        return 1;
+    }
+    if (Intensity < Spectrum->IntensityCutoffHigh)
+    {
+        return 2;
+    }
+    return 3;
+}
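+
+// Illustrative sketch, not part of the original upstream file: the quantization that
+// SeizePeaks applies to the summed peak intensity.  The three thresholds correspond to
+// the spectrum's precomputed IntensityCutoffLow / Medium / High values; the helper name
+// and the explicit threshold parameters are hypothetical.
+static int QuantizeIntensityLevel(float Intensity, float CutoffLow, float CutoffMedium, float CutoffHigh)
+{
+    if (Intensity < CutoffLow)
+    {
+        return 0; // no meaningful signal
+    }
+    if (Intensity < CutoffMedium)
+    {
+        return 1; // weak peak
+    }
+    if (Intensity < CutoffHigh)
+    {
+        return 2; // medium peak
+    }
+    return 3; // strong peak
+}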
+
+FILE* g_TrainFile2;
+FILE* g_TrainFile3;
+
+// Callback for training the PRM scorer in peptide context.
+void TrainPepPRMCallback(SpectrumNode* Node, int Charge, int ParentMass, Peptide* Annotation)
+{
+    FILE* OutputFile;
+    int AminoIndex;
+    int Length;
+    int FeatureIndex;
+    MSSpectrum* Spectrum;
+
+    Spectrum = Node->Spectrum;
+    Length = strlen(Annotation->Bases);
+    WindowFilterPeaks(Spectrum, 0, 0);
+    IntensityRankPeaks(Spectrum);
+    // Use the charge+PM oracle:
+    Node->Tweaks[0].Charge = Charge;
+    Node->Tweaks[0].ParentMass = ParentMass;
+    
+    PrepareSpectrumForIonScoring(PRMModelCharge2, Node->Spectrum, 0);
+    //SpectrumComputeBinnedIntensities(Node);
+    if (Charge < 3)
+    {
+        OutputFile = g_TrainFile2;
+    }
+    else
+    {
+        OutputFile = g_TrainFile3;
+    }
+    GetCutFeatures(Node->Spectrum, Node->Tweaks, Annotation, NULL);
+
+    for (AminoIndex = 0; AminoIndex < Length; AminoIndex++)
+    {
+        for (FeatureIndex = 0; FeatureIndex < SIMax; FeatureIndex++)
+        {
+            fprintf(OutputFile, "%d\t", g_CutFeatures[AminoIndex*CUT_FEATURE_COUNT + FeatureIndex]);
+        }
+        fprintf(OutputFile, "\n");
+    }
+    fflush(OutputFile);
+}
+
+void TrainPepPRM(char* OracleFile, char* OracleDir)
+{
+    g_TrainFile2 = fopen("TrainingFiles\\PEPPRM2.txt", "w");
+    g_TrainFile3 = fopen("TrainingFiles\\PEPPRM3.txt", "w");
+    TrainOnOracleFile(OracleFile, OracleDir, TrainPepPRMCallback);
+}
+
+void ScorpionSetPRMScores(MSSpectrum* Spectrum, SpectrumTweak* Tweak)
+{
+    BayesianModel* Model;
+    int PRM;
+    float fScore;
+    //
+    // Ensure models are loaded:
+    if (!BNCharge2TaggingBN)
+    {
+        InitBayesianModels();
+    }
+    Tweak->PRMScoreMax = Tweak->ParentMass;
+    if (Spectrum->Graph)
+    {
+        Tweak->PRMScoreMax = max(Tweak->PRMScoreMax, Spectrum->Graph->LastNode->Mass);
+    }
+    Tweak->PRMScoreMax = PRM_ARRAY_SLACK + (Tweak->PRMScoreMax / PRM_BIN_SIZE);
+    SafeFree(Tweak->PRMScores);
+    Tweak->PRMScores = (int*)calloc(Tweak->PRMScoreMax + 5, sizeof(int)); // extra slack in alloc
+    if (Tweak->Charge > 2)
+    {
+        Model = BNCharge3TaggingBN;
+    }
+    else
+    {
+        Model = BNCharge2TaggingBN;
+    }
+    for (PRM = 0; PRM < Tweak->PRMScoreMax; PRM++)
+    {
+        fScore = GetPRMFeatures(Spectrum, Tweak, Model, PRM * PRM_BIN_SIZE, 0);
+        Tweak->PRMScores[PRM] = (int)(fScore * 1000);
+    }
+}
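+
+// Illustrative sketch, not part of the original upstream file: reading back one of the
+// PRM scores filled in by ScorpionSetPRMScores().  Scores are stored one per PRM_BIN_SIZE
+// bin and scaled by 1000; the helper name is hypothetical.
+static float LookupScorpionPRMScore(SpectrumTweak* Tweak, int PRM)
+{
+    int Bin = PRM / PRM_BIN_SIZE;
+    if (!Tweak->PRMScores || Bin < 0 || Bin >= Tweak->PRMScoreMax)
+    {
+        return 0; // outside the scored mass range
+    }
+    return Tweak->PRMScores[Bin] / 1000.0f;
+}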
+
+void FinishPRMTestRecord(char* RememberFileName, int* Scores, int MatchCount, int* RankHistogram, char* CandidateAnnotations)
+{
+    int TrueScore;
+    int ScoreIndex;
+    int BestScore = -9999;
+    int BestScoreIndex = 0;
+    int HistogramPoint = 0;
+    //
+    // Find the best score:
+    for (ScoreIndex = 0; ScoreIndex < MatchCount; ScoreIndex++)
+    {
+        if (Scores[ScoreIndex] > BestScore)
+        {
+            BestScore = Scores[ScoreIndex];
+            BestScoreIndex = ScoreIndex;
+        }
+    }
+    TrueScore = Scores[0];
+
+    qsort(Scores, MatchCount, sizeof(int), (QSortCompare)CompareInts);
+    for (ScoreIndex = 0; ScoreIndex < MatchCount; ScoreIndex++)
+    {
+        if (Scores[ScoreIndex] <= TrueScore)
+        {
+            // Found it!
+            RankHistogram[ScoreIndex] += 1;
+            HistogramPoint = ScoreIndex;
+            break;
+        }
+    }
+    // Verbose output:
+    printf("%s\t%s\t%s\t%d\t%d\t%d\n", RememberFileName, CandidateAnnotations, CandidateAnnotations + 128*BestScoreIndex,
+        BestScore, TrueScore, HistogramPoint);
+
+}
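+
+// Illustrative sketch, not part of the original upstream file: FinishPRMTestRecord()
+// above assumes that CompareInts (defined elsewhere) sorts the candidate scores in
+// descending order, so that the index of the first sorted score <= TrueScore is the
+// true match's rank.  A comparator with that behaviour would look like this (the name
+// is hypothetical):
+static int CompareIntsDescendingSketch(const void* a, const void* b)
+{
+    return *(const int*)b - *(const int*)a;
+}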
+
+
+// TestPepPRMCallback:
+// Print the minimum, maximum, and average score of cut-point scores for this annotation.
+// The primary goal here is to evaluate whether minor changes to the PepPRM scoring
+// model, such as changing the intensity cutoffs or adding new nodes and edges,
+// improve the model's effectiveness.
+// This function also serves as a 'sanity check' that true matches are getting
+// reasonably good PepPRM scores.
+void TestPepPRMCallback(SpectrumNode* Node, int Charge, int ParentMass, Peptide* Annotation)
+{
+    float MinPRMScore = 9999;
+    float MaxPRMScore = -9999;
+    int AminoIndex;
+    int PRMCount;
+    int PRM;
+    int Len;
+    float Score;
+    float TotalScore;
+    BayesianModel* Model;
+    ////////////////////////////////////////////////////////////////////////
+    // Main
+    Node->Tweaks[0].ParentMass = Annotation->ParentMass;
+    Node->Tweaks[0].Charge = Charge;
+    Node->Spectrum->ParentMass = Annotation->ParentMass;
+    Node->Spectrum->Charge = Charge;
+    WindowFilterPeaks(Node->Spectrum, 0, 0);
+    IntensityRankPeaks(Node->Spectrum);
+    PrepareSpectrumForIonScoring(PRMModelCharge2, Node->Spectrum, 0);
+    //SpectrumComputeBinnedIntensities(Node);
+    //SpectrumComputeNoiseDistributions(Node);
+    if (Charge > 2)
+    {
+        Model = BNCharge3ScoringBN;
+    }
+    else
+    {
+        Model = BNCharge2ScoringBN;
+    }
+    PRMCount = 0;
+    PRM = 0;
+    TotalScore = 0;
+    Len = strlen(Annotation->Bases);
+    GetCutFeatures(Node->Spectrum, Node->Tweaks, Annotation, Model);
+    for (AminoIndex = 0; AminoIndex < Len; AminoIndex++)
+    {
+        Score = g_CutScores[AminoIndex];
+        TotalScore += Score;
+        MinPRMScore = min(MinPRMScore, Score);
+        MaxPRMScore = max(MaxPRMScore, Score);
+        PRMCount++;
+    }
+    Score = TotalScore / PRMCount;
+    printf("%s\t%s\t%.2f\t%.2f\t%.2f\t\n", Node->InputFile->FileName, Annotation->Bases, Score, MinPRMScore, MaxPRMScore);
+}
+
+void TestPRMQuickScoringCallback(SpectrumNode* Node, int Charge, int ParentMass, Peptide* Annotation)
+{
+    static int* Scores;
+    static int MatchCount;
+    static char* CurrentFile;
+    static int* RankHistogram;
+    static int RowsProcessed;
+    int Score;
+    int PRM;
+    int AminoIndex;
+    int ModIndex;
+    int Len;
+    BayesianModel* Model;
+    BayesianModel* PepPRMModel;
+    int Cumulative;
+    int TotalPeptides;
+    int RankIndex;
+    int PRMCount;
+    static char CandidateAnnotations[512*128];
+    static char RememberFileName[1024];
+
+    // If Node is null, then we've been called in initialize / cleanup mode:
+    if (!Node)
+    {
+        if (!Charge)
+        {
+            CurrentFile = (char*)calloc(256, sizeof(char));
+            Scores = (int*)calloc(512, sizeof(int));
+            RankHistogram = (int*)calloc(512, sizeof(int));
+            RowsProcessed = 0;
+            return;
+        }
+        // Finish the current peptide, if any:
+        if (*CurrentFile)
+        {
+            FinishPRMTestRecord(RememberFileName, Scores, MatchCount, RankHistogram, CandidateAnnotations);
+        }
+        // Now report:
+        printf("Histogram of PRM quick score pack positions:\n");
+        TotalPeptides = 0;
+        Cumulative = 0;
+        for (RankIndex = 0; RankIndex< 512; RankIndex++)
+        {
+            TotalPeptides += RankHistogram[RankIndex];
+        }
+        for (RankIndex = 0; RankIndex< 512; RankIndex++)
+        {
+            Cumulative += RankHistogram[RankIndex];
+            printf("%d\t%d\t%d\t%.2f\t%.2f\t\n", RankIndex, RankHistogram[RankIndex], Cumulative,
+                RankHistogram[RankIndex] / (float)TotalPeptides, Cumulative / (float)TotalPeptides);
+        }
+        // Lastly, free memory:
+        SafeFree(Scores);
+        SafeFree(RankHistogram);
+        SafeFree(CurrentFile);
+        return;
+    }
+    ////////////////////////////////////////////////////////////////////////
+    // Main
+    Node->Tweaks[0].ParentMass = Annotation->ParentMass;
+    Node->Tweaks[0].Charge = Charge;
+    Node->Spectrum->ParentMass = Annotation->ParentMass;
+    Node->Spectrum->Charge = Charge;
+    WindowFilterPeaks(Node->Spectrum, 0, 0);
+    IntensityRankPeaks(Node->Spectrum);
+    PrepareSpectrumForIonScoring(PRMModelCharge2, Node->Spectrum, 0);
+    //SpectrumComputeBinnedIntensities(Node);
+    //SpectrumComputeNoiseDistributions(Node);
+    // OLD PRM SCORING: 
+    //SetPRMScores(Node->Spectrum); 
+    RowsProcessed++;
+    if (strcmp(CurrentFile, Node->InputFile->FileName))
+    {
+        if (MatchCount)
+        {
+            FinishPRMTestRecord(RememberFileName, Scores, MatchCount, RankHistogram, CandidateAnnotations);
+        }
+        MatchCount = 0;
+        strcpy(CurrentFile, Node->InputFile->FileName);
+    }
+    strcpy(RememberFileName, Node->InputFile->FileName);
+    // Compute score for these PRM values:
+    if (Charge > 2)
+    {
+        PepPRMModel = BNCharge3ScoringBN;
+        Model = BNCharge3TaggingBN;
+    }
+    else
+    {
+        PepPRMModel = BNCharge2ScoringBN;
+        Model = BNCharge2TaggingBN;
+    }
+    Len = strlen(Annotation->Bases);
+    PRM = 0;
+    Score = 0;
+    PRMCount = 0;
+    
+    // Verify that using flanking aminos and such really does improve things:
+    GetCutFeatures(Node->Spectrum, Node->Tweaks, Annotation, PepPRMModel);
+
+    for (AminoIndex = 0; AminoIndex < Len; AminoIndex++)
+    {
+        PRM += PeptideMass[Annotation->Bases[AminoIndex]];
+        for (ModIndex = 0; ModIndex < MAX_PT_MODS; ModIndex++)
+        {
+            if (Annotation->AminoIndex[ModIndex] == AminoIndex && Annotation->ModType[ModIndex])
+            {
+                PRM += Annotation->ModType[ModIndex]->RealDelta;
+            }
+        }
+        PRMCount++;
+
+
+        Score += (int)(1000 * g_CutScores[AminoIndex]); 
+    }
+    Score = Score / PRMCount;
+    // Guard to prevent running off the edge of the array:
+    if (MatchCount < 512)
+    {
+        Scores[MatchCount] = Score;
+        strcpy(CandidateAnnotations + 128 * MatchCount, Annotation->Bases);
+        MatchCount++;
+    }
+}
+
+void TestPRMQuickScoring(char* OracleFile, char* OracleDir)
+{
+    InitBayesianModels();
+    TestPRMQuickScoringCallback(NULL, 0, 0, NULL); // initialization
+    TrainOnOracleFile(OracleFile, OracleDir, TestPRMQuickScoringCallback);
+    TestPRMQuickScoringCallback(NULL, 1, 0, NULL); // completion
+}
+
+void TestPepPRM(char* OracleFile, char* OracleDir)
+{
+    InitBayesianModels();
+    TrainOnOracleFile(OracleFile, OracleDir, TestPepPRMCallback);
+}
+
+// TestLDACallback:
+void TestLDACallback(SpectrumNode* Node, int Charge, int ParentMass, Peptide* Annotation)
+{
+    BayesianModel* Model;
+    // 
+    Node->Tweaks[0].ParentMass = Annotation->ParentMass;
+    Node->Tweaks[0].Charge = Charge;
+    Annotation->Tweak = Node->Tweaks;
+    Node->Spectrum->ParentMass = Annotation->ParentMass;
+    Node->Spectrum->Charge = Charge;
+    WindowFilterPeaks(Node->Spectrum, 0, 0);
+    IntensityRankPeaks(Node->Spectrum);
+    PrepareSpectrumForIonScoring(PRMModelCharge2, Node->Spectrum, 0);
+    //SpectrumComputeBinnedIntensities(Node);
+    //SpectrumComputeNoiseDistributions(Node);
+    GlobalOptions->DigestType = DIGEST_TYPE_TRYPSIN;
+    if (Charge > 2)
+    {
+        Model = BNCharge3ScoringBN;
+    }
+    else
+    {
+        Model = BNCharge2ScoringBN;
+    }
+    ScorpionSetPRMScores(Node->Spectrum, Node->Tweaks); 
+
+}
+
+void TestLDA(char* OracleFile, char* OracleDir)
+{
+    InitBayesianModels();
+    TrainOnOracleFile(OracleFile, OracleDir, TestLDACallback);
+}
+
+// For debug output: Return a description of an ion type code.
+char* GetScorpIonName(int IonType)
+{
+    switch (IonType)
+    {
+    case IonY:
+        return "y";
+    case IonB:
+        return "b";
+    case IonYI:
+        return "yi";
+    case IonBI:
+        return "bi";
+    case IonY2:
+        return "y2";
+    case IonB2:
+        return "b2";
+    case IonYH2O:
+        return "y-h2o";
+    case IonA:
+        return "a";
+    case IonBH2O:
+        return "b-h2o";
+    case IonYNH3:
+        return "y-nh3";
+    case IonBNH3:
+        return "b-nh3";
+    default:
+        return "";
+    }
+}
diff --git a/Scorpion.h b/Scorpion.h
new file mode 100644
index 0000000..e530d7b
--- /dev/null
+++ b/Scorpion.h
@@ -0,0 +1,108 @@
+//Title:          Scorpion.h
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+#ifndef SCORPION_H
+#define SCORPION_H
+
+// Scorpion - Ion-based scoring of mass spectra.  Compute various features 
+// for use by the match-scoring SVM.
+
+#include "Inspect.h"
+#include "Trie.h"
+#include "BN.h"
+
+#define CUT_FEATURE_COUNT 32
+
+extern int g_CutFeatures[];
+
+// Features used in scoring of cut points
+typedef enum ScorpIons
+{
+    SIDynamicRange = 0,
+    IonY,
+    IonB,
+    IonYI,
+    IonBI,
+    IonY2,
+    IonB2,
+    IonYH2O,
+    IonA,
+    IonBH2O,
+    IonYNH3,
+    IonBNH3,
+    SICharge,
+    SIFlankB, // Flanking amino acids that affect N-terminal fragments
+    SIFlankY, // Flanking amino acids that affect C-terminal fragments
+    SISector,
+    //SIBasePrefix,
+    //SIAcidPrefix,
+    //SIBaseSuffix,
+    //SIAcidSuffix,
+    //SIFlankLeft,
+    //SIFlankRight,
+    SITestA,
+    SITestB,
+    SITestC,
+    SITestD,
+    SITestE,
+    SITestF,
+    SITestG,
+    SITestH,
+    SITestI,
+    SITestJ,
+    SIMax,
+    IonParentLoss
+} ScorpIons;
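+
+// Illustrative sketch, not part of the original upstream header: g_CutFeatures is laid
+// out as one CUT_FEATURE_COUNT-sized block of ints per cut point, indexed by the
+// ScorpIons values above.  The macro name is hypothetical.
+#define CUT_FEATURE(CutIndex, Feature) (g_CutFeatures[(CutIndex) * CUT_FEATURE_COUNT + (Feature)])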
+
+typedef enum CutFeature
+{
+    CFDynamic,
+    CFCharge,
+    CFFlank,
+    CFSector,
+    CFBasic,
+    CFAcidic,
+    CFY,
+    CF
+} CutFeature;
+
+void GetCutFeatures(MSSpectrum* Spectrum, SpectrumTweak* Tweak, Peptide* Match, BayesianModel* Model);
+void ScorpionSetPRMScores(MSSpectrum* Spectrum, SpectrumTweak* Tweak);
+void TestPRMQuickScoring(char* OracleFile, char* OracleDir);
+float GetExplainedIntensityPercent(MSSpectrum* Spectrum, int PeakCount, int BYOnly);
+float GetExplainedPeakPercent(MSSpectrum* Spectrum, int PeakCount, int BYOnly);
+int GetPeptideParentMass(Peptide* Match);
+void TrainPepPRM(char* OracleFile, char* OracleDir);
+void TestLDA(char* OracleFile, char* OracleDir);
+void TestPepPRM(char* OracleFile, char* OracleDir);
+char* GetScorpIonName(int IonType);
+#endif // SCORPION_H
diff --git a/SelectProteins.py b/SelectProteins.py
new file mode 100644
index 0000000..ab30cb2
--- /dev/null
+++ b/SelectProteins.py
@@ -0,0 +1,397 @@
+#Title:          SelectProteins.py
+#Author:         Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+#Created:        2005
+# Copyright 2007,2008,2009 The Regents of the University of California
+# All Rights Reserved
+#
+# Permission to use, copy, modify and distribute any part of this
+# program for educational, research and non-profit purposes, by non-profit
+# institutions only, without fee, and without a written agreement is hereby
+# granted, provided that the above copyright notice, this paragraph and
+# the following three paragraphs appear in all copies.
+#
+# Those desiring to incorporate this work into commercial
+# products or use for commercial purposes should contact the Technology
+# Transfer & Intellectual Property Services, University of California,
+# San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+# Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+#
+# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+# INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+# IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+# OF SUCH DAMAGE.
+#
+# THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+# OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+# ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+# REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+# EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+# THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+
+"""
+Helper class for FDRUtils.py and FDR.py:
+Given an f-score cutoff, select a parsimonious collection of proteins that
+accounts for most or all of the annotations.
+"""
+import os
+import sys
+import traceback
+import struct
+import ResultsParser
+from Utils import *
+Initialize()
+
+class ProteinSelector(ResultsParser.ResultsParser):
+    def __init__(self):
+        self.PeptideDict = {} # aminos -> location list
+        self.ProteinPeptideCounts = {}
+        self.ProteinSpectrumCounts = {}
+        #self.SharedProteinPeptides = {}
+        #self.SharedProteinSpectra = {}
+        self.PeptideSpectrumCounts = {}
+        self.ProteinPeptides = {} # Protein -> List of aminos
+        self.ProteinNames = {}
+        self.ProteinSequences = {}
+        self.MQScoreWeight = 0.3
+        self.DeltaScoreWeight = 1.5
+        self.MinimumPeptideLength = 7
+        self.BestScoresByPeptide = {}
+        self.PValueCutoff = None
+        self.MaxFileLines = None
+        # if RetainRepresentativeCount is set, then we remember the
+        # best n spectra for a particular annotation in the dictionary
+        # self.BestRepresentatives
+        self.RetainRepresentativeCount = None
+        self.BestRepresentatives = {}
+        self.AnnotationSpectrumCounts = {}
+        self.FScoreCutoff2 = None
+        self.FScoreCutoff3 = None
+        self.Columns = ResultsParser.Columns()
+        ResultsParser.ResultsParser.__init__(self)
+
+    def FindPeptideLocations(self, Aminos):
+        PrevPos = -1
+        
+        LocationList = []
+        while (1):
+            Pos = self.DB.find(Aminos, PrevPos + 1)
+            if Pos == -1:
+                break
+            
+            # Which protein does Pos lie in?
+            LowIndex = 0
+            HighIndex = len(self.ProteinPos) - 1
+            # Pos >= ProteinPos[LowIndex] and Pos < ProteinPos[HighIndex]
+            # Special case - last protein:
+            
+            if Pos >= self.ProteinPos[HighIndex]:
+                ProteinID = HighIndex
+                ResidueNumber = Pos - self.ProteinPos[HighIndex]
+               
+            else:
+                while (1):
+                    if LowIndex+1==HighIndex:
+                        ProteinID = LowIndex
+                        ResidueNumber = Pos - self.ProteinPos[LowIndex]
+                        break
+                    MidIndex = (LowIndex + HighIndex) / 2
+                    if Pos >= self.ProteinPos[MidIndex]:
+                        LowIndex = MidIndex
+                    else:
+                        HighIndex = MidIndex
+            LocationList.append((ProteinID, ResidueNumber))
+            PrevPos = Pos
+        return LocationList
+    def OldFindPeptideLocations(self, Aminos):
+        LocationList = []
+        #print "Find locations for %s..."%Aminos
+        for (ID, Sequence) in self.ProteinSequences.items():
+            Pos = Sequence.find(Aminos)
+            if Pos != -1:
+                LocationList.append((ID, Pos))
+                #print "Found at pos %s in %s"%(Pos, ID)
+        if len(LocationList) == 0:
+            print "*** WARNING: Peptide '%s' not found in the database."%Aminos
+        return LocationList
+    def LoadDB(self, DBPath):
+        DBFile = open(DBPath, "rb")
+        self.DB = DBFile.read()
+        DBFile.close()
+        IndexPath = os.path.splitext(DBPath)[0] + ".index"
+        IndexFile = open(IndexPath, "rb")
+        BlockSize = struct.calcsize("<qi80s")
+        ID = 0
+        PrevID = None
+        self.ProteinPos = []
+        while (1):
+            Block = IndexFile.read(BlockSize)
+            if not Block:
+                break
+            Info = struct.unpack("<qi80s", Block)
+            Name = Info[2]
+            NullPos = Name.find("\0")
+            if NullPos != -1:
+                Name = Name[:NullPos]
+            self.ProteinNames[ID]= Name
+            StartPos = Info[1]
+            self.ProteinPos.append(StartPos)
+            if PrevID != None:
+                self.ProteinSequences[PrevID] = self.DB[self.ProteinPos[PrevID]:StartPos - 1]
+            PrevID = ID
+            ID += 1
+        self.ProteinSequences[PrevID] = self.DB[self.ProteinPos[PrevID]:]
+    def LoadMultipleDB(self, DBPathList):
+        """" Given a list of DB pathnames, load all the corresponding DB """
+        ID = 0
+        self.DB = ""
+        self.ProteinPos = []
+        for DBPath in DBPathList:
+            print "loading %s"%DBPath
+            DBFile = open(DBPath, "rb")
+            OldDB = self.DB
+            self.DB += DBFile.read()    # concatenate all DBs sequentially
+            DBFile.close()
+            IndexPath = os.path.splitext(DBPath)[0] + ".index"
+            IndexFile = open(IndexPath, "rb")
+            BlockSize = struct.calcsize("<qi80s")
+            PrevID = None
+            while (1):
+                Block = IndexFile.read(BlockSize)
+                if not Block:
+                    break
+                Info = struct.unpack("<qi80s", Block)
+                Name = Info[2]
+                NullPos = Name.find("\0")
+                if NullPos != -1:
+                    Name = Name[:NullPos]
+                self.ProteinNames[ID]= Name
+                StartPos = Info[1] + len(OldDB) # adjust StartPos for adding a new DB                
+                self.ProteinPos.append(StartPos)
+                if PrevID != None:
+                    self.ProteinSequences[PrevID] = self.DB[self.ProteinPos[PrevID]:StartPos - 1]
+                    
+                PrevID = ID
+                ID += 1
+            self.ProteinSequences[PrevID] = self.DB[self.ProteinPos[PrevID]:]
+            
+    def OldLoadDB(self, DBPath):
+        """
+        Load the database, populating self.ProteinSequences
+        """
+        print "LoadDB(%s)"%DBPath
+        IndexPath = os.path.splitext(DBPath)[0] + ".index"
+        IndexFile = open(IndexPath, "rb")
+        DBFile = open(DBPath, "rb")
+        BlockSize = struct.calcsize("<qi80s")
+        PrevName = None
+        PrevID = None
+        PrevStartPos = None
+        ID = 0
+        while (1):
+            Block = IndexFile.read(BlockSize)
+            if not Block:
+                break
+            Info = struct.unpack("<qi80s", Block)
+            Name = Info[2]
+            NullPos = Name.find("\0")
+            if NullPos != -1:
+                Name = Name[:NullPos]
+            StartPos = Info[1]
+            self.ProteinNames[ID] = Name
+            if PrevName != None:
+                DBFile.seek(PrevStartPos)
+                Sequence = DBFile.read(StartPos - PrevStartPos)
+                Sequence = Sequence.replace("*", "")
+                self.ProteinSequences[PrevID] = Sequence
+            PrevName = Name
+            PrevID = ID
+            PrevStartPos = StartPos
+            ID += 1
+        if PrevName != None:
+            DBFile.seek(StartPos)
+            Sequence = DBFile.read()
+            self.ProteinSequences[PrevID] = Sequence
+            #self.ProteinNames[PrevID] = Name
+
+        DBFile.close()
+        IndexFile.close()
+    def ChooseProteins(self):
+        """
+        Iteratively select proteins which account for all the peptides.
+        """
+        self.SelectedProteins = {} # Protein -> (Peptides, Spectra)
+        self.PeptideProteins = {} # Peptide -> final selection of protein
+        print "\n\n\n"
+        print "CHOOSE PROTEINS:"
+        for (Peptide, SpectrumCount) in self.PeptideSpectrumCounts.items():
+            for (ProteinID, Pos) in self.PeptideDict[Peptide]:
+                self.ProteinSpectrumCounts[ProteinID] = self.ProteinSpectrumCounts.get(ProteinID, 0) + SpectrumCount
+
+        print "Loaded %s peptides and %s proteins"%(len(self.PeptideSpectrumCounts.keys()), len(self.ProteinSpectrumCounts.keys()))
+        while (1):
+            BestCandidate = None
+            BestScore = None
+            for Protein in self.ProteinPeptideCounts.keys():
+                if self.SelectedProteins.has_key(Protein):
+                    continue
+                PeptideCount = self.ProteinPeptideCounts[Protein]
+                SpectrumCount = self.ProteinSpectrumCounts.get(Protein, 0)
+                Score = (PeptideCount, SpectrumCount)
+                #print Protein, Score
+                if Score > BestScore or (Score == BestScore and self.ProteinNames[Protein] < self.ProteinNames[BestCandidate]):
+                    BestScore = Score
+                    BestCandidate = Protein
+                    #print "New Best %s, score %s"%(BestCandidate,BestScore)
+            if not BestScore:
+                break
+            (PeptideCount, SpectrumCount) = BestScore
+            if PeptideCount == 0:
+                break
+            #%%%
+            print "Accept protein %s (%s)\n  Gets %s peptides, %s spectra"%(BestCandidate, self.ProteinNames[BestCandidate], PeptideCount, SpectrumCount)
+            
+            self.SelectedProteins[BestCandidate] = BestScore
+            # Lay claim to all the (not-yet-claimed) peptides:
+            for Peptide in self.ProteinPeptides[BestCandidate]:
+                #print Peptide
+                if not self.PeptideProteins.has_key(Peptide):
+                    
+                    self.PeptideProteins[Peptide] = BestCandidate
+                    # Other proteins (if not already accepted) lose a peptide, and some spectra:
+                    for (OtherProtein, Pos) in self.PeptideDict[Peptide]:
+                        if self.SelectedProteins.has_key(OtherProtein):
+                            continue
+                        
+                        self.ProteinPeptideCounts[OtherProtein] -= 1
+                        self.ProteinSpectrumCounts[OtherProtein] = self.ProteinSpectrumCounts.get(OtherProtein, 0) - self.PeptideSpectrumCounts[Peptide]
+        # Sanity check - the selected proteins have peptides, the unselected proteins have 0
+        for Protein in self.ProteinPeptideCounts.keys():
+            ProteinName = self.ProteinNames[Protein]
+            PeptideCount = self.ProteinPeptideCounts[Protein]
+            SpectrumCount = self.ProteinSpectrumCounts.get(Protein, 0)
+            if self.SelectedProteins.has_key(Protein) and PeptideCount <= 0:
+                print "** Warning: Selected protein %s (%s) has %s peptides!"%(Protein, ProteinName, PeptideCount)
+            if not self.SelectedProteins.has_key(Protein) and PeptideCount != 0:
+                print "** Warning: Unelected protein %s (%s) has %s peptides!"%(Protein, ProteinName, PeptideCount)
+    def ParseAnnotations(self, FileName):
+        """
+        Parse annotations, remembering all protein locations for each peptide.
+        """
+        print "Parse %s..."%FileName
+        File = open(FileName, "rb")
+        OldSpectrum = None
+        Stub = os.path.split(FileName)[1]
+        LineNumber = 0
+        for FileLine in File:
+            LineNumber += 1
+            if LineNumber % 100 == 0:
+                print "%s %s..."%(Stub, LineNumber)
+                if self.MaxFileLines != None and LineNumber >= self.MaxFileLines:
+                    return # Quick-parse, for debugging only!
+            
+            
+            if FileLine[0] == "#":
+                self.Columns.initializeHeaders(FileLine)
+                continue
+            if not FileLine.strip():
+                continue                        
+            Bits = FileLine.split("\t")
+            try:
+                Spectrum = (Bits[self.Columns.getIndex("SpectrumFile")], Bits[self.Columns.getIndex("Scan#")])
+            except:
+                continue # header line
+            if Spectrum == OldSpectrum:
+                continue
+            OldSpectrum = Spectrum
+            try:
+                MQScore = float(Bits[self.Columns.getIndex("MQScore")])
+                DeltaScore = float(Bits[self.Columns.getIndex("DeltaScoreOther")])
+                Charge = int(Bits[self.Columns.getIndex("Charge")])
+            except:
+                traceback.print_exc()
+                print Bits
+                continue
+            # Apply a threshold: EITHER f-score cutoff (default) OR p-value cutoff
+            
+            if self.PValueCutoff != None:
+                
+                try:
+                    PValue = float(Bits[self.Columns.getIndex("InspectFDR")])
+                except:
+                    traceback.print_exc()
+                    print Bits
+                    continue
+                PeptideScore = (-PValue, MQScore)
+                if PValue > self.PValueCutoff:
+                    
+                    continue
+            else:
+                if Charge < 3:
+                    WeightedScore = self.MQScoreWeight * MQScore + self.DeltaScoreWeight * (DeltaScore / self.MeanDeltaScore2)
+                    if WeightedScore < self.FScoreCutoff2:
+                        
+                        continue
+                else:
+                    WeightedScore = self.MQScoreWeight * MQScore + self.DeltaScoreWeight * (DeltaScore / self.MeanDeltaScore3)
+                    if WeightedScore < self.FScoreCutoff3:
+                        
+                        continue
+                PeptideScore = WeightedScore
+                
+            
+            try:
+                Peptide = GetPeptideFromModdedName(Bits[self.Columns.getIndex("Annotation")])
+            except:
+                
+                continue
+            if len(Peptide.Aminos) < self.MinimumPeptideLength:
+                
+                continue
+            # Remember this peptide:
+            
+            if not self.PeptideDict.get(Peptide.Aminos):
+                
+                # It's a new peptide!  Figure out where it falls in the database:
+                LocationList = self.FindPeptideLocations(Peptide.Aminos)
+                for (Protein, Pos) in LocationList:
+                    if not self.ProteinPeptides.has_key(Protein):
+                        self.ProteinPeptides[Protein] = []
+                    #print "Adding peptide %s to protein %s '%s':"%(Peptide.Aminos,Protein,self.ProteinNames[Protein])
+                    self.ProteinPeptides[Protein].append(Peptide.Aminos)
+               
+                self.PeptideDict[Peptide.Aminos] = LocationList
+                for (ProteinNumber, Dummy) in LocationList:
+                    self.ProteinPeptideCounts[ProteinNumber] = self.ProteinPeptideCounts.get(ProteinNumber, 0) + 1
+            else:
+                # We've seen this peptide before:
+                LocationList = self.PeptideDict[Peptide.Aminos]
+            OldScore = self.BestScoresByPeptide.get(Peptide.Aminos, -9999)
+            self.BestScoresByPeptide[Peptide.Aminos] = max(PeptideScore, OldScore)
+            self.PeptideSpectrumCounts[Peptide.Aminos] = self.PeptideSpectrumCounts.get(Peptide.Aminos, 0) + 1
+            ##############################################################
+            # Populate self.BestRepresentative, if requested:
+            if self.RetainRepresentativeCount:
+                Peptide.MQScore = MQScore
+                Peptide.PValue = PValue
+                Peptide.SpectrumFilePath = Bits[0]
+                Peptide.ScanNumber = int(Bits[1])
+                Peptide.SpectrumFilePos = int(Bits[self.Columns.getIndex("SpecFilePos")])
+                Key = Peptide.GetFullModdedName()
+                RepresentativeList = self.BestRepresentatives.get(Key, [])
+                Tuple = (PeptideScore, Peptide)
+                RepresentativeList.append(Tuple)
+                RepresentativeList.sort()
+                self.BestRepresentatives[Key] = RepresentativeList[-self.RetainRepresentativeCount:]
+                self.AnnotationSpectrumCounts[Key] = self.AnnotationSpectrumCounts.get(Key, 0) + 1
+                
+
+if __name__ == "__main__":
+    # Test
+    Bob = ProteinSelector()
+    Bob.LoadDB("database\DictyCommon.Aug28.FS2.trie")
+    print Bob.FindPeptideLocations("GTVESEMAEQDSLLNKLNK")
+    print Bob.FindPeptideLocations("TSEGDFTLLLGQIVDNQIGDLNKSG")
+    print Bob.FindPeptideLocations("YAVFAPGLADVVIEVVAK")
diff --git a/ShuffleDB.py b/ShuffleDB.py
new file mode 100644
index 0000000..3ec586d
--- /dev/null
+++ b/ShuffleDB.py
@@ -0,0 +1,285 @@
+#Title:          ShuffleDB.py
+#Author:         Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+#Created:        2005
+# Copyright 2007,2008,2009 The Regents of the University of California
+# All Rights Reserved
+#
+# Permission to use, copy, modify and distribute any part of this
+# program for educational, research and non-profit purposes, by non-profit
+# institutions only, without fee, and without a written agreement is hereby
+# granted, provided that the above copyright notice, this paragraph and
+# the following three paragraphs appear in all copies.
+#
+# Those desiring to incorporate this work into commercial
+# products or use for commercial purposes should contact the Technology
+# Transfer & Intellectual Property Services, University of California,
+# San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+# Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+#
+# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+# INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+# IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+# OF SUCH DAMAGE.
+#
+# THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+# OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+# ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+# REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+# EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+# THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+
+"""
+Shuffle all the records in a database.  Useful for generating a database of all bogus records,
+to get an idea of the rate at which matches of a given quality are generated when there's
+nothing real to match, or a database of half valid, half invalid records.
+"""
+import os
+import sys
+import string
+import getopt
+import struct
+import random
+
+UsageInfo = """
+ShuffleDB - Produce a 'red herring' database of erroneous peptide records.
+Options:
+ -r [Trie file name]: Path to input database
+ -w [FileName]: Path to output database
+ -s: If set, proteins will be REVERSED.  Default behavior is to SHUFFLE.
+ -b: If set, ONLY the scrambled proteins are written out.  Default
+     behavior is to write both forward and scrambled proteins.
+ -p: In shuffled mode, avoid repeating peptides of length 8 or more
+     in the shuffled database.  (I and L are treated as identical;
+     Q and K are not.)  Takes a little longer to run; some "bad words"
+     (repeated 8-mers) will still be produced for repetitive records.
+ -t: Number of shuffled copies to write out (defaults to 1)
+ 
+Example:
+ ShuffleDB.py -r database\Shewanella.trie -w database\ShewanellaHalf.trie -p
+"""
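+# The Shuffler class below can also be driven programmatically.  A rough sketch
+# (not part of the original command-line usage; the database paths are hypothetical
+# placeholders):
+#
+#   App = Shuffler()
+#   App.TrieFileName = "database/Yeast.trie"
+#   App.OutputFileName = "database/YeastDecoy.trie"
+#   App.ParanoiaFlag = 1    # avoid repeating 8-mers, as with -p
+#   App.Main()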
+
+##def EncodeQuad(Str):
+##    ValA = ord(Str[0]) - 65
+##    ValB = ord(Str[1]) - 65
+##    ValC = ord(Str[2]) - 65
+##    ValD = ord(Str[3]) - 65
+##    return ValA + 32*ValB + 32*32*ValC + 32*32*32*ValD
+##def EncodeOct(Str):
+##    return struct.pack("<ii", EncodeQuad(Str[:4]), EncodeQuad(Str[4:]))
+##def DecodeOct(Str):
+##    (ValA, ValB) = struct.unpack("<ii", Str)
+##    return DecodeQuad(ValA) + DecodeQuad(ValB)
+##def DecodeQuad(Value):
+##    ValA = Value % 32
+##    Value -= ValA
+##    Value /= 32
+##    ValB = Value % 32
+##    Value -= ValB
+##    Value /= 32
+##    ValC = Value % 32
+##    Value -= ValC
+##    Value /= 32
+##    ValD = Value
+##    print ValA, ValB, ValC, ValD
+##    return chr(ValA + 65) + chr(ValB + 65) + chr(ValC + 65) + chr(ValD + 65)
+
+class Shuffler:
+    MAX_SHUFFLE_ATTEMPTS = 100
+    PARANOIA_PEPTIDE_LENGTH = 8
+    def __init__(self):
+        self.ShuffleFlag = 1
+        self.WriteBothFlag = 1
+        self.TrieFileName = None
+        self.OutputFileName = None
+        self.BogusTimes = 1
+        self.BogusProteins = 0
+        self.ParanoiaFlag = 0
+        self.TotalBadWordCount = 0
+    def LoadProteinNames(self, IndexPath):
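+        # Read fixed-size records ("<qi80s") from the .index file and collect the
+        # NUL-terminated protein names.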
+        File = open(IndexPath, "rb")
+        self.ProteinNames = []
+        BlockSize = struct.calcsize("<qi80s")
+        while (1):
+            Block = File.read(BlockSize)
+            if not Block:
+                break
+            Tuple = struct.unpack("<qi80s", Block)
+            Name = Tuple[-1]
+            NullPos = Name.find('\0')
+            if NullPos != -1:
+                Name = Name[:NullPos]
+            self.ProteinNames.append(Name)
+        File.close()
+    def Main(self):
+        IndexPath = os.path.splitext(self.TrieFileName)[0] + ".index"
+        self.LoadProteinNames(IndexPath)
+        TrieFile = open(self.TrieFileName, "rb")
+        if self.ParanoiaFlag:
+            self.ForbiddenPeptides = {}
+            DB = TrieFile.read()
+            for Pos in range(len(DB) - self.PARANOIA_PEPTIDE_LENGTH):
+                if Pos % 10000 == 0:
+                    print "%s (%s)..."%(Pos, len(self.ForbiddenPeptides.keys()))
+                Peptide = DB[Pos:Pos + self.PARANOIA_PEPTIDE_LENGTH].replace("I", "L")
+                if Peptide.find("X")!=-1:
+                    # Peptides containing X need not be forbidden, because they will
+                    # never be matched!
+                    continue
+                self.ForbiddenPeptides[Peptide] = 1
+            TrieFile.seek(0)
+            print "(Note: %s peptide words are forbidden)"%len(self.ForbiddenPeptides.keys())
+        NewIndexPath = os.path.splitext(self.OutputFileName)[0] + ".index"
+        self.OutputTrieFile = open(self.OutputFileName, "wb")
+        self.OutputIndexFile = open(NewIndexPath, "wb")
+        Sequence = ""
+        ProteinIndex = 0
+        while (1):
+            Data = TrieFile.read(1024)
+            if not Data:
+                break
+            Sequence += Data
+            Pos = Sequence.find("*")
+            while (Pos != -1):
+                self.WriteProtein(Sequence[:Pos], ProteinIndex)
+                ProteinIndex += 1
+                Sequence = Sequence[Pos+1:]
+                Pos = Sequence.find("*")
+        if (Sequence):
+            self.WriteProtein(Sequence, ProteinIndex)
+            ProteinIndex += 1
+            #List = list(Sequence)
+            #List.reverse()
+            #Protein = string.join(List,"")
+            #ReversedTrieFile.write(Protein)
+            #ReversedTrieFile.write("*")
+        self.OutputTrieFile.close()
+        self.OutputIndexFile.close()
+        print "Wrote out %s proteins."%ProteinIndex
+        print "Wrote out %d bogus proteins."%self.BogusProteins
+        print "Total bad words:", self.TotalBadWordCount
+    def ShuffleProtein(self, Sequence):
+        """
+        Produce the invalid (shuffled) version of a protein.
+        """
+        Residues = list(Sequence)
+        if not self.ShuffleFlag:
+            Residues.reverse()
+            return string.join(Residues, "")
+        if not self.ParanoiaFlag:
+            random.shuffle(Residues)
+            return string.join(Residues, "")
+        # And now, the interesting case: We shall shuffle the protein, and we shall apply some
+        # heuristics along the way to minimize the number of shared k-mers.
+        BestBadWordCount = 9999
+        BestPeptideString = None
+        for AttemptIndex in range(10):
+            random.shuffle(Residues)
+            BadWordCount = 0
+            for Pos in range(len(Residues) - self.PARANOIA_PEPTIDE_LENGTH):
+                WordResidues = Residues[Pos:Pos + self.PARANOIA_PEPTIDE_LENGTH]
+                Word = string.join(WordResidues, "").replace("I", "L")
+                if self.ForbiddenPeptides.has_key(Word):
+                    # Damn, this shuffling shares a word!  Maybe we can re-shuffle this
+                    # word and solve the problem:
+                    FixedFlag = 0
+                    for WordShuffleIndex in range(10):
+                        random.shuffle(WordResidues)
+                        FixedWord = string.join(WordResidues, "").replace("I", "L")
+                        if self.ForbiddenPeptides.has_key(FixedWord):
+                            # The shuffled word is no good!
+                            continue
+                        # We shuffled a word, and in so doing, we changed the preceding
+                        # words.  Let's check whether they're now forbidden:
+                        BrokeOldWord = 0
+                        for StepsBack in range(1, self.PARANOIA_PEPTIDE_LENGTH):
+                            PrevPos = Pos - StepsBack
+                            if PrevPos < 0:
+                                break
+                            PrevWord = Residues[PrevPos:Pos]
+                            PrevWord.extend(WordResidues[:-StepsBack])
+                            PrevWord = string.join(PrevWord, "").replace("I", "L")
+                            if self.ForbiddenPeptides.has_key(PrevWord):
+                                BrokeOldWord = 1
+                                #print "Preceding word %s is '%s': FORBIDDEN!"%(StepsBack, PrevWord)
+                                break
+                            #print "Preceding word %s is '%s'"%(StepsBack, PrevWord)
+                        if not BrokeOldWord:
+                            FixedFlag = 1
+                            break
+                    if FixedFlag:
+                        # This word (and the previous words that overlap it) is now ok.
+                        Residues[Pos:Pos + self.PARANOIA_PEPTIDE_LENGTH] = WordResidues
+                    else:
+                        # We couldn't fix the word by shuffling it.  Increment the bad word count:
+                        BadWordCount += 1
+            if BadWordCount == 0:
+                #print "Protein '%s...' shuffled with no bad words"%(Sequence[:20])
+                return string.join(Residues, "")
+            if BadWordCount < BestBadWordCount:
+                BestBadWordCount = BadWordCount
+                BestPeptideString = string.join(Residues, "")
+        print "Protein '%s...' shuffled with %s bad words"%(Sequence[:20], BestBadWordCount)
+        self.TotalBadWordCount += BestBadWordCount
+        return BestPeptideString
+    def WriteProtein(self, Sequence, ProteinIndex):
+        """
+        Given a protein sequence, and protein index number (for looking up the name),
+        write a scrambled or reversed record to the output database.  (And write the
+        original, if the -b flag was specified)
+        """
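+        # Each .index record is packed as "<qi80s": an 8-byte field (written as 0 here),
+        # the byte offset of the record within the .trie file, and the protein name
+        # padded or truncated to 80 bytes.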
+        for ShuffleIndex in range(self.BogusTimes):
+            ShuffledProtein = self.ShuffleProtein(Sequence)
+            OutputFilePos = self.OutputTrieFile.tell()
+            self.OutputTrieFile.write(ShuffledProtein)
+            self.OutputTrieFile.write("*")
+            if self.BogusTimes > 1:
+                ShuffledName = "XXX.%d.%s"%(ShuffleIndex, self.ProteinNames[ProteinIndex])
+            else:
+                ShuffledName = "XXX.%s"%self.ProteinNames[ProteinIndex]
+            Block = struct.pack("<qi80s", 0, OutputFilePos, ShuffledName)
+            self.OutputIndexFile.write(Block)
+            self.BogusProteins +=1
+        # If we're writing both the red herrings and the originals,
+        # then write the original protein as well now:
+        if self.WriteBothFlag:
+            OutputFilePos = self.OutputTrieFile.tell()
+            self.OutputTrieFile.write(Sequence)
+            self.OutputTrieFile.write("*")
+            Block = struct.pack("<qi80s", 0, OutputFilePos, self.ProteinNames[ProteinIndex])
+            self.OutputIndexFile.write(Block)
+    def ParseCommandLine(self, Arguments):
+        (Options, Args) = getopt.getopt(Arguments, "r:w:sbt:p")
+        OptionsSeen = {}
+        for (Option, Value) in Options:
+            OptionsSeen[Option] = 1
+            if Option == "-r":
+                self.TrieFileName = Value
+            elif Option == "-w":
+                self.OutputFileName = Value
+            elif Option == "-s":
+                self.ShuffleFlag = 0
+            elif Option == "-b":
+                self.WriteBothFlag = 0
+            elif Option == "-t":
+                self.BogusTimes = int(Value)
+            elif Option == "-p":
+                self.ParanoiaFlag = 1
+            else:
+                print "** Warning: Option %s not understood"%Option
+        
+if __name__ =="__main__":
+    try:
+        import psyco
+        psyco.full()
+    except:
+        print "* Warning: psyco not found"
+    App = Shuffler()
+    App.ParseCommandLine(sys.argv[1:])
+    if not App.TrieFileName or not App.OutputFileName:
+        print UsageInfo
+        sys.exit(-1)
+    App.Main()
diff --git a/SpectralSimilarity.py b/SpectralSimilarity.py
new file mode 100644
index 0000000..0010612
--- /dev/null
+++ b/SpectralSimilarity.py
@@ -0,0 +1,502 @@
+#Title:          SpectralSimilarity.py
+#Author:         Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+#Created:        2005
+# Copyright 2007,2008,2009 The Regents of the University of California
+# All Rights Reserved
+#
+# Permission to use, copy, modify and distribute any part of this
+# program for educational, research and non-profit purposes, by non-profit
+# institutions only, without fee, and without a written agreement is hereby
+# granted, provided that the above copyright notice, this paragraph and
+# the following three paragraphs appear in all copies.
+#
+# Those desiring to incorporate this work into commercial
+# products or use for commercial purposes should contact the Technology
+# Transfer & Intellectual Property Services, University of California,
+# San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+# Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+#
+# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+# INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+# IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+# OF SUCH DAMAGE.
+#
+# THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+# OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+# ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+# REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+# EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+# THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+
+"""
+This is an auxiliary module for ComputePTMFeatures.py.  It measures the similarity between
+two spectra.  The simplest case is a comparison of two spectra with the same annotation.
+We are also able to compare two spectra which differ by a PTM (provided the PTM position
+is known).  
+"""
+
+import MSSpectrum
+import Label
+import os
+import sys
+import math
+from Utils import *
+Initialize()
+
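+# Example usage (a rough sketch, not part of the upstream module; the spectrum file
+# names below are hypothetical placeholders):
+#
+#   Sim = SpectralSimilarity("ScanA.dta", "ScanB.dta", "SAM+16MY", "SAMMY")
+#   Sim.LabelPeaks(PeakTolerance = 0.5)   # required so that peaks carry ion labels
+#   Score = Sim.DotProduct(BinMultiplier = 1.0, HashByRank = 1)
+#   print "Dot product similarity: %.3f" % Score
+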
+class SpectralSimilarity:
+    """
+    Container class that holds the spectra, and measures similarity
+    ASSUMPTION: Either AnnotationA and AnnotationB are the same,
+    or AnnotationA is modified and AnnotationB is NOT.
+    """
+    def __init__(self, SpectrumA, SpectrumB, AnnotationA, AnnotationB):
+        """
+        SpectrumA and SpectrumB are SpectrumClass objects from MSSpectrum.py
+        AnnotationA and AnnotationB are strings of the Inspect annotation of the
+        spectrum (if the annotation was R.THISPEPK.M  this method would get
+        passed "THISPEPK")
+        """
+        self.BinMultiplier = 1.0 # default
+        # Spectra can be either MSSpectrum objects or file paths:
+        if isinstance(SpectrumA, MSSpectrum.SpectrumClass):
+            self.SpectrumA = SpectrumA
+        else:
+            Spectrum = MSSpectrum.SpectrumClass()
+            Spectrum.ReadPeaks(SpectrumA)
+            self.SpectrumA = Spectrum
+            self.SpectrumA.FilterPeaks() 
+            self.SpectrumA.RankPeaksByIntensity()
+        if isinstance(SpectrumB, MSSpectrum.SpectrumClass):
+            self.SpectrumB = SpectrumB
+        else:
+            Spectrum = MSSpectrum.SpectrumClass()
+            Spectrum.ReadPeaks(SpectrumB)
+            self.SpectrumB = Spectrum
+            self.SpectrumB.FilterPeaks()
+            self.SpectrumB.RankPeaksByIntensity()            
+        # Annotations can be either a string, or a peptide object:
+        if isinstance(AnnotationA, PeptideClass):
+            self.PeptideA = AnnotationA
+        else:
+            self.PeptideA = GetPeptideFromModdedName(AnnotationA)
+        if isinstance(AnnotationB, PeptideClass):
+            self.PeptideB = AnnotationB
+        else:
+            self.PeptideB = GetPeptideFromModdedName(AnnotationB)
+    def ComputeSimilarity(self):
+        """
+        This method determines how similar two spectra are.  It can use a variety of methods,
+        and honestly this may turn into a big switch box
+        """
+        DPSimScore = self.DotProduct()
+        print "The Scaled Dot Product of these two is %f"%DPSimScore
+    def LabelPeaks(self, PeakTolerance = 0.5):
+        """
+        Should be called once, *if* the annotations differ, so that we can match
+        corresponding peaks which have different masses due to PTM attachment.
+        """
+        # Label the spectra so that I can know which peaks belong to what name eg. b7
+        Label.LabelSpectrum(self.SpectrumA, self.PeptideA, PeakTolerance)
+        Label.LabelSpectrum(self.SpectrumB, self.PeptideB, PeakTolerance)
+    def DotProductSignal(self, BinMultiplier = 1.0, MaxIntensityRank = 50, EnableShift = 1, VerboseFlag = 0, HashByRank = 0):
+        """
+        Variant of Dot Product, incorporating a correction factor introduced in [Parag and Mallick, 2006ish]
+        """
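+        # The correction subtracts the dot product expected from random bin collisions
+        # (roughly TotalIntensityA * TotalIntensityB / (max parent mass * BinMultiplier))
+        # before normalizing by sqrt(SumSqA * SumSqB), as in the plain dot product.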
+        self.MaxIntensityRank = MaxIntensityRank
+        self.BinMultiplier = BinMultiplier
+        self.HashByRank = HashByRank
+        # set up hashes
+        HashA = {}
+        HashB = {}
+        # Populate HashA and HashB:
+        if EnableShift and len(self.PeptideA.Modifications) > 0:
+            self.HashPeaksWithShift(HashA, self.SpectrumA, self.PeptideA, VerboseFlag)
+        else:
+            self.HashPeaks(HashA, self.SpectrumA)
+        if EnableShift and len(self.PeptideB.Modifications) > 0:
+            self.HashPeaksWithShift(HashB, self.SpectrumB, self.PeptideB, VerboseFlag)
+        else:
+            self.HashPeaks(HashB, self.SpectrumB)
+        #Do Dot Product
+        MaxBins = max(HashA.keys())
+        MaxBins = max(MaxBins, max(HashB.keys()))
+        DotProduct = 0
+        SumSqA = 0
+        SumSqB = 0
+        TotalIntensityA = 0
+        TotalIntensityB = 0
+        for I in range(MaxBins):
+            A = HashA.get(I, 0)
+            B = HashB.get(I, 0)
+            TotalIntensityA += A
+            TotalIntensityB += B
+            if VerboseFlag and (A or B):
+                print "%s\t%s\t%s\t%s\t"%(I, A, B, A*B)
+            DotProduct += A * B
+            SumSqA += A * A
+            SumSqB += B * B
+        #print "Dot Product is %f"%DotProduct
+        #print "sqrt thing is %f"%math.sqrt(SumSqA * SumSqB)
+        OddsCollision = 1.0 / (max(self.SpectrumA.ParentMass, self.SpectrumB.ParentMass) * BinMultiplier)
+        DotProduct -= TotalIntensityA * TotalIntensityB * OddsCollision
+        return DotProduct / math.sqrt(SumSqA * SumSqB)
+    def DotProduct(self, BinMultiplier = 1.0, MaxIntensityRank = 50, EnableShift = 1, VerboseFlag = 0, HashByRank = 0):
+        """
+        This method measures similarity between spectra by calculating their dot product.
+        It is written to work on spectra that might be PTM-shifted versions of each other,
+        e.g. SAMMY and SAM+16MY.  If the peptide annotation has a PTM, then the peaks
+        are shifted back.
+        Variables:
+        HashByRank = use a rank-based dot product instead of an intensity-based one
+        """
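+        # In effect this is the cosine similarity of the two binned spectra:
+        #   Score = sum(A[i]*B[i]) / sqrt(sum(A[i]**2) * sum(B[i]**2))
+        # which is 1.0 for identical binned peak lists and near 0 for unrelated spectra.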
+        self.MaxIntensityRank = MaxIntensityRank
+        self.BinMultiplier = BinMultiplier
+        self.HashByRank = HashByRank
+        # set up hashes
+        HashA = {}
+        HashB = {}
+        # Populate HashA and HashB:
+        if EnableShift and len(self.PeptideA.Modifications) > 0:
+            self.HashPeaksWithShift(HashA, self.SpectrumA, self.PeptideA, VerboseFlag)
+        else:
+            self.HashPeaks(HashA, self.SpectrumA)
+        if EnableShift and len(self.PeptideB.Modifications) > 0:
+            self.HashPeaksWithShift(HashB, self.SpectrumB, self.PeptideB, VerboseFlag)
+        else:
+            self.HashPeaks(HashB, self.SpectrumB)
+        #Do Dot Product
+        MaxBins = max(HashA.keys())
+        MaxBins = max(MaxBins, max(HashB.keys()))
+        DotProduct = 0
+        SumSqA = 0
+        SumSqB = 0
+        for I in range(MaxBins):
+            A = HashA.get(I, 0)
+            B = HashB.get(I, 0)
+            if VerboseFlag and (A or B):
+                print "%s\t%s\t%s\t%s\t%s\t"%(I, I/self.BinMultiplier, A, B, A*B)
+            DotProduct += A * B
+            SumSqA += A * A
+            SumSqB += B * B
+        #print "Dot Product is %f"%DotProduct
+        #print "sqrt thing is %f"%math.sqrt(SumSqA * SumSqB)
+        return DotProduct / math.sqrt(SumSqA * SumSqB)
+    def HashPeaksWithShift(self, Hash, Spectrum, Peptide, VerboseFlag = 0):
+        """
+        Takes a peptide, spectrum, and hashtable
+        and puts values into the hash so that it can be dotproducted (now there's a verb
+        from a noun!)
+        The caveat is that we shift peaks by the mass of the PTM.
+        WARNING: Currently written for only ONE PTM per peptide
+        """
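+        # E.g. for SAM+16MY, fragments whose sequence includes the modified residue are
+        # binned at (mass - 16Da), or (mass - 8Da) for the doubly-charged "b2"/"y2" series,
+        # so that they fall into the same bins as the corresponding unmodified fragments.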
+        ModIndices = Peptide.Modifications.keys()
+        ModList = Peptide.Modifications[ModIndices[0]]
+        FirstMod = ModList[0]
+        ModMass = FirstMod.Mass
+        ModIndex = ModIndices[0] + 1
+        PeptideLength = len(Peptide.Aminos)
+        for Peak in Spectrum.Peaks:
+            if self.MaxIntensityRank != None and Peak.IntensityRank > self.MaxIntensityRank:
+                continue
+            Bin = int(round(Peak.Mass * self.BinMultiplier)) # default
+            if Peak.IonType:
+                if Peak.IonType.Name in ("b", "b-h2o", "b-nh3", "b-h2o-h2o", "b-h2o-nh3", "a"):
+                    if Peak.PeptideIndex >= ModIndex:
+                        Bin = int(round((Peak.Mass - ModMass) * self.BinMultiplier))
+                        if VerboseFlag:
+                            print "Peak at %s is %s %s, shift left to %s"%(Peak.Mass, Peak.IonType.Name, Peak.PeptideIndex, Bin)
+                if Peak.IonType.Name in ("b2",):
+                    if Peak.PeptideIndex >= ModIndex:
+                        Bin = int(round((Peak.Mass - ModMass/2.0) * self.BinMultiplier))
+                        if VerboseFlag:
+                            print "Peak at %s is %s %s, shift halfleft to %s"%(Peak.Mass, Peak.IonType.Name, Peak.PeptideIndex, Bin)
+                if Peak.IonType.Name in ("y", "y-h2o", "y-nh3", "y-h2o-nh3", "y-h2o-h2o"):
+                    if (PeptideLength - Peak.PeptideIndex) < ModIndex:
+                        Bin = int(round((Peak.Mass - ModMass) * self.BinMultiplier))
+                        if VerboseFlag:
+                            print "Peak at %s is %s %s, shift right to %s"%(Peak.Mass, Peak.IonType.Name, Peak.PeptideIndex, Bin)
+                if Peak.IonType.Name in ("y2",):
+                    if (PeptideLength - Peak.PeptideIndex) < ModIndex:
+                        Bin = int(round((Peak.Mass - ModMass/2.0) * self.BinMultiplier))
+                        if VerboseFlag:
+                            print "Peak at %s is %s %s, shift halfright to %s"%(Peak.Mass, Peak.IonType.Name, Peak.PeptideIndex, Bin)
+            Value = Peak.Intensity
+            if self.HashByRank:
+                Value = 10.0 / (10 + Peak.IntensityRank)                        
+            Hash[Bin] = Hash.get(Bin, 0) + Value
+    def HashPeaks(self, Hash, Spectrum):
+        """
+        Hashes Peak intensities into integer bins
+        """
+        for Peak in Spectrum.Peaks:
+            if self.MaxIntensityRank != None and Peak.IntensityRank > self.MaxIntensityRank:
+                continue # only deal with the good peaks
+            Bin = int(round(Peak.Mass * self.BinMultiplier))
+            Value = Peak.Intensity
+            if self.HashByRank:
+                Value = 10.0 / (10 + Peak.IntensityRank)
+            Hash[Bin] = Hash.get(Bin, 0) + Value
+    def GetSharedPeakCount(self, PeakWeight, RankWeight, SkewMultiplier = 1.0,
+            PeakCountDivisor = 40, EnableShift = 1, VerboseFlag = 0):
+        """
+        Measure the shared peak count between two spectra.
+        Iterate over the top N peaks of spectrum A (where N = ParentMass / PeakCountDivisor).
+        If the peak is present (modulo epsilon) as one of the top N peaks in spectrum B,
+        then it receives a score based on PeakWeight and RankWeight, attenuated by the
+        mass skew.  The sum of these scores is then scaled relative to the maximum
+        attainable score.
+        """
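+        # Concretely, each matched peak contributes PeakWeight*10 + RankWeight*10/(10 + rank),
+        # attenuated by SkewMultiplier**(skew in tenths of a Dalton); the maximum attainable
+        # score uses the same formula with no skew penalty.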
+        SkewMultipliers = []
+        for X in range(5):
+            SkewMultipliers.append(SkewMultiplier ** X)
+        self.BinMultiplier = 1.0
+        SortedPeaksA = []
+        for Peak in self.SpectrumA.Peaks:
+            SortedPeaksA.append((Peak.Intensity, Peak))
+        SortedPeaksA.sort()
+        SortedPeaksA.reverse()
+        N = int(round(self.SpectrumA.ParentMass / float(PeakCountDivisor)))
+        #print "N = %s/%s = %s"%(self.SpectrumA.ParentMass, PeakCountDivisor, N)
+        self.SpectrumB.RankPeaksByIntensity()
+        # Populate HashedPeaks[Bin] = list of peaks in SpectrumB
+        # which fall into Bin.  Only peaks
+        # with rank <= N are hashed.
+        HashedPeaks = {}
+        PeptideLength = len(self.PeptideB.Aminos)
+        ModIndices = self.PeptideB.Modifications.keys()
+        if len(ModIndices):
+            ModIndex = ModIndices[0] + 1
+            ModMass = self.PeptideB.Modifications[ModIndices[0]][0].Mass
+        else:
+            ModIndex = None
+        for Peak in self.SpectrumB.Peaks:
+            if Peak.IntensityRank > N:
+                continue
+            Peak.ShiftedMass = Peak.Mass
+            if EnableShift and ModIndex != None and Peak.IonType:
+                if Peak.IonType.Name in ("b", "b-h2o", "b-nh3", "b-h2o-h2o", "b-h2o-nh3", "a"):
+                    if Peak.PeptideIndex >= ModIndex:
+                        Peak.ShiftedMass -= ModMass
+                if Peak.IonType.Name in ("b2",):
+                    if Peak.PeptideIndex >= ModIndex:
+                        Peak.ShiftedMass -= (ModMass / 2.0)
+                if Peak.IonType.Name in ("y", "y-h2o", "y-nh3", "y-h2o-nh3", "y-h2o-h2o"):
+                    if (PeptideLength - Peak.PeptideIndex) < ModIndex:
+                        Peak.ShiftedMass -= ModMass
+                if Peak.IonType.Name in ("y2",):
+                    if (PeptideLength - Peak.PeptideIndex) < ModIndex:
+                        Peak.ShiftedMass -= (ModMass / 2.0)
+            Bin = int(round(Peak.ShiftedMass * self.BinMultiplier))
+            if not HashedPeaks.has_key(Bin):
+                HashedPeaks[Bin] = []
+            HashedPeaks[Bin].append(Peak)
+##        ########################
+##        # Debug peak hashing:
+##        Keys = HashedPeaks.keys()
+##        Keys.sort()
+##        for Key in Keys:
+##            Str = "%s: "%Key
+##            for Peak in HashedPeaks[Key]:
+##                Str += "(#%d %.1f, %.2f)"%(Peak.IntensityRank, Peak.Mass, Peak.Intensity)
+##            print Str
+##        ########################        
+        OverallScore = 0
+        MaxScore = 0
+        PeptideLength = len(self.PeptideA.Aminos)
+        ModIndices = self.PeptideA.Modifications.keys()
+        if len(ModIndices):
+            ModIndex = ModIndices[0] + 1
+            ModMass = self.PeptideA.Modifications[ModIndices[0]][0].Mass
+        else:
+            ModIndex = None
+        for PeakIndex in range(min(N, len(SortedPeaksA))):
+            Peak = SortedPeaksA[PeakIndex][1]
+            Peak.ShiftedMass = Peak.Mass
+            if EnableShift and ModIndex != None and Peak.IonType:
+                if Peak.IonType.Name in ("b", "b-h2o", "b-nh3", "b-h2o-h2o", "b-h2o-nh3", "a"):
+                    if Peak.PeptideIndex >= ModIndex:
+                        Peak.ShiftedMass -= ModMass
+                if Peak.IonType.Name in ("b2",):
+                    if Peak.PeptideIndex >= ModIndex:
+                        Peak.ShiftedMass -= (ModMass / 2.0)
+                if Peak.IonType.Name in ("y", "y-h2o", "y-nh3", "y-h2o-nh3", "y-h2o-h2o"):
+                    if (PeptideLength - Peak.PeptideIndex) < ModIndex:
+                        Peak.ShiftedMass -= ModMass
+                if Peak.IonType.Name in ("y2",):
+                    if (PeptideLength - Peak.PeptideIndex) < ModIndex:
+                        Peak.ShiftedMass -= (ModMass / 2.0)
+            Bin = int(round(Peak.ShiftedMass * self.BinMultiplier))
+            BestPeak = None
+            BestScore = 0
+            for NearBin in (Bin - 1, Bin, Bin + 1):
+                PeakList = HashedPeaks.get(NearBin, [])
+                for PeakB in PeakList:
+                    Skew = abs(Peak.ShiftedMass - PeakB.ShiftedMass)
+                    SkewDeciDaltons = int(round(abs(Skew) * 10))
+                    if SkewDeciDaltons >= 5:
+                        continue
+                    #Score = PeakWeight / float(RankWeight + Peak.IntensityRank + PeakB.IntensityRank)
+                    Score = PeakWeight*10 + RankWeight*10 / (10.0 + Peak.IntensityRank)
+                    Score *= SkewMultipliers[SkewDeciDaltons]
+                    if Score > BestScore:
+                        BestScore = Score
+                        BestPeak = PeakB
+                        BestPeakScoreMultiplier = SkewMultipliers[SkewDeciDaltons]
+            if VerboseFlag:
+                Str = "PeakA #%d %.1f (bin %d):"%(Peak.IntensityRank, Peak.Mass, Bin)
+                if Peak.IonType:
+                    Str += " (%s %s)"%(Peak.IonType.Name, Peak.PeptideIndex)
+                print Str
+                if BestPeak:
+                    Str = "  Best near peak #%d %.1f ==> %s"%(BestPeak.IntensityRank, BestPeak.Mass, BestScore)
+                    if BestPeak.IonType:
+                        Str += " (%s %s)"%(BestPeak.IonType.Name, BestPeak.PeptideIndex)
+                    print Str
+            OverallScore += BestScore
+            MaxScore += PeakWeight*10 + RankWeight*10 / (10.0 + Peak.IntensityRank)
+            #MaxScore += PeakWeight / float(RankWeight + Peak.IntensityRank + Peak.IntensityRank)
+        return OverallScore / float(MaxScore)
+    def ComputeCorrelationCoefficient(self, BinMultiplier = 1.0, MaxIntensityRank = 50, EnableShift = 1, VerboseFlag = 0, HashByRank = 0):
+        """
+        Compute similarity between two spectra by computing the
+        correlation coefficient of the binned intensities.
+        """
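+        # i.e. the Pearson correlation r = Cov(A, B) / (SigmaA * SigmaB), taken over all
+        # bins from the lowest to the highest occupied bin.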
+        self.BinMultiplier = BinMultiplier
+        self.MaxIntensityRank = MaxIntensityRank
+        self.HashByRank = HashByRank
+        # set up hashes
+        HashA = {}
+        HashB = {}
+        # Populate HashA and HashB:
+        if EnableShift and len(self.PeptideA.Modifications) > 0:
+            self.HashPeaksWithShift(HashA, self.SpectrumA, self.PeptideA, VerboseFlag)
+        else:
+            self.HashPeaks(HashA, self.SpectrumA)
+        if EnableShift and len(self.PeptideB.Modifications) > 0:
+            self.HashPeaksWithShift(HashB, self.SpectrumB, self.PeptideB, VerboseFlag)
+        else:
+            self.HashPeaks(HashB, self.SpectrumB)
+        MinBin = min(HashA.keys())
+        MinBin = min(MinBin, min(HashB.keys()))
+        MaxBin = max(HashA.keys())
+        MaxBin = max(MaxBin, max(HashB.keys()))
+        TotalA = 0
+        TotalB = 0
+        BinCount = MaxBin - MinBin + 1
+        for Bin in range(MinBin, MaxBin + 1):
+            A = HashA.get(Bin, 0)
+            B = HashB.get(Bin, 0)
+            TotalA += A
+            TotalB += B
+        MeanA = TotalA / float(BinCount)
+        MeanB = TotalB / float(BinCount)
+        if VerboseFlag:
+            print "MeanA %s over %s bins"%(MeanA, BinCount)
+            print "MeanB %s over %s bins"%(MeanB, BinCount)
+        SigmaSumA = 0
+        SigmaSumB = 0
+        for Bin in range(MinBin, MaxBin + 1):
+            A = HashA.get(Bin, 0)
+            B = HashB.get(Bin, 0)
+            SigmaSumA += (A - MeanA)**2
+            SigmaSumB += (B - MeanB)**2
+        VarianceA = SigmaSumA / float(BinCount)
+        SigmaA = math.sqrt(VarianceA)
+        VarianceB = SigmaSumB / float(BinCount)
+        SigmaB = math.sqrt(VarianceB)
+        if VerboseFlag:
+            print "A has variance %s stddev %s"%(VarianceA, SigmaA)
+            print "B has variance %s stddev %s"%(VarianceB, SigmaB)
+        CovarianceSum = 0
+        for Bin in range(MinBin, MaxBin + 1):
+            A = HashA.get(Bin, 0)
+            B = HashB.get(Bin, 0)
+            CovarianceSum += (A - MeanA) * (B - MeanB)
+        Covariance = CovarianceSum / float(BinCount - 1)
+        CorrelationCoefficient = Covariance / (SigmaA * SigmaB)
+        if VerboseFlag:
+            print "Covariance %s, corr.coeff %s"%(Covariance, CorrelationCoefficient)
+        return CorrelationCoefficient
+
+def Test():
+    FileName = "..\mzxml\Dicty-HeavyCells-11.mzXML"
+    FileHandle = open(FileName,"rb")
+    FileHandle.seek(60145805)
+    S1 = MSSpectrum.SpectrumClass()
+    S1.ReadPeaksFromFile(FileHandle,FileName) #it also sets the PrecursorMZ
+    Annotation1 = "MKRKLLK"
+
+    FileName = "..\mzxml\Dicty-HeavyCells-13.mzXML"
+    FileHandle = open(FileName,"rb")
+    FileHandle.seek(59307113)
+    S2 = MSSpectrum.SpectrumClass()
+    S2.ReadPeaksFromFile(FileHandle,FileName) #it also sets the PrecursorMZ
+    Annotation2 = "MKRKLLK"
+
+    FileName = "..\mzxml\Dicty-HeavyCells-12.mzXML"
+    FileHandle = open(FileName,"rb")
+    FileHandle.seek(87683754)
+    S3 = MSSpectrum.SpectrumClass()
+    S3.ReadPeaksFromFile(FileHandle,FileName) #it also sets the PrecursorMZ
+    Annotation3 = "MKIFIIK"
+
+    FileName = "..\mzxml\Dicty-HeavyCells-05.mzXML"
+    FileHandle = open(FileName,"rb")
+    FileHandle.seek(102201432)
+    S5 = MSSpectrum.SpectrumClass()
+    S5.ReadPeaksFromFile(FileHandle,FileName) #it also sets the PrecursorMZ
+    #S5.FilterPeaks()
+    Annotation5 = "NWNGQPVGVPQGQYANMNYAR"   
+
+    FileName = "..\mzxml\Dicty-HeavyCells-12.mzXML"
+    FileHandle = open(FileName,"rb")
+    FileHandle.seek(112303085)
+    S6 = MSSpectrum.SpectrumClass()
+    S6.ReadPeaksFromFile(FileHandle,FileName) #it also sets the PrecursorMZ
+    Annotation6 = "NWNGQPVGVPQGQYANMNYAR+14"   
+
+    FileName = "..\mzxml\Dicty-HeavyCells-12.mzXML"
+    FileHandle = open(FileName,"rb")
+    FileHandle.seek(111782847)
+    S7 = MSSpectrum.SpectrumClass()
+    S7.ReadPeaksFromFile(FileHandle,FileName) #it also sets the PrecursorMZ
+    Annotation7 = "NWNGQPVGVPQGQYANMNYAR"   
+
+    Simm = SpectralSimilarity(S5,S5,Annotation5,Annotation5)
+    Simm.SpectralAlignment()
+
+if __name__ == "__main__":
+    # Command-line arguments: Two spectra, then two annotations.
+    if len(sys.argv)<5:
+        print "Not enough arguments:", sys.argv
+        sys.exit(-1)
+    Comparator = SpectralSimilarity(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4])
+    Comparator.LabelPeaks()
+    Result = Comparator.GetSharedPeakCount(1, 0, 0.9, PeakCountDivisor = 20, VerboseFlag = 1)
+    print "Shared 1 0 0.9:", Result
+    print "\n\n"
+    Result = Comparator.GetSharedPeakCount(0, 1, 0.9, PeakCountDivisor = 20, VerboseFlag = 1)
+    print "Shared 0 1 0.9:", Result
+    print "\n\n"
+    
+##    Result = Comparator.GetSharedPeakCount(0, 1, PeakCountDivisor = 20, VerboseFlag = 1)
+##    print "Shared 0 1 1.0:", Result
+##    sys.exit(0)
+##    print "Shared 0 1 0.66:", Comparator.GetSharedPeakCount(0, 1, 0.66, PeakCountDivisor = 5)
+##    print "Shared 0 1 0.66:", Comparator.GetSharedPeakCount(0, 1, 0.66, PeakCountDivisor = 50)
+##    
+##    Result = Comparator.DotProduct(VerboseFlag = 1)
+##    print "Dot product similarity score 1.0:", Result
+##    Result = Comparator.DotProduct(2.0, VerboseFlag = 1)
+##    print "Dot product similarity score 2.0:", Result
+    Result = Comparator.DotProduct(0.5, VerboseFlag = 1, HashByRank = 1)
+    print "Dot product similarity score 0.5:", Result
+##    print "Shared 1 0:", Comparator.GetSharedPeakCount(1, 0)
+##    print "Shared 0 1:", Comparator.GetSharedPeakCount(0, 1)
+##    
+##    print "Shared 1 1:", Comparator.GetSharedPeakCount(1, 1)
+##    print "Cov/corr:", Comparator.ComputeCorrelationCoefficient()
+    Command = "label.py \"%s\" %s"%(sys.argv[1], sys.argv[3])
+    os.system(Command)
+    Command = "label.py \"%s\" %s"%(sys.argv[2], sys.argv[4])
+    os.system(Command)
diff --git a/Spectrum.c b/Spectrum.c
new file mode 100644
index 0000000..43da9eb
--- /dev/null
+++ b/Spectrum.c
@@ -0,0 +1,1487 @@
+//Title:          Spectrum.c
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+#include "CMemLeak.h"
+#include "Spectrum.h"
+#include "Utils.h"
+#include "Inspect.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <memory.h>
+#include <string.h>
+#include <math.h>
+#include "Tagger.h"
+#include "Errors.h"
+#include "ParseXML.h"
+
+#define INITIAL_PEAK_COUNT 100
+#define INITIAL_PRM_PEAK_COUNT 500
+
+#define MINIMUM_ALLOWED_PARENT_MASS GLYCINE_MASS
+#define MAXIMUM_ALLOWED_PARENT_MASS 6000*DALTON
+
+// This should be MORE than enough peaks for any realistic spectrum.
+// If there are more than this, we refuse to parse them all, so there.
+#define MAX_PEAKS_PER_SPECTRUM 10000
+
+/////////////////////////////////////////////////////////////////////////////////
+// Forward declarations:
+int SpectrumLoadHeaderLine(MSSpectrum* Spectrum, char* LineBuffer);
+void AttemptParentMassPeakRemoval(MSSpectrum* Spectrum);
+
+/////////////////////////////////////////////////////////////////////////////////
+// Functions:
+
+void SpectrumComputeSignalToNoise(MSSpectrum* Spectrum)
+{
+    int IntensePeakIndex;
+    int MedianPeakIndex;
+    int PeakIndex;
+    float Signal = 0;
+    float Noise = 0;
+    //
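+    // "Signal" is the intensity of the peak with intensity rank ~2 (the third most
+    // intense peak); "Noise" is the median peak intensity.  S/N is their ratio, with
+    // the noise floored at 1.0.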
+    IntensePeakIndex = min(5, Spectrum->PeakCount) / 2;
+    MedianPeakIndex = Spectrum->PeakCount / 2;
+    for (PeakIndex = 0; PeakIndex < Spectrum->PeakCount; PeakIndex++)
+    {
+        if (Spectrum->Peaks[PeakIndex].IntensityRank == IntensePeakIndex)
+        {
+            Signal = Spectrum->Peaks[PeakIndex].Intensity;
+        }
+        if (Spectrum->Peaks[PeakIndex].IntensityRank == MedianPeakIndex)
+        {
+            Noise = Spectrum->Peaks[PeakIndex].Intensity;
+        }
+    }
+    Spectrum->SignalToNoise = Signal / (float)max(1.0, Noise);
+}
+
+// Remove peaks that are not reasonably high for their mass window.
+// If WindowWidth and KeepCount are <= 0, use reasonable defaults.
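+// A peak is kept if it is among the KeepCount most intense peaks within the window
+// of width WindowWidth centered on its own mass (or if that window holds fewer than
+// KeepCount peaks in total).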
+void WindowFilterPeaks(MSSpectrum* Spectrum, float WindowWidth, int KeepCount)
+{
+    int FilterPeakIndex;
+    int NewIndex;
+    int OtherPeakIndex;
+    float* Intensities;
+    int Neighbors;
+    float WindowStart;
+    float WindowEnd;
+    int FilteredCount = 0;
+    //
+    if (Spectrum->UnfilteredPeaks)
+    {
+        // We've already performed window filtering; don't do it again!
+        return;
+    }
+    if (WindowWidth <= 0)
+    {
+        WindowWidth = DEFAULT_WINDOW_WIDTH;
+    }
+    if (KeepCount <= 0)
+    {
+        KeepCount = DEFAULT_WINDOW_KEEP_COUNT;
+    }
+
+    //
+    Intensities = (float*)calloc(Spectrum->PeakCount, sizeof(float));
+    for (FilterPeakIndex = 0; FilterPeakIndex < Spectrum->PeakCount; FilterPeakIndex++)
+    {
+        WindowStart = Spectrum->Peaks[FilterPeakIndex].Mass - (WindowWidth / (float)2.0);
+        WindowEnd = Spectrum->Peaks[FilterPeakIndex].Mass + (WindowWidth / (float)2.0);
+        Neighbors = 0;
+        for (OtherPeakIndex = 0; OtherPeakIndex < Spectrum->PeakCount; OtherPeakIndex++)
+        {
+            if (Spectrum->Peaks[OtherPeakIndex].Mass > WindowEnd)
+            {
+                break;
+            }
+            if (Spectrum->Peaks[OtherPeakIndex].Mass > WindowStart)
+            {
+                Intensities[Neighbors] = Spectrum->Peaks[OtherPeakIndex].Intensity;
+                Neighbors++;
+            }
+        }
+        qsort(Intensities, Neighbors, sizeof(float), (QSortCompare)CompareFloats);
+        if (Neighbors < KeepCount || Spectrum->Peaks[FilterPeakIndex].Intensity >= Intensities[KeepCount - 1])
+        {
+            Spectrum->Peaks[FilterPeakIndex].FilterScore = 1;
+            FilteredCount++;
+        }
+    }
+    SafeFree(Intensities);
+    // New array:
+    Spectrum->UnfilteredPeakCount = Spectrum->PeakCount;
+    Spectrum->UnfilteredPeaks = Spectrum->Peaks;
+    Spectrum->PeakCount = FilteredCount;
+    Spectrum->Peaks = (SpectralPeak*)calloc(FilteredCount, sizeof(SpectralPeak));
+    NewIndex = 0;
+    for (FilterPeakIndex = 0; FilterPeakIndex < Spectrum->UnfilteredPeakCount; FilterPeakIndex++)
+    {
+        if (Spectrum->UnfilteredPeaks[FilterPeakIndex].FilterScore)
+        {
+            memcpy(Spectrum->Peaks + NewIndex, Spectrum->UnfilteredPeaks + FilterPeakIndex, sizeof(SpectralPeak));
+            Spectrum->Peaks[NewIndex].Index = NewIndex;
+            NewIndex++;
+        }
+    }
+
+}
+
+// Sort from MOST to LEAST intense:
+int ComparePeaksIntensity(const SpectralPeak* PeakA, const SpectralPeak* PeakB)
+{
+    if (PeakA->Intensity < PeakB->Intensity)
+    {
+        return 1;
+    }
+    if (PeakA->Intensity > PeakB->Intensity)
+    {
+        return -1;
+    }
+    return 0;
+
+}
+
+int ComparePeaksByMass(const SpectralPeak* PeakA, const SpectralPeak* PeakB)
+{
+    if (PeakA->Mass < PeakB->Mass)
+    {
+        return -1;
+    }
+    if (PeakA->Mass > PeakB->Mass)
+    {
+        return 1;
+    }
+    return 0;
+}
+
+void IntensityRankPeaks(MSSpectrum* Spectrum)
+{
+    int PeakIndex;
+    //
+    qsort(Spectrum->Peaks, Spectrum->PeakCount, sizeof(SpectralPeak), (QSortCompare)ComparePeaksIntensity);
+    for (PeakIndex = 0; PeakIndex < Spectrum->PeakCount; PeakIndex++)
+    {
+        Spectrum->Peaks[PeakIndex].IntensityRank = PeakIndex;
+    }
+    qsort(Spectrum->Peaks, Spectrum->PeakCount, sizeof(SpectralPeak), (QSortCompare)ComparePeaksByMass);
+    SpectrumComputeSignalToNoise(Spectrum);
+}
+
+
+void FreeMatchList(SpectrumNode* Spectrum)
+{
+    Peptide* MatchNode;
+    Peptide* MatchPrev = NULL;
+    for (MatchNode = Spectrum->FirstMatch; MatchNode; MatchNode = MatchNode->Next)
+    {
+        if (MatchPrev)
+        {
+            FreePeptideNode(MatchPrev);
+        }
+        MatchPrev = MatchNode;
+    }
+    if (MatchPrev)
+    {
+        FreePeptideNode(MatchPrev);
+    }
+    Spectrum->MatchCount = 0;
+    Spectrum->FirstMatch = NULL;
+    Spectrum->LastMatch = NULL;
+}
+
+void FreeSpectrum(MSSpectrum* Spectrum)
+{
+    if (!Spectrum)
+    {
+        return;
+    }
+    SafeFree(Spectrum->UnfilteredPeaks);
+    SafeFree(Spectrum->Peaks);
+    if (Spectrum->Graph)
+    {
+        FreeTagGraph(Spectrum->Graph);
+        Spectrum->Graph = NULL;
+    }
+    SafeFree(Spectrum->BinnedIntensities);
+    SafeFree(Spectrum->BinnedIntensitiesTight);
+    SafeFree(Spectrum->BinnedIntensityLevels);
+    SafeFree(Spectrum->BinPeakIndex);
+    SafeFree(Spectrum->IntensityThresholds);
+    SafeFree(Spectrum->IonScoringNoiseProbabilities);
+    SafeFree(Spectrum);
+}
+
+// Constructor: Allocate and return a spectrum
+MSSpectrum* NewSpectrum()
+{
+    MSSpectrum* Spectrum;
+    // Allocate a spectrum, with a reasonable amount of space to store peaks
+    Spectrum = (MSSpectrum*)calloc(1, sizeof(MSSpectrum));
+    Spectrum->Peaks = (SpectralPeak*)calloc(INITIAL_PEAK_COUNT, sizeof(SpectralPeak));
+    Spectrum->PeakAllocation = INITIAL_PEAK_COUNT;
+    return Spectrum;
+}
+
+int SpectrumAddPeak(MSSpectrum* Spectrum, float Mass, float Intensity)
+{
+    int OldAllocation;
+    
+    // If necessary, reallocate:
+    if (Spectrum->PeakCount > MAX_PEAKS_PER_SPECTRUM)
+    {
+    
+        if (Spectrum->Node->InputFile)
+        {
+            REPORT_ERROR_IS(31, Spectrum->Node->ScanNumber, Spectrum->Node->InputFile->FileName);
+        }
+        else
+        {
+            REPORT_ERROR_IS(31, Spectrum->Node->ScanNumber, "??");
+        }
+        return 0;
+    }
+    if (Spectrum->PeakCount == Spectrum->PeakAllocation)
+    {
+    
+        OldAllocation = Spectrum->PeakAllocation;
+        Spectrum->PeakAllocation = max(200, Spectrum->PeakAllocation * 2);
+	
+        if (OldAllocation)
+        {
+            Spectrum->Peaks = (SpectralPeak*)realloc(Spectrum->Peaks, sizeof(SpectralPeak) * Spectrum->PeakAllocation);
+            memset(Spectrum->Peaks + OldAllocation, 0, sizeof(SpectralPeak) * (Spectrum->PeakAllocation - OldAllocation));
+	
+        }
+        else
+        {
+            Spectrum->Peaks = (SpectralPeak*)calloc(Spectrum->PeakAllocation, sizeof(SpectralPeak));
+        }
+    }
+    ROUND_MASS(Mass, Spectrum->Peaks[Spectrum->PeakCount].Mass);
+    //Spectrum->Peaks[Spectrum->PeakCount].Mass = Mass;
+    Spectrum->Peaks[Spectrum->PeakCount].Intensity = Intensity;
+    Spectrum->Peaks[Spectrum->PeakCount].FilterScore = 0; // init
+    Spectrum->Peaks[Spectrum->PeakCount].NoisePenalty = 0; // init
+    Spectrum->Peaks[Spectrum->PeakCount].PercentIntensity = 0; // init
+    memset(Spectrum->Peaks[Spectrum->PeakCount].IsotopeNeighbors, -1, sizeof(int)*MAX_ISOTOPE_NEIGHBORS);
+    memset(Spectrum->Peaks[Spectrum->PeakCount].NoiseNeighbors, -1, sizeof(int)*MAX_NOISY_NEIGHBORS);
+    //Log("Added peak %d to spectrum.  (Alloc size is now %d)\n", Spectrum->PeakCount, Spectrum->PeakAllocation);
+    Spectrum->PeakCount++;
+    Spectrum->MaxIntensity = max(Spectrum->MaxIntensity, Intensity);
+    return 1;
+}
+
+// Handle the header line for .dta, .pkl, .ms2 formats.
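+// .ms2 header: "Z <charge> <M+H>"; .pkl header: "<precursor m/z> <intensity> <charge>";
+// .dta header: "<M+H> <charge>".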
+int SpectrumLoadHeaderLine(MSSpectrum* Spectrum, char* LineBuffer)
+{
+    char* StrA;
+    char* StrB;
+    char* StrC;
+    float Mass;
+	int Charge;
+    // Default case: The first line should be the parent mass and the charge.
+    StrA = strtok(LineBuffer, WHITESPACE);
+    if (!StrA || !*StrA)
+    {
+      
+        return 0;
+    }
+
+    StrB = strtok(NULL, WHITESPACE);
+    if (!StrB)
+    {
+
+        return 0;
+    }
+
+    StrC = strtok(NULL, WHITESPACE);
+    
+
+    // MS2 file: Z, charge, parent-mass.
+    if (!strcmp(StrA, "Z"))
+    {
+        
+        Mass = (float)atof(StrC);
+        ROUND_MASS(Mass, Spectrum->ParentMass);
+        if (Spectrum->ParentMass < MINIMUM_ALLOWED_PARENT_MASS || 
+            Spectrum->ParentMass > MAXIMUM_ALLOWED_PARENT_MASS)
+        {
+            if (Spectrum->Node && Spectrum->Node->InputFile)
+            {
+                REPORT_ERROR_IIS(42, Spectrum->ParentMass / MASS_SCALE, Spectrum->Node->FilePosition, Spectrum->Node->InputFile->FileName);
+            }
+            else
+            {
+                REPORT_ERROR_I(43, Spectrum->ParentMass / MASS_SCALE);
+            }
+            return 0;
+        }
+        Charge = atoi(StrB);
+        if (Charge > 6)
+        {
+            printf("** Invalid charge '%s' - maximum is 6\n", StrB);
+            return 0;
+        }
+        Spectrum->Charge = Charge;
+        Spectrum->FileChargeFlag = 1;
+        Spectrum->FileCharge[Charge] = 1;
+        Spectrum->MZ = (Spectrum->ParentMass + (Charge - 1) * HYDROGEN_MASS) / Charge;
+        Spectrum->FileMZ = Spectrum->MZ;
+        return 1;
+    }
+
+    // Header line of a PKL file: precursor mz, precursor intensity, and charge.
+    if (StrC)
+    {
+
+      Mass = (float)atof(StrA);
+      ROUND_MASS(Mass, Spectrum->ParentMass);
+      if (Spectrum->ParentMass < MINIMUM_ALLOWED_PARENT_MASS || 
+	  Spectrum->ParentMass > MAXIMUM_ALLOWED_PARENT_MASS)
+        {
+            printf("** Error in SpectrumLoadFromFile: Mass %.2f not legal.\n", Mass);
+            return 0;
+        }
+      Charge = atoi(StrC);
+      if (Charge > 6)
+        {
+            printf("** Invalid charge '%s' - maximum is 6\n", StrC);
+            return 0;
+        }
+      Spectrum->FileCharge[Charge] = 1;
+      Spectrum->FileChargeFlag = 1;
+      Spectrum->Charge = Charge;
+      Spectrum->FileMZ = Spectrum->ParentMass;
+      if (Charge)
+	{
+            Spectrum->ParentMass = (Spectrum->ParentMass * Charge) - (Charge - 1)*HYDROGEN_MASS;
+        }
+    }
+    else
+    {
+        // DTA file:
+        Mass = (float)atof(StrA);
+        if (Mass < 1)
+        {
+            // Invalid header line - the mass can't be zero!
+            return 0;
+        }
+        ROUND_MASS(Mass, Spectrum->ParentMass);
+        Charge = atoi(StrB);
+
+	
+        if (!Charge)
+        {
+            Spectrum->MZ = Spectrum->ParentMass;
+	    Spectrum->FileMZ = Spectrum->MZ;
+            Spectrum->ParentMass = 0;
+	    
+        }
+        else
+        {
+            // The file's mass is the residue mass + 19, which is the parent mass.
+	  Spectrum->FileCharge[Charge] = 1;
+	  Spectrum->FileChargeFlag = 1;
+	  Spectrum->Charge = Charge;
+	  Spectrum->FileMZ = (Spectrum->ParentMass + (Charge - 1) * HYDROGEN_MASS) / Charge;
+	  Spectrum->MZ = Spectrum->FileMZ;
+            //Spectrum->ParentMass -= HYDROGEN_MASS; // remove one H+
+            //Spectrum->ParentMass = (float)atof(StrA) - HYDROGEN_MASS; // remove one H+
+        }        
+    }
+    return 1;
+}
+
+int SpectrumLoadCDTAHeaderLine(MSSpectrum* Spectrum, char* LineBuffer)
+{
+
+  char* StrA;
+  char* StrB;
+    
+  float Mass;
+  int Charge;
+  // Default case: The first line should be the parent mass and the charge.
+  StrA = strtok(LineBuffer, WHITESPACE);
+  if (!StrA || !*StrA)
+    {
+      
+      return 0;
+    }
+  
+  StrB = strtok(NULL, WHITESPACE);
+  if (!StrB)
+    {
+      
+      return 0;
+    }
+
+ 
+  Mass = (float)atof(StrA);
+  if (Mass < 1)
+    {
+      // Invalid header line - the mass can't be zero!
+      return 0;
+    }
+  ROUND_MASS(Mass, Spectrum->ParentMass);
+  Charge = atoi(StrB);
+ 
+  
+  if (!Charge)
+    {
+      Spectrum->MZ = Spectrum->ParentMass;
+      Spectrum->FileMZ = Spectrum->MZ;
+      Spectrum->ParentMass = 0;
+      
+    }
+  else
+    {
+      if(Charge <= 0 || Charge >= 6)
+	return 0;
+      // The file's mass is the residue mass + 19, which is the parent mass.
+      Spectrum->FileCharge[Charge] = 1;
+      Spectrum->FileChargeFlag = 1;
+      Spectrum->Charge = Charge;
+      Spectrum->FileMZ = (Spectrum->ParentMass + (Charge - 1) * HYDROGEN_MASS) / Charge;
+      Spectrum->MZ = Spectrum->FileMZ;
+      
+      //Spectrum->ParentMass -= HYDROGEN_MASS; // remove one H+
+      //Spectrum->ParentMass = (float)atof(StrA) - HYDROGEN_MASS; // remove one H+
+    }        
+
+    return 1;
+}
+
+int GuessSpectralCharge(MSSpectrum* Spectrum)
+{
+    int PeakIndex;
+    float MeanMass = 0;
+    int Charge;
+    int BestDiff = 9999999;
+    int BestCharge = 2;
+    int ParentMass;
+    int Diff;
+    // Compute the MEDIAN peak mass:
+    for (PeakIndex = 0; PeakIndex < Spectrum->PeakCount; PeakIndex++)
+    {
+        if (PeakIndex >= Spectrum->PeakCount / 2)
+        {
+            MeanMass = (float)Spectrum->Peaks[PeakIndex].Mass;
+            break;
+        }
+    }
+    //MeanMass /= Spectrum->PeakCount;
+    // Use a charge that will bring the parent mass as close as possible to the mean peak mass x 2
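+    // i.e. choose the charge z in [1, 9] minimizing |(MZ*z - (z-1)*H) - 2*MedianPeakMass|.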
+    for (Charge = 1; Charge < 10; Charge++)
+    {
+        ParentMass = (Spectrum->MZ * Charge) - (HYDROGEN_MASS * (Charge - 1));
+        Diff = abs(ParentMass - (int)(MeanMass*2));
+        if (Diff < BestDiff)
+        {
+            BestDiff = Diff;
+            BestCharge = Charge;
+        }
+    }
+    return BestCharge;
+}
+
+// Initial parent mass computation.  We do it LATER if our charge is 0 (unknown).
+void SpectrumComputeParentMass(MSSpectrum* Spectrum)
+{
+    //
+    if (!Spectrum->Charge)
+    {
+        return; // We'll tweak later!
+    }    
+    else
+    {
+      
+        Spectrum->MZ = (Spectrum->ParentMass + (Spectrum->Charge - 1)*HYDROGEN_MASS) / Spectrum->Charge;
+        Spectrum->FileMass = Spectrum->ParentMass;
+    }
+}
+
+// Return FALSE when we're done loading
+int SpectrumHandleMS2ColonLine(int LineNumber, int FilePos, char* LineBuffer, void* UserData)
+{
+    char* ValueStr;
+    float ParentMass;
+    float Mass;
+    float Intensity;
+    MSSpectrum* Spectrum;
+	int Charge;
+    int Result;
+    //
+    Spectrum = (MSSpectrum*)UserData;
+
+    if (LineBuffer[0] == ':')
+    {
+        if (Spectrum->PeakCount)
+        {
+            // We've loaded some peaks already, so this is the scan number of 
+            // the NEXT scan.
+            return 0;
+        }
+        else
+        {
+            // We know our scan number already, so we do nothing.
+            return 1;
+        }
+    }
+
+    // If we don't know our MZ yet, then we load it now.  Otherwise we add a peak.
+    if (!Spectrum->MZ)
+    {
+        ValueStr = strtok(LineBuffer, WHITESPACE);
+        if (!ValueStr)
+        {
+            return 1; // INVALID LINE, stop now
+        }
+        ParentMass = (float)(atof(ValueStr) * MASS_SCALE);
+        ValueStr = strtok(NULL, WHITESPACE);
+        if (!ValueStr)
+        {
+            return 0; // INVALID LINE, stop now
+        }
+        Charge = atoi(ValueStr);
+        if (Charge)
+        {
+	  if(Charge <= 0 || Charge >= 6)
+	    return 0;
+	  Spectrum->Charge = Charge;
+	  Spectrum->FileCharge[Charge] = 1;
+	  Spectrum->FileChargeFlag = 1;
+	  Spectrum->ParentMass = (int)(ParentMass - HYDROGEN_MASS + 0.5); 
+	  Spectrum->MZ = (int)((ParentMass + (Spectrum->Charge - 1)*HYDROGEN_MASS) / (float)Spectrum->Charge + 0.5);
+	  Spectrum->FileMZ = Spectrum->MZ;
+        }
+        else
+        {
+            Spectrum->ParentMass = (int)(ParentMass + 0.5);
+            Spectrum->MZ = (int)(ParentMass + 0.5);
+	    Spectrum->FileMZ = Spectrum->MZ;
+        }
+        return 1;
+    }
+    // Ordinary peak
+    ValueStr = strtok(LineBuffer,  WHITESPACE);
+    if (!ValueStr)
+    {
+        return 0; // INVALID LINE, stop now
+    }
+    Mass = (float)atof(ValueStr);
+    ValueStr = strtok(NULL,  WHITESPACE);
+    if (!ValueStr)
+    {
+        return 0; // INVALID LINE, stop now
+    }
+    Intensity = (float)atof(ValueStr);
+    Result = SpectrumAddPeak(Spectrum, Mass, Intensity);
+    return Result;
+}
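+
+// For reference, a colon-style .ms2 record handled above is assumed to look like
+// (values illustrative):
+//   :0001 ...           <- scan-number line; a second ':' line marks the next record
+//   1000.50 2           <- precursor mass and charge
+//   200.10 1345.0       <- peak lines: m/z and intensity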
+
+int SpectrumLoadCDTACallback(int LineNumber, int FilePos, char* LineBuffer, void* UserData)
+{
+	MSSpectrum* Spectrum;
+	char * StrA;
+	char * Str;
+	float Mass;
+	float Intensity;
+
+	int ScanNumber;
+	int Charge;
+	int Result;
+
+	Spectrum = (MSSpectrum*)UserData;
+
+	// If the line starts with '=' then this is the header; we can get the scan number and charge from it.
+	if(LineNumber == 1 && LineBuffer[0] == '=')
+	{
+	 
+		StrA = strtok(LineBuffer, ".");
+		StrA = strtok(NULL, ".");
+		ScanNumber = atoi(StrA);
+		Spectrum->Node->ScanNumber = ScanNumber;
+		
+		StrA = strtok(NULL,".");
+		StrA = strtok(NULL,".");
+		Charge = atoi(StrA);
+		
+		Spectrum->Charge = Charge;
+		Spectrum->FileCharge[Charge] = 1;
+		Spectrum->FileChargeFlag = 1;
+	}
+	else if(LineBuffer[0] == '=')
+	  {
+	    return 0;
+	  }
+	else if(LineNumber == 3) //The first line after the '==' header should be the mass/charge header line
+	{
+	  
+
+	  return SpectrumLoadCDTAHeaderLine(Spectrum,LineBuffer);
+	  
+	}
+	else 
+	{
+	  
+	  // After the first line, we expect to see lines of the form "Mass Intensity"
+	  Str = strtok(LineBuffer, WHITESPACE);
+	  if (!Str)
+	    {
+	      return 1;
+	    }
+	  Mass = (float)atof(Str);
+	  if (!Mass) 
+	    {   
+	      return 1;
+	    }
+	  Str = strtok(NULL, WHITESPACE);
+	  if (!Str)
+	    {
+	      // This line had only one piece on it.  Invalid syntax - peak lines need mass AND intensity!
+	      //printf("**Error in file '%s': peak lines must contain mass AND intensity\n", Spectrum->Node->InputFile->FileName);
+	      REPORT_ERROR_IS(33, LineNumber, Spectrum->Node->InputFile->FileName);
+	      return 0;
+	    }
+	  Intensity = (float)atof(Str);
+
+	  
+	  if (!Intensity) 
+	    {   
+	      // Invalid intensity?  Assume that a string starting with "0" really means intensity zero,
+	      // god help us.
+	      if (Str[0] != '0')
+		{
+		  REPORT_ERROR_IS(33, LineNumber, Spectrum->Node->InputFile->FileName);
+		  return 0;
+		}
+	    }
+	  // If there's a third piece on the line, then stop parsing now.  (That happens if we run
+	  // off the end of a record in a pkl file, into the start of the next record):
+	  Str = strtok(NULL, WHITESPACE);
+	  if (Str)
+	    {
+	      return 0;
+	    }
+	  Result = SpectrumAddPeak(Spectrum, Mass, Intensity);
+	  
+	  return Result;
+	}
+}
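+
+// For reference, the '=' header parsed above is assumed to look like
+// "=== "Run.0123.0123.2.dta" ===": splitting on '.' gives the scan number as the
+// second token and the charge as the fourth, which is what the strtok calls rely on.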
+
+int SpectrumLoadMGFCallback(int LineNumber, int FilePos, char* LineBuffer, void* UserData)
+{
+    MSSpectrum* Spectrum;
+    float Mass;
+    float Intensity;
+    char* WordA;
+    char* WordB;
+    char* EQWordA;
+    int Result;
+    int Charge;
+    char* AndWord;
+    //
+    Spectrum = (MSSpectrum*)UserData;
+    
+
+    // If we see a command we recognize, then handle it:
+    WordA = strtok(LineBuffer, WHITESPACE);
+    WordB = strtok(NULL, WHITESPACE);
+    EQWordA = strtok(WordA, "=");
+
+    if (!CompareStrings(WordA, "END"))
+    {
+        if (WordB && !CompareStrings(WordB, "IONS"))
+        {
+            // Stop parsing lines now!
+            return 0; 
+        }
+    }
+    else if (!CompareStrings(EQWordA, "PEPMASS"))
+    {
+        Mass = (float)atof(LineBuffer + 8);
+        ROUND_MASS(Mass, Spectrum->MZ);
+	Spectrum->FileMZ = Spectrum->MZ;
+        if (Spectrum->MZ < MINIMUM_ALLOWED_PARENT_MASS || Spectrum->MZ > MAXIMUM_ALLOWED_PARENT_MASS)
+        {
+            // Illegal mass!
+            if (Spectrum->Node->InputFile)
+            {
+                REPORT_ERROR_IS(32, Spectrum->Node->ScanNumber, Spectrum->Node->InputFile->FileName);
+            }
+            else
+            {
+                REPORT_ERROR_IS(32, Spectrum->Node->ScanNumber, "???");
+            }
+            return 0;
+        }
+	
+    }
+    else if (!CompareStrings(EQWordA, "CHARGE"))
+    {
+		
+        Charge = atoi(LineBuffer + 7);
+	if (Charge)
+	  {
+
+	    
+	    Spectrum->Charge = Charge;
+	    if(Charge >= 6)
+	      return 0;
+	    Spectrum->FileCharge[Charge] = 1;
+	    Spectrum->FileChargeFlag = 1;
+	  }
+	// the CHARGE line may have the form "2+ and 3+"
+	if (WordB && !CompareStrings(WordB, "and"))
+	  {
+	    Charge = atoi(WordB + 4);
+	    if (Charge)
+	      {
+		Spectrum->Charge = Charge;
+		if(Charge >= 6)
+		  return 0;
+		
+		Spectrum->FileCharge[Charge] = 1;
+		Spectrum->FileChargeFlag = 1;
+	      }
+	  }
+    }
+    else
+    {
+        // Default: Try to read an m/z and intensity
+        Mass = (float)atof(WordA);
+        if (Mass && WordB)
+        {
+            Intensity = (float)atof(WordB);
+            Result = SpectrumAddPeak(Spectrum, Mass, Intensity);
+            return Result;
+        }
+    }
+    return 1;
+}
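+
+// For reference, the lines handled above come from an MGF block of the form (values illustrative):
+//   BEGIN IONS
+//   PEPMASS=500.75
+//   CHARGE=2+ and 3+
+//   200.1 1345.0
+//   ...
+//   END IONS
+// Only PEPMASS, CHARGE, the peak lines, and END IONS are interpreted here; other header
+// lines typically fall through to the peak-parsing branch and are ignored because they
+// do not parse to a positive mass.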
+
+// Load a spectrum from a concatenated dta (_dta.txt) file: a '====' header line, then a
+// mass/charge line, then peak lines; the record ends at the next '====' line.
+int SpectrumLoadCDTA(MSSpectrum* Spectrum, FILE* DTAFile)
+{
+	ParseFileByLines(DTAFile, SpectrumLoadCDTACallback,Spectrum,0);
+	if(Spectrum->Charge && (Spectrum->Charge <= 0 || Spectrum->Charge >= 6))
+	   return 0;
+	//Should we guess charge?	
+
+ return 1;	
+}
+
+// Load spectrum from an MGF file.  See one or more header lines, then some
+// peaks, then an "END IONS" line.
+int SpectrumLoadMGF(MSSpectrum* Spectrum, FILE* DTAFile)
+{
+
+    ParseFileByLines(DTAFile, SpectrumLoadMGFCallback, Spectrum, 0);
+    if (Spectrum->Charge)
+    {
+      if(Spectrum->Charge <= 0 || Spectrum->Charge >= 6)
+	return 0;
+      Spectrum->ParentMass = Spectrum->MZ * Spectrum->Charge - (HYDROGEN_MASS * (Spectrum->Charge - 1));
+	
+    }
+
+    return 1;
+}
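+
+// Worked example for the parent-mass line above: with an assumed PEPMASS m/z of
+// 500.75 Da and charge 2, ParentMass = 500.75 * 2 - 1.008 * (2 - 1) = 1000.49 Da,
+// i.e. the singly-protonated mass M+H (all values are scaled integers internally).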
+
+int GuessMS2FormatFromLine(int LineNumber, int FilePos, char* LineBuffer, void* UserData)
+{
+    MSSpectrum* Spectrum;
+    Spectrum = (MSSpectrum*)UserData;
+    if (LineBuffer[0] == ':')
+    {
+        Spectrum->Node->InputFile->Format = SPECTRUM_FORMAT_MS2_COLONS;
+        return 0;
+    }
+    if (LineBuffer[0] == 'Z')
+    {
+        Spectrum->Node->InputFile->Format = SPECTRUM_FORMAT_MS2;
+        return 0;
+    }
+    return 1;
+
+}
+
+// Return 1 if we succeeded.
+int SpectrumLoadDTAFileLine(int LineNumber, int FilePos, char* LineBuffer, void* UserData)
+{
+    MSSpectrum* Spectrum;
+    int Result;
+    float Mass;
+    float Intensity;
+    char* Str;
+    // 
+    Spectrum = (MSSpectrum*)UserData;
+    // Special case: MS2 format handles one or more "Z" lines
+    if (LineBuffer[0] == 'Z' && (LineBuffer[1] == ' ' || LineBuffer[1] == '\t'))
+    {
+        Result = SpectrumLoadHeaderLine(Spectrum, LineBuffer);
+        return Result;
+    }
+    // Special case: MS2 format skips the first "S" line, and knows the 
+	// second "S" line it sees marks the end of the record
+    if (LineBuffer[0] == 'S' && (LineBuffer[1] == ' ' || LineBuffer[1] == '\t'))
+    {
+		if (LineNumber > 1)
+		{
+			return 0;
+		}
+		else
+		{
+			return 1;
+		}
+    }
+    if (LineNumber == 1)
+    {
+        Result = SpectrumLoadHeaderLine(Spectrum, LineBuffer);
+        return Result;
+    }
+
+    // After the first line, we expect to see lines of the form "Mass Intensity"
+    Str = strtok(LineBuffer, WHITESPACE);
+    if (!Str)
+    {
+        return 1;
+    }
+    Mass = (float)atof(Str);
+    if (!Mass) 
+    {   
+        return 1;
+    }
+    Str = strtok(NULL, WHITESPACE);
+    if (!Str)
+    {
+        // This line had only one piece on it.  Invalid syntax - peak lines need mass AND intensity!
+        //printf("**Error in file '%s': peak lines must contain mass AND intensity\n", Spectrum->Node->InputFile->FileName);
+        REPORT_ERROR_IS(33, LineNumber, Spectrum->Node->InputFile->FileName);
+        return 0;
+    }
+    Intensity = (float)atof(Str);
+    if (!Intensity) 
+    {   
+        // Invalid intensity?  Assume that a string starting with "0" really means intensity zero,
+        // god help us.
+        if (Str[0] != '0')
+        {
+            REPORT_ERROR_IS(33, LineNumber, Spectrum->Node->InputFile->FileName);
+            return 0;
+        }
+    }
+    // If there's a third piece on the line, then stop parsing now.  (That happens if we run
+    // off the end of a record in a pkl file, into the start of the next record):
+    Str = strtok(NULL, WHITESPACE);
+    if (Str)
+    {
+        return 0;
+    }
+    Result = SpectrumAddPeak(Spectrum, Mass, Intensity);
+    return Result;
+}
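+
+// For reference, a plain .dta record handled above is assumed to look like:
+//   1000.50 2           <- header: singly-protonated mass (M+H) and charge
+//   200.10 1345.0       <- peak lines: m/z and intensity
+// .ms2 input adds "S"/"Z" lines, and a .pkl record starts with a three-value line, which
+// is why a peak line carrying a third token is treated as the start of the next record.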
+
+int GuessSpectrumFormatFromHeader(char* FilePath, MSSpectrum* Spectrum)
+{
+    FILE* MS2File;
+    //
+    MS2File = fopen(FilePath, "rb");
+    ParseFileByLines(MS2File, GuessMS2FormatFromLine, Spectrum, 0);
+    fclose(MS2File);
+    return Spectrum->Node->InputFile->Format;
+}
+
+// SpectrumLoadFromFile: Return True if the spectrum is valid, False if it's not.
+// Example of an invalid spectrum file: Sequest .out files contaminating the .dta directory.
+// Iterate over lines, handling the header specially.
+int SpectrumLoadFromFile(MSSpectrum* Spectrum, FILE* DTAFile)
+{    
+    int ReturnCode = 1;
+    int MS2ChargeLineSeen = 0;
+    int i;
+    float PeakMass;
+    //
+
+    // Handle XML formats separately from line-based formats:
+    switch (Spectrum->Node->InputFile->Format)
+    {
+    case SPECTRUM_FORMAT_MZXML:
+        ReturnCode = SpectrumLoadMZXML(Spectrum, DTAFile);
+        break;
+    case SPECTRUM_FORMAT_MZDATA:
+        SpectrumLoadMZData(Spectrum, DTAFile);
+        break;
+    case SPECTRUM_FORMAT_MGF:
+        ReturnCode = SpectrumLoadMGF(Spectrum, DTAFile);
+        break;
+    case SPECTRUM_FORMAT_MS2_COLONS:
+        ParseFileByLines(DTAFile, SpectrumHandleMS2ColonLine, Spectrum, 0);
+        break;
+    case SPECTRUM_FORMAT_CDTA:
+    	ReturnCode = SpectrumLoadCDTA(Spectrum,DTAFile);
+    	break;
+    case SPECTRUM_FORMAT_PKL:
+    case SPECTRUM_FORMAT_DTA:
+    case SPECTRUM_FORMAT_MS2:
+    default:
+        ParseFileByLines(DTAFile, SpectrumLoadDTAFileLine, Spectrum, 0);
+        break;
+    }
+    if(Spectrum->Charge && (Spectrum->Charge < 0 || Spectrum->Charge >= 6))
+       return 0;
+
+    // Unless MultiChargeMode is set, we only keep spectra with charge 3 or less
+    if(Spectrum->Charge && !GlobalOptions->MultiChargeMode && Spectrum->Charge > 3)
+      {
+	//printf("Ignoring Spectrum %d with charge %d\n",Spectrum->Node->ScanNumber,Spectrum->Charge);
+	return 0;
+      }
+    else
+     {
+       //printf("Keeping Spectrum %d with charge %d and %d peaks\n",Spectrum->Node->ScanNumber,Spectrum->Charge,Spectrum->PeakCount);
+      }
+    
+    if (ReturnCode)
+    {
+        SpectrumComputeParentMass(Spectrum);
+    }
+    //printf("SCAN: %d\n",Spectrum->Node->ScanNumber);
+    //for(i = 0; i < Spectrum->PeakCount; ++i)
+    //  {
+	
+    //	PeakMass = (float)(Spectrum->Peaks[i].Mass);
+    //	printf("%f %f\n",PeakMass/1000, Spectrum->Peaks[i].Intensity);
+    // }
+    
+    
+    //if (GlobalOptions->PhosphorylationFlag)
+    //{
+    //    AttemptParentMassPeakRemoval(Spectrum);
+    //}
+    return ReturnCode;
+}
+
+////For phosphorylated spectra, the superprominent M-p peak can 
+////fritz the charge state guessing, and tagging.  So we remove it.
+//void AttemptParentMassPeakRemoval(MSSpectrum* Spectrum)
+//{
+//    int MostIntensePeakIndex;
+//    int MostIntenseMass;
+//    int PeakIndex;
+//    float MostIntense = 0.0;
+//    float NextMostIntense = 0.0;
+//    int Diff;
+//    int ExpectedDiff;
+//    int ExpectedDiff2;
+//    int Epsilon = 2 * DALTON;
+//    int Charge;
+//    //
+//    for (PeakIndex = 0; PeakIndex < Spectrum->PeakCount; PeakIndex++)
+//    {
+//        if (Spectrum->Peaks[PeakIndex].Intensity > MostIntense)
+//        {
+//            NextMostIntense = MostIntense;
+//            MostIntense = Spectrum->Peaks[PeakIndex].Intensity;
+//            MostIntensePeakIndex = PeakIndex;
+//            MostIntenseMass = Spectrum->Peaks[PeakIndex].Mass;
+//        }
+//        else if(Spectrum->Peaks[PeakIndex].Intensity > NextMostIntense)
+//        {
+//            NextMostIntense = Spectrum->Peaks[PeakIndex].Intensity;
+//        }
+//    }
+//    //printf("Most intense %f, next %f\n",MostIntense, NextMostIntense);
+//    //if more than 3 times great, and in the right place, remove peak.
+//    //if (MostIntense < 2 * NextMostIntense)
+//    //{
+//    //    return;
+//    //}
+//    //printf ("MZ of %d, charge %d\n", Spectrum->MZ, Spectrum->Charge);
+//    // If the spectrum has a charge, then trust, otherwise try charge 2, 3
+//	//Set m/z with the new parentmass and charge that was just assigned in ConstructTags
+//	printf("Old MZ %f\n",Spectrum->MZ);
+//	Spectrum->MZ = (Spectrum->ParentMass + (Spectrum->Charge - 1) * HYDROGEN_MASS) / Spectrum->Charge;
+//	printf("New MZ %f\n",Spectrum->MZ);
+//	return;
+//    if (Spectrum->Charge)
+//    {
+//        Diff = abs(Spectrum->MZ - MostIntenseMass);
+//        ExpectedDiff = PHOSPHATE_WATER_MASS / Spectrum->Charge;
+//        ExpectedDiff2 = (PHOSPHATE_WATER_MASS + WATER_MASS) / Spectrum->Charge;
+//        if (abs (Diff - ExpectedDiff) < Epsilon)
+//        { //remove peak
+//            Spectrum->RemovedPeakIndex = MostIntensePeakIndex;
+//            Spectrum->RemovedPeakIntensity = Spectrum->Peaks[MostIntensePeakIndex].Intensity;
+//            Spectrum->Peaks[MostIntensePeakIndex].Intensity = 1.0; //cut to ground
+//        }
+//        else if (abs(Diff - ExpectedDiff2) < Epsilon)
+//        { //remove peak
+//            Spectrum->RemovedPeakIndex = MostIntensePeakIndex;
+//            Spectrum->RemovedPeakIntensity = Spectrum->Peaks[MostIntensePeakIndex].Intensity;
+//            Spectrum->Peaks[MostIntensePeakIndex].Intensity = 1.0; //cut to ground
+//        }
+//    }
+//    else
+//    {
+//        for (Charge = 1; Charge <= 3; Charge++)
+//        {
+//            Diff = abs(Spectrum->MZ - MostIntenseMass);
+//            ExpectedDiff = PHOSPHATE_WATER_MASS/ Charge;
+//            ExpectedDiff2 = (PHOSPHATE_WATER_MASS + WATER_MASS)/ Charge;
+//            // printf("Charge %d, Diff %d, ExpectedDiff %d\n", Charge, Diff, ExpectedDiff);
+//            if (abs (Diff - ExpectedDiff) < Epsilon)
+//            { // remove peak
+//                Spectrum->RemovedPeakIndex = MostIntensePeakIndex;
+//                Spectrum->RemovedPeakIntensity = Spectrum->Peaks[MostIntensePeakIndex].Intensity;
+//                Spectrum->Peaks[MostIntensePeakIndex].Intensity = 1.0; //cut to ground
+//                Spectrum->Charge = Charge; // This is a big enough clue, that we are going to guess charge
+//                Spectrum->MZ = MostIntenseMass + ExpectedDiff; //testing this feature
+//                break;
+//            }
+//            else if (abs(Diff - ExpectedDiff2) < Epsilon)
+//            { // remove peak
+//                Spectrum->RemovedPeakIndex = MostIntensePeakIndex;
+//                Spectrum->RemovedPeakIntensity = Spectrum->Peaks[MostIntensePeakIndex].Intensity;
+//                Spectrum->Peaks[MostIntensePeakIndex].Intensity = 1.0; //cut to ground
+//                Spectrum->Charge = Charge;
+//                Spectrum->MZ = MostIntenseMass + ExpectedDiff2;
+//                break;
+//            }
+//        } // end for
+//    } // end else
+//
+//}
+
+// Called AFTER filtering.  Looks 1Da to the left of peaks for potential isotope neighbors.
+void SpectrumAssignIsotopeNeighbors(MSSpectrum* Spectrum)
+{
+    // Don't worry *too* much about efficiency, as this happens only once during scoring
+    int PeakIndex;
+    int OldPeakIndex;
+    int IsotopeCount;
+    int NoiseCount;
+    int MaxMass;
+    int MinMass;
+    int OtherPeakIndex;
+    float IntensityPercent;
+    //
+    // Assign noise penalty:
+    for (PeakIndex = 0; PeakIndex < Spectrum->PeakCount; PeakIndex++)
+    {
+        IntensityPercent = Spectrum->Peaks[PeakIndex].Intensity / Spectrum->MaxIntensity;
+        Spectrum->Peaks[PeakIndex].PercentIntensity = IntensityPercent;
+        if (IntensityPercent < 0.05)
+        {
+            Spectrum->Peaks[PeakIndex].NoisePenalty = -921;//0.0001
+        }
+        else if (IntensityPercent < 0.3)
+        {
+            Spectrum->Peaks[PeakIndex].NoisePenalty = -1382; //0.000001
+        }
+        else if (IntensityPercent < 0.6)
+        {
+            Spectrum->Peaks[PeakIndex].NoisePenalty = -1842; //0.00000001
+        }
+        else 
+        {
+            Spectrum->Peaks[PeakIndex].NoisePenalty = -2303; //0.0000000001
+        }
+    }
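+    // The penalty constants above appear to be natural-log probabilities scaled by 100:
+    // e.g. ln(0.0001) = -9.21 -> -921, ln(1e-6) = -13.82 -> -1382, ln(1e-10) = -23.03 -> -2303.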
+    // First, look for isotope neighbors.  Scan downward from each peak:
+    for (PeakIndex = 0; PeakIndex < Spectrum->PeakCount; PeakIndex++)
+    {
+        MaxMass = Spectrum->Peaks[PeakIndex].Mass - 79;
+        MinMass = Spectrum->Peaks[PeakIndex].Mass - 121;
+        IsotopeCount = 0;
+        for (OldPeakIndex = max(0, PeakIndex - 1); OldPeakIndex; OldPeakIndex--)
+        {
+            if (Spectrum->Peaks[OldPeakIndex].Mass < MinMass)
+            {
+                break;
+            }
+            if (Spectrum->Peaks[OldPeakIndex].Mass > MaxMass)
+            {
+                continue;
+            }
+            Spectrum->Peaks[PeakIndex].IsotopeNeighbors[IsotopeCount++] = OldPeakIndex;
+        }
+    }
+    // Now look for noise-neighbors (peaks which could be the same peak, but are split
+    // due to limitations in recording).
+    for (PeakIndex = 0; PeakIndex < Spectrum->PeakCount; PeakIndex++)
+    {
+        NoiseCount = 0;
+        MaxMass = Spectrum->Peaks[PeakIndex].Mass + 21; // 0.2 Da radius
+        MinMass = Spectrum->Peaks[PeakIndex].Mass - 21;
+        for (OtherPeakIndex = PeakIndex + 1; OtherPeakIndex < min(Spectrum->PeakCount, PeakIndex + 5); OtherPeakIndex++)
+        {
+            if (Spectrum->Peaks[OtherPeakIndex].Mass > MaxMass)
+            {
+                break;
+            }
+            Spectrum->Peaks[PeakIndex].NoiseNeighbors[NoiseCount++] = OtherPeakIndex;
+        }
+        for (OtherPeakIndex = max(0, PeakIndex - 1); OtherPeakIndex > max(-1, PeakIndex - 5); OtherPeakIndex--)
+        {
+            if (Spectrum->Peaks[OtherPeakIndex].Mass < MinMass)
+            {
+                break;
+            }
+            Spectrum->Peaks[PeakIndex].NoiseNeighbors[NoiseCount++] = OtherPeakIndex;
+        }
+    }
+}
+
+void SpectrumSetCharge(MSSpectrum* Spectrum, int Charge)
+{
+    //MZ = ((Charge-1)*1.0078 + self->Spectrum->ParentMass) / self->Spectrum->Charge;
+    Spectrum->Charge = Charge;
+    Spectrum->PMCorrectedFlag = 0;
+    Spectrum->ParentMass = (Spectrum->MZ * Charge) - (Charge - 1) * HYDROGEN_MASS;
+}
+
+// Compute the low/med/hi intensity cutoffs for the spectrum.
+void ComputeSpectrumIntensityCutoffs(MSSpectrum* Spectrum)
+{
+    int PeakIndex;
+    float GrassIntensity;
+    float TotalIntensity;
+    int CutoffRank;
+    float SortedIntensity[200];
+    int WeakPeakCount = 0;
+    //
+    TotalIntensity = 0;
+    CutoffRank = (int)(Spectrum->ParentMass / (100 * DALTON));
+    for (PeakIndex = 0; PeakIndex < Spectrum->PeakCount; PeakIndex++)
+    {
+        if (Spectrum->Peaks[PeakIndex].IntensityRank >= CutoffRank)
+        {
+            SortedIntensity[WeakPeakCount] = Spectrum->Peaks[PeakIndex].Intensity;
+            WeakPeakCount++;
+        }
+        TotalIntensity += Spectrum->Peaks[PeakIndex].Intensity;
+        if (WeakPeakCount == 200)
+        {
+            break;
+        }
+    }
+    if (!WeakPeakCount)
+    {
+        //printf("** Error in ComputeSpectrumIntensityCutoffs: No weak peak ranks found?  Intensity ranking must be complete here.\n");
+        if (!Spectrum->PeakCount)
+        {
+            return;
+        }
+        GrassIntensity = TotalIntensity / (2 * Spectrum->PeakCount);
+    }
+    else
+    {
+        qsort(SortedIntensity, WeakPeakCount, sizeof(float), (QSortCompare)CompareFloats);
+        GrassIntensity = SortedIntensity[WeakPeakCount / 2];
+    }
+    Spectrum->IntensityCutoffLow = (float)0.25 * GrassIntensity;
+    Spectrum->IntensityCutoffMedium = 3 * GrassIntensity;
+    Spectrum->IntensityCutoffHigh = 10 * GrassIntensity;
+}
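+
+// Illustration of the cutoffs above: GrassIntensity is (roughly) the median intensity of
+// the "weak" peaks, so if that median is, say, 40.0, the spectrum gets
+// IntensityCutoffLow = 10.0, IntensityCutoffMedium = 120.0, and IntensityCutoffHigh = 400.0.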
+
+//// Allocate and populate BinnedIntensities for the spectrum.  Assumes that ParentMass is set.
+//void SpectrumComputeBinnedIntensities(SpectrumNode* Node) // OBSOLETE
+//{
+//    int MaxParentMass = 0;
+//    MSSpectrum* Spectrum;
+//    int PeakIndex;
+//    int Bin;
+//    int NearBin;
+//    SpectralPeak* Peak;
+//    float Intensity;
+//    int BinScalingFactor = 100; // One bin per 0.1Da
+//    
+//    // A spectrum has at most this many "high" peaks (one per 100Da)
+//    int SuperPeakCount;
+//
+//    static int* BestIntensityRank = NULL;
+//    static int BestIntensityRankSize = 0;
+//    //
+//    Spectrum = Node->Spectrum;
+//    if (!Spectrum)
+//    {
+//        return;
+//    }
+//    SuperPeakCount = Spectrum->ParentMass / (100 * DALTON);
+//    MaxParentMass = Spectrum->MZ * 3;
+//    Spectrum->IntensityBinCount = (MaxParentMass + DALTON) / BinScalingFactor; 
+//    SafeFree(Spectrum->BinnedIntensities);
+//    SafeFree(Spectrum->BinnedIntensitiesTight);
+//    SafeFree(Spectrum->BinnedIntensityLevels);
+//    SafeFree(Spectrum->BinPeakIndex);
+//    Spectrum->BinnedIntensities = (float*)calloc(Spectrum->IntensityBinCount, sizeof(float));
+//    Spectrum->BinnedIntensitiesTight = (float*)calloc(Spectrum->IntensityBinCount, sizeof(float));
+//    Spectrum->BinnedIntensityLevels = (int*)calloc(Spectrum->IntensityBinCount, sizeof(int));
+//    Spectrum->BinPeakIndex = (int*)calloc(Spectrum->IntensityBinCount, sizeof(int));
+//
+//    if (BestIntensityRankSize < Spectrum->IntensityBinCount)
+//    {
+//        SafeFree(BestIntensityRank);
+//        BestIntensityRankSize = Spectrum->IntensityBinCount + 500;
+//        BestIntensityRank = (int*)calloc(BestIntensityRankSize, sizeof(int));
+//    }
+//    for (Bin = 0; Bin < Spectrum->IntensityBinCount; Bin++)
+//    {
+//        Spectrum->BinPeakIndex[Bin] = -1;
+//        BestIntensityRank[Bin] = 999;
+//    }
+//
+//    // Iterate over spectral peaks, putting intensity into bins:
+//    for (PeakIndex = 0; PeakIndex < Spectrum->PeakCount; PeakIndex++)
+//    {
+//        Peak = Spectrum->Peaks + PeakIndex;
+//        Bin = (Peak->Mass + 50) / BinScalingFactor;
+//        for (NearBin = Bin - 6; NearBin < Bin + 7; NearBin++)
+//        {
+//            if (NearBin < 0 || NearBin >= Spectrum->IntensityBinCount)
+//            {
+//                continue;
+//            }
+//            if (abs(Peak->Mass - (NearBin * BinScalingFactor)) > INTENSITY_BIN_RADIUS)
+//            {
+//                continue;
+//            }
+//            Spectrum->BinnedIntensities[NearBin] += Peak->Intensity;
+//            BestIntensityRank[Bin] = min(BestIntensityRank[Bin], Peak->IntensityRank);
+//            if (Spectrum->BinPeakIndex[NearBin] < 0)
+//            {
+//                Spectrum->BinPeakIndex[NearBin] = PeakIndex;
+//            }
+//            if (abs(Peak->Mass - (NearBin * BinScalingFactor)) <= INTENSITY_BIN_RADIUS_TIGHT)
+//            {
+//                Spectrum->BinnedIntensitiesTight[NearBin] += Peak->Intensity;
+//            }
+//        }
+//    }
+//    // Compute the intensity level (absent, lo, med, hi) for each bin:
+//    ComputeSpectrumIntensityCutoffs(Spectrum);
+//    for (Bin = 0; Bin < Spectrum->IntensityBinCount; Bin++)
+//    {
+//        Intensity = Spectrum->BinnedIntensities[Bin];
+//        if (Intensity > Spectrum->IntensityCutoffHigh && BestIntensityRank[Bin] < SuperPeakCount)
+//        {
+//            Spectrum->BinnedIntensityLevels[Bin] = 3;
+//        }
+//        else if (Intensity > Spectrum->IntensityCutoffMedium)
+//        {
+//            Spectrum->BinnedIntensityLevels[Bin] = 2;
+//        }
+//        else if (Intensity > Spectrum->IntensityCutoffLow)
+//        {
+//            Spectrum->BinnedIntensityLevels[Bin] = 1;
+//        }
+//        else
+//        {
+//            Spectrum->BinnedIntensityLevels[Bin] = 0;
+//        }
+//    }
+//}
+
+void SpectrumComputeNoiseDistributions(SpectrumNode* Node)
+{
+    MSSpectrum* Spectrum;
+    int BinCountA;
+    int BinCountB;
+    int BinCountC;
+    int BinCountD;
+    int BinCutoffA;
+    int BinCutoffB;
+    int Bin;
+    int Index;
+    int IntensityRank;
+    SpectrumTweak* Tweak;
+    int TweakIndex;
+    // Compute the distributions of intensity-levels for the low-mass and high-mass halves
+    // of the spectrum, once for each parent-mass tweak:
+    Spectrum = Node->Spectrum;
+    for (TweakIndex = 0; TweakIndex < TWEAK_COUNT; TweakIndex++)
+    {
+        Tweak = Node->Tweaks + TweakIndex;
+        if (!Tweak->Charge)
+        {
+            continue;
+        }
+        BinCutoffA = (int)((Node->Tweaks[TweakIndex].ParentMass * 0.3333 + 5) / 100);
+        BinCutoffB = (int)((Node->Tweaks[TweakIndex].ParentMass * 0.6667 + 5) / 100);
+        BinCountA = 0;
+        BinCountB = 0;
+        BinCountC = 0;
+        BinCountD = 0;
+        // SECTOR_COUNT
+        BinCutoffA = (int)((Node->Tweaks[TweakIndex].ParentMass * 0.5 + 5) / 100);
+        for (Index = 0; Index < 8; Index++)
+        {
+            Node->Tweaks[TweakIndex].Intensities[Index] = 1; // padding-probability
+        }
+        for (Bin = 0; Bin < Spectrum->IntensityBinCount; Bin++)
+        {
+            if (Bin >= BinCutoffA)
+            {
+                BinCountB++;
+                Tweak->Intensities[4 + Spectrum->BinnedIntensityLevels[Bin]] += 1.0;
+            }
+            else
+            {
+                BinCountA++;
+                Tweak->Intensities[0 + Spectrum->BinnedIntensityLevels[Bin]] += 1.0;
+            }
+        }
+        for (IntensityRank = 0; IntensityRank < 4; IntensityRank++)
+        {
+            Tweak->Intensities[0 + IntensityRank] = (float)log((Tweak->Intensities[0 + IntensityRank] + 2) / (BinCountA + 2));
+            Tweak->Intensities[4 + IntensityRank] = (float)log((Tweak->Intensities[4 + IntensityRank] + 2) / (BinCountB + 2));
+        }
+        
+    }
+}
+
+// Add a spectrum to the list of spectra to be searched. 
+void AddSpectrumToList(InputFileNode* InputFile, int FilePos, int ScanNumber, int SpecIndex)
+{
+    SpectrumNode* NewNode;
+
+    NewNode = (SpectrumNode*)calloc(1, sizeof(SpectrumNode));
+    NewNode->InputFile = InputFile;
+    if (GlobalOptions->LastSpectrum)
+    {
+        GlobalOptions->LastSpectrum->Next = NewNode;
+    }
+    else
+    {
+        GlobalOptions->FirstSpectrum = NewNode;
+    }
+    NewNode->FilePosition = FilePos;
+    NewNode->ScanNumber = ScanNumber;
+    NewNode->SpecIndex = SpecIndex;
+    GlobalOptions->LastSpectrum = NewNode;
+    GlobalOptions->SpectrumCount++;
+    InputFile->SpectrumCount++;
+}
+
+
+int GuessSpectrumFormatFromExtension(char* FileName)
+{
+    char* Extension;
+    for (Extension = FileName + strlen(FileName); Extension > FileName; Extension--)
+    {
+        if (*Extension == '.')
+        {
+            break;
+        }
+    }
+    if (!CompareStrings(Extension, ".out"))
+    {
+        // sequest gunk, ignore.
+        return SPECTRUM_FORMAT_INVALID;
+    }
+    if (!CompareStrings(Extension, ".ms2"))
+    {
+        return SPECTRUM_FORMAT_MS2_COLONS; //SPECTRUM_FORMAT_MS2;
+    }
+    if (!CompareStrings(Extension, ".mzxml"))
+    {
+        return SPECTRUM_FORMAT_MZXML;
+    }
+    if (!CompareStrings(Extension, ".mzdata"))
+    {
+        return SPECTRUM_FORMAT_MZDATA;
+    }
+    if (!CompareStrings(Extension, ".mgf"))
+    {
+        return SPECTRUM_FORMAT_MGF;
+    }
+    if (!CompareStrings(Extension, ".dta"))
+    {
+        return SPECTRUM_FORMAT_DTA;
+    }
+    if (!CompareStrings(Extension, ".pkl"))
+    {
+        return SPECTRUM_FORMAT_PKL;
+    }
+    if(!CompareStrings(Extension,".txt"))
+    {
+    	//_dta.txt is a PNNL-specific way of saying concatenated DTA
+    	for ( ; Extension > FileName; Extension--)
+    	{
+       	 	if (*Extension == '_')
+       	 	{
+           		 break;
+        	}
+    	}
+    	if(!CompareStrings(Extension,"_dta.txt"))
+	  {
+
+    		return SPECTRUM_FORMAT_CDTA;
+	  }
+    }
+
+    // Unexpected extension.  Let's ASSUME that it's a .dta file.
+    REPORT_WARNING_S(30, FileName);
+    return SPECTRUM_FORMAT_DTA;
+}
+
+void FreeSpectrumNode(SpectrumNode* Node)
+{
+    int TweakIndex;
+    //
+    if (!Node)
+    {
+        return;
+    }
+    for (TweakIndex = 0; TweakIndex < TWEAK_COUNT; TweakIndex++)
+    {
+        SafeFree(Node->Tweaks[TweakIndex].PRMScores);
+        Node->Tweaks[TweakIndex].PRMScores = NULL;
+    }
+    if (Node->Spectrum)
+    {
+        FreeSpectrum(Node->Spectrum);
+    }
+    SafeFree(Node);
+}
diff --git a/Spectrum.h b/Spectrum.h
new file mode 100644
index 0000000..5e84106
--- /dev/null
+++ b/Spectrum.h
@@ -0,0 +1,160 @@
+//Title:          Spectrum.h
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+#ifndef SPECTRUM_H
+#define SPECTRUM_H
+
+// The basic spectrum object, with array of peaks.
+// Structs and functions to support loading of spectra from several
+// text-based file formats (.dta files, .mgf files, .ms2 files).
+
+#include <stdio.h>
+#include "Inspect.h"
+
+#define DEFAULT_WINDOW_WIDTH 50000 //50Da
+#define DEFAULT_WINDOW_KEEP_COUNT 6 // 6
+
+// After filtering, there will probably be just 1 possible isotope neighbor
+#define MAX_ISOTOPE_NEIGHBORS 8
+
+#define MAX_NOISY_NEIGHBORS 8
+
+// Intensity bin radii, in thousandths of a dalton
+#define INTENSITY_BIN_RADIUS_TIGHT 150 
+#define INTENSITY_BIN_RADIUS 500 
+
+// Set VERBOSE_DEBUGGING to true if scoring (in mutation-tolerant mode) is broken.  
+// Slows things down a bit, because we write out spreadsheets:
+// PRMScores.xls (verbose annotations for every last PRM bin).
+// DTable.xls (in 2-mod mode) The DScore[] d.p. table
+// PrefixSuffix.xls (in 2-mod mode) The PrefixTable and SuffixTable
+//#define VERBOSE_DEBUGGING
+
+typedef struct SpectralPeak
+{
+    int Mass;
+    float Intensity;
+    int IntensityRank;
+    int Rank; // binned version of IntensityRank
+    int IonType; // for PRM peaks only
+    int FilterScore;
+    int NoisePenalty;
+    float PercentIntensity;
+    int HasNeutralLosses; // 0, 1, or 2
+    int TheoPeak; // for (greedy) interpretation 
+    int Score; // for (greedy) interpretation
+    // The IsotopeNeighbors array holds the indices of peaks that are potential 
+    // isotopes of this peak.  If a peak was assigned a Noise ion type, but it has 
+    // a neighbor peak at -1Da, then we give the peak the Isotope ion type.
+    // (The +1 peak gets an IsotopeNeighbors entry)
+    int IsotopeNeighbors[MAX_ISOTOPE_NEIGHBORS]; 
+    // Sometimes two high-intensity peaks are separated by only 0.1 amu.  That *probably*
+    // means there's one big peak that was split by the machine.  
+    int NoiseNeighbors[MAX_NOISY_NEIGHBORS];
+    int Index;
+    int RescuedFlag;
+    int AminoIndex; // For labeling purposes only!
+} SpectralPeak;
+
+
+typedef struct ListNode
+{
+    struct ListNode* Prev;
+    struct ListNode* Next;
+    int Entry;
+} ListNode;
+
+typedef struct MSSpectrum
+{
+    int MZ;
+    int ParentMass;
+    float SignalToNoise;
+    // Parent m/z from the file (BEFORE correction)
+    int FileMZ;
+    // Parent mass based on the file (BEFORE correction)
+    int FileMass;
+    // The input file may indicate no charge at all (in which case we guess),
+    // a single charge (in which case we accept it, OR guess if MultiCharge is set),
+    // or multiple charges (in which case we accept it, OR guess if MultiCharge is set).
+    char FileCharge[6];
+    int FileChargeFlag;
+    int Charge;
+    int PeakCount;
+    // PeakAllocation is the size of the allocated Peaks array; >= PeakCount
+    // When we run out of space in the array, we reallocate to double size.
+    int PeakAllocation; 
+    SpectralPeak* Peaks;
+    int UnfilteredPeakCount;
+    SpectralPeak* UnfilteredPeaks;
+    int PRMPeakCount;
+    float MaxIntensity; // max over all peaks
+    int PMCorrectedFlag;
+    struct TagGraph* Graph;
+    int CandidatesScored; 
+    int IntensityBinCount;
+    float* BinnedIntensitiesTight; // size IntensityBinCount; used for PMC.  Tighter radius
+    float* BinnedIntensities; // size IntensityBinCount
+    int* BinnedIntensityLevels; // size IntensityBinCount
+    int* BinPeakIndex; // size IntensityBinCount
+    float IntensityCutoffLow;
+    float IntensityCutoffMedium;
+    float IntensityCutoffHigh;
+    struct SpectrumNode* Node;
+    // For use by IonScoring:
+    float* IntensityThresholds;
+    float* IonScoringNoiseProbabilities;
+    // For phosphorylation trickery: we remove the superdominant M-p (phosphate-loss) peak
+    int RemovedPeakIndex;
+    float RemovedPeakIntensity;
+
+#ifdef VERBOSE_DEBUGGING
+    char** PRMDebugStrings;
+#endif
+} MSSpectrum;
+
+int GuessSpectralCharge(MSSpectrum* Spectrum);
+void UnitTestSpectrum();
+void WindowFilterPeaks(MSSpectrum* Spectrum, float WindowWidth, int KeepCount);
+void SpectrumAssignIsotopeNeighbors(MSSpectrum* Spectrum);
+void IntensityRankPeaks();
+MSSpectrum* NewSpectrum();
+void FreeSpectrum(MSSpectrum* Spectrum);
+int SpectrumLoadFromFile(MSSpectrum* Spectrum, FILE* DTAFile);
+void SpectrumCorrectParentMass(MSSpectrum* Spectrum);
+void SpectrumSetCharge(MSSpectrum* Spectrum, int Charge);
+void FreeMatchList(SpectrumNode* Spectrum);
+//void SpectrumComputeBinnedIntensities(SpectrumNode* Node);
+void SpectrumComputeNoiseDistributions(SpectrumNode* Node);
+int GuessSpectrumFormatFromExtension(char* FileName);
+void FreeSpectrumNode(SpectrumNode* Node);
+int GuessSpectrumFormatFromHeader(char* FilePath, MSSpectrum* Spectrum);
+#endif // SPECTRUM_H
diff --git a/SpliceDB.c b/SpliceDB.c
new file mode 100644
index 0000000..144d834
--- /dev/null
+++ b/SpliceDB.c
@@ -0,0 +1,4212 @@
+//Title:          SpliceDB.c
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+
+// SpliceDB.c constructs a splice-tolerant database, starting from a collection of INTERVALS with LINKS.
+// Translated from the original Python script, CollectExons.py, for efficiency
+#include "CMemLeak.h"
+#include "Utils.h"
+#include <stdio.h>
+#include <math.h>
+#include <stdlib.h>
+#include "Trie.h"
+#include "Inspect.h"
+#include "Spliced.h"
+#include "SpliceDB.h"
+#include "SNP.h"
+
+#define MAX_INTERVALS_PER_GENE 2000
+#define MAX_INTERVAL_LENGTH 100000
+
+// Reject any splice junction from an EST whose splice consensus score is below
+// this cutoff, unless more than one EST supports the junction.
+#define SPLICE_SIGNAL_SCORE_CUTOFF -15
+#define DEFAULT_MINIMUM_ORF_LENGTH 50
+#define IFLAG_FRAME_0 1
+#define IFLAG_FRAME_1 2
+#define IFLAG_FRAME_2 4
+#define IFLAG_ALL_READING_FRAMES 7
+
+// We might be parsing many, many intervals from two different sources...which means
+// they'll be read out-of-order.  We maintain an index g_IntervalIndex such that
+// g_IntervalIndex[n] is the first interval whose start is at least n*10000.
+// When it comes time to insert a new interval, we check g_IntervalIndex.  If 
+// the entry is NULL, the interval goes at the end of the global list.  If the
+// interval isn't null, the interval goes NEAR that interval (maybe a little earlier, 
+// maybe a little later, but the scan is cheap and that's the key idea)
+IntervalNode** g_IntervalIndex = NULL;
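+
+// Worked example of the index: an interval starting at position 123456 hashes to
+// bin 123456 / 10000 = 12, so AddInterval() starts its scan from g_IntervalIndex[12]
+// instead of walking the whole chromosome-wide list.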
+
+// Linked list of all intervals in a chromosome (+ orientation):
+IntervalNode* g_FirstInterval;
+IntervalNode* g_LastInterval;
+
+// Linked list of GeneNode structs for the current gene. 
+GeneNode* g_GeneFirst;
+GeneNode* g_GeneLast;
+int GeneNodeCount;
+
+// Int variables for reporting statistics on the database generation algorithms:
+int g_StatsIncompleteGeneCount = 0;
+int g_StatsLargestGeneSize = 0;
+int g_StatsLargestGeneRecordNumber = 0;
+int g_StatsIntervalsBeforeMerge = 0;
+int g_StatsEdgesBeforeMerge = 0;
+int g_StatsIntervalsAfterMerge = 0;
+int g_StatsEdgesAfterMerge = 0;
+int g_StatsIntervalsAfterIntersect = 0;
+int g_StatsEdgesAfterIntersect = 0;
+int g_StatsTotalExonsWritten = 0;
+int g_StatsTotalEdgesWritten = 0;
+
+// Forward declarations:
+IntervalNode* InsertIntervalBefore(IntervalNode* Interval, IntervalNode* Before);
+IntervalNode* InsertIntervalAfter(IntervalNode* Interval, IntervalNode* After);
+int BuildAndWriteExons(FILE* GenomicFile, FILE* OutputFile, int ReverseFlag, char* GeneName, int ChromosomeNumber, int MinORFLength);
+void MaskBrokenSequence(char* Protein, int MinORFLength);
+void IntegrityCheckGene();
+void PruneShortORFs(int ReverseFlag, int MinimumORFLength);
+void DeleteExonLink(ExonNode* Exon, ExonLink* Link, int ForwardFlag);
+void PurgeNonCodingExonChunks();
+void GenomeDAGLinkBack(GenomeDAGNode* DAGNode, GenomeDAGNode* BackDAGNode, int Count);
+void FreeIntervalExons(IntervalNode* Interval);
+
+// Print the current GeneNode list to stdout, for debugging.
+void DebugPrintBuiltGene()
+{
+    GeneNode* GNode;
+    ExonNode* Exon;
+    ExonNode* OtherExon;
+    ExonLink* Link;
+    char Buffer[512];
+    int IntervalLen;
+    int TrueLen;
+    GenomeDAGNode* DAGNode;
+    GenomeDAGLink* DAGLink;
+    int DAGNodeIndex;
+    //
+    printf("\n--== Current gene ==--\n");
+    for (GNode = g_GeneFirst; GNode; GNode = GNode->Next)
+    {
+        printf("  Interval from %d to %d flag %d\n", GNode->Interval->Start, GNode->Interval->End, GNode->Interval->Flags);
+        for (DAGNodeIndex = 0; DAGNodeIndex < GNode->Interval->DAGNodeCount; DAGNodeIndex++)
+        {
+            DAGNode = GNode->Interval->DAGNodes + DAGNodeIndex;
+            // Skip any extra allocation (null DAG nodes)
+            if (!DAGNode->Sequence)
+            {
+                continue;
+            }
+            printf("    DAG node from %d to %d (%d bases)\n", DAGNode->Start, DAGNode->End , DAGNode->End - DAGNode->Start);
+            for (DAGLink = DAGNode->FirstBack; DAGLink; DAGLink = DAGLink->Next)
+            {
+                printf("    << Link back %d to DAG node %d-%d\n", DAGLink->Count, DAGLink->Node->Start, DAGLink->Node->End);
+            }
+            for (DAGLink = DAGNode->FirstForward; DAGLink; DAGLink = DAGLink->Next)
+            {
+                printf("    >> Link forw %d to DAG node %d-%d\n", DAGLink->Count, DAGLink->Node->Start, DAGLink->Node->End);
+            }
+        }
+        for (Exon = GNode->Interval->FirstExon; Exon; Exon = Exon->Next)
+        {
+            printf("    Exon from %d to %d (%dAA)\n", Exon->Start, Exon->End, Exon->Length);
+            IntervalLen = Exon->End - Exon->Start;
+            if (IntervalLen <= 0)
+            {
+                printf("** WARNING: Exon is * * E M P T Y * * \n");
+            }
+            TrueLen = strlen(Exon->Prefix) + Exon->Length*3 + strlen(Exon->Suffix);
+            if (IntervalLen != TrueLen)
+            {
+                printf("** Warning: %d-%d is length %d, but true exon length is %zu+%d+%zu\n", 
+                    Exon->Start, Exon->End, IntervalLen, 
+                    strlen(Exon->Prefix), Exon->Length*3, strlen(Exon->Suffix));
+            }
+            if (Exon->Sequence)
+            {
+                strncpy(Buffer, Exon->Sequence, 512);
+                Buffer[511] = '\0';
+                printf("    Sequence(partial): %s\n", Buffer);
+            }
+            for (Link = Exon->FirstBack; Link; Link = Link->Next)
+            {
+                OtherExon = Link->Exon;
+                printf("      Link back %d to an exon from %d to %d (%dAA)\n", Link->Power, 
+                    OtherExon->Start, OtherExon->End, OtherExon->Length);
+                if (OtherExon->Sequence)
+                {
+                    strncpy(Buffer, OtherExon->Sequence, 50);
+                    Buffer[50] = '\0';
+                    printf("      Ls: %s\n", Buffer);
+                }
+                if ((OtherExon->Start != Exon->End) && (OtherExon->Start != Exon->End + 1) && 
+                    (OtherExon->End != Exon->Start) && (OtherExon->End != Exon->Start - 1))
+                {
+                    if (!Link->Power)
+                    {
+                        printf("** Warning: Link with no power!\n");
+                    }
+                }
+            }
+            
+            for (Link = Exon->FirstForward; Link; Link = Link->Next)
+            {
+                OtherExon = Link->Exon;
+                printf("      Link forward %d to an exon from %d to %d (%dAA)\n", Link->Power, OtherExon->Start, 
+                    OtherExon->End, OtherExon->Length);
+                if (OtherExon->Sequence)
+                {
+                    strncpy(Buffer, OtherExon->Sequence, 50);
+                    Buffer[50] = '\0';
+                    printf("      Ls: %s\n", Buffer);
+                }
+                if ((OtherExon->Start != Exon->End) && (OtherExon->Start != Exon->End + 1) && 
+                    (OtherExon->End != Exon->Start) && (OtherExon->End != Exon->Start - 1))
+                {
+                    if (!Link->Power)
+                    {
+                        printf("** Warning: Link with no power!\n");
+                    }
+                }
+
+            }
+        }
+    }
+}
+
+// Print all intervals to stdout.  (VERY verbose, if done for the whole chromosome!)
+void DebugPrintIntervals(int IncludeLinks, int CountingFlag,
+    int CoverageStart, int CoverageEnd)
+{
+    IntervalNode* Interval;
+    EdgeNode* Edge;
+    int IntervalCount = 0;
+    int ForwardCount = 0;
+    int BackwardCount = 0;
+    int IForwardCount = 0;
+    int IBackwardCount = 0;
+
+    printf("\n\n=-=-=-=-=- Intervals =-=-=-=-=-\n");
+    for (Interval = g_FirstInterval; Interval; Interval = Interval->Next)
+    {
+        IntervalCount++;
+        // Skip output of intervals not in the range CoverageStart...CoverageEnd
+        if (CoverageStart >= 0 && Interval->End < CoverageStart)
+        {
+            continue;
+        }
+        if (CoverageEnd >= 0 && Interval->Start > CoverageEnd)
+        {
+            continue;
+        }
+        if (IncludeLinks >= 0)
+        {
+            IForwardCount = 0;
+            IBackwardCount = 0;
+            for (Edge = Interval->FirstForward; Edge; Edge = Edge->Next)
+            {
+                IForwardCount++;
+            }
+            for (Edge = Interval->FirstBack; Edge; Edge = Edge->Next)
+            {
+                IBackwardCount++;
+            }
+            printf("%d-%d %d <%d >%d\n", Interval->Start, Interval->End, Interval->Occurrences,
+                IBackwardCount, IForwardCount);
+        }
+        
+        for (Edge = Interval->FirstForward; Edge; Edge = Edge->Next)
+        {
+            if (IncludeLinks > 0)
+            {
+                printf("  -> %d-%d (%d)\n", Edge->Interval->Start, Edge->Interval->End, Edge->Count);
+            }
+            if (Edge->Interval->Start < Interval->Start)
+            {
+                printf("** Corruption: Forward link goes to an interval EARLIER along the chrom\n");
+                printf("** Start %d-%d, edge to %d-%d\n", Interval->Start, Interval->End, 
+                    Edge->Interval->Start, Edge->Interval->End);
+            }
+            ForwardCount++;
+        }
+        for (Edge = Interval->FirstBack; Edge; Edge = Edge->Next)
+        {
+            if (IncludeLinks > 0)
+            {
+                printf("  <- %d-%d (%d)\n", Edge->Interval->Start, Edge->Interval->End, Edge->Count);
+            }
+            if (Edge->Interval->Start > Interval->Start)
+            {
+                printf("** Corruption: Backward link goes to an interval LATER along the chrom\n");
+                printf("** Start %d-%d, edge to %d-%d\n", Interval->Start, Interval->End, 
+                    Edge->Interval->Start, Edge->Interval->End);
+            }
+            BackwardCount++;
+        }
+    }
+    printf("Total: %d intervals, %d forward links, %d backward links.\n", IntervalCount, ForwardCount, BackwardCount);
+    switch (CountingFlag)
+    {
+    case 1:
+        g_StatsIntervalsBeforeMerge = IntervalCount;
+        g_StatsEdgesBeforeMerge = ForwardCount;
+        break;
+    case 2:
+        g_StatsIntervalsAfterMerge = IntervalCount;
+        g_StatsEdgesAfterMerge = ForwardCount;
+        break;
+    case 3:
+        g_StatsIntervalsAfterIntersect = IntervalCount;
+        g_StatsEdgesAfterIntersect = ForwardCount;
+        break;        
+    default:
+        break;
+    }
+}
+
+// Add a new interval to the master list.  Or, if that interval has already been
+// seen, increment its count.  We use g_IntervalIndex to jump to *approximately* the right place 
+// in the global list of intervals, then scan forward or backward to find exactly the right spot.
+IntervalNode* AddInterval(int Start, int End, int Flags)
+{
+    IntervalNode* OldInterval;
+    IntervalNode* NewInterval;
+    int Bin;
+    int IterateBin;
+    //
+    if (!g_IntervalIndex)
+    {
+        // Somewhat hacky: Hard-coded size of 25000, large enough to cover human chromosome #1
+        g_IntervalIndex = (IntervalNode**)calloc(25000, sizeof(IntervalNode*));
+    }
+    Bin = Start / 10000;
+    OldInterval = g_IntervalIndex[Bin];
+    if (!OldInterval)
+    {
+        // This interval's start position is larger than any seen before! 
+        // Insert the interval at the end of the global list:
+        NewInterval = (IntervalNode*)calloc(sizeof(IntervalNode), 1);
+        NewInterval->Occurrences = 1;
+        NewInterval->Start = Start;
+        NewInterval->End = End;
+        NewInterval->Flags = Flags;
+        if (g_LastInterval)
+        {
+            g_LastInterval->Next = NewInterval;
+        }
+        NewInterval->Prev = g_LastInterval;
+        if (!g_FirstInterval)
+        {
+            g_FirstInterval = NewInterval;
+        }
+        g_LastInterval = NewInterval;
+        // Update the index:
+        for (IterateBin = Bin; IterateBin >= 0; IterateBin--)
+        {
+            if (g_IntervalIndex[IterateBin])
+            {
+                break;
+            }
+            g_IntervalIndex[IterateBin] = NewInterval;
+        }
+        return NewInterval;
+    }
+    // Next case: OldInterval is exactly right:
+    if (Start == OldInterval->Start && End == OldInterval->End)
+    {
+        OldInterval->Occurrences++;
+        OldInterval->Flags |= Flags;
+        return OldInterval;
+    }
+    // Next case: OldInterval precedes this interval.
+    if (Start > OldInterval->Start || (Start == OldInterval->Start && End > OldInterval->End))
+    {
+        // Iterate forward until OldInterval is NULL or OldInterval comes AFTER the new interval:
+        for (; OldInterval; OldInterval = OldInterval->Next)
+        {
+            if (OldInterval->Start > Start)
+            {
+                break;
+            }
+            if (OldInterval->Start == Start && OldInterval->End > End)
+            {
+                break;
+            }
+            if (OldInterval->Start == Start && OldInterval->End == End)
+            {
+                OldInterval->Occurrences++;
+                OldInterval->Flags |= Flags;
+                return OldInterval;
+            }
+        }
+        NewInterval = (IntervalNode*)calloc(sizeof(IntervalNode), 1);
+        NewInterval->Occurrences = 1;
+        NewInterval->Start = Start;
+        NewInterval->End = End;
+        NewInterval->Flags = Flags;
+        if (!OldInterval)
+        {
+            // The new interval comes at the END of the list:
+            if (g_LastInterval)
+            {
+                g_LastInterval->Next = NewInterval;
+            }
+            NewInterval->Prev = g_LastInterval;
+            g_LastInterval = NewInterval;
+        }
+        else
+        {
+            // Insert new interval just before OldInterval:
+            if (OldInterval->Prev)
+            {
+                OldInterval->Prev->Next = NewInterval;
+            }
+            NewInterval->Prev = OldInterval->Prev;
+            NewInterval->Next = OldInterval;
+            OldInterval->Prev = NewInterval;
+        }
+        return NewInterval;
+    }
+    else
+    {
+        // Last case: The new interval immediately precedes OldInterval.
+        NewInterval = (IntervalNode*)calloc(sizeof(IntervalNode), 1);
+        NewInterval->Occurrences = 1;
+        NewInterval->Start = Start;
+        NewInterval->End = End;
+        NewInterval->Flags = Flags;
+        if (OldInterval->Prev)
+        {
+            OldInterval->Prev->Next = NewInterval;
+        }
+        if (g_FirstInterval == OldInterval)
+        {
+            g_FirstInterval = NewInterval;
+        }
+        NewInterval->Prev = OldInterval->Prev;
+        NewInterval->Next = OldInterval;
+        OldInterval->Prev = NewInterval;
+        for (IterateBin = Bin; IterateBin >= 0; IterateBin--)
+        {
+            if (g_IntervalIndex[IterateBin] && (g_IntervalIndex[IterateBin]->Start < Start || (g_IntervalIndex[IterateBin]->Start == Start && g_IntervalIndex[IterateBin]->End < End)))
+            {
+                break;
+            }
+            g_IntervalIndex[IterateBin] = NewInterval;
+        }
+        return NewInterval;
+    }
+    //if (!FirstInterval)
+    //{
+    //    NewInterval = (IntervalNode*)calloc(sizeof(IntervalNode), 1);
+    //    NewInterval->Occurrences = 1;
+    //    NewInterval->Start = StartPos;
+    //    NewInterval->End = EndPos;
+    //    NewInterval->Flags = Flags;
+    //    FirstInterval = NewInterval;
+    //    LastInterval = NewInterval;
+    //    return NewInterval;
+    //}
+    //// After this loop, Interval is the last one before the new guy (or NULL, if the new guy
+    //// belongs at the start of the list), and NextInterval is the first one after the
+    //// new guy (or NULL, if the new guy belongs at the end of the list).
+    //for (Interval = LastInterval; Interval; Interval = Interval->Prev)
+    //{
+    //    if (Interval->Start == StartPos)
+    //    {
+    //        if (Interval->End > EndPos)
+    //        {
+    //            NextInterval = Interval;
+    //            continue;
+    //        }
+    //        if (Interval->End == EndPos)
+    //        {
+    //            Interval->Occurrences++;
+    //            Interval->Flags |= Flags;
+    //            return Interval;
+    //        }
+    //        break;
+    //    }
+    //    if (Interval->Start < StartPos)
+    //    {
+    //        break;
+    //    }
+    //    NextInterval = Interval;
+    //}
+
+    //NewInterval = (IntervalNode*)calloc(sizeof(IntervalNode), 1);
+    //NewInterval->Occurrences = 1;
+    //NewInterval->Start = StartPos;
+    //NewInterval->End = EndPos;
+    //NewInterval->Flags = Flags;
+
+    //if (!Interval)
+    //{
+    //    FirstInterval->Prev = NewInterval;
+    //    NewInterval->Next = FirstInterval;
+    //    FirstInterval = NewInterval;
+    //    return NewInterval;
+    //}
+    //Interval->Next = NewInterval;
+    //NewInterval->Prev = Interval;
+    //NewInterval->Next = NextInterval;
+    //if (NextInterval)
+    //{
+    //    NextInterval->Prev = NewInterval;
+    //}
+    //else
+    //{
+    //    LastInterval = NewInterval;
+    //}
+    //return NewInterval;
+    
+}
+
+// Link forward from interval A to interval B.
+void LinkIntervals(IntervalNode* A, IntervalNode* B, int Count, float Score)
+{
+    EdgeNode* OldEdge;
+    EdgeNode* NewEdge;
+    int Linked;
+    //
+    Linked = 0;
+    for (OldEdge = A->FirstForward; OldEdge; OldEdge = OldEdge->Next)
+    {
+        if (OldEdge->Interval == B)
+        {
+            OldEdge->Count += Count;
+            Linked = 1;
+        }
+    }
+    if (!Linked)
+    {
+        NewEdge = (EdgeNode*)calloc(sizeof(EdgeNode), 1);
+        NewEdge->Count = Count;
+        NewEdge->Score = Score;
+        NewEdge->Interval = B;
+        if (!A->FirstForward)
+        {
+            A->FirstForward = NewEdge;
+        }
+        else
+        {
+            A->LastForward->Next = NewEdge;
+            NewEdge->Prev = A->LastForward;
+        }
+        A->LastForward = NewEdge;
+    }
+    Linked = 0;
+    for (OldEdge = B->FirstBack; OldEdge; OldEdge = OldEdge->Next)
+    {
+        if (OldEdge->Interval == A)
+        {
+            OldEdge->Count += Count;
+            Linked = 1;
+        }
+    }
+    if (!Linked)
+    {
+        NewEdge = (EdgeNode*)calloc(sizeof(EdgeNode), 1);
+        NewEdge->Count = Count;
+        NewEdge->Score = Score;
+        NewEdge->Interval = A;
+        if (!B->FirstBack)
+        {
+            B->FirstBack = NewEdge;
+        }
+        else
+        {
+            B->LastBack->Next = NewEdge;
+            NewEdge->Prev = B->LastBack;
+        }
+        B->LastBack = NewEdge;
+    }
+}
+
+// Copied-and-modified from ParseIntervalsESTBinaryFile.
+// Parse intervals from a binary file, with reading-frame flags attached.
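+// Record layout, as read below: Start, End, Flags, Score (all ints), then JunctionCount (int),
+// followed by one (JunctionStart int, JunctionScore float) pair per junction.  Unlike the EST
+// file parsed by ParseIntervalsESTBinaryFile, there are no per-interval or per-junction
+// occurrence counts.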
+void ParseIntervalsGeneFindBinaryFile(char* FileName)
+{
+    FILE* File;
+    IntervalNode* Interval;
+    IntervalNode* BackInterval;
+    int Start;
+    int End;
+    int BytesRead;
+    int Score;
+    int Flags;
+    int FilePos = 0;
+    int TotalIntervals = 0;
+    int JunctionCount = 0;
+    int JunctionIndex;
+    int JunctionStart;
+    float JunctionScore;
+    int TotalJunctions = 0;
+    int BackIntervalFound;
+    //
+    File = fopen(FileName, "rb");
+    if (!File)
+    {
+        printf("** Error in ParseIntervalsBinaryFile: Can't open '%s'\n", FileName);
+        return;
+    }
+    while (1)
+    {
+
+        BytesRead = ReadBinary(&Start, sizeof(int), 1, File);
+        if (!BytesRead)
+        {
+            break;
+        }
+        FilePos += BytesRead;
+        BytesRead += ReadBinary(&End, sizeof(int), 1, File);
+        // Sanity check:
+        if (Start<0 || End<0 || End<=Start)
+        {
+            printf("** BARF: Gene finder output reports interval from %d to %d!\n", Start, End);
+        }
+        BytesRead += ReadBinary(&Flags, sizeof(int), 1, File);
+        BytesRead += ReadBinary(&Score, sizeof(int), 1, File);
+        
+        Interval = AddInterval(Start, End, Flags);
+        TotalIntervals++;
+        //FilePos += ReadBinary(&Interval->Occurrences, sizeof(int), 1, File);
+        FilePos += ReadBinary(&JunctionCount, sizeof(int), 1, File);
+        // Read a list of junctions that END at this interval.
+        for (JunctionIndex = 0; JunctionIndex < JunctionCount; JunctionIndex++)
+        {
+            FilePos += ReadBinary(&JunctionStart, sizeof(int), 1, File);
+            //FilePos += ReadBinary(&JunctionOccurrences, sizeof(int), 1, File);
+            FilePos += ReadBinary(&JunctionScore, sizeof(float), 1, File);
+            // Right here is where we filter crummy splice junctions:
+            //if (JunctionOccurrences < 2 && JunctionScore < SPLICE_SIGNAL_SCORE_CUTOFF)
+            //{
+            //    continue;
+            //}
+            TotalJunctions++;
+            // Find an interval which ends at the junction's splice point:
+            BackIntervalFound = 0;
+            for (BackInterval = Interval->Prev; BackInterval; BackInterval = BackInterval->Prev)
+            {
+                if (BackInterval->End == JunctionStart)
+                {
+                    BackIntervalFound = 1;
+                    LinkIntervals(BackInterval, Interval, 1, JunctionScore);
+                    break;
+                }
+            }
+            if (!BackIntervalFound)
+            {
+                printf("** Warning: Found a junction with no back-interval!\n");
+                printf("  Junction %d %f\n", JunctionStart,  JunctionScore);
+                printf("  Interval %d-%d\n", Interval->Start, Interval->End);
+            }
+        }
+    }
+    fclose(File);
+    printf("Read %d intervals, %d junctions.\n", TotalIntervals, TotalJunctions);
+}
+
+// Parse intervals from a binary file containing Interval records. Each Interval
+// record may contain a list of Junction records.
+// Interval record: IntervalStart, IntervalEnd, IntervalCount, JunctionCount
+// Junction record: Start, Count, Score
+// The list contains all junctions that END at the START of this interval.  (That way,
+// we can look *back* through the list to find the splice donor)
+// We can filter any junctions that don't have a good occurrence-Count or a good 
+// consensus splice signal Score.
+// Note that EST intervals have no particular reading frame specified.
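+// Example (hypothetical values, assuming 4-byte ints and floats): an interval 1000-1250 seen
+// 3 times, with one junction whose donor interval ends at 800 (5 occurrences, splice-signal
+// score 2.5), is stored as the ints 1000, 1250, 3, 1 followed by 800, 5 and the float 2.5,
+// i.e. 28 bytes in total.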
+void ParseIntervalsESTBinaryFile(char* FileName)
+{
+    FILE* File;
+    IntervalNode* Interval;
+    IntervalNode* BackInterval;
+    int Start;
+    int End;
+    int BytesRead;
+    int FilePos = 0;
+    int TotalIntervals = 0;
+    int JunctionCount;
+    int JunctionIndex;
+    int JunctionStart;
+    int JunctionOccurrences;
+    float JunctionScore;
+    int BackIntervalFound;
+    int TotalJunctions = 0;
+    //
+    File = fopen(FileName, "rb");
+    if (!File)
+    {
+        printf("** Error in ParseIntervalsBinaryFile: Can't open '%s'\n", FileName);
+        return;
+    }
+    while (1)
+    {
+
+        BytesRead = ReadBinary(&Start, sizeof(int), 1, File);
+        if (!BytesRead)
+        {
+            break;
+        }
+        FilePos += BytesRead;
+        BytesRead += ReadBinary(&End, sizeof(int), 1, File);
+        Interval = AddInterval(Start, End, IFLAG_ALL_READING_FRAMES);
+        TotalIntervals++;
+        FilePos += ReadBinary(&Interval->Occurrences, sizeof(int), 1, File);
+        FilePos += ReadBinary(&JunctionCount, sizeof(int), 1, File);
+        for (JunctionIndex = 0; JunctionIndex < JunctionCount; JunctionIndex++)
+        {
+            FilePos += ReadBinary(&JunctionStart, sizeof(int), 1, File);
+            FilePos += ReadBinary(&JunctionOccurrences, sizeof(int), 1, File);
+            FilePos += ReadBinary(&JunctionScore, sizeof(float), 1, File);
+            // Right here is where we filter crummy splice junctions:
+            if (JunctionOccurrences < 2 && JunctionScore < SPLICE_SIGNAL_SCORE_CUTOFF)
+            {
+                continue;
+            }
+            TotalJunctions++;
+            // Find an interval which ends at the junction's splice point:
+            BackIntervalFound = 0;
+            for (BackInterval = Interval->Prev; BackInterval; BackInterval = BackInterval->Prev)
+            {
+                if (BackInterval->End == JunctionStart)
+                {
+                    BackIntervalFound = 1;
+                    LinkIntervals(BackInterval, Interval, JunctionOccurrences, JunctionScore);
+                    break;
+                }
+            }
+            if (!BackIntervalFound)
+            {
+                printf("** Warning: Found a junction with no back-interval!\n");
+                printf("  Junction %d %d %f\n", JunctionStart, JunctionOccurrences, JunctionScore);
+                printf("  Interval %d-%d\n", Interval->Start, Interval->End);
+            }
+        }
+    }
+    fclose(File);
+    printf("Read %d intervals, %d junctions.\n", TotalIntervals, TotalJunctions);
+}
+
+// B inherits all backward links from A.
+// before:           after:
+//  C<->A           C<-\   A
+//      B               \->B
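+// Concretely: every back-edge of A (to some interval C) is copied onto B unless B already has
+// one, C's forward edge to A is re-pointed at B (freeing any pre-existing C->B edge so it is
+// not duplicated), and finally A's back-edge list is freed.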
+void AssimilateLinksBack(IntervalNode* A, IntervalNode* B)
+{
+    EdgeNode* NodeA;
+    EdgeNode* PrevA;
+    EdgeNode* NodeB;
+    EdgeNode* NodeC;
+    EdgeNode* Next;
+    IntervalNode* C;
+    int Found;
+    int ACStrength = 0;
+    int BCStrength = 0;
+    //
+    
+    for (NodeA = A->FirstBack; NodeA; NodeA = NodeA->Next)
+    {
+        ACStrength = NodeA->Count;
+        BCStrength = 0;
+        // Ensure that B has a link to this target:
+        Found = 0;
+        for (NodeB = B->FirstBack; NodeB; NodeB = NodeB->Next)
+        {
+            if (NodeB->Interval == NodeA->Interval)
+            {
+                BCStrength = NodeB->Count;
+                //NodeB->Count += ACStrength; // Counts are already full
+                Found = 1;
+                break;
+            }
+        }
+        // If B didn't already link back to the target, add an EdgeNode to B's list:
+        if (!Found)
+        {
+            NodeB = (EdgeNode*)calloc(sizeof(EdgeNode), 1);
+            NodeB->Interval = NodeA->Interval;
+            NodeB->Count = ACStrength;
+            if (B->LastBack)
+            {
+                NodeB->Prev = B->LastBack;
+                B->LastBack->Next = NodeB;
+            }
+            else
+            {
+                B->FirstBack = NodeB;
+            }
+            B->LastBack = NodeB;
+        }
+        // Switch C to point to B.  It's possible that C has a pointer to B already, in which case
+        // we'll free the old one.
+        C = NodeA->Interval;
+        for (NodeC = C->FirstForward; NodeC; NodeC = NodeC->Next)
+        {
+            if (NodeC->Interval == B)
+            {
+                //FoundCount += NodeC->Count;
+                Next = NodeC->Next;
+                if (Next)
+                {
+                    Next->Prev = NodeC->Prev;
+                }
+                if (NodeC->Prev)
+                {
+                    NodeC->Prev->Next = Next;
+                }
+                if (C->FirstForward == NodeC)
+                {
+                    C->FirstForward = NodeC->Next;
+                }
+                if (C->LastForward == NodeC)
+                {
+                    C->LastForward = NodeC->Prev;
+                }
+                SafeFree(NodeC);
+                NodeC = Next;
+                if (!NodeC)
+                {
+                    break;
+                }
+            }
+        }
+        Found = 0;
+        for (NodeC = C->FirstForward; NodeC; NodeC= NodeC->Next)
+        {
+            if (NodeC->Interval == A)
+            {
+                //NodeC->Count += FoundCount;
+                //NodeC->Count = ACStrength + BCStrength; // Counts are already full
+                NodeC->Count = ACStrength;
+                NodeC->Interval = B;
+                Found = 1;
+            }
+        }
+        if (!Found)
+        {
+            printf("*** Corruption!  %d-%d should link forward to %d-%d\n", C->Start, C->End, A->Start, A->End);
+        }
+    }
+    
+    // Free the old nodes:
+    PrevA = NULL;
+    for (NodeA = A->FirstBack; NodeA; NodeA = NodeA->Next)
+    {
+        SafeFree(PrevA);
+        PrevA = NodeA;
+    }
+    SafeFree(PrevA);
+    A->FirstBack = NULL;
+    A->LastBack = NULL;
+
+}
+
+// B inherits all the forward links from A.
+void AssimilateLinksForward(IntervalNode* A, IntervalNode* B)
+{
+    EdgeNode* NodeA;
+    EdgeNode* PrevA;
+    EdgeNode* NodeB;
+    EdgeNode* NodeC;
+    EdgeNode* Next;
+    IntervalNode* C;
+    int Found;
+    int ACStrength = 0;
+    int BCStrength = 0;
+    //
+    for (NodeA = A->FirstForward; NodeA; NodeA = NodeA->Next)
+    {
+        ACStrength = NodeA->Count;
+        BCStrength = 0;
+        // Ensure that B has a link to this target:
+        Found = 0;
+        for (NodeB = B->FirstForward; NodeB; NodeB = NodeB->Next)
+        {
+            if (NodeB->Interval == NodeA->Interval)
+            {
+                BCStrength = NodeB->Count;
+                //NodeB->Count += ACStrength; // Counts are already full
+                Found = 1;
+                break;
+            }
+        }
+        if (!Found)
+        {
+            NodeB = (EdgeNode*)calloc(sizeof(EdgeNode), 1);
+            NodeB->Interval = NodeA->Interval;
+            NodeB->Count = ACStrength;
+            if (B->LastForward)
+            {
+                NodeB->Prev = B->LastForward;
+                B->LastForward->Next = NodeB;
+            }
+            else
+            {
+                B->FirstForward = NodeB;
+            }
+            B->LastForward = NodeB;
+        }
+        // Switch C to point to B.  It's possible that C has a pointer to B already, in which case
+        // we'll free the old one.
+        C = NodeA->Interval;
+        for (NodeC = C->FirstBack; NodeC; NodeC= NodeC->Next)
+        {
+            if (NodeC->Interval == B)
+            {
+                //FoundCount += NodeC->Count;
+                Next = NodeC->Next;
+                if (Next)
+                {
+                    Next->Prev = NodeC->Prev;
+                }
+                if (NodeC->Prev)
+                {
+                    NodeC->Prev->Next = Next;
+                }
+                if (C->FirstBack == NodeC)
+                {
+                    C->FirstBack = NodeC->Next;
+                }
+                if (C->LastBack == NodeC)
+                {
+                    C->LastBack = NodeC->Prev;
+                }
+                SafeFree(NodeC);
+                NodeC = Next;
+                if (!NodeC)
+                {
+                    break;
+                }
+            }
+        }
+        Found = 0;
+        for (NodeC = C->FirstBack; NodeC; NodeC = NodeC->Next)
+        {
+            if (NodeC->Interval == A)
+            {
+                //NodeC->Count = ACStrength + BCStrength;// Counts are already full
+                NodeC->Count = ACStrength;
+                NodeC->Interval = B;
+                Found = 1;
+            }
+        }
+        if (!Found)
+        {
+            printf("*** Corruption!  %d-%d should link backward to %d-%d\n", C->Start, C->End, A->Start, A->End);
+        }
+    }
+    
+    // Free the old nodes:
+    PrevA = NULL;
+    for (NodeA = A->FirstForward; NodeA; NodeA = NodeA->Next)
+    {
+        SafeFree(PrevA);
+        PrevA = NodeA;
+    }
+    SafeFree(PrevA);
+    A->FirstForward = NULL;
+    A->LastForward = NULL;
+}
+void FreeIntervalDAG(IntervalNode* Interval)
+{
+    int DAGNodeIndex;
+    GenomeDAGNode* DAGNode;
+    GenomeDAGLink* Link;
+    GenomeDAGLink* PrevLink;
+    //
+    if (!Interval || !Interval->DAGNodes)
+    {
+        return;
+    }
+    for (DAGNodeIndex = 0; DAGNodeIndex < Interval->DAGNodeCount; DAGNodeIndex++)
+    {
+        DAGNode = Interval->DAGNodes + DAGNodeIndex;
+        SafeFree(DAGNode->Sequence);
+        SafeFree(DAGNode->Exons);
+        // Free links back:
+        PrevLink = NULL;
+        for (Link = DAGNode->FirstBack; Link; Link = Link->Next)
+        {
+            SafeFree(PrevLink);
+            PrevLink = Link;
+        }
+        SafeFree(PrevLink);
+        // Free links forward:
+        PrevLink = NULL;
+        for (Link = DAGNode->FirstForward; Link; Link = Link->Next)
+        {
+            SafeFree(PrevLink);
+            PrevLink = Link;
+        }
+        SafeFree(PrevLink);
+    }
+    SafeFree(Interval->DAGNodes);
+    Interval->DAGNodes = NULL;
+    Interval->DAGNodeCount = 0;
+}
+
+void FreeInterval(IntervalNode* Interval)
+{
+    //
+    FreeIntervalDAG(Interval);
+    FreeIntervalExons(Interval);
+
+    Interval->FirstForward = NULL;
+    Interval->FirstBack = NULL;
+    Interval->LastForward = NULL;
+    Interval->LastBack = NULL;
+    Interval->Start = -1;
+    Interval->End = -1;
+    SafeFree(Interval);
+}
+
+// Remove an interval from the master list.  And USUALLY, free the
+// interval and its edges.  If DontFree is true, then don't free
+// any memory yet.
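+// (IntersectIntervals passes DontFree = 1 when it temporarily unlinks an interval that it is
+// about to trim and re-insert elsewhere in the list.)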
+void RemoveInterval(IntervalNode* Interval, int DontFree)
+{
+    EdgeNode* Prev;
+    EdgeNode* Edge;
+    EdgeNode* NeighborEdge;
+    if (Interval == g_FirstInterval)
+    {
+        g_FirstInterval = Interval->Next;
+    }
+    if (Interval == g_LastInterval)
+    {
+        g_LastInterval = Interval->Prev;
+    }
+    if (Interval->Prev)
+    {
+        Interval->Prev->Next = Interval->Next;
+    }
+    if (Interval->Next)
+    {
+        Interval->Next->Prev = Interval->Prev;
+    }
+    if (!DontFree)
+    {
+        Prev = NULL;
+        Edge = Interval->FirstForward;
+        while (Edge)
+        {
+            SafeFree(Prev);
+            Prev = Edge;
+            // If someone points at us, free their pointer (to avoid corruption!)
+            for (NeighborEdge = Edge->Interval->FirstBack; NeighborEdge; NeighborEdge = NeighborEdge->Next)
+            {
+                if (NeighborEdge->Interval == Interval)
+                {
+                    if (Edge->Interval->FirstBack == NeighborEdge)
+                    {
+                        Edge->Interval->FirstBack = NeighborEdge->Next;
+                    }
+                    if (Edge->Interval->LastBack == NeighborEdge)
+                    {
+                        Edge->Interval->LastBack = NeighborEdge->Prev;
+                    }
+                    if (NeighborEdge->Prev)
+                    {
+                        NeighborEdge->Prev->Next = NeighborEdge->Next;
+                    }
+                    if (NeighborEdge->Next)
+                    {
+                        NeighborEdge->Next->Prev = NeighborEdge->Prev;
+                    }
+                    SafeFree(NeighborEdge);
+                    break;
+                }
+            }
+
+            Edge = Edge->Next;
+        }
+        SafeFree(Prev);
+        //
+        Prev = NULL;
+        Edge = Interval->FirstBack;
+        while (Edge)
+        {
+            SafeFree(Prev);
+            Prev = Edge;
+            // If someone points at us, free their pointer (to avoid corruption!)
+            for (NeighborEdge = Edge->Interval->FirstForward; NeighborEdge; NeighborEdge = NeighborEdge->Next)
+            {
+                if (NeighborEdge->Interval == Interval)
+                {
+                    if (Edge->Interval->FirstForward == NeighborEdge)
+                    {
+                        Edge->Interval->FirstForward = NeighborEdge->Next;
+                    }
+                    if (Edge->Interval->LastForward == NeighborEdge)
+                    {
+                        Edge->Interval->LastForward = NeighborEdge->Prev;
+                    }
+                    if (NeighborEdge->Prev)
+                    {
+                        NeighborEdge->Prev->Next = NeighborEdge->Next;
+                    }
+                    if (NeighborEdge->Next)
+                    {
+                        NeighborEdge->Next->Prev = NeighborEdge->Prev;
+                    }
+                    SafeFree(NeighborEdge);
+                    break;
+                }
+            }
+            Edge = Edge->Next;
+        }
+        SafeFree(Prev);
+        FreeInterval(Interval);
+    }
+}
+
+// Merge all redundant intervals.  Intervals which overlap, and
+// have no incompatible edges, can be merged into one large(r) interval.
+// The merged interval inherits all reading frames of the subintervals.
+// (This could add some redundancy, but not much, especially if we later
+// prune short ORFs)
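+// Example (case 4 below): intervals 100-200 and 150-250, where the first has no forward
+// edges and the second has no backward edges, merge into a single interval 100-250 carrying
+// the union of their reading-frame flags and the sum of their occurrence counts.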
+void MergeIntervals()
+{
+    IntervalNode* MergeA;
+    IntervalNode* NextMergeA;
+    IntervalNode* MergeB;
+    int MergePerformed = 0;
+    int TotalMergesPerformed = 0;
+    //
+    
+    NextMergeA = g_FirstInterval;
+    while (1)
+    {
+        
+        MergeA = NextMergeA;
+        if (!MergeA)
+        {
+            break;
+        }
+        MergePerformed = 0;
+        MergeB = MergeA->Next;
+        while (1)
+        {
+            if (MergePerformed)
+            {
+                TotalMergesPerformed++;
+                //DebugPrintIntervals(-1, 0);
+                break;
+            }
+            if (!MergeB || MergeB->Start > MergeA->End)
+            {
+                NextMergeA = MergeA->Next;
+                break;
+            }
+            // Case 0: Identical!
+            if (MergeA->Start == MergeB->Start && MergeA->End == MergeB->End)
+            {
+                //printf("%% [0] Merge two identical intervals %d-%d\n", MergeA->Start, MergeA->End);
+                AssimilateLinksBack(MergeB, MergeA);
+                AssimilateLinksForward(MergeB, MergeA);
+                MergeA->Occurrences += MergeB->Occurrences;
+                MergeA->Flags |= MergeB->Flags;
+                RemoveInterval(MergeB, 0);
+                MergePerformed = 1;
+            }
+            // Case 1: Same starting point, A doesn't link forward:
+            else if (MergeA->Start == MergeB->Start && !MergeA->FirstForward)
+            {
+                //printf("%% [1] Same starting point: %d-%d, %d-%d\n", MergeA->Start, MergeA->End, MergeB->Start, MergeB->End);
+                AssimilateLinksBack(MergeA, MergeB);
+                MergeB->Occurrences += MergeA->Occurrences;
+                MergeB->Flags |= MergeA->Flags;
+                NextMergeA = MergeA->Next;
+                RemoveInterval(MergeA, 0);
+                MergePerformed = 1;
+            }
+            // Case 2: Same ending point, B doesn't link backward:
+            else if (MergeA->End == MergeB->End && !MergeB->FirstBack)
+            {
+                //printf("%% [2] Same ending point: %d-%d, %d-%d\n", MergeA->Start, MergeA->End, MergeB->Start, MergeB->End);
+                AssimilateLinksForward(MergeB, MergeA);
+                MergeA->Occurrences += MergeB->Occurrences;
+                MergeA->Flags |= MergeB->Flags;
+                NextMergeA = MergeA;
+                RemoveInterval(MergeB, 0);
+                MergePerformed = 1;
+            }
+            // Case 3: Full overlap, no links in B:
+            else if (MergeA->Start < MergeB->Start && MergeB->End < MergeA->End && !MergeB->FirstBack && !MergeB->FirstForward)
+            {
+                //printf("%% [3] full overlap: %d-%d, %d-%d\n", MergeA->Start, MergeA->End, MergeB->Start, MergeB->End);
+                MergeA->Occurrences += MergeB->Occurrences;
+                MergeA->Flags |= MergeB->Flags;
+                NextMergeA = MergeA;
+                RemoveInterval(MergeB, 0);
+                MergePerformed = 1;
+            }
+            // Case 4: 'proper' overlap, A no forward, B no backward:
+            else if (MergeB->Start > MergeA->Start && MergeB->End > MergeA->End && !MergeA->FirstForward && !MergeB->FirstBack)
+            {
+                //printf("%% [4] Proper overlap: %d-%d, %d-%d\n", MergeA->Start, MergeA->End, MergeB->Start, MergeB->End);
+                MergeA->End = MergeB->End;
+                AssimilateLinksForward(MergeB, MergeA);
+                MergeA->Occurrences += MergeB->Occurrences;
+                MergeA->Flags |= MergeB->Flags;
+                NextMergeA = MergeA;
+                RemoveInterval(MergeB, 0);
+                MergePerformed = 1;
+            }
+            else
+            {
+                // Default case: Non-mergable
+                MergeB = MergeB->Next;
+            }
+        }
+    } // Iterate MergeA
+    printf("Performed a total of %d merges.\n", TotalMergesPerformed);
+}
+
+// If two intervals intersect, then we don't want to store separate exons for each one.  That would be a lot of 
+// redundant sequence data!  So, after calling MergeIntervals, we call IntersectIntervals().  
+// The routine IntersectIntervals will produce a (near-)minimal disjoint set of intervals covering all the 
+// ESTs and splice boundaries we've ever seen.  The intersection interval inherits all reading frames from its
+// parents.
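+// Example: intervals 100-300 and 200-400 (a 'proper overlap') are replaced by the disjoint
+// chain 100-200, 200-300, 300-400; the middle piece inherits the reading-frame flags of both
+// parents, and the outer pieces keep the outside links of their respective parents.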
+void IntersectIntervals()
+{
+    IntervalNode* A;
+    IntervalNode* NextA;
+    IntervalNode* B;
+    IntervalNode* C;
+    IntervalNode* D;
+    int IntersectPerformed = 0;
+    //
+    
+    NextA = g_FirstInterval;
+    while (1)
+    {
+        A = NextA;
+        if (!A)
+        {
+            break;
+        }
+        if (IntersectPerformed)
+        {
+            //DebugPrintIntervals(1, 0);
+        }
+        IntersectPerformed = 0;
+        B = A->Next;
+        if (!B)
+        {
+            break;
+        }
+        // Easy case: B starts after A ends.  No intersection required:
+        if (B->Start >= A->End)
+        {
+            NextA = A->Next;
+            continue;
+        }
+        if (B->Start == A->Start && B->End == A->End)
+        {
+            //printf("%d-%d is identical to %d-%d\n", A->Start, A->End, B->Start, B->End);
+            AssimilateLinksForward(B, A);
+            AssimilateLinksBack(B, A);
+            A->Occurrences += B->Occurrences;
+            A->Flags |= B->Flags;
+            NextA = A;
+            RemoveInterval(B, 0);
+            IntersectPerformed = 1;
+            continue;
+        }
+        if (B->Start == A->Start)
+        {
+            // |----| A    
+            // |-----------| B
+            //
+            // |----|------|
+            //   A     C
+            //printf("%d-%d has same START as %d-%d\n", A->Start, A->End, B->Start, B->End);
+            C = (IntervalNode*)calloc(sizeof(IntervalNode), 1);
+            C->Start = A->End;
+            C->End = B->End;
+            C->Occurrences = B->Occurrences;
+            C->Flags = B->Flags;
+            A->Flags |= B->Flags;
+            AssimilateLinksForward(B, C);
+            AssimilateLinksBack(B, A);
+            LinkIntervals(A, C, 0, 0);
+            RemoveInterval(B, 0);
+            C = InsertIntervalAfter(C, A);
+            NextA = A;
+            IntersectPerformed = 1;
+            continue;
+        }
+        if (B->End == A->End)
+        {
+            // |-----------| A    
+            //       |-----| B
+            //
+            // |----|------|
+            //   C     B
+
+            //printf("%d-%d has same END as %d-%d\n", A->Start, A->End, B->Start, B->End);
+            NextA = A->Prev;
+            C = (IntervalNode*)calloc(sizeof(IntervalNode), 1);
+            C->Start = A->Start;
+            C->End = B->Start;
+            C->Occurrences = A->Occurrences;
+            C->Flags = A->Flags;
+            B->Flags |= A->Flags;
+            AssimilateLinksForward(A, B);
+            AssimilateLinksBack(A, C);
+            LinkIntervals(C, B, 0, 0);
+            RemoveInterval(A, 0);
+            C = InsertIntervalBefore(C, B);
+            if (!NextA)
+            {
+                NextA = g_FirstInterval;
+            }
+            IntersectPerformed = 1;
+            continue;
+        }
+        // |---------------|
+        //       |---|
+        //
+        // |-----|---|-----|
+        //    C    B    D
+        if (B->End < A->End)
+        {
+            //printf("%d-%d CONTAINS %d-%d\n", A->Start, A->End, B->Start, B->End);
+            C = (IntervalNode*)calloc(sizeof(IntervalNode), 1);
+            C->Start = A->Start;
+            C->End = B->Start;
+            C->Flags = A->Flags;
+            D = (IntervalNode*)calloc(sizeof(IntervalNode), 1);
+            D->Start = B->End;
+            D->End = A->End;
+            D->Flags = A->Flags;
+            B->Flags |= A->Flags;
+            AssimilateLinksBack(A, C);
+            AssimilateLinksForward(A, D);
+            LinkIntervals(C, B, 0, 0);
+            LinkIntervals(B, D, 0, 0);
+            C = InsertIntervalBefore(C, B);
+            D = InsertIntervalAfter(D, B);
+            RemoveInterval(A, 0);
+            NextA = C;
+            IntersectPerformed = 1;
+            continue;
+        }
+        // |-------------| A
+        //          |---------| B
+        //
+        // |--------|----|----|
+        //     C      B     D
+        //printf("%d-%d has PROPER OVERLAP with %d-%d\n", A->Start, A->End, B->Start, B->End);
+        C = (IntervalNode*)calloc(sizeof(IntervalNode), 1);
+        C->Start = A->Start;
+        C->End = B->Start;
+        C->Occurrences = A->Occurrences;
+        C->Flags = A->Flags;
+        D = (IntervalNode*)calloc(sizeof(IntervalNode), 1);
+        D->Start = A->End;
+        D->End = B->End;
+        D->Occurrences = B->Occurrences;
+        D->Flags = B->Flags;
+        B->Flags |= A->Flags;
+        //B2 = (IntervalNode*)calloc(sizeof(IntervalNode), 1);
+        AssimilateLinksBack(A, C);
+        AssimilateLinksForward(B, D);
+        AssimilateLinksForward(A, B);
+        LinkIntervals(C, B, 0, 0);
+        LinkIntervals(B, D, 0, 0);
+        C = InsertIntervalBefore(C, B);
+        D = InsertIntervalAfter(D, B);
+        RemoveInterval(B, 1);
+        B->End = A->End;
+        RemoveInterval(A, 0);
+        B = InsertIntervalBefore(B, D);
+        NextA = C;
+        IntersectPerformed = 1;
+        continue;
+    }
+}
+
+// Insert Interval into the master list.  It comes after After.
+IntervalNode* InsertIntervalAfter(IntervalNode* Interval, IntervalNode* After)
+{
+    IntervalNode* Node;
+    //
+    Node = After;
+    if (!Node)
+    {
+        Node = g_FirstInterval;
+    }
+    while (Node)
+    {
+        if (Node->Start > Interval->Start)
+        {
+            break;
+        }
+        if (Node->Start == Interval->Start)
+        {
+            if (Node->End == Interval->End)
+            {
+                AssimilateLinksForward(Interval, Node);
+                AssimilateLinksBack(Interval, Node);
+                Node->Occurrences += Interval->Occurrences;
+                SafeFree(Interval);
+                return Node;
+            }
+            if (Node->End > Interval->End)
+            {
+                break;
+            }
+        }
+        Node = Node->Next;
+    }
+    // At this point, Node is the guy that Interval will be inserted before:
+    if (!Node)
+    {
+        g_LastInterval->Next = Interval;
+        Interval->Prev = g_LastInterval;
+        g_LastInterval = Interval;
+    }
+    else
+    {
+        if (Node->Prev)
+        {
+            Node->Prev->Next = Interval;
+        }
+        Interval->Prev = Node->Prev;
+        Interval->Next = Node;
+        Node->Prev = Interval;
+    }
+    return Interval;
+}
+
+// Insert Interval into the master list.  It comes before Before.
+IntervalNode* InsertIntervalBefore(IntervalNode* Interval, IntervalNode* Before)
+{
+    IntervalNode* Node;
+    //
+    Node = Before;
+    if (!Node)
+    {
+        Node = g_LastInterval;
+    }
+    while (Node)
+    {
+        if (Node->Start < Interval->Start)
+        {
+            break;
+        }
+        if (Node->Start == Interval->Start)
+        {
+            if (Node->End == Interval->End)
+            {
+                AssimilateLinksForward(Interval, Node);
+                AssimilateLinksBack(Interval, Node);
+                Node->Occurrences += Interval->Occurrences;
+                SafeFree(Interval);
+                return Node;
+            }
+            if (Node->End < Interval->End)
+            {
+                break;
+            }
+        }
+        Node = Node->Prev;
+    }
+    // At this point, Node is the guy that Interval will be inserted after:
+    if (!Node)
+    {
+        g_FirstInterval->Prev = Interval;
+        Interval->Next = g_FirstInterval;
+        g_FirstInterval = Interval;
+    }
+    else
+    {
+        if (Node->Next)
+        {
+            Node->Next->Prev = Interval;
+        }
+        Interval->Next = Node->Next;
+        Interval->Prev = Node;
+        Node->Next = Interval;
+    }
+    return Interval;
+}
+
+// Add Interval to the current gene sometime after Start
+GeneNode* AddIntervalToGeneAfter(GeneNode* Start, IntervalNode* Interval)
+{
+    GeneNode* Node;
+    GeneNode* NewNode;
+    //
+    for (Node = Start; Node; Node = Node->Next)
+    {
+        if (Node->Interval->Start == Interval->Start)
+        {
+            // Already on list, good.
+            return Node;
+        }
+        if (Node->Interval->Start > Interval->Start)
+        {
+            NewNode = (GeneNode*)calloc(sizeof(GeneNode), 1);
+            NewNode->Interval = Interval;
+            Interval->GNode = NewNode;
+            if (Node->Prev)
+            {
+                Node->Prev->Next = NewNode;
+                NewNode->Prev = Node->Prev;
+            }
+            NewNode->Next = Node;
+            Node->Prev = NewNode;
+            GeneNodeCount++;
+            return NewNode;
+        }
+    }
+    // We ran off the edge of the list without seeing something that comes after the new interval.
+    // So, the new interval becomes the last one of the gene:
+    NewNode = (GeneNode*)calloc(sizeof(GeneNode), 1);
+    NewNode->Interval = Interval;
+    Interval->GNode = NewNode;
+    NewNode->Prev = g_GeneLast;
+    g_GeneLast->Next = NewNode;
+    g_GeneLast = NewNode;
+    GeneNodeCount++;
+    return NewNode;
+}
+
+// Add Interval to the current gene sometime before Start
+GeneNode* AddIntervalToGeneBefore(GeneNode* Start, IntervalNode* Interval)
+{
+    GeneNode* Node;
+    GeneNode* NewNode;
+    //
+    for (Node = Start; Node; Node = Node->Prev)
+    {
+        if (Node->Interval->Start == Interval->Start)
+        {
+            // Already on list, good.
+            return Node;
+        }
+        if (Node->Interval->Start < Interval->Start)
+        {
+            NewNode = (GeneNode*)calloc(sizeof(GeneNode), 1);
+            NewNode->Interval = Interval;
+            Interval->GNode = NewNode;
+            if (Node->Next)
+            {
+                Node->Next->Prev = NewNode;
+                NewNode->Next = Node->Next;
+            }
+            NewNode->Prev = Node;
+            Node->Next = NewNode;
+            GeneNodeCount++;
+            return NewNode;
+        }
+    }
+    // We ran off the edge of the list without seeing anything that comes before the new interval.
+    // So, the new interval becomes the first one of the gene:
+    NewNode = (GeneNode*)calloc(sizeof(GeneNode), 1);
+    NewNode->Interval = Interval;
+    Interval->GNode = NewNode;
+    NewNode->Next = g_GeneFirst;
+    g_GeneFirst->Prev = NewNode;
+    g_GeneFirst = NewNode;
+    GeneNodeCount++;
+    return NewNode;
+}
+
+// Add new GeneNodes to handle any peptides that start in GNode->Interval and extend forward.
+// GNode is the bookmark where we started the satisfaction effort, so when (if) we insert new
+// nodes, we'll insert them into the gene's node list starting the search from this bookmark.
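+// GNode->RX caches how many additional bases are guaranteed reachable by following forward
+// edges from this interval (the minimum over its branches); it is computed only as far as
+// needed to exceed the 60-character cut-off that marks the interval as satisfied.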
+int SatisfyIntervalForward(GeneNode* GNode, int CharsSoFar)
+{
+    EdgeNode* Edge;
+    int Chars;
+    GeneNode* SubGNode;
+    int RX;
+    int MinRX;
+    //
+    // If this node has already been satisfied, then return immediately:
+    if (GNode->RX + CharsSoFar > 60)
+    {
+        return GNode->RX;
+    }
+    MinRX = 9999;
+    // Iterate over all 'forward intervals' that this interval links to:
+    for (Edge = GNode->Interval->FirstForward; Edge; Edge = Edge->Next)
+    {
+        // Find (or create) the GeneNode for the forward interval:
+        SubGNode = Edge->Interval->GNode;
+        if (!SubGNode)
+        {
+            SubGNode = AddIntervalToGeneAfter(GNode, Edge->Interval);
+        }
+        RX = Edge->Interval->End - Edge->Interval->Start;
+        Chars = CharsSoFar + (Edge->Interval->End - Edge->Interval->Start);
+        if (Chars < 60)
+        {
+            // We're not yet satisfied along this path, so continue adding intervals:
+            RX += SatisfyIntervalForward(SubGNode, Chars);
+        }
+        MinRX = min(MinRX, RX);
+    }
+    // Sanity check: RX cannot decrease when you add more intervals, it can only improve.
+    if (MinRX < GNode->RX)
+    {
+        printf("%d < %d???\n", MinRX, GNode->RX);
+    }
+    GNode->RX = MinRX;
+    return MinRX;
+}
+
+// Add new GeneNodes to handle any peptides that start in GNode->Interval and extend backward
+int SatisfyIntervalBack(GeneNode* GNode, int CharsSoFar)
+{
+    EdgeNode* Edge;
+    int Chars;
+    GeneNode* SubGNode;
+    int LX;
+    int MinLX;
+    //
+    if (GNode->LX + CharsSoFar > 60)
+    {
+        return GNode->LX;
+    }
+    MinLX = 9999;
+    for (Edge = GNode->Interval->FirstBack; Edge; Edge = Edge->Next)
+    {
+        SubGNode = Edge->Interval->GNode;
+        if (!SubGNode)
+        {
+            SubGNode = AddIntervalToGeneBefore(GNode, Edge->Interval);
+        }
+        LX = Edge->Interval->End - Edge->Interval->Start;
+        Chars = CharsSoFar + (Edge->Interval->End - Edge->Interval->Start);
+        if (Chars < 60)
+        {
+            LX += SatisfyIntervalBack(SubGNode, Chars);
+        }
+        MinLX = min(MinLX, LX);
+    }
+    if (MinLX < GNode->LX)
+    {
+        printf("%d < %d???\n", MinLX, GNode->LX);
+    }
+
+    GNode->LX = MinLX;
+    return MinLX;
+
+}
+
+// Free all the GeneNode instances in the global list.
+void FreeGeneNodes()
+{
+    GeneNode* Prev;
+    GeneNode* Node;
+    // Free all the gene nodes:
+    Prev = NULL;
+    for (Node = g_GeneFirst; Node; Node = Node->Next)
+    {
+        Node->Interval->GNode = NULL;
+        SafeFree(Prev);
+        Prev = Node;
+    }
+    SafeFree(Prev);
+    g_GeneFirst = NULL;
+    g_GeneLast = NULL;
+    GeneNodeCount = 0;
+}
+
+// Take this interval, and 'satisfy' it by adding linked intervals until (a) there are 
+// no more links to follow, or (b) we have extended a considerable distance (in amino acids!).
+// Take the resulting pool of intervals, build exons for them, and write out one 
+// "gene" record.  
+int SatisfyIntervalAndWriteGene(IntervalNode* NextUnsatisfied, FILE* SequenceFile, FILE* OutputFile, 
+    int RecordNumber, int ChromosomeNumber, int ReverseFlag, int MinORFLength)
+{
+    GeneNode* Node;
+    GeneNode* Dissatisfied;
+    char GeneName[256];
+    int AllSatisfied;
+    char DirectionChar;
+    int ValidGeneFlag;
+    //
+
+    // First gene node wraps NextUnsatisfied:
+    g_GeneFirst = (GeneNode*)calloc(sizeof(GeneNode), 1);
+    g_GeneFirst->Interval = NextUnsatisfied;
+    NextUnsatisfied->GNode = g_GeneFirst;
+    g_GeneLast = g_GeneFirst;
+    GeneNodeCount = 1;
+
+    // Add the necessary gene nodes to satisfy:
+    Node = g_GeneFirst;
+    SatisfyIntervalForward(Node, 0);
+    SatisfyIntervalBack(Node, 0);
+    Node->Interval->Satisfied = 1;
+    // Iterate: If there are unsatisfied intervals in the gene, and the gene isn't too large, satisfy some more.
+    AllSatisfied = 0;
+    while (GeneNodeCount < MAX_INTERVALS_PER_GENE)
+    {
+        Dissatisfied = NULL;
+        // Find the first not-yet-satisfied interval:
+        for (Node = g_GeneFirst; Node; Node = Node->Next)
+        {
+            if (!Node->Interval->Satisfied)
+            {
+                Dissatisfied = Node;
+                break;
+            }
+        }
+        if (!Dissatisfied)
+        {
+            AllSatisfied = 1;
+            break; // Done!
+        }
+        SatisfyIntervalForward(Dissatisfied, 0);
+        SatisfyIntervalBack(Dissatisfied, 0);
+        Dissatisfied->Interval->Satisfied = 1;
+    }
+    if (!AllSatisfied)
+    {
+        g_StatsIncompleteGeneCount++;
+        IntegrityCheckGene();
+    }
+
+    // Write this gene out:
+    if (ReverseFlag)
+    {
+        DirectionChar = '-';
+    }
+    else
+    {
+        DirectionChar = '+';
+    }
+    sprintf(GeneName, "%d%c Gene %d, %d-%d", ChromosomeNumber, DirectionChar, RecordNumber, g_GeneFirst->Interval->Start, g_GeneLast->Interval->End);
+    // *************************
+    ValidGeneFlag = BuildAndWriteExons(SequenceFile, OutputFile, ReverseFlag, GeneName, ChromosomeNumber, MinORFLength);
+
+    FreeGeneNodes();
+    return ValidGeneFlag;
+}
+
+
+// Once the master interval list has been prepared, we can write out genes.
+// The procedure works like this:
+// We'll build a linked list of GeneNode structs, from g_GeneFirst to g_GeneLast, with size GeneNodeCount.  The intervals
+//   contained in this list of GeneNodes are what we'll write out as a gene record.
+// Iterate:
+// - Take the first interval not yet satisfied, A.  
+// - Find all neighbors necessary in order to satisfy A, and add them to the gene.
+// - Iterate:
+// -- If the gene contains too many intervals, stop.
+// -- If every interval in the gene has now been satisfied, stop.
+// -- Otherwise, take the first interval in the gene which has not yet been flagged satisfied, and add the necessary 
+//    intervals to satisfy it.  (It's possible that we already have the necessary intervals, and just need to 
+//    discover that fact) 
+// - For each interval in the active range: Construct exons
+// - Using the exon structs, write out a gene record
+// - Free the exon structs (they're bloaty, containing sequence strings) and the GeneNode list
+//
+// If the interval graph is well-behaved, and consists of small connected components, then we write one gene for each
+// connected component.  If the interval graph is messy, then our iterative procedure will cover the graph in
+// a reasonably efficient way.  (We're guaranteed to satisfy one interval with each gene, and we're likely to satisfy
+// several)
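+// Illustrative call sequence for one chromosome strand (a sketch based on the comments in
+// this file; the file names and the MinORFLength value are hypothetical):
+//
+//   ParseIntervalsESTBinaryFile("ESTIntervals.dat");  // build the master interval list
+//   MergeIntervals();                                 // collapse redundant intervals
+//   IntersectIntervals();                             // make the interval list disjoint
+//   WriteGenesForIntervals("chr1.trie", "Chr1Genes.dat", 1, 0, 50);
+//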
+int WriteGenesForIntervals(char* SequenceFileName, char* OutputFileName, int ChromosomeNumber, int ReverseFlag, int MinORFLength)
+{
+    FILE* SequenceFile;
+    FILE* OutputFile;
+    IntervalNode* NextUnsatisfied;
+    int RecordNumber = 0;
+    int ValidGeneFlag;
+    //
+    SequenceFile = fopen(SequenceFileName, "rb");
+    if (!SequenceFile)
+    {
+        printf("** ERROR: Unable to open chromosome database '%s'\n", SequenceFileName);
+        return 0;
+    }
+    OutputFile = fopen(OutputFileName, "wb");
+    if (!OutputFile)
+    {
+        printf("** ERROR: Unable to open output file '%s'\n", OutputFileName);
+        return 0;
+    }
+    // Iterate over intervals, skipping over intervals that have already been satisfied.
+    NextUnsatisfied = g_FirstInterval;
+    while (1)
+    {
+        if (!NextUnsatisfied)
+        {
+            break;
+        }
+        if (NextUnsatisfied->Satisfied)
+        {
+            NextUnsatisfied = NextUnsatisfied->Next;
+            continue;
+        }
+        
+        //printf("\n  - - - Satisfy the next interval: %d-%d\n", NextUnsatisfied->Start, NextUnsatisfied->End);
+        // if SatisfyIntervalAndWriteGene returns 0, then there's no real gene here (short ORFs were pruned).
+        ValidGeneFlag = SatisfyIntervalAndWriteGene(NextUnsatisfied, SequenceFile, OutputFile, RecordNumber, ChromosomeNumber, ReverseFlag, MinORFLength);
+        if (ValidGeneFlag)
+        {
+            RecordNumber++;
+            //printf("Wrote gene record %d\n", RecordNumber);
+        }
+    } // main loop for writing out genes.
+    printf("Genes have been written out.  Statistics:\n");
+    printf("%d\t", ChromosomeNumber);
+    printf("%d\t", ReverseFlag);
+    printf("%d\t", RecordNumber);
+    printf("%d\t", g_StatsIncompleteGeneCount);
+    printf("%d\t", g_StatsLargestGeneSize);
+    printf("%d\t", g_StatsLargestGeneRecordNumber);
+    printf("%d\t", g_StatsIntervalsBeforeMerge);
+    printf("%d\t", g_StatsEdgesBeforeMerge);
+    printf("%d\t", g_StatsIntervalsAfterMerge);
+    printf("%d\t", g_StatsEdgesAfterMerge);
+    printf("%d\t", g_StatsIntervalsAfterIntersect);
+    printf("%d\t", g_StatsEdgesAfterIntersect);
+    printf("%d\t", g_StatsTotalExonsWritten);
+    printf("%d\t", g_StatsTotalEdgesWritten);
+    printf("\n");
+    return RecordNumber;
+}
+
+// Free exons for an interval
+void FreeIntervalExons(IntervalNode* Interval)
+{
+    ExonNode* PrevExon;
+    ExonNode* Exon;
+    ExonLink* PrevLink;
+    ExonLink* Link;
+
+    PrevExon = NULL;
+    for (Exon = Interval->FirstExon; Exon; Exon = Exon->Next)
+    {
+        // Free forward links:
+        PrevLink = NULL;
+        for (Link = Exon->FirstForward; Link; Link = Link->Next)
+        {
+            SafeFree(PrevLink);
+            PrevLink = Link;
+        }
+        SafeFree(PrevLink);
+
+        // Free backward links:
+        PrevLink = NULL;
+        for (Link = Exon->FirstBack; Link; Link = Link->Next)
+        {
+            SafeFree(PrevLink);
+            PrevLink = Link;
+        }
+        SafeFree(PrevLink);
+
+        // Free the previous exon itself (guard against dereferencing a NULL pointer on the
+        // first pass, or when the interval has no exons at all):
+        if (PrevExon)
+        {
+            SafeFree(PrevExon->Sequence);
+            SafeFree(PrevExon);
+        }
+        PrevExon = Exon;
+    }
+    if (PrevExon)
+    {
+        SafeFree(PrevExon->Sequence);
+        SafeFree(PrevExon);
+    }
+    Interval->FirstExon = NULL;
+    Interval->LastExon = NULL;
+}
+
+void AddExonToInterval(IntervalNode* Interval, ExonNode* Exon)
+{
+    if (Interval->LastExon)
+    {
+        Interval->LastExon->Next = Exon;
+    }
+    else
+    {
+        Interval->FirstExon = Exon;
+    }
+    Interval->LastExon = Exon;
+    Exon->Interval = Interval;
+}
+
+
+
+// Given an exon and its dna sequence, translate into amino acids.  
+// The exon's prefix has already been set, but we'll set the suffix (with the 'leftovers')
+// If MinORFLength>0, then call MaskBrokenSequence to MASK OUT the interval between two stop 
+// codons separated by less than MinORFLength.
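+// Example: for DNA "ATGGCCA" (7 bases) the exon's Sequence becomes "MA" (codons ATG, GCC)
+// and the single leftover base "A" is stored in Exon->Suffix for joining to the next exon.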
+void GetExonSequence(ExonNode* Exon, char* DNA, int MinORFLength)
+{
+    char ProteinBuffer[MAX_INTERVAL_LENGTH];
+    int Pos;
+    int Length;
+    char* Peptide;
+    int SuffixPos;
+
+    if (!DNA || !*DNA)
+    {
+        Exon->Suffix[0] = '\0';
+        Exon->Sequence = NULL;
+        Exon->Length = 0;
+        return;
+    }
+    Length = strlen(DNA);
+    Pos = 0;
+    Peptide = ProteinBuffer;
+    while (Pos + 2 < Length)
+    {
+        *Peptide = TranslateCodon(DNA + Pos);
+        Peptide++;
+        Pos += 3;
+    }
+    *Peptide = '\0';
+    MaskBrokenSequence(ProteinBuffer, MinORFLength);
+    //Exon->Length = strlen(ProteinBuffer);
+    Exon->Length = strlen(ProteinBuffer);
+    if (Exon->Length)
+    {
+        Exon->Sequence = (char*)calloc(sizeof(char), Exon->Length + 1);
+        strcpy(Exon->Sequence, ProteinBuffer);
+    }
+    SuffixPos = 0;
+    while (Pos < Length)
+    {
+        Exon->Suffix[SuffixPos] = DNA[Pos];
+        SuffixPos++;
+        Pos++;
+    }
+    Exon->Suffix[SuffixPos] = '\0';
+}
+
+// If an exon's protein sequence has two stop codons with fewer than MinORFLength residues in
+// between, then CUT OUT that section.  (Because we interpret genomic intervals in multiple
+// reading frames, we often get open reading frames of very short length; we don't believe
+// such short peptides are plausible.)
+// Todo: Try encoding some stop codons as selenocysteines (U).
+// Update: We no longer CUT the sequence, because that would ruin our genomic coordinates.
+// Rather, we MASK super-short reading frames!
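+// Example: with MinORFLength 50, a translation containing "GXDEFXK" (two stop codons with
+// only three residues between them) is masked to "GXXXXXK"; residues outside the too-short
+// frame are left untouched.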
+void MaskBrokenSequence(char* Protein, int MinORFLength)
+{
+    int AnchorPos = -1;
+    int Pos;
+    char AA;
+    int MaskPos;
+    //
+    // if MinORFLength <= 0, then don't filter.
+    if (MinORFLength <= 0)
+    {
+        return;
+    }
+    Pos = 0;
+    while (1)
+    {
+        AA = Protein[Pos];
+        if (!AA)
+        {
+            break;
+        }
+        if (AA == 'X')
+        {
+            if (AnchorPos == -1 || (Pos - AnchorPos >= MinORFLength))
+            {
+                AnchorPos = Pos;
+            }
+            else
+            {
+                for (MaskPos = AnchorPos + 1; MaskPos < Pos; MaskPos++)
+                {
+                    Protein[MaskPos] = 'X';
+                }
+                AnchorPos = Pos;
+                Pos++;
+                continue;
+            }
+        }
+        Pos++;
+    }
+}
+
+// Add a forward link from exon A to exon B
+void AddExonLink(ExonNode* A, ExonNode* B, char AA, int Power)
+{
+    ExonLink* Link;
+    //
+    Link = (ExonLink*)calloc(sizeof(ExonLink), 1);
+    Link->Exon = B;
+    Link->AA = AA;
+    Link->Power = Power;
+    if (A->LastForward)
+    {
+        A->LastForward->Next = Link;
+    }
+    else
+    {
+        A->FirstForward = Link;
+    }
+    A->LastForward = Link;
+    //
+    Link = (ExonLink*)calloc(sizeof(ExonLink), 1);
+    Link->Exon = A;
+    Link->AA = AA;
+    Link->Power = Power;
+    if (B->LastBack)
+    {
+        B->LastBack->Next = Link;
+    }
+    else
+    {
+        B->FirstBack = Link;
+    }
+    B->LastBack = Link;
+}
+
+// Link up the "edge" DAG nodes for intervals, if their parent intervals are linked.
+void LinkDAGAcrossIntervals(IntervalNode* Interval, EdgeNode* Edge, int ReverseFlag)
+{
+    IntervalNode* OtherInterval;
+    GenomeDAGNode* DAGNode;
+    GenomeDAGNode* OtherDAGNode;
+    int DAGNodeIndex;
+    int OtherDAGNodeIndex;
+    //
+    OtherInterval = Edge->Interval;
+    for (DAGNodeIndex = 0; DAGNodeIndex < Interval->DAGNodeCount; DAGNodeIndex++)
+    {
+        DAGNode = Interval->DAGNodes + DAGNodeIndex;
+        if (!DAGNode->Sequence)
+        {
+            continue;
+        }
+        // We link forward only from dag nodes that touch the edge:
+        if (DAGNode->End < Interval->End)
+        {
+            continue;
+        }
+        for (OtherDAGNodeIndex = 0; OtherDAGNodeIndex < OtherInterval->DAGNodeCount; OtherDAGNodeIndex++)
+        {
+            OtherDAGNode = OtherInterval->DAGNodes + OtherDAGNodeIndex;
+            if (!OtherDAGNode->Sequence)
+            {
+                continue;
+            }
+            if (OtherDAGNode->Start > OtherInterval->Start)
+            {
+                continue;
+            }
+            GenomeDAGLinkBack(OtherDAGNode, DAGNode, Edge->Count); // link!
+        }
+    }
+}
+
+// Add links between exons, according to how the 'parent' DAG nodes are linked.
+// Each DAG node can carry up to three exons, one per prefix length (0, 1, or 2 bases).
+// A node of length 1 gets one exon with no prefix (and a length-1 suffix) and one with a prefix
+// (and no suffix).  Longer nodes get exons according to their reading-frame flags: EST-derived
+// intervals get all three exons, while most gene-finding-derived intervals get a single exon
+// for the single plausible reading frame.
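+// Example: an exon whose Suffix is "A" linking to a downstream exon whose Prefix is "TG"
+// yields the spliced codon "ATG", so the connecting ExonLink carries the amino acid 'M'
+// together with the supporting edge count (Power).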
+void LinkIntervalExons(IntervalNode* Interval, int ReverseFlag)
+{
+    ExonNode* Exon;
+    ExonNode* NextExon = NULL;
+    GenomeDAGNode* DAGNode;
+    GenomeDAGLink* Edge;
+    GenomeDAGLink* NextEdge;
+    int DAGNodeIndex;
+    int ExonIndex;
+    int SuffixLength;
+    char DNA[4];
+    char AA = 0;
+    int Power = 0;
+    //
+    DNA[3] = '\0';
+    for (DAGNodeIndex = 0; DAGNodeIndex < Interval->DAGNodeCount; DAGNodeIndex++)
+    {
+        DAGNode = Interval->DAGNodes + DAGNodeIndex;
+        if (ReverseFlag)
+        {
+            Edge = DAGNode->FirstBack;
+        }
+        else
+        {
+            Edge = DAGNode->FirstForward;
+        }
+        while (Edge)
+        {
+            // This DAG has one, two, or three exons to join.
+            for (ExonIndex = 0; ExonIndex < 3; ExonIndex++)
+            {
+                Exon = DAGNode->Exons[ExonIndex];
+                if (!Exon)
+                {
+                    continue;
+                }
+                SuffixLength = strlen(Exon->Suffix);
+                switch (SuffixLength)
+                {
+                case 0:
+                    NextExon = Edge->Node->Exons[0];
+                    if (!NextExon)
+                    {
+                        continue;
+                    }
+                    AA = '\0';
+                    Power = Edge->Count;
+                    break;
+                case 1:
+                    // A length-1 suffix.  We link to a length-2 prefix, if available:
+                    NextExon = Edge->Node->Exons[2];
+                    if (NextExon)
+                    {
+                        // Combine our length-1 suffix with the length-2 prefix:
+                        DNA[0] = Exon->Suffix[0];
+                        DNA[1] = NextExon->Prefix[0];
+                        DNA[2] = NextExon->Prefix[1];
+                        AA = TranslateCodon(DNA);
+                        Power = Edge->Count;
+                        break;
+                    }
+                    else
+                    {
+                        // There's no length-2 prefix available.  If that's because the next
+                        // exon has length <1, then we "leapfrog" through it:
+                        if ((Edge->Node->End == Edge->Node->Start + 1) && Edge->Node->Exons[0])
+                        {
+                            // The ugly case.  Take our suffix char, add the forward interval's base, 
+                            // and then add one base from the "far interval".
+                            DNA[0] = Exon->Suffix[0];
+                            DNA[1] = Edge->Node->Exons[0]->Suffix[0];
+                            if (ReverseFlag)
+                            {
+                                NextEdge = Edge->Node->FirstBack;
+                            }
+                            else
+                            {
+                                NextEdge = Edge->Node->FirstForward;
+                            }
+                            for (; NextEdge; NextEdge = NextEdge->Next)
+                            {
+                                NextExon = NextEdge->Node->Exons[1];
+                                if (NextExon)
+                                {
+                                    DNA[2] = NextExon->Prefix[0];
+                                    AA = TranslateCodon(DNA);
+                                    Power = max(Edge->Count, NextEdge->Count);
+                                    AddExonLink(Exon, NextExon, AA, Power);
+                                }
+                            }
+                        }
+                        continue;
+                    }
+                case 2:
+                    NextExon = Edge->Node->Exons[1];
+                    if (!NextExon)
+                    {
+                        continue;
+                    }
+                    DNA[0] = Exon->Suffix[0];
+                    DNA[1] = Exon->Suffix[1];
+                    DNA[2] = NextExon->Prefix[0];
+                    AA = TranslateCodon(DNA);
+                    Power = Edge->Count;
+                    break;
+                }
+                AddExonLink(Exon, NextExon, AA, Power);
+
+            } // exon loop
+            Edge = Edge->Next;
+        } // edge loop
+    } // DAG node loop
+}
+
+// Write out one exon record in binary format. 
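+// Exon record layout (values written via WriteBinary, in this order): Start (int), End (int),
+// Length (int), the parent interval's Occurrences (int), the Sequence (Length chars, omitted
+// when Length is zero), Prefix (2 chars), Suffix (2 chars), BackLinkCount (int),
+// ForwardLinkCount (int), and then one (exon Index, Power, AA) triple per backward link.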
+void WriteExonRecord(ExonNode* Exon, FILE* OutputFile, int ReverseFlag)
+{
+    int Length;
+    int BackLinkCount;
+    int ForwardLinkCount;
+    ExonLink* Link;
+    //
+    WriteBinary(&Exon->Start, sizeof(int), 1, OutputFile);
+    WriteBinary(&Exon->End, sizeof(int), 1, OutputFile);
+    Length = Exon->Length;
+    g_StatsTotalExonsWritten++;
+
+    WriteBinary(&Length, sizeof(int), 1, OutputFile);
+    WriteBinary(&Exon->Interval->Occurrences, sizeof(int), 1, OutputFile);
+    if (Length)
+    {
+        WriteBinary(Exon->Sequence, sizeof(char), Length, OutputFile);
+    }
+    BackLinkCount = 0;
+    for (Link = Exon->FirstBack; Link; Link = Link->Next)
+    {
+        BackLinkCount++;
+    }
+    ForwardLinkCount = 0;
+    for (Link = Exon->FirstForward; Link; Link = Link->Next)
+    {
+        ForwardLinkCount++;
+    }
+
+    if (0) //ReverseFlag)
+    {
+        WriteBinary(Exon->Prefix, sizeof(char), 2, OutputFile);
+        WriteBinary(Exon->Suffix, sizeof(char), 2, OutputFile);
+
+        WriteBinary(&ForwardLinkCount, sizeof(int), 1, OutputFile);
+        WriteBinary(&BackLinkCount, sizeof(int), 1, OutputFile);
+        for (Link = Exon->FirstForward; Link; Link = Link->Next)
+        {
+            g_StatsTotalEdgesWritten++;
+            WriteBinary(&Link->Exon->Index, sizeof(int), 1, OutputFile);
+            WriteBinary(&Link->Power, sizeof(int), 1, OutputFile);
+            WriteBinary(&Link->AA, sizeof(char), 1, OutputFile);
+        }
+
+    }
+    else
+    {
+        WriteBinary(Exon->Prefix, sizeof(char), 2, OutputFile);
+        WriteBinary(Exon->Suffix, sizeof(char), 2, OutputFile);
+
+        WriteBinary(&BackLinkCount, sizeof(int), 1, OutputFile);
+        WriteBinary(&ForwardLinkCount, sizeof(int), 1, OutputFile);
+        for (Link = Exon->FirstBack; Link; Link = Link->Next)
+        {
+            g_StatsTotalEdgesWritten++;
+            WriteBinary(&Link->Exon->Index, sizeof(int), 1, OutputFile);
+            WriteBinary(&Link->Power, sizeof(int), 1, OutputFile);
+            WriteBinary(&Link->AA, sizeof(char), 1, OutputFile);
+        }
+    }
+}
+
+// Given a range of intervals (from First to Last), with exons built, write
+// out the binary gene record to the splice-tolerant database file.
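+// Gene record layout: two 256-byte name fields (both written from GeneName here), the
+// chromosome number (int), a forward-strand flag (char), the exon count (int), and then one
+// exon record per exon.  For reverse-strand genes the exons are re-indexed and written from
+// the last GeneNode back to the first.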
+int WriteGeneRecord(int ChromosomeNumber, char* GeneName, int ReverseFlag, FILE* OutputFile)
+{
+    int ExonCount = 0;
+    IntervalNode* Interval;
+    ExonNode* Exon;
+    GeneNode* Node;
+    char ForwardFlag;
+    for (Node = g_GeneFirst; Node; Node = Node->Next)
+    {
+        Interval = Node->Interval;
+        for (Exon = Interval->FirstExon; Exon; Exon = Exon->Next)
+        {
+            ExonCount++;
+        }
+    }
+    if (!ExonCount)
+    {
+        // No exons!?  That can happen if we pruned them all away due to short ORFs.
+        // No need to write anything at all:
+        return 0;
+    }
+
+    WriteBinary(GeneName, sizeof(char), 256, OutputFile);
+    WriteBinary(GeneName, sizeof(char), 256, OutputFile);
+    WriteBinary(&ChromosomeNumber, sizeof(int), 1, OutputFile);
+    if (ReverseFlag)
+    {
+        ForwardFlag = 0;
+    }
+    else
+    {
+        ForwardFlag = 1;
+    }
+    WriteBinary(&ForwardFlag, sizeof(char), 1, OutputFile);
+    WriteBinary(&ExonCount, sizeof(int), 1, OutputFile);
+    if (ReverseFlag)
+    {
+        // Re-index all the exons:
+        ExonCount = 0;
+        for (Node = g_GeneLast; Node; Node = Node->Prev)
+        {
+            for (Exon = Node->Interval->FirstExon; Exon; Exon = Exon->Next)
+            {
+                Exon->Index = ExonCount;
+                ExonCount++;
+            }
+        }
+        for (Node = g_GeneLast; Node; Node = Node->Prev)
+        {
+            Interval = Node->Interval;
+            for (Exon = Interval->FirstExon; Exon; Exon = Exon->Next)
+            {
+                WriteExonRecord(Exon, OutputFile, ReverseFlag);
+            }
+        }
+    }
+    else
+    {
+        // Re-index all the exons:
+        ExonCount = 0;
+        for (Node = g_GeneFirst; Node; Node = Node->Next)
+        {
+            for (Exon = Node->Interval->FirstExon; Exon; Exon = Exon->Next)
+            {
+                Exon->Index = ExonCount;
+                ExonCount++;
+            }
+        }
+
+        for (Node = g_GeneFirst; Node; Node = Node->Next)
+        {
+            Interval = Node->Interval;
+            for (Exon = Interval->FirstExon; Exon; Exon = Exon->Next)
+            {
+                WriteExonRecord(Exon, OutputFile, ReverseFlag);
+            }
+        }
+    }
+    return 1;
+}
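+
+// Editor's note (illustrative sketch of the record just written, not upstream documentation):
+//
+//   char Name[256];         // gene name (first copy)
+//   char Name[256];         // gene name (second copy)
+//   int  ChromosomeNumber;
+//   char ForwardFlag;       // 1 = forward strand, 0 = reverse
+//   int  ExonCount;
+//   ExonCount exon records follow (see WriteExonRecord above)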
+
+// Verify that our exon construction is valid.  The number of exon forward-links and
+// backward-links should match.  And no exons should go outside the active range...
+void IntegrityCheckGene()
+{
+    int ForwardCount = 0;
+    int BackwardCount = 0;
+    int ExonCount = 0;
+    int IntervalCount = 0;
+    ExonNode* Exon;
+    ExonLink* Link;
+    ExonLink* RecipLink;
+    int FoundFlag;
+    IntervalNode* Interval;
+    GeneNode* Node;
+    EdgeNode* Edge;
+    int Count;
+    //
+    printf("\n===Integrity check: Intervals from %d to %d\n", g_GeneFirst->Interval->Start, g_GeneLast->Interval->End);
+
+    for (Node = g_GeneFirst; Node; Node = Node->Next)
+    {
+        Interval = Node->Interval;
+        if (Interval->Satisfied)
+        {
+            printf("%d - %d SATISFIED ", Interval->Start, Interval->End);
+        }
+        else
+        {
+            printf("%d - %d unsatisfied ", Interval->Start, Interval->End);
+        }
+        Count = 0;
+        for (Edge = Interval->FirstForward; Edge; Edge = Edge->Next)
+        {
+            Count++;
+        }
+        switch (Count)
+        {
+        case 0:
+            break;
+        case 1:
+            printf("to %d", Interval->FirstForward->Interval->Start);
+            break;
+        case 2:
+            printf("to %d, %d", Interval->FirstForward->Interval->Start, Interval->FirstForward->Next->Interval->Start);
+            break;
+        default:
+            printf("to %d, %d, +%d", Interval->FirstForward->Interval->Start, 
+                Interval->FirstForward->Next->Interval->Start, Count-2);
+            break;
+
+        }
+        printf("\n");
+        IntervalCount++;
+        for (Exon = Interval->FirstExon; Exon; Exon = Exon->Next)
+        {
+            ExonCount++;
+            for (Link = Exon->FirstForward; Link; Link = Link->Next)
+            {
+                ForwardCount++;
+                if (!Link->Exon->Interval->GNode)
+                {
+                    printf("** Warning: Exon %d links forward out of this world.\n", Exon->Index);
+                }
+                // Check for a reciprocal link, too:
+                FoundFlag = 0;
+                for (RecipLink = Link->Exon->FirstBack; RecipLink; RecipLink = RecipLink->Next)
+                {
+                    if (RecipLink->Exon == Exon)
+                    {
+                        FoundFlag = 1;
+                        if (Link->Power != RecipLink->Power)
+                        {
+                            printf("** Warning: Exon link %d to %d has inconsistent strength %d, %d\n", 
+                                Exon->Index, Link->Exon->Index, Link->Power, RecipLink->Power);
+                        }
+                        break;
+                    }
+                }
+                if (!FoundFlag)
+                {
+                    printf("** Warning: Exon %d has a non-reciprocated forward link.\n", Exon->Index);
+                }
+            }
+            for (Link = Exon->FirstBack; Link; Link = Link->Next)
+            {
+                BackwardCount++;
+                if (!Link->Exon->Interval->GNode)
+                {
+                    printf("** Warning: Exon %d links backward out of this world.\n", Exon->Index);
+                }
+                // Check for a reciprocal link, too:
+                FoundFlag = 0;
+                for (RecipLink = Link->Exon->FirstForward; RecipLink; RecipLink = RecipLink->Next)
+                {
+                    if (RecipLink->Exon == Exon)
+                    {
+                        FoundFlag = 1;
+                        break;
+                    }
+                }
+                if (!FoundFlag)
+                {
+                    printf("** Warning: Exon %d has a non-reciprocated backward link.\n", Exon->Index);
+                }
+            }
+        } // exon loop
+    } // interval loop
+    printf("Saw %d intervals, %d exons, %d links.\n", IntervalCount, ExonCount, ForwardCount);
+    if (ForwardCount != BackwardCount)
+    {
+        printf("** Warning: Total forward links is %d != backward links %d\n", ForwardCount, BackwardCount);
+    }
+}
+
+int g_UFTotalExons = 0;
+int g_UFTotalAA = 0;
+int g_UFTotalEdges = 0;
+int g_TotalExons = 0;
+int g_TotalAA = 0;
+int g_TotalEdges = 0;
+int g_TotalTrueExons = 0;
+int g_TotalTrueEdges = 0;
+
+typedef struct ExonSortNode
+{
+    ExonNode* Exon;
+} ExonSortNode;
+
+int CompareExonNodesForward(const ExonSortNode* NodeA, const ExonSortNode* NodeB)
+{
+    if (NodeA->Exon->Start < NodeB->Exon->Start)
+    {
+        return -1;
+    }
+    if (NodeA->Exon->Start > NodeB->Exon->Start)
+    {
+        return 1;
+    }
+    // arbitrary:
+    if (NodeA->Exon < NodeB->Exon)
+    {
+        return -1;
+    }
+    else
+    {
+        return 1;
+    }
+}
+int CompareExonNodesBackward(const ExonSortNode* NodeA, const ExonSortNode* NodeB)
+{
+    if (NodeA->Exon->Start < NodeB->Exon->Start)
+    {
+        return 1;
+    }
+    if (NodeA->Exon->Start > NodeB->Exon->Start)
+    {
+        return -1;
+    }
+    // arbitrary:
+    if (NodeA->Exon < NodeB->Exon)
+    {
+        return 1;
+    }
+    else
+    {
+        return -1;
+    }
+}
+
+// It is desirable for an exon's back-links to always hit exons with LOWER index numbers.
+void SortExons(int ReverseFlag)
+{
+    ExonSortNode* ExonNodes;
+    GeneNode* GNode;
+    int ExonCount;
+    int ExonIndex;
+    ExonNode* Exon;
+    //
+    for (GNode = g_GeneFirst; GNode; GNode = GNode->Next)
+    {
+        ExonCount = 0;
+        for (Exon = GNode->Interval->FirstExon; Exon; Exon = Exon->Next)
+        {
+            ExonCount++;
+        }
+        if (ExonCount)
+        {
+            ExonNodes = (ExonSortNode*)calloc(ExonCount, sizeof(ExonSortNode));
+            ExonIndex = 0;
+            for (Exon = GNode->Interval->FirstExon; Exon; Exon = Exon->Next)
+            {
+                ExonNodes[ExonIndex].Exon = Exon;
+                ExonIndex++;
+            }
+            if (ReverseFlag)
+            {
+                qsort(ExonNodes, ExonCount, sizeof(ExonSortNode), (QSortCompare)CompareExonNodesBackward);
+            }
+            else
+            {
+                qsort(ExonNodes, ExonCount, sizeof(ExonSortNode), (QSortCompare)CompareExonNodesForward);
+            }
+            GNode->Interval->FirstExon = ExonNodes[0].Exon;
+            GNode->Interval->LastExon = ExonNodes[ExonCount - 1].Exon;
+            for (ExonIndex = 0; ExonIndex < ExonCount; ExonIndex++)
+            {
+                if (ExonIndex < ExonCount - 1)
+                {
+                    ExonNodes[ExonIndex].Exon->Next = ExonNodes[ExonIndex + 1].Exon;
+                }
+                else
+                {
+                    ExonNodes[ExonIndex].Exon->Next = NULL;
+                }
+            }
+            SafeFree(ExonNodes);
+        }
+    }
+}
+
+// For reporting purposes, count how many exons and edges and amino acids are in our db:
+void CountExons(int PreFilterFlag)
+{
+    GeneNode* GNode;
+    ExonNode* Exon;
+    ExonLink* Link;
+    int Pos;
+    int TrueExonFlag;
+    //
+    for (GNode = g_GeneFirst; GNode; GNode = GNode->Next)
+    {
+        for (Exon = GNode->Interval->FirstExon; Exon; Exon = Exon->Next)
+        {
+            if (PreFilterFlag)
+            {
+                g_UFTotalExons++;
+                for (Pos = 0; Pos < Exon->Length; Pos++)
+                {
+                    if (Exon->Sequence[Pos]!='X')
+                    {
+                        g_UFTotalAA++;
+                    }
+                }
+                
+            }
+            else
+            {
+                g_TotalExons++;
+                TrueExonFlag = 1;
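+                // A back-link from an abutting exon (no intervening intron) just continues the same
+                // genomic stretch, so it isn't counted as a "true" (spliced) edge, and an exon with
+                // such a back-link isn't counted as a "true" exon.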
+                for (Link = Exon->FirstBack; Link; Link = Link->Next)
+                {
+                    if (Link->Exon->End == Exon->Start || Link->Exon->Start == Exon->End)
+                    {
+                        TrueExonFlag = 0;
+                    }
+                    else
+                    {
+                        g_TotalTrueEdges++;
+                    }
+                }
+                g_TotalTrueExons += TrueExonFlag;
+                for (Pos = 0; Pos < Exon->Length; Pos++)
+                {
+                    if (Exon->Sequence[Pos]!='X')
+                    {
+                        g_TotalAA++;
+                    }
+                }
+            }
+            for (Link = Exon->FirstBack; Link; Link = Link->Next)
+            {
+                if (PreFilterFlag)
+                {
+                    g_UFTotalEdges++;
+                    if (Link->AA)
+                    {
+                        g_UFTotalAA++;
+                    }
+                }
+                else
+                {
+                    g_TotalEdges++;
+                    if (Link->AA)
+                    {
+                        g_TotalAA++;
+                    }
+                }
+            }
+            for (Link = Exon->FirstForward; Link; Link = Link->Next)
+            {
+                if (PreFilterFlag)
+                {
+                    g_UFTotalEdges++;
+                    if (Link->AA)
+                    {
+                        g_UFTotalAA++;
+                    }
+                }
+                else
+                {
+                    g_TotalEdges++;
+                    if (Link->AA)
+                    {
+                        g_TotalAA++;
+                    }
+                }
+            } // iterate forward links
+        } // iterate exons
+    } // iterate GNodes
+}
+
+// Create a link between two genome DAG nodes.
+void GenomeDAGLinkBack(GenomeDAGNode* DAGNode, GenomeDAGNode* BackDAGNode, int Count)
+{
+    GenomeDAGLink* NewLink;
+    GenomeDAGLink* Link;
+    //
+    // Add the back-link:
+    NewLink = (GenomeDAGLink*)calloc(1, sizeof(GenomeDAGLink));
+    NewLink->Node = BackDAGNode;
+    NewLink->Count = Count;
+    if (!DAGNode->FirstBack)
+    {
+        DAGNode->FirstBack = NewLink;
+    }
+    else
+    {
+        for (Link = DAGNode->FirstBack; Link->Next; Link = Link->Next)
+        {
+            ;
+        }
+        Link->Next = NewLink;
+    }
+    // Add the forward-link:
+    NewLink = (GenomeDAGLink*)calloc(1, sizeof(GenomeDAGLink));
+    NewLink->Node = DAGNode;
+    NewLink->Count = Count;
+    if (!BackDAGNode->FirstForward)
+    {
+        BackDAGNode->FirstForward = NewLink;
+    }
+    else
+    {
+        for (Link = BackDAGNode->FirstForward; Link->Next; Link = Link->Next)
+        {
+            ;
+        }
+        Link->Next = NewLink;
+    }
+}
+
+int GetReadingFrameFlag(int Start, int End, int Offset, int ReverseFlag)
+{
+    int ReadingFrameFlag = 0;
+    //
+    if (ReverseFlag)
+    {
+        switch ((End - 1 - Offset) % 3)
+        {
+        case 0:
+            ReadingFrameFlag = IFLAG_FRAME_0;
+            break;
+        case 1:
+            ReadingFrameFlag = IFLAG_FRAME_1;
+            break;
+        case 2:
+            ReadingFrameFlag = IFLAG_FRAME_2;
+            break;
+        }
+    }
+    else
+    {
+        switch ((Start + Offset) % 3)
+        {
+        case 0:
+            ReadingFrameFlag = IFLAG_FRAME_0;
+            break;
+        case 1:
+            ReadingFrameFlag = IFLAG_FRAME_1;
+            break;
+        case 2:
+            ReadingFrameFlag = IFLAG_FRAME_2;
+            break;
+        }
+    }
+    return ReadingFrameFlag;
+}
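+
+// Worked example (editor's note): on the forward strand, a node starting at genomic position 1000
+// read with codon offset 2 gives (1000 + 2) % 3 = 0, i.e. IFLAG_FRAME_0; on the reverse strand the
+// frame is taken from the node's last base instead, via (End - 1 - Offset) % 3.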
+
+// Build a DAG for this genomic interval.  Then we'll generate three exons (two, for length-1 nodes)
+// for each node of the DAG.  The DAG is generally just one node, corresponding to genomic DNA.
+// But the DAG may have extra nodes and edges if there are SNPs that fall within the interval.
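+// Illustrative example (editor's sketch; positions and bases are made up): an interval covering
+// positions 1000-1010 with one SNP at position 1004 (alleles A/G) yields four DAG nodes:
+//
+//                          /-> [1004-1005 "A"] --\
+//     [1000-1004 "CTTA"] --                        --> [1005-1010 "CCGTG"]
+//                          \-> [1004-1005 "G"] --/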
+void BuildDAGForInterval(IntervalNode* Interval, FILE* GenomicFile, int MinORFLength, int ReverseFlag)
+{
+    int DAGNodeCount;
+    int DAGNodeIndex;
+    int NextDAGStart;
+    GenomeDAGNode* DAGNode;
+    int Length;
+    ExonNode* Exon;
+    char RCDNABuffer[MAX_INTERVAL_LENGTH + 1];
+    int PolyIndex;
+    int FirstPolyIndex;
+    Polymorphism* Poly;
+    int PrevNodesStart = -1;
+    int PrevNodesEnd = -1;
+    int PrevNodeIndex;
+    int NewPrevNodesStart;
+    int SNPIndex;
+    int ReadingFrameFlag;
+    //
+
+    FirstPolyIndex = FindPolyInInterval(Interval->Start, Interval->End);
+
+    ////////////////////////////////////////////////////////////
+    // How many nodes in our DAG?  Assume no polymorphisms overlap; then we need one node for the
+    // plain genomic sequence, plus, for each polymorphism, one node per SNP allele and one node for
+    // the sequence that resumes after it.  (If a polymorphism directly follows another, the resuming
+    // node isn't needed, but we allocate room for it anyway.)
+    DAGNodeCount = 1;
+    if (FirstPolyIndex >= 0)
+    {
+        for (PolyIndex = FirstPolyIndex; PolyIndex < g_PolymorphismCount; PolyIndex++)
+        {
+            if (g_Polymorphisms[PolyIndex].Pos >= Interval->End)
+            {
+                break;
+            }
+            // A polymorphism means one node for each SNP allele, and (USUALLY) a node
+            // for the continued 'core track'
+            //       a
+            //  XXXX b xxxx
+            //       c 
+            DAGNodeCount++;
+            DAGNodeCount += strlen(g_Polymorphisms[PolyIndex].SNP);
+        }
+    }
+    Interval->DAGNodeCount = DAGNodeCount;
+    Interval->DAGNodes = (GenomeDAGNode*)calloc(DAGNodeCount, sizeof(GenomeDAGNode));
+    ////////////////////////////////////////////////////////////
+    // Initialize all the DAG nodes:
+    NextDAGStart = Interval->Start;
+    DAGNodeIndex = 0;
+    PolyIndex = FirstPolyIndex;
+    while (1)
+    {
+        if (PolyIndex < 0 || PolyIndex >= g_PolymorphismCount || g_Polymorphisms[PolyIndex].Pos >= Interval->End)
+        {
+            // There are no more polymorphisms.  
+            if (NextDAGStart < Interval->End)
+            {
+                // Generate an interval that extends to the end:
+                DAGNode = Interval->DAGNodes + DAGNodeIndex;
+                DAGNode->Start = NextDAGStart;
+                DAGNode->End = Interval->End; 
+                Length = DAGNode->End - DAGNode->Start;
+                DAGNode->Sequence = (char*)calloc(Length + 1, sizeof(char)); // +1 for null terminator
+                fseek(GenomicFile, DAGNode->Start, 0);
+                ReadBinary(DAGNode->Sequence, sizeof(char), Length, GenomicFile);
+                // If we have some nodes already, that's because there's a polymorphism.  Link to the 
+                // previous two nodes:
+                if (DAGNodeIndex)
+                {
+                    for (PrevNodeIndex = PrevNodesStart; PrevNodeIndex <= PrevNodesEnd; PrevNodeIndex++)
+                    {
+                        GenomeDAGLinkBack(DAGNode, Interval->DAGNodes + PrevNodeIndex, 0);
+                    }
+                }
+            }
+            // And we're done!
+            break;
+        }
+        // There is another polymorphism.  
+        Poly = g_Polymorphisms + PolyIndex;
+        if (NextDAGStart < Poly->Pos)
+        {
+            // If there's non-polymorphic sequence before the next poly, then
+            // generate a DAG node for it.
+            DAGNode = Interval->DAGNodes + DAGNodeIndex;
+            DAGNode->Start = NextDAGStart;
+            DAGNode->End = Poly->Pos; 
+            Length = DAGNode->End - DAGNode->Start;
+            DAGNode->Sequence = (char*)calloc(Length + 1, sizeof(char)); // +1 for null terminator
+            fseek(GenomicFile, DAGNode->Start, 0);
+            ReadBinary(DAGNode->Sequence, sizeof(char), Length, GenomicFile);
+            // If we have some nodes already, that's because there's a polymorphism.  Link to the 
+            // previous nodes:
+            if (DAGNodeIndex)
+            {
+                for (PrevNodeIndex = PrevNodesStart; PrevNodeIndex <= PrevNodesEnd; PrevNodeIndex++)
+                {
+                    GenomeDAGLinkBack(DAGNode, Interval->DAGNodes + PrevNodeIndex, 0);
+                }
+            }
+            PrevNodesStart = DAGNodeIndex;
+            PrevNodesEnd = DAGNodeIndex;
+            DAGNodeIndex++;
+        } 
+        // Nodes for the two (or more) alleles:
+        NewPrevNodesStart = DAGNodeIndex;
+        for (SNPIndex = 0; SNPIndex < 4; SNPIndex++)
+        {
+            if (!Poly->SNP[SNPIndex])
+            {
+                break;
+            }
+            DAGNode = Interval->DAGNodes + DAGNodeIndex;
+            DAGNode->Start = Poly->Pos;
+            DAGNode->End = Poly->Pos + 1;
+            DAGNode->Sequence = (char*)calloc(2, sizeof(char));
+            DAGNode->Sequence[0] = Poly->SNP[SNPIndex];
+            if (PrevNodesStart > -1)
+            {
+                for (PrevNodeIndex = PrevNodesStart; PrevNodeIndex <= PrevNodesEnd; PrevNodeIndex++)
+                {
+                    GenomeDAGLinkBack(DAGNode, Interval->DAGNodes + PrevNodeIndex, 0);
+                }
+            }
+            DAGNodeIndex++;
+        }
+        PrevNodesStart = NewPrevNodesStart;
+        PrevNodesEnd = DAGNodeIndex - 1;        
+        NextDAGStart = Poly->Pos + 1;
+        PolyIndex++;
+    }
+    ////////////////////////////////////////////////////////////////////////////////
+    // The DAG for the interval has now been constructed.  Build exons for all DAG nodes.
+    for (DAGNodeIndex = 0; DAGNodeIndex < Interval->DAGNodeCount; DAGNodeIndex++)
+    {
+        DAGNode = Interval->DAGNodes + DAGNodeIndex;
+        if (!DAGNode->Sequence)
+        {
+            continue; // not a real DAG node.
+        }
+        if (DAGNode->End <= DAGNode->Start)
+        {
+            DAGNode = DAGNode;
+        }
+        // Reverse-complement the DAG's sequence, if necessary:
+        Length = DAGNode->End - DAGNode->Start;
+        if (Length >= MAX_INTERVAL_LENGTH)
+        {
+            printf("** Warning: Genomic interval from %d to %d is MUCH too long to process; truncating!", DAGNode->Start, DAGNode->End);
+            DAGNode->Sequence[MAX_INTERVAL_LENGTH] = '\0';
+        }
+        if (ReverseFlag)
+        {
+            strcpy(RCDNABuffer, DAGNode->Sequence);
+            WriteReverseComplement(RCDNABuffer, DAGNode->Sequence);
+        }
+        DAGNode->Exons = (ExonNode**)calloc(3, sizeof(ExonNode*));
+        // Add two or three exons for this DAG node.
+        // Check the reading frame of the interval to decide where codons are supposed to begin.
+        ReadingFrameFlag = GetReadingFrameFlag(DAGNode->Start, DAGNode->End, 0, ReverseFlag);
+        if (ReadingFrameFlag & Interval->Flags)
+        {
+            // Reading Frame 0:
+            Exon = (ExonNode*)calloc(1, sizeof(ExonNode));
+            Exon->Prefix[0] = '\0';
+            GetExonSequence(Exon, DAGNode->Sequence, MinORFLength);
+            Exon->Start = DAGNode->Start;
+            Exon->End = DAGNode->End;
+            Exon->DAGNode = DAGNode;
+            DAGNode->Exons[0] = Exon;
+            AddExonToInterval(Interval, Exon);
+        }
+        ReadingFrameFlag = GetReadingFrameFlag(DAGNode->Start, DAGNode->End, 1, ReverseFlag);
+        if (ReadingFrameFlag & Interval->Flags)
+        {
+            // Reading frame 1:
+            Exon = (ExonNode*)calloc(1, sizeof(ExonNode));
+            Exon->Prefix[0] = DAGNode->Sequence[0];
+            Exon->Prefix[1] = '\0';
+            GetExonSequence(Exon, DAGNode->Sequence + 1, MinORFLength);
+            Exon->Start = DAGNode->Start;
+            Exon->End = DAGNode->End;
+            Exon->DAGNode = DAGNode;
+            DAGNode->Exons[1] = Exon;
+            AddExonToInterval(Interval, Exon);
+        }
+        // Reading frame 2:
+        if (Length > 1)
+        {
+            ReadingFrameFlag = GetReadingFrameFlag(DAGNode->Start, DAGNode->End, 2, ReverseFlag);
+            if (ReadingFrameFlag & Interval->Flags)
+            {
+                Exon = (ExonNode*)calloc(1, sizeof(ExonNode));
+                Exon->Prefix[0] = DAGNode->Sequence[0];
+                Exon->Prefix[1] = DAGNode->Sequence[1];
+                Exon->Prefix[2] = '\0';
+                GetExonSequence(Exon, DAGNode->Sequence + 2, MinORFLength);
+                Exon->Start = DAGNode->Start;
+                Exon->End = DAGNode->End;
+                Exon->DAGNode = DAGNode;
+                DAGNode->Exons[2] = Exon;
+                AddExonToInterval(Interval, Exon);
+            }
+        }
+    }
+}
+
+// Every interval gives rise to three exons (two, if it's only one base long).
+// If interval A links to interval B, then A's exons each link to a compatible
+// exon in B.  Exception: If an exon with suffix length 1 links to an interval
+// of length 1, then we must go to the NEXT-next interval to complete a codon.
+//
+// GenomicFile is the file containing the genomic sequence.
+int BuildAndWriteExons(FILE* GenomicFile, FILE* OutputFile, int ReverseFlag, 
+    char* GeneName, int ChromosomeNumber, int MinORFLength)
+{
+    IntervalNode* Interval;
+    EdgeNode* Edge;
+    GeneNode* Node;
+    int IntervalCount = 0;
+    int ValidGeneFlag;
+    int VerboseFlag = 0;
+    // 
+    // Construct 1-3 exons for each interval within the gene:
+    for (Node = g_GeneFirst; Node; Node = Node->Next)
+    {
+        BuildDAGForInterval(Node->Interval, GenomicFile, MinORFLength, ReverseFlag);
+        IntervalCount++;
+    }
+    if (IntervalCount > g_StatsLargestGeneSize)
+    {
+        g_StatsLargestGeneSize = IntervalCount;
+    }
+    //DebugPrintBuiltGene();
+    // Link up the DAG graphs for all the intervals:
+    for (Node = g_GeneFirst; Node; Node = Node->Next)
+    {
+        Interval = Node->Interval;
+
+        for (Edge = Interval->FirstForward; Edge; Edge = Edge->Next)
+        {
+            // Ignore edges that extend out of this gene (we'll get them in overlap)
+            if (!Edge->Interval->GNode)
+            {
+                continue;
+            }
+            LinkDAGAcrossIntervals(Interval, Edge, ReverseFlag);
+        }
+    }
+    //printf("\nLinked DAG across intervals:\n");
+    //DebugPrintBuiltGene();
+    // Link up the exons, in accordance with the DAG graph linkage:
+    for (Node = g_GeneFirst; Node; Node = Node->Next)
+    {
+        Interval = Node->Interval;
+        LinkIntervalExons(Interval, ReverseFlag);
+    }
+
+    CountExons(1);
+
+    // If an exon isn't part of any sufficiently long reading frame, it can be dropped.  If it
+    // contains a stop codon, then the residues before the stop (or after it), along with some
+    // edges, may also be droppable.  Perform that filtering now:
+    //printf("\nLinked interval exons:\n");
+    if (VerboseFlag)
+    {
+        DebugPrintBuiltGene();
+    }
+    PruneShortORFs(ReverseFlag, MinORFLength);
+    // Exons may include 'masked-out' sequence blocks between stop codons.  These 
+    // sequences aren't needed for search, so delete them, splitting the exons if necessary.
+    //printf("\nPruned short ORFs:\n");
+    //DebugPrintBuiltGene();
+    
+    PurgeNonCodingExonChunks(ReverseFlag);
+    //printf("\nPruned non-coding chunks:\n");
+    //DebugPrintBuiltGene();
+    CountExons(0);
+    SortExons(ReverseFlag);
+    // Write out a gene record:
+    if (VerboseFlag)
+    {
+        DebugPrintBuiltGene();
+    }
+    ValidGeneFlag = WriteGeneRecord(ChromosomeNumber, GeneName, ReverseFlag, OutputFile);
+
+    // Go back and free all the exon records:
+    for (Node = g_GeneFirst; Node; Node = Node->Next)
+    {
+        Interval = Node->Interval;
+        FreeIntervalExons(Interval);
+        FreeIntervalDAG(Interval);
+    }
+    return ValidGeneFlag;
+}
+
+// Delete an exon entirely!
+void DeleteExon(ExonNode* Exon)
+{
+    ExonNode* OtherExon;
+    ExonLink* Link;
+    ExonLink* NextLink;
+    
+    // First, fix the pointers from the parent interval:
+    if (Exon->Interval->FirstExon == Exon)
+    {
+        Exon->Interval->FirstExon = Exon->Next;
+        if (Exon->Interval->LastExon == Exon)
+        {
+            Exon->Interval->LastExon = NULL;
+        }
+    }
+    else
+    {
+        for (OtherExon = Exon->Interval->FirstExon; OtherExon; OtherExon = OtherExon->Next)
+        {
+            if (OtherExon->Next == Exon)
+            {
+                OtherExon->Next = Exon->Next;
+                if (Exon->Interval->LastExon == Exon)
+                {
+                    Exon->Interval->LastExon = OtherExon;
+                }
+                break;
+            }
+        }
+    }
+    // Now, free all the edges (and reciprocal edges):
+    Link = Exon->FirstBack;
+    while (Link)
+    {
+        NextLink = Link->Next;
+        DeleteExonLink(Exon, Link, 0);
+        Link = NextLink;
+    }
+    Link = Exon->FirstForward;
+    while (Link)
+    {
+        NextLink = Link->Next;
+        DeleteExonLink(Exon, Link, 1);
+        Link = NextLink;
+    }
+
+    // Now, free the exon itself:
+    SafeFree(Exon->Sequence);
+    SafeFree(Exon);
+}
+
+// Delete the specified Link from this Exon.  ForwardFlag indicates
+// whether it's a forward link.
+void DeleteExonLink(ExonNode* Exon, ExonLink* Link, int ForwardFlag)
+{
+    ExonLink* OtherLink;
+    ExonNode* OtherExon;
+    ExonLink* Prev;
+    //
+    if (ForwardFlag)
+    {
+        // Update the exon's linked list of edges, removing Link:
+        for (OtherLink = Exon->FirstForward; OtherLink; OtherLink = OtherLink->Next)
+        {
+            if (OtherLink->Next == Link)
+            {
+                OtherLink->Next = Link->Next;
+                if (Exon->LastForward == Link)
+                {
+                    Exon->LastForward = OtherLink;
+                }
+                break;
+            }
+        }
+        if (Exon->FirstForward == Link)
+        {
+            Exon->FirstForward = Link->Next;
+        }
+        if (Exon->LastForward == Link)
+        {
+            Exon->LastForward = NULL;
+        }
+
+        // Remove the link from the other exon:
+        OtherExon = Link->Exon;
+        Prev = NULL;
+        for (OtherLink = OtherExon->FirstBack; OtherLink; OtherLink = OtherLink->Next)
+        {
+            if (OtherLink->Exon == Exon && OtherLink->AA == Link->AA)
+            {
+                if (OtherExon->LastBack == OtherLink)
+                {
+                    OtherExon->LastBack = Prev;
+                }
+                if (Prev)
+                {
+                    Prev->Next = OtherLink->Next;
+                }
+                else
+                {
+                    OtherExon->FirstBack = OtherLink->Next;
+                }
+                SafeFree(OtherLink);
+                break;
+            }
+            Prev = OtherLink;
+        }
+        SafeFree(Link);
+    } // forward link
+    else
+    {
+        // Update the exon's linked list of edges, removing Link:
+        for (OtherLink = Exon->FirstBack; OtherLink; OtherLink = OtherLink->Next)
+        {
+            if (OtherLink->Next == Link)
+            {
+                OtherLink->Next = Link->Next;
+                if (Exon->LastBack == Link)
+                {
+                    Exon->LastBack = OtherLink;
+                }
+                break;
+            }
+        }
+        if (Exon->FirstBack == Link)
+        {
+            Exon->FirstBack = Link->Next;
+        }
+        if (Exon->LastBack == Link)
+        {
+            Exon->LastBack = NULL;
+        }
+
+        // Remove the link from the other exon:
+        OtherExon = Link->Exon;
+        Prev = NULL;
+        for (OtherLink = OtherExon->FirstForward; OtherLink; OtherLink = OtherLink->Next)
+        {
+            if (OtherLink->Exon == Exon && OtherLink->AA == Link->AA)
+            {
+                if (OtherExon->LastForward == OtherLink)
+                {
+                    OtherExon->LastForward = Prev;
+                }
+                if (Prev)
+                {
+                    Prev->Next = OtherLink->Next;
+                }
+                else
+                {
+                    OtherExon->FirstForward = OtherLink->Next;
+                }
+                SafeFree(OtherLink);
+                break;
+            }
+            Prev = OtherLink;
+        }
+        SafeFree(Link);
+    } // backward link
+}
+
+// If Link is set, we've already counted the exon we came from and we're now following this link.
+// If Link is null, we're entering OldExon directly:
+int GeneFindLongestExtension(ExonNode* OldExon, ExonLink* Link, int LongEnough, int ForwardFlag)
+{
+    int Length;
+    ExonNode* Exon;
+    ExonLink* OtherLink;
+    int Extension;
+    int BestExtension;
+    int Pos;
+    //
+    if (Link && Link->AA)
+    {
+        Length = 1;
+        if (Length >= LongEnough)
+        {
+            return Length;
+        }
+    }
+    else
+    {
+        Length = 0;
+    }
+    Exon = Link->Exon;
+
+    // Iterate over residues in the exon, adding to our length:
+    if (ForwardFlag)
+    {
+        for (Pos = 0; Pos < Exon->Length; Pos++)
+        {
+            if (Exon->Sequence[Pos] == 'X')
+            {
+                return Length;
+            }
+            Length++;
+        }
+    }
+    else
+    {
+        for (Pos = Exon->Length - 1; Pos >= 0; Pos--)
+        {
+            if (Exon->Sequence[Pos] == 'X')
+            {
+                return Length;
+            }
+            Length++;
+        }
+    }
+    if (Length >= LongEnough)
+    {
+        return Length;
+    }
+
+    // Continue following edges:
+    if (ForwardFlag)
+    {
+        OtherLink = Exon->FirstForward;
+    }
+    else
+    {
+        OtherLink = Exon->FirstBack;
+    }
+    BestExtension = 0;
+    while (OtherLink)
+    {
+        Extension = GeneFindLongestExtension(Exon, OtherLink, LongEnough - Length, ForwardFlag);
+        if (Length + Extension >= LongEnough)
+        {
+            return (Length + Extension);
+        }
+        BestExtension = max(BestExtension, Extension);
+        OtherLink = OtherLink->Next;
+    }
+    return (Length + BestExtension);
+}
+
+// For each exon:
+// if its length is zero, we're done
+// if it's all stop codons, delete the exon
+// if it's all 'real' residues, we're done
+// if it starts with one or more stop codons, delete them (and back-edges)
+// otherwise, it starts with one or more real residues.  Split them to a separate exon.
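+// Illustrative example (editor's note, sequences are made up): an exon with sequence "XXAGK" starts
+// with stop codons, so the leading X's (and any non-AA back-edges) are removed, leaving "AGK"; an
+// exon with sequence "AGKXRS" is split, "AGK" staying in the (truncated) exon and "RS" moving to a
+// new exon that inherits the forward links.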
+void PurgeNonCodingExonChunks(int ReverseFlag)
+{
+    GeneNode* GNode;
+    ExonNode* Exon;
+    ExonNode* NextExon = NULL;
+    ExonNode* NewExon;
+    int FirstReal;
+    int FirstStop;
+    char AA = 0;
+    ExonLink* Link;
+    ExonLink* OtherLink;
+    ExonLink* NextLink;
+    char* NewSequence;
+    int AminoIndex;
+    int AAEdgeBack;
+    int AAEdgeForward;
+    //
+    for (GNode = g_GeneFirst; GNode; GNode = GNode->Next)
+    {
+        Exon = GNode->Interval->FirstExon;
+        while (Exon)
+        {
+            // An EMPTY exon can be nuked:
+            if (Exon->Start == Exon->End)
+            {
+                NextExon = Exon->Next;
+                DeleteExon(Exon);
+                Exon = NextExon;
+                continue;
+            }
+            // This exon has no amino acids, but it has edges.  Keep it:
+            if (Exon->Length == 0)
+            {
+                Exon = Exon->Next;
+                continue;
+            }
+
+            // Loop over residues to find the first stop codon (-1 if none) and the first real codon (-1 if none)
+            FirstReal = -1;
+            FirstStop = -1;
+            for (AminoIndex = 0; AminoIndex < Exon->Length; AminoIndex++)
+            {
+                AA = Exon->Sequence[AminoIndex];
+                if (AA == 'X')
+                {
+                    if (FirstStop < 0)
+                    {
+                        FirstStop = AminoIndex;
+                    }
+                }
+                else
+                {
+                    if (FirstReal < 0)
+                    {
+                        FirstReal = AminoIndex;
+                    }
+                }
+            }
+            // Count the number of AAEdges (edges with an amino acid char attached) back and forward
+            AAEdgeBack = 0;
+            AAEdgeForward = 0;
+            for (Link = Exon->FirstBack; Link; Link = Link->Next)
+            {
+                if (Link->AA)
+                {
+                    AAEdgeBack++;
+                }
+            }
+            for (Link = Exon->FirstForward; Link; Link = Link->Next)
+            {
+                if (Link->AA)
+                {
+                    AAEdgeForward++;
+                }
+            }
+            //printf("FirstReal %d FirstStop %d EdgeBack %d EdgeForward %d\n", FirstReal, FirstStop, AAEdgeBack, AAEdgeForward);
+            if (FirstReal == -1)
+            {
+                // This exon contains nothing but stop codons!
+                // We can't simply delete it, or the AA-edges leading into and out of it
+                // would be broken.  Instead, truncate its sequence and delete any non-AA edges.
+                if (AAEdgeBack)
+                {
+                    NewExon = (ExonNode*)calloc(1, sizeof(ExonNode));
+                    NewExon->Interval = Exon->Interval;
+                    NewExon->Length = 0;
+                    if (ReverseFlag)
+                    {
+                        NewExon->Start = Exon->End - strlen(Exon->Prefix);
+                        NewExon->End = Exon->End;                        
+                    }
+                    else
+                    {
+                        NewExon->Start = Exon->Start;
+                        NewExon->End = Exon->Start + strlen(Exon->Prefix);
+                    }
+                    strcpy(NewExon->Prefix, Exon->Prefix);
+                    // Assimilate all edges with AA into the new exon:
+                    Link = Exon->FirstBack;
+                    while (Link)
+                    {
+                        if (!Link->AA)
+                        {
+                            NextLink = Link->Next;
+                            DeleteExonLink(Exon, Link, 0);
+                            Link = NextLink;
+                            continue;
+                        }
+                        // Fix the reciprocal links to point to the new exon:
+                        for (OtherLink = Link->Exon->FirstForward; OtherLink; OtherLink = OtherLink->Next)
+                        {
+                            if (OtherLink->Exon == Exon && OtherLink->AA == Link->AA)
+                            {
+                                OtherLink->Exon = NewExon;
+                            }
+                        }
+                        if (NewExon->FirstBack)
+                        {
+                            NewExon->LastBack->Next = Link;
+                        }
+                        else
+                        {
+                            NewExon->FirstBack = Link;
+                        }
+                        NewExon->LastBack = Link;
+                        Link = Link->Next;
+                    }
+                    if (NewExon->LastBack)
+                    {
+                        NewExon->LastBack->Next = NULL;
+                    }
+                    Exon->FirstBack = NULL;
+                    Exon->LastBack = NULL;
+                    GNode->Interval->LastExon->Next = NewExon;
+                    GNode->Interval->LastExon = NewExon;
+                }
+                if (AAEdgeForward)
+                {
+                    NewExon = (ExonNode*)calloc(1, sizeof(ExonNode));
+                    NewExon->Interval = Exon->Interval;
+                    NewExon->Length = 0;
+                    if (ReverseFlag)
+                    {
+                        NewExon->Start = Exon->Start;
+                        NewExon->End = Exon->Start + strlen(Exon->Suffix);
+                        strcpy(NewExon->Suffix, Exon->Suffix);
+                    }
+                    else
+                    {
+                        NewExon->Start = Exon->End - strlen(Exon->Suffix);
+                        NewExon->End = Exon->End;
+                        strcpy(NewExon->Suffix, Exon->Suffix);
+                    }
+                    // Assimilate all edges with AA into the new exon:
+                    Link = Exon->FirstForward;
+                    while (Link)
+                    {
+                        if (!Link->AA)
+                        {
+                            NextLink = Link->Next;
+                            DeleteExonLink(Exon, Link, 1);
+                            Link = NextLink;
+                            continue;
+                        }
+                        // Fix the reciprocal links to point to the new exon:
+                        for (OtherLink = Link->Exon->FirstBack; OtherLink; OtherLink = OtherLink->Next)
+                        {
+                            if (OtherLink->Exon == Exon && OtherLink->AA == Link->AA)
+                            {
+                                OtherLink->Exon = NewExon;
+                            }
+                        }
+                        if (NewExon->FirstForward)
+                        {
+                            NewExon->LastForward->Next = Link;
+                        }
+                        else
+                        {
+                            NewExon->FirstForward = Link;
+                        }
+                        NewExon->LastForward = Link;
+                        Link = Link->Next;
+                    }
+                    if (NewExon->LastForward)
+                    {
+                        NewExon->LastForward->Next = NULL;
+                    }
+                    Exon->FirstForward = NULL;
+                    Exon->LastForward = NULL;
+                    GNode->Interval->LastExon->Next = NewExon;
+                    GNode->Interval->LastExon = NewExon;
+                }
+                NextExon = Exon->Next;
+                DeleteExon(Exon);
+                Exon = NextExon;
+                continue;
+            } // if exon contains only stop codons
+            if (FirstStop == -1)
+            {
+                // This exon contains no stop codons.  Leave it alone, move on.
+                Exon = Exon->Next;
+                continue;
+            }
+            if (FirstStop == 0)
+            {
+                // This exon begins with one or more stop codons.  Delete all back edges except
+                // those containing an amino acid:
+                if (AAEdgeBack)
+                {
+                    NewExon = (ExonNode*)calloc(1, sizeof(ExonNode));
+                    NewExon->Interval = Exon->Interval;
+                    NewExon->Length = 0;
+                    if (ReverseFlag)
+                    {
+                        NewExon->Start = Exon->End - strlen(Exon->Prefix);
+                        NewExon->End = Exon->End;
+                    }
+                    else
+                    {
+                        NewExon->Start = Exon->Start;
+                        NewExon->End = Exon->Start + strlen(Exon->Prefix);
+                    }
+                    strcpy(NewExon->Prefix, Exon->Prefix);
+                    // Assimilate all edges with AA into the new exon:
+                    Link = Exon->FirstBack;
+                    while (Link)
+                    {
+                        if (!Link->AA)
+                        {
+                            NextLink = Link->Next;
+                            DeleteExonLink(Exon, Link, 0);
+                            Link = NextLink;
+                            continue;
+                        }
+                        // Fix the reciprocal links to point to the new exon:
+                        for (OtherLink = Link->Exon->FirstForward; OtherLink; OtherLink = OtherLink->Next)
+                        {
+                            if (OtherLink->Exon == Exon)
+                            {
+                                OtherLink->Exon = NewExon;
+                            }
+                        }
+                        if (NewExon->FirstBack)
+                        {
+                            NewExon->LastBack->Next = Link;
+                        }
+                        else
+                        {
+                            NewExon->FirstBack = Link;
+                        }
+                        NewExon->LastBack = Link;
+                        Link = Link->Next;
+                    }
+                    if (NewExon->LastBack)
+                    {
+                        NewExon->LastBack->Next = NULL;
+                    }
+                    Exon->FirstBack = NULL;
+                    Exon->LastBack = NULL;
+                    GNode->Interval->LastExon->Next = NewExon;
+                    GNode->Interval->LastExon = NewExon;
+                }
+                else
+                {
+                    Link = Exon->FirstBack;
+                    while (Link)
+                    {
+                        NextLink = Link->Next;
+                        DeleteExonLink(Exon, Link, 0);
+                        Link = NextLink;
+                    }
+                }
+                // Delete the exon's prefix, and move its start position up:
+                if (ReverseFlag)
+                {
+                    Exon->End -= strlen(Exon->Prefix) + 3 * FirstReal;
+                }
+                else
+                {
+                    Exon->Start += strlen(Exon->Prefix) + 3 * FirstReal;
+                }
+                Exon->Prefix[0] = '\0';
+                NewSequence = (char*)calloc(Exon->Length - FirstReal + 1, sizeof(char)); // 1 byte for null
+                strcpy(NewSequence, Exon->Sequence + FirstReal);
+                SafeFree(Exon->Sequence);
+                Exon->Sequence = NewSequence;
+                Exon->Length = Exon->Length - FirstReal;
+                // We'll revisit this exon in the next loop pass, in case it has more stop codons later on.
+                continue;
+            }  // if sequence starts with stop codon
+
+            // This exon contains a stop codon, preceded by 'real' AAs.  Build a new exon 
+            // to hold our suffix.  The old exon gets truncated and gets its genomic 
+            // pos changed.
+            NewExon = (ExonNode*)calloc(1, sizeof(ExonNode));
+            NewExon->Interval = Exon->Interval;
+            NewExon->Length = Exon->Length - FirstStop - 1;
+            NewExon->Sequence = (char*)calloc(NewExon->Length + 1, sizeof(char));
+            strcpy(NewExon->Sequence, Exon->Sequence + FirstStop + 1);
+            if (ReverseFlag)
+            {
+                NewExon->Start = Exon->Start;
+                NewExon->End = Exon->End - strlen(Exon->Prefix) - (FirstStop + 1) * 3;
+                Exon->Start = NewExon->End + 3;
+            }
+            else
+            {
+                NewExon->End = Exon->End;
+                NewExon->Start = Exon->Start + strlen(Exon->Prefix) + (FirstStop + 1) * 3;
+                Exon->End = NewExon->Start - 3;
+            }
+
+            NewExon->FirstForward = Exon->FirstForward;
+            NewExon->LastForward = Exon->LastForward;
+            for (Link = Exon->FirstForward; Link; Link = Link->Next)
+            {
+                for (OtherLink = Link->Exon->FirstBack; OtherLink; OtherLink = OtherLink->Next)
+                {
+                    if (OtherLink->Exon == Exon)
+                    {
+                        OtherLink->Exon = NewExon;
+                    }
+                }
+            }
+            Exon->FirstForward = NULL;
+            Exon->LastForward = NULL;
+            Exon->Sequence[FirstStop] = '\0';
+            Exon->Length = FirstStop;
+            strcpy(NewExon->Suffix, Exon->Suffix);
+            Exon->Suffix[0] = '\0';
+            GNode->Interval->LastExon->Next = NewExon;
+            GNode->Interval->LastExon = NewExon;
+            Exon = Exon->Next;
+            continue;
+        } // Iteration over exons
+    } // Iteration over GNodes (intervals)
+} // PurgeNonCodingExonChunks
+
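+// Compute the longest stop-codon-free extension reachable by following back-links from Exon.
+// Results are memoized: Link->MaxLength caches the best extension through each back-link, and
+// Exon->MaxBackOverall caches the best over all back-links (-1 means "not yet computed").
+// If IncludeBody is set, the exon's own trailing residues are counted first.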
+int SetExonLinkExtensionLengthsBack(ExonNode* Exon, int MinimumORFLength, int IncludeBody)
+{
+    ExonLink* Link;
+    int Length;
+    int Pos;
+    //
+
+    // First case: We're starting INTO the exon sequence.  We may stop partway.
+    if (IncludeBody && Exon->Sequence)
+    {
+        Length = 0;
+        for (Pos = Exon->Length - 1; Pos >= 0; Pos--)
+        {
+            if (Exon->Sequence[Pos] == 'X')
+            {
+                return Length;
+            }
+            Length++;
+            if (Length >= MinimumORFLength)
+            {
+                // That's long enough already!
+                return Length;
+            }
+        }
+    }
+    else
+    {
+        Length = 0;
+    }
+
+    if (Exon->MaxBackOverall != -1)
+    {
+        return Length + Exon->MaxBackOverall;
+    }
+
+    // Set Link->MaxLength for each link back.  We always do this when we're called with
+    // IncludeBody == 0, we MAY do it for IncludeBody == 1 (as necessary)
+    Exon->MaxBackOverall = 0;
+    for (Link = Exon->FirstBack; Link; Link = Link->Next)
+    {
+        // If the extension for the link is already known, note it and continue
+        if (Link->MaxLength != -1)
+        {
+            Exon->MaxBackOverall = max(Exon->MaxBackOverall, Link->MaxLength);
+            continue;
+        }
+        if (Link->AA)
+        {
+            Link->MaxLength = 1;
+        }
+        else
+        {
+            Link->MaxLength = 0;
+        }
+        Link->MaxLength += SetExonLinkExtensionLengthsBack(Link->Exon, MinimumORFLength, 1);
+        Exon->MaxBackOverall = max(Exon->MaxBackOverall, Link->MaxLength);
+    }
+    return Length + Exon->MaxBackOverall;
+}
+
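+// Forward-direction counterpart of SetExonLinkExtensionLengthsBack: compute the longest
+// stop-codon-free extension reachable by following forward links, memoizing results in
+// Link->MaxLength and Exon->MaxForwardOverall.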
+int SetExonLinkExtensionLengthsForward(ExonNode* Exon, int MinimumORFLength, int IncludeBody)
+{
+    ExonLink* Link;
+    int Length;
+    int Pos;
+    //
+
+    // First case: We're starting INTO the exon sequence.  We may stop partway.
+    if (IncludeBody && Exon->Sequence)
+    {
+        Length = 0;
+        for (Pos = 0; Pos < Exon->Length; Pos++)
+        {
+            if (Exon->Sequence[Pos] == 'X')
+            {
+                return Length;
+            }
+            Length++;
+            if (Length >= MinimumORFLength)
+            {
+                // That's long enough already!
+                return Length;
+            }
+        }
+    }
+    else
+    {
+        Length = 0;
+    }
+
+    if (Exon->MaxForwardOverall != -1)
+    {
+        return Length + Exon->MaxForwardOverall;
+    }
+
+    // Set Link->MaxLength for each link back.  We always do this when we're called with
+    // IncludeBody==0, we MAY do it for IncludeBody==1 (as necessary)
+    Exon->MaxForwardOverall = 0;
+    for (Link = Exon->FirstForward; Link; Link = Link->Next)
+    {
+        // If the extension for the link is already known, note it and continue
+        if (Link->MaxLength != -1)
+        {
+            Exon->MaxForwardOverall = max(Exon->MaxForwardOverall, Link->MaxLength);
+            continue;
+        }
+        if (Link->AA)
+        {
+            Link->MaxLength = 1;
+        }
+        else
+        {
+            Link->MaxLength = 0;
+        }
+        Link->MaxLength += SetExonLinkExtensionLengthsForward(Link->Exon, MinimumORFLength, 1);
+        Exon->MaxForwardOverall = max(Exon->MaxForwardOverall, Link->MaxLength);
+    }
+    return Length + Exon->MaxForwardOverall;
+}
+
+// Short open reading frame pruning:
+// - Determine the maximum length of each reading frame attainable by linking forward along the graph F1...Fm
+// - Determine the maximum length of each reading frame attainable by linking backward along the graph B1...Bn
+// - If there are no stop codons: 
+//   If len + max(F1...Fm) + max(B1...Bn) < N, kill the exon and all links
+//   Else:
+//     If len + max(B1...Bn) + Fi < N, kill forward link i
+//     If len + max(F1...Fm) + Bi < N, kill backward link i
+// - Let S be the length of the suffix (all AAs after the last stop codon)
+//   If S + max(F1...Fm) < N, mask S and remove all forward links
+//   Else if S + Fi < N, remove Fi
+// - Let P be the length of the prefix (all AAs up to the first stop codon)
+//   If P + max(B1...Bn) < N, mask P and remove all backward links
+//   Else if P + Bi < N, remove Bi
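+// Worked example (editor's note, numbers are made up): with N = 50, an exon of length 10 whose best
+// backward extension is 15 and best forward extension is 20 can contribute at most
+// 10 + 15 + 20 = 45 residues to any ORF, so the exon and all of its links are deleted.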
+void PruneShortORFs(int ReverseFlag, int MinimumORFLength)
+{
+    GeneNode* GNode;
+    ExonNode* Exon;
+    ExonNode* NextExon = NULL;
+    ExonLink* Link;
+    ExonLink* NextLink;
+    int LinkIndex;
+    int PrefixLength;
+    int SuffixLength;
+    int Pos;
+    char* NewSequence;
+    int CutsPerformed;
+    int Flag;
+    int GeneNodeIndex = 0;
+    int ExonIndex;
+    //
+    // if MinOrfLength <= 0, then don't filter.
+    if (MinimumORFLength <= 0)
+    {
+        return;
+    }
+    // Iterate over all exons in all intervals.  Init the link lengths.
+    for (GNode = g_GeneFirst; GNode; GNode = GNode->Next)
+    {
+        for (Exon = GNode->Interval->FirstExon; Exon; Exon = Exon->Next)
+        {
+            for (Link = Exon->FirstForward; Link; Link = Link->Next)
+            {
+                Link->MaxLength = -1;
+            }
+            for (Link = Exon->FirstBack; Link; Link = Link->Next)
+            {
+                Link->MaxLength = -1;
+            }
+            // Exon->MaxBackOverall is set to -1 to indicate that it hasn't been
+            // processed yet.
+            Exon->MaxBackOverall = -1;
+            Exon->MaxForwardOverall = -1;
+        }
+    }
+    // Iterate over all exons in all intervals.  Set the max lengths of all
+    // their links.
+    for (GNode = g_GeneFirst; GNode; GNode = GNode->Next)
+    {
+        for (Exon = GNode->Interval->FirstExon; Exon; Exon = Exon->Next)
+        {
+            if (Exon->MaxBackOverall == -1)
+            {
+                SetExonLinkExtensionLengthsBack(Exon, MinimumORFLength, 0);
+            }
+            if (Exon->MaxForwardOverall == -1)
+            {
+                SetExonLinkExtensionLengthsForward(Exon, MinimumORFLength, 0);
+            }
+        }
+    }
+
+    for (GNode = g_GeneFirst; GNode; GNode = GNode->Next)
+    {
+        //printf("Start gene node #%d\n", GeneNodeIndex);
+        Exon = GNode->Interval->FirstExon;
+        ExonIndex = 0;
+        while (Exon)
+        {
+            //printf("Start GeneNode#%d exon#%d\n", GeneNodeIndex, ExonIndex);
+            ExonIndex++;
+            
+            // Measure the exon, its prefix, and its suffix.
+            PrefixLength = 0;
+            SuffixLength = 0;
+            if (Exon->Sequence)
+            {
+                for (Pos = 0; Pos < Exon->Length; Pos++)
+                {
+                    if (Exon->Sequence[Pos] == 'X')
+                    {
+                        break;
+                    }
+                    PrefixLength++;
+                }
+                for (Pos = Exon->Length - 1; Pos >= 0; Pos--)
+                {
+                    if (Exon->Sequence[Pos] == 'X')
+                    {
+                        break;
+                    }
+                    SuffixLength++;
+                }
+            }
+            // Consider removing the exon entirely:
+            if (Exon->Length + Exon->MaxBackOverall + Exon->MaxForwardOverall < MinimumORFLength)
+            {
+                //printf("*Delete the exon entirely!\n");
+                // Zap!  Free the exon, and its links.
+                NextExon = Exon->Next;
+                DeleteExon(Exon);
+                Exon = NextExon;
+                continue;
+            }
+
+            if (PrefixLength == Exon->Length)
+            {
+                // This exon contains no stop codons.  And we cannot delete it entirely,
+                // but we can perhaps still prune a few links.
+                // Try removing forward links:
+                Link = Exon->FirstForward;
+                LinkIndex = 0;
+                while (Link)
+                {
+                    if (Exon->MaxBackOverall + Exon->Length + Link->MaxLength < MinimumORFLength)
+                    {
+                        // This link can't be part of a full-length ORF.  So, let's remove the link:
+                        NextLink = Link->Next;
+                        //printf("*Delete a forward link\n");
+                        DeleteExonLink(Exon, Link, 1);
+                        Link = NextLink;
+                        LinkIndex++;
+                        continue;
+                    }
+                    LinkIndex++;
+                    Link = Link->Next;
+                }
+                // Try removing backward links:
+                Link = Exon->FirstBack;
+                LinkIndex = 0;
+                while (Link)
+                {
+                    if (Exon->MaxForwardOverall + Exon->Length + Link->MaxLength < MinimumORFLength)
+                    {
+                        // This link can't be part of a full-length ORF.  So, let's remove the link:
+                        NextLink = Link->Next;
+                        //printf("*Delete a backward link\n");
+                        DeleteExonLink(Exon, Link, 0);
+                        Link = NextLink;
+                        LinkIndex++;
+                        continue;
+                    }
+                    LinkIndex++;
+                    Link = Link->Next;
+                }
+            }
+            else
+            {
+                // This exon contains at least one stop codon.  We'll consider pruning the prefix
+                // (everything up to and including the first stop) and/or the suffix (the last
+                // stop and everything after it).
+                CutsPerformed = 0;
+                // First, consider removing the prefix (or some incoming links):
+                if (PrefixLength + Exon->MaxBackOverall < MinimumORFLength)
+                {
+                    // We can cut the prefix!  First delete all backward links:
+                    Link = Exon->FirstBack;
+                    while (Link)
+                    {
+                        NextLink = Link->Next;
+                        DeleteExonLink(Exon, Link, 0);
+                        Link = NextLink;
+                    }
+                    // If there's at least one non-stop character after the prefix,
+                    // then the exon still has a sequence:
+                    if (Exon->Length > PrefixLength + 1)
+                    {
+                        NewSequence = (char*)calloc(Exon->Length - PrefixLength, sizeof(char)); 
+                        strcpy(NewSequence, Exon->Sequence + PrefixLength + 1);
+                        Exon->Length = Exon->Length - PrefixLength - 1;
+                        SafeFree(Exon->Sequence);
+                        Exon->Sequence = NewSequence;
+                        // Fix the genomic start coordinate:
+                        if (ReverseFlag)
+                        {
+                            Exon->End -= strlen(Exon->Prefix) + (PrefixLength + 1)*3;
+                        }
+                        else
+                        {
+                            Exon->Start += strlen(Exon->Prefix) + (PrefixLength + 1)*3;
+                        }
+                    }
+                    else
+                    {
+                        // The exon's body is gone!
+                        if (ReverseFlag)
+                        {
+                            Exon->End -= strlen(Exon->Prefix) + Exon->Length * 3;
+                        }
+                        else
+                        {
+                            Exon->Start += strlen(Exon->Prefix) + Exon->Length * 3;
+                        }
+                        Exon->Length = 0;
+                        SafeFree(Exon->Sequence);
+                        Exon->Sequence = NULL;
+                    }
+                    CutsPerformed++;
+                    Exon->Prefix[0] = '\0';
+                }
+                // Consider removing the suffix (or some outgoing links):
+                if (SuffixLength + Exon->MaxForwardOverall < MinimumORFLength)
+                {
+                    // Delete all forward links:
+                    //printf("*Delete all forward links\n");
+                    Link = Exon->FirstForward;
+                    while (Link)
+                    {
+                        NextLink = Link->Next;
+                        DeleteExonLink(Exon, Link, 1);
+                        Link = NextLink;
+                    }
+                    if (Exon->Length > SuffixLength + 1)
+                    {
+                        NewSequence = (char*)calloc(Exon->Length - SuffixLength, sizeof(char));
+                        strncpy(NewSequence, Exon->Sequence, Exon->Length - SuffixLength - 1);
+                        Exon->Length = Exon->Length - SuffixLength - 1;
+                        NewSequence[Exon->Length] = '\0';
+                        SafeFree(Exon->Sequence);
+                        Exon->Sequence = NewSequence;
+                        if (ReverseFlag)
+                        {
+                            Exon->Start += strlen(Exon->Suffix) + (SuffixLength + 1) * 3;
+                        }
+                        else
+                        {
+                            Exon->End -= strlen(Exon->Suffix) + (SuffixLength + 1) * 3;
+                        }
+                    }
+                    else
+                    {
+                        // The exon's body is gone!
+                        if (ReverseFlag)
+                        {
+                            Exon->Start += strlen(Exon->Suffix) + Exon->Length * 3;
+                        }
+                        else
+                        {
+                            Exon->End -= strlen(Exon->Suffix) + Exon->Length * 3;
+                        }
+                        Exon->Length = 0;
+                        SafeFree(Exon->Sequence);
+                        Exon->Sequence = NULL;
+
+                    }
+                    Exon->Suffix[0] = '\0';
+                    CutsPerformed++;
+                }
+                // If we removed the exon body, and there's no prefix or suffix left, then cut the exon itself:
+                if (Exon->Start == Exon->End)
+                {
+                    NextExon = Exon->Next;
+                    DeleteExon(Exon);
+                    Exon = NextExon;
+                    continue;
+                }
+                // If we cut a prefix and cut a suffix, then we have no links, and it's entirely possible 
+                // that no real sequence remains.  Check that at least one non-stop-codon residue is left:
+                if (CutsPerformed == 2) 
+                {
+                    Flag = 0;
+                    for (Pos = 0; Pos < Exon->Length; Pos++)
+                    {
+                        if (Exon->Sequence[Pos] != 'X')
+                        {
+                            Flag = 1;
+                            break;
+                        }
+                    }
+                    if (!Flag)
+                    {
+                        NextExon = Exon->Next;
+                        //printf("*Delete the exon itself\n");
+                        DeleteExon(Exon);
+                        Exon = NextExon;
+                        continue;
+                    }
+                }
+                // Even if we didn't cut the prefix (or suffix) outright, we may be able to remove
+                // some incoming and outgoing links:
+                Link = Exon->FirstBack;
+                LinkIndex = 0;
+                while (Link)
+                {
+                    if (Link->MaxLength + PrefixLength < MinimumORFLength)
+                    {
+                        NextLink = Link->Next;
+                        //printf("*Delete ONE backward link\n");
+                        DeleteExonLink(Exon, Link, 0);
+                        Link = NextLink;
+                        LinkIndex++;
+                        continue;
+                    }
+                    Link = Link->Next;
+                    LinkIndex++;
+                }
+                Link = Exon->FirstForward;
+                LinkIndex = 0;
+                while (Link)
+                {
+                    if (Link->MaxLength + SuffixLength < MinimumORFLength)
+                    {
+                        NextLink = Link->Next;
+                        //printf("*Delete ONE forward link\n");
+                        DeleteExonLink(Exon, Link, 1);
+                        Link = NextLink;
+                        LinkIndex++;
+                        continue;
+                    }
+                    Link = Link->Next;
+                    LinkIndex++;
+                }
+            } // if the exon contains a stop codon
+            Exon = Exon->Next;
+        } // exon loop
+        GeneNodeIndex++;
+    } // gene node loop
+}
+
+// Prepare a splice-db for a particular target gene.  We'll read all the intervals for the given
+// chromosome number, perform the merge and intersect algorithms, and then construct a set
+// of genomic intervals which 'satisfy' the target.
+void PrepareOneGeneSpliceDB(int ChromosomeNumber, int ReverseFlag, int IntervalStart, 
+    int IntervalEnd, char* CustomFileName, char* GeneName, int MinORFLength)
+{
+    char FileName[1024];
+    char GenomeFileName[1024];
+    FILE* GenomicFile;
+    FILE* CustomFile;
+    GeneNode* GNode;
+    IntervalNode* Interval;
+    int SatisfiedOne;
+    char ReverseChar;
+    //
+    if (ReverseFlag)
+    {
+        ReverseChar = '-';
+    }
+    else
+    {
+        ReverseChar = '+';
+    }
+    sprintf(FileName, "NewSpliceDB\\%d%c.filtered", ChromosomeNumber, ReverseChar);
+    //ParseIntervalsFlatFile(FileName, -1);
+    ParseIntervalsESTBinaryFile(FileName);
+    
+    /////////////////////////////////////////////////////////////////
+    //// For purposes of debugging, we can trim the list of intervals a bit.  (Debug printout of a whole
+    //// chromosome is unwieldy!)  In production, we MUST NOT trim, because one of the intervals in the
+    //// master-interval may be linked to a far-away interval.
+    //PruneEdge = IntervalStart - 5000;
+    //while (g_FirstInterval->End < PruneEdge)
+    //{
+    //    RemoveInterval(g_FirstInterval, 0);
+    //}
+    //PruneEdge = IntervalEnd + 5000;
+    //while (g_LastInterval->Start > PruneEdge)
+    //{
+    //    RemoveInterval(g_LastInterval, 0);
+    //}
+
+    printf("BEFORE merge:\n");
+    DebugPrintIntervals(1, 1, -1, -1);
+    MergeIntervals();
+    printf("AFTER merge:\n");
+    DebugPrintIntervals(1, 2, -1, -1);
+    IntersectIntervals();
+    printf("AFTER intersect:\n");
+    DebugPrintIntervals(1, 3, -1, -1);
+
+    //sprintf(GenomeFileName, "C:\\source\\Bafna\\Splice\\chromFa\\chr%d.trie", ChromosomeNumber);
+    sprintf(GenomeFileName, "e:\\Chromosome\\chr%d.trie", ChromosomeNumber);
+    GenomicFile = fopen(GenomeFileName, "rb");
+    CustomFile = fopen(CustomFileName, "wb");
+    // Create the gene node list.  First, add every interval that overlaps
+    // the requested 'master interval':
+    for (Interval = g_FirstInterval; Interval; Interval = Interval->Next)
+    {
+        if (Interval->Start > IntervalEnd)
+        {
+            break;
+        }
+        if (Interval->End < IntervalStart)
+        {
+            continue;
+        }
+        GNode = (GeneNode*)calloc(1, sizeof(GeneNode));
+        GNode->Interval = Interval;
+        Interval->GNode = GNode;
+        if (g_GeneFirst)
+        {
+            g_GeneLast->Next = GNode;
+            GNode->Prev = g_GeneLast;
+        }
+        else
+        {
+            g_GeneFirst = GNode;
+        }
+        g_GeneLast = GNode;
+    }
+    // Iterate: Find the first interval overlapping the master which is not satisfied.  Then, satisfy it.
+    // Break after every interval overlapping the master has been satisfied.
+    while (1)
+    {
+        SatisfiedOne = 0;
+        for (GNode = g_GeneFirst; GNode; GNode = GNode->Next)
+        {
+            if (GNode->Interval->End < IntervalStart || GNode->Interval->Start > IntervalEnd)
+            {
+                continue; // We needn't satisfy this one, since it's not in the master-interval.
+            }
+            if (!GNode->Interval->Satisfied)
+            {
+                SatisfyIntervalForward(GNode, 0);
+                SatisfyIntervalBack(GNode, 0);
+                GNode->Interval->Satisfied = 1;
+                SatisfiedOne = 1;
+                break;
+            }
+        }
+        if (!SatisfiedOne)
+        {
+            // Everyone's happy, so stop now.
+            break;
+        }
+    }
+   
+    BuildAndWriteExons(GenomicFile, CustomFile, ReverseFlag, GeneName, ChromosomeNumber, MinORFLength);
+    fclose(GenomicFile);
+    fclose(CustomFile);
+
+    FreeGeneNodes();
+    // Free the interval list!
+    while (g_FirstInterval)
+    {
+        RemoveInterval(g_FirstInterval, 0);
+    }
+
+}
+
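+// A minimal usage sketch (chromosome, coordinates, file name and gene name below are
+// illustrative only, and the hard-coded NewSpliceDB\ and e:\Chromosome\ inputs referenced
+// above must already exist): build a custom splice-db for one genomic window on the
+// forward strand of chromosome 17, keeping only ORFs of at least 50 residues.
+void ExamplePrepareOneGeneSpliceDB()
+{
+    PrepareOneGeneSpliceDB(17, 0, 7500000, 7700000, "MyGene.dat", "MyGene", 50);
+}
+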
+// Parse a binary file listing genomic intervals, with links between
+// them.  Convert it into an exon graph and write it out.
+void PrepareSpliceDB(int ChromosomeNumber, int ReverseFlag, int MinORFLength)
+{
+    FILE* StatsFile;
+    char ReverseChar;
+    char FileName[1024];
+    char ChromosomeFileName[1024];
+    char OutputFileName[1024];
+    int GeneCount;
+
+    if (ReverseFlag)
+    {
+        ReverseChar = '-';
+    }
+    else
+    {
+        ReverseChar = '+';
+    }
+    
+    ////////////////////////////////////////////////////////////////////////////
+    // HUMAN data sources:
+    // We can parse ESTs, or gene finder output, or BOTH.  (Both may be slow)
+    // ESTREF, if reference sequences are included, or EST, if only ESTs are included:
+    //sprintf(FileName, "ESTREF\\%d%c.filtered", ChromosomeNumber, ReverseChar); // %%% hard-coded path
+    sprintf(FileName, "EST\\%d%c.filtered", ChromosomeNumber, ReverseChar); // %%% hard-coded path
+    ParseIntervalsESTBinaryFile(FileName);
+    sprintf(FileName, "GeneFindDB\\%d%c.dat", ChromosomeNumber, ReverseChar); // %%% hard-coded path
+    ParseIntervalsGeneFindBinaryFile(FileName);
+    sprintf(ChromosomeFileName, "e:\\Chromosome\\chr%d.trie", ChromosomeNumber);
+    sprintf(OutputFileName, "ESTSpliceDB\\%d%c.dat", ChromosomeNumber, ReverseChar);
+
+    printf("BEFORE merge:\n");
+    DebugPrintIntervals(-1, 1, -1, -1); 
+    MergeIntervals();
+    printf("AFTER merge:\n");
+    DebugPrintIntervals(-1, 2, -1, -1); 
+    IntersectIntervals();
+    printf("AFTER intersect:\n");
+    DebugPrintIntervals(-1, 3, -1, -1); 
+    
+    GeneCount = WriteGenesForIntervals(ChromosomeFileName, OutputFileName, ChromosomeNumber, ReverseFlag, MinORFLength);
+    //WriteGenesForIntervals("C:\\source\\Bafna\\Splice\\chromFa\\chr11.trie", "ESTSpliceDB\\11-.dat", 11, 1);
+    StatsFile = fopen("SplicePrepStats.txt", "a+");
+    //fprintf(StatsFile, "Genes have been written out.  Statistics:\n");
+    fprintf(StatsFile, "%d\t", ChromosomeNumber);
+    fprintf(StatsFile, "%d\t", ReverseFlag);
+    fprintf(StatsFile, "%d\t", GeneCount);
+    fprintf(StatsFile, "%d\t", g_StatsIncompleteGeneCount);
+    fprintf(StatsFile, "%d\t", g_StatsLargestGeneSize);
+    fprintf(StatsFile, "%d\t", g_StatsLargestGeneRecordNumber);
+    fprintf(StatsFile, "%d\t", g_StatsIntervalsBeforeMerge);
+    fprintf(StatsFile, "%d\t", g_StatsEdgesBeforeMerge);
+    fprintf(StatsFile, "%d\t", g_StatsIntervalsAfterMerge);
+    fprintf(StatsFile, "%d\t", g_StatsEdgesAfterMerge);
+    fprintf(StatsFile, "%d\t", g_StatsIntervalsAfterIntersect);
+    fprintf(StatsFile, "%d\t", g_StatsEdgesAfterIntersect);
+    fprintf(StatsFile, "%d\t", g_StatsTotalExonsWritten);
+    fprintf(StatsFile, "%d\t", g_StatsTotalEdgesWritten);
+    printf("Exon counts:\n");
+    fprintf(StatsFile, "\t%d\t", g_PolymorphismCount);
+    printf("%d\t%d\t%d\t\t%d\t%d\t%d\t", g_UFTotalExons, g_UFTotalEdges, g_UFTotalAA, g_TotalExons, g_TotalEdges, g_TotalAA);
+    fprintf(StatsFile, "\t%d\t%d\t%d\t\t%d\t%d\t%d\t", g_UFTotalExons, g_UFTotalEdges, g_UFTotalAA, g_TotalExons, g_TotalEdges, g_TotalAA);
+    // How many exons are there...if you count adjacent exons as a single 'real' exon?
+    fprintf(StatsFile, "\t%d\t%d\t", g_TotalTrueExons, g_TotalTrueEdges);
+    fprintf(StatsFile, "\n");
+    fclose(StatsFile);
+    
+    // Free the interval list!
+    while (g_FirstInterval)
+    {
+        RemoveInterval(g_FirstInterval, 0);
+    }
+}
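+
+// A minimal usage sketch (chromosome number and minimum ORF length are illustrative only;
+// the EST\, GeneFindDB\ and e:\Chromosome\ inputs referenced above must already exist at
+// the hard-coded paths): build the splice-db for both strands of chromosome 17.
+void ExamplePrepareSpliceDB()
+{
+    PrepareSpliceDB(17, 0, 50); // forward strand
+    PrepareSpliceDB(17, 1, 50); // reverse strand
+}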
diff --git a/SpliceDB.h b/SpliceDB.h
new file mode 100644
index 0000000..5ee7f36
--- /dev/null
+++ b/SpliceDB.h
@@ -0,0 +1,150 @@
+//Title:          SpliceDB.h
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+#ifndef SPLICEDB_H
+#define SPLICEDB_H
+
+#include <stdio.h>
+#include "Utils.h"
+#include "Trie.h"
+
+typedef struct GenomeDAGLink
+{
+    struct GenomeDAGNode* Node;
+    struct GenomeDAGLink* Next;
+    int Count;
+} GenomeDAGLink;
+
+#define MAX_DAG_NODE_LINKS 3
+
+typedef struct GenomeDAGNode
+{
+    int Start;
+    int End;
+    char* Sequence;
+    GenomeDAGLink* FirstForward;
+    GenomeDAGLink* FirstBack;
+    //GenomeDAGNode** Next[MAX_DAG_NODE_LINKS];
+    //GenomeDAGNode** Prev[MAX_DAG_NODE_LINKS];
+    struct ExonNode** Exons;
+    //GenomeDAGLink* FirstForward;
+    //GenomeDAGLink* FirstBack;
+} GenomeDAGNode;
+
+typedef struct IntervalNode
+{
+    int Start;
+    int End; // exclusive
+    int Occurrences;
+    int Satisfied;
+    unsigned int OriginalFilePos;
+    struct EdgeNode* FirstForward;
+    struct EdgeNode* LastForward;
+    struct EdgeNode* FirstBack;
+    struct EdgeNode* LastBack;
+    struct IntervalNode* Prev;
+    struct IntervalNode* Next;
+    struct ExonNode* FirstExon;
+    struct ExonNode* LastExon;
+    struct GeneNode* GNode; // non-null while this interval is in a pending gene.
+    int DAGNodeCount;
+    GenomeDAGNode* DAGNodes;
+    int Flags; // for keeping track of which reading frames we permit!
+} IntervalNode;
+
+typedef struct GeneNode
+{
+    IntervalNode* Interval;
+    // RX is the minimum covered length of any path originating at this interval and extending forward.
+    // We set RX during the 'satisfaction' process, so that we can note the (partial) satisfaction
+    // of intervals other than the seed.
+    // The 'covered' section of a path is the portion that's already part of the gene.
+    // RX is initialized to 0.  
+    // During satisfaction procedure: 
+    // If RX is big enough already, return without recursing.  Otherwise:
+    // RX is set to 9999 if we have no forward edges.
+    // Otherwise, RX is set to the minimum value of the outgoing edge's interval's length (plus return value of the recursive 
+    // satisfaction call, if any).
+    int RX;
+    int LX;
+    struct GeneNode* Prev;
+    struct GeneNode* Next;
+} GeneNode;
+
+// ExonNode is used while constructing the database.
+// In production, use ExonStruct from Spliced.h instead.
+typedef struct ExonNode
+{
+    IntervalNode* Interval;
+    struct ExonLink* FirstForward;
+    struct ExonLink* LastForward;
+    struct ExonLink* FirstBack;
+    struct ExonLink* LastBack;
+    struct ExonNode* Next;
+    char Prefix[3]; // up to two characters, plus the null terminator
+    char Suffix[3];
+    int Index;
+    int Start; // start (in genomic coordinates, usually same as the parent interval's start)
+    int End; //end (in genomic coordinates)
+    int Length; // length in amino acids (not in genomic coordinates)
+    char* Sequence;
+    GenomeDAGNode* DAGNode;
+    int MaxForwardOverall;
+    int MaxBackOverall;
+
+} ExonNode;
+
+typedef struct ExonLink
+{
+    char AA;
+    ExonNode* Exon;
+    int Power;
+    // maximum peptide length achievable with this amino acid (if any) and the next exon,
+    // until stop codon or edge of graph.
+    int MaxLength; 
+    struct ExonLink* Next;
+} ExonLink;
+
+typedef struct EdgeNode
+{
+    int Count;
+    float Score;
+    IntervalNode* Interval;
+    struct EdgeNode* Prev;
+    struct EdgeNode* Next;
+} EdgeNode;
+
+void PrepareSpliceDB(int ChromosomeNumber, int ReverseFlag, int MinORFLength);
+void PrepareOneGeneSpliceDB(int ChromosomeNumber, int ReverseFlag, int IntervalStart, int IntervalEnd,
+    char* CustomFileName, char* GeneName, int MinORFLength);
+void TestSpliceDB(int argc, char** argv);
+#endif // SPLICEDB_H
diff --git a/SpliceScan.c b/SpliceScan.c
new file mode 100644
index 0000000..249723c
--- /dev/null
+++ b/SpliceScan.c
@@ -0,0 +1,1003 @@
+//Title:          SpliceScan.c
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+//
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+#include "CMemLeak.h"
+#include "Utils.h"
+#include <stdio.h>
+#include <math.h>
+#include <stdlib.h>
+#include "Errors.h"
+#include "Trie.h"
+#include "Inspect.h"
+#include "Spliced.h"
+#include "SpliceDB.h"
+
+// SpliceScan.c is not used in production.  Its job is to take a standard
+// database (.trie format), and check whether the proteins in that database
+// are present in a splice-tolerant database.  Because EST coverage is 
+// incomplete, we may be missing some exons from a gene, or missing some 
+// genes entirely.  And due to polymorphisms, errors
+// in gene and protein sequencing, there will be some minor differences.
+// We want to quantify how many genes are missing, and how extensive the
+// differences are.
+
+// We do the following for each protein:
+// - Take all 8-mers from the protein, put them in a trie.  
+//  ASSUMPTION: If the protein is present at all, there will be 8 consecutive residues 
+//   present without error
+//  ASSUMPTION: any proteins with length <8aa can be ignored (yes, IPI has records of length <8...)
+// - Use the trie to search each gene in the splice-tolerant database
+// - Count the number of leaves that were matched.  If the rate is above (near?) our best so far,
+//   flag all the characters that were matched.  Remember the percentage of characters matched,
+//   the span from the first to the last, and which gene record produced the match.
+//  ASSUMPTION: 8mers aren't repeated within a protein
+//  ASSUMPTION: Chance matches are very rare.  Our numbers will be distorted only slightly
+//   by considering 8mers that appear at the wrong position.  
+// - When you run out of genes, or when you get a 95%-or-better match, report the best match.  
+//  Free the trie and start the next record.
+
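+// A minimal sketch of the bookkeeping described above (illustrative only; the production
+// logic lives in SSTrieCoverSequence and SSDatabaseScanProteins below): given per-residue
+// match flags for one protein, compute the covered fraction and the matched span.
+void ExampleSummarizeCoverage(char* MatchFlags, int SequenceLength,
+    float* CoverageFraction, int* SpanStart, int* SpanEnd)
+{
+    int Pos;
+    int Covered = 0;
+    *SpanStart = -1;
+    *SpanEnd = -1;
+    for (Pos = 0; Pos < SequenceLength; Pos++)
+    {
+        if (MatchFlags[Pos])
+        {
+            Covered++;
+            if (*SpanStart < 0)
+            {
+                *SpanStart = Pos; // first matched residue
+            }
+            *SpanEnd = Pos; // last matched residue seen so far
+        }
+    }
+    *CoverageFraction = SequenceLength ? Covered / (float)SequenceLength : 0;
+}
+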
+//#define SS_TRIE_DEBUG 1
+
+#define PROTEIN_NAME_BLOCK 81
+#define SS_BLOCK_SIZE 1000
+#define MAX_TRIE_NODE_COUNT 2000000
+int g_TrieNodeMatches[SS_BLOCK_SIZE];
+int TrieNodeHitFlags[MAX_TRIE_NODE_COUNT];
+int g_NextTrieLeafIndex;
+
+typedef struct SSTrieNode
+{
+    void* Children[26]; // SSTrieNodes or SSTrieLeafs
+    struct SSTrieNode* FailureNode;
+    int FailureDepth;
+#ifdef SS_TRIE_DEBUG
+    int Depth; // for debugging only!
+    char Buffer[16]; // for debugging only!
+#endif // SS_TRIE_DEBUG
+} SSTrieNode;
+
+typedef struct SSTrieLeafNode
+{
+    int ProteinNumber;
+    int ProteinPos;
+    struct SSTrieLeafNode* Next;
+} SSTrieLeafNode;
+
+typedef struct SSTrieLeaf
+{
+    struct SSTrieNode* FailureNode;
+    int FailureDepth;
+    // Index points into the TrieNodeHitFlags array (of size MAX_TRIE_NODE_COUNT).  We use an index
+    // instead of storing the hit flag in the leaf.  Why?  Because we have 
+    // to reset all the flags to zero after every gene record.  Doing so with 
+    // memset is much faster than traversing the trie!
+    int Index; 
+    SSTrieLeafNode* Head;
+#ifdef SS_TRIE_DEBUG
+    char Buffer[16]; // for debugging only!
+#endif // SS_TRIE_DEBUG
+} SSTrieLeaf;
+
+void SSTrieFailureNodeHelper(SSTrieNode* Root, char* Buffer, SSTrieNode* FailedNode, int Depth)
+{
+    int SuffixStart;
+    int BufferPos;
+    int FailureDepth;
+    SSTrieNode* FailureNode = NULL;
+    SSTrieNode* Node;
+    SSTrieLeaf* Leaf;
+    int AA;
+    //
+    ////////////////////////////////////////////////////////////////////////////
+    // Set this node's failure-node, by finding the longest proper suffix of Buffer
+    // that reaches a node:
+    if (Depth > 1)
+    {
+        for (SuffixStart = 1; SuffixStart < Depth; SuffixStart++)
+        {
+            Node = Root;
+            for (BufferPos = SuffixStart; BufferPos < Depth; BufferPos++)
+            {
+                Node = Node->Children[Buffer[BufferPos]];
+                if (!Node)
+                {
+                    break;
+                }
+            }
+            if (Node)
+            {
+                // The suffix matched!
+                FailureDepth = Depth - SuffixStart;
+                FailureNode = Node;
+                break;
+            }
+        }
+        if (!FailureNode)
+        {
+            FailureNode = Root;
+            FailureDepth = 0;
+        }
+        if (Depth == 8)
+        {
+            Leaf = (SSTrieLeaf*)FailedNode;
+            Leaf->FailureDepth = FailureDepth;
+            Leaf->FailureNode = FailureNode;
+        }
+        else
+        {
+            FailedNode->FailureDepth = FailureDepth;
+            FailedNode->FailureNode = FailureNode;
+        }
+    }
+    else
+    {
+        // A depth-1 node.  Always gets the root as its failure node:
+        FailedNode->FailureDepth = 0;
+        FailedNode->FailureNode = Root;
+    }
+    ////////////////////////////////////////////////////////////////////////////
+    // Set our children's failure-nodes:
+    if (Depth < 8)
+    {
+        for (AA = 0; AA < 26; AA++)
+        {
+            Node = FailedNode->Children[AA];
+            if (Node)
+            {
+                Buffer[Depth] = AA;
+                SSTrieFailureNodeHelper(Root, Buffer, Node, Depth + 1);
+            }
+        }
+    }
+}
+
+// Initialize all the failure nodes for the trie.
+void SetSSTrieFailureNodes(SSTrieNode* Root)
+{
+    char Buffer[16];
+    int AA;
+    SSTrieNode* Child;
+
+    // Root never gets a failure node:
+    Root->FailureDepth = 0;
+    Root->FailureNode = NULL;
+
+    // For other nodes: Populate a Buffer with your string.  Then find the 
+    // longest proper suffix of Buffer that reaches a node.
+    for (AA = 0; AA < 26; AA++)
+    {
+        Child = Root->Children[AA];
+        if (Child)
+        {
+            Buffer[0] = AA;
+            SSTrieFailureNodeHelper(Root, Buffer, Child, 1);
+        }
+    }
+}
+
+void FreeSSTrieNode(SSTrieNode* Root, int Depth)
+{
+    SSTrieLeaf* Leaf;
+    SSTrieLeafNode* Node;
+    SSTrieLeafNode* Prev = NULL;
+    int AA;
+    //
+    if (!Root)
+    {
+        return;
+    }
+    if (Depth == 8)
+    {
+        Leaf = (SSTrieLeaf*)Root;
+        for (Node = Leaf->Head; Node; Node = Node->Next)
+        {
+            SafeFree(Prev);
+            Prev = Node;
+        }
+        SafeFree(Prev);
+        SafeFree(Leaf);
+        return;
+    }
+    for (AA = 0; AA < 26; AA++)
+    {
+        if (Root->Children[AA])
+        {
+            FreeSSTrieNode(Root->Children[AA], Depth + 1);
+        }
+    }
+    SafeFree(Root);
+}
+
+SSTrieNode* ConstructSSTrie(char** Sequences, int BlockSize)
+{
+    SSTrieNode* Root;
+    SSTrieNode* CurrentNode;
+    SSTrieNode* NextNode;
+    SSTrieLeaf* Leaf;
+    SSTrieLeafNode* Node;
+    int Len;
+    int StartPos;
+    int PeptidePos;
+    int SequencePos;
+    int AA;
+    int ProteinNumber;
+    char* Sequence;
+    //
+    Root = (SSTrieNode*)calloc(1, sizeof(SSTrieNode));
+    g_NextTrieLeafIndex = 0;
+    for (ProteinNumber = 0; ProteinNumber < BlockSize; ProteinNumber++)
+    {
+        Sequence = Sequences[ProteinNumber];
+        Len = strlen(Sequence);
+        for (StartPos = 0; StartPos <= Len-8; StartPos++)
+        {
+            SequencePos = StartPos;
+            PeptidePos = 0;
+            CurrentNode = Root;
+            // Add nodes for the first n-1 positions:
+            for (PeptidePos = 0; PeptidePos < 7; PeptidePos++)
+            {
+                AA = Sequence[StartPos + PeptidePos] - 'A';
+                if (AA < 0 || AA > 25)
+                {
+                    break; // invalid character in protein sequence!
+                }
+                NextNode = CurrentNode->Children[AA];
+                if (!NextNode)
+                {
+                    NextNode = (SSTrieNode*)calloc(1, sizeof(SSTrieNode));
+                    CurrentNode->Children[AA] = NextNode;
+#ifdef SS_TRIE_DEBUG
+                    NextNode->Depth = PeptidePos + 1;
+                    strncpy(NextNode->Buffer, Sequence + StartPos, PeptidePos + 1);
+                    NextNode->Buffer[PeptidePos + 1] = '\0';
+#endif //SS_TRIE_DEBUG
+                }
+                CurrentNode = NextNode;
+            }
+            // Add a leaf node for the nth position:
+            AA = Sequence[StartPos + PeptidePos] - 'A';
+            if (AA < 0 || AA > 25)
+            {
+                continue; // invalid character in protein sequence!
+            }
+            Leaf = CurrentNode->Children[AA];
+            if (!Leaf)
+            {
+                Leaf = (SSTrieLeaf*)calloc(1, sizeof(SSTrieLeaf));
+                //Leaf->ProteinPos = StartPos + PeptidePos;
+                Leaf->Index = g_NextTrieLeafIndex++;
+                CurrentNode->Children[AA] = Leaf;
+                Leaf->Head = (SSTrieLeafNode*)calloc(1, sizeof(SSTrieLeafNode));
+                Leaf->Head->ProteinNumber = ProteinNumber;
+                Leaf->Head->ProteinPos = StartPos;
+#ifdef SS_TRIE_DEBUG
+                strncpy(Leaf->Buffer, Sequence + StartPos, 8);
+                Leaf->Buffer[8] = '\0';
+#endif
+            }
+            else
+            {
+                for (Node = Leaf->Head; Node->Next; Node = Node->Next)
+                {
+                    ; // empty body: just advance Node to the last node in the chain
+                }
+                Node->Next = (SSTrieLeafNode*)calloc(1, sizeof(SSTrieLeafNode));
+                Node->Next->ProteinNumber = ProteinNumber;
+                Node->Next->ProteinPos = StartPos;
+            }
+        } // Loop on start positions
+    } // Loop on proteins
+    return Root;
+}
+
+int SSTrieCoverSequence(SSTrieNode* Root, char* MatchFlags, int Depth, int ProteinNumber)
+{
+    SSTrieLeaf* Leaf;
+    int AA;
+    int Sum = 0;
+    SSTrieNode* Child;
+    SSTrieLeafNode* Node;
+    int X;
+    //
+    if (Depth == 8)
+    {
+        Leaf = (SSTrieLeaf*)Root;
+        if (TrieNodeHitFlags[Leaf->Index])
+        {
+            for (Node = Leaf->Head; Node; Node = Node->Next)
+            {
+                if (Node->ProteinNumber == ProteinNumber)
+                {
+                    for (X = 0; X < 8; X++)
+                    {
+                        if (!MatchFlags[Node->ProteinPos + X])
+                        {
+                            Sum += 1;
+                            MatchFlags[Node->ProteinPos + X] = 1;
+                        }
+                    }
+                }
+            }
+        }
+    }
+    else
+    {
+        for (AA = 0; AA < 26; AA++)
+        {
+            Child = Root->Children[AA];
+            if (Child)
+            {
+                Sum += SSTrieCoverSequence(Child, MatchFlags, Depth + 1, ProteinNumber);
+            }
+        }
+    }
+    return Sum;
+}
+
+
+// Recursive main function for scanning through splice-tolerant database with a trie.  
+void SSDatabaseScanHelper(ExonStruct* Exon, int Len, int Pos, SSTrieNode* Node, int Depth)
+{
+    SSTrieLeaf* Leaf;
+    int NextLen = 0;
+    int EdgeIndex;
+    SSTrieNode* NextNode;
+    SSTrieLeafNode* LeafNode;
+    ExonEdge* Edge;
+    //
+    if (!Node)
+    {
+        return;
+    }
+    if (Depth == 8)
+    {
+        Leaf = (SSTrieLeaf*)Node;
+        if (!TrieNodeHitFlags[Leaf->Index])
+        {
+            //Leaf->HitFlag = 1;
+            TrieNodeHitFlags[Leaf->Index] = 1;
+            for (LeafNode = Leaf->Head; LeafNode; LeafNode = LeafNode->Next)
+            {        
+                g_TrieNodeMatches[LeafNode->ProteinNumber]++;
+            }
+        }
+        return;
+    }
+    Len = Exon->Length;
+    if (Pos >= Len)
+    {
+        for (EdgeIndex = 0; EdgeIndex < Exon->ForwardEdgeCount; EdgeIndex++)
+        {
+            Edge = Exon->ForwardEdges + EdgeIndex;
+            if (Edge->AA)
+            {
+                NextNode = Node->Children[Edge->AA - 'A'];
+                if (NextNode)
+                {
+                    NextLen = Edge->Exon->Length;
+                    SSDatabaseScanHelper(Edge->Exon, NextLen, 0, NextNode, Depth + 1);
+                }
+            }
+            else
+            {
+                NextLen = Edge->Exon->Length;
+                SSDatabaseScanHelper(Edge->Exon, NextLen, 0, Node, Depth);
+            }
+        }
+    }
+    else
+    {
+        NextNode = Node->Children[Exon->Sequence[Pos] - 'A'];
+        if (NextNode)
+        {
+            SSDatabaseScanHelper(Exon, Len, Pos + 1, NextNode, Depth + 1);
+        }
+    }
+}
+
+void DebugPrintSSTrie(SSTrieNode* Node, int Depth, char* Buffer)
+{
+    int AA;
+    SSTrieNode* Child;
+#ifdef SS_TRIE_DEBUG
+    SSTrieNode* FailureNode;
+#endif
+    SSTrieLeafNode* LeafNode;
+    SSTrieLeaf* Leaf;
+    //
+#ifdef SS_TRIE_DEBUG
+    for (AA = 0; AA < Depth; AA++)
+    {
+        printf(" ");
+    }
+    if (Depth == 8)
+    {
+        Leaf = (SSTrieLeaf*)Node;
+        FailureNode = Leaf->FailureNode;
+        if (FailureNode)
+        {
+            printf("Leaf '%s' failure '%s' (depth %d)\n", Leaf->Buffer, FailureNode->Buffer, FailureNode->Depth);
+        }
+        else
+        {
+            printf("Leaf '%s' (NO FAILURE NODE)\n", Leaf->Buffer);
+        }
+    }
+    else
+    {
+        FailureNode = Node->FailureNode;
+        if (FailureNode)
+        {
+            printf("Node '%s' d%d failure node '%s' (depth %d)\n", Node->Buffer, Node->Depth, 
+                FailureNode->Buffer, FailureNode->Depth);
+        }
+        else
+        {
+            printf("Node '%s' d%d NO FAILURE NODE\n", Node->Buffer, Node->Depth);
+        }
+        for (AA = 0; AA < 26; AA++)
+        {
+            Child = Node->Children[AA];
+            if (Child)
+            {
+                Buffer[Depth] = 'A' + AA;
+                DebugPrintSSTrie(Child, Depth + 1, Buffer);
+            }
+        }
+    }
+    return;
+#endif
+    if (Depth < 8)
+    {
+        for (AA = 0; AA < 26; AA++)
+        {
+            Child = Node->Children[AA];
+            if (Child)
+            {
+                Buffer[Depth] = 'A' + AA;
+                DebugPrintSSTrie(Child, Depth + 1, Buffer);
+            }
+        }
+        // Interior node: return here rather than falling through to the leaf printout below.
+        return;
+    }
+    Buffer[8] = '\0';
+    Leaf = (SSTrieLeaf*)Node;
+    for (LeafNode = Leaf->Head; LeafNode; LeafNode = LeafNode->Next)
+    {
+        printf("%s at pos %d in record #%d\n", Buffer, LeafNode->ProteinPos, LeafNode->ProteinNumber);
+    }
+}
+
+void SSDatabaseScanExon(ExonStruct* Exon, SSTrieNode* Root, int StartDepth)
+{
+    SSTrieNode* CurrentNode;
+    SSTrieNode* Child;
+    int StartPos;
+    int Pos;
+    int Depth;
+    int AA;
+    SSTrieLeaf* Leaf;
+    SSTrieLeafNode* LeafNode;
+    int LinkIndex;
+    ExonEdge* Edge;
+    //
+    CurrentNode = Root;
+    Pos = 0;
+    Depth = StartDepth;
+    //printf("\n--->Start exon scan: Exon len %d, start depth is %d\n", Exon->Length, StartDepth);
+
+    // It's possible that we started at a leaf, if an edge-AA finished us off.  If so,
+    // flag the match and return:
+    if (StartDepth == 8)
+    {
+        // We're at a leaf!  Flag this match:
+        Leaf = (SSTrieLeaf*)CurrentNode;
+        if (!TrieNodeHitFlags[Leaf->Index])
+        {
+            TrieNodeHitFlags[Leaf->Index] = 1;
+            for (LeafNode = Leaf->Head; LeafNode; LeafNode = LeafNode->Next)
+            {        
+                g_TrieNodeMatches[LeafNode->ProteinNumber]++;
+                //printf("At start of exon %d hit word %d\n", Exon->Index, LeafNode->ProteinPos); 
+            }
+            
+        }
+        return;
+    }
+    StartPos = 0;
+    while (1)
+    {
+        if (Pos >= Exon->Length)
+        {
+            // We've reached the end of the exon.  Follow all outgoing edges:
+            for (LinkIndex = 0; LinkIndex < Exon->ForwardEdgeCount; LinkIndex++)
+            {
+                Edge = Exon->ForwardEdges + LinkIndex;
+                AA = Edge->AA;
+                if (AA)
+                {
+                    Child = CurrentNode->Children[AA - 'A'];
+                    if (Child)
+                    {
+                        SSDatabaseScanExon(Edge->Exon, Child, Depth + 1);
+                    }
+                }
+                else
+                {
+                    SSDatabaseScanExon(Edge->Exon, CurrentNode, Depth);
+                }
+            }
+            // If we were already partway down the trie when we started this exon, return:
+            if (StartDepth)
+            {
+                return;
+            }
+            else
+            {
+                // Advance to the next starting point, and jump back to the root:
+                StartPos++;
+                if (StartPos >= Exon->Length)
+                {
+                    return; // done with all peptides that begin in this exon!
+                }
+                CurrentNode = Root;
+                Pos = StartPos;
+                Depth = 0;
+            }
+        }
+        //printf("%c Pos %d, current node '%s' depth %d=%d\n", Exon->Sequence[Pos], Pos, CurrentNode->Buffer, CurrentNode->Depth, Depth);
+        AA = Exon->Sequence[Pos] - 'A';
+        if (CurrentNode->Children[AA])
+        {
+            CurrentNode = CurrentNode->Children[AA];
+            Depth++;
+            if (Depth == 8)
+            {
+                // We're at a leaf!  Flag this match:
+                Leaf = (SSTrieLeaf*)CurrentNode;
+                if (!TrieNodeHitFlags[Leaf->Index])
+                {
+                    TrieNodeHitFlags[Leaf->Index] = 1;
+                    for (LeafNode = Leaf->Head; LeafNode; LeafNode = LeafNode->Next)
+                    {        
+                        g_TrieNodeMatches[LeafNode->ProteinNumber]++;
+                        //printf("At position %d in exon %d hit word %d\n", Pos, Exon->Index, LeafNode->ProteinPos); 
+                    }
+                    
+                }
+                // If our starting depth is >0, then don't go to a failure node,
+                // just return.  (We only use failure nodes when doing "normal"
+                // linear parsing along an exon)
+                if (StartDepth)
+                {
+                    return;
+                }
+                //Depth = Leaf->FailureDepth;
+                //CurrentNode = Leaf->FailureNode;
+                StartPos++;
+                Depth = 0;
+                CurrentNode = Root;
+                Pos = StartPos;
+                continue;
+            }
+            Pos++;
+        }
+        else
+        {
+            // Our match has ended.  Stop now, or use a failure node:
+            if (StartDepth)
+            {
+                return;
+            }
+            // If we are in the root now, then we should just advance by
+            // one character:
+            if (!Depth)
+            {
+                Pos++;
+                StartPos++;
+            }
+            else
+            {
+                //// We're not in the root, so we can use a failure node.
+                //// Pos stays where it is.
+                //Depth = CurrentNode->FailureDepth;
+                //CurrentNode = CurrentNode->FailureNode;
+                StartPos++;
+                CurrentNode = Root;
+                Pos = StartPos;
+                Depth = 0;
+            }
+        }
+    }
+}
+
+typedef struct SSMatchInfo
+{
+    int Coverage;
+    int RecordNumber;
+    int ChromosomeNumber;
+    int Strand;
+    int ApproximatePosition;
+    int CoverageStart;
+    int CoverageEnd;
+} SSMatchInfo;
+
+// From high to low coverage
+int CompareSSMatchInfo(const SSMatchInfo* a, const SSMatchInfo* b)
+{
+    if (a->Coverage > b->Coverage)
+    {
+        return -1;
+    }
+    if (a->Coverage < b->Coverage)
+    {
+        return 1;
+    }
+    return 0;
+}
+
+// Keep the top n matches for each protein, where n is this number:
+#define MATCHES_PER_PROTEIN 5
+#define LAST_MATCH_FOR_PROTEIN 4
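+// (LAST_MATCH_FOR_PROTEIN is the index of the worst saved match, i.e. MATCHES_PER_PROTEIN - 1.)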
+
+// Main function: Given an array of protein Sequences (with names in NameBuffer) and the splicedb file name,
+// scan through the genes in the splice-db to find words from the proteins
+void SSDatabaseScanProteins(int FirstRecordNumber, char** Sequences, char* NameBuffer, char* SpliceDBFileName, 
+    int BlockSize, FILE* OutputFile)
+{
+    int ProteinNumber;
+    int MaxSequenceLength = 0;
+    SSTrieNode* Root;
+    SSMatchInfo* AllMatchInfo;
+    SSMatchInfo* MatchInfo;
+    int SequenceLengths[SS_BLOCK_SIZE];
+    int RecordNumber = 0;
+    char* MatchFlags;
+    FILE* SpliceDBFile;
+    GeneStruct* CurrentGene;
+    ExonStruct* Exon;
+    int EdgeIndex;
+    int AA;
+    int ExonIndex;
+    int Len;
+    int Coverage;
+    SSTrieNode* Node;
+    int GeneNumber;
+    ExonEdge* Edge;
+    int MatchIndex;
+    //
+    AllMatchInfo = (SSMatchInfo*)calloc(SS_BLOCK_SIZE * MATCHES_PER_PROTEIN, sizeof(SSMatchInfo));
+    for (ProteinNumber = 0; ProteinNumber < BlockSize; ProteinNumber++)
+    {
+        SequenceLengths[ProteinNumber] = strlen(Sequences[ProteinNumber]);
+        MaxSequenceLength = max(MaxSequenceLength, SequenceLengths[ProteinNumber]);
+    }
+    MatchFlags = (char*)calloc(MaxSequenceLength, sizeof(char));
+    Root = ConstructSSTrie(Sequences, BlockSize);
+    SetSSTrieFailureNodes(Root);
+    //DebugPrintSSTrie(Root, 0, DebugBuffer);
+    SpliceDBFile = fopen(SpliceDBFileName, "rb");
+    if (!SpliceDBFile)
+    {
+        REPORT_ERROR_S(8, SpliceDBFileName);
+        return;
+    }
+    memset(TrieNodeHitFlags, 0, sizeof(int) * MAX_TRIE_NODE_COUNT);
+    GeneNumber = 0;
+    while (1)
+    {
+        GeneNumber++;
+        if (GeneNumber%100 == 0)
+        {
+            printf("%d ", GeneNumber);
+        }
+        //ResetSSTrieFlags(Root, 0);
+        memset(TrieNodeHitFlags, 0, sizeof(int) * g_NextTrieLeafIndex);
+        memset(g_TrieNodeMatches, 0, sizeof(int) * SS_BLOCK_SIZE);
+        CurrentGene = LoadGene(SpliceDBFile);
+        if (!CurrentGene)
+        {
+            break;
+        }
+        // Iterate over exons:
+        for (ExonIndex = 0; ExonIndex < CurrentGene->ExonCount; ExonIndex++)
+        {
+            Exon = CurrentGene->Exons + ExonIndex;
+            Len = Exon->Length;
+
+            // Try starting a match with the incoming edge:
+            for (EdgeIndex = 0; EdgeIndex < Exon->BackEdgeCount; EdgeIndex++)
+            {
+                Edge = Exon->BackwardEdges + EdgeIndex;
+                AA = Edge->AA - 'A';
+                if (AA >= 0 && AA < 26)
+                {
+                    Node = Root->Children[AA];
+                    //SSDatabaseScanHelper(Exon, Len, 0, Node, 1);
+                    if (Node)
+                    {
+                        SSDatabaseScanExon(Exon, Node, 1);
+                    }
+                }
+            }
+            SSDatabaseScanExon(Exon, Root, 0);
+            //for (Pos = 0; Pos < Len; Pos++)
+            //{
+            //    SSDatabaseScanHelper(Exon, Len, Pos, Root, 0);
+            //}
+        }
+        // Rate the quality of the match, saving it if it's good:
+        for (ProteinNumber = 0; ProteinNumber < BlockSize; ProteinNumber++)
+        {
+            Coverage = g_TrieNodeMatches[ProteinNumber];
+            if (Coverage > SequenceLengths[ProteinNumber] * 0.1)
+            {
+                //printf("\nProtein #%d: %s\n", ProteinNumber, NameBuffer + (ProteinNumber * PROTEIN_NAME_BLOCK));
+                //DebugPrintGene(CurrentGene);
+                memset(MatchFlags, 0, sizeof(char) * MaxSequenceLength);
+                Coverage = SSTrieCoverSequence(Root, MatchFlags, 0, ProteinNumber);
+                //for (AA = 0; AA < SequenceLengths[ProteinNumber]; AA++)
+                //{
+                //    printf("%d\t%c\t%d\t\n", AA, Sequences[ProteinNumber][AA], MatchFlags[AA]);
+                //}
+                // If this coverage is better than the lowest-saved-coverage for the protein,
+                // then replace the lowest-saved-coverage and sort the list:
+                if (Coverage > AllMatchInfo[ProteinNumber * MATCHES_PER_PROTEIN + LAST_MATCH_FOR_PROTEIN].Coverage)
+                { 
+                    MatchInfo = AllMatchInfo + ProteinNumber * MATCHES_PER_PROTEIN + LAST_MATCH_FOR_PROTEIN;
+                    MatchInfo->Coverage = Coverage;
+                    MatchInfo->RecordNumber = RecordNumber;
+                    MatchInfo->ChromosomeNumber = CurrentGene->ChromosomeNumber;
+                    MatchInfo->Strand = CurrentGene->ForwardFlag;
+                    MatchInfo->ApproximatePosition = CurrentGene->Exons[0].Start;
+
+                    for (AA = 0; AA < SequenceLengths[ProteinNumber]; AA++)
+                    {
+                        if (MatchFlags[AA])
+                        {
+                            MatchInfo->CoverageStart = AA;
+                            //BestStartPos[ProteinNumber] = AA;
+                            break;
+                        }
+                    }
+                    for (AA = SequenceLengths[ProteinNumber] - 1; AA >= 0; AA--)
+                    {
+                        if (MatchFlags[AA])
+                        {
+                            MatchInfo->CoverageEnd = AA;
+                            //BestEndPos[ProteinNumber] = AA;
+                            break;
+                        }
+                    }
+                    qsort(AllMatchInfo + ProteinNumber * MATCHES_PER_PROTEIN, MATCHES_PER_PROTEIN, sizeof(SSMatchInfo), (QSortCompare)CompareSSMatchInfo);
+                }
+            }
+        }
+        RecordNumber += 1;
+        FreeGene(CurrentGene);
+        // If we've got 95% of the protein, then stop now - we probably won't 
+        // get any more!
+        //if (BestCoverage > 0.95*SequenceLength)
+        //{
+        //    break;
+        //}
+    }
+    // Print the match:
+    for (ProteinNumber = 0; ProteinNumber < BlockSize; ProteinNumber++)
+    {
+        fprintf(OutputFile, "%d\t", FirstRecordNumber + ProteinNumber);
+        fprintf(OutputFile, "%s\t", NameBuffer + (ProteinNumber * PROTEIN_NAME_BLOCK));
+        fprintf(OutputFile, "%d\t", SequenceLengths[ProteinNumber]);
+        for (MatchIndex = 0; MatchIndex < MATCHES_PER_PROTEIN; MatchIndex++)
+        {
+            MatchInfo = AllMatchInfo + ProteinNumber * MATCHES_PER_PROTEIN + MatchIndex;
+            if (MatchInfo->Coverage)
+            {
+                fprintf(OutputFile, "%d\t", MatchInfo->ChromosomeNumber);
+                fprintf(OutputFile, "%d\t", MatchInfo->Strand);
+                fprintf(OutputFile, "%d\t", MatchInfo->ApproximatePosition);
+                fprintf(OutputFile, "%d\t", MatchInfo->Coverage);
+            }
+        }
+        fprintf(OutputFile, "\n");
+    }
+    FreeSSTrieNode(Root, 0);
+
+    // Cleanup:
+    SafeFree(MatchFlags);
+    SafeFree(AllMatchInfo);
+}
+
+typedef struct SSHashNode
+{
+    char TrueSequence[8];
+    int ProteinIndex;
+    int ProteinPos;
+    int MatchFlag;
+    struct SSHashNode* Next;
+} SSHashNode;
+
+#define SS_HASH_MAX 5000000
+// Big hash:
+SSHashNode* SSHash[SS_HASH_MAX];
+
+void ClearSSHash()
+{
+    int HashIndex;
+    SSHashNode* Node;
+    SSHashNode* Prev;
+    // 
+    for (HashIndex = 0; HashIndex <  SS_HASH_MAX; HashIndex++)
+    {
+        Prev = NULL;
+        Node = SSHash[HashIndex];
+        while (Node)
+        {
+            SafeFree(Prev);
+            Prev = Node;
+            Node = Node->Next;
+        }
+        SafeFree(Prev);
+        SSHash[HashIndex] = NULL;
+    }
+}
+
+#define HASH_SEQUENCE(Buffer)\
+HashValue = 0;\
+for (X = 0; X < 8; X++)\
+{\
+    HashValue += Buffer[X] * X * X;\
+    HashValue %= SS_HASH_MAX;\
+}\
+    
+// Hashing *may* be faster than the trie; it hasn't been implemented yet.
+void PopulateSSHash(char** SequenceBuffer, int BlockSize)
+{
+    int ProteinIndex;
+    int Pos;
+    int Len;
+    //
+    for (ProteinIndex = 0; ProteinIndex < BlockSize; ProteinIndex++)
+    {
+        Len = strlen(SequenceBuffer[ProteinIndex]);
+        for (Pos = 0; Pos < Len - 7; Pos++)
+        {
+        }
+    }
+    
+}
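+
+// A minimal sketch (illustrative only; PopulateSSHash above is left as a stub) of how the
+// hash could be filled: one SSHashNode per 8-mer, chained on collisions, with the bucket
+// chosen by the HASH_SEQUENCE macro.
+void ExamplePopulateSSHash(char** SequenceBuffer, int BlockSize)
+{
+    int ProteinIndex;
+    int Pos;
+    int Len;
+    int X;
+    int HashValue;
+    SSHashNode* Node;
+    //
+    for (ProteinIndex = 0; ProteinIndex < BlockSize; ProteinIndex++)
+    {
+        Len = strlen(SequenceBuffer[ProteinIndex]);
+        for (Pos = 0; Pos < Len - 7; Pos++)
+        {
+            HASH_SEQUENCE((SequenceBuffer[ProteinIndex] + Pos));
+            Node = (SSHashNode*)calloc(1, sizeof(SSHashNode));
+            memcpy(Node->TrueSequence, SequenceBuffer[ProteinIndex] + Pos, 8);
+            Node->ProteinIndex = ProteinIndex;
+            Node->ProteinPos = Pos;
+            Node->Next = SSHash[HashValue];
+            SSHash[HashValue] = Node;
+        }
+    }
+}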
+
+// For more rapid scanning of proteins...let's use a hash instead of a trie.  
+void SSQDatabaseScanProteins(char** SequenceBuffer, char* NameBuffer, char* SpliceDBFileName, int BlockSize)
+{
+    ClearSSHash();
+    PopulateSSHash(SequenceBuffer, BlockSize);
+}
+
+// Main method: read protein records from the .trie/.index database pair in blocks of up to
+// SS_BLOCK_SIZE, and scan each block against the splice-db with SSDatabaseScanProteins.
+void SSDatabaseScan(char* TrieFileName, char* IndexFileName, char* SpliceDBFileName,
+    int FirstRecord, int LastRecord)
+{
+    //GeneStruct* CurrentGene;
+    //GeneStruct* LoadGene(FILE* File)
+    int DummyInt;
+    int LastFilePos = -1;
+    int FilePos;
+    FILE* TrieFile;
+    FILE* IndexFile;
+    char* SequenceBuffer[SS_BLOCK_SIZE];
+    char NameBuffer[PROTEIN_NAME_BLOCK * SS_BLOCK_SIZE];
+    int BytesRead;
+    int RecordLength;
+    int BlockIndex = 0;
+    FILE* OutputFile;
+    int RecordNumber;
+    int BlockFirstRecordNumber = 0;
+    //
+    TrieFile = fopen(TrieFileName, "rb");
+    if (!TrieFile)
+    {
+        REPORT_ERROR_S(8, TrieFileName);
+        return;
+    }
+    IndexFile = fopen(IndexFileName, "rb");
+    if (!IndexFile)
+    {
+        REPORT_ERROR_S(8, IndexFileName);
+        return;
+    }
+    OutputFile = fopen("SSDatabaseScan.txt", "wb");
+    if (!OutputFile)
+    {
+        printf("** Error: Failed to open SSDatabaseScan.txt\n");
+        return;
+    }
+    // Header:
+    fprintf(OutputFile, "RecordNumber\tProtein\tLength\tChromosome\tForwardFlag\tApproxPos\tCoverage\t\n");
+    // Read protein records from the .trie database.  Once a block of them has accumulated,
+    // launch a scan through the exon graph with SSDatabaseScanProteins (which builds the trie).
+    RecordNumber = 0;
+    while (1)
+    {
+        BytesRead = ReadBinary(&DummyInt, sizeof(int), 1, IndexFile);
+        if (!BytesRead)
+        {
+            // End of file.  Scan our last block, if we have anything in the block:
+            if (LastFilePos >= 0 && BlockIndex)
+            {
+                fseek(TrieFile, FilePos, SEEK_SET);
+                SequenceBuffer[BlockIndex - 1] = (char*)calloc(30000, sizeof(char));
+                ReadBinary(SequenceBuffer[BlockIndex - 1], sizeof(char), 30000, TrieFile);
+                SSDatabaseScanProteins(BlockFirstRecordNumber, SequenceBuffer, NameBuffer, SpliceDBFileName, BlockIndex, OutputFile);
+            }
+            break;
+        }
+        BytesRead = ReadBinary(&DummyInt, sizeof(int), 1, IndexFile);
+        BytesRead = ReadBinary(&FilePos, sizeof(int), 1, IndexFile);
+        // 
+        if (LastFilePos >= 0 && RecordNumber >= FirstRecord)
+        {
+            RecordLength = FilePos - LastFilePos - 1;
+            SequenceBuffer[BlockIndex - 1] = (char*)calloc(RecordLength + 1, sizeof(char));
+            fseek(TrieFile, LastFilePos, SEEK_SET);
+            ReadBinary(SequenceBuffer[BlockIndex - 1], sizeof(char), RecordLength, TrieFile);
+            if (BlockIndex == SS_BLOCK_SIZE || (LastRecord >= 0 && RecordNumber >= LastRecord))
+            {
+                SSDatabaseScanProteins(BlockFirstRecordNumber, SequenceBuffer, NameBuffer, SpliceDBFileName, BlockIndex, OutputFile);
+                // Free this block's sequence buffers; this leaves BlockIndex at 0 for the next block:
+                while (BlockIndex > 0)
+                {
+                    BlockIndex--;
+                    SafeFree(SequenceBuffer[BlockIndex]);
+                }
+                // If we hit the last record, then stop now.
+                if (LastRecord >= 0 && RecordNumber >= LastRecord)
+                {
+                    break;
+                }
+            }
+        }
+        LastFilePos = FilePos;
+        ReadBinary(NameBuffer + BlockIndex*PROTEIN_NAME_BLOCK, sizeof(char), 80, IndexFile);
+        NameBuffer[BlockIndex*PROTEIN_NAME_BLOCK + 80] = '\0';
+        if (RecordNumber >= FirstRecord)
+        {
+            if (BlockIndex == 0)
+            {
+                BlockFirstRecordNumber = RecordNumber;
+            }
+            BlockIndex++;
+        }
+        RecordNumber++;
+    }
+    fclose(IndexFile);
+    fclose(TrieFile);
+}
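+
+// A minimal usage sketch (file names are illustrative only): scan every protein record in a
+// .trie/.index database pair against one splice-db file; results are written to
+// SSDatabaseScan.txt.  A negative LastRecord means "scan through the final record".
+void ExampleSSDatabaseScan()
+{
+    SSDatabaseScan("Proteins.trie", "Proteins.index", "ESTSpliceDB\\17+.dat", 0, -1);
+}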
diff --git a/SpliceScan.h b/SpliceScan.h
new file mode 100644
index 0000000..63346ae
--- /dev/null
+++ b/SpliceScan.h
@@ -0,0 +1,39 @@
+//Title:          SpliceScan.h
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+#ifndef SPLICE_SCAN_H
+#define SPLICE_SCAN_H
+
+void SSDatabaseScan(char* TrieFileName, char* IndexFileName, char* SpliceDBFileName,
+    int FirstRecord, int LastRecord);
+
+#endif // SPLICE_SCAN_H
diff --git a/Spliced.c b/Spliced.c
new file mode 100644
index 0000000..c061dfe
--- /dev/null
+++ b/Spliced.c
@@ -0,0 +1,2113 @@
+//Title:          Spliced.c
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+#include "CMemLeak.h"
+#include "Utils.h"
+#include <stdio.h>
+#include <math.h>
+#include <stdlib.h>
+#include "Trie.h"
+#include "Spliced.h"
+#include "Inspect.h"
+#include "ExonGraphAlign.h"
+#include "Errors.h"
+// The left and right extensions of a tag can have at most this many successes.
+#define MAX_SIDE_EXTENSIONS 128
+
+// For keeping a linked list of genes in memory.  In practice, we generally DON'T use this list;
+// instead we load and search one gene at a time.  (Genomes are big!)
+GeneStruct* FirstGene;
+GeneStruct* LastGene;
+
+ExonStruct** g_TagExonArray;
+int g_TagExonArrayPos;
+ExonEdge** g_TagSpliceArray;
+int g_TagSpliceArrayPos;
+// Tags often contain residues K/Q and I/L.  We wish to report the true residue found
+// in the exon, undoing Q->K and I->L substitutions as necessary.  So, we log the 
+// matched tag chars in g_TagBuffer.
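+// For example, if a tag contains 'K' where the exon actually codes 'Q', the buffer records 'Q'
+// so that the reported peptide reflects the residue in the exon.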
+char* g_TagBuffer;
+char* g_TagBufferSpliced;
+int g_TagBufferPos;
+int g_TagBufferPosSpliced;
+
+static char* MatchedBases;
+char* ExtensionBufferLeft;
+char* ExtensionBufferRight;
+int* ExtensionLeftDecorations;
+int* ExtensionRightDecorations;
+int* ExtensionGenomicStart;
+int* ExtensionGenomicEnd;
+MSSpectrum** ExtensionSpectra;
+
+static int MH_MinMatchMass;
+static int MH_MaxMatchMass;
+static char* MH_MatchBuffer;
+static char* MH_MatchBufferSpliced;
+static int* MH_MatchDecoration;
+static int MH_MatchCount;
+static char* MH_Buffer;
+static char* MH_BufferSpliced;
+static ExonStruct** MH_MatchExons;
+static ExonEdge** MH_MatchEdges;
+static int MH_MatchExonPos;
+static int MH_MatchEdgePos;
+//static int MH_MatchSplicePos;
+
+ExonEdge* GetReciprocalExonEdge(ExonEdge* Edge, int ForwardFlag);
+
+// Free one Gene (and its exons)
+void FreeGene(GeneStruct* Gene)
+{
+    int Index;
+    //
+    if (!Gene)
+    {
+        return;
+    }
+    for (Index = 0; Index < Gene->ExonCount; Index++)
+    {
+        SafeFree(Gene->Exons[Index].ForwardEdges);
+        SafeFree(Gene->Exons[Index].BackwardEdges);
+        SafeFree(Gene->Exons[Index].Sequence);
+    }
+    SafeFree(Gene->Exons);
+    SafeFree(Gene);
+}
+
+// Free the global list of genes.  (Not used in practice, since we load one at a time)
+void FreeGenes()
+{
+    GeneStruct* Gene;
+    GeneStruct* Prev = NULL;
+    //
+    for (Gene = FirstGene; Gene; Gene = Gene->Next)
+    {
+        if (Prev)
+        {
+            FreeGene(Prev);
+        }
+        Prev = Gene;
+    }
+    if (Prev)
+    {
+        FreeGene(Prev);
+    }
+    FirstGene = NULL;
+    LastGene = NULL;
+}
+
+// For debugging purposes: Print out a list of exons (with partial sequences) and edges.
+// (Mostly for verifying database generation worked)
+void DebugPrintGene(GeneStruct* Gene)
+{
+    int ExonIndex;
+    ExonStruct* Exon;
+    int EdgeIndex;
+    //
+    printf("*Gene %s (%s) has %d exons\n", Gene->Name, Gene->SprotName, Gene->ExonCount);
+    for (ExonIndex = 0; ExonIndex < Gene->ExonCount; ExonIndex++)
+    {
+        Exon = Gene->Exons + ExonIndex;
+        printf("Exon %d from %d-%d cov %d: \n", Exon->Index, Exon->Start, Exon->End, Exon->Occurrences);
+        if (Exon->Sequence)
+        {
+            printf("%s", Exon->Sequence);
+        }
+        else
+        {
+            printf("<none>");
+        }
+        printf("\n");
+        //printf("  Exon from %d-%d coverage %d sequence %s...\n", Exon->Start, Exon->End, Exon->Occurrences, Buffer);
+        for (EdgeIndex = 0; EdgeIndex < Exon->ForwardEdgeCount; EdgeIndex++)
+        {
+            printf("  >> (%d) '%c' to exon #%d %d-%d\n", Exon->ForwardEdges[EdgeIndex].Power, 
+                Exon->ForwardEdges[EdgeIndex].AA, Exon->ForwardEdges[EdgeIndex].Exon->Index, 
+                Exon->ForwardEdges[EdgeIndex].Exon->Start, Exon->ForwardEdges[EdgeIndex].Exon->End);
+        }
+        for (EdgeIndex = 0; EdgeIndex < Exon->BackEdgeCount; EdgeIndex++)
+        {
+            printf("  << (%d) '%c' to exon #%d %d-%d\n", Exon->BackwardEdges[EdgeIndex].Power, 
+                Exon->BackwardEdges[EdgeIndex].AA, Exon->BackwardEdges[EdgeIndex].Exon->Index, 
+                Exon->BackwardEdges[EdgeIndex].Exon->Start, Exon->BackwardEdges[EdgeIndex].Exon->End);
+        }
+    }
+}
+
+// For debugging: Print *all* our genes, and their exons and edges
+void DebugPrintGenes()
+{
+    GeneStruct* Gene;
+    //
+    printf("Genes:\n");
+    for (Gene = FirstGene; Gene; Gene = Gene->Next)
+    {
+        printf("\n");
+        DebugPrintGene(Gene);
+    }
+}
+
+// Load one gene from the (binary) gene file.  Does some basic error checking, in case of 
+// obsolete or broken file formats.
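+// Record layout, as read below: gene name (GENE_NAME_LENGTH chars), SwissProt name
+// (GENE_NAME_LENGTH chars), chromosome number (int), forward flag (char), and exon count (int);
+// then, for each exon: start (int), end (int), sequence length (int), occurrence count (int),
+// sequence (length chars), prefix (2 chars), suffix (2 chars), back-edge count (int), and
+// forward-edge count (int); then, for each back-edge: linked exon index (int), link power (int),
+// and bridging amino acid (char).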
+GeneStruct* LoadGene(FILE* File)
+{
+    char Buffer[1024];
+    int Bytes;
+    GeneStruct* Gene;
+    int ExonIndex;
+    int OtherExonIndex;
+    ExonStruct* Exon;
+    ExonStruct* OtherExon;
+    int Length;
+    int EdgeIndex;
+    char AA;
+    int LinkPower;
+    //
+    Bytes = ReadBinary(Buffer, sizeof(char), GENE_NAME_LENGTH, File);
+    if (!Bytes)
+    {
+        return NULL; // eof
+    }   
+    Gene = (GeneStruct*)calloc(1, sizeof(GeneStruct));
+    strncpy(Gene->Name, Buffer, GENE_NAME_LENGTH);
+    ReadBinary(Gene->SprotName, sizeof(char), GENE_NAME_LENGTH, File);
+    ReadBinary(&Gene->ChromosomeNumber, sizeof(int), 1, File);
+    if (!Gene->ChromosomeNumber)
+    {
+        printf("** Warning: No chromosome number for gene '%s'\n", Gene->Name);
+    }
+    ReadBinary(&Gene->ForwardFlag, sizeof(char), 1, File);
+    ReadBinary(&Gene->ExonCount, sizeof(int), 1, File);
+    if (Gene->ExonCount < 1 || Gene->ExonCount > MAX_GENE_EXONS)
+    {
+        printf("** Warning: suspicious exon-count %d encountered in LoadGene().  File position is %ld.\n", Gene->ExonCount, ftell(File));
+        return NULL;
+    }
+    //fread(&GIIDBlock, sizeof(int), 10, File);
+    Gene->Exons = (ExonStruct*)calloc(Gene->ExonCount, sizeof(ExonStruct));
+
+    // Read the gene's exons:
+    for (ExonIndex = 0; ExonIndex < Gene->ExonCount; ExonIndex++)
+    {
+        //printf("Filepos %d, now read exon %d of %d\n", ftell(File), ExonIndex, Gene->ExonCount);
+        Exon = Gene->Exons + ExonIndex;
+        Exon->Gene = Gene;
+        Bytes = ReadBinary(&Exon->Start, sizeof(int), 1, File);
+        if (!Bytes)
+        {
+            printf("** Error: EOF encountered while reading exon %d of gene '%s'\n", ExonIndex, Gene->Name);
+            break;
+        }
+        Exon->Index = ExonIndex;
+        ReadBinary(&Exon->End, sizeof(int), 1, File);
+        ReadBinary(&Length, sizeof(int), 1, File);
+        if (Length < 0 || Length > 10000)
+        {
+            printf("** Error: Bogus sequence length %d encountered while reading exon %d of gene '%s'\n", Length, ExonIndex, Gene->Name);
+            break;
+        }
+        ReadBinary(&Exon->Occurrences, sizeof(int), 1, File);
+        Exon->Length = Length;
+        if (Length)
+        {
+            Exon->Sequence = (char*)calloc(Length + 1, sizeof(char));
+            ReadBinary(Exon->Sequence, sizeof(char), Length, File);
+        }
+        else
+        {
+            Exon->Sequence = NULL;
+        }
+        //printf("%d '%s'\n", ExonIndex, Exon->Sequence); // 
+        ReadBinary(&Exon->Prefix, sizeof(char), 2, File);
+        ReadBinary(&Exon->Suffix, sizeof(char), 2, File);
+        ReadBinary(&Exon->BackEdgeCount, sizeof(int), 1, File);
+        if (Exon->BackEdgeCount < 0 || Exon->BackEdgeCount > 500)
+        {
+            printf("** Error: Invalid back-edge count %d in LoadGene(), exon %d of gene '%s'\n", Exon->BackEdgeCount, ExonIndex, Gene->Name);
+        }
+        ReadBinary(&Exon->ForwardEdgeCount, sizeof(int), 1, File);
+        if (Exon->ForwardEdgeCount < 0 || Exon->ForwardEdgeCount > 500)
+        {
+            printf("** Error: Invalid forward-edge count %d in LoadGene(), exon %d of gene '%s'\n", Exon->ForwardEdgeCount, ExonIndex, Gene->Name);
+        }
+        
+        if (Exon->ForwardEdgeCount)
+        {
+            Exon->ForwardEdges = (ExonEdge*)calloc(Exon->ForwardEdgeCount, sizeof(ExonEdge));
+        }
+        if (Exon->BackEdgeCount)
+        {
+            Exon->BackwardEdges = (ExonEdge*)calloc(Exon->BackEdgeCount, sizeof(ExonEdge));
+        }
+        // Read all the edges for this exon.  (Read all the back-edges now; the forward-edges are filled in later by SetExonForwardEdges.)
+        for (EdgeIndex = 0; EdgeIndex < Exon->BackEdgeCount; EdgeIndex++)
+        {
+            Bytes = ReadBinary(&OtherExonIndex, sizeof(int), 1, File);
+            if (!Bytes)
+            {
+                printf("** Error: EOF encountered while reading exon %d edge %d of gene '%s'\n", ExonIndex, EdgeIndex, Gene->Name);
+                break;
+            }
+            ReadBinary(&LinkPower, sizeof(int), 1, File);
+            ReadBinary(&AA, sizeof(char), 1, File);
+            if (OtherExonIndex < 0 || OtherExonIndex >= Gene->ExonCount)
+            {
+                printf("** Error: Illegal exon back-link %d encountered for exon %d edge %d gene '%s'\n", OtherExonIndex, ExonIndex, EdgeIndex, Gene->Name);
+            }
+            else
+            {
+                OtherExon = Gene->Exons + OtherExonIndex;
+                Exon->BackwardEdges[EdgeIndex].Exon = OtherExon;
+                Exon->BackwardEdges[EdgeIndex].AA = AA;
+                Exon->BackwardEdges[EdgeIndex].Power = LinkPower;
+                Exon->BackwardEdges[EdgeIndex].Source = Exon;
+            }
+        }
+    } // exon loop
+    // We set all the back-links while we're reading the exons in.  Now, let's go through
+    // and fix all the forward-links.  
+    SetExonForwardEdges(Gene);
+    return Gene;
+}
+
+// INPUT: A gene where the backward edges are populated, and the exon forward edges are allocated but *not* populated.
+// Result: Forward edges are populated.
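+// For example, a back-edge on exon 5 pointing to exon 2 with bridging residue 'G' yields a
+// forward edge on exon 2 pointing to exon 5, carrying the same residue and link power.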
+void SetExonForwardEdges(GeneStruct* Gene)
+{
+    int ExonIndex;
+    ExonStruct* Exon;
+    ExonStruct* OtherExon;
+    int EdgeIndex;
+    int OtherEdgeIndex;
+    int ForwardEdgeSet;
+    char AA;
+    int LinkPower;
+    //
+    for (ExonIndex = 0; ExonIndex < Gene->ExonCount; ExonIndex++)
+    {
+        Exon = Gene->Exons + ExonIndex;
+        for (EdgeIndex = 0; EdgeIndex < Exon->BackEdgeCount; EdgeIndex++)
+        {
+            // The first empty slot in the OtherExon forward arrays will now be set:
+            OtherExon = Exon->BackwardEdges[EdgeIndex].Exon;
+            AA = Exon->BackwardEdges[EdgeIndex].AA;
+            LinkPower = Exon->BackwardEdges[EdgeIndex].Power;
+            ForwardEdgeSet = 0;
+            for (OtherEdgeIndex = 0; OtherEdgeIndex < OtherExon->ForwardEdgeCount; OtherEdgeIndex++)
+            {
+                if (!OtherExon->ForwardEdges[OtherEdgeIndex].Exon)
+                {
+                    OtherExon->ForwardEdges[OtherEdgeIndex].Exon = Exon;
+                    OtherExon->ForwardEdges[OtherEdgeIndex].AA = AA;
+                    OtherExon->ForwardEdges[OtherEdgeIndex].Power = LinkPower;
+                    OtherExon->ForwardEdges[OtherEdgeIndex].Source = OtherExon;
+                    ForwardEdgeSet = 1;
+                    break;
+                }
+            }
+            if (!ForwardEdgeSet)
+            {
+                REPORT_ERROR_IIS(26, OtherExon->Index, Exon->Index, Gene->Name);
+            }
+        }
+    }
+}
+
+// Load genes from a binary file, built by running inspect with splicedb arguments.
+// (In practice, we don't call this - we just load ONE gene at a time!)
+void LoadGenes(char* FileName)
+{
+    FILE* File;
+    GeneStruct* Gene;
+    //
+    File = fopen(FileName, "rb");
+    if (!File)
+    {
+        printf("** Error: Unable to open gene file '%s'\n", FileName);
+        return;
+    }
+    while (1)
+    {
+        Gene = LoadGene(File);
+        if (!Gene)
+        {
+            break;
+        }
+
+        // Insert new gene into list:
+        if (LastGene)
+        {
+            LastGene->Next = Gene;
+            Gene->Prev = LastGene;
+        }
+        else
+        {
+            FirstGene = Gene;
+        }
+        LastGene = Gene;
+    }
+    fclose(File);
+}
+
+// Static structures used in splice-tolerant search.  Tag extension builds up an array
+// of extension-matches for the left and for the right, then tries each combination of
+// a legal right-extension and a legal left-extension.
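+// For example, 3 legal left-extensions and 2 legal right-extensions yield up to 3 * 2 = 6
+// candidate combinations, each checked against the combined PTM limits in GetSplicedMatches().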
+char* SLeftMatchBuffer = NULL; // The AAs of the extension
+char* SLeftMatchBufferSpliced = NULL; // The AAs of the extension, with splice boundaries
+int* SLeftMatchDecoration = NULL; // The decoration to be attached over the extension
+int* SLeftGenomicPosition = NULL;
+int* SRightGenomicPosition = NULL;
+char* SLeftPrefixes = NULL; // The AA just *beyond* the extension
+char* SRightMatchBuffer = NULL;
+char* SRightMatchBufferSpliced = NULL;
+int* SRightMatchDecoration = NULL;
+char* SRightSuffixes = NULL;
+ExonStruct** SLeftExon = NULL; // The exons reached by prefix extension.  SLeftExon[MatchNumber*16 + ExonIndex]
+ExonEdge** SLeftEdge = NULL; // The splice boundaries crossed by prefix extension.
+//int* SLeftSpliceScore = NULL; // The scores of splice boundaries used in prefix extension.
+int* SLeftExonCount = NULL;
+int* SLeftSpliceCount = NULL;
+ExonStruct** SRightExon = NULL; // The exons reached by suffix extension.  SLeftExon[MatchNumber*16 + ExonIndex]
+ExonEdge** SRightEdge = NULL; // The splice boundaries crossed by suffix extension.
+//int* SRightSpliceScore = NULL;
+int* SRightExonCount = NULL;
+int* SRightSpliceCount = NULL;
+
+void AllocSpliceStructures()
+{
+    if (SLeftMatchBuffer)
+    {
+        return; // It seems we've already allocated them.
+    }
+    
+    // The Spliced buffer is made extra-long so that we can afford to add two symbol chars
+    // per amino acid.
+    SLeftMatchBuffer = (char*)calloc(MAX_EXTENSION_LENGTH * MAX_SIDE_EXTENSIONS, sizeof(char));
+    SLeftMatchBufferSpliced = (char*)calloc(MAX_SEXTENSION_LENGTH * MAX_SIDE_EXTENSIONS, sizeof(char));
+    SLeftMatchDecoration = (int*)calloc(MAX_SIDE_EXTENSIONS + 1, sizeof(int));
+    SLeftGenomicPosition = (int*)calloc(MAX_SIDE_EXTENSIONS + 1, sizeof(int));
+    SLeftPrefixes = (char*)calloc(MAX_SIDE_EXTENSIONS, sizeof(char));
+    SRightMatchBuffer = (char*)calloc(MAX_EXTENSION_LENGTH * MAX_SIDE_EXTENSIONS, sizeof(char));
+    SRightMatchBufferSpliced = (char*)calloc(MAX_SEXTENSION_LENGTH * MAX_SIDE_EXTENSIONS, sizeof(char));
+    SRightMatchDecoration = (int*)calloc(MAX_SIDE_EXTENSIONS + 1, sizeof(int));
+    SRightGenomicPosition = (int*)calloc(MAX_SIDE_EXTENSIONS + 1, sizeof(int));
+    SRightSuffixes = (char*)calloc(MAX_SIDE_EXTENSIONS, sizeof(char));
+
+    SLeftExon = (ExonStruct**)calloc(MAX_SIDE_EXTENSIONS * MAX_EXTENSION_EXONS, sizeof(ExonStruct*));
+    SLeftEdge = (ExonEdge**)calloc(MAX_SIDE_EXTENSIONS * MAX_EXTENSION_EXONS, sizeof(ExonEdge*));
+    //SLeftSpliceScore = (int*)calloc(MAX_SIDE_EXTENSIONS * MAX_EXTENSION_EXONS, sizeof(int));
+    SLeftExonCount = (int*)calloc(MAX_SIDE_EXTENSIONS, sizeof(int));
+    SLeftSpliceCount = (int*)calloc(MAX_SIDE_EXTENSIONS, sizeof(int));
+
+    SRightExon = (ExonStruct**)calloc(MAX_SIDE_EXTENSIONS * MAX_EXTENSION_EXONS, sizeof(ExonStruct*));
+    SRightEdge = (ExonEdge**)calloc(MAX_SIDE_EXTENSIONS * MAX_EXTENSION_EXONS, sizeof(ExonEdge*));
+    //SRightSpliceScore = (int*)calloc(MAX_SIDE_EXTENSIONS * MAX_EXTENSION_EXONS, sizeof(int));
+    SRightExonCount = (int*)calloc(MAX_SIDE_EXTENSIONS, sizeof(int));
+    SRightSpliceCount = (int*)calloc(MAX_SIDE_EXTENSIONS, sizeof(int));
+
+    g_TagExonArray = (ExonStruct**)calloc(16, sizeof(ExonStruct*));
+    g_TagSpliceArray = (ExonEdge**)calloc(16, sizeof(ExonEdge*));
+    g_TagBuffer = (char*)calloc(10, sizeof(int));
+    g_TagBufferSpliced = (char*)calloc(MAX_EXTENSION_EXONS, sizeof(int));
+
+    MH_Buffer = (char*)calloc(128, sizeof(char));
+    MH_BufferSpliced = (char*)calloc(128, sizeof(char));
+    MH_MatchExons = (ExonStruct**)calloc(MAX_EXTENSION_EXONS, sizeof(ExonStruct*));
+    MH_MatchEdges = (ExonEdge**)calloc(MAX_EXTENSION_EXONS, sizeof(ExonEdge*));
+    //MH_MatchSplices = (int*)calloc(MAX_EXTENSION_EXONS, sizeof(int));
+
+    MatchedBases = (char*)calloc(512, sizeof(char));
+    ExtensionBufferLeft = (char*)calloc(MAX_EXTENSION_LENGTH * 512, sizeof(char));
+    ExtensionBufferRight = (char*)calloc(MAX_EXTENSION_LENGTH * 512, sizeof(char));
+    ExtensionLeftDecorations = (int*)calloc(512, sizeof(int));
+    ExtensionRightDecorations = (int*)calloc(512, sizeof(int));
+    ExtensionGenomicStart = (int*)calloc(512, sizeof(int));
+    ExtensionGenomicEnd = (int*)calloc(512, sizeof(int));
+
+    ExtensionSpectra = (MSSpectrum**)calloc(512, sizeof(MSSpectrum*));
+}
+
+
+// Helper function: We've successfully extended a tag either forward (Direction=1) or backward (Direction=-1)
+// along the peptide.  Set the genomic endpoint, and the flanking (prefix or suffix) amino acid character.
+// What makes the job tricky is that we may have finished at the edge of an exon, either by using up an
+// incoming edge (if Pos==-1) or by using up the entire exon (if Pos+Direction falls off the edge).  
+// If we used up the full exon and there's an edge, report the AA for the first edge.  (TODO: Sort edges, maybe).  
+// If we used up a full exon and there's nothing to link to, report char '-'
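+// Example: extending rightward (Direction=1) and finishing on the exon's last residue reports
+// the AA of the first forward edge as the suffix, or '-' if the exon has no forward edges.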
+void SetMatchPrefixSuffix(ExonStruct* Exon, int Pos, int Direction)
+{
+    int Length = 0;
+    char AA;
+    ExonEdge* Edge;
+    if (Direction > 0)
+    {
+        // Direction is 1, so set RightGenomicPosition:
+        if (Exon->Start < 0)
+        {
+            // This exon has no known genomic position:
+            SRightGenomicPosition[MH_MatchCount] = -1;
+        }
+        else if (Exon->Sequence)
+        {
+            if (Exon->Gene->ForwardFlag)
+            {
+                if (Pos > -1)
+                {
+                    SRightGenomicPosition[MH_MatchCount] = Exon->Start + (Pos+1)*3 + strlen(Exon->Prefix);
+                }
+                else
+                {
+                    SRightGenomicPosition[MH_MatchCount] = Exon->Start + strlen(Exon->Prefix);
+                }
+            }
+            else
+            {
+                if (Pos > -1)
+                {
+                    // Yes, still add prefix length here:
+                    SRightGenomicPosition[MH_MatchCount] = Exon->End - (Pos+1)*3 - strlen(Exon->Prefix);
+                }
+                else
+                {
+                    SRightGenomicPosition[MH_MatchCount] = Exon->End - strlen(Exon->Prefix);
+                }
+            }
+        }
+        else
+        {
+            if (Exon->Gene->ForwardFlag)
+            {
+                SRightGenomicPosition[MH_MatchCount] = Exon->Start + strlen(Exon->Prefix); 
+            }
+            else
+            {
+                SRightGenomicPosition[MH_MatchCount] = Exon->End - strlen(Exon->Prefix);
+            }
+        }
+        Length = Exon->Length;
+        if (Pos + Direction < Length)
+        {
+            SRightSuffixes[MH_MatchCount] = Exon->Sequence[Pos + Direction];
+            return;
+        }
+        // If we have a forward-edge, use the (most common) forward edge aa
+        if (Exon->ForwardEdgeCount)
+        {
+            Edge = Exon->ForwardEdges;
+            AA = Edge->AA;
+            if (!AA && Edge->Exon->Sequence)
+            {
+                AA = Edge->Exon->Sequence[0];
+            }
+            SRightSuffixes[MH_MatchCount] = AA;
+            return;
+        }
+        SRightSuffixes[MH_MatchCount] = '-';
+        return;
+    }
+    else
+    {
+        // Direction is -1.  Set LeftGenomicPosition:
+        if (Exon->Start < 0)
+        {
+            SLeftGenomicPosition[MH_MatchCount] = -1;
+        }
+        else if (Exon->Sequence)
+        {
+            if (Exon->Gene->ForwardFlag)
+            {
+                if (Pos >= 0)
+                {
+                    SLeftGenomicPosition[MH_MatchCount] = Exon->Start + Pos*3 + strlen(Exon->Prefix);
+                }
+                else
+                {
+                    // We never used any sequence from the exon proper, we used
+                    // an incoming aa-edge:
+                    SLeftGenomicPosition[MH_MatchCount] = Exon->End - strlen(Exon->Suffix); 
+                }
+            }
+            else
+            {
+                if (Pos >= 0)
+                {
+                    // Yes, still add prefix length here:
+                    SLeftGenomicPosition[MH_MatchCount] = Exon->End - Pos*3 - strlen(Exon->Prefix);
+                }
+                else
+                {
+                    SLeftGenomicPosition[MH_MatchCount] = Exon->Start + strlen(Exon->Suffix);
+                }
+            }
+        }
+        else
+        {
+            if (Exon->Gene->ForwardFlag)
+            {
+                SLeftGenomicPosition[MH_MatchCount] = Exon->End - strlen(Exon->Suffix); 
+            }
+            else
+            {
+                SLeftGenomicPosition[MH_MatchCount] = Exon->Start + strlen(Exon->Suffix); 
+            }
+        }
+
+        if (Pos + Direction >= 0 && Exon->Sequence)
+        {
+            SLeftPrefixes[MH_MatchCount] = Exon->Sequence[Pos + Direction];
+            return;
+        }
+        else if (Exon->BackEdgeCount)
+        {
+            Edge = Exon->BackwardEdges;
+            AA = Edge->AA;
+            if (!AA && Edge->Exon->Sequence)
+            {
+                Length = strlen(Edge->Exon->Sequence);
+                AA = Edge->Exon->Sequence[Length-1];
+            }
+            SLeftPrefixes[MH_MatchCount] = AA;
+            return;
+        }
+        SLeftPrefixes[MH_MatchCount] = '-';
+        return;
+    }
+}
+
+// Copy the exon list from MH_MatchExons into either SLeftExon or SRightExon,
+// initializing the left-over entries to NULL
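+// Both destination arrays are flat: entry (MatchNumber, ExonIndex) lives at index
+// MatchNumber * MAX_EXTENSION_EXONS + ExonIndex, as in the indexing below.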
+void MatchHelperSetExons(int Direction)
+{
+    ExonStruct** MatchExons;
+    ExonEdge** MatchEdges;
+    int Index;
+    if (Direction < 0)
+    {
+        MatchExons = SLeftExon;
+        MatchEdges = SLeftEdge;
+        //MatchSplices = SLeftSpliceScore;
+    }
+    else
+    {
+        MatchExons = SRightExon;
+        MatchEdges = SRightEdge;
+        //MatchSplices = SRightSpliceScore;
+    }
+    for (Index = 0; Index < MAX_EXTENSION_EXONS; Index++)
+    {
+        if (Index >= MH_MatchExonPos)
+        {
+            MatchExons[MH_MatchCount * MAX_EXTENSION_EXONS + Index] = NULL;    
+            //MatchSplices[MH_MatchCount*MAX_EXTENSION_EXONS + Index] = -1;    
+        }
+        else
+        {
+            MatchExons[MH_MatchCount * MAX_EXTENSION_EXONS + Index] = MH_MatchExons[Index];
+            //MatchSplices[MH_MatchCount*MAX_EXTENSION_EXONS + Index] = MH_MatchSplices[Index];
+        }
+        if (Index >= MH_MatchEdgePos)
+        {
+            MatchEdges[MH_MatchCount * MAX_EXTENSION_EXONS + Index] = NULL; 
+        }
+        else
+        {
+            MatchEdges[MH_MatchCount * MAX_EXTENSION_EXONS + Index] = MH_MatchEdges[Index]; 
+        }
+    }
+}
+
+// Recursion counter - tracks calls to MatchFlankingMassSpliceHelper, so that
+// we can bail out if we take absurdly long.  (If we have a 3000Da+ flanking mass
+// and a lot of SNPs, then the search time becomes unacceptable)
+int g_SpliceHelperRecursionCount = 0;
+// The largest count we ever saw before the limit was added: 1862528167
+// Second-largest 816910, 99.99% were <30000
+#define MAX_HELPER_RECURSION_COUNT 30000
+// Recursable function for matching MatchMass.  We start out with decoration DecorationMatchIndex, and we try
+// smaller decorations (with smaller index number) as we go.  We start out with FlankingMass = 0 on the first
+// call; it's nonzero if we hit a splice junction and recurse.  
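+// The recursion stops when the decoration index drops below zero (flanking mass exceeded, or a
+// stop codon was reached), when MH_MatchCount reaches MAX_SIDE_EXTENSIONS, when an extension
+// would span MAX_EXTENSION_EXONS or more exons, or when the recursion-count limit is exceeded.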
+int MatchFlankingMassSpliceHelper(MSSpectrum* Spectrum, TrieTag* Tag, ExonStruct* Exon, 
+    int StartPos, int Direction, int MatchMass, int ModsRemaining, 
+    int DecorationMassIndex, int FlankingMass, int BufferPos, int BufferPosSpliced)
+{
+    int Pos;
+    int AAMass;
+    int Diff;
+    int AbsDiff;
+    int MandatoryDecorationChange = 0;
+    int BridgeBufferPos;
+    int BridgeDMI;
+    ExonStruct* BridgeExon;
+    int BridgeMass;
+    //char* EdgeAA;
+    int EdgeCount;
+    //ExonStruct** EdgeExon;
+    //int* EdgePower;
+    ExonEdge* Edges;
+    int EdgeIndex;
+    int Length;
+    int VerboseFlag = 0;
+    int OldMatchExonPos;
+    //int OldMatchSplicePos;
+    int OldMatchEdgePos;
+    int BridgeBufferPosSpliced;
+
+    g_SpliceHelperRecursionCount++;
+
+    //////////////////////////
+    // StartPos < 0 if we're starting at the edge of the exon and working inward.
+    if (StartPos < 0) 
+    {
+        if (Direction>0)
+        {
+            Pos = 0;
+        }
+        else
+        {
+            if (Exon->Sequence)
+            {
+                Pos = Exon->Length - 1;
+            }
+            else
+            {
+                Pos = -1;
+            }
+        }
+        MH_MatchExons[MH_MatchExonPos] = Exon;
+        MH_MatchExonPos++;
+        if (MH_MatchExonPos >= MAX_EXTENSION_EXONS)
+        {
+            // Bail out!  We extended across too many exons!
+            return 0;
+        }
+    }
+    else
+    {
+        // The tag includes the character at StartPos, so move to the next character:
+        Pos = StartPos + Direction;
+    }
+    Length = Exon->Length;
+    
+    // First, we'll extend out as far as possible WITHOUT bridging:
+    while (1)
+    {
+        if (Pos < 0 || Pos >= Length)
+        {
+            break;
+        }
+        if (DecorationMassIndex < 0)
+        {
+            break;
+        }
+        AAMass = PeptideMass[Exon->Sequence[Pos]];
+        if (!AAMass)
+        {
+            // We've reached a stop codon.
+            DecorationMassIndex = -1;
+            break;
+        }
+        FlankingMass += AAMass;
+        MH_Buffer[BufferPos++] = Exon->Sequence[Pos];
+        MH_BufferSpliced[BufferPosSpliced++] = Exon->Sequence[Pos];
+        Diff = MatchMass  - (FlankingMass + AllDecorations[DecorationMassIndex].Mass);
+        AbsDiff = abs(Diff);
+        if (AbsDiff < GlobalOptions->FlankingMassEpsilon)
+        {
+            // Aha!  This is *probably* a match.  Check to be sure we have the bases we need:
+            if (CheckForPTAttachmentPoints(DecorationMassIndex, MH_Buffer, 0, BufferPos - 1, 1))
+            {
+                if (VerboseFlag)
+                {
+                    printf("Side is match!  Dec-index %d, flank %.2f.\n", DecorationMassIndex, FlankingMass / (float)MASS_SCALE);
+                    printf("Copy to buffer.  Match count is %d, bufferpos is %d\n", MH_MatchCount, BufferPos);
+                }
+                strncpy(MH_MatchBuffer + MAX_EXTENSION_LENGTH * MH_MatchCount, MH_Buffer, BufferPos);
+                MH_MatchBuffer[MAX_EXTENSION_LENGTH * MH_MatchCount + BufferPos] = '\0';
+                strncpy(MH_MatchBufferSpliced + MAX_SEXTENSION_LENGTH * MH_MatchCount, MH_BufferSpliced, BufferPosSpliced);
+                MH_MatchBufferSpliced[MAX_SEXTENSION_LENGTH * MH_MatchCount + BufferPosSpliced] = '\0';
+                // Set prefix or suffix for this extension:
+                SetMatchPrefixSuffix(Exon, Pos, Direction);
+                MH_MatchDecoration[MH_MatchCount] = DecorationMassIndex;
+                MatchHelperSetExons(Direction);
+                MH_MatchCount++;
+                if (MH_MatchCount >= MAX_SIDE_EXTENSIONS)
+                {
+                    return MH_MatchCount;
+                }
+            }
+        }
+        // Move the DecorationMassIndex, if needed.
+        while (MandatoryDecorationChange || FlankingMass + AllDecorations[DecorationMassIndex].Mass > MH_MinMatchMass)
+        {
+            // The flanking sequence's mass is significantly bigger than our (decorated) target mass.
+            // Move to a smaller decoration:
+            MandatoryDecorationChange = 0;
+            DecorationMassIndex--;
+            if (DecorationMassIndex<0)
+            {
+                break;
+            }
+            // Skip any decorations that include phosphorylation, if we're not on phospho mode.
+            if (!GlobalOptions->PhosphorylationFlag && g_PhosphorylationMod > -1 && AllDecorations[DecorationMassIndex].Mods[g_PhosphorylationMod])
+            {
+                MandatoryDecorationChange = 1;
+                continue;
+            }
+            if (AllDecorations[DecorationMassIndex].TotalMods > ModsRemaining)
+            {
+                continue;
+            }
+            // This decoration is acceptable.  Check for a match:
+            Diff = MatchMass  - (FlankingMass + AllDecorations[DecorationMassIndex].Mass);
+            AbsDiff = abs(Diff);
+            if (AbsDiff < GlobalOptions->FlankingMassEpsilon) 
+            {
+                // Aha!  This is *probably* a match.  Check to be sure we have the bases we need:
+                if (CheckForPTAttachmentPoints(DecorationMassIndex, MH_Buffer, 0, BufferPos-1, 1))
+                {
+                    if (VerboseFlag)
+                    {
+                        printf("Left is match!  Dec-index %d, flank %.2f.\n", DecorationMassIndex, FlankingMass / (float)MASS_SCALE);
+                    }
+                    strncpy(MH_MatchBuffer + MAX_EXTENSION_LENGTH * MH_MatchCount, MH_Buffer, BufferPos);
+                    MH_MatchBuffer[MAX_EXTENSION_LENGTH * MH_MatchCount + BufferPos] = '\0';
+                    strncpy(MH_MatchBufferSpliced + MAX_SEXTENSION_LENGTH * MH_MatchCount, MH_BufferSpliced, BufferPosSpliced);
+                    MH_MatchBufferSpliced[MAX_SEXTENSION_LENGTH * MH_MatchCount + BufferPosSpliced] = '\0';
+
+                    // Set prefix or suffix for this extension:
+                    SetMatchPrefixSuffix(Exon, Pos, Direction);
+                    MH_MatchDecoration[MH_MatchCount] = DecorationMassIndex;
+                    MatchHelperSetExons(Direction);
+                    MH_MatchCount++;
+                    if (MH_MatchCount >= MAX_SIDE_EXTENSIONS)
+                    {
+                        return MH_MatchCount;
+                    }
+                    MandatoryDecorationChange = 1;
+                }
+            }
+        }
+        Pos += Direction;
+    }
+
+    // If DMI < 0, then our flanking mass became too large or we hit a stop codon:
+    if (DecorationMassIndex<0)
+    {
+        return MH_MatchCount;
+    }
+
+    // Now: We reached the end of the exon, so next we'll try each edge:
+    if (Direction > 0)
+    {
+        Edges = Exon->ForwardEdges;
+        EdgeCount = Exon->ForwardEdgeCount;
+    }
+    else
+    {
+        Edges = Exon->BackwardEdges;
+        EdgeCount = Exon->BackEdgeCount;
+    }
+    // Save our current state (FlankingMass, BufferPos, and DecorationMassIndex).  After trying each edge,
+    // we return to this state.
+    BridgeMass = FlankingMass;
+    BridgeBufferPos = BufferPos;
+    BridgeBufferPosSpliced = BufferPosSpliced;
+    BridgeDMI = DecorationMassIndex;
+    OldMatchExonPos = MH_MatchExonPos;
+    OldMatchEdgePos = MH_MatchEdgePos;
+
+    for (EdgeIndex = 0; EdgeIndex < EdgeCount; EdgeIndex++)
+    {
+        FlankingMass = BridgeMass;
+        BufferPos = BridgeBufferPos;
+        BufferPosSpliced = BridgeBufferPosSpliced;
+        DecorationMassIndex = BridgeDMI;
+        MH_MatchExonPos = OldMatchExonPos;
+        MH_MatchEdgePos = OldMatchEdgePos;
+        
+        BridgeExon = Edges[EdgeIndex].Exon;  
+        MH_MatchEdges[MH_MatchEdgePos] = Edges + EdgeIndex;
+        MH_MatchEdgePos++;
+        // Extend with the edge amino acid:
+        if (Edges[EdgeIndex].AA)
+        {
+            AAMass = PeptideMass[Edges[EdgeIndex].AA];
+            if (!AAMass)
+            {
+                continue; // terminator
+            }
+            FlankingMass += AAMass;
+            // If this is a "true edge" (not an adjacent-edge), then note the splicing:
+            if (Edges[EdgeIndex].Power)
+            {
+                MH_BufferSpliced[BufferPosSpliced++] = ';';
+                MH_BufferSpliced[BufferPosSpliced++] = Edges[EdgeIndex].AA;
+                MH_BufferSpliced[BufferPosSpliced++] = ';';
+            }
+            else
+            {
+                MH_BufferSpliced[BufferPosSpliced++] = Edges[EdgeIndex].AA;
+            }
+            MH_MatchExons[MH_MatchExonPos] = BridgeExon;
+            MH_MatchExonPos++;
+            if (MH_MatchExonPos >= MAX_EXTENSION_EXONS)
+            {
+                // Bail out!  We extended across too many exons!
+                MH_MatchExonPos--;
+                continue;
+            }
+
+            MH_Buffer[BufferPos++] = Edges[EdgeIndex].AA; //EdgeAA[EdgeIndex];
+            Diff = MatchMass  - (FlankingMass + AllDecorations[DecorationMassIndex].Mass);
+            AbsDiff = abs(Diff);
+            if (AbsDiff < GlobalOptions->FlankingMassEpsilon)
+            {
+                // Aha!  This is *probably* a match.  Check to be sure we have the bases we need:
+                if (CheckForPTAttachmentPoints(DecorationMassIndex, MH_Buffer, 0, BufferPos-1, 1))
+                {
+                    if (VerboseFlag)
+                    {
+                        printf("Side is match!  Dec-index %d, flank %.2f.\n", DecorationMassIndex, FlankingMass / (float)MASS_SCALE);
+                    }
+                    strncpy(MH_MatchBuffer + MAX_EXTENSION_LENGTH * MH_MatchCount, MH_Buffer, BufferPos);
+                    MH_MatchBuffer[MAX_EXTENSION_LENGTH * MH_MatchCount + BufferPos] = '\0';
+                    strncpy(MH_MatchBufferSpliced + MAX_SEXTENSION_LENGTH * MH_MatchCount, MH_BufferSpliced, BufferPosSpliced);
+                    MH_MatchBufferSpliced[MAX_SEXTENSION_LENGTH * MH_MatchCount + BufferPosSpliced] = '\0';
+                    // Set prefix or suffix for this extension:
+                    if (Direction > 0)
+                    {
+                        // Direction > 0: set suffix!
+                        if (BridgeExon->Sequence)
+                        {
+                            SRightSuffixes[MH_MatchCount] = BridgeExon->Sequence[0];
+                        }
+                        else
+                        {
+                            SRightSuffixes[MH_MatchCount] = '-';
+                        }
+                        if (Exon->Start < 0)
+                        {
+                            SRightGenomicPosition[MH_MatchCount] = -1;
+                        }
+                        else if (Exon->Gene->ForwardFlag)
+                        {
+                            SRightGenomicPosition[MH_MatchCount] = BridgeExon->Start + strlen(BridgeExon->Prefix);
+                        }
+                        else
+                        {
+                            SRightGenomicPosition[MH_MatchCount] = BridgeExon->End - strlen(BridgeExon->Prefix);
+                        }
+
+                    }
+                    else
+                    {
+                        // Direction < 0: set prefix!
+                        if (BridgeExon->Sequence && BridgeExon->Sequence[0])
+                        {
+                            SLeftPrefixes[MH_MatchCount] = BridgeExon->Sequence[strlen(BridgeExon->Sequence)-1];
+                        }
+                        else
+                        {
+                            SLeftPrefixes[MH_MatchCount] = '-';
+                        }
+                        if (Exon->Start < 0)
+                        {
+                            SLeftGenomicPosition[MH_MatchCount] = -1;
+                        }
+                        else if (Exon->Gene->ForwardFlag)
+                        {
+                            SLeftGenomicPosition[MH_MatchCount] = BridgeExon->End - strlen(BridgeExon->Suffix);
+                        }
+                        else
+                        {
+                            SLeftGenomicPosition[MH_MatchCount] = BridgeExon->Start + strlen(BridgeExon->Suffix);
+                        }
+                    }
+                    MH_MatchDecoration[MH_MatchCount] = DecorationMassIndex;
+                    MatchHelperSetExons(Direction);
+                    MH_MatchCount++;
+                    if (MH_MatchCount >= MAX_SIDE_EXTENSIONS)
+                    {
+                        return MH_MatchCount;
+                    }
+                }
+            }
+            // Move the DecorationMassIndex, if needed.
+            while (MandatoryDecorationChange || FlankingMass + AllDecorations[DecorationMassIndex].Mass > MH_MinMatchMass)
+            {
+                // The flanking sequence's mass is significantly bigger than our (decorated) target mass.
+                // Move to a smaller decoration:
+                MandatoryDecorationChange = 0;
+                DecorationMassIndex--;
+                if (DecorationMassIndex<0)
+                {
+                    break;
+                }
+                // Skip any decorations that include phosphorylation, if we're not on phospho mode:
+                if (!GlobalOptions->PhosphorylationFlag && g_PhosphorylationMod>-1 && AllDecorations[DecorationMassIndex].Mods[g_PhosphorylationMod])
+                {
+                    MandatoryDecorationChange = 1;
+                    continue;
+                }
+                if (AllDecorations[DecorationMassIndex].TotalMods > ModsRemaining)
+                {
+                    continue;
+                }
+                // And, check for a match:
+                Diff = MatchMass  - (FlankingMass + AllDecorations[DecorationMassIndex].Mass);
+                AbsDiff = abs(Diff);
+                if (AbsDiff < GlobalOptions->FlankingMassEpsilon) 
+                {
+                    // Aha!  This is *probably* a match.  Check to be sure we have the bases we need:
+                    if (CheckForPTAttachmentPoints(DecorationMassIndex, MH_Buffer, 0, BufferPos-1, 1))
+                    {
+                        if (VerboseFlag)
+                        {
+                            printf("Left is match!  Dec-index %d, flank %.2f.\n", DecorationMassIndex, FlankingMass / (float)MASS_SCALE);
+                        }
+                        strncpy(MH_MatchBuffer + MAX_EXTENSION_LENGTH*MH_MatchCount, MH_Buffer, BufferPos);
+                        MH_MatchBuffer[MAX_EXTENSION_LENGTH * MH_MatchCount + BufferPos] = '\0';
+                        strncpy(MH_MatchBufferSpliced + MAX_SEXTENSION_LENGTH * MH_MatchCount, MH_BufferSpliced, BufferPosSpliced);
+                        MH_MatchBufferSpliced[MAX_SEXTENSION_LENGTH * MH_MatchCount + BufferPosSpliced] = '\0';
+                        // Set prefix or suffix for this extension:
+                        if (Direction > 0)
+                        {
+                            if (BridgeExon->Sequence)
+                            {
+                                SRightSuffixes[MH_MatchCount] = BridgeExon->Sequence[0];
+                            }
+                            else
+                            {
+                                SRightSuffixes[MH_MatchCount] = '-';
+                            }
+                            if (BridgeExon->Start < 0)
+                            {
+                                SRightGenomicPosition[MH_MatchCount] = -1;
+                            }
+                            else if (Exon->Gene->ForwardFlag)
+                            {
+                                SRightGenomicPosition[MH_MatchCount] = BridgeExon->Start + strlen(BridgeExon->Prefix);
+                            }
+                            else
+                            {
+                                SRightGenomicPosition[MH_MatchCount] = BridgeExon->End - strlen(BridgeExon->Prefix);
+                            }
+                        }
+                        else
+                        {
+                            if (BridgeExon->Sequence)
+                            {
+                                SLeftPrefixes[MH_MatchCount] = BridgeExon->Sequence[strlen(BridgeExon->Sequence)-1];
+                            }
+                            else
+                            {
+                                SLeftPrefixes[MH_MatchCount] = '-';
+                            }
+                            if (BridgeExon->Start < 0)
+                            {
+                                SLeftGenomicPosition[MH_MatchCount] = -1;
+                            }
+                            else if (Exon->Gene->ForwardFlag)
+                            {
+                                SLeftGenomicPosition[MH_MatchCount] = BridgeExon->End - strlen(BridgeExon->Suffix);
+                            }
+                            else
+                            {
+                                SLeftGenomicPosition[MH_MatchCount] = BridgeExon->Start + strlen(BridgeExon->Suffix);
+                            }
+                        }
+
+                        MH_MatchDecoration[MH_MatchCount] = DecorationMassIndex;
+                        MatchHelperSetExons(Direction);
+                        MH_MatchCount++;
+                        if (MH_MatchCount >= MAX_SIDE_EXTENSIONS)
+                        {
+                            return MH_MatchCount;
+                        }
+                        MandatoryDecorationChange = 1;
+                    }
+                }
+            }
+            MH_MatchExonPos--;
+        } // If the edge has an AA
+        else
+        {
+            if (Edges[EdgeIndex].Power)
+            {
+                MH_BufferSpliced[BufferPosSpliced++] = ':';
+            }
+        }
+
+        // Recurse!  Call MatchFlankingMassSpliceHelper again:
+        MatchFlankingMassSpliceHelper(Spectrum,  Tag, BridgeExon, -1, Direction, MatchMass, ModsRemaining,
+            DecorationMassIndex, FlankingMass, BufferPos, BufferPosSpliced);
+        if (MH_MatchCount >= MAX_SIDE_EXTENSIONS)
+        {
+            return MH_MatchCount;
+        }
+        if (g_SpliceHelperRecursionCount >= MAX_HELPER_RECURSION_COUNT)
+        {
+            return MH_MatchCount;
+        }
+    }  // Iteration over edges
+    return MH_MatchCount;
+}
+
+int MatchFlankingMassSpliced(MSSpectrum* Spectrum, TrieTag* Tag, ExonStruct* Exon, int StartPos, int Direction, 
+    int MatchMass, int ModsRemaining)
+{
+    static int DecorationMassIndex;
+    static int AAMass;
+    //
+    /////////////////////////////////////////////////////////
+    // If the flanking mass is zero (within tolerance), that always qualifies as a match.
+    MH_MatchCount = 0;
+    if (MatchMass < GlobalOptions->FlankingMassEpsilon) 
+    {  
+        if (Direction < 0)
+        {
+            SLeftMatchDecoration[0] = PlainOldDecorationIndex;
+            SLeftExon[0] = NULL;
+            SLeftEdge[0] = NULL;
+            SLeftMatchBuffer[0] = '\0';
+            SLeftMatchBufferSpliced[0] = '\0';
+            SetMatchPrefixSuffix(Exon, StartPos, Direction);
+        }
+        else
+        {
+            SRightMatchDecoration[0] = PlainOldDecorationIndex;
+            SRightExon[0] = NULL;
+            SRightEdge[0] = NULL;
+            SRightMatchBuffer[0] = '\0';
+            SRightMatchBufferSpliced[0] = '\0';
+            SetMatchPrefixSuffix(Exon, StartPos, Direction);
+        }
+        return 1;
+    }
+
+    MH_MinMatchMass = MatchMass - GlobalOptions->FlankingMassEpsilon;
+    MH_MaxMatchMass = MatchMass + GlobalOptions->FlankingMassEpsilon;
+    if (Direction < 0)
+    {
+        MH_MatchBuffer = SLeftMatchBuffer;
+        MH_MatchBufferSpliced = SLeftMatchBufferSpliced;
+        MH_MatchDecoration = SLeftMatchDecoration;
+        MH_MatchExonPos = 0;
+        //MH_MatchSplices = SLeftSpliceScore;
+        //MH_MatchSplicePos = 0;
+        MH_MatchEdgePos = 0;
+    }
+    else
+    {
+        MH_MatchBuffer = SRightMatchBuffer;
+        MH_MatchBufferSpliced = SRightMatchBufferSpliced;
+        MH_MatchDecoration = SRightMatchDecoration;
+        MH_MatchExonPos = 0;
+        //MH_MatchSplices = SRightSpliceScore;
+        //MH_MatchSplicePos = 0;
+        MH_MatchEdgePos = 0;
+    }
+
+    DecorationMassIndex = AllDecorationCount - 1;
+    // Skip over any decorations that use up too many pt-mods:
+    while (1)
+    {
+        if (AllDecorations[DecorationMassIndex].TotalMods > ModsRemaining)
+        {
+            DecorationMassIndex--;
+            continue;
+        }
+        break;        
+    }
+
+    MH_MatchExonPos = 0;
+    MH_MatchEdgePos = 0;
+    // Perform tag extension, following edges as needed:
+    g_SpliceHelperRecursionCount = 0;
+    MatchFlankingMassSpliceHelper(Spectrum, Tag, Exon, StartPos, Direction,
+        MatchMass, ModsRemaining, DecorationMassIndex, 0, 0, 0);
+    return MH_MatchCount;
+
+}
+
+// Copies a string to a destination, in reverse character order.
+void ReverseStringCopy(char* Target, char* Source)
+{
+    int Length;
+    char* SourceChar;
+    //
+    Length = strlen(Source);
+    for (SourceChar = Source + Length - 1; SourceChar >= Source; SourceChar--)
+    {
+        *Target = *SourceChar;
+        Target++;
+    }
+
+}
+
+#define MINIMUM_EXON_LENGTH 4
+
+// A tag has been matched.  Its left edge lies at LeftExonPos in LeftExon (or at -1 if its leftmost character
+// comes from an edge).  Its right edge lies at RightExonPos in RightExon (or at -1 if its rightmost character
+// comes from an edge).  Try to extend out to a prefix/suffix mass match.  Analogous to the GetMatches() function
+// in standard trie search.  The difference is that our extension can follow an exon edge.
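+// For each tag anchored here, the prefix mass is matched by extending backward from
+// (LeftExon, LeftExonPos) and the suffix mass by extending forward from (RightExon, RightExonPos);
+// each (left, right) combination that passes the PTM limits and the duplicate and short-exon
+// checks below becomes a candidate match.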
+void GetSplicedMatches(SearchInfo* Info, TrieNode* Node, ExonStruct* LeftExon, int LeftExonPos, 
+    ExonStruct* RightExon, int RightExonPos)
+{
+    int LeftMatchCount;
+    int RightMatchCount;
+    int LeftMatchIndex;
+    int RightMatchIndex;
+    int ModIndex;
+    int Length;
+    int ModsRemaining;
+    int Pos;
+    Peptide* Match;
+    int VerboseFlag = 0;
+    int ForwardFlag;
+    MSSpectrum* Spectrum;
+    static int PTMLimit[MAX_PT_MODTYPE];
+    TrieTagHanger* TagNode;
+    int ExtensionIndex;
+    int ExtensionCount = 0;
+    int ExtensionFound;
+    int UsedTooMany;
+    int ExIndex;
+    int ExonCount;
+    int SpliceScoreCount;
+    ExonStruct* TempExon;
+    ExonStruct* AllExons[256];
+    ExonEdge* AllEdges[256];
+    char SplicedBases[256];
+    int AllEdgeCount;
+    int AllExonCount;
+    PeptideSpliceNode* SpliceTail;
+    PeptideSpliceNode* SpliceNode;
+    //int GenomicLocation;
+    PeptideSpliceNode* PrevSpliceNode;
+    int EdgeIndex;
+    ExonEdge* TempEdge;
+    int GenomicStart;
+    int GenomicEnd;
+    char* ShortExonCheck;
+    int DistanceFromLastJunction;
+    int InvalidExonFlag;
+    //////////////
+    //printf("GetSplicedMatches() called for tag %s\n", Node->FirstTag->Tag->Tag); 
+    if (!Node->FirstTag)
+    {
+        return;
+    } 
+    ForwardFlag = LeftExon->Gene->ForwardFlag;
+    for (TagNode = Node->FirstTag; TagNode; TagNode = TagNode->Next)
+    {
+        if (VerboseFlag)
+        {
+            printf("Matched tag '%s' (pre %.2f post %.2f).\n  Left exon %d pos %d, right exon %d pos %d\n",
+                TagNode->Tag->Tag, TagNode->Tag->PrefixMass / (float)MASS_SCALE, TagNode->Tag->SuffixMass / (float)MASS_SCALE,
+                LeftExon->Start, LeftExonPos, RightExon->Start, RightExonPos);
+        }
+        /*
+        printf("TagNode: %p\n", TagNode);
+        fflush(stdout);
+        printf("Tag: %p\n", TagNode->Tag);
+        fflush(stdout);
+        printf("Seq: %s\n", TagNode->Tag->Tag);
+        fflush(stdout);
+        printf("PSpectrum: %p\n", TagNode->Tag->PSpectrum);
+        fflush(stdout);
+        */
+        Spectrum = TagNode->Tag->PSpectrum;
+        Info->Spectrum = Spectrum;
+        memcpy(PTMLimit, g_PTMLimit, sizeof(int) * AllPTModCount);
+        for (ModIndex = 0; ModIndex < MAX_PT_MODS; ModIndex++)
+        {
+            if (TagNode->Tag->AminoIndex[ModIndex] < 0)
+            {
+                break;
+            }
+            PTMLimit[TagNode->Tag->ModType[ModIndex]->Index] -= 1;
+        }
+        ModsRemaining = GlobalOptions->MaxPTMods - TagNode->Tag->ModsUsed;
+        if (ModsRemaining < 0)
+        {
+            continue;
+        }
+        LeftMatchCount = MatchFlankingMassSpliced(Spectrum, TagNode->Tag, LeftExon, LeftExonPos, -1, TagNode->Tag->PrefixMass, ModsRemaining);
+        if (LeftMatchCount == 0)
+        {
+            continue;
+        }
+        RightMatchCount = MatchFlankingMassSpliced(Spectrum, TagNode->Tag, RightExon, RightExonPos, 1, TagNode->Tag->SuffixMass, ModsRemaining);
+        if (RightMatchCount == 0)
+        {
+            continue;
+        }
+
+        // Consider each combination of left-decoration and right-decoration:
+        for (LeftMatchIndex = 0; LeftMatchIndex < LeftMatchCount; LeftMatchIndex++)
+        {
+            for (RightMatchIndex = 0; RightMatchIndex < RightMatchCount; RightMatchIndex++)
+            {
+                if (VerboseFlag)
+                {
+                    printf("LMI %d RMI %d Count %d\n", LeftMatchIndex, RightMatchIndex, ExtensionCount);
+                }
+                UsedTooMany = 0;
+                for (ModIndex = 0; ModIndex < AllPTModCount; ModIndex++)
+                {
+
+                    if (AllDecorations[SLeftMatchDecoration[LeftMatchIndex]].Mods[ModIndex] + 
+                        AllDecorations[SRightMatchDecoration[RightMatchIndex]].Mods[ModIndex] > PTMLimit[ModIndex])
+                    {
+                        UsedTooMany = 1;
+                        break;
+                    }
+                }
+                if (UsedTooMany)
+                {
+                    continue;
+                }
+                if (AllDecorations[SLeftMatchDecoration[LeftMatchIndex]].TotalMods + 
+                    AllDecorations[SRightMatchDecoration[RightMatchIndex]].TotalMods > ModsRemaining)
+                {
+                    continue;
+                }
+                if (GlobalOptions->MandatoryModIndex > -1 && 
+                    !TagNode->Tag->MandatoryModUsed &&
+                    AllDecorations[SLeftMatchDecoration[LeftMatchIndex]].Mods[GlobalOptions->MandatoryModIndex] == 0 &&
+                    AllDecorations[SRightMatchDecoration[RightMatchIndex]].Mods[GlobalOptions->MandatoryModIndex] == 0)
+                {
+                    continue; // We don't have our mandatory PTM (biotin, or whatever)
+                }
+                if (LeftExon->Gene->ForwardFlag)
+                {
+                    GenomicStart = SLeftGenomicPosition[LeftMatchIndex];
+                    GenomicEnd = SRightGenomicPosition[RightMatchIndex];
+                }
+                else
+                {
+                    GenomicStart = SRightGenomicPosition[RightMatchIndex];
+                    GenomicEnd = SLeftGenomicPosition[LeftMatchIndex];
+                }               
+                // Don't produce the same extension multiple times:
+                ExtensionFound = 0;
+                for (ExtensionIndex = 0; ExtensionIndex < ExtensionCount; ExtensionIndex++)
+                {
+                    if (!strcmp(ExtensionBufferLeft + ExtensionIndex*MAX_EXTENSION_LENGTH, SLeftMatchBuffer + LeftMatchIndex*MAX_EXTENSION_LENGTH)
+                    && !strcmp(ExtensionBufferRight + ExtensionIndex*MAX_EXTENSION_LENGTH, SRightMatchBuffer + RightMatchIndex*MAX_EXTENSION_LENGTH)
+                    && ExtensionLeftDecorations[ExtensionIndex] == SLeftMatchDecoration[LeftMatchIndex]
+                    && ExtensionRightDecorations[ExtensionIndex] == SRightMatchDecoration[RightMatchIndex]
+                    && ExtensionSpectra[ExtensionIndex] == TagNode->Tag->PSpectrum)
+                    {
+                        // Gosh, looks like we found the same peptide again (probably by starting with
+                        // another valid tag).  Let's check whether the genomic endpoints are the
+                        // same as well:
+                        if (GenomicStart == ExtensionGenomicStart[ExtensionIndex] && GenomicEnd == ExtensionGenomicEnd[ExtensionIndex])
+                        {
+                            ExtensionFound = 1;
+                            break;
+                        }
+                    }
+                }
+                if (ExtensionFound)
+                {
+                    continue;
+                }
+                ExtensionLeftDecorations[ExtensionCount] = SLeftMatchDecoration[LeftMatchIndex];
+                ExtensionRightDecorations[ExtensionCount] = SRightMatchDecoration[RightMatchIndex];
+                strcpy(ExtensionBufferLeft + ExtensionCount * MAX_EXTENSION_LENGTH, SLeftMatchBuffer + LeftMatchIndex * MAX_EXTENSION_LENGTH);
+                strcpy(ExtensionBufferRight + ExtensionCount * MAX_EXTENSION_LENGTH, SRightMatchBuffer + RightMatchIndex * MAX_EXTENSION_LENGTH);
+                ExtensionSpectra[ExtensionCount] = TagNode->Tag->PSpectrum;
+
+                // MatchedBases is concatenated together from five sources:
+                // prefixAA
+                //    |        TAG
+                //    A BBBBBB CCC DDDDDDD E
+                //      left       right   |
+                //       ext.       ext.   suffix
+                Pos = strlen(SLeftMatchBuffer + MAX_EXTENSION_LENGTH * LeftMatchIndex);
+                MatchedBases[0] = SLeftPrefixes[LeftMatchIndex];
+                ReverseStringCopy(MatchedBases + 1, SLeftMatchBuffer + MAX_EXTENSION_LENGTH*LeftMatchIndex);
+                g_TagBuffer[g_TagBufferPos] = '\0';
+                strcpy(MatchedBases + 1 + Pos, g_TagBuffer);
+                //strcpy(MatchedBases + 1 + Pos, TagNode->Tag->Tag);
+                strcpy(MatchedBases + 1 + Pos + strlen(TagNode->Tag->Tag), SRightMatchBuffer + MAX_EXTENSION_LENGTH*RightMatchIndex);
+                Length = strlen(MatchedBases+1);
+                MatchedBases[strlen(MatchedBases+1)+2] = '\0';
+                MatchedBases[strlen(MatchedBases+1)+1] = SRightSuffixes[RightMatchIndex];
+
+                // Set SplicedBases, and check for unacceptably short exons:
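+                // In the spliced string, ';X;' marks a residue contributed across a splice
+                // junction and ':' marks a junction with no bridging residue; a junction reached
+                // fewer than MINIMUM_EXON_LENGTH residues after the previous one rejects the match.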
+                Pos = strlen(SLeftMatchBufferSpliced + MAX_SEXTENSION_LENGTH * LeftMatchIndex);
+                ReverseStringCopy(SplicedBases, SLeftMatchBufferSpliced + MAX_SEXTENSION_LENGTH * LeftMatchIndex);
+                g_TagBufferSpliced[g_TagBufferPosSpliced] = '\0';
+                strcpy(SplicedBases + Pos, g_TagBufferSpliced);
+                strcpy(SplicedBases + Pos + strlen(g_TagBufferSpliced), SRightMatchBufferSpliced + MAX_SEXTENSION_LENGTH * RightMatchIndex);
+                DistanceFromLastJunction = 999;
+                InvalidExonFlag = 0;
+                for (ShortExonCheck = SplicedBases; *ShortExonCheck; ShortExonCheck++)
+                {
+                    switch (*ShortExonCheck)
+                    {
+                    case ';':
+                        if (DistanceFromLastJunction < MINIMUM_EXON_LENGTH)
+                        {
+                            InvalidExonFlag = 1;
+                        }
+                        else
+                        {
+                            ShortExonCheck += 2; // We're at the start of ;x;, skip over aa and other ;
+                            DistanceFromLastJunction = 0;
+                        }
+                        break;
+                    case ':':
+                        if (DistanceFromLastJunction < MINIMUM_EXON_LENGTH)
+                        {
+                            InvalidExonFlag = 1;
+                        }
+                        DistanceFromLastJunction = 0;
+                        break;
+                    default:
+                        DistanceFromLastJunction++;
+                        break;
+                    }
+                    if (InvalidExonFlag)
+                    {
+                        break;
+                    }
+                }
+                // Reject, if unacceptably short exons were used:
+                if (InvalidExonFlag)
+                {
+                    continue;
+                }
+
+                ExtensionGenomicStart[ExtensionCount] = GenomicStart;
+                ExtensionGenomicEnd[ExtensionCount] = GenomicEnd;
+                Match = AddNewMatch(Info, -1, TagNode->Tag, 
+                    MatchedBases + 1, Length,
+                    strlen(SLeftMatchBuffer + MAX_EXTENSION_LENGTH * LeftMatchIndex),
+                    SLeftMatchDecoration[LeftMatchIndex], SRightMatchDecoration[RightMatchIndex],
+                    GenomicStart, GenomicEnd);
+
+                if (Match)
+                {
+                    // We might have some splice nodes stored here.  If so, free them:
+                    if (Match->SpliceHead)
+                    {
+                        PrevSpliceNode = NULL;
+                        for (SpliceNode = Match->SpliceHead; SpliceNode; SpliceNode = SpliceNode->Next)
+                        {
+                            SafeFree(PrevSpliceNode);
+                            PrevSpliceNode = SpliceNode;
+                        }
+                        SafeFree(PrevSpliceNode);
+                        Match->SpliceHead = NULL;
+                    }
+                    //Match->GenomicLocation = GenomicLocation;
+
+                    Match->ChromosomeNumber = LeftExon->Gene->ChromosomeNumber;
+                    Match->ChromosomeForwardFlag = LeftExon->Gene->ForwardFlag;
+                    Match->RecordNumber = Info->RecordNumber;
+                    // Copy in the list of exons and the splice scores:
+                    ExonCount = 0;
+                    SpliceScoreCount = 0;
+                    AllExonCount = 0;
+                    AllEdgeCount = 0;
+                    // Read exons from the prefix:
+                    for (ExIndex = 0; ExIndex < MAX_EXTENSION_EXONS; ExIndex++)
+                    {
+                        TempExon = SLeftExon[LeftMatchIndex * MAX_EXTENSION_EXONS + ExIndex];
+                        if (!TempExon)
+                        {
+                            ExIndex--;
+                            break;
+                        }
+                    }
+                    while (ExIndex > -1)
+                    {
+                        AllExons[AllExonCount] = SLeftExon[LeftMatchIndex * MAX_EXTENSION_EXONS + ExIndex];
+                        AllExonCount++;
+                        ExIndex--;
+                    }
+                    // Read edges from the prefix:
+                    for (ExIndex = 0; ExIndex < MAX_EXTENSION_EXONS; ExIndex++)
+                    {
+                        TempEdge = SLeftEdge[LeftMatchIndex * MAX_EXTENSION_EXONS + ExIndex];
+                        if (!TempEdge)
+                        {
+                            ExIndex--;
+                            break;
+                        }
+                    }
+                    while (ExIndex > -1)
+                    {
+                        AllEdges[AllEdgeCount] = GetReciprocalExonEdge(SLeftEdge[LeftMatchIndex * MAX_EXTENSION_EXONS + ExIndex], 0);
+                        AllEdgeCount++;
+                        ExIndex--;
+                    }
+                    // Read exons from the tag:
+                    for (ExIndex = 0; ExIndex < g_TagExonArrayPos; ExIndex++)
+                    {
+                        if (AllExonCount && (AllExons[AllExonCount-1] == g_TagExonArray[ExIndex]))
+                        {
+                            continue;
+                        }
+                        AllExons[AllExonCount] = g_TagExonArray[ExIndex];
+                        AllExonCount++;
+                    }
+                    // Read edges from the tag:
+                    for (ExIndex = 0; ExIndex < g_TagSpliceArrayPos; ExIndex++)
+                    {
+                        AllEdges[AllEdgeCount] = g_TagSpliceArray[ExIndex];
+                        AllEdgeCount++;
+                    }
+                    // Read exons from the suffix:
+                    for (ExIndex = 0; ExIndex < MAX_EXTENSION_EXONS; ExIndex++)
+                    {
+                        TempExon = SRightExon[RightMatchIndex * MAX_EXTENSION_EXONS + ExIndex];
+                        if (TempExon)
+                        {
+                            if (AllExonCount && (AllExons[AllExonCount-1] == TempExon))
+                            {
+                                continue;
+                            }
+                            AllExons[AllExonCount] = TempExon;
+                            AllExonCount++;
+                        }
+                        else
+                        {
+                            break; // After the first null exon comes undefined rubbish data
+                        }
+                    }
+                    // Read edges from the suffix:
+                    for (ExIndex = 0; ExIndex < MAX_EXTENSION_EXONS; ExIndex++)
+                    {
+                        TempEdge = SRightEdge[RightMatchIndex * MAX_EXTENSION_EXONS + ExIndex];
+                        if (TempEdge)
+                        {
+                            AllEdges[AllEdgeCount] = TempEdge;
+                            AllEdgeCount++;
+                        }
+                        else
+                        {
+                            break; // After the first null edge comes undefined rubbish data
+                        }
+                    }
+                    // Store the sequence, with splice boundaries indicated:
+                    SafeFree(Match->SplicedBases);
+                    Match->SplicedBases = (char*)calloc(256, sizeof(char));
+                    strncpy(Match->SplicedBases, SplicedBases, 255); // calloc zeroed the buffer, so the last byte stays '\0'
+                    
+                    // We know the exons, now we'll store all the splicing info for the match:
+                    SpliceTail = NULL;
+                    for (EdgeIndex = 0; EdgeIndex < AllEdgeCount; EdgeIndex++)
+                    {
+                        if (AllEdges[EdgeIndex]->Power)
+                        {
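+                            // Record this splice junction.  DonorPos and AcceptorPos are genomic coordinates
+                            // taken from the source and target exon boundaries; Start and End swap roles
+                            // depending on the strand (ForwardFlag).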
+                            SpliceNode = (PeptideSpliceNode*)calloc(sizeof(PeptideSpliceNode), 1);
+                            if (ForwardFlag)
+                            {
+                                SpliceNode->DonorPos = AllEdges[EdgeIndex]->Source->End;
+                                SpliceNode->AcceptorPos = AllEdges[EdgeIndex]->Exon->Start;
+                            }
+                            else
+                            {
+                                SpliceNode->DonorPos = AllEdges[EdgeIndex]->Source->Start;
+                                SpliceNode->AcceptorPos = AllEdges[EdgeIndex]->Exon->End;
+                            }
+                            SpliceNode->ChromosomeNumber = LeftExon->Gene->ChromosomeNumber;
+                            if (SpliceTail)
+                            {
+                                SpliceTail->Next = SpliceNode;
+                            }
+                            else
+                            {
+                                Match->SpliceHead = SpliceNode;
+                            }
+                            SpliceTail = SpliceNode;
+                        }
+                    }
+
+                    //// %%% SANITY CHECK SPLICING %%%
+                    //if (Match->SpliceHead && (!strstr(Match->SplicedBases, ";") && !strstr(Match->SplicedBases, ":")))
+                    //{
+                    //    printf("Warning: Match found with no true splicing, but splice junction stored!\n");
+                    //    printf("%s %s\n", Match->Bases, Match->SplicedBases);
+                    //    printf("SpliceNode: %d-%d\n", Match->SpliceHead->DonorPos, Match->SpliceHead->AcceptorPos);
+                    //    DebugPrintGene(LeftExon->Gene);
+                    //}
+                    //if (!Match->SpliceHead && (strstr(Match->SplicedBases, ";") || strstr(Match->SplicedBases, ":")))
+                    //{
+                    //    printf("Warning: Match found with true splicing, but splice junction not stored!\n");
+                    //    printf("%s %s\n", Match->Bases, Match->SplicedBases);
+                    //    //printf("SpliceNode: %d-%d\n", Match->SpliceHead->DonorPos, Match->SpliceHead->AcceptorPos);
+                    //    DebugPrintGene(LeftExon->Gene);
+                    //}
+                } // if match
+                ExtensionCount = min(511, ExtensionCount + 1);
+            } // RightMatchIndex
+        } // LeftMatchIndex
+    } // Tag loop
+    return;
+}
+
+// Integrity checking of a gene.  (For debugging use only)
+void CheckGene(GeneStruct* Gene)
+{
+    int ExonIndex;
+    int EdgeIndex;
+    ExonStruct* Exon;
+    ExonStruct* Exon2;
+    //
+    if (!Gene)
+    {
+        return;
+    }
+    for (ExonIndex = 0; ExonIndex < Gene->ExonCount; ExonIndex++)
+    {
+        Exon = Gene->Exons + ExonIndex;
+        if (Exon->Start < 0 || Exon->End < 0 || Exon->Start >= Exon->End)
+        {
+            printf("*ERROR\n");
+        }
+        for (EdgeIndex = 0; EdgeIndex < Exon->BackEdgeCount; EdgeIndex++)
+        {
+            Exon2 = Exon->BackwardEdges[EdgeIndex].Exon;
+            if (!Exon2)
+            {
+                printf("*ERROR!\n");
+            }
+        }
+        for (EdgeIndex = 0; EdgeIndex < Exon->ForwardEdgeCount; EdgeIndex++)
+        {
+            Exon2 = Exon->ForwardEdges[EdgeIndex].Exon;
+            if (!Exon2)
+            {
+                printf("*ERROR!\n");
+            }
+        }        
+    }
+}
+
+// Given an exon, search it for tag matches.  If AnchoredFlag is true, then we've already
+// matched part of the tag (and Root isn't the root of the entire trie)
+void GetSplicedTagMatches(SearchInfo* Info, ExonStruct* LeftExon, int LeftExonPos, ExonStruct* Exon, 
+    TrieNode* Root, int AnchoredFlag)
+{
+    int AnchorMax;
+    int AnchorPos;
+    int OldExonPos;
+    int OldSplicePos;
+    char AA;
+    ExonStruct* BridgedExon;
+    TrieNode* CurrentNode;
+    TrieNode* SubNode;
+    int EdgeIndex;
+    int SequenceLength;
+    int SequencePos;
+    int OldTagBufferPos;
+    int OldTagBufferPos2;
+    int OldTagBufferPosSpliced;
+    int OldTagBufferPosSpliced2;
+
+    int Index = 0;
+    TrieTagHanger * TempTag = NULL;
+
+    //printf("New Cal!!!\n");
+    //fflush(stdout);
+
+    //
+    OldExonPos = g_TagExonArrayPos;
+    OldSplicePos = g_TagSpliceArrayPos;
+    OldTagBufferPos = g_TagBufferPos;
+    OldTagBufferPosSpliced = g_TagBufferPosSpliced;
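+    // Save the global tag-buffer and exon/splice-array positions so that each anchor position
+    // (and each recursive call) can restore them before exploring a new path through the exon graph.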
+    SequenceLength = Exon->Length;
+    if (AnchoredFlag)
+    {
+        AnchorMax = min(1, SequenceLength); // it's possible that sequencelength is 0!
+    }
+    else
+    {
+        AnchorMax = SequenceLength;
+    }
+    //
+    // printf("Root: %p\n",Root);
+    //for(Index = 0; Index < TRIE_CHILD_COUNT; ++Index)
+    //{
+    //	printf(" Child[%c] = %p\n",Index + 'A',Root->Children[Index]);
+    //}
+    //getchar();
+
+    // fflush(stdout);
+    //printf("AnchoredFlag %d sequencelen %d anchormax %d\n", AnchoredFlag, SequenceLength, AnchorMax);
+    //printf("Exon %d: %s\n",Exon->Index,Exon->Sequence);
+    for (AnchorPos = 0; AnchorPos < AnchorMax; AnchorPos++)
+    {
+        //printf("Seq char: %c\n",Exon->Sequence[AnchorPos]);
+        //fflush(stdout);
+        if (Exon->Sequence[AnchorPos] - 'A' >= 0 && Exon->Sequence[AnchorPos] - 'A' < TRIE_CHILD_COUNT)
+        {
+            CurrentNode = Root->Children[Exon->Sequence[AnchorPos] - 'A'];
+        }
+        else
+        {
+            CurrentNode = Root->Children['X' - 'A'];
+            printf("Searching Gene: %s Exon: %d/%d\n", Exon->Gene->Name, Exon->Index, Exon->Gene->ExonCount);
+            printf("Root: %p Transition: **%c**\n", Root, Exon->Sequence[AnchorPos]);
+            printf("ExonLength: %d\n", SequenceLength);
+            printf("Sequence: %s\n", Exon->Sequence);
+            printf("AnchorPos: %d\n", AnchorPos);
+            fflush(stdout);
+        }
+        if (!CurrentNode)
+        {
+            continue;
+        }
+        //printf("Current Node is not NULL!\n");
+
+        SequencePos = AnchorPos;
+        g_TagBufferPos = OldTagBufferPos;
+        g_TagBufferPosSpliced = OldTagBufferPosSpliced;
+        g_TagBuffer[g_TagBufferPos++] = Exon->Sequence[AnchorPos];
+        g_TagBufferSpliced[g_TagBufferPosSpliced++] = Exon->Sequence[AnchorPos];
+
+        // If we're performing a tagless search, then our tag may have length 1,
+        // so we could get matches right now:
+        if (CurrentNode->FirstTag)
+        {
+            if (AnchoredFlag)
+            {
+                GetSplicedMatches(Info, CurrentNode, LeftExon, LeftExonPos, Exon, AnchorPos);
+            }
+            else
+            {
+                GetSplicedMatches(Info, CurrentNode, Exon, AnchorPos, Exon, AnchorPos);
+            }
+        }
+
+        while (1)
+        {
+            SequencePos++;
+            //printf("Exon %d anchor %d sequence pos %d\n", Exon->Index, AnchorPos, SequencePos);
+            //fflush(stdout);
+            if (SequencePos >= SequenceLength)
+            {
+                //printf("Following an edge forward...\n");
+                //fflush(stdout);
+                // Try to follow any edges forward
+                OldTagBufferPos2 = g_TagBufferPos;
+                OldTagBufferPosSpliced2 = g_TagBufferPosSpliced;
+                for (EdgeIndex = 0; EdgeIndex < Exon->ForwardEdgeCount; EdgeIndex++)
+                {
+                    g_TagBufferPos = OldTagBufferPos2;
+                    g_TagBufferPosSpliced = OldTagBufferPosSpliced2;
+                    AA = Exon->ForwardEdges[EdgeIndex].AA;
+                    if (AA)
+                    {
+                        SubNode = CurrentNode->Children[AA-'A'];
+                        g_TagBuffer[g_TagBufferPos++] = AA;
+                        if (Exon->ForwardEdges[EdgeIndex].Power)
+                        {
+                            g_TagBufferSpliced[g_TagBufferPosSpliced++] = ';';
+                            g_TagBufferSpliced[g_TagBufferPosSpliced++] = AA;
+                            g_TagBufferSpliced[g_TagBufferPosSpliced++] = ';';
+                        }
+                        else
+                        {
+                            g_TagBufferSpliced[g_TagBufferPosSpliced++] = AA;
+                        }
+                    }
+                    else
+                    {
+                        SubNode = CurrentNode;
+                        if (Exon->ForwardEdges[EdgeIndex].Power > 0)
+                        {
+                            g_TagBufferSpliced[g_TagBufferPosSpliced++] = ':';
+                        }
+
+                    }
+                    if (!SubNode)
+                    {
+                        continue;
+                    }
+                    BridgedExon = Exon->ForwardEdges[EdgeIndex].Exon;
+                    if (AA)
+                    {
+                        g_TagExonArray[g_TagExonArrayPos++] = BridgedExon;
+                        g_TagSpliceArray[g_TagSpliceArrayPos++] = Exon->ForwardEdges + EdgeIndex;  //Exon->ForwardEdgePower[EdgeIndex];
+                    }
+                    if (SubNode->FirstTag && AA)
+                    {
+                        if (AnchoredFlag)
+                        {
+                            GetSplicedMatches(Info, SubNode, LeftExon, LeftExonPos, BridgedExon, -1);
+                        }
+                        else
+                        {
+                            GetSplicedMatches(Info, SubNode, Exon, AnchorPos, BridgedExon, -1);
+                        }
+                    }
+                    if (!AA)
+                    {
+                        g_TagExonArray[g_TagExonArrayPos++] = BridgedExon;
+                        g_TagSpliceArray[g_TagSpliceArrayPos++] = Exon->ForwardEdges + EdgeIndex; //Exon->ForwardEdgePower[EdgeIndex];
+                    }
+
+                    // We've now spanned an edge with our tag. 
+                    if (AnchoredFlag)
+                    {
+                        GetSplicedTagMatches(Info, LeftExon, LeftExonPos, BridgedExon, SubNode, 1);
+                    }
+                    else
+                    {
+                        GetSplicedTagMatches(Info, Exon, AnchorPos, BridgedExon, SubNode, 1);
+                    }
+                    g_TagExonArrayPos = OldExonPos;
+                    g_TagSpliceArrayPos = OldSplicePos;
+                }
+                break;
+            } // following an edge forward
+            else
+            {
+                //printf("OldCurrNode: %p\n",CurrentNode);
+                CurrentNode = CurrentNode->Children[Exon->Sequence[SequencePos] - 'A'];
+                //printf("CurrentNode updated on %c!!!\n",Exon->Sequence[SequencePos]);
+                //printf("NewCurrNode: %p\n",CurrentNode);
+                //fflush(stdout);
+                g_TagBuffer[g_TagBufferPos++] = Exon->Sequence[SequencePos];
+                g_TagBufferSpliced[g_TagBufferPosSpliced++] = Exon->Sequence[SequencePos];
+                if (!CurrentNode)
+                {
+                    break;
+                }
+                if (CurrentNode->FirstTag)
+                {
+                    if (AnchoredFlag)
+                    {
+                        GetSplicedMatches(Info, CurrentNode, LeftExon, LeftExonPos, Exon, SequencePos);
+                    }
+                    else
+                    {
+                        GetSplicedMatches(Info, CurrentNode, Exon, AnchorPos, Exon, SequencePos);
+                    }
+                }
+            }
+        } // sequencepos iteration
+    } // anchorpos
+}
+
+// Given an edge record for one exon, get the corresponding edge struct for the linked exon.
+// If ForwardFlag is 1, then the edge passed is a forward edge (and the reciprocal edge is a backward edge)
+// If ForwardFlag is 0, then the edge passed is a backward edge (and the reciprocal edge is a forward edge)
+ExonEdge* GetReciprocalExonEdge(ExonEdge* Edge, int ForwardFlag)
+{
+    int EdgeIndex;
+    ExonEdge* OtherEdge;
+    if (ForwardFlag)
+    {
+        for (EdgeIndex = 0; EdgeIndex < Edge->Exon->BackEdgeCount; EdgeIndex++)
+        {
+            OtherEdge = Edge->Exon->BackwardEdges + EdgeIndex;
+            if (OtherEdge->Exon == Edge->Source && OtherEdge->AA == Edge->AA)
+            {
+                return Edge->Exon->BackwardEdges + EdgeIndex;
+            }
+        }
+    }
+    else
+    {
+        for (EdgeIndex = 0; EdgeIndex < Edge->Exon->ForwardEdgeCount; EdgeIndex++)
+        {
+            OtherEdge = Edge->Exon->ForwardEdges + EdgeIndex;
+            if (OtherEdge->Exon == Edge->Source && OtherEdge->AA == Edge->AA)
+            {
+                return Edge->Exon->ForwardEdges + EdgeIndex;
+            }
+        }
+    }
+    INSPECT_ASSERT(0);
+    return NULL;
+}
+
+void SearchSplicableGene(SearchInfo* Info, GeneStruct* Gene)
+{
+    TrieNode* CurrentNode;
+    int ExonIndex;
+    int SequencePos;
+    int SequenceLength;
+    int EdgeIndex;
+    int VerboseFlag = 0;
+    ExonStruct* Exon;
+    ExonStruct* ActiveExon;
+    int TotalSequenceLength = 0;
+    int TotalExonCount = 0;
+    ExonEdge* Edge;
+    int Index;
+
+
+    // CheckGene(Gene);
+    //if (VerboseFlag)
+    //{
+    //    printf("Gene %d: '%s' (%d exons)\n", Info->RecordNumber, Gene->Name, Gene->ExonCount);
+    //}
+    for (ExonIndex = 0; ExonIndex < Gene->ExonCount; ExonIndex++)
+    {
+        SequencePos = 0;
+        Exon = Gene->Exons + ExonIndex;
+        ActiveExon = Exon;
+        if (VerboseFlag)
+        {
+            printf("Search exon %d: '%s'\n", ExonIndex, Exon->Sequence);
+        }
+
+        SequenceLength = Exon->Length;
+        TotalExonCount++;
+        TotalSequenceLength += SequenceLength;
+	
+        ////////////////////////////////////////////////////////////
+        // Try starting with each edge INTO the exon.  XXX-T-AGXX
+        for (EdgeIndex = 0; EdgeIndex < Exon->BackEdgeCount; EdgeIndex++)
+        {
+            Edge = Exon->BackwardEdges + EdgeIndex;
+            if (!Edge->AA)
+            {
+                continue;
+            }
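+            // Apparently a debugging leftover: the no-op assignment below gives a convenient
+            // breakpoint target for one hard-coded edge of interest.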
+            if (Edge->Source->Start == 31887068 && Edge->AA == 'D' && Edge->Source->Index == 463)
+            {
+                Edge = Edge;
+            }
+            g_TagExonArray[0] = Edge->Exon;
+            g_TagExonArray[1] = Exon;
+            g_TagExonArrayPos = 2;
+            g_TagSpliceArray[0] = GetReciprocalExonEdge(Edge, 0);
+            g_TagSpliceArrayPos = 1;
+            g_TagBuffer[0] = Edge->AA;
+            g_TagBufferPos = 1;
+            if (Edge->Power)
+            {
+                g_TagBufferSpliced[0] = ';';
+                g_TagBufferSpliced[1] = Edge->AA;
+                g_TagBufferSpliced[2] = ';';
+                g_TagBufferPosSpliced = 3;
+            }
+            else
+            {
+                g_TagBufferSpliced[0] = Edge->AA;
+                g_TagBufferPosSpliced = 1;
+            }
+            CurrentNode = Info->Root->Children[Edge->AA - 'A'];
+            if (CurrentNode)
+            {
+                GetSplicedTagMatches(Info, Edge->Exon, -1, Exon, CurrentNode, 1);
+                // Special for tagless search:
+                if (CurrentNode->FirstTag)
+                {
+                    GetSplicedMatches(Info, CurrentNode, Edge->Exon, -1, Exon, -1);
+                }
+            }
+        }
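+        ////////////////////////////////////////////////////////////
+        // Now search starting within the exon itself, with no incoming edge consumed: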
+        g_TagExonArray[0] = Exon;
+        g_TagExonArrayPos = 1;
+        g_TagSpliceArrayPos = 0;
+        g_TagBufferPos = 0;
+        g_TagBufferPosSpliced = 0;
+        GetSplicedTagMatches(Info, Exon, 0, Exon, Info->Root, 0);
+    } // loop over exons
+}
+
+// Main method for Spliced.c: Given a collection of tags (Root) and a binary splicedb (FileName), search
+// for matches to the current list of spectra (list head GlobalOptions->FirstSpectrum, but we get to them
+// via back-links from tags).  Score matches with Scorer.  If GeneNames is not null, then it's an array
+// of gene names to be searched, and we skip any gene whose name isn't on the list.
+void SearchSplicableGenes(SearchInfo* Info)
+{
+    FILE* File;
+    GeneStruct* Gene;
+    int VerboseFlag = 0;
+    int RecordNumber = -1;
+    int TotalSequenceLength = 0;
+    int TotalExonCount = 0;
+    int Index;
+
+    //
+    AllocSpliceStructures();
+
+    File = Info->DB->DBFile;
+    if (!File)
+    {
+        printf("** Error: Unable to open gene database '%s'.  No search performed.\n", Info->DB->FileName);
+        return;
+    }
+    fseek(File, 0, 0);
+    //printf("Gene: %s\n",Gene->Name);
+    printf("SEARCH SPLICEABLE GENES...\n");
+    printf("Root: %p\n",Info->Root);
+    for (Index = 0; Index < TRIE_CHILD_COUNT; ++Index)
+    {
+        printf(" Child[%c] = %p\n", Index + 'A', Info->Root->Children[Index]);
+    }
+    getchar();
+    
+    fflush(stdout);
+
+    
+    while (1)
+    {
+        RecordNumber++;
+        //StartTime = clock();
+        Gene = LoadGene(File);
+        if (!Gene)
+        {
+            break;
+        }
+        SearchSplicableGene(Info, Gene);
+
+        FreeGene(Gene);
+
+    } // while genes
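+    // Note: TotalExonCount and TotalSequenceLength are tallied per gene inside
+    // SearchSplicableGene and are never accumulated here, so the summary line
+    // below reports zero exons and bases.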
+    printf("Searched %d genes, %d exons, %d bases\n", RecordNumber, TotalExonCount, TotalSequenceLength);
+}
+
+// For debugging: Exercise the splice functionality.
+void TestSplicing()
+{
+    if (!SLeftMatchBuffer)
+    {
+        AllocSpliceStructures();
+    }
+    LoadGenes("C:\\source\\Inspect\\Inspect\\SpliceDB\\Ch1.dat");
+    DebugPrintGenes();
+}
+
+
+// inspect test splicedb <DBPath> <Start> <End> [<DesiredProtein>]
+// Print out all genes which overlap the region of interest.  Useful for asking
+// why a particular known protein isn't present in its entirety.  If a protein
+// sequence is provided as well, we align that sequence against the exon graph,
+// determining how much of it is present in database.
+void TestSpliceDB(int argc, char** argv)
+{
+    int StartPos;
+    int EndPos;
+    FILE* DBFile;
+    GeneStruct* Gene;
+    int GeneStart;
+    int GeneEnd;
+    int ExonIndex;
+    char* DesiredProtein = NULL;
+    int DesiredProteinLength;
+    FILE* ProteinFile;
+    int BytesRead;
+    //
+    if (argc < 4)
+    {
+        printf("** Not enough args - bailing out\n");
+        return;
+    }
+    DBFile = fopen(argv[3], "rb");
+    if (!DBFile)
+    {
+        printf("** Error: Can't open splicedb at '%s'\n", argv[3]);
+        return;
+    }
+    StartPos = 0;
+    EndPos = -1;
+    if (argc > 4)
+    {
+        StartPos = atoi(argv[4]);
+    }
+    if (argc > 5)
+    {
+        EndPos = atoi(argv[5]);
+    }
+    if (argc > 6)
+    {
+        // Read protein sequence:
+        ProteinFile = fopen(argv[6], "rb");
+        if (!ProteinFile)
+        {
+            printf("** Error: Can't read target protein sequence from '%s'\n", argv[6]);
+            return;
+        }
+        fseek(ProteinFile, 0, 2);
+        DesiredProteinLength = ftell(ProteinFile);
+        DesiredProtein = (char*)calloc(DesiredProteinLength + 1, sizeof(char)); // +1 for the null terminator
+        fseek(ProteinFile, 0, 0);
+        BytesRead = ReadBinary(DesiredProtein, sizeof(char), DesiredProteinLength, ProteinFile);
+        DesiredProtein[BytesRead] = '\0';
+        fclose(ProteinFile);
+    }
+    ////////////////////////
+    while (1)
+    {
+        Gene = LoadGene(DBFile);
+        if (!Gene)
+        { 
+            break;
+        }
+        // Decide whether to print the gene:
+        GeneStart = -1;
+        GeneEnd = -1;
+        for (ExonIndex = 0; ExonIndex < Gene->ExonCount; ExonIndex++)
+        {
+            if (GeneStart < 0 || GeneStart > Gene->Exons[ExonIndex].Start)
+            {
+                GeneStart = Gene->Exons[ExonIndex].Start;
+            }
+            if (GeneEnd < 0 || GeneEnd < Gene->Exons[ExonIndex].End)
+            {
+                GeneEnd = Gene->Exons[ExonIndex].End;
+            }
+        }
+        if (GeneEnd >= StartPos && (EndPos < 0 || GeneStart < EndPos))
+        {
+            //DebugPrintGene(Gene);
+            if (DesiredProtein)
+            {
+                AlignSequenceAgainstExonGraph(Gene, DesiredProtein, NULL, -10, -1);
+            }
+            else
+            {
+                DebugPrintGene(Gene);
+            }
+        }
+        FreeGene(Gene);
+    }
+
+}
diff --git a/Spliced.h b/Spliced.h
new file mode 100644
index 0000000..5f311c9
--- /dev/null
+++ b/Spliced.h
@@ -0,0 +1,120 @@
+//Title:          Spliced.h
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+#ifndef SPLICED_H
+#define SPLICED_H
+
+#include <stdio.h>
+#include "Utils.h"
+#include "Trie.h"
+
+#define GENE_NAME_LENGTH 256
+// Maximum length (in AA) of an extension:
+#define MAX_EXTENSION_LENGTH 64
+// max length (in chars) of extension with splice chars included
+#define MAX_SEXTENSION_LENGTH 192
+// No gene can have this many exons or more:
+#define MAX_GENE_EXONS 50000 
+
+#define MAX_EXTENSION_EXONS 128
+
+// Splice-aware database search code.
+// Here is our basic approach:
+// - Construct an exon-only nucleotide database, stored as a graph.  Each node is an exon (in some reading frame).  Edges
+// may contain one additional amino acid (to "glue" the extra nucleotides at the edges of the exons).  This construction
+// is performed offline by the script SplicePrepare.py
+// - Using a trie of tags (built in Tagger.c, just as they are for ordinary search), search the graph.  
+// Tags (and their extensions) may follow edges between nodes.
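+// Matched peptides are recorded both as plain amino acids and, in SplicedBases, with splice
+// junctions marked: an amino acid contributed by a spliced edge is written as ";X;", and a
+// splice junction that contributes no amino acid is written as ":".  For example (illustrative
+// only), a peptide spliced across its V residue would be stored as "LFSQ;V;GK".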
+
+typedef struct ExonEdge
+{
+    char AA; // 0 if this edge contributes no amino acid
+    int Power; // if zero, this is an adjacent-edge and not a splice junction
+    struct ExonStruct* Source; // the source exon (DONOR, if this is a forward edge)
+    struct ExonStruct* Exon; // the target exon (ACCEPTOR, if this is a forward edge)
+
+    // We construct a linked list of exon edges when parsing from an XML file.  Then, when
+    // the gene is complete, we convert the linked list to an array.  The linked list
+    // uses the Next member; the finished array does not.
+    struct ExonEdge* Next;
+} ExonEdge;
+
+typedef struct ExonStruct
+{
+    int Start;
+    int End;
+    int Index;
+    char* Sequence;
+    int Length; // length of our sequence, in amino acids. 
+    char Prefix[3];
+    char Suffix[3];
+    int BackEdgeCount;
+    int ForwardEdgeCount;
+    ExonEdge* ForwardEdges;
+    ExonEdge* BackwardEdges;
+    ExonEdge* BackEdgeHead; // used during XML parse only
+    ExonEdge* BackEdgeTail; // used during XML parse only
+    int Occurrences;
+    struct GeneStruct* Gene;
+} ExonStruct;
+
+// GeneStructs can be stored in a doubly-linked list.
+typedef struct GeneStruct
+{
+    char Name[GENE_NAME_LENGTH + 1];
+    char SprotName[GENE_NAME_LENGTH + 1];
+    int ChromosomeNumber;
+    int ForwardFlag;
+    int ExonCount; // Size of the Exons arrays
+    struct ExonStruct* Exons;
+    struct GeneStruct* Next; 
+    struct GeneStruct* Prev;
+} GeneStruct;
+
+void TestSplicing(); // internal testing junk!
+
+// Main method for Spliced.c: Given a collection of tags (Root) and a binary splicedb (FileName), search
+// for matches to the current list of spectra (list head GlobalOptions->FirstSpectrum, but we get to them
+// via back-links from tags).  Score matches with Scorer.  If GeneNames is not null, then it's an array
+// of gene names to be searched, and we skip any gene whose name isn't on the list.
+void SearchSplicableGenes(SearchInfo* Info);
+//void SearchSplicableGenes(TrieNode* Root, char* FileName, char** GeneNames, ScoringFunction Scorer,
+//    int DBNumber);
+GeneStruct* LoadGene(FILE* File);
+void FreeGene(GeneStruct* Gene);
+void DebugPrintGene(GeneStruct* Gene);
+//void SearchSplicableGene(TrieNode* Root, GeneStruct* Gene, ScoringFunction Scorer, int DBNumber);
+void SearchSplicableGene(SearchInfo* Info, GeneStruct* Gene);
+void AllocSpliceStructures();
+void SetExonForwardEdges(GeneStruct* Gene);
+
+#endif // SPLICED_H
diff --git a/StripPTM.py b/StripPTM.py
new file mode 100644
index 0000000..56dea0b
--- /dev/null
+++ b/StripPTM.py
@@ -0,0 +1,117 @@
+#Title:          StripPTM.py
+#Author:         Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+#Created:        2005
+# Copyright 2007,2008,2009 The Regents of the University of California
+# All Rights Reserved
+#
+# Permission to use, copy, modify and distribute any part of this
+# program for educational, research and non-profit purposes, by non-profit
+# institutions only, without fee, and without a written agreement is hereby
+# granted, provided that the above copyright notice, this paragraph and
+# the following three paragraphs appear in all copies.
+#
+# Those desiring to incorporate this work into commercial
+# products or use for commercial purposes should contact the Technology
+# Transfer & Intellectual Property Services, University of California,
+# San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+# Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+#
+# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+# INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+# IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+# OF SUCH DAMAGE.
+#
+# THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+# OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+# ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+# REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+# EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+# THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+
+"""
+A handy function to strip unnecessary modifications from a peptide.
+"""
+from Utils import *
+Initialize()
+
+AMINO_ACIDS = "ACDEFGHIKLMNOPQRSTUVWY" # O and U are included, for now.
+INVALID_MASS = 99999
+
+def StripNeedlessModifications(DB, Annotation):
+    """
+    Replace "*" in annotations with "-".
+    Also, correct "unnecessary PTM" annotations.
+    Return (DBPos, FixedAnnotation) as a tuple.
+    We fix the following:
+    Y.A+|Y|BCD.Z   -> YABCD
+    XY.A+|XY|BCD.Z -> XYABCD
+    JYZ.A+|XYZ|BCD.Z -> JYZABCD
+    X.ABCD+|Y|.Y   -> ABCDY
+    X.ABCD+|YZ|.YZ -> ABCDYZ
+    X.ABCD+|YZJ|.YZH -> ABCDYZJ
+    """
+    VerboseFlag = 1
+    Peptide = GetPeptideFromModdedName(Annotation)
+    # Find where this peptide occurs within the database:
+    Aminos = Peptide.Aminos
+    if Peptide.Prefix in AMINO_ACIDS:
+        Aminos = Peptide.Prefix + Aminos
+    if Peptide.Suffix in AMINO_ACIDS:
+        Aminos += Peptide.Suffix
+    DBPos = DB.find(Aminos)
+    if Peptide.Prefix in AMINO_ACIDS:
+        DBPos += 1
+    if not Peptide.Modifications.keys():
+        # An unmodified peptide?  We don't deal with those!
+        return (DBPos, Annotation.replace("*", "-"))
+    # Check whether a simple endpoint-shift can abolish all
+    # modifications:
+    ModIndex = Peptide.Modifications.keys()[0]
+    ModMass = Peptide.Modifications[ModIndex][0].Mass
+    # Try a shift to the LEFT:
+    if ModIndex < 3:
+        FlankMass = 0
+        NewAA = ""
+        for ShiftCharCount in (1, 2, 3):
+            if DBPos - ShiftCharCount < 0:
+                break
+            AA = DB[DBPos - ShiftCharCount]
+            FlankMass += Global.AminoMass.get(AA, INVALID_MASS)
+            NewAA = AA + NewAA # prepend to the new chars
+            if abs(FlankMass - ModMass) <= 2:
+                # The mass matches!  Let's shift the annotation.
+                if (DBPos - ShiftCharCount > 0):
+                    Prefix = DB[DBPos - ShiftCharCount - 1]
+                else:
+                    Prefix = "-"
+                FixedAnnotation = "%s.%s%s.%s"%(Prefix, NewAA, Peptide.Aminos, Peptide.Suffix)
+                if VerboseFlag:
+                    print "-%d The fix is in: %s to %s"%(ShiftCharCount, Annotation, FixedAnnotation)
+                return (DBPos - ShiftCharCount, FixedAnnotation.replace("*", "-"))
+    # Try a shift to the RIGHT:
+    if ModIndex >= len(Peptide.Aminos) - 3:
+        NewAA = ""
+        FlankMass = 0
+        OldEndpoint = DBPos + len(Peptide.Aminos) - 1
+        for ShiftCharCount in (1, 2, 3):
+            if OldEndpoint + ShiftCharCount >= len(DB):
+                print "Off the end of the DB with %s shifted by %d"%(Annotation, ShiftCharCount)
+                continue
+            AA = DB[OldEndpoint + ShiftCharCount]
+            FlankMass += Global.AminoMass.get(AA, INVALID_MASS)
+            NewAA += AA # append the new amino acid
+            if abs(FlankMass - ModMass) <= 2:
+                # The mass matches!  Let's shift the annotation.
+                if (OldEndpoint + ShiftCharCount + 1)<len(DB):
+                    Suffix = DB[OldEndpoint + ShiftCharCount + 1]
+                else:
+                    Suffix = "-"
+                FixedAnnotation = "%s.%s%s.%s"%(Peptide.Prefix, Peptide.Aminos, NewAA, Suffix)
+                if VerboseFlag:
+                    print "+%d The fix is in: %s to %s"%(ShiftCharCount, Annotation, FixedAnnotation)
+                return (DBPos, FixedAnnotation.replace("*", "-"))
+    # We can't edit away the PTM.  Just fix any asterisks:
+    return (DBPos, Annotation.replace("*", "-"))
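+
+if __name__ == "__main__":
+    # Usage sketch: the database path and annotation below are hypothetical placeholders.
+    # A .trie database is read in as one long string, which is the form that
+    # StripNeedlessModifications expects for its DB argument.
+    DB = open("MyDatabase.trie", "rb").read()
+    print StripNeedlessModifications(DB, "K.ACDEFGHIK.L")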
diff --git a/Summary.py b/Summary.py
new file mode 100644
index 0000000..ff7fa50
--- /dev/null
+++ b/Summary.py
@@ -0,0 +1,471 @@
+#Title:          Summary.py
+#Author:         Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+#Created:        2005
+# Copyright 2007,2008,2009 The Regents of the University of California
+# All Rights Reserved
+#
+# Permission to use, copy, modify and distribute any part of this
+# program for educational, research and non-profit purposes, by non-profit
+# institutions only, without fee, and without a written agreement is hereby
+# granted, provided that the above copyright notice, this paragraph and
+# the following three paragraphs appear in all copies.
+#
+# Those desiring to incorporate this work into commercial
+# products or use for commercial purposes should contact the Technology
+# Transfer & Intellectual Property Services, University of California,
+# San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+# Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+#
+# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+# INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+# IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+# OF SUCH DAMAGE.
+#
+# THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+# OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+# ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+# REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+# EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+# THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+
+"""
+The new Summary script, modified for lower memory usage.  This version
+assumes that the top annotation is the correct one, and does not consider
+re-assigning a spectrum to a close runner-up or to a homologous protein.
+
+- Iterate over annotations.  Filter any that aren't a top-scoring annotation with
+good p-value.
+- Remember the best modified and the best unmodified annotation for each database interval
+- Generate the protein report: one sub-report for each protein, sorted by coverage
+- Sub-report includes a table of covered residues, followed by a list of peptide rows.
+- Each peptide row gives a spectrum count and score for the best spectrum
+- Peptide rows are sorted by protein position, then mass
+"""
+import os
+import time
+import sys
+import struct
+import traceback
+import shutil
+import getopt
+import Label
+import MakeImage
+import MSSpectrum
+import SelectProteins
+import ResultsParser
+from Utils import *
+Initialize()
+
+UsageInfo = """Summary.py - Parse search results, and generate either a webpage
+  summarizing the results, or a filtered database for unrestrictive PTM search.
+
+Required options:
+ -r [FileName] - The name of the results file to parse.  If a directory is
+    specified, then all .txt files within the directory will be combined into
+    one report
+ -d [FileName] - The name of the database file (.trie format) searched.
+         (allows more than one database file; use multiple -d options)
+ 
+Additional options:
+ -b [FileName] - Second-pass database filename.  If specified, the proteins
+    selected will be written out to a database (.fasta, .trie and .index files)
+    suitable for unrestrictive search.
+ -w [FileName] - Webpage directory.  If specified, a webpage will be written
+    to the specified directory, summarizing the proteins identified, and the
+    degree of coverage.
+
+ (Note: Either -b or -w, or both, must be provided)
+    
+ -p [Value] - Cutoff p-value.  Annotations with inferior p-value are ignored.
+    Defaults to 0.05.
+ -e [Count] - Minimum number of peptides that a protein must annotate in order
+    to add it to the report or the filtered database.  Defaults to 1.
+ -m [Count] - Minimum number of spectra that a protein must annotate in order
+    to add it to the report or the filtered database.  By default, this count
+    is set to (SpectrumCount / ProteinsInDatabase) * 2.  If the protein
+    database has already been filtered, set this parameter to 1.
+ -v [Count] - Verbose spectrum output count.  If set, report [Count] spectra
+    for each distinct peptide identified.  This option is slower and
+    consumes more memory, but can be more informative.
+ -i [SpectraPath] - For use if verbose spectrum output (-v) is enabled.
+    Images will be generated for each annotation, if the Python Imaging
+    Library (PIL) is installed.  This option generates many files on disk,
+    so it's recommended that you set the summary file (-w option) in its own
+    directory.  "SpectraPath" is the path to the folder with MS2 spectra
+    
+    
+Examples:
+    Summary.py -r Frac1Output.txt -d Database%ssprot.trie -p 0.1
+    Summary.py -r Frac1Output.txt -d Database%ssprot.trie -w F1Summary\index.html -v 1
+"""%(os.sep, os.sep)
+
+class SummarizerClass(ResultsParser.ResultsParser):
+    def __init__(self):
+        # SharedProteinSpectra[ProteinA][ProteinB] = number of spectra for
+        # which the annotation is found in both protein A and protein B.  When
+        # we accept one of the two proteins, the other one loses some annotations!
+        self.SharedProteinSpectra = {}
+        # SharedProteinPeptides is similar to SharedProteinSpectra, but tracks
+        # distinct peptide records
+        self.SharedProteinPeptides = {} 
+        self.AnnotationCounts = {} # ProteinID -> count
+        # BestRepresentatives[(DBStart, DBEnd)] is a list of peptide
+        # instances for the best spectra for that position.
+        self.BestRepresentatives = {}
+        self.BestModRepresentatives = {}
+        self.ResultsFileName = None
+        self.DatabasePath = []
+        self.SecondPassDatabasePath = None
+        self.MinimumProteinHits = None
+        self.SummaryPagePath = None
+        self.MZXMLPath = None
+        self.PValueCutoff = 0.05
+        # Keys are peptides (after I->L substitution), values are lists of protein
+        # record numbers.  We keep this dictionary so that we needn't repeatedly map
+        # the same protein to the database.
+        self.PeptideDict = {}
+        self.VerboseProteinReportCount = 0
+        self.GenerateSpectrumImagesFlag = 0
+        # Very short peptides are uninformative...skip them.
+        self.MinimumPeptideLength = 7
+        self.SpectrumCount = 0
+        self.IntervalHitCount = {}
+        self.MinimumPeptides = 1
+        ResultsParser.ResultsParser.__init__(self)
+    def GetSpectrumPath(self, Path, SpectraPath):
+        """
+        This requires a bit of trickery, because sometimes the results are
+        generated on a unix machine (creating a unix path), and this script
+        is run on a windows machine (which can't split a unix path).  So I am
+        going to handle the path splitting myself.
+        """
+        FileName = None
+        if Path.find("/") >= 0:
+            # Results files made on a unix machine:
+            LastSlash = Path.rfind("/")
+            FileName = Path[LastSlash+1:]
+        else:
+            # Results files made on a windows machine, hopefully.  Any other users, go home.
+            LastBackSlash = Path.rfind("\\")
+            FileName = Path[LastBackSlash+1:]
+        if not FileName:
+            print "unable to create a path to the spectrum file %s"%Path
+            return Path
+        return os.path.join(SpectraPath,FileName)
+    def WriteSecondPassDatabase(self):
+        """
+        Write out the "present" proteins to our second-pass database.
+        self.ProteinSelector is responsible for deciding which peptides
+        belong to which proteins.
+        """
+        Bits = os.path.split(self.SecondPassDatabasePath)
+        DBPathStub = os.path.splitext(self.SecondPassDatabasePath)[0]
+        if len(Bits[0]) == 0:
+            DBPath = os.path.join("Database", "%s.trie"%DBPathStub)
+            IndexPath = os.path.join("Database", "%s.index"%DBPathStub)
+            FastaPath = os.path.join("Database", "%s.fasta"%DBPathStub)
+        else:
+            DBPath = DBPathStub + ".trie"
+            IndexPath = DBPathStub + ".index"
+            FastaPath = DBPathStub + ".fasta"
+        print "Writing second-pass database to %s..."%DBPath
+        DBFile = open(DBPath, "wb")
+        IndexFile = open(IndexPath, "wb")
+        FASTAFile = open(FastaPath, "wb")
+        DBFilePos = 0
+        for (ProteinID, ScoreTuple) in self.ProteinSelector.SelectedProteins.items():
+            (PeptideCount, SpectrumCount) = ScoreTuple
+            if SpectrumCount < self.MinimumProteinHits:
+                continue
+            if PeptideCount < self.MinimumPeptides:
+                continue
+            # Let's write out the protein.  Write to the INDEX file, the
+            # TRIE file, and a FASTA file.  (The FASTA file is just for
+            # humans to read)
+            ProteinName = self.ProteinSelector.ProteinNames[ProteinID]
+            ProteinSequence = self.ProteinSelector.ProteinSequences[ProteinID]
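+            # Each .index record is little-endian: an 8-byte placeholder (written as 0), a
+            # 4-byte byte-offset into the .trie file, and the protein name padded/truncated
+            # to 80 bytes.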
+            Str = struct.pack("<qi80s", 0, DBFilePos, ProteinName[:80])
+            IndexFile.write(Str)
+            DBFile.write("%s*"%ProteinSequence)
+            DBFilePos += len(ProteinSequence) + 1
+            Pos = 0
+            FASTAFile.write(">%s\n"%ProteinName)
+            while Pos < len(ProteinSequence):
+                Chunk = ProteinSequence[Pos:Pos+70]
+                FASTAFile.write(Chunk)
+                FASTAFile.write("\n")
+                Pos += 70
+        IndexFile.close()
+        FASTAFile.close()
+        DBFile.close()
+    def GetProteinHREF(self, ProteinID, ProteinName):
+        # By default, just print the name with no hyperlinking.
+        # Subclass can override to hyperlink to IPI, swiss-prot, etc
+        return ProteinName
+    def WriteSummaryPage(self):
+        """
+        Produce the protein report.  The index file contains the protein coverage
+        information.  If verbose output is requested, also contains one row per peptide.
+        """
+        Dir = os.path.split(self.SummaryPagePath)[0]
+        try:
+            os.makedirs(Dir)
+        except:
+            pass
+        # Populate PeptidesForProtein.  Keys are protein IDs.  Values are lists
+        # of peptide annotations; the annotations are, in turn, keys for
+        # self.ProteinSelector.BestRepresentatives
+        self.PeptidesForProtein = {}
+        for (Annotation, RepList) in self.ProteinSelector.BestRepresentatives.items():
+            if len(RepList) < 1:
+                continue
+            Peptide = RepList[0][1]
+            ProteinID = self.ProteinSelector.PeptideProteins.get(Peptide.Aminos, None)
+            if ProteinID == None:
+                continue
+            PeptideList = self.PeptidesForProtein.get(ProteinID, [])
+            PeptideList.append(Annotation)
+            self.PeptidesForProtein[ProteinID] = PeptideList
+        # Sort proteins from "best" to "worst".  For now, just sort by the
+        # number of distinct peptides.
+        SortedProteins = []
+        for (ProteinID, AnnotationList) in self.PeptidesForProtein.items():
+            AnnotationCount = len(AnnotationList)
+            if AnnotationCount >= self.MinimumPeptides:
+                SortedProteins.append((AnnotationCount, ProteinID))
+        SortedProteins.sort()
+        SortedProteins.reverse()
+        # Start the index file:
+        self.SummaryPageDir = os.path.split(self.SummaryPagePath)[0]
+        # Ensure the directory exists:
+        try:
+            os.makedirs(self.SummaryPageDir)
+        except:
+            pass
+        self.IndexFile = open(self.SummaryPagePath, "w")
+        self.IndexFile.write("<html><title>Protein Report</title>\n")
+        # Iterate over proteins, writing one record for each one:
+        for (PeptideCount, ProteinID) in SortedProteins:
+            ProteinName = self.ProteinSelector.ProteinNames[ProteinID]
+            if ProteinName[:3]== "XXX":
+                #print "Found a fake protein %s"%ProteinName
+                continue
+            #print "Write protein %s (%s), %s peptides"%(ProteinID, self.ProteinSelector.ProteinNames[ProteinID], PeptideCount)
+            self.WriteProteinSummary(ProteinID)
+        self.IndexFile.close()
+    def WriteProteinSummary(self, ProteinID):
+        """
+        Write summary page section for a single protein.
+        """
+        ##########################################################
+        # Determine coverage, and sort peptides by position within the protein:
+        ProteinName = self.ProteinSelector.ProteinNames[ProteinID]
+        ProteinSequence = self.ProteinSelector.ProteinSequences[ProteinID]
+        Coverage = [0] * len(ProteinSequence)
+        PeptideCount = len(self.PeptidesForProtein.get(ProteinID, []))
+        for Annotation in self.PeptidesForProtein.get(ProteinID, []):
+            Peptide = self.ProteinSelector.BestRepresentatives[Annotation][0][1]
+            MatchPos = ProteinSequence.find(Peptide.Aminos)
+            if MatchPos == -1:
+                print "** Error: Peptide '%s' assigned to incompatible protein %s '%s'"%(Peptide.Aminos, ProteinID, ProteinName)
+            for Pos in range(MatchPos, MatchPos + len(Peptide.Aminos)):
+                Coverage[Pos] += 1
+        CoverFlags = 0
+        for CoverageCount in Coverage:
+            if CoverageCount:
+                CoverFlags += 1
+        CoverageRate = CoverFlags / float(len(ProteinSequence))
+        # Write header:
+        SpectrumCount = self.ProteinSelector.ProteinSpectrumCounts[ProteinID]
+        HREF = self.GetProteinHREF(ProteinID, ProteinName)
+        self.IndexFile.write("<h3>%s</h3>\n<b>%s peptides, %s spectra, %.1f%% coverage</b><br>\n"%(HREF, PeptideCount, SpectrumCount, CoverageRate*100))
+        # Write protein sequence:
+        ColorUncovered = "#aaaaaa"
+        ColorCovered = "#000000"
+        OldColor = ColorUncovered
+        OldBoldFlag = 0
+        BoldFlag = 0
+        self.IndexFile.write("<tt>")
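+        # Print the sequence 50 residues per line with a space every 10 residues and the
+        # 1-based residue number at the start of each line; covered residues are rendered
+        # in black (#000000), uncovered residues in gray (#aaaaaa).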
+        for Pos in range(len(ProteinSequence)):
+            ResidueNumber = Pos + 1
+            if ResidueNumber%50 == 1:
+                if BoldFlag:
+                    self.IndexFile.write("</b>")
+                    BoldFlag = 0                  
+                self.IndexFile.write("</font><br>\n<font color=#000000>")
+                OldColor = ColorCovered
+                OldBoldFlag = 0
+                for Foo in range(4 - len(str(ResidueNumber))):
+                    self.IndexFile.write(" ")
+                self.IndexFile.write("%d "%ResidueNumber)
+            if ResidueNumber % 10 == 1:
+                self.IndexFile.write(" ")
+            if Coverage[Pos]:
+                Color = ColorCovered
+            else:
+                Color = ColorUncovered
+                BoldFlag = 0
+            if Color != OldColor:
+                self.IndexFile.write("</font><font color=%s>"%Color)
+                OldColor = Color
+            if BoldFlag != OldBoldFlag:
+                if BoldFlag:
+                    self.IndexFile.write("<b>")
+                else:
+                    self.IndexFile.write("</b>")
+                OldBoldFlag = BoldFlag
+            self.IndexFile.write("%s"%ProteinSequence[Pos])
+        self.IndexFile.write("<br><br></font><font color=#000000></tt>\n\n")
+        ###############################################
+        # Write individual peptides, if requested:
+        if self.VerboseProteinReportCount:
+            # Write out peptides:
+            self.WritePeptideHeader(ProteinID, self.IndexFile)
+            SortedAnnotations = []
+            for Annotation in self.PeptidesForProtein.get(ProteinID, []):
+                RepresentativeList = self.ProteinSelector.BestRepresentatives[Annotation]
+                Peptide = RepresentativeList[0][1]
+                Pos = ProteinSequence.find(Peptide.Aminos)
+                SortedAnnotations.append((Pos, Pos + len(Peptide.Aminos) - 1, Annotation))
+            SortedAnnotations.sort()
+            for (StartPos, EndPos, Annotation) in SortedAnnotations:
+                IntervalString = "%s-%s"%(StartPos + 1, EndPos + 1)
+                TotalHitCount = self.ProteinSelector.AnnotationSpectrumCounts[Annotation]
+                RepresentativeList = self.ProteinSelector.BestRepresentatives[Annotation]
+                RepresentativeList.reverse() # they're sorted from worst-to-best; fix that.
+                for Index in range(len(RepresentativeList)):
+                    Peptide = RepresentativeList[Index][-1]
+                    self.WritePeptideLine(self.IndexFile, IntervalString, ProteinID, Index, Peptide, TotalHitCount)
+            self.WritePeptideFooter(self.IndexFile)
+        self.IndexFile.write("<hr>")
+    def WritePeptideFooter(self, IndexFile):
+        IndexFile.write("</table>\n")
+    def WritePeptideHeader(self, ProteinID, IndexFile):
+        IndexFile.write("<table><tr><td><b>Residues</b></td><td><b>Total Spectra</b></td><td><b>Peptide</b></td><td><b>p-value</b></td><td><b>MQScore</b></td><td><b>File</b></td><td><b>Scan</b></td></tr>")
+    def WritePeptideLine(self, File, IntervalStr, ProteinID, SpectrumIndex, Peptide, TotalHitCount):
+        Dir = os.path.split(self.SummaryPagePath)[0]
+        Annotation = Peptide.GetModdedName()
+        SpecFileName = Peptide.SpectrumFilePath.replace("/","\\").split("\\")[-1]
+        if self.GenerateSpectrumImagesFlag:
+            ImageFileName = "%s.%s.png"%(Annotation, SpectrumIndex)
+            ImageFilePath = os.path.join(Dir, ImageFileName)
+            Maker = MakeImage.MSImageMaker()
+            MSpectrum = MSSpectrum.SpectrumClass()
+            Path = self.GetSpectrumPath(Peptide.SpectrumFilePath, self.MZXMLPath)
+            FileName = "%s:%s"%(Path, Peptide.SpectrumFilePos)
+            try:
+                #SpectrumFile = Label.OpenAndSeekFile(FileName)
+                #print FileName
+                #MSpectrum.ReadPeaksFromFile(SpectrumFile, FileName)
+                #MSpectrum.RankPeaksByIntensity()
+                #SpectrumFile.close()
+                #Label.LabelSpectrum(MSpectrum, Peptide)
+                #Maker.ConvertSpectrumToImage(MSpectrum, ImageFilePath, Peptide, Width = 500, Height = 380)
+                Args = " -r %s -b %d -a %s -w %s -p"%(Path, int(Peptide.SpectrumFilePos), Annotation, ImageFilePath)
+                ArgsList = Args.split()
+                #print "Parsing Results for %s, scan %s"%(FileName, Scan)
+                Dymo = Label.LabelClass()
+                Dymo.ParseCommandLine(ArgsList)
+                #Dymo.LoadModel(0, Dymo.PeptideHasPhosphorylation)
+                Dymo.Main()
+                
+            except:
+                traceback.print_exc()
+            File.write("<tr><td>%s</td><td>%s</td><td><a href=\"%s\">%s</td><td>%s</td><td>%s</td>"%(IntervalStr, TotalHitCount, ImageFileName, Peptide.GetFullModdedName(), Peptide.PValue, Peptide.MQScore))
+            File.write("<td>%s</td><td>%s</td></tr>\n"%(SpecFileName, Peptide.ScanNumber))
+        else:
+            File.write("<tr><td>%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td>"%(IntervalStr, TotalHitCount, Peptide.GetFullModdedName(), Peptide.PValue, Peptide.MQScore))
+            File.write("<td>%s</td><td>%s</td></tr>\n"%(SpecFileName, Peptide.ScanNumber))
+    def SetMinimumProteinHits(self):
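+        # Default threshold: twice the average number of annotated spectra per database
+        # protein, but never fewer than 2 spectra.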
+        ProteinCount = len(self.ProteinNames)
+        self.MinimumProteinHits = (2 * self.SpectrumCount) / ProteinCount
+        self.MinimumProteinHits = max(self.MinimumProteinHits, 2)
+        print "%s spectra with a valid annotation, %s proteins in all"%(self.SpectrumCount, ProteinCount)
+        print "Minimum hits required to accept an additional protein: ", self.MinimumProteinHits
+    def Main(self):
+        self.ProteinSelector = SelectProteins.ProteinSelector()
+        print self.PValueCutoff
+        self.ProteinSelector.PValueCutoff = self.PValueCutoff
+        self.ProteinSelector.LoadMultipleDB(self.DatabasePath)
+        # If we're expected to write out a summary page, then keep track
+        # of the top N representatives for each annotation:
+        if self.SummaryPagePath:
+            if self.VerboseProteinReportCount:
+                self.ProteinSelector.RetainRepresentativeCount = self.VerboseProteinReportCount
+            else:
+                self.ProteinSelector.RetainRepresentativeCount = 1
+        self.ProcessResultsFiles(self.ResultsFileName, self.ProteinSelector.ParseAnnotations)
+        self.ProteinSelector.ChooseProteins()
+        if self.SecondPassDatabasePath:
+            self.WriteSecondPassDatabase()
+            print "Second-pass database written to:", self.SecondPassDatabasePath
+        if self.SummaryPagePath:
+            self.WriteSummaryPage()
+            print "Summary page written to:", self.SummaryPagePath
+    def ParseCommandLine(self, Arguments):
+        (Options, Args) = getopt.getopt(Arguments, "r:d:b:p:w:m:v:i:e:")
+        OptionsSeen = {}
+        for (Option, Value) in Options:
+            OptionsSeen[Option] = 1
+            if Option == "-r":
+                # -r results file(s)
+                if not os.path.exists(Value):
+                    print "** Error: couldn't find results file '%s'\n\n"%Value
+                    print UsageInfo
+                    sys.exit(1)
+                self.ResultsFileName = Value
+            elif Option == "-d":
+                # -d database
+                if not os.path.exists(Value):
+                    print "** Error: couldn't find database file '%s'\n\n"%Value
+                    print UsageInfo
+                    sys.exit(1)
+                self.DatabasePath.append(Value)
+            elif Option == "-b":
+                # -b Second-pass database
+                self.SecondPassDatabasePath = Value
+            elif Option == "-i":
+                self.GenerateSpectrumImagesFlag = 1
+                self.MZXMLPath = Value
+            elif Option == "-m":
+                # -m Minimum number of spectra for a new protein
+                self.MinimumProteinHits = int(Value)
+            elif Option == "-e":
+                # -e Minimum number of peptides for a new protein
+                self.MinimumPeptides = int(Value)
+            elif Option == "-w":
+                # -w Summary page filename
+                self.SummaryPagePath = Value
+            elif Option == "-p":
+                # -p p-value cutoff
+                self.PValueCutoff = float(Value)
+                print self.PValueCutoff
+            elif Option == "-v":
+                # -v Verbose output flag
+                self.VerboseProteinReportCount = int(Value)
+        # Error out, if we didn't see required options:
+        if not OptionsSeen.has_key("-d") or not OptionsSeen.has_key("-r"):
+            print "** Please specify database (-d) and results file (-r)"
+            print UsageInfo
+            sys.exit(1)
+        # If neither -b nor -w was specified, assume they want a summary:
+        if not OptionsSeen.has_key("-b") and not OptionsSeen.has_key("-w"):
+            self.SummaryPagePath = os.path.join("ProteinSummary", "index.html")
+            print "** Summary page will be written to '%s'; use -w to override this"%Summarizer.SummaryPagePath
+        print "Summary page path:", self.SummaryPagePath
+        
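+# Illustrative invocation (file names here are placeholders; -r and -d are
+# required, the other flags are optional):
+#   python <this script> -r InspectResults.txt -d Database.trie \
+#       -w ProteinSummary/index.html -p 0.05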
+if __name__ == "__main__":
+    try:
+        import psyco
+        psyco.full()
+    except:
+        print "(psyco not found - running in non-optimized mode)"
+    Summarizer = SummarizerClass()
+    Summarizer.ParseCommandLine(sys.argv[1:])
+    StartTime = time.clock()
+    Summarizer.Main()
+    EndTime = time.clock()
+    print "ELAPSED:", EndTime - StartTime
diff --git a/SystemTest.py b/SystemTest.py
new file mode 100644
index 0000000..d49c97a
--- /dev/null
+++ b/SystemTest.py
@@ -0,0 +1,251 @@
+#Title:          SystemTest.py
+#Author:         Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+#Created:        2005
+# Copyright 2007,2008,2009 The Regents of the University of California
+# All Rights Reserved
+#
+# Permission to use, copy, modify and distribute any part of this
+# program for educational, research and non-profit purposes, by non-profit
+# institutions only, without fee, and without a written agreement is hereby
+# granted, provided that the above copyright notice, this paragraph and
+# the following three paragraphs appear in all copies.
+#
+# Those desiring to incorporate this work into commercial
+# products or use for commercial purposes should contact the Technology
+# Transfer & Intellectual Property Services, University of California,
+# San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+# Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+#
+# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+# INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+# IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+# OF SUCH DAMAGE.
+#
+# THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+# OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+# ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+# REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+# EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+# THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+"""
+SystemTest.py is the master test script for the Inspect toolkit.
+It should be run after the inspect executable has been built (if necessary)
+and installed in the current directory.
+
+Run with no command-line arguments to perform a full test.
+"""
+import os
+import sys
+import traceback
+try:
+    from Utils import *
+    Initialize()
+except:
+    print "** Error: Unable to load Utils!"
+if hasattr(os, "sysconf"):
+    IS_WINDOWS = 0
+else:
+    IS_WINDOWS = 1
+
+if IS_WINDOWS:
+    INSPECT_EXECUTABLE = "inspect"
+else:
+    INSPECT_EXECUTABLE = "./inspect"
+
+SystemTestDir = "SystemTest"
+
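+# Each test below shells out to the inspect executable with a command of the
+# form "<inspect> -i <input script> -o <output file>", where the input scripts
+# live under the SystemTest/ directory (see RunTests at the bottom of this file).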
+class InspectRunner:
+    def __init__(self):
+        self.ErrorCount = 0
+        self.TestsRun = 0
+        self.TempOutputName = "SystemTestTemp.txt"
+    def RunTestSearch(self, InputFileName, DesiredPeptide):
+        "Run inspect, and verify that the desired peptide is the top match."
+        Command = "%s -i %s -o %s"%(INSPECT_EXECUTABLE, InputFileName, self.TempOutputName)
+        print Command
+        self.TestsRun += 1
+        try:
+            # Remove old output before running test:
+            if os.path.exists(self.TempOutputName):
+                os.remove(self.TempOutputName)
+            # Run inspect:
+            
+            os.system(Command)
+        except:
+            traceback.print_exc()
+            self.ErrorCount += 1
+            return
+        self.VerifyTestSearchResults(InputFileName, self.TempOutputName, DesiredPeptide)
+    def VerifyTestSearchResults(self, InputFileName, OutputFileName, DesiredPeptide):
+        if not os.path.exists(OutputFileName):
+            print "** Error: No test output written for input '%s' to %s"%(InputFileName, OutputFileName)
+            self.ErrorCount += 1
+            return    
+        File = open(OutputFileName, "rb")
+        GoodHitPosition = None
+        HitIndex = 0
+        TopHit = None
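+        # Inspect search output is tab-delimited.  Only two columns are relied
+        # on here: column 5, the match score (rows where it is not numeric are
+        # skipped as headers), and column 2, the annotation with its flanking
+        # residues, which the [2:-2] slice strips off.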
+        for FileLine in File.xreadlines():
+            Bits = FileLine.split("\t")
+            try:
+                Score = float(Bits[5])
+            except:
+                continue # header line
+            if not TopHit:
+                TopHit = Bits[2][2:-2]
+            HitIndex += 1
+            if Bits[2][2:-2] == DesiredPeptide:
+                GoodHitPosition = HitIndex
+                break
+        if GoodHitPosition == 1:
+            print "Test '%s' passed - top hit was '%s'"%(InputFileName, DesiredPeptide)
+            return
+        self.ErrorCount += 1
+        print "** Error for test '%s':\n  Top hit was '%s'\n  Desired hit '%s' was seen at position %s"%(InputFileName, TopHit, DesiredPeptide, GoodHitPosition)
+    def Summarize(self):
+        print 
+        print "-=- "*18
+        print "System test summary: Ran %s tests, encountered %s errors."%(self.TestsRun, self.ErrorCount)
+    def TestParentMassCorrection(self):
+        # TestSpectra.pkl:
+        # K.VLVLDTDYK.K, K.CLMEGAGDVAFVK.H, R.TPEVDDEALEK.F
+        InputFileName = os.path.join(SystemTestDir, "TestPMC.txt")
+        Command = "%s -i %s -o %s"%(INSPECT_EXECUTABLE, InputFileName, self.TempOutputName)
+        self.TestsRun += 1
+        print Command
+        os.system(Command)
+        if not os.path.exists(self.TempOutputName):
+            print "Error: TestPMC produced no output!"
+            self.ErrorCount += 1
+            return
+        File = open(self.TempOutputName, "rb")
+        Bits = File.readline().split("\t")
+        File.close()
+        if len(Bits) < 5:
+            print "* Error: TestPMC produced invalid output!"
+            self.ErrorCount += 1
+            return
+        Mass = float(Bits[3])
+        Charge = int(Bits[4])
+        DesiredCharge = 2
+        DesiredMass = 1065.6
+        if Charge != DesiredCharge or abs(Mass - DesiredMass) > 1.0:
+            print "* Error: TestPMC produced invalid charge+mass (%s, %s), should be (%s, %s)"%(Charge, Mass, DesiredCharge, DesiredMass)
+            self.ErrorCount += 1
+        else:
+            print "TestPMC successful: Parent mass %s within tolerance"%Mass
+        print "Parent mass correction complete."
+            
+    def TestTagging(self, InputFileName, Annotation, TagLength = None):
+        """
+        Run inspect in tag-generation mode.  Verify that one or more of the
+        tags are correct for the target peptide (Annotation, a string).
+        """
+        DesiredPeptide = GetPeptideFromModdedName(Annotation)
+        Command = "%s -i %s -o %s"%(INSPECT_EXECUTABLE, InputFileName, self.TempOutputName)
+        self.TestsRun += 1
+        try:
+            # Remove old output before running test:
+            if os.path.exists(self.TempOutputName):
+                os.remove(self.TempOutputName)
+            # Run inspect:
+            print Command
+            os.system(Command)
+        except:
+            traceback.print_exc()
+            self.ErrorCount += 1
+            return
+        if not os.path.exists(self.TempOutputName):
+            print "** Error: No test output written for input '%s' to %s"%(InputFileName, self.TempOutputName)
+            self.ErrorCount += 1
+            return
+        ValidTagCount = 0
+        TagCount = 0
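+        # Tag output rows are tab-delimited: column 4 is the prefix mass,
+        # column 5 the tag residues, and column 6 the suffix mass; rows starting
+        # with "#" or with fewer than 7 columns are skipped as headers.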
+        File = open(self.TempOutputName, "rb")
+        for FileLine in File.xreadlines():
+            Bits = FileLine.split("\t")
+            if FileLine[0] == "#" or len(Bits) < 7:
+                continue
+            #print Bits
+            TagAminos = Bits[5]
+            if TagLength != None and len(TagAminos) != TagLength:
+                print "* Error in test '%s': Tag has length %s != %s"%(InputFileName, len(TagAminos), TagLength)
+                self.ErrorCount += 1
+            Tag = PeptideClass(Bits[5])
+            Tag.PrefixMass = float(Bits[4])
+            Tag.SuffixMass = float(Bits[6])
+            if DesiredPeptide.IsValidTag(Tag):
+                ValidTagCount += 1
+            TagCount += 1
+        if not ValidTagCount:
+            print "* Test '%s' failed: No valid tags among %s attempts, for peptide %s"%(InputFileName, TagCount, DesiredPeptide.GetModdedName())
+            self.ErrorCount += 1
+        else:
+            print "Tag test successful - found %s valid tags"%ValidTagCount
+    def TestMS2DBConstruction(self):
+        InputFileName = os.path.join(SystemTestDir, "BuildSimpleChromosome.txt")
+        TempDBPath = "Temp.ms2db"
+        Command = "%s -i %s -o %s"%(INSPECT_EXECUTABLE, InputFileName, TempDBPath)
+        try:
+            print Command
+            os.system(Command)
+        except:
+            print Command
+            traceback.print_exc()
+            self.ErrorCount += 1
+        try:
+            File = open(TempDBPath)
+        except:
+            print "** MS2DB test failed: No db constructed"
+            self.ErrorCount += 1
+            return
+        MS2DB = File.read()
+        File.close()
+        #Pos = MS2DB.find("RERERERA")
+        Pos = MS2DB.find("RERE")
+        if Pos == -1:
+            print "** MS2DB test failed: Expected peptide '%s' not present"%"RERE"
+            self.ErrorCount += 1
+        # Now that the database has been constructed, let's search it:
+        # TempInputFileName = "TempMS2DB.in"
+        # TempScriptFile = open(TempInputFileName, "wb")
+        # TempScriptFile.write("db,%s\n"%os.path.abspath(TempDBPath))
+        # TempScriptFile.write("spectra,SystemTest/TestSpectrum.dta\n")
+        # TempScriptFile.write("protease,None\n")
+        # TempScriptFile.write("mod,+57,C,fix\n")
+        # TempScriptFile.close()
+        # Command = "%s -i %s -o %s"%(INSPECT_EXECUTABLE, TempInputFileName, self.TempOutputName)
+        # self.TestsRun += 1
+        #try:
+        #    # Remove old output before running test:
+        #    if os.path.exists(self.TempOutputName):
+        #        os.remove(self.TempOutputName)
+        #    # Run inspect:
+        #    print Command
+        #    os.system(Command)
+        #except:
+        #    traceback.print_exc()
+        #    self.ErrorCount += 1
+        #    return
+        #self.VerifyTestSearchResults(TempInputFileName, self.TempOutputName, "VKEAMAPK")
+        #try:
+        #    os.remove(TempInputFileName)
+        #except:
+        #    pass
+        #print "MS2DB search complete"
+    def RunTests(self):
+        self.TestMS2DBConstruction()
+        self.TestTagging(os.path.join(SystemTestDir, "TestInputTag1.txt"), "VKEAMAPK", TagLength = 1)
+        self.TestTagging(os.path.join(SystemTestDir, "TestInputTag3.txt"), "VKEAMAPK", TagLength = 3)
+        self.RunTestSearch(os.path.join(SystemTestDir, "TestInput.txt"), "VKEAMGuserPK")
+        self.RunTestSearch(os.path.join(SystemTestDir, "TestInputMod.txt"), "VKEAMG+14PK")
+        self.RunTestSearch(os.path.join(SystemTestDir, "TestMS2.txt"), "AAEAATTDLTYR")
+        self.RunTestSearch(os.path.join(SystemTestDir, "TestCDTA.txt"), "EIQIAEATVPK");
+        self.TestParentMassCorrection()
+        self.Summarize()
+    
+if __name__ == "__main__":
+    Runner = InspectRunner()
+    Runner.RunTests()
diff --git a/SystemTest/BuildSimpleChromosome.txt b/SystemTest/BuildSimpleChromosome.txt
new file mode 100644
index 0000000..540ec59
--- /dev/null
+++ b/SystemTest/BuildSimpleChromosome.txt
@@ -0,0 +1,3 @@
+ReadGFF,SystemTest/SimpleGenes.gff
+GenomeFile,SystemTest/SimpleChromosome.trie
+ChromosomeName,simple
diff --git a/SystemTest/Shew_Short.fasta b/SystemTest/Shew_Short.fasta
new file mode 100644
index 0000000..a47f3c5
--- /dev/null
+++ b/SystemTest/Shew_Short.fasta
@@ -0,0 +1,20 @@
+>SO_0548 DNA-binding protein, HU family
+MNKTELIAKIAENADITKAQATRALKSFEAAITESMKNGDKISIVGFGSFETTTRAARTG
+RNPQTGKEIQIAEATVPKFKAGKTLRDSVN
+>SO_3146 DNA-binding protein, H-NS family
+MSEFLEILTHGRRFKAAVKDLSVEELRDLAAKLDKILVERESMEAEELQAIAARNAKIEE
+IRQQMEAVGLSIDDLGGVAVKASSKKRAPRPAKYQIEVDGEVIQWTGQGRMPTVFKNEVN
+KGRSMDDFLI
+>SO_1126 chaperone protein, DnaK
+MGKIIGIDLGTTNSCVAVLDGGKARVLENAEGDRTTPSIIAYTDDETIVGQPAKRQAVTN
+PNNTFFAIKRLIGRRFKDDEVQRDVNIMPFKIIAADNGDAWVESRGNKMAPPQVSAEILK
+KMKKTAEDFLGEEVTEAVITVPAYFNDSQRQATKDAGRIAGLEVKRIINEPTAAALAYGI
+DKKQGDNIVAVYDLGGGTFDISIIEIDSNDGDQTFEVLATNGDTHLGGEDFDNRLINYLA
+DEFKKEQGLDLRKDPLAMQRLKEAAEKAKIELSSTNQTEVNLPYITADATGPKHLVVKIT
+RAKLESLVEDLIIRTLEPLKVALADADLSVSDINEVILVGGQTRMPKVQEAVTNFFGKEP
+RKDVNPDEAVAVGAAIQAGVLSGDVKDVLLLDVTPLSLGIETMGSVMTKLIEKNTTIPTK
+AQQVFSTADDNQSAVTIHVLQGERKQASANKSLGQFNLDGIEPAPRGMPQIEVMFDIDAD
+GILHVSATDKKTGKKQNITIKASSGLSEEEVAQMVRDAEAHAEEDKKFEELVQSRNQADG
+LVHATKKQVEEAGDALPSEDKAKIEAAMSAVETAVKGNDKEAIEKATQALIEASAKLMEI
+AQAKAQTQGGAQEGAAKQSNATADDVVDAEFEEVKDDKK
+
diff --git a/SystemTest/Shew_dta.txt b/SystemTest/Shew_dta.txt
new file mode 100644
index 0000000..d333905
--- /dev/null
+++ b/SystemTest/Shew_dta.txt
@@ -0,0 +1,1451 @@
+=================================== "QC_Shew_07_02-pt3_31Mar07_OWL_07-03-07.3186.3186.1.dta" ==================================
+1319.69421 1   scan=3186 cs=1
+379.036 4.3
+381.198 8.2
+382.280 4.6
+383.081 4.6
+384.461 18.4
+385.142 5.0
+385.989 3.5
+387.036 25.9
+389.413 3.0
+394.441 4.8
+397.267 18.2
+398.925 4.1
+400.437 5.0
+403.206 14.6
+404.342 12.7
+407.127 26.3
+408.789 17.0
+409.516 24.5
+410.246 9.5
+411.799 24.6
+412.507 39.6
+413.335 47.0
+418.426 64.2
+419.234 17.9
+419.877 11.0
+421.143 11.9
+423.140 9.1
+424.066 8.7
+425.275 8.6
+426.279 37.6
+427.542 14.0
+429.523 9.0
+430.389 16.7
+435.210 28.8
+437.190 25.6
+439.275 7.7
+440.161 3.0
+442.548 13.6
+443.429 7.0
+445.412 5.2
+450.835 74.9
+452.336 10.6
+453.324 20.4
+454.037 48.8
+456.057 39.7
+457.631 31.3
+458.542 3.8
+460.223 3.0
+462.976 17.8
+464.093 12.5
+465.239 11.1
+466.437 3.5
+467.071 9.9
+468.181 47.9
+469.084 11.9
+470.165 36.4
+471.671 45.1
+472.475 19.7
+473.296 5.4
+474.256 14.7
+478.120 3.3
+479.761 14.6
+480.520 41.8
+481.188 2.1
+482.360 22.0
+483.471 37.7
+484.328 36.0
+485.116 5.8
+486.098 14.5
+487.285 10.4
+490.121 10.1
+493.558 13.7
+494.674 17.1
+495.418 11.1
+496.228 57.9
+497.427 2.2
+498.064 6.7
+500.060 10.2
+501.391 20.8
+503.038 5.1
+504.394 9.1
+507.419 15.6
+508.318 3.2
+510.254 24.5
+511.358 62.9
+512.397 21.6
+513.267 35.2
+514.259 16.5
+516.292 40.5
+518.346 14.2
+520.083 20.5
+521.388 17.7
+522.031 11.0
+523.228 61.1
+525.239 128.6
+526.396 19.9
+528.065 3.9
+529.502 32.6
+530.351 12.9
+531.355 51.6
+532.423 6.1
+533.399 10.4
+536.156 3.7
+537.109 25.8
+538.273 45.8
+539.218 129.9
+540.331 34.9
+541.180 54.1
+542.175 8.1
+543.263 48.6
+544.065 4.3
+545.348 22.1
+546.042 4.0
+547.433 210.6
+548.230 13.3
+551.507 22.1
+553.232 117.1
+554.234 17.3
+555.243 33.3
+556.489 32.3
+557.416 20.6
+558.486 6.5
+559.345 39.5
+560.324 3.4
+561.285 37.0
+563.346 6.3
+565.057 11.6
+566.089 28.3
+567.389 37.8
+568.179 20.3
+569.226 17.8
+570.043 12.3
+571.193 21.1
+573.616 4.1
+577.264 4.6
+578.458 8.8
+579.367 25.6
+582.212 11.7
+582.870 17.4
+585.097 52.0
+586.175 25.3
+587.215 27.9
+589.419 34.8
+592.514 31.7
+593.269 8.5
+594.515 18.4
+596.365 17.9
+597.399 54.4
+598.213 50.7
+599.302 30.6
+601.149 18.9
+602.589 6.5
+603.358 32.6
+604.145 18.2
+605.882 14.4
+606.622 9.7
+607.726 8.5
+608.536 28.4
+609.303 31.0
+610.516 6.3
+611.315 18.8
+612.249 7.5
+613.302 5.4
+614.446 27.4
+615.239 6.7
+616.151 12.7
+616.790 3.4
+617.482 2.4
+619.127 5.1
+620.914 28.2
+622.178 15.0
+623.426 15.3
+624.206 102.6
+625.494 18.7
+626.392 49.5
+627.441 17.7
+628.529 55.5
+629.192 12.3
+630.154 14.6
+631.157 18.5
+632.405 18.5
+635.468 5.2
+636.203 33.7
+637.183 7.4
+638.471 52.6
+639.307 25.5
+640.354 110.2
+642.279 126.2
+643.490 20.9
+644.323 34.2
+646.478 100.4
+647.640 15.8
+650.268 6.3
+651.290 22.8
+652.184 9.5
+653.243 9.4
+654.237 188.5
+655.164 52.0
+656.363 119.7
+657.380 3.6
+658.331 16.3
+659.246 29.6
+660.373 89.4
+661.274 27.1
+664.606 20.2
+665.326 36.1
+666.117 23.8
+667.660 3.9
+668.662 15.9
+669.628 17.1
+670.284 45.0
+672.315 87.5
+673.889 34.3
+674.522 32.5
+675.242 16.3
+677.354 18.2
+678.393 17.9
+679.441 59.8
+681.037 39.6
+681.642 37.0
+682.359 43.1
+684.154 81.4
+685.299 10.0
+686.357 44.1
+687.039 9.3
+690.548 15.1
+691.449 5.3
+694.459 6.9
+695.476 57.2
+696.453 48.7
+697.286 28.9
+698.294 55.6
+699.268 27.0
+700.183 7.4
+702.315 52.4
+704.496 22.2
+705.149 3.3
+706.418 25.8
+707.075 23.5
+708.232 10.4
+709.055 20.5
+709.938 20.1
+711.108 40.0
+712.412 8.4
+713.165 29.2
+713.844 32.4
+714.991 51.8
+716.203 14.3
+717.399 48.2
+718.585 29.6
+719.302 5.4
+720.376 16.8
+721.673 13.9
+722.570 9.9
+723.340 6.9
+724.336 2.6
+725.262 101.1
+726.195 54.1
+727.373 64.2
+728.391 61.7
+729.381 41.3
+731.811 17.2
+736.004 5.0
+737.154 44.5
+738.087 44.8
+739.216 91.0
+740.052 43.4
+741.332 61.5
+742.439 55.5
+743.363 133.7
+744.613 44.4
+745.431 60.7
+746.301 23.3
+751.102 38.6
+752.792 64.2
+753.594 37.2
+754.666 63.6
+755.413 119.5
+756.543 44.4
+757.349 43.9
+757.971 8.1
+761.529 54.9
+765.391 42.9
+766.250 23.9
+767.323 84.7
+768.381 22.3
+769.410 62.1
+770.292 6.8
+771.387 105.0
+772.619 71.9
+773.370 79.3
+774.283 48.8
+778.185 12.3
+779.705 10.6
+781.583 25.5
+782.466 5.8
+783.430 31.0
+784.320 58.9
+785.461 57.0
+786.288 41.6
+787.395 28.8
+788.252 11.8
+789.139 13.5
+789.776 5.9
+791.551 2.2
+793.303 16.9
+794.060 11.8
+795.035 21.0
+796.131 90.0
+797.107 41.2
+798.452 21.0
+799.209 43.9
+800.281 27.0
+803.150 46.9
+804.444 207.6
+805.377 23.3
+807.713 7.4
+808.333 53.2
+810.057 98.5
+811.301 76.0
+812.197 24.5
+813.286 45.9
+814.273 147.4
+815.378 123.6
+816.303 3.4
+817.366 13.7
+822.894 14.4
+825.154 38.5
+826.298 70.5
+827.208 34.1
+828.306 65.9
+829.476 28.7
+830.815 12.7
+831.472 8.6
+832.940 17.9
+833.589 15.7
+835.750 9.1
+836.569 52.1
+837.445 12.1
+838.199 15.6
+839.645 37.3
+841.237 7.1
+842.523 31.5
+843.501 40.4
+844.499 71.9
+845.468 15.4
+846.257 17.9
+849.330 23.9
+851.529 26.2
+854.089 189.8
+855.297 83.1
+856.337 62.8
+857.746 12.5
+860.363 11.6
+861.145 8.7
+863.844 13.6
+864.606 7.6
+867.343 19.9
+868.169 24.2
+869.425 6.5
+870.398 12.2
+871.664 39.6
+872.415 34.2
+873.302 24.7
+873.909 12.0
+874.554 41.2
+875.794 11.0
+880.570 16.7
+882.619 21.3
+883.433 36.9
+884.370 48.0
+885.604 38.0
+886.302 72.3
+887.129 34.8
+888.852 21.3
+890.733 10.2
+895.586 55.4
+896.444 2.3
+898.212 36.6
+899.477 10.6
+900.817 19.0
+902.167 125.0
+903.170 30.2
+904.399 17.7
+907.529 22.7
+908.503 6.1
+909.558 43.1
+910.463 3.8
+911.353 8.2
+913.197 66.1
+914.224 4.1
+915.128 12.7
+916.411 49.9
+917.245 22.9
+918.455 9.0
+919.108 8.7
+920.771 2.6
+922.460 17.4
+923.908 6.6
+925.252 196.8
+926.431 130.1
+927.383 76.4
+928.372 5.6
+930.245 9.6
+931.629 34.5
+932.409 13.2
+934.795 15.2
+935.477 32.7
+936.499 41.1
+937.373 25.7
+938.570 11.4
+940.057 39.0
+941.380 9.9
+943.258 224.8
+944.245 96.7
+945.469 34.9
+946.829 6.5
+948.373 12.1
+949.374 25.1
+951.471 29.4
+952.763 20.7
+953.608 31.6
+954.801 14.8
+955.523 9.6
+956.516 35.1
+957.273 14.3
+958.379 26.1
+959.488 22.2
+960.284 16.4
+961.412 44.5
+962.246 5.8
+967.325 62.2
+968.487 15.5
+969.233 36.5
+971.361 101.5
+973.415 30.4
+974.830 22.3
+981.311 11.8
+983.151 8.7
+985.215 183.0
+986.022 37.3
+986.653 61.6
+987.631 16.7
+988.515 7.8
+993.842 6.1
+996.921 17.4
+997.686 19.7
+998.521 8.0
+1001.468 2.8
+1003.288 53.3
+1004.421 29.2
+1006.367 72.5
+1007.250 75.4
+1009.506 12.6
+1011.265 17.8
+1012.564 8.2
+1013.579 21.3
+1015.196 29.8
+1020.111 25.9
+1021.011 13.9
+1021.967 8.0
+1024.431 78.7
+1025.297 11.8
+1025.926 13.7
+1027.360 10.8
+1028.639 5.4
+1029.713 36.3
+1031.225 10.7
+1032.864 5.9
+1036.760 6.1
+1038.344 86.7
+1039.144 47.8
+1039.786 16.7
+1041.324 20.5
+1042.164 71.8
+1043.127 45.6
+1044.615 34.9
+1045.645 30.6
+1047.153 15.3
+1054.530 14.7
+1056.300 317.6
+1057.341 149.8
+1059.372 21.1
+1060.367 37.8
+1061.410 35.1
+1069.957 4.5
+1074.147 199.8
+1075.142 58.9
+1077.379 392.8
+1078.031 11.0
+1078.640 112.6
+1084.310 37.3
+1085.611 21.2
+1086.347 7.2
+1089.500 2.4
+1090.765 3.7
+1094.035 4.8
+1101.455 3.9
+1102.705 19.2
+1103.552 9.5
+1105.449 8.6
+1108.468 14.9
+1110.370 25.1
+1111.064 8.0
+1113.137 6.4
+1115.752 36.8
+1116.462 14.7
+1119.903 3.0
+1120.560 3.9
+1123.235 11.5
+1125.553 4.0
+1128.516 19.1
+1129.724 27.6
+1130.841 8.9
+1131.494 3.5
+1134.293 9.9
+1137.386 106.9
+1138.331 7.1
+1141.214 11.4
+1145.438 3.9
+1146.144 17.4
+1146.855 10.9
+1155.367 148.4
+1156.396 93.0
+1159.724 8.0
+1167.367 41.1
+1173.125 63.1
+1173.869 36.5
+1174.490 13.0
+1182.127 33.1
+1184.230 32.2
+1191.309 4.3
+1192.453 4.3
+1198.006 22.8
+1204.139 26.7
+1206.883 7.7
+1207.673 10.5
+1214.094 2.8
+1216.542 5.8
+1217.260 46.6
+1219.255 11.1
+1229.442 11.7
+1230.316 13.3
+1232.582 9.7
+1236.331 10.0
+1245.958 5.2
+1247.680 27.3
+1250.812 7.8
+1256.999 27.4
+1257.651 18.2
+1258.975 18.4
+1270.249 17.8
+1273.464 5.4
+1275.289 16.1
+1276.513 43.3
+1278.499 7.0
+1280.116 15.3
+1282.114 14.5
+1283.600 46.3
+1284.438 42.3
+1287.047 19.2
+1289.917 5.9
+1291.428 15.4
+1294.722 24.5
+1299.315 2.4
+1300.755 75.1
+1301.527 365.7
+1302.412 360.7
+1303.358 17.8
+=================================== "QC_Shew_07_02-pt3_31Mar07_OWL_07-03-07.2599.2599.1.dta" ==================================
+1198.67297 1   scan=2599 cs=1
+338.156 11.1
+343.316 7.5
+353.282 75.3
+354.724 11.5
+357.232 2.8
+359.517 3.9
+362.098 3.4
+365.116 12.6
+366.429 4.9
+371.361 45.8
+372.174 3.8
+379.583 23.3
+380.322 16.0
+383.265 14.5
+384.965 39.7
+385.588 8.7
+390.333 5.2
+391.283 10.1
+393.262 3.6
+394.225 11.2
+395.403 3.2
+397.074 24.1
+398.432 13.8
+399.372 13.1
+400.291 12.9
+406.229 22.7
+407.320 13.8
+408.424 9.5
+409.145 3.0
+413.428 18.0
+415.235 6.3
+416.028 6.0
+421.051 10.2
+423.222 59.7
+424.589 85.7
+425.297 87.4
+426.301 44.7
+427.266 19.7
+433.067 4.3
+436.425 24.5
+438.173 125.9
+439.552 24.9
+440.280 2.5
+441.028 4.5
+442.056 2.8
+443.272 17.7
+444.498 81.4
+445.535 6.9
+449.310 10.5
+450.312 11.0
+451.298 11.8
+452.010 8.4
+452.618 11.6
+454.104 61.4
+455.300 50.2
+456.435 20.6
+460.538 2.8
+463.577 5.8
+466.158 189.4
+467.255 64.4
+468.337 50.3
+469.332 10.9
+470.306 18.2
+471.322 20.0
+472.309 18.1
+475.129 4.2
+478.224 55.1
+480.379 37.8
+481.912 1.7
+484.173 152.2
+487.404 7.1
+490.391 14.3
+491.836 10.2
+493.320 26.2
+494.192 14.8
+495.813 80.7
+496.590 4.6
+497.391 48.5
+498.288 17.0
+499.469 11.5
+500.466 3.1
+502.126 13.3
+506.189 4.3
+507.274 12.7
+508.420 36.6
+509.207 5.9
+510.254 4.2
+511.405 11.9
+512.372 32.5
+513.181 36.3
+514.296 57.8
+515.380 117.1
+516.447 11.3
+517.555 8.8
+520.010 45.0
+520.762 31.8
+522.170 30.4
+523.560 10.2
+524.405 9.3
+525.480 19.1
+526.212 33.4
+527.032 10.3
+528.389 7.9
+529.527 14.1
+533.089 3.9
+535.401 2.6
+536.385 67.8
+537.252 280.9
+538.352 82.5
+539.260 16.9
+540.299 25.5
+541.210 30.0
+542.527 20.7
+548.090 7.5
+548.988 6.9
+550.068 20.9
+551.620 11.1
+553.193 163.0
+554.061 68.0
+554.849 79.2
+555.451 192.5
+556.275 30.1
+559.205 3.9
+561.629 2.1
+562.246 9.6
+563.598 18.6
+564.204 18.9
+565.421 17.7
+566.584 13.6
+567.225 184.5
+568.055 21.4
+571.387 15.8
+572.385 20.9
+573.241 15.2
+577.214 7.9
+578.129 40.0
+579.096 48.0
+580.321 38.2
+581.465 106.1
+582.513 45.9
+583.377 107.0
+584.674 36.9
+585.379 32.8
+586.088 12.3
+586.926 11.2
+591.414 28.3
+592.174 13.3
+593.057 11.2
+595.219 16.4
+596.205 55.1
+597.313 164.8
+598.240 9.1
+599.453 14.6
+601.415 10.0
+603.662 12.9
+604.541 14.8
+606.615 17.6
+607.393 76.7
+608.656 60.3
+609.463 78.6
+610.668 4.5
+612.184 8.4
+615.287 5.8
+616.028 6.2
+620.457 20.9
+621.433 61.1
+622.112 29.6
+624.156 127.9
+625.207 80.2
+626.239 130.4
+627.341 90.3
+628.199 28.0
+631.579 8.3
+636.520 25.1
+637.436 36.2
+638.281 77.4
+639.193 53.5
+641.483 29.1
+642.421 85.5
+643.438 28.0
+644.501 51.2
+645.413 17.9
+647.911 55.3
+649.431 70.0
+650.180 36.4
+651.199 20.6
+653.308 16.6
+654.316 113.9
+655.310 53.0
+656.341 48.1
+657.546 10.8
+660.324 14.9
+663.742 10.6
+664.370 41.2
+666.196 550.9
+667.261 190.3
+668.488 39.8
+669.801 24.7
+670.583 7.3
+671.416 11.2
+672.281 22.1
+674.254 26.8
+674.856 15.8
+677.899 29.8
+678.543 72.9
+679.258 14.4
+680.293 28.9
+681.155 16.7
+684.297 321.9
+685.238 65.0
+690.266 31.6
+691.115 12.7
+691.769 8.6
+693.183 32.3
+694.365 4.3
+695.293 154.4
+696.122 98.7
+697.150 72.3
+697.929 11.8
+701.349 5.8
+702.270 8.5
+704.435 21.9
+706.382 42.7
+708.134 79.7
+709.248 77.2
+710.232 79.5
+711.323 36.9
+712.078 13.7
+713.469 11.8
+714.326 53.7
+715.320 78.5
+716.498 13.5
+717.413 11.5
+719.383 110.6
+720.276 51.2
+721.274 13.1
+722.022 1.9
+724.277 12.8
+725.187 39.2
+726.254 12.9
+727.362 29.1
+728.266 64.5
+737.201 465.4
+738.328 217.8
+739.126 42.4
+743.175 10.7
+744.951 8.6
+746.504 7.9
+747.477 21.8
+748.783 19.0
+749.927 17.7
+750.946 31.0
+752.712 7.8
+753.761 22.4
+754.620 54.7
+755.309 378.1
+756.280 80.7
+757.561 7.4
+761.551 3.0
+762.850 7.7
+763.515 97.4
+764.275 40.5
+765.168 5.8
+766.163 9.1
+767.219 77.8
+768.060 32.1
+771.287 12.9
+774.126 10.3
+775.201 4.1
+777.321 5.6
+780.224 5.7
+781.255 9.9
+782.174 14.4
+783.311 156.3
+784.424 23.3
+788.443 22.7
+790.287 30.8
+791.659 45.9
+792.450 38.8
+793.379 19.3
+794.494 25.4
+795.478 13.4
+796.441 7.1
+798.086 15.2
+800.728 8.3
+802.085 7.3
+803.168 48.0
+804.715 14.8
+806.219 25.1
+807.048 35.3
+808.232 184.8
+809.351 160.9
+810.325 57.9
+810.992 9.9
+812.379 59.2
+813.444 18.1
+816.302 5.7
+820.201 248.9
+821.268 273.2
+822.228 15.4
+823.791 18.7
+824.403 298.0
+825.281 91.2
+826.244 32.2
+827.141 83.9
+828.399 45.8
+829.560 8.5
+831.332 7.8
+836.895 46.4
+838.200 1069.1
+839.328 545.2
+842.361 66.5
+843.245 15.1
+848.354 52.7
+850.007 20.3
+852.020 16.4
+854.216 8.5
+855.116 16.6
+855.761 11.4
+856.393 279.0
+857.523 68.9
+859.955 13.8
+865.649 42.6
+866.258 502.3
+867.265 248.4
+883.566 10.8
+884.410 28.0
+889.356 4.7
+891.234 59.6
+892.366 45.8
+893.448 71.7
+896.171 9.9
+901.468 7.9
+902.381 13.9
+903.292 13.1
+909.119 93.2
+910.309 34.0
+911.425 31.8
+917.327 2.1
+918.603 31.5
+919.356 306.0
+920.227 97.1
+921.226 37.4
+921.909 4.7
+925.458 14.3
+926.708 8.5
+928.938 5.3
+931.467 15.1
+935.513 30.3
+937.324 1984.2
+938.343 565.1
+938.947 6.6
+939.679 22.5
+940.551 6.6
+955.328 280.8
+956.430 147.2
+957.343 4.0
+963.951 15.3
+966.352 20.2
+967.428 8.6
+968.628 17.3
+972.277 17.1
+975.213 8.1
+977.441 3.1
+978.552 2.1
+980.831 11.7
+982.752 12.0
+990.356 15.0
+991.463 6.9
+993.403 4.8
+998.783 4.4
+999.796 12.1
+1008.368 6.0
+1010.945 12.5
+1015.960 11.4
+1020.183 16.1
+1021.619 3.9
+1023.501 24.1
+1027.984 3.4
+1032.245 13.3
+1033.117 15.6
+1034.426 17.5
+1035.539 4.9
+1042.792 9.6
+1043.700 10.3
+1045.310 4.3
+1049.479 12.9
+1050.222 7.7
+1052.456 7.8
+1053.718 21.5
+1055.391 3.5
+1058.200 18.2
+1060.490 9.0
+1065.561 3.2
+1067.359 17.1
+1068.342 4.1
+1069.259 36.7
+1070.011 39.8
+1072.364 32.0
+1073.851 9.9
+1075.034 15.6
+1079.383 9.2
+1080.358 2.1
+1083.620 40.9
+1090.258 3.6
+1095.532 3.6
+1098.125 3.2
+1099.807 3.9
+1103.615 13.1
+1104.701 5.3
+1110.164 9.6
+1111.996 46.5
+1113.114 19.5
+1115.906 4.7
+1117.053 4.1
+1122.531 39.7
+1129.947 9.2
+1131.160 20.0
+1132.609 10.3
+1134.414 20.2
+1135.541 7.8
+1136.350 4.2
+1140.338 22.1
+1145.908 1.9
+1148.870 11.2
+1153.263 31.9
+1154.066 7.5
+1154.724 17.7
+1155.775 52.4
+1160.474 24.8
+1162.516 66.0
+1163.623 57.1
+1164.722 25.2
+1165.377 11.6
+1167.206 16.4
+1173.268 12.9
+1177.439 2.1
+1178.364 29.0
+1180.520 1339.3
+1181.654 574.5
+1182.615 2.5
+1183.478 22.2
+1187.544 10.6
+1190.782 26.4
+
+=================================== "QC_Shew_07_02-pt3_31Mar07_OWL_07-03-07.2778.2778.1.dta" ==================================
+960.50385 1   scan=2778 cs=1
+270.430 3.0
+271.380 23.6
+272.488 10.8
+280.182 30.2
+281.209 16.7
+282.662 6.1
+285.355 12.0
+288.300 485.2
+289.343 65.7
+294.188 7.7
+295.022 7.4
+297.573 12.4
+298.250 98.2
+299.219 11.0
+301.575 2.1
+303.736 9.6
+308.207 18.4
+308.995 4.9
+310.219 14.2
+311.431 53.3
+312.905 25.2
+316.273 54.3
+317.317 11.5
+320.337 13.5
+323.056 15.1
+326.808 47.1
+328.186 10.8
+329.211 1.5
+330.291 4.6
+331.425 7.6
+337.802 3.8
+339.212 14.4
+340.193 31.6
+342.343 13.1
+348.159 14.0
+350.193 4.1
+351.343 6.5
+353.024 7.7
+353.667 2.5
+354.308 57.2
+355.139 15.7
+356.352 10.8
+357.111 15.9
+358.400 17.1
+359.205 31.5
+363.808 38.4
+365.363 4.5
+368.095 10.0
+369.149 6.9
+370.378 3.0
+372.134 92.0
+374.375 26.2
+378.495 24.8
+379.508 9.3
+380.211 3.8
+380.936 6.0
+382.099 109.7
+383.470 13.1
+386.185 1.7
+387.218 8.5
+388.537 12.7
+391.229 23.8
+392.669 3.0
+393.350 6.9
+396.074 60.9
+397.151 32.6
+397.779 13.4
+399.294 91.8
+400.237 57.5
+401.220 17.6
+402.093 22.5
+403.317 12.2
+408.996 12.4
+411.450 21.9
+412.389 32.7
+413.209 10.2
+414.127 18.3
+415.054 5.8
+417.312 621.7
+418.317 114.3
+421.029 16.8
+422.494 6.8
+423.273 8.3
+424.295 34.2
+425.032 2.8
+426.317 21.9
+427.202 123.4
+428.710 26.2
+429.552 16.7
+430.972 12.9
+431.754 8.2
+435.268 9.0
+436.391 3.0
+438.332 13.5
+440.347 6.7
+441.469 9.1
+442.388 6.0
+443.304 19.9
+444.408 3.9
+445.299 48.3
+447.578 2.5
+448.256 10.2
+450.334 8.8
+451.275 20.3
+452.340 3.6
+453.009 8.0
+455.192 10.5
+455.819 7.3
+457.234 21.9
+459.308 22.5
+460.331 6.3
+463.196 24.1
+464.963 23.4
+466.066 17.0
+466.902 14.5
+468.352 26.2
+469.177 60.6
+470.916 13.0
+472.398 9.0
+473.390 2.1
+474.367 13.5
+475.576 3.2
+476.321 6.0
+478.190 7.4
+480.555 10.8
+481.637 6.4
+482.655 10.1
+483.535 25.2
+485.056 9.7
+485.794 8.3
+486.415 7.3
+487.052 5.0
+489.074 7.9
+490.460 10.5
+491.139 6.8
+492.295 4.1
+493.505 14.1
+494.244 9.0
+495.321 18.4
+496.516 23.5
+497.415 23.0
+498.560 47.8
+499.566 2.7
+502.449 4.9
+503.194 9.2
+504.824 16.3
+506.347 12.2
+507.627 5.2
+508.360 4.7
+509.305 42.7
+510.270 3.2
+511.278 112.4
+512.175 52.5
+513.732 51.2
+515.689 13.1
+516.407 12.5
+520.307 6.4
+521.108 9.7
+522.209 44.5
+523.063 7.2
+524.270 30.1
+525.416 74.5
+526.465 42.4
+527.166 42.9
+528.248 40.6
+529.373 154.5
+530.219 56.0
+531.354 1.7
+534.233 18.5
+538.290 33.4
+540.122 123.6
+541.505 28.4
+542.367 117.6
+543.125 18.4
+544.210 67.8
+546.422 106.8
+547.357 21.6
+548.066 4.5
+556.611 33.4
+558.160 27.4
+558.985 9.9
+559.955 6.4
+560.940 18.9
+564.334 22.3
+565.396 20.8
+566.383 24.4
+567.210 34.8
+568.700 4.4
+569.485 7.1
+572.922 16.7
+574.253 15.1
+575.188 3.6
+576.495 16.1
+578.734 35.1
+581.401 16.7
+582.326 12.7
+584.356 10.4
+585.306 4.3
+586.380 26.8
+592.305 6.5
+593.260 12.1
+596.073 13.0
+597.347 3.6
+598.930 4.7
+599.894 12.9
+600.532 28.2
+601.409 20.2
+604.086 2.1
+606.484 16.6
+608.236 9.5
+609.427 9.3
+610.207 48.1
+612.572 24.6
+613.268 5.3
+616.364 19.2
+618.052 10.0
+619.404 21.6
+621.947 13.7
+626.086 13.2
+627.257 55.2
+628.234 155.7
+629.589 29.5
+631.991 4.8
+637.391 29.5
+638.235 73.3
+639.108 13.7
+640.453 11.1
+643.019 14.6
+644.325 15.9
+645.414 106.6
+646.508 68.8
+647.458 2.5
+653.240 22.6
+655.152 201.8
+656.118 152.3
+657.083 11.9
+658.482 9.2
+660.520 35.1
+662.559 18.2
+663.805 27.1
+666.645 3.6
+668.394 13.1
+671.328 16.2
+672.113 18.9
+673.333 178.2
+674.314 17.7
+678.354 4.5
+680.381 16.9
+681.291 3.5
+682.215 5.1
+683.197 34.9
+685.253 3.0
+688.385 11.4
+689.174 2.7
+690.713 9.6
+692.341 6.1
+693.703 4.2
+696.735 24.3
+698.364 31.7
+699.092 9.4
+700.198 23.7
+701.583 21.8
+702.888 21.6
+704.069 8.2
+705.169 9.7
+706.378 12.6
+708.739 18.1
+709.347 4.0
+710.421 15.6
+711.221 10.3
+711.961 2.7
+712.605 41.5
+713.361 28.1
+714.392 58.6
+715.181 124.9
+716.293 45.3
+718.529 10.2
+720.748 10.7
+722.517 6.9
+724.354 7.8
+726.814 51.1
+729.553 7.3
+730.270 8.2
+731.179 5.4
+732.480 270.8
+733.458 169.1
+735.014 12.4
+735.967 7.7
+736.662 11.5
+738.878 8.1
+740.261 8.2
+741.523 29.4
+742.421 21.6
+743.099 17.0
+744.441 34.0
+747.529 15.7
+748.727 16.6
+749.835 15.9
+750.706 36.4
+751.774 15.2
+752.853 7.1
+755.391 24.0
+756.139 5.6
+758.477 19.3
+760.606 6.0
+765.428 21.8
+768.355 74.1
+769.384 33.5
+770.345 3.8
+772.553 6.3
+773.365 18.3
+776.187 17.3
+776.871 3.4
+779.085 30.7
+782.713 9.1
+783.377 7.2
+784.419 5.8
+786.188 66.1
+787.118 25.4
+788.637 17.2
+789.240 7.9
+790.330 10.2
+791.848 14.4
+792.541 4.4
+794.716 16.8
+795.417 7.5
+796.656 25.1
+797.363 3.6
+798.455 4.5
+799.413 13.6
+801.389 8.8
+803.227 13.6
+804.296 3.2
+806.664 4.2
+807.556 3.6
+809.351 43.3
+810.823 12.2
+811.629 12.3
+812.522 6.2
+814.433 20.5
+815.511 25.3
+816.465 4.0
+818.372 17.2
+819.375 16.5
+820.336 10.8
+821.929 11.3
+822.638 22.4
+824.536 23.6
+826.810 54.1
+827.544 353.0
+828.449 237.1
+829.550 64.5
+830.502 22.4
+831.517 34.7
+832.957 15.7
+837.693 18.7
+838.367 8.1
+839.847 18.2
+840.506 7.9
+843.532 3.8
+845.448 7068.0
+846.486 2611.9
+847.175 9.9
+852.341 2.5
+854.213 33.0
+854.855 8.4
+855.995 46.6
+857.062 50.7
+860.595 6.2
+861.587 26.1
+863.285 28.9
+864.455 33.8
+865.896 24.9
+867.094 13.7
+867.760 40.7
+869.292 10.0
+870.184 8.6
+871.557 66.3
+874.371 28.3
+876.438 45.5
+877.888 30.1
+879.520 4.9
+880.205 47.9
+881.555 103.6
+882.498 28.9
+883.410 7.2
+884.387 6.6
+885.360 9.3
+888.819 37.1
+890.965 35.0
+894.445 9.6
+895.159 35.1
+896.580 11.6
+897.499 14.3
+898.297 40.4
+899.763 149.3
+900.461 132.7
+901.602 15.4
+902.674 3.4
+906.095 26.3
+907.052 18.3
+908.911 34.9
+912.460 32.3
+913.697 25.6
+914.352 35.4
+916.531 91.4
+918.400 144.1
+919.450 77.4
+920.055 5.6
+922.197 25.2
+923.782 39.2
+924.937 104.7
+925.575 126.7
+926.378 76.2
+928.218 23.2
+930.409 94.3
+931.053 87.0
+931.944 56.3
+933.039 10.3
+934.502 21.9
+935.546 10.1
+937.748 80.0
+940.002 127.4
+940.688 26.1
+941.494 125.5
+942.466 1434.5
+943.436 2255.4
+944.437 852.1
+945.306 25.3
+948.063 5.4
+
diff --git a/SystemTest/SimpleChromosome.trie b/SystemTest/SimpleChromosome.trie
new file mode 100644
index 0000000..09b62b6
--- /dev/null
+++ b/SystemTest/SimpleChromosome.trie
@@ -0,0 +1 @@
+AGCGGGAGAGAGAGAGAGAGAGAGAGAGAGAGCGAGAGAGCGTGAGCGCGCGCAAGCTAGCGAGCAAACCAGAGAGACAGACCGAGAGAGGGACCAGGAGAGAGACCCAGAGAGAGAAGAAGAAGCCAGAAGCCGAGCTCTGTCAGGGCTCAACCTCCAACTTGTTTCAGTTCATTCATCCTTCTCTCCTTTCCGCTCAGACTGTAGAGCTCGGTCTCTCCAAGTTTGTGCCTAAGAAGATGATAATCACACAAACAAGTCACTGTTACATGACCAGCCTTGGGATTCTTTTCCTGATTAATATTCTCCCTGGAACCACTGGTCAAGGGGAATCAAGACGACAAGAACCCGGGGACTTTGTGAAGCAGGACATTGGCGGGCTGTCTCCTAAGCATGCCCCAGATATTCCTGATGACAGCACTGACAACATCACTATCTTCACCAGAATCTTGGATCGTCTTCTGGACGGCTATGACAACCGGCTGCGACCTG [...]
\ No newline at end of file
diff --git a/SystemTest/SimpleGenes.gff b/SystemTest/SimpleGenes.gff
new file mode 100644
index 0000000..681e140
--- /dev/null
+++ b/SystemTest/SimpleGenes.gff
@@ -0,0 +1,5 @@
+SampleChromosome	Natalie	exon	3	14	1	+	0	Parent=Gene1
+SampleChromosome	Natalie	exon	23	34	1	+	0	Parent=Gene1
+SampleChromosome	Natalie	exon	26	34	1	+	0	Parent=Gene2
+SampleChromosome	Natalie	exon	56	65	1	+	0	Parent=Gene3
+SampleChromosome	Natalie	exon	84	97	1	+	2	Parent=Gene3
diff --git a/SystemTest/TestCDTA.txt b/SystemTest/TestCDTA.txt
new file mode 100644
index 0000000..e15363d
--- /dev/null
+++ b/SystemTest/TestCDTA.txt
@@ -0,0 +1,5 @@
+spectra,SystemTest/Shew_dta.txt
+instrument,FT-Hybrid
+protease,Trypsin
+SequenceFile,SystemTest/Shew_Short.fasta
+mod,+57,C,fix
diff --git a/SystemTest/TestInput.txt b/SystemTest/TestInput.txt
new file mode 100644
index 0000000..b9aebc2
--- /dev/null
+++ b/SystemTest/TestInput.txt
@@ -0,0 +1,26 @@
+# Spectrum file-name.  Multiple 'spectra' lines can be used to search
+# several .dta or .pkl files.  Specify a directory name to search every
+# file in that directory (non-recursive)
+spectra,SystemTest/TestSpectrum.dta
+
+# Instrument type (QTOF or ESI-ION-TRAP)
+instrument,ESI-ION-TRAP
+
+# Protease - nonstandard digests are penalized.  
+# Options are trypsin, chymotrypsin, lysc, aspn, gluc
+protease,None
+
+# Path to the database file (as written out by PrepDB.py)
+DB,TestDatabase.trie
+
+#mod,mass,residues,fix/opt,name
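+# (For instance, "mod,+16,M,opt,oxidation" would declare an optional +16
+# modification on methionine; only the mod lines below are used in this test.)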
+
+# Specify one modification (the test peptide was actually 
+# VKEAMAPK, not VKEAMGPK, so a +14 mod will work)
+mod,+14,G,opt,user-defined
+
+# Note: MOST searches should include the +57 modification
+# for the protecting group on cysteines.
+mod,+57,C,fix
+
+mods,1
diff --git a/SystemTest/TestInputMod.txt b/SystemTest/TestInputMod.txt
new file mode 100644
index 0000000..04fde94
--- /dev/null
+++ b/SystemTest/TestInputMod.txt
@@ -0,0 +1,8 @@
+spectra,SystemTest/TestSpectrum.dta
+instrument,ESI-ION-TRAP
+protease,None
+DB,TestDatabase.trie
+
+# This line causes Inspect to run a 'blind' search, where any mass modification
+# up to mass 200Da is allowed.  (Very slow on large databases):
+unrestrictive,1
diff --git a/SystemTest/TestInputTag1.txt b/SystemTest/TestInputTag1.txt
new file mode 100644
index 0000000..86faee2
--- /dev/null
+++ b/SystemTest/TestInputTag1.txt
@@ -0,0 +1,9 @@
+spectra,SystemTest/TestSpectrum.dta
+instrument,ESI-ION-TRAP
+protease,None
+DB,TestDatabase.trie
+mod,+57,C,fix
+mods,1
+taglength,1
+tagsonly
+
diff --git a/SystemTest/TestInputTag3.txt b/SystemTest/TestInputTag3.txt
new file mode 100644
index 0000000..01d4888
--- /dev/null
+++ b/SystemTest/TestInputTag3.txt
@@ -0,0 +1,9 @@
+spectra,SystemTest/TestSpectrum.dta
+instrument,ESI-ION-TRAP
+protease,None
+DB,TestDatabase.trie
+mod,+57,C,fix
+mods,1
+taglength,3
+tagsonly
+
diff --git a/SystemTest/TestMS2.txt b/SystemTest/TestMS2.txt
new file mode 100644
index 0000000..ebddbf6
--- /dev/null
+++ b/SystemTest/TestMS2.txt
@@ -0,0 +1,8 @@
+spectra,SystemTest/Yeast.ms2
+instrument,ESI-ION-TRAP
+protease,Trypsin
+
+#DB,TestDatabase.trie
+
+SequenceFile,SystemTest/YeastSmall.fasta
+mod,+57,C,fix
diff --git a/SystemTest/TestPMC.txt b/SystemTest/TestPMC.txt
new file mode 100644
index 0000000..4b21462
--- /dev/null
+++ b/SystemTest/TestPMC.txt
@@ -0,0 +1,7 @@
+spectra,SystemTest/TestSpectra.pkl
+instrument,ESI-ION-TRAP
+protease,None
+DB,TestDatabase.trie
+mod,+57,C,fix
+pmconly,1
+mods,1
diff --git a/SystemTest/TestSpectra.pkl b/SystemTest/TestSpectra.pkl
new file mode 100644
index 0000000..ccdb3a1
--- /dev/null
+++ b/SystemTest/TestSpectra.pkl
@@ -0,0 +1,1773 @@
+533.28 12345.67 2
+151.2 5390.0
+172.0 5028.0
+180.5 2581.0
+184.3 65135.0
+185.1 1791417.0
+186.1 160666.0
+186.8 23.0
+202.8 13982.0
+207.1 10560.0
+209.9 5390.0
+211.1 35087.0
+211.7 6078.0
+213.1 7171183.0
+214.1 887589.0
+215.2 94536.0
+216.3 4106.0
+219.8 2130.0
+222.6 6979.0
+239.2 47363.0
+239.9 3.0
+242.1 2597.0
+245.0 6343.0
+246.5 30612.0
+247.2 798.0
+248.5 5421.0
+249.6 5878.0
+253.4 16252.0
+258.0 10514.0
+261.1 4105.0
+261.7 5927.0
+265.0 30162.0
+266.2 44759.0
+267.2 1296471.0
+268.1 201492.0
+269.2 39197.0
+274.3 13954.0
+275.2 67736.0
+275.8 1.0
+278.8 13686.0
+281.2 94777.0
+282.0 33108.0
+283.7 15660.0
+292.3 127058.0
+293.1 29770.0
+294.5 15211.0
+295.6 38326.0
+302.5 4572.0
+305.5 5286.0
+307.3 1749.0
+308.9 25083.0
+309.5 39412.0
+310.3 1184256.0
+312.2 3816479.0
+313.4 350869.0
+314.1 41337.0
+314.7 7136.0
+321.4 23893.0
+325.6 11750.0
+326.3 397383.0
+327.2 98934.0
+328.2 94218.0
+329.3 18430.0
+332.3 30533.0
+334.9 75433.0
+339.6 6176.0
+340.3 24714.0
+341.5 20109.0
+343.5 36429.0
+344.4 2796.0
+345.9 22232.0
+346.8 10606.0
+350.7 4379.0
+354.0 22273.0
+358.2 40279.0
+364.8 51812.0
+368.1 5552.0
+369.2 21720.0
+371.3 1749.0
+372.2 4457.0
+373.5 15591.0
+379.0 12822.0
+380.3 154243.0
+381.4 54914.0
+382.0 4.0
+388.9 12738.0
+389.9 17339.0
+392.5 21223.0
+394.3 45628.0
+395.5 20497.0
+397.4 1015323.0
+398.5 234354.0
+399.3 11.0
+404.9 6160.0
+407.9 107198.0
+408.9 33407.0
+409.7 7234.0
+411.1 8348.0
+412.4 6086.0
+413.3 4457.0
+414.2 5359.0
+415.4 33816.0
+416.2 9314.0
+418.6 72935.0
+420.0 42304.0
+420.7 54475.0
+422.2 8291.0
+423.0 8684.0
+424.4 47436.0
+425.4 1358187.0
+426.4 392094.0
+427.6 933483.0
+428.4 110663.0
+429.3 143489.0
+430.2 53360.0
+430.8 7010.0
+431.7 50899.0
+432.5 2511.0
+433.3 10783.0
+434.1 6218.0
+436.2 25753.0
+437.9 42137.0
+441.7 133956.0
+442.5 4980.0
+443.4 19846.0
+444.1 7.0
+445.2 35205.0
+446.0 25.0
+447.4 25510.0
+448.5 4933.0
+450.4 8161.0
+451.4 12710.0
+453.1 16538.0
+456.4 19959.0
+459.2 4379.0
+461.1 5342.0
+463.1 8366.0
+465.3 5384.0
+468.5 17921.0
+469.2 30451.0
+469.8 9.0
+470.8 16300.0
+471.6 3784.0
+474.7 1784.0
+477.3 6967.0
+481.7 14238.0
+483.8 91491.0
+484.6 171372.0
+486.5 178165.0
+487.5 36287.0
+489.1 100315.0
+489.7 5787.0
+492.3 32722.0
+493.5 3374.0
+494.3 37120.0
+495.1 43495.0
+496.4 8160.0
+497.1 18.0
+499.5 5753.0
+505.0 11091.0
+506.4 16280.0
+508.5 33703.0
+509.3 11448.0
+510.4 48115.0
+512.4 66935.0
+513.3 17408.0
+515.2 19987.0
+515.9 57252.0
+517.3 27245.0
+518.3 38028.0
+519.0 7.0
+520.5 19696.0
+521.7 21662.0
+522.3 28852.0
+523.4 44714.0
+524.5 78861.0
+525.5 137108.0
+526.5 840780.0
+527.4 206179.0
+528.4 94072.0
+529.0 35.0
+530.3 23777.0
+531.1 30463.0
+535.3 16902.0
+540.5 438350.0
+541.4 254479.0
+542.6 49349.0
+544.5 120573.0
+545.5 14799.0
+548.3 89281.0
+549.1 8424.0
+550.0 23165.0
+551.7 22014.0
+552.3 22402.0
+553.7 4543.0
+554.8 15768.0
+556.5 67909.0
+558.6 186876.0
+559.6 38382.0
+560.6 19349.0
+561.5 4506.0
+562.4 17040.0
+563.9 30871.0
+567.0 14923.0
+567.7 7231.0
+569.2 18826.0
+570.0 43252.0
+570.7 40744.0
+576.4 27868.0
+578.3 37027.0
+579.0 17.0
+581.2 11862.0
+585.8 11202.0
+588.3 34625.0
+589.0 3.0
+590.6 45434.0
+591.3 10312.0
+593.9 11394.0
+594.6 31336.0
+596.1 28878.0
+599.9 27065.0
+600.8 9783.0
+603.6 8349.0
+605.8 32512.0
+608.2 9548.0
+609.3 6418.0
+613.9 28913.0
+621.2 19801.0
+622.7 25512.0
+623.5 248741.0
+624.6 101891.0
+625.4 11783.0
+626.4 27504.0
+628.5 41288.0
+630.7 15924.0
+634.7 42605.0
+635.6 17233.0
+637.2 12494.0
+638.0 16525.0
+639.4 20236.0
+640.8 323591.0
+641.5 5714043.0
+642.5 1859248.0
+643.5 375036.0
+645.0 40057.0
+645.7 11.0
+646.7 1798.0
+649.9 4779.0
+653.4 1211.0
+657.4 47758.0
+659.5 87105.0
+660.4 5546.0
+667.4 15667.0
+671.3 8437.0
+672.5 19269.0
+673.4 54740.0
+678.3 8211.0
+679.9 6552.0
+683.8 13941.0
+685.7 3062.0
+687.6 19265.0
+690.3 16952.0
+693.0 4623.0
+694.6 6031.0
+695.3 3952.0
+696.2 9830.0
+698.9 6626.0
+700.7 28163.0
+703.5 14559.0
+707.3 50122.0
+707.9 5349.0
+708.9 68522.0
+711.7 34640.0
+718.4 10184.0
+719.2 25325.0
+720.8 6199.0
+722.5 4035.0
+723.5 4361.0
+725.3 26713.0
+726.8 107967.0
+727.6 86575.0
+728.6 28054.0
+731.2 2661.0
+736.5 252360.0
+737.5 66384.0
+738.5 223658.0
+739.5 136451.0
+740.4 90229.0
+746.7 21403.0
+753.3 13110.0
+754.4 12183936.0
+755.4 4816542.0
+756.4 2736858.0
+757.4 667465.0
+758.4 197965.0
+759.3 8041.0
+761.3 2270.0
+762.4 9530.0
+769.8 21250.0
+770.4 9397.0
+771.4 7409.0
+772.5 13269.0
+775.1 9990.0
+783.6 2687.0
+784.5 7197.0
+787.4 137576.0
+788.5 25460.0
+796.5 19556.0
+799.8 10087.0
+806.3 36471.0
+808.3 6340.0
+814.6 125632.0
+815.4 23214.0
+816.5 22919.0
+817.3 16850.0
+818.3 24779.0
+820.2 33895.0
+827.4 11089.0
+831.9 1392.0
+835.5 298945.0
+836.4 175308.0
+838.2 13674.0
+846.0 2289.0
+849.3 23645.0
+852.7 580178.0
+853.4 22303784.0
+854.3 8664932.0
+855.3 2489524.0
+856.2 217073.0
+856.8 14.0
+859.8 7362.0
+863.3 24506.0
+868.8 1503.0
+874.6 33001.0
+876.5 5401.0
+877.3 28104.0
+891.1 3538.0
+893.3 11443.0
+901.3 79610.0
+902.2 48761.0
+904.1 27722.0
+906.1 8699.0
+911.2 2501.0
+919.2 195606.0
+920.2 100094.0
+921.0 16844.0
+925.1 20719.0
+927.8 6905.0
+935.6 5998.0
+936.8 6240.0
+948.4 39712.0
+951.5 3029.0
+952.2 11436.0
+960.5 1731.0
+963.1 19209.0
+966.4 678935.0
+967.4 261603.0
+968.2 147220.0
+976.4 27633.0
+977.6 22498.0
+995.6 3603.0
+1000.1 19739.0
+1003.6 4573.0
+1011.4 2914.0
+1017.0 11894.0
+1018.4 31847.0
+1019.8 4967.0
+1028.6 3414.0
+1030.7 14837.0
+1044.4 5145.0
+1047.4 4571.0
+1055.0 5603.0
+1060.5 5752.0
+1074.8 2108.0
+1095.7 1385.0
+1097.5 19117.0
+1105.5 648.0
+1107.5 3.0
+1110.5 2426.0
+1116.7 11819.0
+1123.8 4820.0
+1165.5 4537.0
+1181.8 7919.0
+1183.4 3712.0
+1187.5 4571.0
+1202.0 4628.0
+1211.8 4269.0
+1236.6 2846.0
+1259.5 2889.0
+1311.6 2651.0
+1438.5 4573.0
+1468.2 2116.0
+1469.5 3922.0
+1485.6 4062.0
+1500.7 4107.0
+1507.8 2806.0
+1515.9 3070.0
+1538.8 3259.0
+1541.6 5431.0
+1542.8 2460.0
+1579.4 1984.0
+1582.8 30.0
+1589.0 4967.0
+
+698.84 12345.67 2
+183.2 627.0
+191.3 926.0
+198.3 251.0
+201.0 1029.0
+211.7 364.0
+214.7 499.0
+218.5 963.0
+220.1 2120.0
+221.4 462.0
+222.3 493.0
+228.9 4167.0
+229.8 2188.0
+240.0 1256.0
+244.1 1418.0
+245.1 698.0
+246.1 67354.0
+247.1 7803.0
+247.7 4.0
+253.2 337.0
+255.0 2333.0
+256.1 5114.0
+257.0 6458.0
+260.9 4314.0
+261.7 631.0
+268.1 733.0
+272.2 1383.0
+273.1 1578.0
+273.9 98821.0
+275.0 10056.0
+275.9 6406.0
+282.2 470.0
+291.7 605.0
+293.7 894.0
+296.2 2084.0
+297.0 915.0
+301.4 184.0
+313.2 1032.0
+314.2 1474.0
+316.4 1830.0
+317.9 5240.0
+319.2 3431.0
+322.2 1652.0
+326.9 1042.0
+328.9 947.0
+330.3 4.0
+330.9 1153.0
+334.1 2128.0
+337.5 2544.0
+339.9 695.0
+341.1 1345.0
+349.9 883.0
+351.1 184.0
+353.1 2859.0
+353.8 5.0
+356.7 2489.0
+358.5 2523.0
+360.3 1760.0
+361.1 4.0
+363.7 403.0
+364.7 728.0
+369.3 11218.0
+371.1 2872.0
+374.1 3840.0
+375.0 3303.0
+376.0 601.0
+379.0 237.0
+380.1 514.0
+384.9 1549.0
+385.9 2967.0
+386.6 1710.0
+388.0 6505.0
+389.0 5203.0
+390.5 556.0
+391.3 3960.0
+393.2 33193.0
+394.2 8629.0
+395.1 1751.0
+399.7 6622.0
+401.6 1149.0
+402.9 6269.0
+404.4 11858.0
+405.1 123529.0
+406.1 21010.0
+407.3 4834.0
+407.9 443.0
+412.2 3892.0
+415.3 1577.0
+416.2 1743.0
+419.9 2260.0
+420.8 650.0
+422.2 1169.0
+423.0 372.0
+425.6 7679.0
+427.0 601.0
+429.1 3690.0
+430.1 2271.0
+431.4 1395.0
+433.3 325.0
+434.8 3880.0
+440.0 1721.0
+442.0 2835.0
+442.7 1994.0
+444.1 780.0
+445.6 2095.0
+451.8 410.0
+452.8 675.0
+454.2 860.0
+454.9 1103.0
+456.2 2079.0
+458.4 813.0
+459.8 478.0
+464.1 18044.0
+465.3 10262.0
+466.2 927.0
+470.3 2689.0
+471.1 4998.0
+472.0 3849.0
+472.7 1400.0
+478.0 3597.0
+483.0 4665.0
+485.0 3838.0
+486.2 2013.0
+487.9 7448.0
+490.8 870.0
+496.4 1958.0
+497.1 550.0
+498.0 7041.0
+500.2 1733.0
+501.1 5560.0
+501.8 3276.0
+502.7 6176.0
+503.5 712.0
+504.2 450.0
+506.1 20201.0
+507.0 5120.0
+508.1 4191.0
+511.1 2177.0
+513.1 9683.0
+514.1 18983.0
+515.5 7519.0
+516.3 19908.0
+518.2 643.0
+519.2 2030.0
+524.0 2740.0
+527.6 2278.0
+529.2 1358.0
+531.1 2134.0
+532.7 4981.0
+534.0 61758.0
+535.2 23686.0
+536.2 5637.0
+541.3 3209.0
+544.4 3430.0
+545.4 7750.0
+546.7 1134.0
+553.2 8745.0
+553.9 426.0
+554.9 2320.0
+556.3 1030.0
+560.2 463.0
+560.8 3366.0
+562.4 15805.0
+563.2 39141.0
+564.4 15816.0
+567.5 1274.0
+568.7 1079.0
+569.4 2841.0
+570.9 10764.0
+571.8 2024.0
+573.1 6084.0
+574.1 10543.0
+575.3 1861.0
+576.5 3074.0
+580.5 528.0
+586.0 6214.0
+587.0 4524.0
+589.1 6692.0
+591.1 6959.0
+592.0 1908.0
+597.6 996.0
+599.0 1371.0
+600.6 788.0
+608.7 3430.0
+609.9 1677.0
+611.4 4420.0
+613.1 996.0
+614.2 1489.0
+615.7 1005.0
+617.1 6574.0
+618.1 885.0
+619.3 2173.0
+623.9 4468.0
+624.6 4207.0
+626.3 3211.0
+627.9 914.0
+631.5 3148.0
+634.0 15430.0
+640.3 3541.0
+644.1 12462.0
+645.1 8739.0
+647.1 1761.0
+649.2 536.0
+651.1 808.0
+652.8 4881.0
+654.3 409.0
+656.1 4416.0
+657.4 3642.0
+658.7 7594.0
+660.9 6871.0
+662.2 49673.0
+663.2 14529.0
+664.4 10058.0
+667.9 525.0
+668.6 3229.0
+670.4 657.0
+671.3 1239.0
+672.6 3301.0
+674.6 1529.0
+676.2 3869.0
+678.4 12761.0
+679.3 9189.0
+680.8 2296.0
+681.7 13156.0
+682.6 2288.0
+683.4 1851.0
+684.3 694.0
+685.3 746.0
+688.5 2459.0
+689.9 49906.0
+690.9 21177.0
+692.7 588.0
+693.4 1530.0
+699.4 881.0
+707.2 792.0
+708.2 336.0
+715.6 412.0
+716.2 599.0
+718.4 1560.0
+719.3 4944.0
+720.0 3617.0
+721.2 1626.0
+723.3 4438.0
+724.0 2617.0
+725.7 493.0
+726.4 535.0
+733.0 1864.0
+734.6 253.0
+735.3 160631.0
+736.3 61458.0
+737.4 20539.0
+742.8 1475.0
+749.8 651.0
+755.2 4301.0
+758.5 2435.0
+761.2 2269.0
+764.1 909.0
+771.8 1105.0
+772.9 1249.0
+775.4 3409.0
+783.6 3314.0
+785.2 319.0
+786.8 334.0
+788.2 4043.0
+794.0 120.0
+796.8 1868.0
+797.4 1216.0
+798.0 6239.0
+799.0 1.0
+800.4 2254.0
+804.6 6413.0
+805.2 436.0
+806.3 50490.0
+807.4 26601.0
+808.3 1121.0
+813.7 2621.0
+816.0 3311.0
+822.6 588.0
+832.4 3168.0
+834.2 10167.0
+835.2 18982.0
+839.1 372.0
+841.9 3148.0
+844.7 1382.0
+845.3 11.0
+846.2 2371.0
+847.2 3783.0
+851.6 2837.0
+855.8 372.0
+860.2 2830.0
+861.4 3541.0
+862.7 4041.0
+863.4 327620.0
+864.4 95558.0
+865.3 10696.0
+867.1 795.0
+876.4 467.0
+878.0 2544.0
+886.9 1710.0
+888.2 1016.0
+892.2 453.0
+895.5 2803.0
+896.3 458.0
+899.0 2935.0
+902.8 4610.0
+905.9 6288.0
+907.2 2530.0
+909.4 305.0
+913.2 4078.0
+915.2 4921.0
+916.2 12312.0
+918.9 3973.0
+925.2 581.0
+926.3 1524.0
+927.1 1245.0
+929.4 233.0
+933.3 13397.0
+934.0 8793.0
+935.2 3756.0
+938.3 1108.0
+941.9 1385.0
+946.8 603.0
+948.3 1413.0
+952.9 1945.0
+957.7 338.0
+959.5 1407.0
+960.5 410.0
+962.6 264.0
+965.3 638.0
+966.0 1807.0
+969.3 1117.0
+971.2 1626.0
+973.0 2030.0
+974.5 14086.0
+975.5 14392.0
+977.0 9641.0
+981.4 3670.0
+986.3 6219.0
+987.3 8361.0
+988.3 1124.0
+991.7 15621.0
+992.4 193384.0
+993.4 115474.0
+994.5 11689.0
+995.3 2.0
+996.5 878.0
+999.1 899.0
+1004.1 38544.0
+1005.3 23030.0
+1006.2 5906.0
+1007.4 1906.0
+1010.8 387.0
+1014.4 293.0
+1017.4 1324.0
+1031.0 958.0
+1036.8 892.0
+1050.7 824.0
+1053.0 399.0
+1062.1 736.0
+1063.7 2103.0
+1073.0 361.0
+1075.4 6551.0
+1079.4 3675.0
+1082.3 1482.0
+1087.2 2783.0
+1088.4 3752.0
+1089.4 3592.0
+1105.0 5968.0
+1105.9 1127.0
+1118.4 1646.0
+1119.9 378.0
+1123.4 262065.0
+1124.4 127052.0
+1125.5 17558.0
+1133.3 3703.0
+1134.1 3315.0
+1135.2 2533.0
+1136.2 2913.0
+1149.8 649.0
+1151.1 42506.0
+1152.2 23761.0
+1153.4 7274.0
+1155.3 468.0
+1157.2 559.0
+1166.2 2439.0
+1180.9 1177.0
+1183.5 1414.0
+1205.2 528.0
+1212.6 664.0
+1214.5 1770.0
+1224.7 374.0
+1227.9 1045.0
+1229.4 1307.0
+1232.2 7564.0
+1233.1 7841.0
+1236.4 4378.0
+1246.4 815.0
+1250.2 30353.0
+1251.3 11885.0
+1252.4 19983.0
+1253.1 1.0
+1263.6 328.0
+1280.5 550.0
+1281.6 694.0
+1284.2 365.0
+1294.2 362.0
+1307.1 493.0
+1327.6 573.0
+1333.9 590.0
+1335.3 387.0
+1340.8 401.0
+1355.2 171.0
+1367.6 506.0
+1375.5 560.0
+1381.5 730.0
+1397.6 471.0
+1420.8 1068.0
+1439.6 952.0
+1452.2 452.0
+1454.8 327.0
+1463.8 591.0
+1474.5 701.0
+1497.3 307.0
+1511.5 461.0
+1531.3 307.0
+1574.9 262.0
+1575.9 453.0
+1579.3 328.0
+1581.6 328.0
+1588.5 434.0
+1596.3 710.0
+1630.3 560.0
+1632.7 224.0
+1639.7 382.0
+1655.0 414.0
+1659.4 566.0
+1673.2 365.0
+1695.6 325.0
+1704.0 621.0
+1709.1 246.0
+1729.2 435.0
+1752.3 567.0
+1761.6 568.0
+1788.3 273.0
+1794.5 238.0
+1850.8 217.0
+1859.0 713.0
+1891.0 394.0
+1892.9 641.0
+1903.6 790.0
+1911.1 329.0
+1919.4 509.0
+1933.3 223.0
+1951.1 246.0
+1955.4 621.0
+1967.7 396.0
+1968.3 652.0
+1990.7 1107.0
+1992.9 513.0
+1994.2 470.0
+1997.5 224.0
+
+623.28 12345.67 2
+160.3 525.0
+161.8 474.0
+163.1 238.0
+164.2 784.0
+165.0 770.0
+166.9 966.0
+168.2 1496.0
+170.6 872.0
+175.2 1289.0
+175.9 1.0
+178.6 724.0
+179.4 884.0
+181.1 11524.0
+182.4 2691.0
+183.2 10614.0
+184.5 288.0
+185.3 3331.0
+187.0 1222.0
+188.2 556.0
+190.2 1793.0
+191.5 2540.0
+193.2 403.0
+194.0 1010.0
+195.2 1340.0
+196.2 2845.0
+197.2 2150.0
+199.1 106283.0
+200.3 9382.0
+201.3 5260.0
+202.3 620.0
+203.2 3152.0
+206.4 1690.0
+208.4 3678.0
+209.3 3182.0
+210.2 1791.0
+211.0 17726.0
+212.1 3600.0
+213.2 1951.0
+214.9 2445.0
+217.6 755.0
+219.3 660.0
+220.5 7284.0
+221.5 1483.0
+222.3 6851.0
+223.0 634.0
+224.2 2570.0
+225.4 966.0
+227.1 57603.0
+228.2 10228.0
+229.2 8534.0
+230.3 1186.0
+231.3 11152.0
+232.4 1950.0
+234.8 899.0
+236.3 264.0
+237.3 2032.0
+238.3 4006.0
+240.2 10595.0
+241.4 571.0
+242.2 1101.0
+243.1 3899.0
+244.1 4855.0
+245.1 4604.0
+246.6 971.0
+248.4 2328.0
+252.3 831.0
+253.1 1152.0
+254.3 5680.0
+255.2 978.0
+256.0 709.0
+257.6 723.0
+258.3 34646.0
+259.4 2880.0
+260.8 902.0
+263.1 569.0
+264.2 8156.0
+265.4 6731.0
+266.3 22564.0
+267.2 4284.0
+268.2 5078.0
+268.9 544.0
+270.9 760.0
+274.1 791.0
+275.7 861.0
+276.3 50370.0
+277.2 3198.0
+278.3 832.0
+279.6 741.0
+280.6 3643.0
+281.7 2938.0
+282.7 3986.0
+283.5 1029.0
+284.2 9222.0
+285.0 2098.0
+286.3 1899.0
+290.9 462.0
+292.2 14421.0
+293.2 13662.0
+294.5 475.0
+295.5 4315.0
+296.3 5659.0
+298.3 5108.0
+300.4 4193.0
+301.6 1334.0
+303.0 1037.0
+305.4 710.0
+306.4 563.0
+308.3 2567.0
+310.3 66666.0
+311.2 14463.0
+312.2 3249.0
+313.8 2938.0
+314.5 3.0
+316.1 14038.0
+317.9 3036.0
+320.4 473.0
+323.3 131.0
+323.9 2946.0
+324.7 1471.0
+325.3 697.0
+326.3 58805.0
+327.4 10313.0
+328.3 201506.0
+329.3 24220.0
+330.1 2704.0
+331.2 1839.0
+332.7 346.0
+336.8 1967.0
+338.0 3326.0
+338.8 685.0
+339.6 883.0
+341.2 7421.0
+342.2 3988.0
+343.6 2400.0
+344.2 3949.0
+345.0 1086.0
+345.7 2.0
+346.8 4893.0
+349.1 868.0
+349.9 323.0
+351.6 4470.0
+352.4 2376.0
+353.8 3474.0
+354.7 3694.0
+355.4 7860.0
+356.8 1184.0
+358.3 2436.0
+359.9 13481.0
+360.9 2890.0
+361.5 1709.0
+364.0 2923.0
+366.6 2571.0
+368.2 244.0
+369.3 2484.0
+370.4 352.0
+371.0 1782.0
+372.7 521.0
+377.4 1164.0
+378.5 1719.0
+381.4 7365.0
+382.6 3990.0
+383.4 14808.0
+384.5 4462.0
+385.2 1096.0
+386.9 615.0
+387.6 467.0
+388.6 2074.0
+389.3 24845.0
+389.9 26728.0
+390.8 9291.0
+391.9 3082.0
+393.0 1749.0
+394.2 695.0
+395.2 11145.0
+396.2 6669.0
+397.3 1981.0
+398.2 3599.0
+399.4 71560.0
+400.4 17896.0
+401.5 15911.0
+402.1 5723.0
+403.2 2935.0
+405.0 469.0
+407.2 528.0
+409.3 40462.0
+410.4 24794.0
+411.4 2299.0
+413.5 9817.0
+415.1 781.0
+416.2 384.0
+420.6 1731.0
+423.0 5225.0
+424.1 3777.0
+424.8 2425.0
+425.7 5819.0
+426.6 9509.0
+427.4 122732.0
+428.4 27073.0
+429.3 3819.0
+430.0 836.0
+431.3 15878.0
+432.2 2174.0
+436.9 376.0
+438.9 1503.0
+439.5 567.0
+440.4 1113.0
+441.3 39487.0
+442.2 12254.0
+443.2 6834.0
+444.3 1103.0
+445.3 618.0
+445.9 443.0
+448.6 648.0
+449.5 2222.0
+451.9 5329.0
+452.8 196.0
+453.6 1690.0
+454.9 367.0
+456.8 1867.0
+457.9 5333.0
+459.1 2449.0
+459.8 945.0
+460.5 104166.0
+461.4 35137.0
+462.4 5056.0
+463.3 3060.0
+464.0 1.0
+465.4 664.0
+466.1 214.0
+469.2 874.0
+470.4 616.0
+471.0 914.0
+471.6 3.0
+472.4 834.0
+473.4 1376.0
+475.8 619.0
+477.6 1860.0
+479.1 721.0
+480.5 666.0
+481.3 3225.0
+481.9 689.0
+483.4 5433.0
+485.3 1301.0
+487.1 1091.0
+490.1 4757.0
+491.5 3521.0
+492.5 620.0
+494.6 2030.0
+495.3 904.0
+496.3 7989.0
+497.6 1064.0
+498.4 5330.0
+499.6 1663.0
+502.4 260.0
+506.4 1829.0
+507.2 7189.0
+508.2 2316.0
+510.2 9488.0
+510.9 848.0
+513.5 1914.0
+514.6 2855.0
+516.6 6005.0
+517.2 3.0
+518.4 495.0
+520.4 2369.0
+521.4 614.0
+522.5 561.0
+524.4 15777.0
+525.6 18934.0
+526.3 4493.0
+527.6 1775.0
+528.3 4833.0
+529.3 1655.0
+530.8 2470.0
+531.5 727.0
+533.2 560.0
+534.6 4220.0
+535.4 1943.0
+536.3 1612.0
+537.5 986.0
+538.4 5951.0
+540.1 880.0
+541.0 228.0
+542.4 98225.0
+543.5 32438.0
+544.4 13297.0
+545.2 2967.0
+546.4 1140.0
+548.3 1994.0
+550.5 2290.0
+551.2 937.0
+552.3 16328.0
+553.4 11545.0
+555.0 5233.0
+556.4 30260.0
+557.3 12134.0
+558.4 8438.0
+559.4 6297.0
+560.7 1253.0
+561.4 513.0
+562.7 3151.0
+564.1 14483.0
+564.9 12193.0
+566.1 740.0
+566.8 1153.0
+567.7 1208.0
+570.2 20676.0
+571.4 38175.0
+573.2 922378.0
+574.3 64579.0
+575.6 17692.0
+577.0 7849.0
+578.1 6517.0
+578.9 3603.0
+579.9 622.0
+582.2 1268.0
+583.6 7853.0
+584.4 4915.0
+586.2 2267.0
+587.6 1651.0
+588.4 704.0
+589.4 207345.0
+590.5 50029.0
+591.4 20022.0
+592.3 9965.0
+593.1 10946.0
+594.1 3189.0
+595.2 14777.0
+596.4 15599.0
+597.2 4783.0
+598.5 1638.0
+600.5 7628.0
+601.5 16610.0
+602.3 4824.0
+603.3 2565.0
+603.9 3933.0
+605.5 22455.0
+606.4 22129.0
+607.4 5360.0
+609.1 1626.0
+610.2 4676.0
+611.2 2405.0
+614.7 84840.0
+615.6 26879.0
+616.3 7161.0
+617.2 616.0
+618.2 788.0
+618.8 697.0
+622.7 388.0
+623.4 2778.0
+627.1 456.0
+627.8 252.0
+628.8 1222.0
+631.6 364.0
+633.8 632.0
+636.8 548.0
+639.3 27398.0
+640.7 29601.0
+641.4 29635.0
+642.3 13469.0
+643.3 8764.0
+645.0 3785.0
+646.1 5966.0
+647.6 1371.0
+648.5 533.0
+651.5 1833.0
+652.3 1206.0
+653.0 1054.0
+654.8 1581.0
+655.6 2326.0
+656.3 3439.0
+657.3 122913.0
+658.4 31026.0
+659.2 10217.0
+660.3 2740.0
+662.1 784.0
+663.6 913.0
+664.2 1764.0
+665.3 1654.0
+666.3 2418.0
+667.3 7798.0
+668.2 4867.0
+669.8 368.0
+671.2 3693.0
+672.9 8019.0
+674.2 5211.0
+675.5 4863.0
+676.5 662.0
+679.8 228.0
+685.1 4815.0
+686.3 12999.0
+687.3 8808.0
+689.6 409.0
+692.5 2969.0
+694.5 2059.0
+696.3 8599.0
+697.0 94.0
+698.3 904.0
+699.2 1097.0
+701.6 580.0
+703.6 775.0
+704.4 243550.0
+705.4 96896.0
+706.4 17140.0
+707.1 1643.0
+708.4 741.0
+710.5 73.0
+714.3 846.0
+715.3 1269.0
+718.9 1477.0
+720.4 1663.0
+724.4 5125.0
+726.2 4304.0
+727.2 4928.0
+728.4 2899.0
+731.9 1261.0
+736.4 6155.0
+737.4 3929.0
+738.5 5115.0
+739.6 8838.0
+741.1 1406.0
+742.3 6696.0
+743.5 3155.0
+744.2 5402.0
+744.9 447.0
+749.1 554.0
+750.2 2775.0
+750.8 219.0
+751.6 220.0
+752.3 2305.0
+753.2 1456.0
+754.2 26266.0
+755.3 7662.0
+756.4 11450.0
+757.3 4288.0
+758.3 6443.0
+759.5 2811.0
+760.6 400.0
+761.3 879.0
+763.2 902.0
+764.5 280.0
+765.1 2227.0
+765.7 563.0
+766.4 340.0
+768.3 19166.0
+769.3 12947.0
+770.5 4653.0
+771.6 857.0
+772.4 15226.0
+773.5 2899.0
+774.2 4345.0
+775.4 1001.0
+776.4 1001.0
+777.1 1641.0
+777.9 181.0
+778.6 1934.0
+780.3 5930.0
+781.3 744.0
+783.1 1663.0
+783.8 930.0
+784.4 1247.0
+785.4 3225.0
+786.2 31474.0
+787.2 13146.0
+788.2 5296.0
+789.2 2522.0
+790.4 3731.0
+791.4 451.0
+793.6 790.0
+794.5 1402.0
+796.5 2247.0
+797.4 3.0
+799.1 327.0
+800.3 627.0
+801.4 16551.0
+802.4 8007.0
+803.2 7782.0
+806.4 941.0
+808.7 579.0
+810.1 1.0
+811.1 1253.0
+812.3 3592.0
+813.3 1521.0
+814.3 1835.0
+817.5 660.0
+818.7 37447.0
+819.4 918482.0
+820.4 386567.0
+821.4 90417.0
+822.2 3315.0
+823.3 6906.0
+823.9 1302.0
+826.0 1438.0
+827.8 2293.0
+828.5 643.0
+829.3 6143.0
+830.2 4036.0
+831.6 3486.0
+833.6 1014.0
+834.3 2026.0
+836.5 419.0
+837.3 1570.0
+838.0 2638.0
+839.2 32613.0
+840.3 13179.0
+841.1 5162.0
+842.2 1312.0
+844.5 1003.0
+847.4 3049.0
+848.8 1749.0
+849.6 6795.0
+851.3 7103.0
+852.2 5417.0
+854.2 2308.0
+855.3 5195.0
+856.6 573.0
+857.2 45461.0
+858.3 10102.0
+859.4 9343.0
+862.5 959.0
+864.0 1568.0
+865.0 1988.0
+866.2 605.0
+867.3 2921.0
+869.2 10174.0
+870.3 12332.0
+871.3 5745.0
+873.2 3474.0
+874.6 977.0
+875.3 4191.0
+879.5 7.0
+880.9 4224.0
+882.4 542.0
+883.3 2319.0
+884.2 4028.0
+885.2 1009.0
+886.2 538.0
+889.7 1025.0
+891.1 449.0
+893.6 447.0
+894.4 2641.0
+899.4 389.0
+900.5 9945.0
+901.4 11504.0
+902.4 1570.0
+903.4 1926.0
+907.2 350.0
+910.5 1673.0
+913.3 456.0
+915.3 274.0
+916.4 324.0
+917.4 1885.0
+918.4 666583.0
+919.3 254572.0
+920.3 62927.0
+921.5 325.0
+924.4 5932.0
+925.2 624.0
+926.4 2411.0
+927.2 2950.0
+928.4 5990.0
+930.5 1654.0
+931.5 2675.0
+935.2 2631.0
+940.4 658.0
+941.4 1188.0
+942.4 4579.0
+943.6 2123.0
+944.3 757.0
+948.6 539.0
+949.6 299.0
+951.5 3822.0
+952.4 24010.0
+953.3 10559.0
+954.3 10425.0
+955.4 2010.0
+956.4 306.0
+957.2 1884.0
+958.3 1589.0
+960.2 447.0
+962.4 821.0
+963.8 333.0
+967.4 2729.0
+968.4 530.0
+969.4 1495.0
+970.4 34124.0
+971.3 3452.0
+972.0 671.0
+980.1 2757.0
+981.3 3481.0
+983.4 72.0
+984.7 522.0
+986.3 4479.0
+987.4 1828.0
+988.1 1.0
+991.7 5449.0
+992.4 42.0
+993.1 264.0
+994.0 443.0
+996.2 1148.0
+998.4 8650.0
+999.4 5547.0
+1000.5 5606.0
+1001.8 560.0
+1002.9 438.0
+1003.6 1345.0
+1005.3 220.0
+1010.0 612.0
+1012.3 6434.0
+1013.9 337.0
+1015.2 4672.0
+1016.6 2412.0
+1018.0 5252.0
+1019.1 482.0
+1024.4 274.0
+1027.2 2816.0
+1028.2 883.0
+1029.4 89879.0
+1030.4 49686.0
+1031.5 9081.0
+1036.5 347.0
+1037.6 3069.0
+1038.4 129.0
+1039.4 2056.0
+1043.7 482.0
+1044.6 440.0
+1045.5 943.0
+1046.6 5299.0
+1047.3 454009.0
+1048.3 219202.0
+1049.4 48203.0
+1050.3 1679.0
+1053.9 1286.0
+1055.4 3598.0
+1056.4 1977.0
+1057.5 9525.0
+1058.4 2978.0
+1059.4 1722.0
+1063.3 2913.0
+1064.0 1416.0
+1065.2 580.0
+1069.5 2801.0
+1071.3 4739.0
+1071.9 1412.0
+1073.2 1257.0
+1076.0 632.0
+1077.0 676.0
+1079.7 438.0
+1081.3 31033.0
+1082.2 18537.0
+1083.2 4449.0
+1083.9 1.0
+1086.8 191.0
+1087.7 572.0
+1088.9 801.0
+1092.1 323.0
+1094.4 600.0
+1095.5 330.0
+1096.7 571.0
+1098.3 4122.0
+1099.2 62769.0
+1100.3 22410.0
+1101.2 11433.0
+1103.6 933.0
+1104.8 640.0
+1108.5 831.0
+1112.5 2246.0
+1113.8 939.0
+1115.2 1044.0
+1117.0 8540.0
+1118.1 3714.0
+1119.4 5337.0
+1122.9 193.0
+1123.7 400.0
+1124.4 321.0
+1126.2 1557.0
+1128.0 484.0
+1128.8 1709.0
+1130.9 287.0
+1134.0 439.0
+1136.1 237.0
+1139.3 561.0
+1140.6 2492.0
+1141.3 2847.0
+1142.3 287.0
+1144.4 30410.0
+1145.3 23573.0
+1146.0 871.0
+1146.7 3857.0
+1147.4 162.0
+1151.8 54.0
+1155.7 374.0
+1158.2 2996.0
+1159.6 833.0
+1160.4 304.0
+1162.5 304.0
+1163.5 732.0
+1168.8 180.0
+1173.1 1614.0
+1175.4 376.0
+1176.7 99.0
+1178.2 357.0
+1180.4 110.0
+1182.6 573.0
+1185.4 60.0
+1189.5 446.0
+1197.0 458.0
+1207.7 621.0
+1211.0 484.0
+1213.8 501.0
+1217.6 518.0
+1222.2 660.0
+1224.3 438.0
+1227.1 765.0
+1231.5 433.0
+1232.5 359.0
+1234.7 82.0
+1235.9 458.0
+1242.5 175.0
+1243.8 688.0
+1247.8 224.0
+1250.3 874.0
+1254.7 314.0
+1255.3 389.0
+1259.4 110.0
+1262.1 335.0
+1269.9 241.0
+1274.6 797.0
+1277.7 471.0
+1280.2 220.0
+1296.2 2069.0
+1300.4 188.0
+1301.1 437.0
+1305.5 209.0
+1306.9 561.0
+1309.1 308.0
+1310.4 604.0
+1311.1 327.0
+1313.2 470.0
+1319.9 518.0
+1322.7 366.0
+1324.4 72.0
+1334.6 261.0
+1336.5 268.0
+1344.3 1447.0
+1346.6 401.0
+1347.6 540.0
+1351.2 192.0
+1353.1 413.0
+1353.7 304.0
+1354.5 446.0
+1359.0 531.0
+1364.0 224.0
+1368.8 268.0
+1375.8 173.0
+1383.9 290.0
+1387.2 535.0
+1389.0 506.0
+1391.9 302.0
+1401.4 609.0
+1405.7 484.0
+1409.8 344.0
+1415.2 493.0
+1422.7 224.0
+1423.4 414.0
+1424.9 617.0
+1425.7 374.0
+1427.7 82.0
+1429.5 339.0
+1431.6 747.0
+1434.2 382.0
+1435.4 453.0
+1437.5 912.0
+1440.2 713.0
+1441.3 244.0
+1443.5 994.0
+1447.6 344.0
+1448.9 263.0
+1451.8 191.0
+1453.7 342.0
+1454.7 342.0
+1464.1 174.0
+1464.9 265.0
+1465.6 181.0
+1473.2 192.0
+1479.0 626.0
+1483.8 28.0
+1485.2 398.0
+1486.0 632.0
+1489.9 780.0
+1492.7 518.0
+1494.8 268.0
+1497.9 564.0
+1502.3 1218.0
+1504.2 1109.0
+1505.8 382.0
+1506.8 936.0
+1509.4 310.0
+1510.3 116.0
+1512.3 193.0
+1514.1 1031.0
+1517.3 851.0
+1518.0 783.0
+1519.8 191.0
+1523.0 558.0
+1526.3 285.0
+1527.6 458.0
+1533.2 163.0
+1542.7 1083.0
+1552.7 632.0
+1565.2 250.0
+1567.6 192.0
+1568.5 163.0
+1570.7 290.0
+1571.3 339.0
+1572.3 909.0
+1577.3 495.0
+1580.9 285.0
+1582.4 312.0
+1591.8 995.0
+1593.6 261.0
+1594.8 128.0
+1597.8 330.0
+1602.3 457.0
+1605.8 211.0
+1612.3 561.0
+1613.4 752.0
+1615.8 330.0
+1620.6 285.0
+1621.7 270.0
+1623.5 1155.0
+1624.2 537.0
+1625.2 909.0
+1628.4 63.0
+1630.3 453.0
+1631.5 433.0
+1632.6 535.0
+1636.6 280.0
+1640.8 211.0
+1641.6 301.0
+1642.6 695.0
+1645.6 377.0
+1652.3 413.0
+1656.3 758.0
+1661.2 927.0
+1662.0 245.0
+1663.7 362.0
+1665.2 373.0
+1677.2 95.0
+1684.7 422.0
+1685.7 397.0
+1686.4 1.0
+1687.5 236.0
+1688.3 258.0
+1690.3 473.0
+1695.2 459.0
+1700.6 369.0
+1701.5 1084.0
+1703.3 376.0
+1705.2 1732.0
+1706.5 344.0
+1714.7 234.0
+1719.0 350.0
+1721.6 398.0
+1730.1 51.0
+1734.0 468.0
+1736.4 372.0
+1743.4 549.0
+1744.5 578.0
+1747.0 447.0
+1749.6 870.0
+1750.7 356.0
+1756.7 1002.0
+1768.3 398.0
+1770.3 304.0
+1771.3 619.0
+1774.0 887.0
+1775.1 225.0
+1777.6 376.0
+1780.2 891.0
+1786.0 309.0
+1790.7 245.0
+1792.3 261.0
+1794.4 162.0
+1795.7 468.0
+1797.6 179.0
+1805.0 387.0
+1808.7 2.0
+1811.7 384.0
+1812.9 884.0
+1829.5 711.0
+1831.6 535.0
+1833.6 535.0
+1837.0 947.0
+1838.0 162.0
+1838.7 372.0
+1841.9 737.0
+1845.5 234.0
+1849.7 270.0
+1856.3 352.0
+1857.3 424.0
+1859.4 335.0
+1867.1 136.0
+1868.7 249.0
+1870.3 654.0
+1880.5 359.0
diff --git a/SystemTest/TestSpectrum.dta b/SystemTest/TestSpectrum.dta
new file mode 100644
index 0000000..d6fcdae
--- /dev/null
+++ b/SystemTest/TestSpectrum.dta
@@ -0,0 +1,131 @@
+873.95 2
+129.2 4817117.0
+129.9 96.0
+147.2 448662.0
+148.0 117858.0
+183.1 1749307.0
+183.7 8.0
+224.0 33559.0
+226.2 147973.0
+228.1 4392773.0
+228.7 38.0
+229.1 277275.0
+240.1 203282.0
+244.2 1289841.0
+244.9 31.0
+277.1 79783.0
+285.9 308883.0
+286.2 127712.0
+288.1 161002.0
+297.0 100733.0
+297.3 96080.0
+298.9 70539.0
+308.2 62321.0
+315.2 1303618.0
+315.4 564476.0
+316.3 101625.0
+322.0 49709.0
+322.3 136818.0
+326.3 122342.0
+327.0 73412.0
+339.2 293492.0
+339.3 218142.0
+339.9 39.0
+346.4 74730.0
+357.3 2712944.0
+358.0 14.0
+358.1 17.0
+364.0 162256.0
+368.3 55444.0
+378.7 58351.0
+379.6 232041.0
+379.7 436209.0
+380.3 91029.0
+385.3 210606.0
+385.4 246725.0
+386.6 122039.0
+387.9 10579208.0
+388.8 724544.0
+392.8 79137.0
+393.1 115509.0
+396.2 38927.0
+402.9 79175.0
+403.9 65971.0
+406.6 47412.0
+407.3 95341.0
+413.8 114220.0
+416.7 32485.0
+417.5 226840.0
+418.3 79899.0
+419.8 84798.0
+420.0 412990.0
+420.5 232510.0
+420.6 25.0
+422.9 170358.0
+423.6 65648.0
+425.3 186123.0
+427.6 131170.0
+428.5 3030179.0
+429.4 560828.0
+430.1 9.0
+431.2 174309.0
+446.4 682814.0
+447.3 92575.0
+455.2 170323.0
+485.3 67081.0
+488.1 150338.0
+488.2 74141.0
+488.8 106.0
+501.2 62882.0
+513.4 205403.0
+514.2 5.0
+517.2 6485638.0
+518.2 493244.0
+518.3 790176.0
+519.2 146916.0
+519.5 52953.0
+531.0 77063.0
+541.0 133883.0
+541.2 62920.0
+542.2 382995.0
+559.2 1572418.0
+560.0 140280.0
+560.1 19.0
+560.6 4.0
+586.5 74840.0
+604.4 67209.0
+612.4 274635.0
+613.2 44321.0
+614.1 331074.0
+628.2 736475.0
+629.3 27633.0
+629.4 194441.0
+630.3 4750656.0
+631.3 1040444.0
+631.9 119229.0
+632.4 186042.0
+645.5 180598.0
+646.2 28936448.0
+646.3 15599104.0
+647.2 5003226.0
+647.3 8997120.0
+648.2 3226851.0
+648.3 1537127.0
+649.0 179994.0
+650.8 103859.0
+656.4 130084.0
+695.4 164154.0
+727.3 188009.0
+728.3 45501.0
+729.3 82736.0
+729.5 101977.0
+745.0 69734.0
+745.3 627959.0
+745.9 95202.0
+747.5 311995.0
+756.3 87195.0
+774.2 245314.0
+774.4 578725.0
+774.8 111.0
+775.4 366855.0
+776.6 73345.0
diff --git a/SystemTest/Yeast.ms2 b/SystemTest/Yeast.ms2
new file mode 100644
index 0000000..f9eeb80
--- /dev/null
+++ b/SystemTest/Yeast.ms2
@@ -0,0 +1,1149 @@
+
+S	006099	006099	 641.99
+Z	2	1282.97
+Z	3	1923.95
+186.4 35.9468955993652
+191.1 29.613317489624
+201.2 82.8019027709961
+204.0 167.65022277832
+206.1 36.6520576477051
+209.4 21.1239356994629
+218.4 42.0477523803711
+219.2 30.990306854248
+227.2 57.4513549804688
+228.1 22.0139961242676
+237.1 21.2120952606201
+239.1 35.2356872558594
+243.3 25.4867935180664
+246.4 122.990943908691
+251.1 49.2037315368652
+254.0 61.7847213745117
+256.9 151.22412109375
+267.0 36.8427696228027
+270.0 25.8793964385986
+272.1 642.616027832031
+272.9 13.9499454498291
+278.2 29.1785507202148
+280.1 69.7298736572266
+282.0 62.0832252502441
+283.4 41.801929473877
+285.2 85.9555892944336
+290.2 25.7369613647461
+296.2 40.9112319946289
+297.2 20.1072216033936
+298.2 73.1739807128906
+304.9 89.5611114501953
+308.3 33.9126281738281
+310.1 46.537483215332
+311.4 10.3522138595581
+315.0 56.7888412475586
+321.1 117.581436157227
+323.2 58.6494178771973
+325.1 448.190612792969
+326.3 107.182983398438
+327.4 110.143035888672
+337.0 38.5364837646484
+338.3 1098.029296875
+339.3 41.2637481689453
+340.3 11.1756210327148
+343.2 645.440063476563
+344.3 109.936744689941
+351.3 29.0518474578857
+355.2 33.4019088745117
+356.4 30.1679000854492
+360.5 26.5615749359131
+362.2 18.0467681884766
+369.1 298.878936767578
+371.1 12.5077867507935
+383.2 44.0830192565918
+384.8 188.012496948242
+389.4 11.3310432434082
+395.9 137.669387817383
+396.7 89.0941619873047
+399.1 81.6594543457031
+400.1 78.5207290649414
+401.2 25.1709098815918
+403.1 22.3223552703857
+404.2 121.889595031738
+406.9 49.677303314209
+409.1 43.7738418579102
+410.2 62.7529335021973
+412.3 12.8203449249268
+414.2 1419.58569335938
+415.3 332.163116455078
+421.2 10.6162090301514
+424.2 84.2117385864258
+426.1 168.869049072266
+427.3 95.0237503051758
+428.0 109.249137878418
+431.5 32.5814361572266
+439.2 2071.63598632813
+440.2 408.921997070313
+440.9 15.2504940032959
+443.3 28.6818408966064
+447.4 60.4266128540039
+448.4 11.2975292205811
+451.3 37.678295135498
+454.4 75.4133224487305
+456.4 76.3940124511719
+458.5 67.6319198608398
+460.1 24.6743640899658
+465.1 57.8467826843262
+470.1 91.4192962646484
+472.5 8.72362995147705
+474.2 88.6505813598633
+475.9 119.517150878906
+479.4 128.116409301758
+482.4 19.2141532897949
+483.2 48.2660903930664
+484.4 121.271522521973
+486.6 28.458251953125
+487.4 164.915145874023
+488.0 28.6708030700684
+490.2 15.172703742981
+491.2 22.9361953735352
+492.8 37.0322074890137
+493.7 35.3836669921875
+497.4 1556.947265625
+498.3 74.538215637207
+500.1 70.1072235107422
+510.2 83.919059753418
+512.6 30.4224014282227
+515.3 756.706909179688
+516.2 153.287338256836
+517.3 246.687286376953
+520.4 25.2067165374756
+522.4 16.1743965148926
+526.3 42.132453918457
+527.2 164.588928222656
+534.0 177.418548583984
+535.3 33.273811340332
+544.3 66.3913269042969
+550.6 61.2340927124023
+552.6 1630.35693359375
+553.4 501.324157714844
+555.0 365.45947265625
+556.4 164.618499755859
+558.2 11.3074045181274
+559.3 134.589447021484
+561.8 1224.5458984375
+562.4 167.068695068359
+568.1 111.532554626465
+570.7 337.444793701172
+571.3 384.015441894531
+572.3 301.724792480469
+573.3 11.5521078109741
+578.6 33.6315460205078
+580.1 635.729064941406
+581.4 13.2606468200684
+583.2 12.0318899154663
+584.0 128.675155639648
+587.4 51.7380027770996
+588.4 138.934951782227
+589.1 122.758239746094
+594.7 256.360260009766
+596.4 53.9915237426758
+597.6 709.977294921875
+598.5 426.9580078125
+599.4 68.6525573730469
+602.4 63.3077049255371
+606.3 302.083190917969
+607.7 35.7342338562012
+608.7 121.486000061035
+610.5 80.8682250976563
+611.4 148.939575195313
+612.6 185.941467285156
+613.6 25.428466796875
+614.8 113.093994140625
+615.7 59.2823028564453
+616.4 353.466247558594
+617.5 143.952346801758
+618.6 374.312347412109
+619.4 99.4176940917969
+623.7 3357.48388671875
+624.4 1359.62329101563
+625.4 245.867614746094
+626.1 157.986923217773
+627.2 519.459045410156
+629.3 19.5083637237549
+632.1 505.914093017578
+632.8 517.481567382813
+657.8 43.5045852661133
+667.5 1928.78002929688
+668.4 264.997253417969
+669.2 24.4608688354492
+673.2 124.966384887695
+679.4 107.868896484375
+683.2 14.2855787277222
+684.4 8.41092777252197
+693.4 13.253529548645
+694.7 131.752044677734
+695.7 245.257415771484
+700.1 34.7297515869141
+702.2 31.5073890686035
+704.5 53.2440452575684
+709.6 71.0768890380859
+710.3 26.4182605743408
+711.6 69.4583587646484
+712.8 223.488006591797
+713.4 398.023956298828
+714.1 42.4429244995117
+716.4 268.347808837891
+721.7 29.4761581420898
+723.7 30.2028484344482
+724.4 77.1617126464844
+727.7 23.8826656341553
+728.8 83.3675689697266
+730.9 61.7215995788574
+731.5 365.945495605469
+732.4 25.6780529022217
+733.5 86.0997161865234
+734.1 211.706298828125
+734.7 170.057266235352
+735.7 34.4355850219727
+736.3 21.3248996734619
+737.8 28.2123718261719
+740.5 64.4994964599609
+745.9 35.8622665405273
+749.3 139.081253051758
+750.2 90.3683624267578
+751.2 58.2744140625
+754.8 272.833679199219
+755.5 94.048957824707
+756.5 41.7789077758789
+758.6 54.7928771972656
+761.7 210.011077880859
+764.6 142.994476318359
+765.6 81.9965362548828
+766.7 35.577995300293
+768.5 3041.2265625
+769.6 572.0908203125
+771.3 45.8496932983398
+772.4 23.0608959197998
+773.0 127.99210357666
+774.3 116.940147399902
+782.7 57.8522033691406
+785.8 36.3534317016602
+787.4 58.3450508117676
+789.7 17.1753330230713
+801.5 156.923110961914
+808.5 93.7870178222656
+811.7 138.829620361328
+812.9 95.1789703369141
+815.9 49.2497062683105
+819.5 57.8614921569824
+823.4 146.516845703125
+825.5 110.653259277344
+826.3 847.304016113281
+827.5 261.314758300781
+833.6 313.015563964844
+834.3 75.0192184448242
+835.6 6.64657402038574
+836.5 29.6156463623047
+840.3 30.5793743133545
+844.2 826.542785644531
+845.5 95.272834777832
+851.5 9.39962959289551
+852.5 215.784210205078
+854.0 22.95090675354
+862.3 55.8864212036133
+868.8 310.330688476563
+869.5 4365.05078125
+870.6 973.645568847656
+880.7 109.654144287109
+881.6 418.934722900391
+882.5 242.965194702148
+883.5 30.5551948547363
+890.1 90.925910949707
+893.7 108.532112121582
+901.1 103.438674926758
+901.9 26.619327545166
+904.4 37.5361213684082
+909.4 71.6884689331055
+910.6 160.241668701172
+922.5 1927.29174804688
+923.5 521.376098632813
+925.3 93.7609786987305
+926.7 105.412010192871
+927.4 411.131469726563
+928.5 197.210342407227
+930.5 96.873405456543
+934.3 46.5086402893066
+940.5 2199.56323242188
+941.3 386.321563720703
+945.4 1276.38610839844
+946.4 184.895385742188
+948.4 14.1699657440186
+949.7 105.596832275391
+964.7 27.335994720459
+973.7 91.5315170288086
+978.5 37.3656005859375
+979.4 90.653694152832
+980.6 230.451599121094
+981.5 78.6788864135742
+983.5 18.730318069458
+992.2 62.2943420410156
+993.5 791.578002929688
+994.8 218.580230712891
+1011.5 1050.68334960938
+1012.4 75.6817169189453
+1023.6 21.024393081665
+1035.9 130.89958190918
+1039.5 44.0413208007813
+1040.7 45.8695411682129
+1055.3 61.0222091674805
+1073.4 7.92942237854004
+1084.7 90.3775177001953
+1090.0 212.279205322266
+1091.0 40.6307792663574
+1095.7 12.9097557067871
+1106.3 51.4776039123535
+1108.4 295.580932617188
+1109.5 21.5643367767334
+1120.1 46.7118492126465
+1122.7 48.6416053771973
+1123.7 101.746391296387
+1134.5 53.9113693237305
+1140.4 57.3741493225098
+1249.4 18.1081371307373
+
+S	004596	004596	 833.57
+Z	2	1666.13
+Z	3	2498.69
+233.9 9.06327342987061
+241.1 5.11276388168335
+253.0 29.5022850036621
+255.2 13.4334888458252
+258.2 19.8327140808105
+259.2 48.1433982849121
+267.1 6.46290969848633
+271.3 16.4752368927002
+277.3 21.7187061309814
+278.3 7.87414360046387
+282.3 51.4676246643066
+283.0 54.5101699829102
+292.2 54.9369888305664
+294.4 32.9214401245117
+301.1 48.046501159668
+316.3 10.6048393249512
+317.0 30.692834854126
+329.2 21.925012588501
+329.9 34.8108901977539
+346.1 16.7230834960938
+348.3 9.27395248413086
+356.2 7.91190099716187
+357.3 70.3492050170898
+374.1 125.380523681641
+392.3 153.714630126953
+394.2 19.604907989502
+398.2 21.0054054260254
+400.5 73.8589553833008
+413.4 11.3629217147827
+417.3 126.462875366211
+422.0 14.8418464660645
+427.3 51.3735427856445
+430.3 17.6019592285156
+435.6 53.4573135375977
+441.2 43.047061920166
+445.3 144.098297119141
+450.0 28.6957931518555
+452.4 30.8523025512695
+458.5 6.60467672348022
+475.2 61.2120590209961
+479.5 10.9543313980103
+485.2 50.4099273681641
+486.1 59.972469329834
+487.3 108.26229095459
+501.6 50.5225563049316
+502.9 34.3544769287109
+521.4 51.9593124389648
+522.1 14.2406911849976
+526.0 19.6839714050293
+527.0 74.3941650390625
+528.5 28.6051902770996
+530.7 57.061164855957
+538.6 53.1338119506836
+540.1 49.4479827880859
+540.9 31.368989944458
+544.5 24.9528007507324
+555.8 11.3694086074829
+573.4 97.236328125
+574.3 45.6546630859375
+575.4 130.573364257813
+577.2 6.5386176109314
+584.6 6.45523118972778
+588.4 44.2757339477539
+592.2 92.0804977416992
+593.4 92.0999450683594
+597.2 67.4226150512695
+602.0 26.0033073425293
+615.0 86.8636474609375
+616.4 25.6954402923584
+623.3 90.7534637451172
+624.1 7.25228643417358
+629.4 8.37987899780273
+633.5 10.1180047988892
+644.4 123.277397155762
+645.5 26.6690483093262
+647.4 57.6000633239746
+651.2 37.3147773742676
+655.3 162.406112670898
+659.3 76.8048095703125
+670.5 29.8815269470215
+673.4 219.581573486328
+674.3 99.6774368286133
+681.0 64.9308395385742
+685.9 165.341735839844
+688.4 41.0708198547363
+694.9 462.405883789063
+698.0 69.1585311889648
+702.2 125.955169677734
+703.4 71.1014633178711
+704.5 33.5423851013184
+705.2 48.9885292053223
+708.1 66.7361297607422
+714.7 104.197769165039
+717.2 14.3000354766846
+721.2 252.020385742188
+722.2 77.3344573974609
+724.4 87.7767944335938
+728.2 68.5680694580078
+732.8 52.2872161865234
+744.3 391.912902832031
+745.6 184.961624145508
+746.5 156.845245361328
+750.7 657.696960449219
+751.4 18.371150970459
+755.5 40.9293518066406
+759.5 1001.6298828125
+760.3 185.179153442383
+766.2 18.4048862457275
+771.8 88.8187637329102
+774.9 77.6372375488281
+778.5 34.156867980957
+787.4 48.0149726867676
+788.5 61.3476409912109
+797.3 17.4006214141846
+798.8 68.5802688598633
+810.5 85.8645248413086
+814.7 314.213836669922
+815.4 734.52880859375
+816.4 413.814666748047
+817.6 52.8716354370117
+844.5 107.223091125488
+845.4 33.610710144043
+846.6 18.8185806274414
+847.5 7.90904998779297
+851.0 28.5967636108398
+855.0 36.3489379882813
+856.3 41.3064002990723
+874.7 9.3620433807373
+875.5 36.7420845031738
+885.6 9.04907894134521
+900.4 36.1689414978027
+901.3 21.0365180969238
+902.4 66.7550888061523
+921.3 313.239807128906
+922.2 124.673614501953
+923.7 60.0936737060547
+927.7 29.9614486694336
+931.4 43.4766387939453
+944.4 627.132751464844
+945.4 532.959594726563
+946.5 120.982429504395
+973.3 57.9656295776367
+974.4 77.9250793457031
+991.7 69.4336090087891
+992.4 226.383087158203
+993.1 84.7520141601563
+999.2 28.4588661193848
+1029.5 48.6811981201172
+1031.4 39.3587913513184
+1032.1 32.7809562683105
+1037.4 28.0909366607666
+1046.3 29.002513885498
+1056.1 65.2755584716797
+1059.4 108.616226196289
+1073.3 570.997436523438
+1074.5 197.580123901367
+1075.8 75.5585556030273
+1084.6 28.5858478546143
+1085.3 5.72312927246094
+1100.5 46.452522277832
+1117.6 42.2168426513672
+1118.5 16.8088397979736
+1121.3 52.8077545166016
+1122.3 76.6635971069336
+1127.2 30.0536308288574
+1144.5 549.320068359375
+1145.5 231.196197509766
+1146.7 32.1935005187988
+1154.5 13.3004770278931
+1157.7 11.2992610931396
+1170.4 25.4317169189453
+1173.6 25.4448413848877
+1178.7 83.2475204467773
+1179.8 60.0345001220703
+1180.5 5.71155214309692
+1189.2 38.1284255981445
+1190.2 17.0804290771484
+1191.6 53.1390533447266
+1202.7 66.1393280029297
+1228.5 24.0686721801758
+1247.4 30.144847869873
+1257.5 45.1692428588867
+1259.5 34.0688285827637
+1260.2 76.1449203491211
+1265.5 136.952331542969
+1267.3 109.77311706543
+1273.5 599.671142578125
+1274.4 558.226501464844
+1275.4 28.9847583770752
+1317.6 20.680456161499
+1336.3 143.956207275391
+1337.6 37.9440574645996
+1372.4 11.1991987228394
+1389.3 428.820587158203
+1390.7 23.7479400634766
+1429.7 21.7973117828369
+1491.7 44.3017616271973
+1517.1 23.8610935211182
+1518.7 95.5812835693359
+1519.5 7.1558256149292
+
+S	005975	005975	 702.56
+Z	2	1404.11
+Z	3	2105.66
+200.2 244.198944091797
+211.2 125.478965759277
+212.2 16.577657699585
+214.3 11.4341650009155
+215.3 5.64610004425049
+223.2 63.2361526489258
+228.1 286.697509765625
+229.6 16.2009506225586
+242.2 44.9520416259766
+245.1 17.505184173584
+260.3 144.833358764648
+261.4 69.0646896362305
+275.4 19.417350769043
+288.2 128.053085327148
+298.1 19.3211917877197
+300.3 10.1043291091919
+304.2 76.6266784667969
+307.3 60.6780471801758
+308.0 13.6618480682373
+312.0 9.87532234191895
+313.4 25.3828125
+315.0 20.2992763519287
+324.3 65.1580352783203
+325.1 20.2132091522217
+327.2 12.8937082290649
+338.2 11.7464742660522
+339.4 55.8018798828125
+341.2 25.3004417419434
+342.2 67.9788513183594
+347.3 346.322814941406
+348.3 58.1488418579102
+353.3 15.0066108703613
+354.4 27.8334083557129
+356.0 9.63239669799805
+358.2 14.5823936462402
+360.5 5.89802646636963
+368.1 3.92963528633118
+372.3 7.24454307556152
+374.3 21.0096054077148
+376.4 4.21282577514648
+379.6 4.66910982131958
+384.8 11.3986129760742
+388.4 40.2899208068848
+395.9 73.3092880249023
+397.3 193.537811279297
+401.4 13.8330335617065
+406.1 7.93343830108643
+408.1 17.9111042022705
+416.5 51.3518180847168
+417.5 76.2232894897461
+418.4 256.286834716797
+419.4 68.0492095947266
+423.4 18.4184894561768
+425.3 5.02486658096313
+426.3 65.3859939575195
+438.1 47.4040946960449
+440.3 161.905990600586
+441.3 26.0683917999268
+442.2 17.5846996307373
+443.6 43.2222785949707
+455.3 59.9161911010742
+457.1 75.4026107788086
+458.1 78.7186737060547
+463.3 18.6236782073975
+467.2 10.6252880096436
+470.5 8.72642421722412
+472.8 87.0850143432617
+476.4 5.52875137329102
+478.4 37.4836158752441
+487.5 130.571670532227
+492.5 8.30630302429199
+494.3 32.3030166625977
+495.1 9.66674995422363
+497.3 3.92838454246521
+505.4 1313.36706542969
+506.5 135.740921020508
+509.1 38.0695419311523
+510.4 18.4730243682861
+511.4 121.727012634277
+514.5 78.4575042724609
+516.3 61.8367080688477
+521.2 36.5624313354492
+525.4 33.6884727478027
+528.7 11.8382091522217
+532.2 333.392181396484
+532.8 14.3780479431152
+533.5 45.6845512390137
+537.0 3.61257028579712
+545.2 34.7418174743652
+552.9 11.3466119766235
+554.1 102.469604492188
+555.1 10.6421909332275
+556.4 51.0505142211914
+559.0 20.3898468017578
+563.0 96.0205841064453
+567.3 10.91233253479
+570.2 54.6397972106934
+571.4 263.921142578125
+572.1 73.5406875610352
+573.3 68.0976104736328
+575.1 77.4662170410156
+578.0 40.3536758422852
+580.2 204.993774414063
+585.6 60.9190216064453
+588.9 1152.40795898438
+590.2 38.9507522583008
+592.1 25.5116691589355
+594.5 30.7449226379395
+600.1 65.2834320068359
+601.0 73.0758590698242
+602.8 26.2031745910645
+604.4 300.026977539063
+605.5 42.6035766601563
+606.1 29.0496406555176
+609.5 181.237121582031
+612.1 32.6494140625
+619.4 63.0686531066895
+621.1 62.0305824279785
+626.1 56.7526702880859
+628.4 150.734649658203
+629.5 49.8539390563965
+632.9 140.969299316406
+634.6 221.341583251953
+635.4 26.9136543273926
+636.6 49.7521667480469
+637.5 47.7426071166992
+641.7 266.860137939453
+643.6 53.9990005493164
+645.5 232.62663269043
+647.5 226.769683837891
+649.4 145.022598266602
+650.2 358.644439697266
+650.8 155.972473144531
+651.5 60.8240661621094
+652.8 55.4289054870605
+655.3 11.1546087265015
+659.5 117.91535949707
+661.1 103.795867919922
+662.4 76.0803756713867
+665.5 5.84557199478149
+666.9 6590.75390625
+667.8 114.863128662109
+668.5 20.6604976654053
+669.8 80.9661865234375
+670.8 129.979095458984
+671.7 74.4041366577148
+672.5 10.6992282867432
+673.4 7.63064670562744
+674.4 27.5398654937744
+675.4 166.632598876953
+676.4 495.025024414063
+677.5 230.132858276367
+679.7 42.198314666748
+680.5 38.5691337585449
+682.5 17.8973026275635
+684.0 288.871368408203
+684.9 1798.48559570313
+685.9 254.203155517578
+689.4 10.351300239563
+691.4 94.4514617919922
+693.1 53.3338851928711
+711.3 17.4954223632813
+714.3 27.8164100646973
+719.3 171.809494018555
+720.8 64.9053344726563
+723.6 18.8676643371582
+730.0 38.5443572998047
+731.1 5.20265102386475
+734.5 32.1008682250977
+737.9 156.770599365234
+738.7 73.6795349121094
+739.4 21.3797340393066
+740.5 44.2326507568359
+749.2 47.5809860229492
+756.5 85.7226104736328
+757.7 44.4652557373047
+760.3 73.0944290161133
+761.4 53.5566673278809
+765.5 56.8680610656738
+766.5 4.47359132766724
+773.5 16.1217212677002
+775.5 46.4773635864258
+778.0 42.6657257080078
+780.6 31.6064567565918
+782.3 15.7793560028076
+783.6 170.060028076172
+785.4 135.614166259766
+791.2 47.928882598877
+795.0 208.91325378418
+795.6 28.0148868560791
+798.2 62.7083740234375
+800.2 380.663665771484
+801.4 9.69822692871094
+803.8 110.207542419434
+810.9 50.1951560974121
+811.8 83.2831420898438
+812.5 36.3833160400391
+816.2 13.966290473938
+817.7 45.6291694641113
+818.3 61.1527786254883
+830.1 11.6600093841553
+833.3 260.473815917969
+834.0 12.3208589553833
+834.7 112.854782104492
+837.0 36.0131454467773
+838.7 45.579174041748
+840.2 80.4257125854492
+845.3 19.3566246032715
+847.2 46.8500747680664
+847.8 148.996017456055
+848.7 38.8970489501953
+851.9 459.620239257813
+852.7 194.229232788086
+854.9 85.1216812133789
+857.7 66.8775482177734
+863.2 48.8110694885254
+864.4 164.661315917969
+865.3 365.423797607422
+866.3 49.4286422729492
+871.4 66.1395568847656
+872.3 81.3633651733398
+873.5 13.4700765609741
+874.4 35.9890632629395
+875.5 18.2322235107422
+876.8 9.85540866851807
+882.3 313.851837158203
+883.3 87.6082000732422
+884.2 32.3517189025879
+886.7 98.698356628418
+890.2 59.7312812805176
+891.2 45.8622016906738
+891.8 22.1744136810303
+893.1 20.6718139648438
+899.3 278.485504150391
+900.3 80.6021499633789
+905.9 8.03078842163086
+910.3 28.1656703948975
+914.7 34.9159507751465
+915.7 47.9202575683594
+930.5 43.4959259033203
+934.4 41.5366744995117
+935.3 47.1219825744629
+937.9 40.6703262329102
+943.7 17.0642185211182
+944.6 85.5159759521484
+947.5 428.296264648438
+948.7 136.942932128906
+951.3 15.1743173599243
+952.3 23.6910362243652
+968.4 10.0206346511841
+969.4 181.718063354492
+970.3 29.9817886352539
+983.3 40.5213432312012
+986.7 75.0085754394531
+987.4 22.8729152679443
+988.4 89.006965637207
+989.6 69.4317779541016
+990.6 25.6456508636475
+1003.6 86.861572265625
+1004.5 50.9892883300781
+1005.4 12.2137088775635
+1015.9 22.3824424743652
+1022.3 98.5564117431641
+1023.4 229.349945068359
+1024.5 16.3501873016357
+1026.4 6.50220108032227
+1028.5 60.1939582824707
+1029.5 79.4005737304688
+1030.5 36.3758544921875
+1035.6 25.5759105682373
+1039.6 143.11442565918
+1040.3 171.596115112305
+1041.5 73.3590621948242
+1044.8 84.3878021240234
+1045.7 22.8279666900635
+1046.4 69.415283203125
+1057.6 210.712661743164
+1058.5 44.5951919555664
+1062.5 730.289489746094
+1063.5 267.512573242188
+1064.4 10.4151649475098
+1065.5 73.1464309692383
+1076.5 21.3651866912842
+1078.5 38.1752815246582
+1082.2 222.550704956055
+1083.5 14.8064031600952
+1086.8 39.2629013061523
+1092.1 41.5884628295898
+1099.4 321.617858886719
+1101.6 20.9681720733643
+1108.6 13.6788148880005
+1116.4 47.1218719482422
+1118.0 62.8035011291504
+1118.7 25.3689498901367
+1126.5 47.0022087097168
+1129.7 16.3078765869141
+1131.0 64.7820434570313
+1137.4 27.5963973999023
+1141.4 117.911560058594
+1143.4 34.6314277648926
+1144.1 44.4455184936523
+1144.8 12.3725433349609
+1145.4 35.4225387573242
+1146.5 21.6615524291992
+1158.6 199.043838500977
+1159.5 544.716247558594
+1160.2 59.1385192871094
+1160.8 17.697847366333
+1166.4 28.6122779846191
+1176.5 3301.90576171875
+1177.5 1429.97766113281
+1194.1 81.3943939208984
+1205.2 44.0304222106934
+1211.7 22.6078281402588
+1212.4 66.2081451416016
+1218.5 209.972213745117
+1221.8 60.764289855957
+1222.7 5.46742630004883
+1223.6 31.7276344299316
+1224.7 41.3597106933594
+1228.7 90.4682006835938
+1230.4 15.1203279495239
+1239.7 110.184677124023
+1241.1 76.9298400878906
+1257.5 32.2757186889648
+1258.7 5.31881237030029
+1259.7 42.3198509216309
+1281.4 28.8357028961182
+1300.2 133.588577270508
+1301.0 21.0918998718262
+1301.8 39.0974578857422
+1372.7 5.29332780838013
+
+S	006176	006176	 521.15
+Z	2	1041.29
+Z	3	1561.43
+150.0 99.5292739868164
+152.3 24.8964080810547
+155.8 207.423416137695
+164.4 73.7251739501953
+166.1 20.3963298797607
+167.4 56.4421691894531
+170.9 108.634124755859
+173.1 173.533508300781
+174.2 37.6329040527344
+178.0 75.6242446899414
+179.0 35.9586982727051
+183.2 191.816421508789
+184.3 37.8338394165039
+186.3 207.774597167969
+187.0 207.990249633789
+189.3 40.4748420715332
+189.9 81.716911315918
+193.5 127.7421875
+195.1 521.047424316406
+197.1 103.121147155762
+198.3 21.614372253418
+201.2 312.417938232422
+202.2 124.550903320313
+204.3 43.8117828369141
+207.1 132.988067626953
+208.2 686.391052246094
+209.2 114.778541564941
+214.2 164.669555664063
+215.4 25.4923419952393
+216.7 184.227172851563
+218.5 287.131683349609
+225.1 3456.8671875
+226.2 92.256477355957
+227.0 59.1759757995605
+228.3 620.92138671875
+229.2 525.492736816406
+230.6 149.691055297852
+231.3 159.055297851563
+233.2 33.5002593994141
+237.4 61.6736602783203
+240.2 74.5882263183594
+243.3 19.8281497955322
+250.2 197.245040893555
+251.2 162.863800048828
+253.4 31.8894538879395
+264.8 57.371395111084
+268.3 141.637969970703
+274.1 172.328796386719
+275.1 25.5934238433838
+279.1 180.300064086914
+280.3 137.019592285156
+281.0 160.750122070313
+283.1 61.654468536377
+283.9 43.0586090087891
+285.1 106.889259338379
+291.1 4545.6015625
+292.2 604.569396972656
+293.2 126.150260925293
+298.2 45.4005432128906
+302.2 59.6245574951172
+303.6 21.8396987915039
+308.4 155.952682495117
+309.3 110.394332885742
+311.2 64.5288696289063
+313.2 243.498977661133
+320.1 241.904708862305
+321.7 1670.7666015625
+322.3 652.284362792969
+325.6 267.42919921875
+326.9 310.211853027344
+328.1 257.203186035156
+329.4 101.441497802734
+332.2 339.5302734375
+332.8 280.295135498047
+333.9 20.0184745788574
+335.2 44.7012939453125
+338.2 2062.89501953125
+339.0 568.604553222656
+339.6 26.5377578735352
+343.4 39.5140609741211
+347.2 105.298751831055
+350.3 1040.91418457031
+351.2 45.4548950195313
+352.2 134.943908691406
+352.9 101.220916748047
+354.2 218.063583374023
+361.2 214.029571533203
+361.9 378.6259765625
+363.5 85.0887603759766
+366.6 119.213439941406
+373.8 177.017532348633
+375.3 634.314392089844
+379.4 358.606567382813
+381.1 344.165191650391
+385.4 51.2277984619141
+388.9 95.055061340332
+392.3 245.888320922852
+392.9 34.1393585205078
+395.1 80.3456802368164
+396.6 109.19938659668
+398.5 300.292816162109
+399.2 18.2609996795654
+405.8 398.365234375
+406.6 1514.87939453125
+407.3 1132.36779785156
+408.4 40.1826972961426
+409.2 44.524471282959
+410.7 95.4533462524414
+412.8 223.126602172852
+415.0 225.890472412109
+418.8 148.28759765625
+420.3 136.622268676758
+423.5 57.5325317382813
+425.8 181.402893066406
+426.5 197.004180908203
+429.5 150.588348388672
+437.5 159.617172241211
+438.9 155.705093383789
+440.0 320.820617675781
+441.4 59.1881942749023
+443.0 25.7806797027588
+444.3 257.557678222656
+452.2 488.827575683594
+453.2 122.581436157227
+454.2 89.0110778808594
+456.2 582.955322265625
+456.9 492.3125
+460.9 190.344879150391
+462.2 29.7451362609863
+463.4 166.561279296875
+464.2 342.171936035156
+465.3 28.275318145752
+468.3 724.613342285156
+469.9 317.924926757813
+473.9 520.170349121094
+476.6 106.93537902832
+478.3 254.534317016602
+479.4 258.606475830078
+481.1 429.047546386719
+483.9 419.321502685547
+485.7 182.188278198242
+487.3 71.4511795043945
+489.5 301.888153076172
+490.4 64.9893569946289
+492.0 3018.5400390625
+492.9 19.1942615509033
+496.9 55.5130004882813
+498.0 52.1472969055176
+502.6 647.311218261719
+503.4 3316.97265625
+505.4 1220.0107421875
+506.2 1401.5634765625
+509.4 1633.67236328125
+511.1 585.366455078125
+512.2 1213.20642089844
+529.0 325.823822021484
+532.0 189.374313354492
+534.0 341.669189453125
+535.2 46.2352294921875
+539.2 106.508460998535
+546.0 71.2236404418945
+551.3 2736.69946289063
+552.3 976.499267578125
+555.4 189.501678466797
+556.4 336.451629638672
+558.4 39.9683456420898
+568.1 65.471794128418
+571.5 76.160774230957
+572.5 23.0066204071045
+575.2 36.9830474853516
+577.1 226.924942016602
+578.5 191.365570068359
+579.1 286.493865966797
+581.1 367.433197021484
+582.4 133.620468139648
+586.5 248.491760253906
+588.7 362.93359375
+590.5 503.667633056641
+592.8 145.779647827148
+596.4 259.081512451172
+597.6 415.784301757813
+598.3 244.758529663086
+599.0 471.252563476563
+612.4 1846.80407714844
+613.4 75.7025833129883
+614.1 37.2765922546387
+618.7 170.722930908203
+619.6 313.047088623047
+621.4 660.327514648438
+622.2 21.7938098907471
+624.4 313.432464599609
+626.9 130.991302490234
+628.3 97.1897354125977
+632.6 191.810455322266
+634.2 85.4735488891602
+636.0 7497.3466796875
+636.9 1406.98706054688
+638.2 35.5022239685059
+639.0 115.526031494141
+639.6 33.7445449829102
+641.0 395.794189453125
+642.2 1687.37548828125
+643.4 509.403411865234
+644.5 130.244094848633
+646.3 38.5255012512207
+650.5 2003.96423339844
+651.5 757.571533203125
+652.2 152.041275024414
+656.4 34.3828544616699
+659.2 70.6767654418945
+660.1 281.738739013672
+660.9 147.216033935547
+662.6 552.89013671875
+663.8 219.385498046875
+665.5 369.198394775391
+666.7 41.4958038330078
+667.4 75.3739318847656
+669.1 9056.111328125
+670.0 1263.83227539063
+671.2 629.555541992188
+672.9 751.114501953125
+677.4 286.827606201172
+679.3 131.191223144531
+680.2 529.069458007813
+681.5 349.415435791016
+682.5 66.8927688598633
+687.7 52.2866020202637
+699.2 6555.517578125
+700.2 1734.43823242188
+701.4 363.563201904297
+708.2 1169.42895507813
+711.1 567.533569335938
+712.3 964.083862304688
+713.4 313.545013427734
+717.1 275.078643798828
+719.2 85.1127319335938
+720.5 443.043731689453
+721.6 266.181488037109
+723.5 522.390747070313
+725.6 114.072143554688
+730.7 188.755187988281
+731.3 29.9104385375977
+732.4 260.320922851563
+745.5 124.977264404297
+746.8 309.491943359375
+749.5 3493.01147460938
+750.7 388.916625976563
+751.5 303.732635498047
+755.5 214.958847045898
+763.4 291.977966308594
+764.9 97.63037109375
+767.4 61.3502807617188
+768.8 113.80931854248
+773.7 263.193145751953
+777.6 100.272850036621
+792.6 178.006988525391
+795.5 265.086547851563
+803.2 105.245651245117
+806.5 252.971466064453
+810.4 833.338012695313
+811.5 3686.44848632813
+812.6 2232.9130859375
+813.6 1352.23278808594
+814.4 90.9366302490234
+815.7 50.749153137207
+820.4 134.235275268555
+822.3 75.2090606689453
+832.0 52.4180221557617
+839.5 191.050765991211
+840.9 209.548797607422
+844.6 123.805145263672
+862.5 747.167541503906
+863.6 135.258483886719
+871.7 19.2006816864014
+872.9 101.640571594238
+882.6 270.211181640625
+892.3 112.96102142334
+893.4 102.064804077148
+894.2 228.017761230469
+895.7 68.2846908569336
+911.4 1574.09594726563
+912.6 826.477661132813
+913.7 46.9788780212402
+917.5 97.251220703125
+919.6 764.842529296875
+920.5 233.038986206055
+927.9 19.5952644348145
+931.7 88.3806304931641
+933.6 55.7731246948242
+934.6 274.783782958984
+940.4 53.4021797180176
+963.2 40.4569473266602
+1010.6 294.230255126953
+1012.9 91.7745132446289
+1020.5 73.7383041381836
+1046.6 576.338745117188
+1048.0 32.0886535644531
diff --git a/SystemTest/YeastSmall.fasta b/SystemTest/YeastSmall.fasta
new file mode 100644
index 0000000..fcb719c
--- /dev/null
+++ b/SystemTest/YeastSmall.fasta
@@ -0,0 +1,62 @@
+>YBR118W TEF2 SGDID:S0000322, Chr II from 477627-479003, Verified ORF
+MGKEKSHINVVVIGHVDSGKSTTTGHLIYKCGGIDKRTIEKFEKEAAELGKGSFKYAWVL
+DKLKAERERGITIDIALWKFETPKYQVTVIDAPGHRDFIKNMITGTSQADCAILIIAGGV
+GEFEAGISKDGQTREHALLAFTLGVRQLIVAVNKMDSVKWDESRFQEIVKETSNFIKKVG
+YNPKTVPFVPISGWNGDNMIEATTNAPWYKGWEKETKAGVVKGKTLLEAIDAIEQPSRPT
+DKPLRLPLQDVYKIGGIGTVPVGRVETGVIKPGMVVTFAPAGVTTEVKSVEMHHEQLEQG
+VPGDNVGFNVKNVSVKEIRRGNVCGDAKNDPPKGCASFNATVIVLNHPGQISAGYSPVLD
+CHTAHIACRFDELLEKNDRRSGKKLEDHPKFLKSGDAALVKFVPSKPMCVEAFSEYPPLG
+RFAVRDMRQTVAVGVIKSVDKTEKAAKVTKAAQKAAKK*
+>YER177W BMH1 SGDID:S0000979, Chr V from 545606-546409, Verified ORF
+MSTSREDSVYLAKLAEQAERYEEMVENMKTVASSGQELSVEERNLLSVAYKNVIGARRAS
+WRIVSSIEQKEESKEKSEHQVELICSYRSKIETELTKISDDILSVLDSHLIPSATTGESK
+VFYYKMKGDYHRYLAEFSSGDAREKATNASLEAYKTASEIATTELPPTHPIRLGLALNFS
+VFYYEIQNSPDKACHLAKQAFDDAIAELDTLSEESYKDSTLIMQLLRDNLTLWTSDMSES
+GQAEDQQQQQQHQQQQPPAAAEGEAPK*
+>YGL008C PMA1 SGDID:S0002976, Chr VII from 482669-479913, reverse complement, Verified ORF
+MTDTSSSSSSSSASSVSAHQPTQEKPAKTYDDAASESSDDDDIDALIEELQSNHGVDDED
+SDNDGPVAAGEARPVPEEYLQTDPSYGLTSDEVLKRRKKYGLNQMADEKESLVVKFVMFF
+VGPIQFVMEAAAILAAGLSDWVDFGVICGLLMLNAGVGFVQEFQAGSIVDELKKTLANTA
+VVIRDGQLVEIPANEVVPGDILQLEDGTVIPTDGRIVTEDCFLQIDQSAITGESLAVDKH
+YGDQTFSSSTVKRGEGFMVVTATGDNTFVGRAAALVNKAAGGQGHFTEVLNGIGIILLVL
+VIATLLLVWTACFYRTNGIVRILRYTLGITIIGVPVGLPAVVTTTMAVGAAYLAKKQAIV
+QKLSAIESLAGVEILCSDKTGTLTKNKLSLHEPYTVEGVSPDDLMLTACLAASRKKKGLD
+AIDKAFLKSLKQYPKAKDALTKYKVLEFHPFDPVSKKVTAVVESPEGERIVCVKGAPLFV
+LKTVEEDHPIPEDVHENYENKVAELASRGFRALGVARKRGEGHWEILGVMPCMDPPRDDT
+AQTVSEARHLGLRVKMLTGDAVGIAKETCRQLGLGTNIYNAERLGLGGGGDMPGSELADF
+VENADGFAEVFPQHKYRVVEILQNRGYLVAMTGDGVNDAPSLKKADTGIAVEGATDAARS
+AADIVFLAPGLSAIIDALKTSRQIFHRMYSYVVYRIALSLHLEIFLGLWIAILDNSLDID
+LIVFIAIFADVATLAIAYDNAPYSPKPVKWNLPRLWGMSIILGIVLAIGSWITLTTMFLP
+KGGIIQNFGAMNGIMFLQISLTENWLIFITRAAGPFWSSIPSWQLAGAVFAVDIIATMFT
+LFGWWSENWTDIVTVVRVWIWSIGIFCVLGGFYYEMSTSEAFDRLMNGKPMKEKKSTRSV
+EDFMAAMQRVSTQHEKET*
+>YOR230W WTM1 SGDID:S0005756, Chr XV from 770800-772113, Verified ORF
+MPKKVWKSSTPSTYEHISSLRPKFVSRVDNVLHQRKSLTFSNVVVPDKKNNTLTSSVIYS
+QGSDIYEIDFAVPLQEAASEPVKDYGDAFEGIENTSLSPKFVYQGETVSKMAYLDKTGET
+TLLSMSKNGSLAWFKEGIKVPIHIVQELMGPATSYASIHSLTRPGDLPEKDFSLAISDFG
+ISNDTETIVKSQSNGDEEDSILKIIDNAGKPGEILRTVHVPGTTVTHTVRFFDNHIFASC
+SDDNILRFWDTRTSDKPIWVLGEPKNGKLTSFDCSQVSNNLFVTGFSTGIIKLWDARAAE
+AATTDLTYRQNGEDPIQNEIANFYHAGGDSVVDVQFSATSSSEFFTVGGTGNIYHWNTDY
+SLSKYNPDDTIAPPQDATEESQTKSLRFLHKGGSRRSPKQIGRRNTAAWHPVIENLVGTV
+DDDSLVSIYKPYTEESE*
+>YBR189W RPS9B SGDID:S0000393, Chr II from 604465-604471,604885-605465, Verified ORF
+MPRAPRTYSKTYSTPKRPYESSRLDAELKLAGEFGLKNKREIYRISFQLSKIRRAARDLL
+TRDEKDPKRLFEGNALIRRLVRVGVLSEDKKKLDYVLALKVEDFLERRLQTQVYKLGLAK
+SVHHARVLITQRHIAVGKQIVNIPSFMVRLDSEKHIDFAPTSPFGGARPGRVARRNAARK
+AEASGEAAEEAEDEE*
+>YOR361C PRT1 SGDID:S0005888, Chr XV from 1017648-1015357, reverse complement, Verified ORF
+MKNFLPRTLKNIYELYFNNISVHSIVSRNTQLKRSKIIQMTTETFEDIKLEDIPVDDIDF
+SDLEEQYKVTEEFNFDQYIVVNGAPVIPSAKVPVLKKALTSLFSKAGKVVNMEFPIDEAT
+GKTKGFLFVECGSMNDAKKIIKSFHGKRLDLKHRLFLYTMKDVERYNSDDFDTEFREPDM
+PTFVPSSSLKSWLMDDKVRDQFVLQDDVKTSVFWNSMFNEEDSLVESRENWSTNYVRFSP
+KGTYLFSYHQQGVTAWGGPNFDRLRRFYHPDVRNSSVSPNEKYLVTFSTEPIIVEEDNEF
+SPFTKKNEGHQLCIWDIASGLLMATFPVIKSPYLKWPLVRWSYNDKYCARMVGDSLIVHD
+ATKNFMPLEAKALKPSGIRDFSFAPEGVKLQPFRNGDEPSVLLAYWTPETNNSACTATIA
+EVPRGRVLKTVNLVQVSNVTLHWQNQAEFLCFNVERHTKSGKTQFSNLQICRLTERDIPV
+EKVELKDSVFEFGWEPHGNRFVTISVHEVADMNYAIPANTIRFYAPETKEKTDVIKRWSL
+VKEIPKTFANTVSWSPAGRFVVVGALVGPNMRRSDLQFYDMDYPGEKNINDNNDVSASLK
+DVAHPTYSAATNITWDPSGRYVTAWSSSLKHKVEHGYKIFNIAGNLVKEDIIAGFKNFAW
+RPRPASILSNAERKKVRKNLREWSAQFEEQDAMEADTAMRDLILHQRELLKQWTEYREKI
+GQEMEKSMNFKIFDVQPEDASDDFTTIEEIVEEVLEETKEKVE*
+
+
diff --git a/TAG2.bn b/TAG2.bn
new file mode 100644
index 0000000..12370b7
Binary files /dev/null and b/TAG2.bn differ
diff --git a/TAG3.bn b/TAG3.bn
new file mode 100644
index 0000000..f547131
Binary files /dev/null and b/TAG3.bn differ
diff --git a/TagFile.c b/TagFile.c
new file mode 100644
index 0000000..5b208b4
--- /dev/null
+++ b/TagFile.c
@@ -0,0 +1,493 @@
+//Title:          TagFile.c
+//Author:         Ari Frank
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+//
+//TagFile.c is responsible for parsing tag files from an external tagger
+
+#include "TagFile.h"
+#include "Trie.h"
+#include "Tagger.h"
+#include <stdio.h>
+#include "Errors.h"
+
+
+// Global variable
+ExternalTagHolder *TagHolder=NULL;
+
+// Reads all the contents of an external tag file
+// for each scan we have new tweaks, and new tags
+
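+// Expected file layout, as implied by the sscanf calls below (the scan number,
+// tag count and example values shown here are only illustrative):
+//
+//   <ScanNumber> <NumTags>                        e.g.  "6099 2"
+//   <Charge> <ParentMass>                         (TWEAK_COUNT lines, one per tweak)
+//   <TweakIdx>\t<Score>\t<PrefixMass>\t<TagSeq>   e.g.  "5	24.407	1988.619	SQLK"
+//   ... (NumTags such tag lines, then the next scan's block)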
+
+void ReadExternalTags(char *TagFilePath, int verbose)
+{
+	int MaxScan=0;
+	int TotalNumTags=0;
+	FILE *InputStream;
+	int GlobalTagIndex=0;
+	int i;
+
+	int LineNumber = 0;
+	char Buff[1024];
+	int ScanNumber,NumTags;
+
+	ScanTags *ThisScanTags=NULL;
+	
+	int NRead=0;	
+	int Charge=0;
+	float ParentMass = 0;
+
+	char*  TempAA;
+	char AnnotationBuffer[256];
+	char ModBuffer[256];
+	int AminoIndex;
+	MassDelta* Delta;
+	int ModBufferPos;
+	int ModIndex;
+	TrieTag *NewTag;
+	int   TweakIdx;
+	float Score,PrefixMass;
+	char  TagSeq[16];
+
+	if (verbose)
+		printf("Parsing tags from: %s\n",TagFilePath);
+
+	if (! TagHolder)
+	{
+		TagHolder = (ExternalTagHolder *)malloc(sizeof(ExternalTagHolder));
+	}
+
+	InputStream=fopen(TagFilePath,"r");
+	if (! InputStream)
+	{
+	  //printf("Error: couldn't read external tag file: %s\n",TagFilePath);
+	  //exit(1);
+	  REPORT_ERROR_S(8, TagFilePath);
+	  exit(8);
+	}
+
+	// read in two passes: first determine how many scans and tags there are;
+	// in the second pass allocate the memory for all the tags
+	
+	while (1)
+	{
+	  i = 0;
+	  if (! fgets(Buff,1024,InputStream))
+	    break;
+		
+	  ScanNumber = 0;
+	  NumTags = 0;
+		
+	  if (sscanf(Buff,"%d %d",&ScanNumber,&NumTags) == 2)
+	    {
+	      MaxScan=ScanNumber;
+	      TotalNumTags+=NumTags;
+	      LineNumber += 1;
+	    }
+		else
+		{
+		  //printf("Error parsing tag file1:\n%s\n",Buff);
+		  REPORT_ERROR_IS(14,LineNumber,TagFilePath);
+			exit(14);
+		}
+
+		// skip tweaks and tags this round
+		
+		for (i=0; i<TWEAK_COUNT+NumTags; i++)
+		  {
+		    fgets(Buff,1024,InputStream);
+		    LineNumber += 1;
+		  }
+	}
+	fclose(InputStream);
+
+	if (verbose)
+		printf("Allocating memory for %d scans and %d tags...\n",MaxScan+1,TotalNumTags);
+
+	TagHolder->MaxScanNumber = MaxScan;
+	TagHolder->AllScanTags = (ScanTags *)malloc((MaxScan+1)*sizeof(ScanTags));
+	TagHolder->AllExternalTrieTags = (TrieTag *)malloc(TotalNumTags*sizeof(TrieTag));
+
+	if (! TagHolder->AllScanTags || ! TagHolder->AllExternalTrieTags)
+	{
+	  //printf("Error: couldn't allocate sufficient memory to store all external tags!\n");
+	  REPORT_ERROR(1);
+		exit(1);
+	}
+
+	for (i=0; i<=MaxScan; i++)
+	{
+		TagHolder->AllScanTags[i].ScanNumber=i;
+		TagHolder->AllScanTags[i].NumTags=0;
+	}
+
+	// read again, this time store tags
+	InputStream=fopen(TagFilePath,"r");
+	if (! InputStream)
+	{
+	  //printf("Error: couldn't read external tag file: %s\n",TagFilePath);
+	  REPORT_ERROR_S(8, TagFilePath);
+	  exit(8);
+	}
+
+	
+	while (1)
+	{
+		ThisScanTags=NULL;
+		i = 0;
+		NRead=0;
+		ScanNumber=0;
+		NumTags=0;
+		LineNumber = 0;
+
+		if (! fgets(Buff,1024,InputStream))
+			break;
+
+		if (sscanf(Buff,"%d %d",&ScanNumber,&NumTags) != 2)
+		{
+		  //printf("Error parsing tag file2: %s\n",Buff);
+		  REPORT_ERROR_IS(14,LineNumber,TagFilePath);
+		  exit(14);
+		  
+		}
+
+		// read tweaks
+		ThisScanTags = &(TagHolder->AllScanTags[ScanNumber]);
+		for (i=0; i<TWEAK_COUNT; i++)
+		{
+			Charge=0;
+			ParentMass = 0;
+
+			fgets(Buff,1024,InputStream);
+			LineNumber += 1;
+			if (sscanf(Buff,"%d %f",&Charge,&ParentMass) != 2)
+			{
+			  //printf("Error parsing tag file3: %s\n",Buff);
+			  //exit(1); 
+			  REPORT_ERROR_IS(14,LineNumber,TagFilePath);
+			  exit(14);
+			}
+
+			ThisScanTags->Tweaks[i].Charge = Charge;
+			if (Charge>0)
+			{
+				ThisScanTags->Tweaks[i].ParentMass=(int)(ParentMass* MASS_SCALE + 0.5);
+			}
+			else
+				ThisScanTags->Tweaks[i].ParentMass=0;
+		}
+
+		ThisScanTags->NumTags = NumTags;
+		ThisScanTags->Tags = &(TagHolder->AllExternalTrieTags[GlobalTagIndex]);
+
+		// read tags (use Stephen's code to parse the tags)
+		for (i=0; i<NumTags; i++)
+		{
+						
+			NewTag = &(TagHolder->AllExternalTrieTags[GlobalTagIndex++]);
+			fgets(Buff,1024,InputStream);
+			LineNumber += 1;
+			if (sscanf(Buff,"%d\t%f\t%f\t%s",&TweakIdx,&Score,&PrefixMass,TagSeq) != 4)
+			{
+			  //printf("Error parsing tag file4: %s\n",Buff);
+			  //printf("Index: %d  ScanNumber: %d   GTI: %d\n",i,ScanNumber,GlobalTagIndex);
+			  //	exit(1); 
+			  REPORT_ERROR_IS(14,LineNumber,TagFilePath);
+			  exit(14);
+			}
+
+
+			//5	24.407	1988.619	SQLK
+			memset(NewTag, 0, sizeof(TrieTag));
+			for (ModIndex=0; ModIndex<MAX_PT_MODS; ModIndex++)
+				NewTag->AminoIndex[ModIndex]=-1;
+
+                // Special code:
+                // PepNovo may include MODIFICATIONS in its tags - so, we must parse them.
+                // We assume that (a) modifications are written in the form %+d, and (b) we
+                // know of the modification type from the inspect input file.
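+                // For example, a hypothetical tag "SQ+80LK" would be parsed as the
+                // residues S,Q,L,K with a "+80" modification attached to the Q
+                // (resolved through FindPTModByName below); the "+80" value here
+                // is only illustrative.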
+            TempAA = TagSeq;
+            AminoIndex = 0;
+            ModBufferPos = 0;
+                
+            while (*TempAA)
+            {
+	      if (*TempAA >= 'A' && *TempAA <= 'Z')
+                {
+					// an amino acid - so, finish the modification-in-progress, if there is one.
+                    if (ModBufferPos && AminoIndex)
+                    {
+		      if (NewTag->ModsUsed == MAX_PT_MODS)
+                        {
+			  printf("** Error tagging scan %d from file %s: Too many PTMs!\n", ScanNumber,TagFilePath);
+			  break;
+                        }
+                        ModBuffer[ModBufferPos] = '\0';
+                        Delta = FindPTModByName(NewTag->Tag[AminoIndex - 1], ModBuffer);
+                        if (Delta)
+                        {
+							NewTag->AminoIndex[NewTag->ModsUsed] = AminoIndex - 1;
+                            NewTag->ModType[NewTag->ModsUsed] = Delta;
+							NewTag->ModsUsed++;
+                        }
+                        else
+                        {
+			  printf("** Error tagging scan %d from file %s: Modification %s not understood!\n", ScanNumber, TagFilePath, ModBuffer);
+			  break;
+                        }
+					}
+                    ModBufferPos = 0;
+                    // Add the AA:
+                    NewTag->Tag[AminoIndex++] = *TempAA;
+                }// aa
+                else
+                {
+                    ModBuffer[ModBufferPos++] = *TempAA;
+                } // not aa
+                TempAA++;
+            }
+            NewTag->Tag[AminoIndex] = '\0';
+            // Finish any pending mod (COPY-PASTA FROM ABOVE)
+            if (ModBufferPos && AminoIndex)
+            {
+                if (NewTag->ModsUsed == MAX_PT_MODS)
+                {
+                    printf("** Error tagging scan %d from file %s: Too many PTMs!\n", ScanNumber, TagFilePath);
+                }
+                ModBuffer[ModBufferPos] = '\0';
+                Delta = FindPTModByName(NewTag->Tag[AminoIndex - 1], ModBuffer);
+                if (Delta)
+                {
+                    NewTag->AminoIndex[NewTag->ModsUsed] = AminoIndex - 1;
+                    NewTag->ModType[NewTag->ModsUsed] = Delta;
+                    NewTag->ModsUsed++;
+                }
+				else
+                {
+                    printf("** Error tagging scan %d from file %s: Modification %s not understood!\n",ScanNumber, TagFilePath, ModBuffer);
+                }
+            }
+
+            NewTag->Charge = ThisScanTags->Tweaks[TweakIdx].Charge;
+            NewTag->ParentMass = ThisScanTags->Tweaks[TweakIdx].ParentMass;
+            NewTag->PSpectrum = NULL;
+            NewTag->Tweak = ThisScanTags->Tweaks + TweakIdx;
+            NewTag->PrefixMass = (int)(PrefixMass * MASS_SCALE + 0.5);
+            NewTag->SuffixMass = NewTag->ParentMass - NewTag->PrefixMass - PARENT_MASS_BOOST;
+            NewTag->Score = Score;
+			NewTag->TagLength =0;
+            
+			for (TempAA = NewTag->Tag; *TempAA; TempAA++)
+            {
+                NewTag->SuffixMass -= PeptideMass[*TempAA];
+				NewTag->TagLength++;
+            }
+			
+			NewTag->Tag[NewTag->TagLength]='\0';
+
+            for (ModIndex = 0; ModIndex < MAX_PT_MODS; ModIndex++)
+            {
+                if (NewTag->AminoIndex[ModIndex] >= 0 && NewTag->ModType[ModIndex])
+                {
+                    NewTag->SuffixMass -= NewTag->ModType[ModIndex]->RealDelta;
+                }
+            }
+		}
+	}
+	fclose(InputStream);
+
+	if (verbose)
+	{
+		printf("Done reading %d tags\n",GlobalTagIndex);
+		printf("Max ScanNumber with tags %d\n",TagHolder->MaxScanNumber);
+	}
+}
+
+
+void FreeExternalTagHolder()
+{
+	if (TagHolder)
+	{
+		free(TagHolder->AllScanTags);
+		free(TagHolder->AllExternalTrieTags);
+		free(TagHolder);
+	}
+}
+
+
+
+void WriteExternalTags(char *OutFile)
+{
+	int i;
+	FILE *OutStream;
+	int TweakIdx;
+	int TagIdx;
+	ScanTags *ThisScan;
+	TrieTag * Tag;
+	int Index;
+	int ModIndex;
+
+	if (! TagHolder)
+		return;
+
+	printf("Writing tags to %s..\n",OutFile);
+
+	OutStream=fopen(OutFile,"w");
+	if (! OutStream)
+	{
+	  REPORT_ERROR_S(8, OutFile);
+	  exit(8);
+	  //printf("Error couldn't open file for writing: %s\n",OutFile);
+	  //exit(1);
+	}
+
+	for (i=0; i<=TagHolder->MaxScanNumber; i++)
+	{
+		
+		
+		ThisScan = &(TagHolder->AllScanTags[i]);
+		
+		if (ThisScan->NumTags<=0)
+			continue;
+
+		//printf("%d %d\n",i,ThisScan->NumTags);
+		fprintf(OutStream,"%d\t%d\n",i,ThisScan->NumTags);
+		
+		for (TagIdx=0; TagIdx<ThisScan->NumTags; TagIdx++)
+		{
+			Tag = ThisScan->Tags + TagIdx;
+			
+
+			fprintf(OutStream,"%d\t%.3f\t%.2f\t",Tag->Charge, (float)(Tag->PrefixMass / (float)MASS_SCALE),
+												 (float)(Tag->ParentMass / (float)MASS_SCALE));
+
+			for (Index = 0; Index < Tag->TagLength; Index++)
+			{
+			  //int ModIndex;
+				fprintf(OutStream,"%c", Tag->Tag[Index]);
+
+				for (ModIndex = 0; ModIndex < MAX_PT_MODS; ModIndex++)
+				{	
+					if (Tag->AminoIndex[ModIndex]==Index)
+						fprintf(OutStream,"%s", Tag->ModType[ModIndex]->Name);
+				}
+			}
+			fprintf(OutStream,"\n");
+		}
+	}
+
+	fclose(OutStream);
+
+	printf("Done writing tags (Max ScanNumber with tags %d)..\n",TagHolder->MaxScanNumber);
+}
+
+
+
+TrieNode *AddExternalTags(TrieNode *Root, SpectrumNode *Node)
+{
+	int ScanNumber = Node->ScanNumber;
+	MSSpectrum* Spectrum = Node->Spectrum;	
+	int DuplicateFlag;
+	int NumTags;
+	int TagIdx;
+	int TweakIdx;
+	SpectrumTweak *Tweaks;
+	TrieTag * Tags;
+	TrieTag *NewTag;
+	
+		
+	//
+	if (!Root)
+	  {
+	    Root = NewTrieNode();
+	    Root->FailureNode = Root;
+	  }
+
+	for (TweakIdx=0; TweakIdx<TWEAK_COUNT; TweakIdx++)
+		Node->Tweaks[TweakIdx].Charge=0;
+
+	if (ScanNumber> TagHolder->MaxScanNumber)
+		return Root;
+	
+	NumTags = TagHolder->AllScanTags[ScanNumber].NumTags;
+	if (NumTags<=0)
+		return Root;
+
+	Tweaks = TagHolder->AllScanTags[ScanNumber].Tweaks;
+	for (TweakIdx=0; TweakIdx<TWEAK_COUNT; TweakIdx++)
+	{
+		Node->Tweaks[TweakIdx]= Tweaks[TweakIdx];
+	}
+
+	Tags = TagHolder->AllScanTags[ScanNumber].Tags;
+
+	// Construct a root, if we don't have one already.  
+	if (!Root)
+	  {
+	    Root = NewTrieNode();
+	    Root->FailureNode = Root;
+	  }
+	for (TagIdx = 0; TagIdx < NumTags; TagIdx++)
+	  {
+		NewTag = Tags + TagIdx;
+		TweakIdx = 0;
+
+		NewTag->PSpectrum = Spectrum; 	// Add pointers from Tag to Spectrum
+
+		// make the tag point to the spectrum's Tweak so that they can share the
+		// same information later on
+
+		for (TweakIdx=0; TweakIdx<TWEAK_COUNT; TweakIdx++)
+		{
+			if (NewTag->Tweak->Charge == Node->Tweaks[TweakIdx].Charge &&
+				NewTag->Tweak->ParentMass == Node->Tweaks[TweakIdx].ParentMass)
+			{
+				NewTag->Tweak = Node->Tweaks + TweakIdx;
+				break;
+			}
+		}
+
+		if (TweakIdx == TWEAK_COUNT)
+		{
+			printf("BAD Error: Tweak went missing?!\n");
+			exit(1);
+		}
+
+		
+
+        AddTagToTrie(Root, NewTag, &DuplicateFlag);
+    }
+    //DebugPrintTrieTags(Root);
+    return Root;
+}
+
+
+
+
diff --git a/TagFile.h b/TagFile.h
new file mode 100644
index 0000000..204dca5
--- /dev/null
+++ b/TagFile.h
@@ -0,0 +1,67 @@
+//Title:          TagFile.h
+//Author:         Ari Frank
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+#ifndef __TAGFILE_H__
+#define __TAGFILE_H__
+
+#include "Trie.h"
+#include "Inspect.h"
+
+typedef struct ScanTags {
+	int ScanNumber;
+	int NumTags;
+
+	SpectrumTweak Tweaks[TWEAK_COUNT];
+	TrieTag *Tags;
+} ScanTags;
+
+
+typedef struct ExternalTagHolder {
+
+	int MaxScanNumber;
+
+	ScanTags *AllScanTags;
+
+	TrieTag *AllExternalTrieTags; // one allocation for all tags
+} ExternalTagHolder;
+
+TrieNode *AddExternalTags(TrieNode *Root, SpectrumNode *Spectrum);
+
+void ReadExternalTags(char *TagFilePath,  int verbose);
+
+void FreeExternalTagHolder();
+
+void WriteExternalTags( char *OutFile);
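+
+// Editor's note (added comment, not part of the upstream source): the intended
+// call order for this module appears to be ReadExternalTags() once to populate
+// the shared ExternalTagHolder, AddExternalTags() once per SpectrumNode to
+// copy that scan's tags into the search trie, optionally WriteExternalTags()
+// to dump the held tags back to disk, and FreeExternalTagHolder() at cleanup.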
+
+
+#endif
+
diff --git a/TagSkewScores.dat b/TagSkewScores.dat
new file mode 100644
index 0000000..9291304
Binary files /dev/null and b/TagSkewScores.dat differ
diff --git a/Tagger.c b/Tagger.c
new file mode 100644
index 0000000..e581ea4
--- /dev/null
+++ b/Tagger.c
@@ -0,0 +1,2148 @@
+//Title:          Tagger.c
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+// Tag generation.  Given a spectrum, generate TrieTag objects.
+// See TagTrainer.py for the generation of the tagging model based on
+// empirical ion frequencies.
+
+#include "CMemLeak.h"
+#include "Inspect.h"
+#include "Utils.h"
+#include "Tagger.h"
+#include "Spectrum.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include "Trie.h"
+#include "Mods.h"
+#include "Score.h"
+#include "FreeMod.h"
+#include "BN.h"
+#include "Scorpion.h"
+#include "Errors.h"
+
+// For each mass (in daltons), we have an expected ratio of the +1 isotope to
+// the main peak.  These ratios rise from near-0 to near 1 as mass goes from 0
+// up to 1750.  (For larger masses, isotope sets are more complex!)
+#define ISOTOPE_WEIGHT_COUNT 1750
+
+// How far can the isotope-peak-to-main-peak ratio differ from what we expect?
+// (This controls whether the members Is and HasPlausibleIsotopicPeak are set
+// for spectral peaks.  In practice, isotope ratios can vary quite a bit, so 
+// we're fairly permissive)
+#define TIGHT_ISOTOPE_RATIO_DIFFERENCE 0.5
+#define MAX_ISOTOPE_RATIO_DIFFERENCE 0.8
+
+// How many intensity-ranks do we track?  (We're granular for top-10 peaks, then less so 
+// for crappier peaks)
+#define INTENSITY_RANK_COUNT 22
+
+// Number of charges and sectors for which we have a tagging model.  The size
+// of the array g_TruthByNodeType will equal CHARGE_COUNT * CHUNK_COUNT, similarly for other
+// model params.
+#define CHARGE_COUNT 3
+#define CHUNK_COUNT 3
+
+// JumpingHash[n] has a list of all amino acids (and modifications) that have a mass
+// rounding off to n daltons.  To find, e.g., a jump matching 80.3, we'd check 
+// JumpingHash[79] and JumpingHash[80] and JumpingHash[81].
+#define MAX_JUMPING_HASH 1024
+JumpNode** JumpingHash = NULL;
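+
+// Editor's note (added sketch, not part of the upstream source, kept commented
+// out so the build is unchanged): a minimal illustration of the three-bucket
+// lookup described above.  FindJumpForMass is a hypothetical helper name; the
+// real lookup is performed inline in TagGraphPopulateEdges() below.
+//
+//JumpNode* FindJumpForMass(int JumpSize)
+//{
+//    int HashBucket;
+//    int Bucket;
+//    JumpNode* JNode;
+//    FAST_ROUND(JumpSize / (float)MASS_SCALE, HashBucket);
+//    for (Bucket = HashBucket - 1; Bucket < HashBucket + 2; Bucket++)
+//    {
+//        if (Bucket < 0 || Bucket >= MAX_JUMPING_HASH)
+//        {
+//            continue;
+//        }
+//        for (JNode = JumpingHash[Bucket]; JNode; JNode = JNode->Next)
+//        {
+//            // Accept the first jump within the mass tolerance; the real
+//            // code keeps scanning and considers every acceptable jump.
+//            if (abs(JumpSize - JNode->Mass) <= GlobalOptions->Epsilon)
+//            {
+//                return JNode;
+//            }
+//        }
+//    }
+//    return NULL;
+//}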
+
+// 
+StringNode* FirstTagCheckNode;
+StringNode* LastTagCheckNode;
+
+// JumpsByAA is a 2D array containing pointers to all jumps for each amino acid.
+// We iterate over JumpsByAA when setting jump scores.
+JumpNode** JumpsByAA; //[AMINO_ACIDS * GlobalOptions->DeltasPerAA];
+
+// Skew histo is used in scoring edges - SkewHistoStep[n] is the score penalty we
+// apply to an edge that deviates from the ideal jump size by floor(n/100).
+// Hard-coded, and should probably be part of the tagging model!
+double SkewHistoStep[] = {0.0000, 0.0000, -0.0037, -0.0501, -0.1061, -0.2106, -0.2418, 
+    -0.3466, -0.4198, -0.4800, -0.4861, -0.4863, -0.4926, -0.5421, -0.5893, -0.6851, 
+    -0.7173, -0.8108, -0.8811, -0.9275, -0.9302, -0.9302, -0.9353, -0.9802, -1.0259, 
+    -1.1027, -1.1395, -1.2253, -1.2640, -1.2921, -1.2931, -1.2931, -1.2960, -1.3207, 
+    -1.3587, -1.4249, -1.4567, -1.5158, -1.5444, -1.5696, -1.5710, -1.5710, -1.5710, 
+    -1.5989, -1.6153, -1.6641, -1.6884, -1.7345, -1.7544, -1.7708, -1.7714, -1.7714, 
+    -1.7714, -1.7903, -1.8150, -1.8556, -1.8819, -1.9274, -1.9462, -1.9716, -1.9723, 
+    -1.9723, -1.9737, -1.9976, -2.0244, -2.0640, -2.0957, -2.1574, -2.1966, -2.2151, 
+    -2.2151, -2.2151, -2.2178, -2.2512, -2.2848, -2.3753, -2.4070, -2.4806, -2.5463, 
+    -2.5879, -2.5891, -2.5891, -2.5917, -2.6571, -2.6951, -2.8598, -2.9352, -3.0991, 
+    -3.1702, -3.2468, -3.2493, -3.2493, -3.2669, -3.4412, -3.5462, -3.9350, -4.1434, 
+    -4.8738, -5.7776, -6.5501};
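+
+// Editor's note (added comment, not part of the upstream source): as the
+// currently commented-out skew-scoring block in TagGraphPopulateEdges() shows,
+// this table is indexed by the integer skew of an edge (capped at the last
+// entry), so edges that deviate further from the ideal jump mass receive
+// steadily larger penalties.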
+
+// IsotopeWeights[n] is the expected ratio of the +1 isotope to the +0 isotope, given
+// a peptide whose weight is n daltons.
+float IsotopeWeights[ISOTOPE_WEIGHT_COUNT];
+
+// Forward declarations:
+void TagGraphAddEndpointNodes(TagGraph* Graph, MSSpectrum* Spectrum);
+void DebugPrintTagList(MSSpectrum* Spectrum, TrieTag* Tags, int TagCount);
+void PrintTagToLine(FILE* OutputFile, TrieTag* Tag);
+void DebugPrintTagGraph(MSSpectrum* Spectrum, TagGraph* Graph);
+void DebugPrintTagsForPeptide(MSSpectrum* Spectrum, TagGraph* Graph, TrieTag* Tags, int TagCount);
+TrieTag* TagGraphGenerateTags(TagGraph* Graph, MSSpectrum* Spectrum, int* TagCount, 
+    int MaximumTagCount, SpectrumTweak* Tweak, float TagEdgeScoreMultiplier,
+    PRMBayesianModel* Model);
+
+
+// New (as of 3/2005) tagging model.  We load one model in for each NodeType.  (A NodeType is a combination of charge
+// and sector - e.g. charge 3+ and middle sector).  This model is used in scoring tag graph nodes, and in scoring
+// PRMs.
+
+#define BY_RANK_TINY 20
+#define BY_RANK_MISSING 21
+#define BY_RANK_COUNT 22
+#define BY_SKEW_COUNT 5
+typedef struct TaggingModel
+{
+    int BRank[BY_RANK_COUNT];
+    int SisterBRank[BY_RANK_COUNT];
+    int SisterBSkew[BY_SKEW_COUNT];
+    int BSkew[BY_SKEW_COUNT];
+    int SkewableBRank[BY_RANK_COUNT];
+    int YRank[BY_RANK_COUNT];
+    int SisterYRank[BY_RANK_COUNT];
+    int SisterYSkew[BY_SKEW_COUNT];
+    int YSkew[BY_SKEW_COUNT];
+    int SkewableYRank[BY_RANK_COUNT];    
+    int Witness[512];
+    int BIsotope[4];
+    int YIsotope[4];
+} TaggingModel;
+
+typedef struct TagMaster
+{
+    TaggingModel Models[CHARGE_COUNT * CHUNK_COUNT];
+    float PTMPenalty;
+} TagMaster;
+
+TagMaster MasterTaggingModel;
+
+// Constructor for TagGraph
+TagGraph* ConstructTagGraph(MSSpectrum* Spectrum)
+{
+    TagGraph* Graph;
+    Graph = (TagGraph*)calloc(1, sizeof(TagGraph));
+    return Graph;
+}
+
+// Destructor for a node from a TagGraph, as well as the node's edges.
+void FreeTagGraphNode(TagGraphNode* Node)
+{
+    TagGraphEdge* Edge;
+    TagGraphEdge* PrevEdge = NULL;
+    if (!Node)
+    {
+        return;
+    }
+    for (Edge = Node->FirstEdge; Edge; Edge = Edge->Next)
+    {
+        SafeFree(PrevEdge);
+        PrevEdge = Edge;
+    }
+    SafeFree(PrevEdge);
+    // Back edges:
+    SafeFree(Node->BackEdge);
+    SafeFree(Node->BackEdgeDouble);
+    SafeFree(Node->BackEdgeTriple);
+    SafeFree(Node);
+}
+
+// Destructor for a TagGraph.
+void FreeTagGraph(TagGraph* Graph)
+{
+    TagGraphNode* TagNode;
+    TagGraphNode* PrevTagNode = NULL;
+
+    //
+    if (!Graph)
+    {
+        return;
+    }
+    SafeFree(Graph->BackEdgeBuffer);
+    for (TagNode = Graph->FirstNode; TagNode; TagNode = TagNode->Next)
+    {
+        if (PrevTagNode)
+        {
+            FreeTagGraphNode(PrevTagNode);
+        }
+        PrevTagNode = TagNode;
+    }
+    if (PrevTagNode)
+    {
+        FreeTagGraphNode(PrevTagNode);
+    }
+    SafeFree(Graph->NodeIndex);
+    Graph->FirstNode = NULL;
+    Graph->LastNode = NULL;
+    SafeFree(Graph);
+}
+
+// Somewhat ugly macro for inserting a tag graph node into the list.
+// (Note that since we're not inserting phosphate loss peaks, this is
+// overkill - we will always be inserting at the end of the list in practice!)
+#define INSERT_TAGNODE_ASC(First, Last, Node)\
+{\
+  InsertAfter = (Last);						\
+  while ((InsertAfter) && (InsertAfter)->Mass > (Node)->Mass)	\
+    {								\
+      (InsertAfter) = (InsertAfter)->Prev;			\
+    }								\
+  if (InsertAfter)						\
+    {								\
+      if ((InsertAfter)->Next)					\
+	{							\
+	  InsertAfter->Next->Prev = Node;			\
+	}							\
+      Node->Next = InsertAfter->Next;				\
+      InsertAfter->Next = Node;					\
+      Node->Prev = InsertAfter;					\
+    }								\
+  else								\
+  {						\
+    Node->Next = First;				\
+    if (First)\
+      {						\
+        First->Prev = Node;			\
+      }						\
+    First = Node;				\
+  }						\
+  if (InsertAfter == Last)			\
+    {						\
+      Last = Node;				\
+    }						\
+}
+
+#define INSERT_TAGNODE_DESC(First, Last, Node)\
+{\
+InsertAfter = Last;\
+while (InsertAfter && InsertAfter->Mass < Node->Mass)\
+{\
+    InsertAfter = InsertAfter->Prev;\
+}\
+if (InsertAfter)\
+{\
+    if (InsertAfter->Next)\
+    {\
+        InsertAfter->Next->Prev = Node;\
+    }\
+    Node->Next = InsertAfter->Next;\
+    InsertAfter->Next = Node;\
+    Node->Prev = InsertAfter;\
+}\
+else\
+{\
+    Node->Next = First;\
+    if (First)\
+    {\
+        First->Prev = Node;\
+    }\
+    First = Node;\
+}\
+if (InsertAfter == Last)\
+{\
+    Last = Node;\
+}\
+}
+
+// Take a new (empty) tag graph, and add nodes to it.  For each spectral peak, add 2 nodes (one b, one y).
+// Also add endpoint nodes. 
+void TagGraphAddNodes(TagGraph* Graph, MSSpectrum* Spectrum)
+{
+    int PeakIndex;
+    TagGraphNode* FirstBNode = NULL;
+    TagGraphNode* LastBNode = NULL;
+    TagGraphNode* FirstYNode = NULL;
+    TagGraphNode* LastYNode = NULL;
+    TagGraphNode* InsertAfter = NULL;
+    TagGraphNode* Node;
+    TagGraphNode* MergingBNode;
+    TagGraphNode* MergingYNode;
+    int NodeMass;
+    int NodeIndex;
+    int MinPRMMass = 50 * DALTON;
+    // Iterate over peaks.  For each peak, add a b and a y node.  We'll build two lists,
+    // b node list (FirstBNode / LastBNode) and y node list (FirstYNode / LastYNode), then
+    // merge the lists.
+    for (PeakIndex = 0; PeakIndex < Spectrum->PeakCount; PeakIndex++)
+    {
+        NodeMass = Spectrum->Peaks[PeakIndex].Mass - HYDROGEN_MASS;
+	//printf("Peak Index %d, NodeMass %d\n",PeakIndex,NodeMass);
+        // Filter any nodes whose mass is negative, or <50 but not zero, or larger than the precursor mass.  
+        // (A node at, say, PRM 19 couldn't possibly be part of a true peptide, since the smallest mass-jump 
+        // is 57)
+        if (NodeMass > -GlobalOptions->Epsilon && (NodeMass < GlobalOptions->Epsilon || NodeMass > MinPRMMass) && (NodeMass < Spectrum->ParentMass + GlobalOptions->ParentMassEpsilon))
+        {
+	  Node = (TagGraphNode*)calloc(1, sizeof(TagGraphNode));
+            Node->NodeType = evGraphNodeB;
+            Node->OriginalPeakIndex = PeakIndex;
+            Node->IntensityRankB = Spectrum->Peaks[PeakIndex].IntensityRank;
+            Node->BIndex = PeakIndex;
+            Node->YIndex = -1;
+            Node->IntensityRankY = -1;
+            Node->Mass = NodeMass;
+            Node->IonTypeFlags = ION_FLAG_B;
+            INSERT_TAGNODE_ASC(FirstBNode, LastBNode, Node);
+	    if (0)//(Spectrum->Charge > 2 && Spectrum->Peaks[PeakIndex].IntensityRank < 16)
+	      { //charge 3 spectra have high intensity doubly-charged peaks. I need to put those into the graph
+		Node = (TagGraphNode*)calloc(1, sizeof(TagGraphNode));
+		Node->NodeType = evGraphNodeB;
+		Node->OriginalPeakIndex = PeakIndex;
+		Node->IntensityRankB = Spectrum->Peaks[PeakIndex].IntensityRank;
+		Node->BIndex = PeakIndex;
+		Node->YIndex = -1;
+		Node->IntensityRankY = -1;
+		Node->Mass = (NodeMass * 2) - HYDROGEN_MASS; //the single charge mass, if the peak was doubly charged
+		Node->IonTypeFlags = ION_FLAG_B;
+		INSERT_TAGNODE_ASC(FirstBNode, LastBNode, Node);
+	      }
+	    //printf("Peak %d intensity %f rank %d\n",Spectrum->Peaks[PeakIndex].Mass, Spectrum->Peaks[PeakIndex].Intensity, Spectrum->Peaks[PeakIndex].IntensityRank);
+        }
+	//else
+	//	  {
+	//    printf("NodeMass is %d <= %d\n",NodeMass,-GlobalOptions->Epsilon);
+	//    printf("NodeMass is %d >= %d or <= %d\n",NodeMass,GlobalOptions->Epsilon,MinPRMMass);
+	//    printf("NodeMass is %d <= %d\n",NodeMass,Spectrum->ParentMass + GlobalOptions->ParentMassEpsilon);
+	//    printf("ParentMass %d\n",Spectrum->ParentMass);
+	    //   //getch();
+	  // }
+	 NodeMass = Spectrum->ParentMass - Spectrum->Peaks[PeakIndex].Mass;
+	 //printf("Peak Index %d, NodeMass %d\n",PeakIndex,NodeMass);
+        if (NodeMass > -GlobalOptions->Epsilon && (NodeMass < GlobalOptions->Epsilon || NodeMass > MinPRMMass) && (NodeMass < Spectrum->ParentMass + GlobalOptions->ParentMassEpsilon))
+        {
+	  Node = (TagGraphNode*)calloc(1, sizeof(TagGraphNode));
+            Node->NodeType = evGraphNodeY;
+            Node->OriginalPeakIndex = PeakIndex;
+            Node->IntensityRankY = Spectrum->Peaks[PeakIndex].IntensityRank;
+            Node->IntensityRankB = -1;
+            Node->YIndex = PeakIndex;
+            Node->BIndex = -1;
+            Node->Mass = NodeMass;
+            Node->IonTypeFlags = ION_FLAG_Y;
+            INSERT_TAGNODE_DESC(FirstYNode, LastYNode, Node);
+	    if (0)//(Spectrum->Charge > 2 && Spectrum->Peaks[PeakIndex].IntensityRank < 16)
+	      { //charge 3 spectra have high intensity doubly-charged peaks. I need to put those into the graph
+		Node = (TagGraphNode*)calloc(1, sizeof(TagGraphNode));
+		Node->NodeType = evGraphNodeY;
+		Node->OriginalPeakIndex = PeakIndex;
+		Node->IntensityRankY = Spectrum->Peaks[PeakIndex].IntensityRank;
+		Node->IntensityRankB = -1;
+		Node->YIndex = PeakIndex;
+		Node->BIndex = -1;
+		Node->Mass = (NodeMass * 2) - HYDROGEN_MASS; //the single charge mass, if the peak was doubly charged
+		Node->IonTypeFlags = ION_FLAG_Y;
+		INSERT_TAGNODE_DESC(FirstYNode, LastYNode, Node);
+	      }
+        }
+        // We could insert phosphate-loss peaks for b and y nodes at this point.
+        // There are cases (particularly for breaks next to the phosphorylation site,
+        // and for phosphoserines) when the phosphate-loss peak is MORE LIKELY than
+        // the original peak.  However, if we insert phosphate-loss peaks, we end up
+        // with 4 nodes per peak rather than 2...that slows the speed of tag generation 
+        // down *considerably*, and probably lowers selectivity quite a bit.
+    }
+    ///////////////////////////////////////////
+    // Merge b and y node lists into the list Graph->FirstNode...Graph->LastNode
+    MergingBNode = FirstBNode;
+    MergingYNode = LastYNode;
+    while (1)
+    {
+        if (!MergingBNode && !MergingYNode)
+        {
+            break;
+        }
+        if (!MergingBNode || (MergingYNode && (MergingBNode->Mass > MergingYNode->Mass)))
+        {
+            // Insert the y node into the master list:
+            Node = MergingYNode->Prev; // temp
+            if (!Graph->FirstNode)
+            {
+                Graph->FirstNode = MergingYNode;
+                Graph->LastNode = MergingYNode;
+                MergingYNode->Next = NULL;
+                MergingYNode->Prev = NULL;
+            }
+            else
+            {
+                MergingYNode->Prev = Graph->LastNode;
+                Graph->LastNode->Next = MergingYNode;
+                Graph->LastNode = MergingYNode;
+            }
+            MergingYNode->Next = NULL;
+            MergingYNode = Node;            
+        }
+        else
+        {
+            // Insert the b node into the master list:
+            Node = MergingBNode->Next; // temp
+            if (!Graph->FirstNode)
+            {
+                Graph->FirstNode = MergingBNode;
+                Graph->LastNode = MergingBNode;
+                MergingBNode->Next = NULL;
+                MergingBNode->Prev = NULL;
+            }
+            else
+            {
+                MergingBNode->Prev = Graph->LastNode;
+                Graph->LastNode->Next = MergingBNode;
+                Graph->LastNode = MergingBNode;
+            }
+            MergingBNode->Next = NULL;
+            MergingBNode = Node;
+        }
+        Graph->NodeCount++;
+    }
+
+    TagGraphAddEndpointNodes(Graph, Spectrum);
+    for (Node = Graph->FirstNode, NodeIndex = 0; Node; Node = Node->Next, NodeIndex++)
+    {
+        Node->Index = NodeIndex;
+    }
+}
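+
+// Editor's note (added comment, not part of the upstream source): to make the
+// node construction above concrete, a peak observed at (scaled) mass M yields
+// a b-type node at PRM = M - HYDROGEN_MASS (its prefix residue mass if the
+// peak is a b ion) and a y-type node at PRM = ParentMass - M (its prefix
+// residue mass if the same peak is a y ion).  Both nodes record the same
+// OriginalPeakIndex, and nodes below roughly 50 Da (other than zero) or above
+// the parent mass are discarded.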
+
+// Insert another node into the tag-graph.  (Used only for a few nodes, as this isn't super fast)
+void InsertTagGraphNode(TagGraph* Graph, TagGraphNode* Node)
+{
+    TagGraphNode* TempNode;
+    // Iterate backwards, until either TempNode points to a smaller PRM or we fall off the end of the list.
+    for (TempNode = Graph->LastNode; TempNode && TempNode->Mass > Node->Mass; TempNode = TempNode->Prev)
+    {
+        ;;
+    }
+    if (!TempNode)
+    { 
+        // This new node is smaller than any we've seen.
+        if (Graph->FirstNode)
+        {
+            Graph->FirstNode->Prev = Node;
+        }
+        else
+        {
+            Graph->LastNode = Node;
+        }
+        Node->Next = Graph->FirstNode;
+        Graph->FirstNode = Node;
+    }
+    else if (TempNode->Next)
+    {
+        TempNode->Next->Prev = Node;
+        Node->Next = TempNode->Next;
+        Node->Prev = TempNode;
+        TempNode->Next = Node;
+    }
+    else
+    {
+        Node->Prev = Graph->LastNode;
+        Graph->LastNode->Next = Node;
+        if (Graph->LastNode == Graph->FirstNode)
+        {
+            Graph->FirstNode = Node;
+        }
+        Graph->LastNode = Node;
+    }
+    Graph->NodeCount++;
+}
+
+// Add the "goalpost nodes" to our tag graph, mass 0 and parent mass:
+void TagGraphAddEndpointNodes(TagGraph* Graph, MSSpectrum* Spectrum)
+{
+    TagGraphNode* Node;
+    int ModType;
+
+    // LEFT edge:
+    Node = (TagGraphNode*)calloc(1, sizeof(TagGraphNode));
+    Node->Mass = 0;
+    Node->NodeType = evGraphNodeLeft;
+    Node->IonTypeFlags = ION_FLAG_B;
+    Node->OriginalPeakIndex = -1;
+    
+    InsertTagGraphNode(Graph, Node);
+
+    // LEFT EDGE plus N-terminal mod:
+    for (ModType = 0; ModType < AllPTModCount; ModType++)
+    {
+        if (AllKnownPTMods[ModType].Flags & DELTA_FLAG_N_TERMINAL)
+        {
+	  Node = (TagGraphNode*)calloc(1, sizeof(TagGraphNode));
+            Node->Mass = AllKnownPTMods[ModType].Mass;
+            Node->NodeType = evGraphNodeLeftMod;
+            Node->IonTypeFlags = ION_FLAG_B;
+            Node->OriginalPeakIndex = -1;
+            // The node stores a pointer to the MassDelta, so that the tag
+            // will also include the MassDelta:
+            Node->PTM = MassDeltaByIndex[MAX_PT_MODTYPE * MDBI_ALL_MODS + ModType];
+            //Node->PTM = ModType;
+            InsertTagGraphNode(Graph, Node);
+        }
+    }
+
+    Node = (TagGraphNode*)calloc(1, sizeof(TagGraphNode));
+    Node->Mass = Spectrum->ParentMass - PARENT_MASS_BOOST;
+    Node->NodeType = evGraphNodeRight;
+    Node->IonTypeFlags = ION_FLAG_Y;
+    Node->OriginalPeakIndex = -1;
+    InsertTagGraphNode(Graph, Node);
+
+    // RIGHT EDGE minus C-terminal PTM:
+    for (ModType = 0; ModType < AllPTModCount; ModType++)
+    {
+        if (AllKnownPTMods[ModType].Flags & DELTA_FLAG_C_TERMINAL)
+        {
+	  Node = (TagGraphNode*)calloc(1, sizeof(TagGraphNode));
+            Node->Mass = Spectrum->ParentMass - PARENT_MASS_BOOST - AllKnownPTMods[ModType].Mass;
+            Node->NodeType = evGraphNodeRightMod;
+            Node->IonTypeFlags = ION_FLAG_Y;
+            Node->OriginalPeakIndex = -1;
+            Node->PTM = MassDeltaByIndex[MAX_PT_MODTYPE * MDBI_ALL_MODS + ModType];
+            //Node->PTM = ModType;
+            InsertTagGraphNode(Graph, Node);
+        }
+    }
+}
+
+// Print all the PRM nodes from a tag graph.  (Handy for debugging tagging)
+void DebugPrintTagGraph(MSSpectrum* Spectrum, TagGraph* Graph)
+{
+    TagGraphNode* Node;
+    //
+    printf(">->Printing tag graph...\n");
+    for (Node = Graph->FirstNode; Node; Node = Node->Next)
+    {
+#ifdef DEBUG_TAG_GENERATION
+        printf("%s\n", Node->VerboseNodeInfo);
+#else
+        printf("%.2f %.2f\n", Node->Mass / (float)MASS_SCALE, Node->Score);
+        //printf("At %.2f node %d ion types %d score %.3f:\n", Node->Mass/100.0, Node->NodeType, Node->IonTypeFlags, Node->Score);
+        //printf("  b%d y%d in%.2f io%.2f is%.2f\n", Node->IntensityRankB, Node->IntensityRankY, Node->IntensityScore, Node->IonTypeScore, Node->IsotopeScore);
+
+        if (Node->BIndex > -1)
+        {
+        }
+        if (Node->YIndex > -1)
+        {
+        }
+#endif
+        
+    }
+    printf("<-<End of tag graph.\n");
+}
+
+// The JumpingHash stores, for each mass (rounded to nearest integer), a list
+// of amino acids (or modified amino acids) matching the mass.  When constructing
+// tags, we allow a move from node A to node B if we find a jump whose size matches
+// the mass difference between nodes A and B.  (We check three hash buckets, to
+// compensate for roundoff screwery)
+JumpNode* JumpingHashAddJump(int Mass, char Amino, MassDelta* Delta)
+{
+    int HashBucket;
+    JumpNode* Node;
+    JumpNode* NewNode;
+    JumpNode* Prev;
+    // HashBucket = Mass, rounded to nearest int
+    FAST_ROUND(Mass / (float)MASS_SCALE, HashBucket); 
+    if (HashBucket < 0 || HashBucket >= MAX_JUMPING_HASH)
+    {
+        printf("** ERROR: Bad mass in JumpingHashAddJump\n");
+        printf("Mass %d amino %c delta %s\n", Mass, Amino, Delta->Name);
+        return NULL;
+    }
+    // HashBucket = (int)Mass;
+    //if (Mass > HashBucket + 0.5)
+    //{
+    //    HashBucket += 1;
+    //}
+    NewNode = (JumpNode*)calloc(1, sizeof(JumpNode));
+    NewNode->Amino = Amino;
+    NewNode->Mass = Mass;
+    NewNode->Delta = Delta;  // The PTM for this jump, or NULL if there's no mod.
+    if (NewNode->Delta)
+    {
+        NewNode->Score = Delta->Score;
+    }
+    Node = JumpingHash[HashBucket];
+
+    if (!Node)
+    {
+        // Add a brand new entry to the hash:
+        JumpingHash[HashBucket] = NewNode;
+    }
+    else
+    {
+        // Add this jump to the end of the list.  
+        // (Lists are short, so we don't bother keeping a tail pointer or two-way links)
+        for (; Node; Node = Node->Next)
+        {
+            Prev = Node;
+        }
+        Prev->Next = NewNode;
+    }
+    return NewNode;
+}
+
+// Populate the jumping hash with each amino acid, and each modified amino acid.
+void PopulateJumpingHash()
+{
+    int Amino;
+    int PTModIndex;
+    MassDelta* Delta;
+    int ModForAAIndex;
+    JumpNode* JNode;
+
+    FreeJumpingHash(); // free any old stuff
+
+    // Allocate memory:
+    JumpingHash = (JumpNode**)calloc(MAX_JUMPING_HASH, sizeof(JumpNode*));
+    SafeFree(JumpsByAA);
+    JumpsByAA = (JumpNode**)calloc(sizeof(JumpNode*), AMINO_ACIDS * GlobalOptions->DeltasPerAA);
+
+    memset(JumpsByAA, 0, sizeof(JumpNode*) * AMINO_ACIDS * GlobalOptions->DeltasPerAA);
+    for (Amino = 'A'; Amino<='Y'; Amino++)
+    {
+        if (PeptideMass[Amino]<0.01)
+        {
+            continue; // Not an amino acid ("O" or "U" or "J" or somesuch)
+        }
+        ModForAAIndex = 0;
+        // Don't build a jump node for unmodified Q or unmodified I, because they are accounted
+        // for by the jumps for unmodified K and L.
+        if (Amino != 'Q' && Amino != 'I')
+        {
+            JNode = JumpingHashAddJump(PeptideMass[Amino], (char)Amino, NULL);
+            JumpsByAA[(Amino-'A') * GlobalOptions->DeltasPerAA] = JNode;
+            ModForAAIndex = 1;
+        }
+        for (PTModIndex = 0; PTModIndex < GlobalOptions->DeltasPerAA; PTModIndex++)
+        {
+            Delta = &MassDeltas[Amino - 'A'][PTModIndex];
+            
+            if (Delta->Flags)
+            {
+                if (!(Delta->Flags & (DELTA_FLAG_C_TERMINAL | DELTA_FLAG_N_TERMINAL)))
+                {
+                    JNode = JumpingHashAddJump(Delta->RealDelta + PeptideMass[Amino], (char)Amino, Delta);
+                    JumpsByAA[(Amino-'A') * GlobalOptions->DeltasPerAA + ModForAAIndex] = JNode;
+                    ModForAAIndex++;
+                }
+            }
+            else
+            {
+                // There are no more PTMs in MassDeltas[Amino], so stop iterating:
+                break;
+            }
+        }
+    }
+}
+
+// Destructor for the JumpingHash contents.
+void FreeJumpingHash()
+{
+    int HashBucket;
+    JumpNode* Node;
+    JumpNode* Prev = NULL;
+    //
+    if (JumpingHash)
+    {
+        for (HashBucket = 0; HashBucket < MAX_JUMPING_HASH; HashBucket++)
+        {
+            Prev = NULL;
+            for (Node = JumpingHash[HashBucket]; Node; Node = Node->Next)
+            {
+                SafeFree(Prev);
+                Prev = Node;
+            }
+            SafeFree(Prev);
+            JumpingHash[HashBucket] = NULL;
+        }
+        SafeFree(JumpingHash);
+        JumpingHash = NULL;
+    }
+    SafeFree(JumpsByAA);
+    JumpsByAA = NULL;
+}
+
+void DebugPrintTagGraphEdges(TagGraph* Graph)
+{
+    TagGraphNode* Node;
+    TagGraphEdge* Edge;
+    for (Node = Graph->FirstNode; Node; Node = Node->Next)
+    {
+        printf("Node at %.2f: (score %.2f)\n", Node->Mass / (float)MASS_SCALE, Node->Score);
+        for (Edge = Node->FirstEdge; Edge; Edge = Edge->Next)
+        {
+            printf("-->Add '%c' (%.2f, skew %.2f) to reach %.2f\n", Edge->Jump->Amino, Edge->Jump->Mass / (float)MASS_SCALE,
+                ((Edge->ToNode->Mass - Node->Mass) - (Edge->Jump->Mass))/(float)MASS_SCALE,
+                Edge->ToNode->Mass / (float)MASS_SCALE);
+        }
+    }
+}
+
+// Called after populating the tag graph with nodes.
+// Now we add edges between any two nodes that can be linked by a JUMP (an amino acid, or 
+// an amino acid plus a decoration)
+void TagGraphPopulateEdges(TagGraph* Graph)
+{
+    TagGraphNode* Node;
+    TagGraphNode* OtherNode;
+    int JumpSize;
+    // For efficiency, we *never* consider a jump smaller or larger than these boundaries.
+    // Note that glycine has mass 57.02, and tryptophan has mass 186.08.
+    // (If there are PTMs, we do consider jumps of MaxJumpSize + MaxPTMMass; that's probably overkill)
+    int MinJumpSize = GLYCINE_MASS - (DALTON * 2);
+    int MaxJumpSize;
+    int MaxAA;
+    int ModIndex;
+    int MaxSkew;
+    TagGraphEdge* Edge;
+    int IntSkew;
+    int EdgeCount = 0;
+    int ModJumpCount = 0;
+    int AA;
+    JumpNode* JNode;
+    int Bucket;
+    int HashBucket;
+    MaxAA = 0;
+    for (AA = 'A'; AA < 'X'; AA++)
+    {
+        MaxAA = max(MaxAA, PeptideMass[AA]);
+    }
+    MaxJumpSize = MaxAA;
+    for (ModIndex = 0; ModIndex < AllPTModCount; ModIndex++)
+    {
+      MaxJumpSize = (int)(max(MassDeltaByIndex[MAX_PT_MODTYPE * MDBI_ALL_MODS + ModIndex]->RealDelta + MaxAA, MaxJumpSize));
+        for (AA = 0; AA < 26; AA++)
+        {
+            ModJumpCount += AllKnownPTMods[ModIndex].Allowed[AA];
+        }
+    }
+    MaxJumpSize += GlobalOptions->ParentMassEpsilon;
+
+    MaxSkew = sizeof(SkewHistoStep) / sizeof(double) - 1;
+
+    // We do a double-loop over the graph to find all legal edges.
+    for (Node = Graph->FirstNode; Node; Node = Node->Next)
+    {
+        if (Node->NodeType == evGraphNodeRight || Node->NodeType == evGraphNodeRightMod)
+        {
+            // This is a right-edge, so no edges emit from it:
+            continue;
+        }
+
+        for (OtherNode = Node->Next; OtherNode; OtherNode = OtherNode->Next)
+        {
+            if (OtherNode->NodeType == evGraphNodeLeft || OtherNode->NodeType == evGraphNodeLeftMod)
+            {
+                // This is a left-edge, so no edges enter it:
+                continue;
+            }
+            JumpSize = OtherNode->Mass - Node->Mass;
+            if (JumpSize < MinJumpSize)
+            {
+                continue;
+            }
+            if (JumpSize > MaxJumpSize)
+            {
+                break;
+            }
+            FAST_ROUND(JumpSize / (float)MASS_SCALE, HashBucket);
+            for (Bucket = HashBucket - 1; Bucket < HashBucket + 2; Bucket++)
+            {
+                if (Bucket < 0 || Bucket >= MAX_JUMPING_HASH)
+                {
+                    continue;
+                }
+                for (JNode = JumpingHash[Bucket]; JNode; JNode = JNode->Next)
+                {
+                    IntSkew = JumpSize - JNode->Mass;
+                    if (abs(IntSkew) > GlobalOptions->Epsilon)
+                    {
+                        continue;
+                    }
+                    if (JNode->Delta)
+                    {
+                        if (GlobalOptions->TagPTMMode == 1 || GlobalOptions->MaxPTMods == 0 || JNode->Delta->Score < -5)
+                        {
+                            continue;
+                        }
+                    }
+                    // Allocate a TagGraphEdge, initialize it, and add it to this node's list of edges:
+                    Edge = (TagGraphEdge*)calloc(1, sizeof(TagGraphEdge));
+                    Edge->Jump = JNode;
+                    Edge->FromNode = Node;
+                    Edge->ToNode = OtherNode;
+                    Edge->Skew = IntSkew;
+                    // For now, no skew scoring:
+                    //if (IntSkew > MaxSkew)
+                    //{
+                    //    Edge->Score = (float)SkewHistoStep[MaxSkew];
+                    //}
+                    //else
+                    //{
+                    //    Edge->Score = (float)SkewHistoStep[IntSkew];
+                    //}
+                    Edge->Score = JNode->Score;
+                    if (!Node->FirstEdge)
+                    {
+                        Node->FirstEdge = Edge;
+                        Node->LastEdge = Edge;
+                    }
+                    else
+                    {
+                        Node->LastEdge->Next = Edge;
+                        Node->LastEdge = Edge;
+                    }
+                    if (Edge->Jump->Delta)
+                    {
+                        Edge->Score += MasterTaggingModel.PTMPenalty;
+                    }
+                    //GlobalStats->TagGraphEdges++;
+                    EdgeCount++;
+                } // Jnode loop
+            } // bucket loop
+        }
+    }
+}
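+
+// Editor's note (added comment, not part of the upstream source): in short, an
+// edge is added from node A to node B exactly when some jump J satisfies
+// |(B->Mass - A->Mass) - J->Mass| <= GlobalOptions->Epsilon.  Jumps that carry
+// a modification are skipped when TagPTMMode == 1, when MaxPTMods == 0, or
+// when the jump's delta score is below -5, and any modified edge that is kept
+// also pays the PTMPenalty from the master tagging model.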
+
+// For quick-sort of tags - list from highest to lowest score.
+int CompareTagScores(const TrieTag* TagA, const TrieTag* TagB)
+{
+    if (TagA->Score > TagB->Score)
+    {
+        return -1;
+    }
+    if (TagA->Score < TagB->Score)
+    {
+        return 1;
+    }
+    if (TagA->PrefixMass < TagB->PrefixMass)
+    {
+        return -1;
+    }
+    if (TagA->PrefixMass > TagB->PrefixMass)
+    {
+        return 1;
+    }
+    if (TagA < TagB)
+    {
+        return -1;
+    }
+    if (TagA > TagB)
+    {
+        return 1;
+    }
+    return 0;
+}
+
+int TagSkewBinCount;
+float* TagSkewScore = NULL;
+float* TagTotalAbsSkewScore = NULL;
+
+void FreeTagSkewScores()
+{
+    SafeFree(TagSkewScore);
+    TagSkewScore = NULL;
+    SafeFree(TagTotalAbsSkewScore);
+    TagTotalAbsSkewScore = NULL;
+}
+
+void SetTagSkewScores()
+{
+    char FilePath[2048];
+    FILE* TagSkewFile;
+    //
+    if (TagSkewScore)
+    {
+        return;
+    }
+    sprintf(FilePath, "%s%s", GlobalOptions->ResourceDir, "TagSkewScores.dat");
+    TagSkewFile = fopen(FilePath, "rb");
+    if (!TagSkewFile)
+    {
+        REPORT_ERROR_S(3, FilePath);
+        // To avoid crashing later, set up a length-1 array:
+        TagSkewBinCount = 1;
+        TagSkewScore = (float*)calloc(1, sizeof(float));
+        TagSkewScore[0] = 0;
+        TagTotalAbsSkewScore = (float*)calloc(1, sizeof(float));
+        TagTotalAbsSkewScore[0] = 0;
+        return;
+    }
+    // Read the number of entries:
+    ReadBinary(&TagSkewBinCount, sizeof(int), 1, TagSkewFile);
+    // Allocate arrays:
+    TagSkewScore = (float*)calloc(TagSkewBinCount, sizeof(float));
+    TagTotalAbsSkewScore = (float*)calloc(TagSkewBinCount, sizeof(float));
+    // Populate arrays:
+    ReadBinary(TagSkewScore, sizeof(float), TagSkewBinCount, TagSkewFile);
+    ReadBinary(TagTotalAbsSkewScore, sizeof(float), TagSkewBinCount, TagSkewFile);
+    fclose(TagSkewFile);
+}
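+
+// Editor's note (added sketch, not part of the upstream source, kept commented
+// out so the build is unchanged): TagSkewScores.dat, as read above, is simply
+// an int bin count followed by the two float tables.  A matching writer would
+// look roughly like this hypothetical helper.
+//
+//void WriteTagSkewScoreFile(char* FilePath, float* SkewScores,
+//    float* AbsSkewScores, int BinCount)
+//{
+//    FILE* SkewFile = fopen(FilePath, "wb");
+//    if (!SkewFile)
+//    {
+//        return;
+//    }
+//    fwrite(&BinCount, sizeof(int), 1, SkewFile);
+//    fwrite(SkewScores, sizeof(float), BinCount, SkewFile);
+//    fwrite(AbsSkewScores, sizeof(float), BinCount, SkewFile);
+//    fclose(SkewFile);
+//}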
+
+static TrieTag* AllTags = NULL;
+//// New tag generation function: Generates tags of a (more-or-less) arbitrary length!
+//TrieTag* TagGraphGenerateTagsOld(TagGraph* Graph, MSSpectrum* Spectrum, int* TagCount, 
+//    int MaximumTagCount, SpectrumTweak* Tweak, float TagEdgeScoreMultiplier)
+//{
+//    TagGraphNode* TagNodes[12];
+//    TagGraphEdge* TagEdges[12];
+//    int NodeIndex;
+//    int EdgeIndex;
+//    TagGraphNode* Node;
+//    TagGraphEdge* Edge;
+//    TagGraphNode* LeftNode;
+//    TagGraphNode* RightNode;
+//    int CurrentDepth;
+//    int InternalNodes;
+//    float NodeScore;
+//    TrieTag* Tag;
+//    int TagAllocation;
+//    int BacktrackFlag;
+//    int AllTagCount = 0;
+//    int Bin;
+//    float ScoreToBeat = -9999;
+//    //
+//    *TagCount = 0;
+//    TagAllocation = 1024;
+//    if (!AllTags)
+//    {
+//        AllTags = (TrieTag*)calloc(sizeof(TrieTag), TagAllocation);
+//    }
+//    NodeIndex = 0;
+//    EdgeIndex = -1;
+//    BacktrackFlag = 0;
+//    CurrentDepth = 0;
+//    TagNodes[0] = Graph->FirstNode;
+//    while (1)
+//    {
+//        // If we're BACKTRACKING, then move to a sibling or parent:
+//        if (BacktrackFlag)
+//        {
+//            // Move the root of the subtree, if necessary:
+//            if (CurrentDepth == 0)
+//            {
+//                // Move to the next 'first' node:
+//                TagNodes[0] = TagNodes[0]->Next;
+//                if (!TagNodes[0])
+//                {
+//                    break;
+//                }
+//                BacktrackFlag = 0;
+//                continue;
+//            }
+//            // Move to a sibling, if we can:
+//            TagEdges[CurrentDepth - 1] = TagEdges[CurrentDepth - 1]->Next;
+//            if (TagEdges[CurrentDepth - 1])
+//            {
+//                TagNodes[CurrentDepth] = TagEdges[CurrentDepth - 1]->ToNode;
+//                BacktrackFlag = 0;
+//                continue;
+//            }
+//            // No more siblings - move up one level.
+//            CurrentDepth--;
+//            continue;
+//        }
+//
+//        // Special case for level 1: Skip tag nodes with silly masses like 20Da.
+//        if (CurrentDepth == 0)
+//        {
+//            Node = TagNodes[0];
+//            if (Node->Mass > GlobalOptions->ParentMassEpsilon && Node->Mass < GLYCINE_MASS - GlobalOptions->Epsilon)
+//            {
+//                BacktrackFlag = 1;
+//                continue;
+//            }
+//        }
+//
+//        // If we're deep enough, report a tag and start backtracking:
+//        if (CurrentDepth >= GlobalOptions->GenerateTagLength)
+//        {
+//            BacktrackFlag = 1;
+//            LeftNode = TagNodes[0];
+//            RightNode = TagNodes[CurrentDepth];
+//            Tag = AllTags + (*TagCount);
+//            InternalNodes = 0;
+//            NodeScore = 0;
+//            for (NodeIndex = 0; NodeIndex <= CurrentDepth; NodeIndex++)
+//            {
+//                Node = TagNodes[NodeIndex];
+//                if (Node->OriginalPeakIndex > 0)
+//                {
+//                    NodeScore += Node->Score;
+//                    InternalNodes++;
+//                }
+//                Tag->Nodes[NodeIndex] = TagNodes[NodeIndex];
+//            }
+//            NodeScore *= (GlobalOptions->GenerateTagLength + 1) / (float)max(1, InternalNodes);
+//            Tag->Score = NodeScore;
+//            Tag->ModsUsed = 0;
+//            memset(Tag->ModType, 0, sizeof(MassDelta*) * MAX_PT_MODS);
+//            memset(Tag->AminoIndex, -1, sizeof(int) * MAX_PT_MODS);
+//            if (LeftNode->NodeType == evGraphNodeLeftMod)
+//            {
+//                Tag->AminoIndex[Tag->ModsUsed] = 0;
+//                Tag->ModType[Tag->ModsUsed] = LeftNode->PTM;
+//                Tag->ModsUsed++;
+//            }
+//            for (EdgeIndex = 0; EdgeIndex < CurrentDepth; EdgeIndex++)
+//            {
+//                Edge = TagEdges[EdgeIndex];
+//                Tag->Score += TagEdges[EdgeIndex]->Score;
+//                Tag->Tag[EdgeIndex] = Edge->Jump->Amino;
+//                if (Edge->Jump->Delta)
+//                {
+//                    Tag->AminoIndex[Tag->ModsUsed] = EdgeIndex;
+//                    Tag->ModType[Tag->ModsUsed] = Edge->Jump->Delta;
+//                    Tag->ModsUsed++;
+//                }
+//            }
+//            // Set skew info:
+//            Tag->TotalSkew = 0;
+//            Tag->TotalAbsSkew = 0;
+//            for (EdgeIndex = 0; EdgeIndex < CurrentDepth; EdgeIndex++)
+//            {
+//                Edge = TagEdges[EdgeIndex];
+//                Tag->TotalSkew += Edge->Skew;
+//                Tag->TotalAbsSkew += abs(Edge->Skew);
+//            }
+//            ////////////////////////////////////////////////////
+//            // If the total skew is large, penalize the tag's score:
+//            Bin = (int)fabs((Tag->TotalSkew / 50.0) + 0.5);
+//            if (Bin >= TagSkewBinCount)
+//            {
+//                Bin = TagSkewBinCount - 1;
+//            }
+//            Tag->Score += TagSkewScore[Bin] * TagEdgeScoreMultiplier;
+//            Bin = (int)fabs((Tag->TotalAbsSkew / 50.0) + 0.5);
+//            if (Bin >= TagSkewBinCount)
+//            {
+//                Bin = TagSkewBinCount - 1;
+//            }
+//            Tag->Score += TagTotalAbsSkewScore[Bin] * TagEdgeScoreMultiplier;
+//            ////////////////////////////////////////////////////
+//            Tag->Tag[EdgeIndex] = '\0';
+//            if (Tag->Score < ScoreToBeat)
+//            {
+//                // Abort the tag - it's not good enough!
+//                continue; 
+//            }
+//            if (RightNode->NodeType == evGraphNodeRightMod)
+//            {
+//                Tag->AminoIndex[Tag->ModsUsed] = CurrentDepth;
+//                Tag->ModType[Tag->ModsUsed] = RightNode->PTM;
+//                Tag->ModsUsed++;
+//            }
+//            Tag->PSpectrum = Spectrum;
+//            Tag->Tweak = Tweak;
+//            Tag->TagLength = CurrentDepth;
+//            Tag->ParentMass = Spectrum->ParentMass;
+//            Tag->Charge = Spectrum->Charge;
+//            Tag->PrefixMass = TagNodes[0]->Mass;
+//            Tag->SuffixMass = Spectrum->ParentMass - PARENT_MASS_BOOST - TagNodes[CurrentDepth]->Mass;
+//            (*TagCount)++;
+//            AllTagCount++;
+//            // If we've got as many tags as we can handle, drop all but the best.  (Don't
+//            // just reallocate; we could end up with a *lot*!)
+//            if ((*TagCount) + 5 >= TagAllocation)
+//            {
+//                qsort(AllTags, *TagCount, sizeof(TrieTag), (QSortCompare)CompareTagScores);
+//                *TagCount = TagAllocation / 2;
+//                if (MaximumTagCount >= 0)
+//                {
+//                    ScoreToBeat = AllTags[min(TagAllocation - 5, MaximumTagCount)].Score;
+//                }
+//                else
+//                {
+//                    ScoreToBeat = AllTags[*TagCount].Score;
+//                }
+//            }
+//            continue;
+//        } // If we're at tag depth
+//
+//        // We're not at tag depth yet. 
+//        // Move to our first available child:
+//        TagEdges[CurrentDepth] = TagNodes[CurrentDepth]->FirstEdge;
+//        if (!TagEdges[CurrentDepth])
+//        {
+//            BacktrackFlag = 1;
+//            continue;
+//        }
+//        else
+//        {
+//            CurrentDepth++;
+//            TagNodes[CurrentDepth] = TagEdges[CurrentDepth - 1]->ToNode;
+//        }
+//    }
+//    // Sort the tags, by score:
+//    qsort(AllTags, *TagCount, sizeof(TrieTag), (QSortCompare)CompareTagScores);
+//    return AllTags;
+//
+//}
+
+// Build a trie from a list of tags.  Returns the trie root. 
+// AllTags is the tag array, TagCount its size.
+// Since we construct one big trie for many spectra, we take Root as an
+// argument; Root is NULL on the first call.
+// If MaximumTagCount is >= 0 and < TagCount, then we stop after adding
+// that many tags.
+TrieNode* BuildTrieFromTags(TrieTag* AllTags, int TagCount, TrieNode* Root, int MaximumTagCount)
+{
+    int DuplicateFlag;
+    int TagsInTrie = 0;
+    int TagIndex;
+    TrieTag* Tag;
+
+    int Index;
+
+    //printf("BuildTrieFromTags...\n");
+    // Construct a root, if we don't have one already.  
+    if (!Root)
+    {
+      
+        Root = NewTrieNode();
+        Root->FailureNode = Root;
+	
+    }
+    for (TagIndex = 0; TagIndex < TagCount; TagIndex++)
+    {
+        AddTagToTrie(Root, AllTags + TagIndex, &DuplicateFlag);
+        if (!DuplicateFlag)
+        {
+            TagsInTrie++;
+            Tag = AllTags + TagIndex;
+            if (MaximumTagCount >= 0 && TagsInTrie >= MaximumTagCount)
+            {
+                break;
+            }
+        }
+    }
+
+    
+    //DebugPrintTrieTags(Root);
+    return Root;
+}
+
+void DebugPrintTagList(MSSpectrum* Spectrum, TrieTag* Tags, int TagCount)
+{
+    int TagIndex;
+    TrieTag* Tag;
+    int Index;
+    for (TagIndex = 0; TagIndex < TagCount; TagIndex++)
+    {
+        Tag = Tags + TagIndex;
+#ifdef DEBUG_TAG_GENERATION
+        printf("%s\n", Tag->TagScoreDetails);
+#endif
+        printf("%d: %.2f: %s %.2f %.2f\n", TagIndex, Tag->Score, Tag->Tag, Tag->PrefixMass / (float)MASS_SCALE, (Spectrum->ParentMass - PARENT_MASS_BOOST - Tag->SuffixMass) / (float)MASS_SCALE);
+        for (Index = 0; Index < Tag->TagLength; Index++)
+        {
+            printf("%c", Tag->Tag[Index]);
+            fflush(stdout);
+            if (Tag->AminoIndex[Index]>-1)
+            {
+                printf("%s", Tag->ModType[Index]->Name);
+                fflush(stdout);
+            }
+        }
+        printf("\n");
+    }
+}
+
+// Called when searching in tagless mode.  (Tagless mode performs *no* database filtering; it's 
+// appropriate for searching a small database, typically a database formed by an initial search run) 
+// The trie, in this case, will have a child for each amino acid (prefix 0, suffix = parent mass - amino mass)
+TrieNode* GenerateDummyTags(MSSpectrum* Spectrum, TrieNode* Root)
+{
+    TrieTag* Tag;
+    char* Aminos = "ACDEFGHKLMNPRSTVWY"; // skip I and Q, because they're synonymous with L and K
+    char* Amino;
+    int DuplicateFlag;
+    int ModIndex;
+    int TweakIndex;
+    SpectrumTweak* Tweak;
+    // Set up the root, if it doesn't exist already:
+    if (!Root)
+    {
+        Root = NewTrieNode();
+        Root->FailureNode = Root;
+    }
+    for (TweakIndex = 0; TweakIndex < TWEAK_COUNT; TweakIndex++)
+    {
+        Tweak = Spectrum->Node->Tweaks + TweakIndex;
+        if (!Tweak->Charge)
+        {
+            continue;
+        }
+        // Loop over alphabet soup, add one tag per amino:
+        for (Amino = Aminos; *Amino; Amino++)
+        {
+            Tag = NewTrieTag();
+            Tag->Tag[0] = *Amino;
+            Tag->Tag[1] = '\0';
+            memset(Tag->ModType, 0, sizeof(MassDelta*) * MAX_PT_MODS);
+            memset(Tag->AminoIndex, -1, sizeof(int) * MAX_PT_MODS);
+            Tag->PSpectrum = Spectrum;
+            Tag->Charge = Tweak->Charge;
+            Tag->ParentMass = Tweak->ParentMass;
+            Tag->Tweak = Tweak;
+            Tag->PrefixMass = 0;
+            Tag->SuffixMass = Tweak->ParentMass - PeptideMass[*Amino] - PARENT_MASS_BOOST;
+            Tag->TagLength = 1;
+            //GlobalStats->TagsGenerated++;
+            AddTagToTrie(Root, Tag, &DuplicateFlag);
+            // ...ok, also allow mods on this first amino
+            for (ModIndex = 0; ModIndex < AllPTModCount; ModIndex++)
+            {
+                if (AllKnownPTMods[ModIndex].Allowed[*Amino - 'A'])
+                {
+                    Tag = NewTrieTag();
+                    Tag->Tag[0] = *Amino;
+                    Tag->Tag[1] = '\0';
+                    memset(Tag->ModType, 0, sizeof(MassDelta*) * MAX_PT_MODS);
+                    memset(Tag->AminoIndex, -1, sizeof(int) * MAX_PT_MODS);
+                    Tag->ModType[0] = MassDeltaByIndex[(*Amino-'A') * MAX_PT_MODTYPE + ModIndex];
+                    Tag->AminoIndex[0] = 0;
+                    Tag->ModsUsed = 1;
+                    Tag->PSpectrum = Spectrum;
+                    Tag->Charge = Tweak->Charge;
+                    Tag->Tweak = Tweak;
+                    Tag->ParentMass = Tweak->ParentMass;
+                    Tag->PrefixMass = 0;
+                    Tag->SuffixMass = Tweak->ParentMass - PeptideMass[*Amino] - PARENT_MASS_BOOST - AllKnownPTMods[ModIndex].Mass;
+                    Tag->TagLength = 1;
+                    //GlobalStats->TagsGenerated++;
+                    AddTagToTrie(Root, Tag, &DuplicateFlag);
+                }
+            }
+        }
+    }
+    return Root;
+}
+
+TrieNode* GenerateTagsFromSpectrum(MSSpectrum* Spectrum, TrieNode* Root, int MaximumTagCount, SpectrumTweak* Tweak)
+{
+    TrieTag* Tags;
+    int TagCount;
+    
+    // Note: Spectrum load and preprocessing methods need to be called before calling this function.
+    // Call these: 
+    //SpectrumFindIsotopicPeaks(Spectrum);
+    //IntensityRankPeaks(Spectrum);
+    //SpectrumCorrectParentMass(Spectrum);
+
+    //printf("GenerateTagsFromSpectrum...\n");
+    if (Spectrum->Graph)
+    {
+        FreeTagGraph(Spectrum->Graph);
+        Spectrum->Graph = NULL;
+    }
+    if (GlobalOptions->TaglessSearchFlag)
+    {
+        return GenerateDummyTags(Spectrum, Root);
+    }
+    Spectrum->Graph = ConstructTagGraph(Spectrum);
+    TagGraphAddNodes(Spectrum->Graph, Spectrum);
+    //printf("From spectrum with %d peaks, graph with %d nodes\n",Spectrum->PeakCount,Spectrum->Graph->NodeCount);
+    
+    TagGraphScorePRMNodes(NULL, Spectrum->Graph, Spectrum, Tweak);
+    //DebugPrintTagGraph(Spectrum, Spectrum->Graph); 
+    TagGraphPopulateEdges(Spectrum->Graph);
+    
+#ifdef DEBUG_TAG_GENERATION
+    DebugPrintTagGraph(Spectrum, Spectrum->Graph);
+    DebugPrintTagGraphEdges(Spectrum->Graph); ////
+#endif
+    Tags = TagGraphGenerateTags(Spectrum->Graph, Spectrum, &TagCount, MaximumTagCount, Tweak, TAG_EDGE_SCORE_MULTIPLIER, NULL);
+
+#ifdef DEBUG_TAG_GENERATION
+    DebugPrintTagList(Spectrum, Tags, 300);
+#endif
+    DebugPrintTagsForPeptide(Spectrum, Spectrum->Graph, Tags, TagCount);
+    Root = BuildTrieFromTags(Tags, TagCount, Root, MaximumTagCount);
+    if (0)
+    {
+        DebugPrintTrieTags(Root);
+    }
+
+    // The caller should usually invoke InitializeTrieFailureNodes next.  When doing a batch of 
+    // spectra, however, we do InitializeTrieFailureNodes once at the end.
+
+    return Root;
+}
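+
+// Editor's note (added sketch, not part of the upstream source, kept commented
+// out so the build is unchanged): callers typically accumulate one shared trie
+// across a batch by feeding the returned Root back in, roughly as below
+// (FirstSpectrumNode and MaxTags are placeholders), and only then run the
+// failure-node initialization mentioned above.
+//
+//    TrieNode* Root = NULL;
+//    SpectrumNode* SNode;
+//    for (SNode = FirstSpectrumNode; SNode; SNode = SNode->Next)
+//    {
+//        Root = GenerateTagsFromSpectrum(SNode->Spectrum, Root, MaxTags,
+//            SNode->Tweaks);
+//    }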
+
+// Build a hash (Graph->NodeIndex) for quick lookup of nodes based on mass.  This is used
+// in GetBYScore, when choosing a PTM attachment point.  
+void TagGraphBuildNodeIndex(TagGraph* Graph)
+{
+    TagGraphNode* Node;
+    int Bucket;
+    int BucketMax;
+    SafeFree(Graph->NodeIndex);
+    Graph->NodeIndexSize = ((int)(Graph->LastNode->Mass / DALTON)) + 1;
+    Graph->NodeIndex = (TagGraphNode**)calloc(Graph->NodeIndexSize, sizeof(TagGraphNode*));
+    for (Node = Graph->FirstNode; Node; Node = Node->Next)
+    {
+      BucketMax = (int)(min(Graph->NodeIndexSize - 1, Node->Mass / DALTON + 1));
+      for (Bucket = max(0, (int)(Node->Mass / DALTON) - 1); Bucket <= BucketMax; Bucket++)
+        {
+            if (!Graph->NodeIndex[Bucket])
+            {
+                Graph->NodeIndex[Bucket] = Node;
+            }
+        }
+    }
+}
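+
+// Editor's note (added sketch, not part of the upstream source, kept commented
+// out so the build is unchanged): a lookup against the index built above,
+// where TargetMass is a placeholder for the (scaled) mass being sought.  Each
+// node is registered in its own bucket and both neighbors, so starting at
+// TargetMass / DALTON and walking forward covers a +/- 1 Da window.
+//
+//    int Bucket = min(Graph->NodeIndexSize - 1, max(0, TargetMass / DALTON));
+//    TagGraphNode* Near;
+//    for (Near = Graph->NodeIndex[Bucket]; Near; Near = Near->Next)
+//    {
+//        if (Near->Mass > TargetMass + GlobalOptions->Epsilon)
+//        {
+//            break;
+//        }
+//        if (abs(Near->Mass - TargetMass) <= GlobalOptions->Epsilon)
+//        {
+//            // Near is a candidate node for this mass.
+//        }
+//    }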
+
+int NiceCheckAA(char AA1, char AA2)
+{
+    if (AA1 == 'I')
+    {
+        AA1= 'L';
+    }
+    if (AA2 == 'I')
+    {
+        AA2= 'L';
+    }
+    if (AA1 == 'Q')
+    {
+        AA1= 'K';
+    }
+    if (AA2 == 'Q')
+    {
+        AA2= 'K';
+    }
+    return (AA1 == AA2);
+}
+
+void DebugCheckTagMatch(int TagIndex, TrieTag* Tag, int* Masses, int MassCount, char* Peptide)
+{
+    int Pos;
+    int Diff;
+    int ParentMass;
+    int TagAAIndex;
+    //
+    ParentMass = Masses[MassCount-1];
+    for (Pos = 0; Pos < MassCount - 3; Pos++)
+    {
+        Diff = abs(Tag->PrefixMass - Masses[Pos]);
+        if (Diff > 2 * DALTON)
+        {
+            continue;
+        }
+        Diff = abs((ParentMass - Masses[Pos+3]) - Tag->SuffixMass);
+        if (Diff > 2 * DALTON)
+        {
+            continue;
+        }
+        for (TagAAIndex = 0; TagAAIndex < Tag->TagLength; TagAAIndex++)
+        {
+            if (!NiceCheckAA(Peptide[Pos + TagAAIndex + 1], Tag->Tag[TagAAIndex]))
+            {
+                break;
+            }
+        }
+        if (TagAAIndex < Tag->TagLength)
+        {
+            // A tag residue disagrees with the peptide at this offset.
+            continue;
+        }
+        printf("Matched by tag #%d: '%s', prefix %.2f, suffix %.2f\n", TagIndex, Tag->Tag, Tag->PrefixMass / (float)MASS_SCALE, Tag->SuffixMass / (float)MASS_SCALE);
+    }
+}
+
+// Sometimes we don't generate tags for a peptide, and it's not obvious why.
+// In such cases, include a line of the form "tagcheck,PEPTIDE" in the input file.
+// Then, this function will compare the tags for this peptide against the actual tags
+// (and actual tag graph).
+void DebugPrintTagsForPeptide(MSSpectrum* Spectrum, TagGraph* Graph, TrieTag* Tags, int TagCount)
+{
+    StringNode* Node;
+    int MassCount;
+    int Masses[64];
+    char* Amino;
+    int AminoMass;
+    int AccumMass;
+    char Peptide[64];
+    int PeptideLength;
+    int TagIndex;
+    TrieTag* Tag;
+    int MassIndex;
+    TagGraphNode* GraphNode;
+    //
+    for (Node = FirstTagCheckNode; Node; Node = Node->Next)
+    {
+        printf("--- Check tagging results for %s Charge %d\n", Node->String, Spectrum->Charge);
+        MassCount = 0;
+        AccumMass = 0;
+        PeptideLength = 0;
+        // Parse the peptide string.  For now, DROP all mods.
+        for (Amino = Node->String; *Amino; Amino++)
+        {
+            AminoMass = PeptideMass[*Amino];
+            if (AminoMass)
+            {
+                AccumMass += AminoMass;
+                Masses[MassCount++] = AccumMass;
+                Peptide[PeptideLength++] = *Amino;
+            }
+        }
+        Peptide[PeptideLength] = '\0';
+        ///////////////////////////////////////////////////////////
+        // Loop over tags, and see whether any tag matches the peptide:
+        for (TagIndex = 0; TagIndex < TagCount; TagIndex++)
+        {   
+            Tag = Tags + TagIndex;
+            DebugCheckTagMatch(TagIndex, Tag, Masses, MassCount, Peptide);
+        }
+        ///////////////////////////////////////////////////////////
+        // Loop over PRMs in the peptide, and see how taggable they are:
+        for (MassIndex = 0; MassIndex < MassCount; MassIndex++)
+        {
+            AccumMass = Masses[MassIndex];
+            printf("Mass %d (%.2f):\n", MassIndex, AccumMass / (float)MASS_SCALE);
+            for (GraphNode = Graph->FirstNode; GraphNode; GraphNode = GraphNode->Next)
+            {
+                if (GraphNode->Mass > AccumMass + DALTON)
+                {
+                    break;
+                }
+                if (GraphNode->Mass < AccumMass - DALTON)
+                {
+                    continue;
+                }
+                printf("  Node at %.2f (%.2f) score %.2f\n", GraphNode->Mass / (float)MASS_SCALE, 
+                    (GraphNode->Mass - AccumMass) / (float)MASS_SCALE, GraphNode->Score);
+            }
+        }
+    }
+}
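+
+// Editor's note (added comment, not part of the upstream source): a concrete
+// example of the input-file line described above would be
+//     tagcheck,VGEVFINK
+// where VGEVFINK is just a hypothetical peptide of interest; any unmodified
+// sequence works, since the parser above drops modifications for now.
+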
+TagGraphNode* TagTestGetBestNode(TagGraph* Graph, int PRM)
+{
+    int MinMass;
+    int MaxMass;
+    TagGraphNode* TGNode;
+    TagGraphNode* BestNode = NULL;
+    //
+    MinMass = PRM - 50;
+    MaxMass = PRM + 50;
+    for (TGNode = Graph->FirstNode; TGNode; TGNode = TGNode->Next)
+    {
+        if (TGNode->Mass > MaxMass)
+        {
+            break;
+        }
+        if (TGNode->Mass < MinMass)
+        {
+            continue;
+        }
+        if (!BestNode || BestNode->Score < TGNode->Score)
+        {
+            BestNode = TGNode;
+        }
+    }
+    return BestNode;
+}
+
+void TestTaggingCallback(SpectrumNode* Node, int Charge, int ParentMass, Peptide* Annotation)
+{
+    static int* TrueTagRankHistogram = NULL;
+    static int SpectrumCount = 0;
+    int Rank;
+    int Cumulative;
+    FILE* ResultsFile;
+    int TagIndex;
+    int TagCount;
+    TrieTag* Tags;
+    int FoundFlag;
+    TrieTag* TestTag;
+    int PRM[64];
+    int Mass;
+    int AminoIndex;
+    int ModIndex;
+    int TrieTagCount;
+    int MatchLength;
+    BayesianModel* Model;
+    TagGraphNode* Node0;
+    TagGraphNode* Node1;
+    TagGraphNode* Node2;
+    TagGraphNode* Node3;
+    float TagScore;
+    TrieNode* Root;
+    int DuplicateFlag;
+    int VerboseFlag = 0;
+    //
+    Root = NULL;
+    if (!Node)
+    {
+        if (!Charge)
+        {
+            // Initialization call:
+	  TrueTagRankHistogram = (int*)calloc(512, sizeof(int));
+        }
+        else
+        {
+            // Completion call:
+            ResultsFile = fopen("TagTestingResults.txt", "w");
+            Cumulative = 0;
+            fprintf(ResultsFile, "Tagging results on %d spectra\n", SpectrumCount);
+            for (Rank = 0; Rank < 512; Rank++)
+            {
+                Cumulative += TrueTagRankHistogram[Rank];
+                fprintf(ResultsFile, "%d\t%d\t%.2f\t%.2f\t\n",
+                    Rank, TrueTagRankHistogram[Rank], TrueTagRankHistogram[Rank] / (float)SpectrumCount,
+                    Cumulative / (float)SpectrumCount);
+            }
+            //SafeFree(TrueTagRankHistogram);
+        }
+        return;
+    }
+    // Standard call: Given a spectrum, generate some tags.  Remember the rank of the first true tag.
+
+    Root = NewTrieNode();
+    Root->FailureNode = Root;
+
+    Node->Tweaks[0].Charge = Charge;
+    Node->Tweaks[0].ParentMass = Annotation->ParentMass;
+    Node->Spectrum->Charge = Charge;
+    Node->Spectrum->ParentMass = Annotation->ParentMass;
+    WindowFilterPeaks(Node->Spectrum, 0, 0);
+    PrepareSpectrumForIonScoring(PRMModelCharge2, Node->Spectrum, 0);
+    //SpectrumComputeBinnedIntensities(Node);
+    //SpectrumComputeNoiseDistributions(Node);
+    //SpectrumAssignIsotopeNeighbors(Node->Spectrum);
+    //SpectrumFindIsotopicPeaks(Node->Spectrum);
+    Node->Spectrum->Graph = ConstructTagGraph(Node->Spectrum);
+    TagGraphAddNodes(Node->Spectrum->Graph, Node->Spectrum);
+    TagGraphPopulateEdges(Node->Spectrum->Graph);
+    TagGraphScorePRMNodes(NULL, Node->Spectrum->Graph, Node->Spectrum, Node->Tweaks);
+    Tags = TagGraphGenerateTags(Node->Spectrum->Graph, Node->Spectrum, &TagCount, 1024, Node->Tweaks, TAG_EDGE_SCORE_MULTIPLIER, NULL);
+    if (Charge > 2)
+    {
+        Model = BNCharge3TaggingBN;
+    }
+    else
+    {
+        Model = BNCharge2TaggingBN;
+    }
+    // Set our PRM array, so we can check tag prefix masses:
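+    // I and L are isobaric, and Q and K differ by only ~0.036 Da, so the annotation is
+    // normalized to L and K here; generated tags cannot reliably distinguish these residues.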
+    //printf("\nTags for: %s\n", Annotation->Bases); 
+    Mass = 0;
+    MatchLength = strlen(Annotation->Bases);
+    for (AminoIndex = 0; AminoIndex < MatchLength-1; AminoIndex++)
+    {
+        switch (Annotation->Bases[AminoIndex])
+        {
+        case 'I':
+            Annotation->Bases[AminoIndex] = 'L';
+            break;
+        case 'Q':
+            Annotation->Bases[AminoIndex] = 'K';
+            break;
+        default:
+            break;
+        }
+        PRM[AminoIndex] = Mass;
+        Mass += PeptideMass[Annotation->Bases[AminoIndex]];
+        for (ModIndex = 0; ModIndex < MAX_PT_MODS; ModIndex++)
+        {
+            if (Annotation->AminoIndex[ModIndex] == AminoIndex && Annotation->ModType[ModIndex])
+            {
+                Mass += Annotation->ModType[ModIndex]->RealDelta;
+            }
+        }
+    }
+    ///////////////////////////////////////////////////////////////
+    // Optional verbose debugging:
+    // For each theoretical tag, look for the best tag that can be generated.
+    if (VerboseFlag)
+    {
+        //GetPRMFeatures(Node->Spectrum, Node->Tweaks, Model, 97870);
+        //DebugPrintScorpPRMScores(Node->Spectrum, Node->Tweaks);
+        for (AminoIndex = 0; AminoIndex < MatchLength-2; AminoIndex++)
+        {
+            Node0 = TagTestGetBestNode(Node->Spectrum->Graph, PRM[AminoIndex]);
+            Node1 = TagTestGetBestNode(Node->Spectrum->Graph, PRM[AminoIndex + 1]);
+            Node2 = TagTestGetBestNode(Node->Spectrum->Graph, PRM[AminoIndex + 2]);
+            Node3 = TagTestGetBestNode(Node->Spectrum->Graph, PRM[AminoIndex + 3]);
+            printf("Theoretical tag %.2f %s:\n", PRM[AminoIndex] / (float)MASS_SCALE, Annotation->Bases + AminoIndex);
+            if (Node0)
+            {
+                TagScore = Node0->Score;
+                printf(" Node0 %.2f score %.2f\n", Node0->Mass / (float)MASS_SCALE, Node0->Score);
+            }
+            else
+            {
+                printf(" <Node0 missing>\n");
+                TagScore = -9999;
+            }
+            if (Node1)
+            {
+                TagScore += Node1->Score;
+                printf(" Node1 %.2f score %.2f\n", Node1->Mass / (float)MASS_SCALE, Node1->Score);
+            }
+            else
+            {
+                printf(" <Node1 missing>\n");
+                TagScore = -9999;
+            }
+            if (Node2)
+            {
+                TagScore += Node2->Score;
+                printf(" Node2 %.2f score %.2f\n", Node2->Mass / (float)MASS_SCALE, Node2->Score);
+            }
+            else
+            {
+                printf(" <Node2 missing>\n");
+                TagScore = -9999;
+            }
+            if (Node3)
+            {
+                TagScore += Node3->Score;
+                printf(" Node3 %.2f score %.2f\n", Node3->Mass / (float)MASS_SCALE, Node3->Score);
+            }
+            else
+            {
+                printf(" <Node3 missing>\n");
+                TagScore = -9999;
+            }
+            if (Node0 && Node0->OriginalPeakIndex < 0)
+            {
+                TagScore *= (float)1.3333;
+            }
+            if (Node3 && Node3->OriginalPeakIndex < 0)
+            {
+                TagScore *= (float)1.3333;
+            }
+
+            printf("overall: %.2f\n", TagScore);
+        }
+    }
+    ///////////////////////////////////////////////////////////////
+    // Check each tag to see whether it's correct:
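+    // A tag counts as correct if its prefix mass lies within ParentMassEpsilon of a true
+    // PRM and its three residues match the (I/L, Q/K normalized) annotation at that site.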
+    TagCount = min(TagCount, 512);
+    FoundFlag = 0;
+    TrieTagCount = 0;
+    for (TagIndex = 0; TagIndex < TagCount; TagIndex++)
+    {
+        TestTag = Tags + TagIndex;
+        DuplicateFlag = 0;
+        AddTagToTrie(Root, TestTag, &DuplicateFlag);
+        if (!DuplicateFlag)
+        {
+            TrieTagCount++;
+            //if (TrieTagCount <= 10)
+            {
+                if (VerboseFlag)
+                {
+                    printf("%.2f\t%s\t%.2f\n", TestTag->PrefixMass / (float)MASS_SCALE, TestTag->Tag, TestTag->Score);
+                }
+                
+            }
+            for (AminoIndex = 0; AminoIndex < MatchLength-2; AminoIndex++)
+            {
+                if (abs(TestTag->PrefixMass - PRM[AminoIndex]) < GlobalOptions->ParentMassEpsilon)
+                {
+                    if (TestTag->Tag[0] == Annotation->Bases[AminoIndex] &&
+                        TestTag->Tag[1] == Annotation->Bases[AminoIndex + 1] &&
+                        TestTag->Tag[2] == Annotation->Bases[AminoIndex + 2])
+                    {
+                        TrueTagRankHistogram[TrieTagCount]++;
+                        FoundFlag = 1;
+                    }
+                }
+            }
+        }
+        if (FoundFlag)
+        {
+            break;
+        }
+    }
+    if (!FoundFlag)
+    {
+        // we missed, too bad.  don't need to poke the histogram.
+        //TrueTagRankHistogram[511]++;
+    }
+    FreeTrieNode(Root);
+    Root = NULL;
+
+    SpectrumCount++;
+
+}
+
+void TestTagging(char* OracleFile, char* OracleDir)
+{
+    InitBayesianModels(); // to use new PRM scoring
+    InitStats();
+    TestTaggingCallback(NULL, 0, 0, NULL); // initialization
+    TrainOnOracleFile(OracleFile, OracleDir, TestTaggingCallback);
+    TestTaggingCallback(NULL, 1, 0, NULL); // completion
+}
+
+void TrainTaggingCallback(SpectrumNode* Node, int Charge, int ParentMass, Peptide* Annotation)
+{
+    static int SpectrumCount = 0;
+    int TagIndex;
+    int TagCount;
+    TrieTag* Tags;
+    int FoundFlag;
+    TrieTag* TestTag;
+    int PRM[64];
+    int Mass;
+    int AminoIndex;
+    int ModIndex;
+    int TrieTagCount;
+    int MatchLength;
+    int FeatureIndex;
+    BayesianModel* Model;
+    TrieNode* Root;
+    int DuplicateFlag;
+    int TrueTagFlag;
+    static FILE* TagTrainingFile = NULL;
+    //
+    Root = NULL;
+
+    if (!TagTrainingFile)
+    {
+        TagTrainingFile = fopen("TagTraining.txt", "w");
+    }
+    // Standard call: Given a spectrum, generate some tags.  Test the first n tags, and write
+    // out a feature-vector for each.
+
+    // ** skip modded peptides:
+    if (Annotation->ModType[0])
+    {
+        return;
+    }
+    Root = NewTrieNode();
+    Root->FailureNode = Root;
+
+    Node->Tweaks[0].Charge = Charge;
+    Node->Tweaks[0].ParentMass = Annotation->ParentMass;
+    Node->Spectrum->Charge = Charge;
+    Node->Spectrum->ParentMass = Annotation->ParentMass;
+    WindowFilterPeaks(Node->Spectrum, 0, 0);
+    PrepareSpectrumForIonScoring(PRMModelCharge2, Node->Spectrum, 0);
+    //SpectrumComputeBinnedIntensities(Node);
+    //SpectrumComputeNoiseDistributions(Node);
+    //SpectrumAssignIsotopeNeighbors(Node->Spectrum);
+    //SpectrumFindIsotopicPeaks(Node->Spectrum);
+    Node->Spectrum->Graph = ConstructTagGraph(Node->Spectrum);
+    TagGraphAddNodes(Node->Spectrum->Graph, Node->Spectrum);
+    TagGraphPopulateEdges(Node->Spectrum->Graph);
+    TagGraphScorePRMNodes(NULL, Node->Spectrum->Graph, Node->Spectrum, Node->Tweaks);
+    Tags = TagGraphGenerateTags(Node->Spectrum->Graph, Node->Spectrum, &TagCount, 1024, Node->Tweaks, TAG_EDGE_SCORE_MULTIPLIER, NULL);
+    if (Charge > 2)
+    {
+        Model = BNCharge3TaggingBN;
+    }
+    else
+    {
+        Model = BNCharge2TaggingBN;
+    }
+    // Set our PRM array, so we can check tag prefix masses:
+    Mass = 0;
+    MatchLength = strlen(Annotation->Bases);
+    for (AminoIndex = 0; AminoIndex < MatchLength-1; AminoIndex++)
+    {
+        switch (Annotation->Bases[AminoIndex])
+        {
+        case 'I':
+            Annotation->Bases[AminoIndex] = 'L';
+            break;
+        case 'Q':
+            Annotation->Bases[AminoIndex] = 'K';
+            break;
+        default:
+            break;
+        }
+        PRM[AminoIndex] = Mass;
+        Mass += PeptideMass[Annotation->Bases[AminoIndex]];
+        for (ModIndex = 0; ModIndex < MAX_PT_MODS; ModIndex++)
+        {
+            if (Annotation->AminoIndex[ModIndex] == AminoIndex && Annotation->ModType[ModIndex])
+            {
+                Mass += Annotation->ModType[ModIndex]->RealDelta;
+            }
+        }
+    }
+    ///////////////////////////////////////////////////////////////
+    // Check each tag to see whether it's correct:
+    TagCount = min(TagCount, 512);
+    FoundFlag = 0;
+    TrieTagCount = 0;
+    for (TagIndex = 0; TagIndex < min(10, TagCount); TagIndex++)
+    {
+        TrueTagFlag = 0;
+        TestTag = Tags + TagIndex;
+        DuplicateFlag = 0;
+        Root = AddTagToTrie(Root, TestTag, &DuplicateFlag);
+        if (!DuplicateFlag)
+        {
+            TrieTagCount++;
+            for (AminoIndex = 0; AminoIndex < MatchLength-2; AminoIndex++)
+            {
+                if (abs(TestTag->PrefixMass - PRM[AminoIndex]) < GlobalOptions->ParentMassEpsilon)
+                {
+                    if (TestTag->Tag[0] == Annotation->Bases[AminoIndex] &&
+                        TestTag->Tag[1] == Annotation->Bases[AminoIndex + 1] &&
+                        TestTag->Tag[2] == Annotation->Bases[AminoIndex + 2])
+                    {
+                        TrueTagFlag = 1;
+                    }
+                }
+            }
+        }
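+        // Write one training line per tag in svm-light / libsvm style: a +1/-1 label
+        // followed by index:value feature pairs (currently only the tag score).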
+        if (TrueTagFlag)
+        {
+            fprintf(TagTrainingFile, "+1 ");
+        }
+        else
+        {
+            fprintf(TagTrainingFile, "-1 ");
+        }
+        FeatureIndex = 1;
+        fprintf(TagTrainingFile, "%d:%.3f ", FeatureIndex++, TestTag->Score);
+        fprintf(TagTrainingFile, "\n");
+    }
+    if (!FoundFlag)
+    {
+        // we missed, too bad.  don't need to poke the histogram.
+        //TrueTagRankHistogram[511]++;
+    }
+    FreeTrieNode(Root);
+    Root = NULL;
+
+    SpectrumCount++;
+
+}
+
+void TrainTagging(char* OracleFile, char* OracleDir)
+{
+    InitBayesianModels(); // to use new PRM scoring
+    InitStats();
+    TrainOnOracleFile(OracleFile, OracleDir, TrainTaggingCallback);
+}
+
+// Using flanking amino acid info, score the remaining nodes in Model.
+float SetTaggingFlankScore(PRMBayesianModel* Model, TagGraphNode** TagNodes, TagGraphEdge** TagEdges, int Depth, int RightEndpointFlag)
+{
+    PRMBayesianNode* Node;
+    PRMBayesianNodeHolder* Holder;
+    char PrefixAA = UNKNOWN_AMINO;
+    char SuffixAA = UNKNOWN_AMINO;
+    float Score = 0;
+    int TableIndex;
+    int ParentIndex;
+    //
+    if (Depth)
+    {
+        PrefixAA = TagEdges[Depth - 1]->Jump->Amino;
+    }
+    if (!RightEndpointFlag)
+    {
+        SuffixAA = TagEdges[Depth]->Jump->Amino;
+    }
+    for (Holder = Model->FirstFlank; Holder; Holder = Holder->Next)
+    {
+        Node = Holder->Node;
+        switch (Node->Type)
+        {
+        case evFlank:
+            Node->Value = IonScoringGetFlank(Node, PrefixAA, SuffixAA);
+            break;
+        case evPrefixAA:
+            if ((PrefixAA - 'A') == Node->Flag)
+            {
+                Node->Value = 1;
+            }
+            else
+            {
+                Node->Value = 0;
+            }
+            break;
+        case evSuffixAA:
+            if ((SuffixAA - 'A') == Node->Flag)
+            {
+                Node->Value = 1;
+            }
+            else
+            {
+                Node->Value = 0;
+            }
+            break;
+        default:
+            // We already knew this node's value (based on intensity).  Now we know its parents' values (based 
+            // in part on flanking amino acids).  ASSUME that all parents are in the FlankList.
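+            // Flatten the node's own value and its parents' values into a single index into
+            // the conditional probability table; ParentBlocks[i] is the stride of parent i.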
+            TableIndex = Node->Value;
+            for (ParentIndex = 0; ParentIndex < Node->ParentCount; ParentIndex++)
+            {
+                TableIndex += Node->Parents[ParentIndex]->Value * Node->ParentBlocks[ParentIndex];
+            }
+            Score = Node->ProbTable[TableIndex];
+            // The score from the NOISE MODEL has already been integrated.  So, we're done.
+            break;
+        }
+    }
+    return Score;
+}
+
+
+// New tag generation function: Generates tags of a (more-or-less) arbitrary length!
+// Incorporates a more sophisticated intensity scoring function that considers
+// amino acid effects.  
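+// The traversal keeps an explicit stack: TagNodes[d] is the node at depth d, TagEdges[d]
+// is the edge taken out of it, FlankScore[d] caches the flanking-amino-acid score for that
+// step, and BacktrackFlag switches the loop between descending to a child and moving to a
+// sibling or parent.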
+TrieTag* TagGraphGenerateTags(TagGraph* Graph, MSSpectrum* Spectrum, int* TagCount, 
+    int MaximumTagCount, SpectrumTweak* Tweak, float TagEdgeScoreMultiplier,
+    struct PRMBayesianModel* Model)
+{
+    TagGraphNode* TagNodes[12];
+    TagGraphEdge* TagEdges[12];
+    int NodeIndex;
+    int EdgeIndex;
+    TagGraphNode* Node;
+    TagGraphEdge* Edge;
+    TagGraphNode* LeftNode;
+    TagGraphNode* RightNode;
+    int CurrentDepth;
+    int InternalNodes;
+    float NodeScore;
+    TrieTag* Tag;
+    int TagAllocation;
+    int BacktrackFlag;
+    int AllTagCount = 0;
+    int Bin;
+    float FlankScore[12];
+    float ScoreToBeat = -9999;
+    //
+    if (!Model)
+    {
+        if (Tweak->Charge < 3)
+        {
+            Model = TAGModelCharge2;
+        }
+        else
+        {
+            Model = TAGModelCharge3;
+        }
+    }
+    *TagCount = 0;
+    TagAllocation = 1024;
+    if (!AllTags)
+    {
+        AllTags = (TrieTag*)calloc(TagAllocation, sizeof(TrieTag));
+    }
+    NodeIndex = 0;
+    EdgeIndex = -1;
+    BacktrackFlag = 0;
+    CurrentDepth = 0;
+    TagNodes[0] = Graph->FirstNode;
+    // Main iteration: Depth-first traversal through the DAG, up to a maximum depth of 
+    // GlobalOptions->GenerateTagLength, and with each possible root (TagNodes[0]).
+    while (1)
+    {
+        // If we're BACKTRACKING, then move to a sibling or parent:
+        if (BacktrackFlag)
+        {
+            // Move the root of the subtree, if necessary:
+            if (CurrentDepth == 0)
+            {
+                // Move to the next 'first' node:
+                TagNodes[0] = TagNodes[0]->Next;
+                if (!TagNodes[0])
+                {
+                    break;
+                }
+                BacktrackFlag = 0;
+                continue;
+            }
+            // Move to a sibling, if we can:
+            TagEdges[CurrentDepth - 1] = TagEdges[CurrentDepth - 1]->Next;
+            if (TagEdges[CurrentDepth - 1])
+            {
+                TagNodes[CurrentDepth] = TagEdges[CurrentDepth - 1]->ToNode;
+                BacktrackFlag = 0;
+                FlankScore[CurrentDepth - 1] = SetTaggingFlankScore(Model, TagNodes, TagEdges, CurrentDepth - 1, 0);
+                continue;
+            }
+            // No more siblings - move up one level.
+            CurrentDepth--;
+            continue;
+        }
+
+        // Special case for level 1: Skip tag nodes with silly masses like 20Da.
+        if (CurrentDepth == 0)
+        {
+            Node = TagNodes[0];
+            if (Node->Mass > GlobalOptions->ParentMassEpsilon && Node->Mass < GLYCINE_MASS - GlobalOptions->Epsilon)
+            {
+                BacktrackFlag = 1;
+                continue;
+            }
+        }
+
+        // If we're deep enough, report a tag and start backtracking:
+        if (CurrentDepth >= GlobalOptions->GenerateTagLength)
+        {
+            FlankScore[CurrentDepth] = SetTaggingFlankScore(Model, TagNodes, TagEdges, CurrentDepth, 1);
+            BacktrackFlag = 1;
+            LeftNode = TagNodes[0];
+            RightNode = TagNodes[CurrentDepth];
+            Tag = AllTags + (*TagCount);
+            InternalNodes = 0;
+            NodeScore = 0;
+            for (NodeIndex = 0; NodeIndex <= CurrentDepth; NodeIndex++)
+            {
+                Node = TagNodes[NodeIndex];
+                if (Node->OriginalPeakIndex > 0)
+                {
+                    NodeScore += Node->Score;
+                    NodeScore += FlankScore[NodeIndex];
+                    InternalNodes++;
+                }
+                Tag->Nodes[NodeIndex] = TagNodes[NodeIndex];
+            }
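+            // Only nodes backed by an observed peak contribute to NodeScore; the sum is then
+            // scaled by (GenerateTagLength + 1) / InternalNodes, i.e. extrapolated to the
+            // full set of tag nodes.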
+            NodeScore *= (GlobalOptions->GenerateTagLength + 1) / (float)max(1, InternalNodes);
+            Tag->Score = NodeScore;
+            Tag->ModsUsed = 0;
+            memset(Tag->ModType, 0, sizeof(MassDelta*) * MAX_PT_MODS);
+            memset(Tag->AminoIndex, -1, sizeof(int) * MAX_PT_MODS);
+            if (LeftNode->NodeType == evGraphNodeLeftMod)
+            {
+                // Sanity check: The first AA must be one where this mod can
+                // be attached!
+                if (!AllKnownPTMods[LeftNode->PTM->Index].Allowed[TagEdges[0]->Jump->Amino - 'A'])
+                {
+                    continue;
+                }
+                Tag->AminoIndex[Tag->ModsUsed] = 0;
+                Tag->ModType[Tag->ModsUsed] = LeftNode->PTM;
+                Tag->ModsUsed++;
+            }
+            for (EdgeIndex = 0; EdgeIndex < CurrentDepth; EdgeIndex++)
+            {
+                Edge = TagEdges[EdgeIndex];
+                Tag->Score += TagEdges[EdgeIndex]->Score;
+                Tag->Tag[EdgeIndex] = Edge->Jump->Amino;
+                if (Edge->Jump->Delta)
+                {
+                    Tag->AminoIndex[Tag->ModsUsed] = EdgeIndex;
+                    Tag->ModType[Tag->ModsUsed] = Edge->Jump->Delta;
+                    Tag->ModsUsed++;
+                }
+            }
+            // Set skew info:
+            Tag->TotalSkew = 0;
+            Tag->TotalAbsSkew = 0;
+            for (EdgeIndex = 0; EdgeIndex < CurrentDepth; EdgeIndex++)
+            {
+                Edge = TagEdges[EdgeIndex];
+                Tag->TotalSkew += Edge->Skew;
+                Tag->TotalAbsSkew += abs(Edge->Skew);
+            }
+            ////////////////////////////////////////////////////
+            // If the total skew is large, penalize the tag's score:
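+            // TotalSkew and TotalAbsSkew are in scaled mass units; bin them in steps of 50
+            // and add the learned penalties (TagSkewScore, TagTotalAbsSkewScore), weighted
+            // by TagEdgeScoreMultiplier.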
+            Bin = (int)(fabs((Tag->TotalSkew / 50.0)));
+            if (Bin >= TagSkewBinCount)
+            {
+                Bin = TagSkewBinCount - 1;
+            }
+            Tag->Score += TagSkewScore[Bin] * TagEdgeScoreMultiplier;
+            Bin = (int)(fabs((Tag->TotalAbsSkew / 50.0)));
+            if (Bin >= TagSkewBinCount)
+            {
+                Bin = TagSkewBinCount - 1;
+            }
+            Tag->Score += TagTotalAbsSkewScore[Bin] * TagEdgeScoreMultiplier;
+            ////////////////////////////////////////////////////
+            Tag->Tag[EdgeIndex] = '\0';
+            if (Tag->Score < ScoreToBeat)
+            {
+                // Abort the tag - it's not good enough!
+                continue; 
+            }
+            if (RightNode->NodeType == evGraphNodeRightMod)
+            {
+                // Sanity check: The last AA must be one where this mod can
+                // be attached!
+                if (!AllKnownPTMods[RightNode->PTM->Index].Allowed[TagEdges[CurrentDepth - 1]->Jump->Amino - 'A'])
+                {
+                    continue;
+                }
+                Tag->AminoIndex[Tag->ModsUsed] = CurrentDepth;
+                Tag->ModType[Tag->ModsUsed] = RightNode->PTM;
+                Tag->ModsUsed++;
+            }
+            Tag->PSpectrum = Spectrum;
+            Tag->Tweak = Tweak;
+            Tag->TagLength = CurrentDepth;
+            Tag->ParentMass = Spectrum->ParentMass;
+            Tag->Charge = Spectrum->Charge;
+            Tag->PrefixMass = TagNodes[0]->Mass;
+            Tag->SuffixMass = Spectrum->ParentMass - PARENT_MASS_BOOST - TagNodes[CurrentDepth]->Mass;
+            (*TagCount)++;
+            AllTagCount++;
+            // If we've got as many tags as we can handle, drop all but the best.  (Don't
+            // just reallocate; we could end up with a *lot*!)
+            if ((*TagCount) + 5 >= TagAllocation)
+            {
+                qsort(AllTags, *TagCount, sizeof(TrieTag), (QSortCompare)CompareTagScores);
+                *TagCount = TagAllocation / 2;
+                if (MaximumTagCount >= 0)
+                {
+                    ScoreToBeat = AllTags[min(TagAllocation - 5, MaximumTagCount)].Score;
+                }
+                else
+                {
+                    ScoreToBeat = AllTags[*TagCount].Score;
+                }
+            }
+            //printf("Added a tag for %d - %s - %d\n",Tag->PrefixMass, Tag->Tag, Tag->SuffixMass);
+            continue;
+        } // If we're at tag depth
+
+        // We're not at tag depth yet. 
+        // Move to our first available child:
+        TagEdges[CurrentDepth] = TagNodes[CurrentDepth]->FirstEdge;
+        if (!TagEdges[CurrentDepth])
+        {
+            BacktrackFlag = 1;
+            continue;
+        }
+        else
+        {
+            CurrentDepth++;
+            TagNodes[CurrentDepth] = TagEdges[CurrentDepth - 1]->ToNode;
+            FlankScore[CurrentDepth - 1] = SetTaggingFlankScore(Model, TagNodes, TagEdges, CurrentDepth - 1, 0);
+        }
+    }
+    // Sort the tags, by score:
+    qsort(AllTags, *TagCount, sizeof(TrieTag), (QSortCompare)CompareTagScores);
+    return AllTags;
+
+}
diff --git a/Tagger.h b/Tagger.h
new file mode 100644
index 0000000..c3b5ad7
--- /dev/null
+++ b/Tagger.h
@@ -0,0 +1,199 @@
+//Title:          Tagger.h
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+#ifndef TAGGER_H
+#define TAGGER_H
+// Tagger.h defines objects and functions related to the building of a tag graph,
+// and the generation of short (usually tripeptide) tags from the graph.  Each 
+// node in the graph represents a prefix residue mass (PRM), and so the nodes can 
+// be thought of as points along an m/z axis.  A (directed) edge in the graph 
+// represents a valid jump, where a "jump" is the mass of an amino acid (or modified
+// amino acid).  The tag graph is used to construct tags (formally, paths of length
+// three).  This is one approach to the local de novo interpretation problem.
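+//
+// Worked example (monoisotopic residue masses): starting from a node at PRM m, edges for
+// G (57.021 Da), A (71.037 Da) and S (87.032 Da) lead through nodes at m + 57.021,
+// m + 128.058 and m + 215.090; that path spells the tripeptide tag "GAS" with prefix mass m.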
+
+#include "Inspect.h"
+#include "Trie.h"
+#include "Spectrum.h"
+#include "IonScoring.h"
+
+// Each graph node has some set of witness ions (e.g. b and y peaks).  We encode 
+// the set of ions as a bitfield.
+// For instance, b and y together is ION_FLAG_B | ION_FLAG_Y = 0x0011.
+#define ION_FLAG_B 0x0001
+#define ION_FLAG_BH2O 0x0002
+#define ION_FLAG_BNH3 0x0004
+#define ION_FLAG_A 0x0008
+#define ION_FLAG_Y 0x0010
+#define ION_FLAG_YH2O 0x0020
+#define ION_FLAG_YNH3 0x0040
+#define ION_FLAG_B2 0x0080
+#define ION_FLAG_Y2 0x0100
+
+// INTENSITY_RANK_COUNT is the number of entries in IntensityRankBOdds and IntensityRankYOdds
+#define INTENSITY_RANK_COUNT 22
+
+// Probability that a peak with this intensity-rank is a b peak:
+extern float* IntensityRankBOdds;
+
+// Probability that a peak with this intensity-rank is a y peak:
+extern float* IntensityRankYOdds;
+
+// Probability that a peak with reported m/z this far from the expected m/z is a true b or y peak:
+extern double SkewHistoStep[100];
+
+// The witness scores for a node are based upon the collection of ion
+// types bearing witness to a particular break-point.  Similar to Dancik
+// scores.  Scores are empirically derived from a training dataset (currently ISB).  We reckon these odds
+// separately for low, medium and high mass peaks.  "Low" is "below 33% of precursor mass", 
+// and "High" is "above 66% of precursor mass".
+
+// A JumpNode captures the mass, amino acid, and PTM (if any) of a valid 
+// edge length for the tag graph.
+typedef struct JumpNode
+{
+    int Mass;
+    struct JumpNode* Next;
+    char Amino;
+    // ASSUME: We only permit one modification per peptide in a tag. 
+    MassDelta* Delta;
+    float Score;
+} JumpNode;
+
+// The type of a graph node indicates whether it was created by interpreting a spectral peak as a b or y ion,
+// or whether it is an endpoint (evGraphNodeLeft, evGraphNodeRight).  The special types evGraphNodeLeftMod,
+// evGraphNodeRightMod are created when N- and C-terminal PTMs are allowed.  
+typedef enum evGraphNodeType
+{
+    evGraphNodeB = 0,
+    evGraphNodeY,
+    evGraphNodeLeft,
+    evGraphNodeLeftMod,
+    evGraphNodeRight,
+    evGraphNodeRightMod
+} evGraphNodeType;
+
+typedef struct TagGraphNode
+{
+#ifdef DEBUG_TAG_GENERATION
+    char VerboseNodeInfo[2048];
+#endif
+    int OriginalPeakIndex;
+    int BIndex;
+    int YIndex;
+    int IntensityRankB;
+    int IntensityRankY;
+    evGraphNodeType NodeType; 
+    // A graph node is scored based upon its intensity score (intensity-rank of the b and y peak),
+    // its isotope score (whether the b and y peaks are apparently secondary isotopic peaks, primary
+    // peaks with children, or lone peaks) and its ion type score (the witness set).
+    float IntensityScore;
+    float IsotopeScore;
+    float IonTypeScore;
+    float Score;
+    //float ScoreB;
+    //float ScoreY;
+    int IonTypeFlags;
+    int Mass;
+    // List of edges leading forward in the graph:
+    struct TagGraphEdge* FirstEdge;
+    struct TagGraphEdge* LastEdge;
+    // Next and previous nodes (sorted by mass):
+    struct TagGraphNode* Next;
+    struct TagGraphNode* Prev;
+    MassDelta* PTM; // Is non-null only if NodeType is LeftMod or RightMod
+    // BackEdge, BackEdgeDouble, and BackEdgeTriple are set only when carrying out blind
+    // mod search.  They speed up the big d.p. extension algorithm.
+    struct TagGraphBackEdge** BackEdge; //[26]; // List of edges matching an unmodified aa
+    struct TagGraphBackEdge** BackEdgeDouble; //[26*26]; // List of edges matching two unmodified aa's
+    struct TagGraphBackEdge** BackEdgeTriple; //[26*26*26]; // List of edges matching three unmodified aa's
+    int Index; // This is set AFTER all the graph nodes have been created and sorted.
+} TagGraphNode;
+
+// BackEdge points to a graph node whose mass is smaller by 1, 2, or 3 unmodified amino acid masses.
+typedef struct TagGraphBackEdge
+{
+    TagGraphNode* FromNode;
+    TagGraphNode* ToNode;
+    int Score;
+    int Skew;
+    // If this edge is a double-amino-acid jump, then HalfMass is the mass after the first amino acid.
+    int HalfMass; 
+    int HalfMass2;  // For triples
+    struct TagGraphBackEdge* Next;
+} TagGraphBackEdge;
+
+// Each NODE in the graph owns a list of EDGES.  Each edge joins to a higher-mass node
+typedef struct TagGraphEdge
+{
+    TagGraphNode* FromNode;
+    TagGraphNode* ToNode;
+    JumpNode* Jump;
+    float Score;
+    struct TagGraphEdge* Next;
+    int Skew;
+} TagGraphEdge;
+
+// A TagGraph has pointers to its first/last nodes, an index (for quickly finding nodes for a PRM),
+// and a buffer of back edges (populated only in blind mode).
+typedef struct TagGraph
+{
+    TagGraphNode* FirstNode;
+    TagGraphNode* LastNode;
+    // Index: Points to the first node that could match a given rounded-to-amu mass
+    TagGraphNode** NodeIndex; 
+    int NodeIndexSize;
+    int NodeCount; // Number of nodes in the list FirstNode...LastNode.
+    struct TagGraphBackEdge* BackEdgeBuffer;
+} TagGraph;
+
+void TagGraphBuildNodeIndex(TagGraph* Graph);
+TrieNode* GenerateTagsFromSpectrum(MSSpectrum* Spectrum, TrieNode* Root, int MaximumTagCount, SpectrumTweak* Tweak);
+void CorrectParentMass(MSSpectrum* Spectrum);
+int LoadIntensityRankOdds(char* FileName);
+int LoadWitnessScores(char* FileName);
+void PopulateJumpingHash();
+int FindIntensePeak(MSSpectrum* Spectrum, int Mass, float MaxIntensity, float* FoundIntensity);
+void SpectrumFindIsotopicPeaks(MSSpectrum* Spectrum);
+TagGraph* ConstructTagGraph(MSSpectrum* Spectrum);
+void TagGraphAddNodes(TagGraph* Graph, MSSpectrum* Spectrum);
+void TagGraphPopulateEdges(TagGraph* Graph);
+void FreeTagGraph(TagGraph* Graph);
+void FreeJumpingHash();
+void FreeTagGraphNode(TagGraphNode* Node);
+void TestTagging(char* OracleFile, char* OracleDir);
+void TrainTagging(char* OracleFile, char* OracleDir);
+int CompareTagScores(const TrieTag* TagA, const TrieTag* TagB);
+TrieNode* BuildTrieFromTags(TrieTag* AllTags, int TagCount, TrieNode* Root, int MaximumTagCount);
+void SetTagSkewScores();
+void FreeTagSkewScores();
+// declaration of TagGraphGenerateTags moved out, since it uses PRMBayesianModel
+#endif // TAGGER_H
diff --git a/TrainPTMFeatures.py b/TrainPTMFeatures.py
new file mode 100644
index 0000000..70bf3c1
--- /dev/null
+++ b/TrainPTMFeatures.py
@@ -0,0 +1,762 @@
+#Title:          TrainPTMFeatures.py
+#Author:         Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+#Created:        2005
+# Copyright 2007,2008,2009 The Regents of the University of California
+# All Rights Reserved
+#
+# Permission to use, copy, modify and distribute any part of this
+# program for educational, research and non-profit purposes, by non-profit
+# institutions only, without fee, and without a written agreement is hereby
+# granted, provided that the above copyright notice, this paragraph and
+# the following three paragraphs appear in all copies.
+#
+# Those desiring to incorporate this work into commercial
+# products or use for commercial purposes should contact the Technology
+# Transfer & Intellectual Property Services, University of California,
+# San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+# Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+#
+# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+# INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+# IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+# OF SUCH DAMAGE.
+#
+# THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+# OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+# ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+# REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+# EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+# THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+
+"""
+Plan:
+Output a large collection of features for each post-translational modification accepted on a
+search of a part-bogus database.  All modifications on the bogus proteins are incorrect.
+An equivalent number of modifications on the non-bogus proteins are incorrect.  Let's compute
+a variety of features for the PTMs observed.
+
+Input:
+A collection of annotated spectra, output by SelectSites.py
+Output:
+A file listing all the observed modification sites, with various features computed.  
+
+Then, we train a model to distinguish between good (correct DB) and bad (incorrect DB)
+modifications.  Model types: LDA, logistic regression, SVM, etc.
+(Another experiment: Search unmodified spectra against a mutated database,
+judge correct precisely those modifications which "undo" the mutations)
+"""
+
+UsageInfo = """
+TrainPTMFeatures: Train a model on PTM features from a data-set.
+Run this AFTER running ComputePTMFeatures.
+
+Arguments:
+ -m [model-type]: Train a model and report its accuracy on the specified
+    (-u) file
+ -u [FeatureFile]: Path to the feature-file written out by ComputePTMFeatures
+ -v [FeatureFile]: Write scored features out to the specified file
+ -w [FileName]: Write model to the specified file.   (Set either -w OR -r)
+ -r [FileName]: Read a model from the specified file (Set either -w OR -r)
+
+Optional:
+ -f [Flag]: Perform feature selection (1 for accumulate, 2 for prune,
+    3 to train paired constitutive/facultative models)
+ -e [TestingFile]: Path to the feature-file that serves as a testing set.
+    If not specified, then the same features (-u) will be used for testing.
+    For use with -f flag only.
+ -R [Path]: Report ROC curve to the specified file
+"""
+import os
+import string
+import sys
+import struct
+import traceback
+import getopt
+import MSSpectrum
+import PyInspect
+import random
+import shutil
+import Learning
+import BasicStats
+import ResultsParser
+import SpectralSimilarity
+random.seed(1)
+from Utils import *
+Initialize()
+try:
+    from numpy import *
+    import numpy.linalg
+    FloatType = float
+    MatrixMultiply = dot
+except:
+    print "** Warning: Unable to import Numpy.  Logit training not available"
+
+ValidFeatureIndices = [2,3,5,22,23,24,26]
+
+class FeatureBits:
+    SpectrumCount = 0
+    ModlessSpectrumCount = 1
+    BestMQScore = 2
+    BestDeltaScore = 3
+    PeptideCount = 4
+    ConsensusMQScore = 5
+    PeptideLength = 6
+    TotalCutScore = 7
+    MedianCutScore = 8
+    YPresent = 9
+    BPresent = 10
+    BYIntensity = 11
+    NTT = 12
+    ModdedFraction = 13
+    SpectraThisModType = 15
+    SitesThisModType = 16
+    Dot = 18
+    Shared01 = 19
+    Shared11 = 20
+    Correlation = 21
+    LogSpectrumCount = 22
+    LogPeptideLength = 23
+    LogSpecThisType = 24
+    LogSitesThisType = 25
+    DeltaVsBigDB = 26
+    
+class FormatBits:
+    DBPos = 1
+    ModificationMass = 2
+    ModifiedAA = 3
+    ProteinName = 4
+    ModifiedResidueNumber = 5
+    Peptide = 6
+    Charge = 7
+    TrueProteinFlag = 8
+    SisterAnnotationFlag = 9
+    BestSpectrumPath = 10
+    BestModlessSpectrumPath = 11
+    BestModlessMQScore = 12
+    BigDBAnnotation = 13
+    BigDBMQScore = 14
+    SpectrumCount = 15
+    ModlessSpectrumCount = 16
+    BestMQScore = 17
+    BestDeltaScore = 18
+    PeptideCount = 19
+    ConsensusMQScore = 20
+    NTT = 27
+    ModdedFraction = 28
+    SpectraWithThisModType = 30
+    SitesWithThisModType = 31
+    LogSpectrumCount = 37
+    LogSpectraThisModType = 39
+    LogSitesThisModType = 40
+    ConsensusDeltaBigDB = 41
+    FirstFeature = 15
+    LastFeature = 41
+    FeatureCount = LastFeature - FirstFeature + 1
+    ModelScore = 42 # score for the PEPTIDE SPECIES
+    ModelPValue = 43 # p-value (probability false given this score) for the PEPTIDE SPECIES
+    SitePValue = 44 # p-value (probability false given several species) for the SITE 
+    KnownPTMName = 45
+    KnownPTMAnnotation = 46
+    KnownPTMScore = 47
+    KnownPTMSitePValue = 48
+    
+class PTMFeatureTrainer(ResultsParser.ResultsParser):
+    def __init__(self):
+        self.ResultsFileName = None
+        self.DBPath = None
+        self.OutputPath = "PTMFeatures.txt"
+        self.TempFileDir = "PTMFeatures"
+        self.PTMs = {} # keys of the form (DBPos, Mass)
+        self.CoverageThreshold = 2 # at least this many spectra to consider a residue 'covered'.
+        self.QuickParseFlag = 0 # if true, then parse only the first n lines
+        self.SpectrumDir = None
+        self.SuperSpectrumDir = None
+        self.PoolFlag = 0
+        self.ModelType = None
+        self.SisterProteins = {} # protein index -> sister protein's index
+        self.InputFeaturePath = None
+        self.ModelTestFilePath = None
+        # Dictionary of unmodified peptides, for computing the coverage level:
+        self.UnmodifiedPeptides = {}
+        self.FeatureSelectionFlag = None
+        self.CachedProteinNames = []
+        self.CachedFilePaths = []
+        self.CachedFixedFilePaths = []
+        self.StartOutputDBPos = 0
+        self.HeaderLines = []
+        self.ReportROCPath = None
+        self.OutputFeaturePath = None
+        self.ReadModelFilePath2 = None
+        self.ReadModelFilePath3 = None
+        self.TrainingSetDBRatio = 1.0
+        self.TestingSetDBRatio = 1.0
+        self.WriteModelFilePath2 = None
+        self.WriteModelFilePath3 = None
+        ResultsParser.ResultsParser.__init__(self)
+    def TrainFacultative(self):
+        """
+        Train paired models for CONSTITUTIVE ("always") and FACULTATIVE ("sometimes") PTMs.
+        """
+        # Train a model on all PTMs, to get initial scores for all PTMs.
+        # The initial model uses only CONSTITUTIVE features, and its output
+        # is used only to provide an ORACLE for the facultative model:
+        print "TRAIN model on all features:"
+        self.Model.Train(self.TrainingSetAll)
+        print "SCORE all features:"
+        self.Model.Test(self.TrainingSetAll)
+        ##############################################################
+        print "Generate SUB-MODEL of only facultative features:"
+        # Sort facultative instances by score:
+        SortedList = []
+        for Vector in self.TrainingSetAll.AllVectors:
+            if not Vector.FileBits[FormatBits.SisterAnnotationFlag]:
+                continue
+            SortedList.append((Vector.Score, Vector))
+        SortedList.sort()
+        FacFeatureSet = Learning.FeatureSetClass()
+        ChunkSize = min(len(SortedList) / 4, 1000)
+        print "Sorted list of %s facultative features, chunk size is %s"%(len(SortedList), ChunkSize)
+        for (Score, Vector) in SortedList[:ChunkSize]:
+            NewVector = Learning.FeatureVector()
+            NewVector.FileBits = Vector.FileBits[:]
+            NewVector.Features = Vector.Features[:]
+            NewVector.TrueFlag = 0
+            FacFeatureSet.AllVectors.append(NewVector)
+            FacFeatureSet.FalseVectors.append(NewVector)
+        for (Score, Vector) in SortedList[-ChunkSize:]:
+            NewVector = Learning.FeatureVector()
+            NewVector.FileBits = Vector.FileBits[:]
+            NewVector.Features = Vector.Features[:]
+            NewVector.TrueFlag = 1
+            FacFeatureSet.AllVectors.append(NewVector)
+            FacFeatureSet.TrueVectors.append(NewVector)
+        FacFeatureSet.SetCounts()
+        FacFeatureSet.GetPriorProbabilityFalse(self.TrainingSetDBRatio)
+        ##############################################################
+        # Write out the FACULTATIVE feature set:
+        FacTrainingFile = open("FacultativeTrainingSet.txt", "wb")
+        for HeaderLine in self.HeaderLines:
+            FacTrainingFile.write(HeaderLine)
+        for Vector in FacFeatureSet.AllVectors:
+            Bits = Vector.FileBits[:]
+            if Vector.TrueFlag:
+                Bits[FormatBits.TrueProteinFlag] = "1"
+            else:
+                Bits[FormatBits.TrueProteinFlag] = "0"
+            Str = string.join(Bits, "\t")
+            FacTrainingFile.write(Str + "\n")
+        FacTrainingFile.close()
+        ##############################################################
+        # Train the sub-model:
+        self.FacModel = self.GetModelObject(self.FeaturesF)
+        self.FacModel.Train(FacFeatureSet)
+        self.FacModel.Test(FacFeatureSet)
+        self.FacModel.ReportAccuracy(FacFeatureSet) # invokes ComputeOddsTrue
+        ##############################################################
+        # Apply the trained fac-model to *all* facultative features, and
+        # train an overall model on all *constitutive* features:
+        self.FeatureSetC = Learning.FeatureSetClass()
+        self.FeatureSetF = Learning.FeatureSetClass()
+        for Vector in self.TrainingSetAll.AllVectors:
+            if Vector.FileBits[FormatBits.SisterAnnotationFlag]:
+                FeatureSet = self.FeatureSetF
+            else:
+                FeatureSet = self.FeatureSetC
+            FeatureSet.AllVectors.append(Vector)
+            if Vector.TrueFlag:
+                FeatureSet.TrueVectors.append(Vector)
+            else:
+                FeatureSet.FalseVectors.append(Vector)
+        self.FeatureSetC.SetCounts()
+        self.FeatureSetF.SetCounts()
+        self.FeatureSetC.GetPriorProbabilityFalse(self.TrainingSetDBRatio)
+        self.FeatureSetF.GetPriorProbabilityFalse(self.TrainingSetDBRatio)
+        # Score facultative-feature, using facultative-model:
+        self.FacModel.Test(self.FeatureSetF)
+        # Train constitutive-ONLY model, and score constitutive features:
+        self.ConModel = self.GetModelObject(self.FeaturesC)
+        self.ConModel.Train(self.FeatureSetC)
+        self.ConModel.Test(self.FeatureSetC)
+        self.ConModel.ReportAccuracy(self.FeatureSetC) # to invoke ComputeOddsTrue
+        ##############################################################
+        # Save our models:
+        if self.WriteModelFilePath:
+            (Stub, Extension) = os.path.splitext(self.WriteModelFilePath)
+            ConModelPath = "%s.con"%Stub
+            FacModelPath = "%s.fac"%Stub
+            self.ConModel.SaveModel(ConModelPath)
+            self.FacModel.SaveModel(FacModelPath)
+        ##############################################################
+        # Write out the scored features:
+        OutputFile = open(self.OutputFeaturePath, "wb")
+        for Line in self.HeaderLines:
+            OutputFile.write(Line)
+        for Vector in self.TrainingSetAll.AllVectors:
+            if Vector.FileBits[FormatBits.SisterAnnotationFlag]:
+                PValue = self.FacModel.GetPValue(Vector.Score)
+            else:
+                PValue = self.ConModel.GetPValue(Vector.Score)
+            while len(Vector.FileBits) <= FormatBits.ModelPValue:
+                Vector.FileBits.append("")
+            Vector.FileBits[FormatBits.ModelScore] = str(Vector.Score)
+            Vector.FileBits[FormatBits.ModelPValue] = str(PValue)
+            Str = string.join(Vector.FileBits, "\t")
+            OutputFile.write(Str + "\n")
+    def GetModelObject(self, Features):
+        if self.ModelType == "lda":
+            return Learning.LDAModel(Features)
+        elif self.ModelType == "svm":
+            return Learning.SVMModel(Features)
+        elif self.ModelType == "logit":
+            return Learning.LogitModel(Features)
+        else:
+            print "** Model type NOT KNOWN!", self.ModelType
+            return
+    def TrainModel(self):
+        """
+        Our training data-set is in self.InputFeaturePath.
+        Let's train a model to predict which entries come from the true database.
+        """
+        if not self.InputFeaturePath:
+            print "* Please specify an input feature-file."
+            print UsageInfo
+            sys.exit(-1)
+        # Load in features for a collection of TRUE and FALSE instances.
+        File = open(self.InputFeaturePath, "rb")
+        self.FeatureNames = {}
+        FeatureCount = FormatBits.LastFeature - FormatBits.FirstFeature + 1
+        # We have one set of features for facultative sites, and one for constitutive.
+        # Note that some features (modification rate, correlation with unmodified peptide)
+        # are applicable to F but not C.
+        #self.FeaturesF = range(FeatureCount)
+        # For constitutive modifications: Modification rate, protein coverage,
+        # and number of unmodified peptides are all off-limits.  (Those features
+        # are "dead giveaways" that we have a non-shuffled protein!)
+        #self.FeaturesC = [2, 3, 5, 22, 24, 25, 26]
+        self.FeaturesC = ValidFeatureIndices[:]
+        #self.FeaturesC = range(FeatureCount)
+        self.FeaturesF = self.FeaturesC
+        self.FeaturesAll = []
+        for FeatureIndex in self.FeaturesF:
+            if FeatureIndex in self.FeaturesC:
+                self.FeaturesAll.append(FeatureIndex)
+        # We can OVERRIDE the list of features here, to forbid the use of some:
+        print "Permitted features all:", self.FeaturesAll
+        # Parse the features from the TRAINING and TESTING files.  We generate
+        # one training set for charge 1-2 spectra and one for charge 3+ spectra.
+        self.TrainingSet2 = Learning.FeatureSetClass()
+        self.TrainingSet2.Type = "Charge-2"
+        self.TrainingSet3 = Learning.FeatureSetClass()
+        self.TrainingSet3.Type = "Charge-3"
+        #self.TrainingSetAll = Learning.FeatureSetClass()
+        #self.TrainingSetAll.Type = "All"
+        self.ParseFeatureFile(self.InputFeaturePath, self.TrainingSet2, self.TrainingSet3,
+                              self.TrainingSetDBRatio)
+        if self.ModelTestFilePath:
+            self.TestingSet2 = Learning.FeatureSetClass()
+            self.TestingSet3 = Learning.FeatureSetClass()
+            self.ParseFeatureFile(self.ModelTestFilePath, self.TestingSet2, self.TestingSet3,
+                self.TestingSetDBRatio)
+        # SPECIAL values for model, which don't actually cause training:
+        if self.ModelType == "feature":
+            print "\n\nSINGLE feature:"
+            self.TrainOneFeature(self.TrainingSet2)
+            self.TrainOneFeature(self.TrainingSet3)
+            return
+        if self.ModelType == "featurescatter":
+            print "\n\nFeature+feature scatter-plots:"
+            self.ProduceFeatureScatterPlots(self.TrainingSetAll)
+            return
+        if self.ModelType == "summary":
+            self.PerformFeatureSummary()
+            return
+        # Instantiate our model:
+        self.Model2 = self.GetModelObject(self.FeaturesAll)
+        self.Model3 = self.GetModelObject(self.FeaturesAll)
+        # Load a pre-trained model, if we received a path:
+        if self.ReadModelFilePath2:
+            self.Model2.LoadModel(self.ReadModelFilePath2)
+            self.Model3.LoadModel(self.ReadModelFilePath3)
+        #######################################################################
+        # Special value for feature selection (3) means that we train a model on
+        # all data, then use it to generate a sub-feature-set for a facultative model!
+        if self.FeatureSelectionFlag == 3:
+            self.TrainFacultative()
+            return
+        #######################################################################
+        # If we're not doing feature selection: Train on the training set,
+        # and then (if we have a testing set) test on the testing set.
+        if not self.FeatureSelectionFlag:
+            # Train the model (unless we just loaded it in):
+            if not self.ReadModelFilePath2:
+                self.Model2.Train(self.TrainingSet2)
+                self.Model3.Train(self.TrainingSet3)
+            # Compute the score of each vector:
+            if self.ModelTestFilePath:
+                
+                self.Model2.Test(self.TestingSet2)
+                self.Model2.ReportAccuracy(self.TestingSet2)
+                
+                self.Model3.Test(self.TestingSet3)
+                self.Model3.ReportAccuracy(self.TestingSet3)
+                self.WriteScoredFeatureSet(self.TestingSet2, self.TestingSet3)
+            else:
+                
+                self.Model2.Test(self.TrainingSet2)
+                self.Model2.ReportAccuracy(self.TrainingSet2)
+                shutil.copyfile("PValues.txt", "PValues.chg2.txt")
+                
+                self.Model3.Test(self.TrainingSet3)
+                self.Model3.ReportAccuracy(self.TrainingSet3)
+                shutil.copyfile("PValues.txt", "PValues.chg3.txt")
+                #if self.ReportROCPath:
+                #    self.Model.ReportROC(self.TrainingSetAll, self.ReportROCPath)
+                self.WriteScoredFeatureSet(self.TrainingSet2, self.TrainingSet3)
+            if self.WriteModelFilePath2:
+                self.Model2.SaveModel(self.WriteModelFilePath2)
+                self.Model3.SaveModel(self.WriteModelFilePath3)
+            return
+        #######################################################################
+        # We're doing feature selection.  We'll need to write out feature files,
+        # then call TrainMachineLearner
+        print "Feature names:", self.FeatureNames
+        print "AllFeatures:", self.FeaturesAll
+        self.WriteFeaturesToFile(self.TrainingSet2, "PTMFeatures.2.txt")
+        self.WriteFeaturesToFile(self.TrainingSet3, "PTMFeatures.3.txt")
+        # *** Additive and subtractive aren't done here, the user can do it!
+    def WriteFeaturesToFile(self, TrainingSet, FileName):
+        print "Write features to %s..."%FileName
+        File = open(FileName, "wb")
+        File.write("#Index\tValidFlag\t")
+        for Key in self.FeaturesAll:
+            File.write("%s\t"%self.FeatureNames[Key])
+        File.write("\n")
+        TrainingSet.SaveTabDelimited(File)
+        File.close()
+    def ProduceFeatureScatterPlots(self, FeatureSet):
+        """
+        Iterate over all pairs of (distinct) features.  For each pair, produce a scatter-plot
+        with N true points and N false points.
+        """
+        OutputFile = open("FeatureScatterPlots.txt", "wb")
+        VectorCount = 200
+        TrueVectors = FeatureSet.TrueVectors[:]
+        random.shuffle(TrueVectors)
+        TrueVectors = TrueVectors[:VectorCount]
+        FalseVectors = FeatureSet.FalseVectors[:]
+        random.shuffle(FalseVectors)
+        FalseVectors = FalseVectors[:VectorCount]
+        # Write a HEADER:
+        HeaderStr = ""
+        for FeatureIndex in range(len(self.FeaturesAll)):
+            Feature = self.FeaturesAll[FeatureIndex]
+            HeaderStr += "T %s\tF %s\t"%(self.FeatureNames[Feature], self.FeatureNames[Feature])
+        OutputFile.write(HeaderStr + "\n")
+        # Write one row for each pair of vectors:
+        for RowIndex in range(len(TrueVectors)):
+            Str = ""
+            TrueVector = TrueVectors[RowIndex]
+            FalseVector = FalseVectors[RowIndex]
+            for Feature in self.FeaturesAll:
+                Str += "%s\t%s\t"%(TrueVector.Features[Feature], FalseVector.Features[Feature])
+            OutputFile.write(Str + "\n")
+        return
+    def WriteScoredFeatureSet(self, FeatureSet2, FeatureSet3):
+        # Write out the features with their model-scores:
+        if not self.OutputFeaturePath:
+            return
+        File = open(self.OutputFeaturePath, "wb")
+        for FileLine in self.HeaderLines:
+            File.write(FileLine)
+        SortedVectors = []
+        for Vector in FeatureSet2.AllVectors:
+            SortedVectors.append((int(Vector.FileBits[1]), Vector.FileBits[6], int(Vector.FileBits[7]), Vector))
+        for Vector in FeatureSet3.AllVectors:
+            SortedVectors.append((int(Vector.FileBits[1]), Vector.FileBits[6], int(Vector.FileBits[7]), Vector))
+        SortedVectors.sort()
+        for Tuple in SortedVectors:
+            Vector = Tuple[-1]
+            Charge = int(Tuple[2])
+            if Charge > 2:
+                Model = self.Model3
+            else:
+                Model = self.Model2
+            Bits = Vector.FileBits
+            while len(Bits) <= FormatBits.ModelPValue:
+                Bits.append("")
+            Bits[FormatBits.ModelScore] = str(Vector.Score)
+            Bits[FormatBits.ModelPValue] = str(self.Model2.GetPValue(Vector.Score))
+            Str = string.join(Bits, "\t")
+            File.write(Str + "\n")                
+        File.close()
+        return
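+        # NOTE: the statements below are never executed; the return above exits first.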
+        # Iterate over all vectors, write them all out:
+        for Vector in FeatureSet2.AllVectors:
+            Bits = Vector.FileBits
+            while len(Bits) <= FormatBits.ModelPValue:
+                Bits.append("")
+            Bits[FormatBits.ModelScore] = str(Vector.Score)
+            Bits[FormatBits.ModelPValue] = str(self.Model2.GetPValue(Vector.Score))
+            Str = string.join(Bits, "\t")
+            File.write(Str + "\n")
+        # Iterate over all vectors, write them all out:
+        for Vector in FeatureSet3.AllVectors:
+            Bits = Vector.FileBits
+            while len(Bits) <= FormatBits.ModelPValue:
+                Bits.append("")
+            Bits[FormatBits.ModelScore] = str(Vector.Score)
+            Bits[FormatBits.ModelPValue] = str(self.Model3.GetPValue(Vector.Score))
+            Str = string.join(Bits, "\t")
+            File.write(Str + "\n")
+        File.close()
+    def ParseFeatureFile(self, FilePath, FeatureSet2, FeatureSet3, DBRatio):
+        """
+        Initialize the feature sets by parsing features from the specified FilePath.
+        Charge 1-2 vectors go to FeatureSet2, charge 3+ vectors go to FeatureSet3.
+        """
+        File = open(FilePath, "rb")
+        # Parse the header line specially:
+        HeaderLine = File.readline()
+        self.HeaderLines.append(HeaderLine)
+        Bits = HeaderLine.strip().split("\t")
+        for BitIndex in range(len(Bits)):
+            if BitIndex >= FormatBits.FirstFeature:
+                self.FeatureNames[BitIndex - FormatBits.FirstFeature] = Bits[BitIndex]
+                #if BitIndex <= FormatBits.LastFeature:
+                #    print "Feature %s: %s"%(BitIndex - FormatBits.FirstFeature, Bits[BitIndex])
+        # Iterate over non-header lines:
+        LineNumber = 0
+        for FileLine in File.xreadlines():
+            LineNumber += 1
+            if FileLine[0] == "#":
+                self.HeaderLines.append(FileLine)
+                continue # skip comment line
+            if not FileLine.strip():
+                continue # skip blank line
+            Bits = FileLine.replace("\r","").replace("\n","").split("\t")
+            # If there are TOO MANY bits, then discard the extras:
+            Bits = Bits[:FormatBits.LastFeature + 1]
+            try:
+                TrueFlag = int(Bits[FormatBits.TrueProteinFlag])
+            except:
+                continue # skip; not a valid instance line
+            Charge = int(Bits[FormatBits.Charge])
+            SisterAnnotation = Bits[FormatBits.SisterAnnotationFlag]
+            Vector = Learning.FeatureVector()
+            if Charge > 2:
+                FeatureSet = FeatureSet3
+            else:
+                FeatureSet = FeatureSet2
+            try:
+                for FeatureBitIndex in range(FormatBits.FirstFeature, FormatBits.LastFeature + 1):
+                    FeatureIndex = FeatureBitIndex - FormatBits.FirstFeature
+                    #if FeatureIndex not in self.FeaturesAll:
+                    #    continue
+                    if FeatureBitIndex < len(Bits) and Bits[FeatureBitIndex].strip() and Bits[FeatureBitIndex] != "None":
+                        Vector.Features.append(float(Bits[FeatureBitIndex]))
+                    else:
+                        Vector.Features.append(0)
+                Vector.FileBits = Bits
+                Vector.TrueFlag = TrueFlag
+                if TrueFlag:
+                    FeatureSet.TrueVectors.append(Vector)
+                else:
+                    FeatureSet.FalseVectors.append(Vector)
+                FeatureSet.AllVectors.append(Vector)
+            except:
+                traceback.print_exc()
+                print "** Error on line %s column %s of feature file"%(LineNumber, FeatureIndex)
+                print Bits
+        File.close()
+        # Initialize counts:
+        for FeatureSet in (FeatureSet2, FeatureSet3):
+            FeatureSet.SetCounts()
+            FeatureSet.GetPriorProbabilityFalse(DBRatio)
+        print "CHARGE 1,2: Read in %s true and %s false vectors"%(FeatureSet2.TrueCount, FeatureSet2.FalseCount)
+        print "CHARGE  3+: Read in %s true and %s false vectors"%(FeatureSet3.TrueCount, FeatureSet3.FalseCount)
+    def ReportAccuracy(self, SortedList, ROCCurvePlotPath = None):
+        """
+        The list should have entries of the form (ModelScore, TrueFlag)
+        We'll sort them from high model scores to low, and report how many
+        TRUE positives we have for a given FALSE DISCOVERY RATE.
+        """
+        SortedList.sort()
+        SortedList.reverse()
+        AllTrueCount = 0
+        for Tuple in SortedList:
+            AllTrueCount += Tuple[-1]
+        AllFalseCount = len(SortedList) - AllTrueCount
+        print "SortedList has %s entries, %s true"%(len(SortedList), AllTrueCount)
+        # Iterate through the list from best to worst.  Report the number of hits
+        # before false positive rate rises above 1%, and before it rises above 5%.
+        # ALSO: Compute the area under the ROC curve!
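+        # Worked example (illustrative): the false discovery rate at a cutoff is
+        # FalseCount / TrueCount among the hits above that cutoff.  If the top 100
+        # hits contain 95 true and 5 false entries, the FDR is 5/95 ~= 0.053, so that
+        # cutoff just misses the 5% threshold but passes the 7% and 10% thresholds.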
+        TrueCount = 0
+        FalseCount = 0
+        Cutoffs = (0.01, 0.03, 0.05, 0.07, 0.1)
+        HitFlags = [0] * len(Cutoffs)
+        Thresholds = [0] * len(Cutoffs)
+        BestCounts = [0] * len(Cutoffs)
+        BestCountsGenerous = [0] * len(Cutoffs)
+        PrevStuff = None
+        TopCount = 0
+        TopCountFalse = 0
+        if ROCCurvePlotPath:
+            ROCCurvePlotFile = open(ROCCurvePlotPath, "wb")
+        ROCTPForFP = {}
+        ROCTPForFPCount = {}
+        # Find the cutoff that gives a particular DISCOVERY RATE:
+        for Index in range(len(SortedList)):
+            Tuple = SortedList[Index]
+            if Tuple[-1]:
+                TrueCount += 1
+            else:
+                FalseCount += 1
+            if (TrueCount + FalseCount) <= 200:
+                TopCount = (TrueCount + FalseCount)
+                TopCountFalse = FalseCount
+            OverallTPRate = TrueCount / float(max(1, AllTrueCount))
+            OverallFPRate = FalseCount / float(max(1, AllFalseCount))
+            Bin = int(round(OverallFPRate * 100))
+            ROCTPForFP[Bin] = ROCTPForFP.get(Bin, 0) + OverallTPRate
+            ROCTPForFPCount[Bin] = ROCTPForFPCount.get(Bin, 0) + 1
+            if ROCCurvePlotPath:
+                ROCCurvePlotFile.write("%s\t%s\t%s\t%s\t%s\t\n"%(Index, TrueCount, FalseCount, OverallFPRate, OverallTPRate))
+            #print Index, Tuple[0], TrueCount, FalseCount, OverallTrueCount, OverallFalseCount, OverallTPRate, OverallFPRate
+            if Tuple[0] == PrevStuff:
+                if TopCount == (TrueCount + FalseCount - 1):
+                    TopCount = (TrueCount + FalseCount)
+                    TopCountFalse = FalseCount
+                continue
+            PrevStuff = Tuple[0]
+            FDRate = FalseCount / float(max(1, TrueCount))
+            FDRate = min(1.0, FDRate)            
+            for CutIndex in range(len(Cutoffs)):
+                if FDRate > Cutoffs[CutIndex]:
+                    HitFlags[CutIndex] = 1
+                if not HitFlags[CutIndex]:
+                    BestCounts[CutIndex] = max(BestCounts[CutIndex], TrueCount)
+                    Thresholds[CutIndex] = Tuple[0]
+                if FDRate <= Cutoffs[CutIndex]:
+                    BestCountsGenerous[CutIndex] = max(BestCountsGenerous[CutIndex], TrueCount)
+        # Compute the area under the ROC curve.
+        for Bin in range(0, 100):
+            if ROCTPForFP.has_key(Bin):
+                ROCTPForFP[Bin] /= float(ROCTPForFPCount[Bin])
+        ROCArea = 0
+        for Bin in range(0, 100):
+            if ROCTPForFP.has_key(Bin):
+                ROCArea += 0.01 * ROCTPForFP[Bin]
+                #print "%s: %s"%(Bin, ROCTPForFP[Bin])
+            else:
+                # Interpolate between points:
+                PrevX = 0 # default
+                PrevY = 0 # default
+                for PrevBin in range(Bin - 1, -1, -1):
+                    if ROCTPForFP.has_key(PrevBin):
+                        PrevX = PrevBin
+                        PrevY = ROCTPForFP[PrevBin]
+                        break
+                NextX = 100
+                NextY = 1
+                for NextBin in range(Bin + 1, 101):
+                    if ROCTPForFP.has_key(NextBin):
+                        NextX = NextBin
+                        NextY = ROCTPForFP[NextBin]
+                        break
+                InterpolatedValue = PrevY + (Bin - PrevX) * float(NextY - PrevY) / (NextX - PrevX)
+                ROCArea += 0.01 * InterpolatedValue
+        for CutIndex in range(len(Cutoffs)):
+            Sensitivity = 100 * BestCounts[CutIndex] / float(max(1, AllTrueCount))
+            print "  At %.1f%% FDRate (cutoff %.5f), got %s PTMs (sensitivity %.2f%%)"%(Cutoffs[CutIndex] * 100, Thresholds[CutIndex],
+                BestCounts[CutIndex], Sensitivity)
+            print "  ->True sensitivity: %.4f%%"%(100 * BestCounts[CutIndex] / float(max(1, AllTrueCount - AllFalseCount)))
+        print "False positive rate among top %s sites: %s"%(TopCount, 100*TopCountFalse/float(max(1, TopCount)))
+        print "Overall, %s true and %s false features."%(TrueCount, FalseCount)
+        print "ROC curve area: %.5f"%ROCArea
+        # The 'score' we return is a tuple giving the best accuracy at several cutoffs:
+        return (BestCounts[2], BestCounts[0], BestCounts[4], BestCounts[3], BestCounts[2])
+    def PerformFeatureSummary(self):
+        for FeatureIndex in range(len(self.Features)):
+            TrueList = []
+            for Tuple in self.TrueTuples:
+                TrueList.append(Tuple[FeatureIndex])
+            TrueList.sort()
+            (TMean, TStdDev) = BasicStats.GetMeanStdDev(TrueList)
+            FalseList = []
+            for Tuple in self.FalseTuples:
+                FalseList.append(Tuple[FeatureIndex])
+            FalseList.sort()
+            (FMean, FStdDev) = BasicStats.GetMeanStdDev(FalseList)
+            print "Feature %s (%s):"%(FeatureIndex, self.FeatureNames[FeatureIndex])
+            print "  True: Mean %.4f, stddev %.4f (range %.4f..%.4f)"%(TMean, TStdDev, TrueList[0], TrueList[-1])
+            print "  False: Mean %.4f, stddev %.4f (range %.4f..%.4f)"%(FMean, FStdDev, FalseList[0], FalseList[-1])
+    def TrainOneFeature(self, TrainingSet):
+        """
+        Compute accuracy for a very simple-minded model:
+        Rank sites by the value of a SINGLE FEATURE (descending order)
+        """
+        for FeatureIndex in range(FormatBits.FeatureCount):
+            SortedList = []
+            for Vector in TrainingSet.TrueVectors:
+                SortedList.append((Vector.Features[FeatureIndex], random.random(), 1))
+            for Vector in TrainingSet.FalseVectors:
+                SortedList.append((Vector.Features[FeatureIndex], random.random(), 0))
+            # And report the accuracy of this lonely feature:
+            print
+            print "Feature %s (%s):"%(FeatureIndex, self.FeatureNames[FeatureIndex])
+            self.ReportAccuracy(SortedList)
+    def ParseCommandLine(self, Arguments):
+        (Options, Args) = getopt.getopt(Arguments, "m:u:v:r:w:f:e:R:D:")
+        OptionsSeen = {}
+        for (Option, Value) in Options:
+            OptionsSeen[Option] = 1
+            if Option == "-m":
+                self.ModelType = Value.lower()
+            elif Option == "-D":
+                self.TrainingSetDBRatio = float(Value)
+            elif Option == "-r":
+                if not os.path.exists(Value):
+                    print "** Error: Model file '%s' not found for reading.\n"%Value
+                    return 0
+                self.ReadModelFilePath2 = "%s.2"%Value
+                self.ReadModelFilePath3 = "%s.3"%Value
+            elif Option == "-w":
+                #self.WriteModelFilePath = Value
+                self.WriteModelFilePath2 = "%s.2"%Value
+                self.WriteModelFilePath3 = "%s.3"%Value
+            elif Option == "-u":
+                if not os.path.exists(Value):
+                    print "** Error: Feature file '%s' not found for reading.\n"%Value
+                    return 0
+                self.InputFeaturePath = Value
+            elif Option == "-v":
+                self.OutputFeaturePath = Value
+            elif Option == "-e":
+                self.ModelTestFilePath = Value
+            elif Option == "-f":
+                self.FeatureSelectionFlag = int(Value)
+            elif Option == "-R":
+                self.ReportROCPath = Value
+            else:
+                print "* Error: Unrecognized option %s"%Option
+                return 0
+        return 1 # success
+
+if __name__ == "__main__":
+    try:
+        import psyco
+        psyco.full()
+    except:
+        print "(psyco not installed; running unoptimized)"
+    Trainer = PTMFeatureTrainer()
+    Result = Trainer.ParseCommandLine(sys.argv[1:])
+    if not Result:
+        sys.exit(-1)
+    if Trainer.ModelType:
+        Trainer.TrainModel()
+        sys.exit()
+    print UsageInfo
+    sys.exit(-1)
+    
+    
diff --git a/Trie.c b/Trie.c
new file mode 100644
index 0000000..80407d8
--- /dev/null
+++ b/Trie.c
@@ -0,0 +1,2659 @@
+//Title:          Trie.c
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+#include "CMemLeak.h"
+#include "Trie.h"
+#include "Utils.h"
+#include <memory.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <math.h> // for fabs
+#include <ctype.h>
+#include "Spectrum.h"
+#include "Mods.h"
+#include "Score.h"
+#include "Tagger.h"
+#include "BN.h"
+#include "Scorpion.h"
+#include "Errors.h"
+#include "SVM.h"
+#include "LDA.h"
+
+// If two tags have the same peptide string, and their prefix/suffix masses agree within this amount, then
+// consider them identical and only use the top scorer. 
+// 1.5 daltons:
+#define IDENTICAL_TAG_EPSILON 1500
+
+// Number of chars allowed in a post-translational modification name.  ('phosphorylation' is a typical name)
+#define MAX_PTMOD_NAME 256
+
+#define TRIE_INDEX_RECORD_SIZE (LONG_LONG_SIZE + sizeof(int) + 80*sizeof(char))
+#define SPLICEDB_INDEX_RECORD_SIZE (LONG_LONG_SIZE + sizeof(int) + 80*sizeof(char))
+
+// Global variable storing configurable options:
+Options* GlobalOptions;
+InspectStats* GlobalStats;
+
+////////////////////////////////////////////////////////////////////////////////////////
+// Forward declarations:
+void FreeTrieTagHangerList(TrieTagHanger* Head, int FreeTags);
+void FlagMandatoryModUsage(TrieNode* Node);
+int ExtendTagMatchBlind(SearchInfo* Info, TrieNode* Node, char* Buffer, int BufferPos, int BufferEnd, int FilePos);
+int ProcessGeneHitsBlindTag();
+int InsertBlindTagMatch(BlindTagMatch* Match);
+void FreeBlindTagMatch(BlindTagMatch* This);
+void FreeAllBlindTagMatches(BlindTagMatch* This);
+int IsIdenticalBlindTagMatches(BlindTagMatch* NodeA, BlindTagMatch* NodeB);
+
+Peptide* FindMatchedPeptide(char* Bases);
+void AddPTMassesToTagTable(int TagTableSize, char* CurrentTag, float Mass, char FirstAllowedPep, int CharsLeft, int ModsLeft,
+                           int Peptide, int MinMod);
+
+
+// Indexed by characters.  (Use upper-case amino acid codes)
+int StandardPeptideMass[256];
+// PeptideMass may be different from StandardPeptideMass if a fixed modification
+// (e.g. +57 to all cysteine residues) has been applied.  
+int PeptideMass[256];
+
+// A decoration is a collection of post-translational modifications.  This includes the
+// 'empty decoration', with no modifications, and mass 0.  Each decoration has an index;
+// they are ordered from smallest mass to largest.
+
+// Size of the decoration array:
+int DecorationMassCount;
+
+// Mass of each decoration:
+float* DecorationMasses;
+
+// Largest mass over all our decorations
+float DecorationMaxMass;
+
+// DecorationMassMods[DecorationIndex][n] is the index of the nth mod used in a particular decoration.
+// For decorations that use fewer than the maximum allowed number of mods, we store an index of -1.
+int** DecorationMassMods;
+
+// DecorationModCount[DecorationIndex] is the number of mods used in a decoration
+int* DecorationModCount;
+
+// PTModCount lists how many post-translational mods exist for each peptide.  (Faster than iterating
+// over a full 2D table of flags).  Indexed by peptide-char (entry #0 is alanine)
+int PTModCount[TRIE_CHILD_COUNT];
+
+// How many modifications are there, in all?
+int TotalPTMods;
+
+// SubDecorations tells how to get to a sub-decoration (a decoration containing fewer post-translational
+// modifications) from a parent decoration.  SubDecorations[DecorIndex][Modification] is the index 
+// of the decoration Decor with one such modification removed.  SubDecorations entries are -1 if the specified
+// mod isn't part of the specified decoration.
+int** SubDecorations;
+
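+// Illustrative sketch (hypothetical helper, not referenced elsewhere in Inspect):
+// because decorations are ordered from smallest to largest mass, the largest
+// decoration that still fits under a mass budget can be found by scanning backward
+// from the end of the array.  This ordering is the invariant that the flanking-mass
+// extension code further down in this file steps through.
+static int SketchLargestDecorationUnderBudget(float MassBudget)
+{
+    int DecorationIndex;
+    for (DecorationIndex = DecorationMassCount - 1; DecorationIndex >= 0; DecorationIndex--)
+    {
+        if (DecorationMasses[DecorationIndex] <= MassBudget)
+        {
+            return DecorationIndex; // every decoration below this index is lighter still
+        }
+    }
+    return -1; // not even the lightest decoration fits
+}
+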
+// PTModMass lists the mass of each post-translational mod for each peptide.  (Redundant storage,
+// for fast lookups)
+float PTModMass[TRIE_CHILD_COUNT][MAX_PT_MODTYPE];
+
+// PTModIndex lists the index of each post-translational mod for each peptide.  (So, PTModIndex[0][0] is the
+// first modification available to alanine.)
+int PTModIndex[TRIE_CHILD_COUNT][MAX_PT_MODTYPE];
+
+// Names of all known PTMods.  
+char PTModName[MAX_PT_MODTYPE][MAX_PTMOD_NAME];
+float ModMasses[MAX_PT_MODTYPE];
+
+// BlindTagMatch pointers for the list of matches to a single gene:
+BlindTagMatch* FirstBlindTag = NULL;
+BlindTagMatch* LastBlindTag = NULL;
+
+void InitStats()
+{
+    if (GlobalStats)
+    {
+        memset(GlobalStats, 0, sizeof(InspectStats));
+    }
+    else
+    {
+      GlobalStats = (InspectStats*)calloc(1, sizeof(InspectStats));
+    }
+}
+
+// Set global options to reasonable default values:
+void InitOptions()
+{
+  GlobalOptions = (Options*)calloc(1, sizeof(Options));
+    GlobalOptions->MaxPTMods = 0; 
+    GlobalOptions->Epsilon = DEFAULT_EPSILON;
+    GlobalOptions->FlankingMassEpsilon = DEFAULT_FLANKING_MASS_EPSILON; 
+    GlobalOptions->OutputFile = stdout;
+    sprintf(GlobalOptions->ErrorFileName, "inspect.err");
+    GlobalOptions->ErrorCount = 0;
+    GlobalOptions->WarningCount = 0;
+    GlobalOptions->ReportAllMatches = 1;
+    GlobalOptions->ParentMassEpsilon = DEFAULT_PARENT_MASS_EPSILON;
+    GlobalOptions->ParentMassPPM = DEFAULT_PARENT_MASS_PPM;
+    GlobalOptions->ReportMatchCount = 10; // Don't report more than 10 to the page!
+    GlobalOptions->StoreMatchCount = 100;  // 
+    GlobalOptions->MandatoryModIndex = -1; // By default, there is no mandatory modification
+    GlobalOptions->GenerateTagCount = 100; 
+    GlobalOptions->GenerateTagLength = DEFAULT_TAG_LENGTH;
+    GlobalOptions->DynamicRangeMin = 105 * DALTON; 
+    GlobalOptions->DynamicRangeMax = 2000 * DALTON; 
+    GlobalOptions->TrieBlockSize = 250;
+    GlobalOptions->TagPTMMode = 2;
+    //strcpy(GlobalOptions->AminoFileName, FILENAME_AMINO_ACID_MASSES);
+    sprintf(GlobalOptions->InputFileName, "Input.txt");
+    GlobalOptions->MinPTMDelta = -200;
+    // Default MaxPTMDelta is 250; this allows us to find GlcNAc (203) and biotin (226)
+    GlobalOptions->MaxPTMDelta = 250;
+    GlobalOptions->DeltaBinCount = (GlobalOptions->MaxPTMDelta - GlobalOptions->MinPTMDelta) * 10 + 1;
+    GlobalOptions->DeltasPerAA = max(512, GlobalOptions->DeltaBinCount * 2);
+    GlobalOptions->NewScoring = 0;
+    GlobalOptions->MinLogOddsForMutation = -100; // A sufficiently small number so that no candidates are omitted
+    
+}
+
+//constructor for a new BlindTagMatch
+BlindTagMatch* NewBlindTagMatch()
+{
+    BlindTagMatch* This;
+    This = (BlindTagMatch*)calloc(1, sizeof(BlindTagMatch));
+    This->Next = NULL; //set a few pointers up for tidiness
+    This->Prev = NULL;
+    return This;
+}
+//destructor for BlindTagMatch.  This frees ALL connected nodes
+//following the next pointer
+void FreeAllBlindTagMatches(BlindTagMatch* This)
+{
+    This->Tag = NULL;  //free pointer to tag, but KEEP TAG
+    This->Prev = NULL;
+    if (This->Next){
+        FreeAllBlindTagMatches(This->Next);
+    }
+    This->Next = NULL;
+    SafeFree(This);
+}
+//This destructor assumes that the links have been
+//previously nullified, and the linked list fixed
+//and ready for this node to be wiped out
+void FreeBlindTagMatch(BlindTagMatch* This)
+{
+    This->Tag = NULL;//free pointer to tag, but KEEP TAG, it's part of the trie
+    This->Next = NULL;
+    This->Prev = NULL;
+    SafeFree(This);
+}
+
+
+// Constructor for a new TrieNode
+TrieNode* NewTrieNode()
+{
+    TrieNode* This;
+    int Index;
+    This = (TrieNode*)calloc(1, sizeof(TrieNode));
+    
+    return This;
+}
+
+
+// Free a trie node.  Also frees its tag-nodes (if any), and recursively frees its children.
+void FreeTrieNode(TrieNode* This)
+{
+    int Letter;
+    if (!This)
+    {
+        return;
+    }
+    // Free our tag nodes:
+    FreeTrieTagHangerList(This->FirstTag, 1);
+
+    // Free our children too!  Free them only AFTER
+    // we iterate over them, since Node->Next must be
+    // valid at the end of each loop-cycle.
+    for (Letter = 0; Letter < TRIE_CHILD_COUNT; Letter++)
+    {
+        // Nodes I and K always point to the same child as L and Q, respectively.
+        // So...don't free them twice!
+        if (Letter == ('I'-'A') || Letter == ('K'-'A'))
+        {
+            continue;
+        }
+        if (This->Children[Letter])
+        {
+            FreeTrieNode(This->Children[Letter]);
+        }
+    }
+
+    // Ok, now free ourselves:
+    SafeFree(This);
+}
+
+// Constructor for a TrieTag
+TrieTag* NewTrieTag()
+{
+    TrieTag* This;
+    int ModIndex;
+    //
+    This = (TrieTag*)calloc(sizeof(TrieTag), 1);
+    for (ModIndex = 0; ModIndex < MAX_PT_MODS; ModIndex++)
+    {
+        This->AminoIndex[ModIndex] = -1;
+    }
+    This->DBTagMatches = 0;
+    This->PrefixExtends = 0;
+    This->SuffixExtends = 0;
+    return This;
+}
+
+// Destructor for a TrieTag
+void FreeTrieTag(TrieTag* This)
+{
+    SafeFree(This);
+}
+
+// Trie construction helper function.
+// We've got a node on the Trie which completely matches a tag.  So,
+// add this tag to the list of TrieTagNodes on this TrieNode.
+TrieTagHanger* TrieNodeAddTagNode(TrieNode* Node, TrieTag* Tag, int* DuplicateFlag)
+{
+    TrieTagHanger* Hanger;
+    TrieTag* LocalTag;
+    
+    // Look at our current list of tags for the node.
+    // DON'T add this tag if we already have the same pre-and-post masses.
+    // (We normally add tags in order from best to worst, so in the event that
+    // two tags are quite similar, we keep the one with the higher score)
+    //printf("Adding tag '%s' %.2f...\n", Tag->Tag, Tag->PrefixMass); 
+    for (Hanger = Node->FirstTag; Hanger; Hanger = Hanger->Next)
+    {
+        if (Hanger->Tag->PSpectrum == Tag->PSpectrum)
+        {
+            if ((abs(Hanger->Tag->SuffixMass - Tag->SuffixMass) < IDENTICAL_TAG_EPSILON) &&
+                (abs(Hanger->Tag->PrefixMass - Tag->PrefixMass) < IDENTICAL_TAG_EPSILON))
+            {
+                // Prefer the new prefix/suffix, if this new tag scores higher:
+                if (Hanger->Tag->Score < Tag->Score)
+                {
+                    Hanger->Tag->PrefixMass = Tag->PrefixMass;
+                    Hanger->Tag->SuffixMass = Tag->SuffixMass;
+                }
+                *DuplicateFlag = 1;
+                return Hanger;
+            }
+        }
+    }
+
+    Hanger = NewTrieTagHanger();
+    LocalTag = NewTrieTag();
+    memcpy(LocalTag, Tag, sizeof(TrieTag));
+    Hanger->Tag = LocalTag;
+    //Hanger->Tag = Tag;
+    if (Node->LastTag)
+    {
+        Node->LastTag->Next = Hanger;
+        Hanger->Prev = Node->LastTag;
+    }
+    else
+    {
+        Node->FirstTag = Hanger;
+    }
+    Node->LastTag = Hanger;
+    *DuplicateFlag = 0;
+    return Hanger;
+}
+
+// We've got a tag - add it to the trie.
+TrieNode* AddTagToTrie(TrieNode* Root, TrieTag* Tag, int* DuplicateFlag)
+{
+    TrieNode* Node;
+    TrieNode* NextNode;
+    int Index;
+    int TagLength;
+    char TagChar;
+
+    int Index2;
+
+    //printf("Adding tag: %s\n",Tag->Tag);
+    // 
+    // First, travel down the trie, matching the specified tag as far as possible (perhaps completely):
+    Index = 0;
+    TagLength = Tag->TagLength; 
+    Node = Root;
+
+    //printf("Gene: %s\n",Gene->Name);
+    //    printf("**Root: %p\n",Root);
+    //for(Index2 = 0; Index2 < TRIE_CHILD_COUNT; ++Index2)
+    // {
+    //	printf(" Child[%c] = %p\n",Index2 + 'A',Root->Children[Index2]);
+    // }
+    //getchar();
+
+    //fflush(stdout);
+
+    while (1)
+    {
+        TagChar = Tag->Tag[Index];
+        if (TagChar == 'I')
+        {
+            TagChar = 'L';
+        }
+        else if (TagChar == 'K')
+        {
+            TagChar = 'Q';
+        }
+        // Look up our child for this letter:
+        NextNode = Node->Children[TagChar - 'A'];
+        if (!NextNode)
+        {
+	  
+            // Ok, we matched as far as possible - next we'll add children to match the remainder of the tag.
+            break;
+        }
+        Node = NextNode;
+        Index++;
+        // Did we match the tag completely?
+        if (Index == TagLength)
+        {
+            // Aha - this tag is in the trie!  Add the tag to the list:
+	  //printf("Tag is already in trie!!\n");
+	  //fflush(stdout);
+	  //getchar();
+            TrieNodeAddTagNode(Node, Tag, DuplicateFlag);
+            return Node;  
+        }
+    }
+    // Ok, we didn't match the entire tag...so, start adding child nodes now!
+    while (Index < TagLength)
+    {
+        NextNode = NewTrieNode();
+        NextNode->Letter = Tag->Tag[Index];
+        NextNode->Depth = Index + 1;
+        Node->Children[Tag->Tag[Index] - 'A'] = NextNode;
+	//printf("Adding trans %c to node %p\n",Tag->Tag[Index],Node);
+	//printf("Child[%c] = %p = %p\n",Tag->Tag[Index],Node->Children[Tag->Tag[Index]-'A'],Root->Children[Tag->Tag[Index] - 'A']);
+
+        // Point the I and L child slots to the same place, and likewise K and Q.
+        switch (Tag->Tag[Index])
+        {
+        // Special case for aminos with same mass (I and L equal, K and Q are off by <.1):
+        // Child pointers for I and L, and for K and Q both point to the same place.
+        case 'I':
+            Node->Children['L'-'A'] = NextNode;
+            break;
+        case 'L':
+            Node->Children['I'-'A'] = NextNode;
+            break;
+        case 'K':
+            Node->Children['Q'-'A'] = NextNode;
+            break;
+        case 'Q':
+            Node->Children['K'-'A'] = NextNode;
+            break;
+        default:
+            break;
+        }
+        Node = NextNode;
+        Index++;
+    }
+
+    //printf("**Root: %p\n",Root);
+    //for(Index2 = 0; Index2 < TRIE_CHILD_COUNT; ++Index2)
+    // {
+    //	printf(" Child[%c] = %p\n",Index2 + 'A',Root->Children[Index2]);
+    // }
+    //getchar();
+
+    //    fflush(stdout);
+    TrieNodeAddTagNode(Node, Tag, DuplicateFlag);
+
+    return Node;
+}
+
+// Constructor for a TagHanger
+TrieTagHanger* NewTrieTagHanger()
+{
+    TrieTagHanger* This;
+    //
+    This = (TrieTagHanger*)calloc(1, sizeof(TrieTagHanger));
+    return This;
+}
+
+// Destructor for a TagHanger
+void FreeTrieTagHanger(TrieTagHanger* This)
+{
+    SafeFree(This);
+}
+
+// Destructor for a TagHanger list
+void FreeTrieTagHangerList(TrieTagHanger* Head, int FreeTags)
+{
+    TrieTagHanger* Prev = NULL;
+    //
+    for (; Head; Head = Head->Next)
+    {
+        if (Prev)
+        {
+            if (FreeTags)
+            {
+                FreeTrieTag(Prev->Tag);
+            }
+            FreeTrieTagHanger(Prev);
+        }
+        Prev = Head;
+    }
+    if (Prev)
+    {
+        if (FreeTags)
+        {
+            FreeTrieTag(Prev->Tag);
+        }
+        FreeTrieTagHanger(Prev);
+    }
+}
+
+// Prints a Trie node, using indentation to denote depth.
+// The entry point is DebugPrintTrie, which calls this function.
+int DebugPrintTrieHelper(TrieNode* Root, char* TagSoFar)
+{
+    char Buffer[1024];
+    char TagBuffer[1024];
+    int Index;
+    int BufferPos;
+    int TagLength;
+    TrieTagHanger* Node;
+    int Letter;
+    int TagCount = 0;
+    //
+    TagLength = strlen(TagSoFar);
+    BufferPos = 0;
+    for (Index = 0; Index < TagLength; Index++)
+    {
+        Buffer[BufferPos++]=' ';
+        Buffer[BufferPos++]=' ';
+        Buffer[BufferPos++]=' ';
+    }
+    Buffer[BufferPos] = '\0';
+    //
+    strcpy(TagBuffer, TagSoFar);
+    if (Root->Letter)
+    {
+        BufferPos = strlen(TagBuffer);
+        TagBuffer[BufferPos++] = Root->Letter;
+        TagBuffer[BufferPos++] = '\0';
+    }
+    printf("%s%s\n", Buffer, TagBuffer);
+
+    // Print attached tags:
+    for (Node = Root->FirstTag; Node; Node = Node->Next)
+    {
+        printf("%s%s: prefix %d,suffix %d mods %d\n", Buffer, Node->Tag->Tag, Node->Tag->PrefixMass,Node->Tag->SuffixMass, Node->Tag->ModsUsed);
+        TagCount++;
+    }
+    if (Root->FailureNode)
+    {
+        printf("%s Failure: Skip %d, depth %d\n", Buffer, Root->FailureLetterSkip, Root->FailureNode->Depth);
+    }
+    else
+    {
+        printf("%s (no failure node set)\n", Buffer);
+    }
+
+    // Print children:
+    for (Letter = 0; Letter < TRIE_CHILD_COUNT; Letter++)
+    {
+        if (Root->Children[Letter])
+        {
+            
+            TagCount += DebugPrintTrieHelper(Root->Children[Letter], TagBuffer);
+        }
+    }
+    return TagCount;
+}
+
+// Print out a trie and all its nodes to stdout.
+void DebugPrintTrie(TrieNode* Root)
+{
+    int TagCount;
+
+    printf("-->Trie:\n");
+    TagCount = DebugPrintTrieHelper(Root, "");
+    printf("Total tags: %d\n", TagCount);
+    printf("(end of trie nodes)\n");
+}
+
+// Set up all the failure nodes for our trie
+// The failure node for a trie node is the node you jump to when you're currently
+// matching that trie node, but then break a match because none of your children
+// match the *next* character.  If another trie node matches a substring of this
+// node (not a prefix, but any other substring), we must try matching that node as 
+// well.
+// Example: Suppose we have nodes ABCDE and BCD, and we're scanning text ABCDF.
+// when we reach F, we jump to ABCDE's failure node BCD, and move the anchor to B.
+void InitializeTrieFailureNodes(TrieNode* Root, TrieNode* Node, char* Tag)
+{
+    TrieNode* FailureNode;
+    int Letter;
+    int TagLength;
+    int StartIndex = 0;
+    int EndIndex = 0;
+    //
+    TagLength = strlen(Tag);
+    if (!Root)
+    {
+        return;
+    }
+    if (Node == Root)
+    {
+        // Failure on the root means the letter can't start a tag; no speedup, just step forward:
+        Root->FailureNode = Root;
+        Root->FailureLetterSkip = 1;
+    }
+    else
+    {
+        // There's a real tag.  Navigate to the SHORTEST node-with-tags which matches a suffix of our tag.
+        // Try knocking off one letter, then two, and so on:
+        for (StartIndex=1; StartIndex<TagLength; StartIndex++)
+        {
+            FailureNode = Root;
+            for (EndIndex = StartIndex; EndIndex<TagLength; EndIndex++)
+            {
+                Letter = Tag[EndIndex];
+                if (Letter == 'I')
+                {
+                    Letter = 'L';
+                }
+                if (Letter == 'Q')
+                {
+                    Letter = 'K';
+                }
+                if (!FailureNode->Children[Letter - 'A'])
+                {
+                    // We can't go deeper in the trie...and we saw NO TAGS!  So, we needn't go here.
+                    // Suppose you have tags GGGROOVY and GROOVY.  After matching GGGROOVY, you can
+                    // jump to the 3rd G (you needn't handle the 2nd, even though it matches
+                    // partway down the trie).  Of course, if you had GGGROOVY, GROOVY and GGROO, you would only
+                    // jump to the 2nd G - because we'd have found a tag in here..
+                    FailureNode = Root;
+                    break; 
+                }
+                FailureNode = FailureNode->Children[Letter - 'A']; // move down the tree.
+                // If there are tags, STOP NOW.  (Don't jump from PANTS->ANTS if ANT has a tag)
+                if (FailureNode->FirstTag)
+                {
+                    break;
+                }
+            }
+            // If we're not pointing at root, then we're pointing at a good failure node:
+            if (FailureNode != Root)
+            {
+                Node->FailureNode = FailureNode;
+                Node->FailureLetterSkip = StartIndex;
+                break;
+            }
+        }
+        if (!Node->FailureNode)
+        {
+            // Hmm...no good failure nodes found?  That means we can jump forward over our full tag!
+            Node->FailureNode = Root;
+            Node->FailureLetterSkip = strlen(Tag);
+        }
+    }
+    // Now, handle all our children:
+    for (Letter = 0; Letter < TRIE_CHILD_COUNT; Letter++)
+    {
+        if (Node->Children[Letter])
+        {
+            Tag[TagLength] = 'A'+Letter;
+            Tag[TagLength+1] = '\0';
+            InitializeTrieFailureNodes(Root, Node->Children[Letter], Tag);
+        }
+    }
+}
+
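+// Illustrative sketch (hypothetical helper, not called anywhere in Inspect) of how the
+// failure links set up above are used: scan a stretch of text through the trie without
+// rescanning from scratch after every mismatch.  Assumes the text contains only the
+// letters A..Z and that InitializeTrieFailureNodes has already been run on the trie.
+static void SketchScanTextWithFailureLinks(TrieNode* Root, char* Text)
+{
+    TrieNode* Node = Root;
+    TrieNode* Child;
+    int Anchor = 0; // position in Text where the current partial match starts
+    int TextLength = (int)strlen(Text);
+    int Pos;
+    while (Anchor < TextLength)
+    {
+        Pos = Anchor + Node->Depth;
+        Child = (Pos < TextLength) ? Node->Children[Text[Pos] - 'A'] : NULL;
+        if (Child)
+        {
+            Node = Child;
+            if (Node->FirstTag)
+            {
+                // A tag ends at Text[Anchor .. Anchor + Node->Depth - 1]; the real
+                // search would now try to extend it (see GetMatches below).
+            }
+            continue;
+        }
+        // Mismatch: jump to the failure node and slide the anchor forward.
+        Anchor += Node->FailureLetterSkip;
+        Node = Node->FailureNode;
+    }
+}
+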
+// Constructor for a Peptide
+Peptide* NewPeptideNode()
+{
+    Peptide* This;
+    This = (Peptide*)calloc(1, sizeof(Peptide));
+    if (!This)
+    {
+        printf("** Fatal error: Unable to allocate a new peptide!\n");
+        return NULL;
+    }
+    memset(This->AminoIndex, -1, sizeof(int) * MAX_PT_MODS);
+    return This;
+}
+
+// Destructor for a Peptide
+void FreePeptideNode(Peptide* Pep)
+{
+    PeptideMatch* Node;
+    PeptideMatch* Prev = NULL;
+    PeptideSpliceNode* PSNode;
+    PeptideSpliceNode* PSPrev;
+    if (!Pep)
+    {
+        return;
+    }
+    // Free the list of PeptideSpliceNodes, starting with SpliceHead:
+    PSPrev = NULL;
+    for (PSNode = Pep->SpliceHead; PSNode; PSNode = PSNode->Next)
+    {
+        SafeFree(PSPrev);
+        PSPrev = PSNode;
+    }
+    SafeFree(PSPrev);
+    // Free the list of PeptideMatch instances, starting with First:
+    for (Node = Pep->First; Node; Node = Node->Next)
+    {
+        SafeFree(Prev);
+        Prev = Node;
+    }
+    SafeFree(Prev);
+    SafeFree(Pep->SplicedBases);
+    SafeFree(Pep->PetDelta);
+    SafeFree(Pep);
+}
+
+MassDelta* GetPeptideModFromAnnotation(Peptide* Match, char* ModBuffer, int ModCount, int AminoIndex)
+{
+    int MaxModsFromParsedPeptide = 10;
+    MassDelta* Delta;
+    //
+    if (!Match->PetDelta)
+    {
+      Match->PetDelta = (MassDelta*)calloc(MaxModsFromParsedPeptide, sizeof(MassDelta));
+    }
+    if (ModCount >= MaxModsFromParsedPeptide)
+    {
+        return NULL;
+    }
+    Delta = Match->PetDelta + ModCount;
+    Delta->Flags = DELTA_FLAG_VALID;
+    if(!CompareStrings(ModBuffer,"phos"))
+    { //necessary switch, cannot do atoi("phos") and expect real numbers
+        Delta->RealDelta = 80 * DALTON;
+        Delta->Flags |= DELTA_FLAG_PHOSPHORYLATION;
+        Match->SpecialFragmentation = FRAGMENTATION_PHOSPHO; // special flags we need
+        Match->SpecialModPosition = AminoIndex - 1; // position of the phospho residue (AminoIndex[ModCount] is assigned below)
+    }
+    else
+    {
+        Delta->RealDelta = atoi(ModBuffer) * DALTON;
+    }
+    Delta->Delta = Delta->RealDelta / 100; // tenth-of-a-dalton
+    Match->AminoIndex[ModCount] = AminoIndex - 1;
+    Match->ModType[ModCount] = Delta;
+    return Delta;
+}
+
+// Produce a peptide from an annotation string.  The annotation string
+// consists of amino acids, plus - possibly - some modification masses.
+// Valid examples: 
+// GPLLVQDVVFTDEMAHFDR
+// VLVLDTDY+16KK
+// SVTDC-2TSNFCLFQSNSK
+Peptide* GetPeptideFromAnnotation(char* Annotation)
+{
+    char ModBuffer[32];
+    int AminoIndex = 0;
+    int ModCount = 0;
+    int ModBufferPos;
+    Peptide* Match;
+    MassDelta* Delta;
+    int PRM = 0;
+    char* BaseAnnotation;
+    int MaxModsFromParsedPeptide = 10;
+    //
+    if (!Annotation)
+    {
+        return NULL;
+    }
+    ModBufferPos = 0;
+    Match = NewPeptideNode();
+    BaseAnnotation = Annotation;
+    if (BaseAnnotation[1] == '.')
+    {
+        Match->PrefixAmino = BaseAnnotation[0];
+        Annotation += 2;
+    }
+    while (*Annotation)
+    {
+        if ((*Annotation >= 'A' && *Annotation <= 'Z') || *Annotation == '.')
+        {
+            // It's an amino acid, or period.  
+            // Finish any pending mod:
+            if (ModBufferPos)
+            {
+                ModBuffer[ModBufferPos] = '\0';
+                Delta = GetPeptideModFromAnnotation(Match, ModBuffer, ModCount, AminoIndex);
+                if (!Delta)
+                {
+                    printf("*** Warning: Invalid modifications in '%s', not parsing\n", Annotation);
+                    FreePeptideNode(Match);
+                    return NULL;
+                }
+                PRM += Delta->RealDelta;
+                ModBufferPos = 0;
+                ModCount += 1;
+                // Bail out if we have too many PTMs to cope with:
+                if (ModCount == MAX_PT_MODS)
+                {
+                    FreePeptideNode(Match);
+                    return NULL;
+                }
+            }
+            // It's a dot - set the prefix and break:
+            if (*Annotation == '.')
+            {
+                Match->SuffixAmino = *(Annotation + 1);
+                break;
+            }
+            // It's an amino acid - add the AA mass:
+            Match->Bases[AminoIndex++] = *Annotation;
+            PRM += PeptideMass[*Annotation];
+        }
+        else
+        {
+            ModBuffer[ModBufferPos++] = *Annotation;
+        }
+        Annotation++;
+    }
+    Match->Bases[AminoIndex] = '\0';
+    // Finish any pending mod:
+    if (ModBufferPos)
+    {
+        ModBuffer[ModBufferPos] = '\0';
+        Delta = GetPeptideModFromAnnotation(Match, ModBuffer, ModCount, AminoIndex);
+        if (!Delta)
+        {
+            printf("*** Warning: Invalid modifications in '%s', not parsing\n", Annotation);
+            FreePeptideNode(Match);
+            return NULL;
+        }
+        PRM += Delta->RealDelta;
+        ModBufferPos = 0;
+        ModCount += 1;
+        // Bail out if we have too many PTMs to cope with:
+        if (ModCount == MAX_PT_MODS)
+        {
+            FreePeptideNode(Match);
+            return NULL;
+        }
+    }
+    Match->ParentMass = PRM + PARENT_MASS_BOOST;
+    return Match;
+}
+
+
+int GetPeptideParentMass(Peptide* Match)
+{
+    int Mass = PARENT_MASS_BOOST;
+    char* Amino;
+    int ModIndex;
+    for (Amino = Match->Bases; *Amino; Amino++)
+    {
+        Mass += PeptideMass[*Amino];
+    }
+    for (ModIndex = 0; ModIndex < MAX_PT_MODS; ModIndex++)
+    {
+        if (Match->AminoIndex[ModIndex] >= 0)
+        {
+            Mass += Match->ModType[ModIndex]->RealDelta;
+        }
+    }
+    Match->ParentMass = Mass;
+    return Mass;
+}
+
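+// Illustrative usage sketch (hypothetical, not called anywhere in Inspect): parse one of
+// the annotation forms documented above and read back its parent mass.  Assumes that
+// LoadPeptideMasses() (below) has already populated PeptideMass[], and that masses are
+// stored as scaled integers (MASS_SCALE units per dalton).
+static void SketchAnnotationParsingExample(void)
+{
+    Peptide* Match = GetPeptideFromAnnotation("VLVLDTDY+16KK");
+    if (Match)
+    {
+        printf("Parsed '%s', parent mass %.2f\n", Match->Bases,
+            GetPeptideParentMass(Match) / (float)MASS_SCALE);
+        FreePeptideNode(Match);
+    }
+}
+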
+int LoadPeptideMassesCallback(int LineNumber, int FilePos, char* LineBuffer, void* UserData)
+{
+    float Mass;
+    char* Str;
+    char Letter;
+
+ 
+    // Name:
+    Str = strtok(LineBuffer, " ");
+    // 3-letter abbrev:
+ 
+    Str = strtok(NULL, " ");
+    if (!Str)
+    {
+        REPORT_ERROR(0);
+        return 0;
+    }
+ 
+    // 1-letter abbrev:
+    Str = strtok(NULL, " ");
+    if (!Str)
+    {
+        REPORT_ERROR(0);
+        return 0;
+    }
+ 
+    Letter = Str[0];
+    // mass:
+    Str = strtok(NULL, " ");
+    if (!Str)
+    {
+        REPORT_ERROR(0);
+        return 0;
+    }
+ 
+    Mass = (float)atof(Str);
+    ROUND_MASS(Mass, StandardPeptideMass[Letter]);
+ 
+    return 1;
+}
+
+// Read peptide masses from a file.
+int LoadPeptideMasses(char* FileName)
+{
+    FILE* AAFile;
+    //
+    if (!FileName)
+    {
+        // Use a sensible default:
+        FileName = FILENAME_AMINO_ACID_MASSES;
+    }
+    AAFile = fopen(FileName, "r");
+    if (!AAFile)
+    {
+        REPORT_ERROR_S(8, FileName);
+        return 0;
+    }
+    ParseFileByLines(AAFile, LoadPeptideMassesCallback, NULL, 0);
+
+    // This absurdly high mass for the delimiter, *, ensures that it won't be part of a match:
+    StandardPeptideMass[42] = 9999999;
+    memcpy(PeptideMass, StandardPeptideMass, sizeof(int) * 256);
+    return 1;
+}
+
+
+// We read in large chunks of the file at once.  When we get past SCAN_BUFFER_A, it's time to shunt what we've got
+// to the front of the buffer.  And if our buffer ends before SCAN_BUFFER_B, we try to read more data (until
+// we reach eof)
+#define SCAN_BUFFER_SIZE 5242880
+#define SCAN_BUFFER_A 5232680
+#define SCAN_BUFFER_B 5242680
+#define RECORD_END '*'
+ 
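+// Illustrative sketch (hypothetical helper) of the shunt-and-refill scheme the constants
+// above describe: once the scan position moves past SCAN_BUFFER_A, the unscanned tail is
+// copied to the front of the buffer, and if the buffered data ends before SCAN_BUFFER_B,
+// more bytes are read from the file (until end-of-file).  The real scan loop lives in the
+// database search code; this only illustrates the buffer bookkeeping.
+static int SketchRefillScanBuffer(FILE* DBFile, char* Buffer, int* BufferPos, int* BufferEnd)
+{
+    int BytesRead;
+    if (*BufferPos > SCAN_BUFFER_A)
+    {
+        // Shunt the unscanned tail to the front of the buffer:
+        memmove(Buffer, Buffer + *BufferPos, *BufferEnd - *BufferPos);
+        *BufferEnd -= *BufferPos;
+        *BufferPos = 0;
+    }
+    if (*BufferEnd < SCAN_BUFFER_B && !feof(DBFile))
+    {
+        // Top the buffer back up, until we hit end-of-file:
+        BytesRead = (int)fread(Buffer + *BufferEnd, 1, SCAN_BUFFER_SIZE - *BufferEnd, DBFile);
+        *BufferEnd += BytesRead;
+    }
+    return *BufferEnd - *BufferPos; // unscanned bytes still available
+}
+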
+// We have matched a tag in the peptide database, and the flanking series (plus some PTMods) matches our
+// flanking mass.  Check to be sure these PTMods can be attached to the flanking series.  (For example:
+// if the flanking sequence AAG plus a phosphate mass matches our prefix mass, that's NOT a match, because
+// neither glycine nor alanine is phosphorylatable)
+// Simplification: Assume that multiple PTMods can be attached to one base.  (This assumption isn't always valid, but
+// it's nontrivial to know when it is; the user can toss out any unreasonable constructs later)
+// From Start to End, INCLUSIVE.
+int CheckForPTAttachmentPoints(int DecorationMassIndex, char* Buffer, int Start, int End, int BufferDir)
+{
+    int ModIndex;
+    int ModsLeft[MAX_PT_MODTYPE];
+    int BufferPos;
+    int Done;
+    int PeptideIndex;
+    int Legal;
+
+    memcpy(ModsLeft, AllDecorations[DecorationMassIndex].Mods, sizeof(int)*MAX_PT_MODTYPE);
+    for (BufferPos = Start; BufferPos <= End; BufferPos++)
+    {
+        Done = 1; //by default
+        PeptideIndex = Buffer[BufferPos] - 'A';
+        for (ModIndex = 0; ModIndex < AllPTModCount; ModIndex++)
+        {
+            if (ModsLeft[ModIndex])
+            {
+                Legal = 1;
+                // Avoid attaching a C-terminal PTM, if we're not at the C terminus:
+                if (AllKnownPTMods[ModIndex].Flags & DELTA_FLAG_C_TERMINAL)
+                {
+                    if (BufferDir < 0 || BufferPos != End)
+                    {
+                        Legal = 0;
+                    }
+                }
+                // Avoid attaching an N-terminal PTM, if we're not at the N terminus:
+                if (AllKnownPTMods[ModIndex].Flags & DELTA_FLAG_N_TERMINAL)
+                {
+                    if (BufferDir > 0 || BufferPos != Start)
+                    {
+                        Legal = 0;
+                    }
+                }
+                if (Legal)
+                {
+                    ModsLeft[ModIndex] = max(0, ModsLeft[ModIndex] - AllKnownPTMods[ModIndex].Allowed[PeptideIndex]);
+                }
+                if (ModsLeft[ModIndex])
+                {
+                    Done = 0;
+                }
+            }
+        }
+        if (Done)
+        {
+            return 1;
+        }
+    }
+    return 0;
+}
+
+// MAX_SIDE_MODS is how many flanking matches we're allowed for an initial tag match.
+// (For instance: The preceding aminos may match with no PTMs, or we may be able to match
+// with one fewer amino and a PTM)
+#define MAX_SIDE_MODS 10
+int LeftMatchPos[MAX_SIDE_MODS];
+int LeftMatchDecoration[MAX_SIDE_MODS];
+int RightMatchPos[MAX_SIDE_MODS];
+int RightMatchDecoration[MAX_SIDE_MODS];
+
+// MatchFlankingMass is called when we matched a trie tag, and we are checking whether the
+// flanking amino acids match our prefix or suffix mass.
+// WARNING: If there are two or more decorations with the same mass, this method will FAIL, because we'll only
+// consider ONE such decoration.
+int MatchFlankingMass(MSSpectrum* Spectrum, TrieTag* Tag, char* Buffer, int StartPos, int BufferDir, int BufferEnd, int MatchMass, int ModsRemaining)
+{
+    int MatchCount = 0;
+    int Pos;
+    int Mass;
+    int Diff;
+    int AbsDiff;
+    int FlankingMass;
+    int MandatoryDecorationChange = 0;
+    int DecorationMassIndex;
+    int Verbose = 0;
+    int* MatchPos;
+    int* MatchDecoration;
+    int MinMatchMass = MatchMass - GlobalOptions->FlankingMassEpsilon;
+    //
+    if (BufferDir<0)
+    {
+        MatchPos = LeftMatchPos;
+        MatchDecoration = LeftMatchDecoration;
+    }
+    else
+    {
+        MatchPos = RightMatchPos;
+        MatchDecoration = RightMatchDecoration;
+    }
+
+    /////////////////////////////////////////////////////////
+    // If the flanking mass is (essentially) zero, that always qualifies as a match.
+    if (MatchMass < GlobalOptions->FlankingMassEpsilon) 
+    {
+        MatchPos[MatchCount] = StartPos - BufferDir;
+        MatchDecoration[MatchCount] = PlainOldDecorationIndex; 
+        return 1;
+    }
+    DecorationMassIndex = AllDecorationCount - 1;
+    // Skip over any decorations that use up too many pt-mods:
+    while (1)
+    {
+        if (AllDecorations[DecorationMassIndex].TotalMods > ModsRemaining)
+        {
+            DecorationMassIndex--;
+            continue;
+        }
+        break;        
+    }
+    FlankingMass = 0;
+    for (Pos = StartPos; Pos >= 0; Pos += BufferDir)
+    {
+        if (Pos >= BufferEnd)
+        {
+            break;
+        }
+        if (Buffer[Pos] == '>' || Buffer[Pos] == '*')
+        {
+            break;
+        }
+        Mass = PeptideMass[Buffer[Pos]];
+        if (Mass == 0)
+        {
+            // Invalid peptide!
+            break;
+        }
+        FlankingMass += Mass;
+        Diff = MatchMass  - (FlankingMass + AllDecorations[DecorationMassIndex].Mass);
+        AbsDiff = abs(Diff);
+        if (AbsDiff < GlobalOptions->FlankingMassEpsilon)
+        {
+            // Aha!  This is *probably* a match.  Check to be sure we have the bases we need:
+            if (CheckForPTAttachmentPoints(DecorationMassIndex, Buffer, min(Pos, StartPos), max(Pos, StartPos), BufferDir))
+            {
+                if (Verbose)
+                {
+                    printf("Side is match!  Dec-index %d, flank %.2f.\n", DecorationMassIndex, FlankingMass / (float)MASS_SCALE);
+                }
+                MatchPos[MatchCount] = Pos;
+                MatchDecoration[MatchCount] = DecorationMassIndex;
+                MatchCount++;
+                if (MatchCount == MAX_SIDE_MODS)
+                {
+                    return MatchCount;
+                }
+
+            }
+        }
+        // Move the DecorationMassIndex, if needed.
+        while (MandatoryDecorationChange || FlankingMass + AllDecorations[DecorationMassIndex].Mass > MinMatchMass)
+        {
+            // The flanking sequence's mass is significantly bigger than our (decorated) target mass.
+            // Move to a smaller decoration:
+            MandatoryDecorationChange = 0;
+            DecorationMassIndex--;
+            if (DecorationMassIndex<0)
+            {
+                break;
+            }
+            // Skip any decorations that include phosphorylation, if we're not on phospho mode:
+            if (!GlobalOptions->PhosphorylationFlag && g_PhosphorylationMod>-1 && AllDecorations[DecorationMassIndex].Mods[g_PhosphorylationMod])
+            {
+                MandatoryDecorationChange = 1;
+                continue;
+            }
+            if (AllDecorations[DecorationMassIndex].TotalMods > ModsRemaining)
+            {
+                continue;
+            }
+            // And, check for a match:
+            Diff = MatchMass  - (FlankingMass + AllDecorations[DecorationMassIndex].Mass);
+            AbsDiff = abs(Diff);
+            if (AbsDiff < GlobalOptions->FlankingMassEpsilon) 
+            {
+                // Aha!  This is *probably* a match.  Check to be sure we have the bases we need:
+                if (CheckForPTAttachmentPoints(DecorationMassIndex, Buffer, min(Pos, StartPos), max(Pos, StartPos), BufferDir))
+                {
+                    if (Verbose)
+                    {
+                        printf("Left is match!  Dec-index %d, flank %.2f.\n", DecorationMassIndex, FlankingMass / (float)MASS_SCALE);
+                    }
+                    MatchPos[MatchCount] = Pos;
+                    MatchDecoration[MatchCount] = DecorationMassIndex;
+                    MatchCount++;
+                    if (MatchCount == MAX_SIDE_MODS)
+                    {
+                        return MatchCount;
+                    }
+                    MandatoryDecorationChange = 1;
+                }
+            }
+        }
+        if (DecorationMassIndex<0)
+        {
+            break;
+        }
+    }
+    return MatchCount;
+}
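+
+// Worked example for MatchFlankingMass (illustrative numbers): suppose a tag's prefix mass
+// is 167.0 Da and the residue just left of the tag is serine (~87.0 Da).  The empty
+// decoration falls short, but stepping down from the largest decoration reaches a +80
+// phosphorylation decoration (if one is in play), and 87.0 + 80.0 lands within
+// FlankingMassEpsilon of 167.0; CheckForPTAttachmentPoints then confirms that serine can
+// actually carry the phosphate, so the extension is recorded.
+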
+// We extend LEFT and RIGHT from the match region (running from BufferPos to BufferEnd, INCLUSIVE),
+// attempting to match our tag's prefix mass and suffix mass.  Extension works like this:
+// - DecorationMassIndex starts out pointing at our largest decoration, FlankingMass starts at 0
+// - At each iteration step:
+// --  move one base further along, and add its mass to FlankingMass
+// --  If FlankingMass plus the mass of our decoration matches our tag, we have a match.
+// --  If FlankingMass plus the mass of our decoration is too LARGE, decrement DecorationMassIndex
+//        until we have a match, run out of decorations, or the mass again becomes too SMALL.
+// --  At some point, we'll run out of decorations (FlankingMass becomes larger than the tag mass), and stop.
+// FilePos and BufferPos point to the last character in the matched tag.
+void GetMatches(SearchInfo* Info, TrieNode* Node, char* Buffer, int BufferPos, int BufferEnd, int FilePos)
+{
+    TrieTagHanger* TagNode;
+    int ModsRemaining;
+
+    int LeftMatchCount;
+    int RightMatchCount;
+    int LeftMatchIndex;
+    int RightMatchIndex;
+    int ModIndex;
+    int UsedTooMany;
+    static int PTMLimit[MAX_PT_MODTYPE];
+    // To avoid repeated scoring:
+    int ExtensionIndex = 0;
+    int ExtensionCount = 0;
+    static int StartingPoints[512];
+    static int EndingPoints[512];
+    static int ExtensionLeftDecorations[512]; 
+    static int ExtensionRightDecorations[512];
+    static MSSpectrum* ExtensionSpectra[512];
+    int startOfPeptideFilePos;
+    int ExtensionFound;
+
+    int validTag = 1;
+    MSSpectrum* Spectrum;
+    //
+    if (!Node->FirstTag)
+    {
+        return;
+    }
+    //GlobalStats->TagMatches++;
+
+    //printf("Extend matches of '%s' at position %d\n", Node->FirstTag->Tag->Tag, FilePos);
+    //Log("Extend matches of '%s' at position %d\n", Node->FirstTag->Tag->Tag, FilePos);
+    // Try each tag corresponding to this TrieNode.
+    for (TagNode = Node->FirstTag; TagNode; TagNode = TagNode->Next)
+    {
+        Spectrum = TagNode->Tag->PSpectrum;
+        Info->Spectrum = Spectrum;
+        memcpy(PTMLimit, g_PTMLimit, sizeof(int) * AllPTModCount);
+        // If there are mods in the tag, then these must count towards the PTMLimit
+        for (ModIndex = 0; ModIndex < MAX_PT_MODS; ModIndex++)
+        {
+            if (TagNode->Tag->AminoIndex[ModIndex] < 0)
+            {
+                break;
+            }
+
+	    //Also check that the PTM is valid!!!
+	    if(AllKnownPTMods[TagNode->Tag->ModType[ModIndex]->Index].Allowed[Buffer[BufferPos - 2 + TagNode->Tag->AminoIndex[ModIndex]]] == 0)
+	    {
+	      validTag = 0;
+	    }
+            PTMLimit[TagNode->Tag->ModType[ModIndex]->Index] -= 1;
+        }
+	  if(validTag == 0)
+	    continue;
+        ModsRemaining = GlobalOptions->MaxPTMods - TagNode->Tag->ModsUsed;
+        if (ModsRemaining < 0)
+        {
+            continue;
+        }
+	//See how many prefix matches there are.  Populates LeftMatchDecorations array
+        LeftMatchCount = MatchFlankingMass(Spectrum, TagNode->Tag, Buffer, BufferPos - TagNode->Tag->TagLength, -1, BufferEnd, TagNode->Tag->PrefixMass, ModsRemaining);
+        if (LeftMatchCount == 0)
+        {
+            continue;
+        }
+	//See how many suffix matches there are.  Populates RightMatchDecorations array
+        RightMatchCount = MatchFlankingMass(Spectrum, TagNode->Tag, Buffer, BufferPos + 1, 1, BufferEnd, TagNode->Tag->SuffixMass, ModsRemaining);
+        if (RightMatchCount == 0)
+        {
+            continue;
+        }
+        // Consider each combination of left-extension and right-extension:
+        for (LeftMatchIndex = 0; LeftMatchIndex<LeftMatchCount; LeftMatchIndex++)
+        {
+            for (RightMatchIndex = 0; RightMatchIndex<RightMatchCount; RightMatchIndex++)
+            {
+                UsedTooMany = 0;
+		//Check that there aren't too many of any type of modification with the selected extensions
+                for (ModIndex = 0; ModIndex < AllPTModCount; ModIndex++)
+                {
+                    if (AllDecorations[LeftMatchDecoration[LeftMatchIndex]].Mods[ModIndex] + 
+                        AllDecorations[RightMatchDecoration[RightMatchIndex]].Mods[ModIndex] > PTMLimit[ModIndex])
+                    {
+                        UsedTooMany = 1;
+                        break;
+                    }
+                }
+                if (UsedTooMany)
+                {
+                    continue;
+                }
+		//Check that the total number of mods is within the limits
+                if (AllDecorations[LeftMatchDecoration[LeftMatchIndex]].TotalMods + 
+                    AllDecorations[RightMatchDecoration[RightMatchIndex]].TotalMods > ModsRemaining)
+                {
+                    continue;
+                }
+                if (GlobalOptions->MandatoryModIndex > -1 && 
+                    !TagNode->Tag->MandatoryModUsed &&
+                    AllDecorations[LeftMatchDecoration[LeftMatchIndex]].Mods[GlobalOptions->MandatoryModIndex] == 0 &&
+                    AllDecorations[RightMatchDecoration[RightMatchIndex]].Mods[GlobalOptions->MandatoryModIndex] == 0)
+                {
+                    continue; // We don't have our mandatory PTM (biotin, or whatever)
+                }
+                ExtensionFound = 0;
+                for (ExtensionIndex = 0; ExtensionIndex < ExtensionCount; ExtensionIndex++)
+                {
+                    if (StartingPoints[ExtensionIndex] == LeftMatchPos[LeftMatchIndex] && EndingPoints[ExtensionIndex] == RightMatchPos[RightMatchIndex]
+                    && ExtensionLeftDecorations[ExtensionIndex] == LeftMatchDecoration[LeftMatchIndex]
+                    && ExtensionRightDecorations[ExtensionIndex] == RightMatchDecoration[RightMatchIndex]
+                    && ExtensionSpectra[ExtensionIndex] == TagNode->Tag->PSpectrum)
+                    {
+                        ExtensionFound = 1;
+                        break;
+                    }
+                }
+                if (ExtensionFound)
+                {
+                    continue;
+                }
+                StartingPoints[ExtensionCount] = LeftMatchPos[LeftMatchIndex];
+                EndingPoints[ExtensionCount] = RightMatchPos[RightMatchIndex];
+                ExtensionLeftDecorations[ExtensionCount] = LeftMatchDecoration[LeftMatchIndex];
+                ExtensionRightDecorations[ExtensionCount] = RightMatchDecoration[RightMatchIndex];
+                ExtensionSpectra[ExtensionCount] = TagNode->Tag->PSpectrum;
+                Info->Spectrum = TagNode->Tag->PSpectrum;
+
+		//printf("FilePos: %d\n",FilePos);
+		startOfPeptideFilePos = FilePos - TagNode->Tag->TagLength - ((BufferPos - Node->Depth + 1) - LeftMatchPos[LeftMatchIndex]) + 1;
+		AddNewMatch(Info,startOfPeptideFilePos,TagNode->Tag,
+			    Buffer + LeftMatchPos[LeftMatchIndex],
+			    RightMatchPos[RightMatchIndex] - LeftMatchPos[LeftMatchIndex] + 1,
+			    (BufferPos - Node->Depth + 1) - LeftMatchPos[LeftMatchIndex],
+			    LeftMatchDecoration[LeftMatchIndex], RightMatchDecoration[RightMatchIndex],
+			    0, 0);
+                ExtensionCount = min(511, ExtensionCount + 1);
+	    }
+        }
+    }
+    return;
+}
+
+//Extending Tags for a blind search requires a separate function.  
+//We keep Tags where only one side (suffix or prefix) is extendable.  It is a simple 
+//extension, because no PTMs are allowed.  If both sides are extendable
+//then it is a nomod match, and sent to the regular scorer. 
+int ExtendTagMatchBlind(SearchInfo* Info, TrieNode* Node, char* Buffer, int BufferPos, int BufferEnd, int FilePos)
+{
+    TrieTagHanger* Hanger;
+    MSSpectrum* Spectrum;
+    int LeftMatchCount;
+    int RightMatchCount;
+    int ModsRemaining = 0; //always zero for this simple extension
+    int Extensions = 0;
+
+    for (Hanger = Node->FirstTag; Hanger; Hanger = Hanger->Next)
+    {
+        Spectrum = Hanger->Tag->PSpectrum;
+        Info->Spectrum = Spectrum;
+        //by virtue of getting here, we know that this TAG (tripeptide) has matched the database
+        Hanger->Tag->DBTagMatches++;
+        LeftMatchCount = MatchFlankingMass(Spectrum, Hanger->Tag, Buffer, BufferPos - Hanger->Tag->TagLength, -1, BufferEnd, Hanger->Tag->PrefixMass, ModsRemaining);
+
+        RightMatchCount = MatchFlankingMass(Spectrum, Hanger->Tag, Buffer, BufferPos + 1, 1, BufferEnd, Hanger->Tag->SuffixMass, ModsRemaining);
+        if (LeftMatchCount + RightMatchCount == 1)
+        {
+            //set up the BlindTagMatchObject, representing this match.
+            //Match = NewBlindTagMatch();
+            //Match->Tag = Hanger->Tag;
+            //Match->TagDBLoc = BufferPos - Hanger->Tag->TagLength; //pos of the first char
+            if (LeftMatchCount)
+            {
+                Hanger->Tag->PrefixExtends ++;
+            //    Match->ExtendLR = -1;
+            //    Match->ExtendDBLoc = LeftMatchPos[0]; //only one match position possible, bc no mods
+            //    Match->ExtendLength = Match->TagDBLoc - Match->ExtendDBLoc;
+            }
+            else
+            {
+                Hanger->Tag->SuffixExtends ++;
+            //    Match->ExtendLR = 1; //right extension
+            //    Match->ExtendDBLoc = RightMatchPos[0];
+            //    Match->ExtendLength = Match->ExtendDBLoc - Match->TagDBLoc;
+            }
+            //InsertBlindTagMatch(Match);
+            Hanger->Tag->PrefixExtends += LeftMatchCount;
+            Hanger->Tag->SuffixExtends += RightMatchCount;
+            Extensions++;
+        }
+        else if (LeftMatchCount + RightMatchCount == 2)
+        {
+            //send to regular scorer, it's a two sided hit
+        }
+        //printf ("Extend matches of '%s' at position %d\n", Node->FirstTag->Tag->Tag, FilePos);
+        //printf ("Returned RightMatch %d, returned LeftMatch %d\n",RightMatchCount,LeftMatchCount);
+    }
+    return Extensions;
+
+}
+
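+// Look up the protein name for the given record number in the database index file.
+// Judging from the reads below, each index record begins with a 64-bit file offset
+// and a 32-bit field, followed by an 80-character name; the Name buffer must
+// therefore have room for at least 81 characters.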
+void GetProteinID(int RecordNumber, DatabaseFile* DB, char* Name)
+{
+    int Dummy[16];
+    int RecordSize;
+    if (!DB || !DB->IndexFile)
+    {
+        Name[0] = '?';
+        Name[1] = '\0';
+        return;
+    }
+    if (DB->Type == evDBTypeSpliceDB)
+    {
+        RecordSize = SPLICEDB_INDEX_RECORD_SIZE;
+    }
+    else
+    {
+        RecordSize = TRIE_INDEX_RECORD_SIZE;
+    }
+    
+
+    // Use the record size computed above (it differs for splice databases):
+    fseek(DB->IndexFile, RecordSize * RecordNumber, SEEK_SET);
+    ReadBinary(&Dummy, LONG_LONG_SIZE, 1, DB->IndexFile);
+    ReadBinary(&Dummy, sizeof(int), 1, DB->IndexFile);
+    ReadBinary(Name, sizeof(char), 80, DB->IndexFile);
+    Name[80] = '\0';
+    //Log("Record %d has ID %s\n", Pep->RecordNumber, Pep->Name);
+}
+
+
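+// Sort the parallel AminoIndex / ModType arrays in place by increasing amino acid
+// index, via repeated selection of the minimum.  Unused slots (AminoIndex == -1,
+// ModType == NULL) end up at the back.  Example (hypothetical values):
+// AminoIndex {5, 2, -1, ...} with ModType {A, B, NULL, ...} becomes
+// AminoIndex {2, 5, -1, ...} with ModType {B, A, NULL, ...}.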
+void SortModifications(int* AminoIndex, MassDelta** ModType)
+{
+    int AminoIndexSorted[MAX_PT_MODS];
+    MassDelta* ModTypeSorted[MAX_PT_MODS];
+    int MinAminoIndex = 0;
+    int NextSortedPosition = 0;
+    int Index;
+    int MinAminoPos = 0;
+    //
+    memset(AminoIndexSorted, -1, sizeof(int)*MAX_PT_MODS);
+    memset(ModTypeSorted, 0, sizeof(MassDelta*)*MAX_PT_MODS);
+    while (1)
+    {
+        // Find the smallest amino acid index in AminoIndex, skipping
+        // over entries of -1 (which are empty)
+        MinAminoIndex = -1;
+        for (Index = 0; Index < MAX_PT_MODS; Index++)
+        {
+            if (AminoIndex[Index]>-1 && (MinAminoIndex<0 || AminoIndex[Index]<MinAminoIndex))
+            {
+                MinAminoIndex = AminoIndex[Index];
+                MinAminoPos = Index;
+            }
+        }
+        if (MinAminoIndex==-1)
+        {
+            // Everything's been moved to the sorted list.  Jane, stop this crazy thing!
+            break;
+        }
+        // MOVE these entries into the sorted list:
+        AminoIndexSorted[NextSortedPosition] = AminoIndex[MinAminoPos];
+        AminoIndex[MinAminoPos] = -1;
+        ModTypeSorted[NextSortedPosition] = ModType[MinAminoPos];
+        ModType[MinAminoPos] = NULL;
+        NextSortedPosition++;
+    }
+    // Move the sorted shadows back into the real arrays:
+    memcpy(AminoIndex, AminoIndexSorted, sizeof(int)*MAX_PT_MODS);
+    memcpy(ModType, ModTypeSorted, sizeof(MassDelta*)*MAX_PT_MODS);
+    // Hooray!
+}
+
+#define SCORE_PTM_ATTACH_IMPOSSIBLE (float)-999999999.0
+
+// Diagram of the dynamic programming table for optimal mod positioning:
+// Suppose we have three decorations (zero, one or two attachments of the same PTM),
+// and the PTMs should be attached at B and C in prefix ABCDE.  Then the grid
+// looks like this:
+//      A  B  C  D  E
+//  0   x--x
+//         |
+//  1      x--x 
+//            |
+//  2         x--x--x
+//
+// (Columns for amino acids, rows for decorations, vertical moves mean an attachment)
+//
+// Find the optimal way to place modifications (from FullDecoration) on a polypeptide
+// (Peptide) with length PeptideLength; store the mod-placements in AminoIndex / ModType
+void FindOptimalPTModPositions(MSSpectrum* Spectrum, char* Peptide, 
+    int PeptideLength, int FullDecoration, int BaseMass, int* AminoIndex, 
+    MassDelta** ModType, int VerboseFlag, SpectrumTweak* Tweak)
+{
+    float* ScoreMatrix = NULL;
+    int* SubDecorationMatrix = NULL;
+    int PeptidePos;
+    int DecorationIndex;
+    float BestScore;
+    char Amino;
+    int ModIndex;
+    int Mass;
+    float Score;
+    int ModCount;
+    int CanBridge;
+    int ModsNeeded;
+    float BYScore;
+    int ModAdder;
+    int AminoAcidIndex;
+    int BestSubDecoration;
+    int SubDecoration;
+    PRMBayesianModel* Model;
+    /// 
+    VerboseFlag  = 0;
+    memset(AminoIndex, -1, sizeof(int) * MAX_PT_MODS);
+    memset(ModType, 0, sizeof(MassDelta*) * MAX_PT_MODS);
+    if (FullDecoration == PlainOldDecorationIndex)
+    {
+        return; // No mods to place!
+    }
+    if (Spectrum->Charge > 2)
+    {
+        Model = PRMModelCharge3;
+    }
+    else
+    {
+        Model = PRMModelCharge2;
+    }
+    // D.P. tables.  ScoreMatrix holds the score at each cell; SubDecorationMatrix tells
+    // the previous subdecoration (and hence, how to backtrack through the table)
+    ScoreMatrix = (float*)calloc(PeptideLength * AllDecorationCount, sizeof(float));
+    SubDecorationMatrix = (int*)calloc(PeptideLength * AllDecorationCount, sizeof(int));
+
+    // Fill the dynamic programming table.  Outer loop over amino acids,
+    // inner loop over decorations.
+    Mass = BaseMass;
+    for (PeptidePos = 0; PeptidePos < PeptideLength; PeptidePos++)
+    {
+        Amino = Peptide[PeptidePos];
+        AminoAcidIndex = Amino - 'A';
+        Mass += PeptideMass[Amino];
+        BestScore = 0;
+        for (DecorationIndex = 0; DecorationIndex < AllDecorationCount; DecorationIndex++)
+        {
+            if (!IsSubDecoration[DecorationIndex][FullDecoration])
+            {
+                continue;
+            }
+            BestScore = 0; 
+            BestSubDecoration = DecorationIndex;
+            BYScore = GetIonPRMFeatures(Spectrum, Tweak, Model, Mass + AllDecorations[DecorationIndex].Mass, 0);
+            //BYScore = (int)(100 * GetPRMFeatures(Spectrum, Tweak, Model, Mass + AllDecorations[DecorationIndex].Mass, 0));
+            if (PeptidePos)
+            {
+                // Consider attaching nothing at this peptide:
+                BestScore += ScoreMatrix[(PeptidePos - 1) * AllDecorationCount + DecorationIndex];
+            }
+            else
+            {
+                if (DecorationIndex != PlainOldDecorationIndex)
+                {
+                    BestScore += SCORE_PTM_ATTACH_IMPOSSIBLE;
+                }
+            }
+            BestScore += BYScore;
+            BestSubDecoration = DecorationIndex;
+			//Log printf("    No mod here: Score %.2f\n", BestScore);
+
+            // Consider attaching a modification at this peptide:
+            for (SubDecoration = 0; SubDecoration < AllDecorationCount; SubDecoration++)
+            {
+                if (SubDecoration == DecorationIndex)
+                {
+                    continue;
+                }
+                if (!IsSubDecoration[SubDecoration][DecorationIndex])
+                {
+                    continue;
+                }
+                CanBridge = 1;
+                for (ModIndex = 0; ModIndex < AllPTModCount; ModIndex++)
+                {
+                    // This decoration must contain all the mods from the subdecoration:
+                    ModsNeeded = AllDecorations[DecorationIndex].Mods[ModIndex] - AllDecorations[SubDecoration].Mods[ModIndex];
+                    if (ModsNeeded < 0)
+                    {
+                        CanBridge = 0;
+                        break;
+                    }
+                    // This amino acid must be able to support the modification(s):
+		    //printf("ModsNeeded: %d\n",ModsNeeded);
+		    //printf("AllKnownPTMods[%d].Allowed[%c]=%d\n",ModIndex,(char)(AminoAcidIndex+'A'),AllKnownPTMods[ModIndex].Allowed[AminoAcidIndex]);
+                    if (ModsNeeded > AllKnownPTMods[ModIndex].Allowed[AminoAcidIndex])
+                    {
+                        CanBridge = 0;
+                        break;
+                    }
+                    // If the decoration is terminal, then this attachment position must be terminal:
+                    if (ModsNeeded)
+                    {
+                        if ((AllKnownPTMods[ModIndex].Flags & DELTA_FLAG_C_TERMINAL) && PeptidePos < (PeptideLength - 1))
+                        {
+                            CanBridge = 0;
+                            break;
+                        }
+                        if ((AllKnownPTMods[ModIndex].Flags & DELTA_FLAG_N_TERMINAL) && PeptidePos)
+                        {
+                            CanBridge = 0;
+                            break;
+                        }
+                    }
+                }
+                if (CanBridge)
+                {
+                    if (PeptidePos)
+                    {
+                        Score = ScoreMatrix[(PeptidePos - 1) * AllDecorationCount + SubDecoration];
+                    }
+                    else
+                    {
+                        if (SubDecoration != PlainOldDecorationIndex)
+                        {
+                            Score = SCORE_PTM_ATTACH_IMPOSSIBLE; // Impossible!
+                        }
+                        else
+                        {
+                            Score = 0;
+                        }
+                    }
+                    //Log printf("    To Sub-decoration %d: Score %d\n", SubDecoration, Score);
+                    Score += BYScore;
+                    if (Score >= BestScore)
+                    {
+                        BestScore = Score;
+                        BestSubDecoration = SubDecoration;
+                    }
+                }
+            }
+            if (VerboseFlag)
+            {
+                //Log printf("    PeptidePos %d decoration %d: \n      Mass %d BYscore %.2f, best score %.2f, sub decoration %d\n", PeptidePos, DecorationIndex, 
+                    //(Mass + AllDecorations[DecorationIndex].Mass), BYScore, BestScore, BestSubDecoration);
+            }
+            ScoreMatrix[PeptidePos * AllDecorationCount + DecorationIndex] = BestScore;
+            SubDecorationMatrix[PeptidePos * AllDecorationCount + DecorationIndex] = BestSubDecoration;
+        }
+    }
+    // Fill in AminoIndex, ModType.  Start at the bottom right of the DP table (last amino acid,
+    // and full decoration), work back to the top row (first amino acid, no more decorations)
+    ModCount = 0;
+    DecorationIndex = FullDecoration;
+    PeptidePos = PeptideLength - 1;
+    while (PeptidePos >= 0)
+    {
+        SubDecoration = SubDecorationMatrix[PeptidePos * AllDecorationCount + DecorationIndex];
+        if (SubDecoration != DecorationIndex)
+        {
+            for (ModIndex = 0; ModIndex < MAX_PT_MODTYPE; ModIndex++)
+            {
+                ModsNeeded = AllDecorations[DecorationIndex].Mods[ModIndex] - AllDecorations[SubDecoration].Mods[ModIndex];
+                for (ModAdder = 0; ModAdder<ModsNeeded; ModAdder++)
+                {
+                    AminoIndex[ModCount] = PeptidePos;
+                    AminoAcidIndex = Peptide[PeptidePos] - 'A';
+		    //printf("Peptide: %s\n",Peptide);
+		    //printf("Amino acid Index=%c\n",Peptide[PeptidePos]);
+
+                    ModType[ModCount] = MassDeltaByIndex[AminoAcidIndex * MAX_PT_MODTYPE + ModIndex];
+		    //printf("Mod Delta: %d\n",ModType[ModCount]->Delta);
+                    ModCount++;
+                }
+            }
+        }
+        PeptidePos--;
+        DecorationIndex = SubDecoration;
+    }
+    // Free temp storage:
+    SafeFree(ScoreMatrix);
+    SafeFree(SubDecorationMatrix);
+}
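+// Indexing note (descriptive): both DP tables are flattened row-major as
+// [PeptidePos * AllDecorationCount + DecorationIndex].  Backtracking starts at
+// (PeptideLength - 1, FullDecoration) and follows SubDecorationMatrix entries back
+// to the first residue, emitting one AminoIndex/ModType pair per attachment.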
+
+// Return TRUE if two matches are the same.
+// If we're performing an exon-graph search, then we only consider matches
+// to be the same if they have the same sequence AND genomic coordinates.
+int IsMatchDuplicate(Peptide* Match, Peptide* OldMatch, int PeptideLength)
+{
+    int CompareGenomicLocation = 1;
+
+    if (Match->DB && Match->DB->Type == evDBTypeTrie && OldMatch->DB && OldMatch->DB->Type == evDBTypeTrie)
+    {
+        CompareGenomicLocation = 0;
+    }
+    if (!CompareGenomicLocation)
+    {
+        if (!strncmp(Match->Bases, OldMatch->Bases, PeptideLength) && 
+            !memcmp(Match->AminoIndex, OldMatch->AminoIndex, sizeof(int)*MAX_PT_MODS) && 
+            !memcmp(Match->ModType, OldMatch->ModType, sizeof(MassDelta*)*MAX_PT_MODS))
+        {
+            return 1;
+        }
+    }
+    else
+    {
+        // For exon graph search, we consider a match to be different if it has a different
+        // genomic location.  We may see the same peptide inside two different exons, and 
+        // we may have different options for splicing.
+        if (!strncmp(Match->Bases, OldMatch->Bases, PeptideLength) && 
+            !memcmp(Match->AminoIndex, OldMatch->AminoIndex, sizeof(int)*MAX_PT_MODS) && 
+            !memcmp(Match->ModType, OldMatch->ModType, sizeof(MassDelta*)*MAX_PT_MODS) &&
+            Match->GenomicLocationStart == OldMatch->GenomicLocationStart &&
+            Match->GenomicLocationEnd == OldMatch->GenomicLocationEnd)
+        {
+            return 1;
+        }
+    }
+    return 0;
+}
+
+// Store a match in this Spectrum's Node's match list.  Don't store duplicate matches.
+// Don't store more than GlobalOptions->StoreMatchCount matches.  Keep matches sorted
+// by InitialScore (or, if MQScoreFlag is set, by MatchQualityScore)
+Peptide* StoreSpectralMatch(MSSpectrum* Spectrum, Peptide* Match, int PeptideLength, int MQScoreFlag)
+{
+    Peptide* OldMatch;
+    Peptide* CrummyScoreOldMatch;
+    int VerboseFlag = 0;
+    int SameFlag = 0;
+    SpectrumNode* Node = Spectrum->Node;
+    int NTT;
+
+    
+    //
+    if (GlobalOptions->RequireTermini)
+    {
+        NTT = CountTrypticTermini(Match);
+        if (NTT < GlobalOptions->RequireTermini)
+        {
+            FreePeptideNode(Match);
+            return NULL;
+        }
+    }
+    //printf("NEC_ERROR: Store match %d '%s'\n", Match->InitialScore, Match->Bases); 
+    if (!Node->FirstMatch)
+    {
+        Node->FirstMatch = Match;
+        Node->LastMatch = Match;
+        Node->MatchCount++;
+    }
+    else
+    {
+        OldMatch = Node->FirstMatch;
+        while (1)
+        {
+            SameFlag = IsMatchDuplicate(Match, OldMatch, PeptideLength);
+            // Check to see whether it's the SAME as an existing match:
+            if (SameFlag)
+            {
+                // Old match is the same as our new peptide.  Free the new guy, and break:
+	      //printf("NEC_ERROR: This is a duplicate, do not add to list\n");
+                OldMatch->MatchQualityScore = max(OldMatch->MatchQualityScore, Match->MatchQualityScore); 
+                OldMatch->InitialScore = max(OldMatch->InitialScore, Match->InitialScore); 
+                SafeFree(Match);
+                Match = OldMatch;
+                //OldMatch->SeenCount++;
+                break;
+            }
+            if ((MQScoreFlag && Match->MatchQualityScore > OldMatch->MatchQualityScore) || (!MQScoreFlag && Match->InitialScore > OldMatch->InitialScore))
+            {
+	      //printf("NEC_ERROR: This is a good score, adding to list\n");
+                if (Node->FirstMatch == OldMatch)
+                {
+                    Node->FirstMatch = Match;
+                }
+                Match->Next = OldMatch;
+                Match->Prev = OldMatch->Prev;
+                if (OldMatch->Prev)
+                {
+                    OldMatch->Prev->Next = Match;
+                }
+                OldMatch->Prev = Match;
+                Node->MatchCount++;
+                // It's possible that we've already seen this peptide, but with a lower score.  (Why a lower score?
+                // probably because we searched with the WRONG parent mass before, and the RIGHT parent mass now!) So, iterate over
+                // the rest of the old matches, and if any is the same as this match, free it.
+                for (CrummyScoreOldMatch = Match->Next; CrummyScoreOldMatch; CrummyScoreOldMatch = CrummyScoreOldMatch->Next)
+                {
+                    SameFlag = IsMatchDuplicate(Match, CrummyScoreOldMatch, PeptideLength);
+                    if (SameFlag)
+                    {
+		      //printf("NEC_ERROR: This is a duplicate, but its better than the previous one\n");
+                        if (Node->LastMatch == CrummyScoreOldMatch)
+                        {
+                            Node->LastMatch = Node->LastMatch->Prev;
+                        }
+                        if (CrummyScoreOldMatch->Next)
+                        {
+                            CrummyScoreOldMatch->Next->Prev = CrummyScoreOldMatch->Prev;
+                        }
+                        if (CrummyScoreOldMatch->Prev)
+                        {
+                            CrummyScoreOldMatch->Prev->Next = CrummyScoreOldMatch->Next;
+                        }
+                        FreePeptideNode(CrummyScoreOldMatch);
+                        break;
+                    }
+                }
+                break;
+            }
+            OldMatch = OldMatch->Next;
+            if (!OldMatch)
+            {
+	      //printf("NEC_ERROR: adding to list\n");
+                // Save our new match at the end of the list.
+                Node->LastMatch->Next = Match;
+                Match->Prev = Node->LastMatch;
+                Node->LastMatch = Match;
+                Node->MatchCount++;
+                break;
+            }
+        }
+    }
+    if (Node->MatchCount > GlobalOptions->StoreMatchCount)
+    {
+        if (Match == Node->LastMatch)
+        {
+            Match = NULL;
+        }
+        OldMatch = Node->LastMatch->Prev;
+	//printf("NEC_ERROR: Removing the last match '%s'\n",Node->LastMatch->Bases);
+        FreePeptideNode(Node->LastMatch);
+        Node->LastMatch = OldMatch;
+        if (OldMatch)
+        {
+            OldMatch->Next = NULL;
+        }
+        Node->MatchCount--;
+    }
+    return Match;
+}
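+// Note (descriptive): after StoreSpectralMatch, Node->FirstMatch is the
+// best-scoring match and Node->LastMatch the worst; the list never holds more
+// than GlobalOptions->StoreMatchCount entries, and a duplicate peptide keeps
+// whichever MatchQualityScore / InitialScore is higher.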
+
+
+// Record a new match in the global match list.  If it's a duplicate peptide, then
+// don't add it again.
+Peptide* AddNewMatch(SearchInfo* Info, int FilePos, TrieTag* Tag, char* MatchedBases, 
+                 int PeptideLength, int TagPosition, int PrefixDecoration, int SuffixDecoration, 
+                 int GenomicStart, int GenomicEnd)
+{
+    Peptide* Match;
+    char MatchedPeptideVerbose[256];
+    PeptideMatch* PepInfo;
+    int AminoIndex[MAX_PT_MODS];
+    MassDelta* ModType[MAX_PT_MODS];
+    int PrefixAminoIndex[MAX_PT_MODS];
+    MassDelta* PrefixModType[MAX_PT_MODS];
+    int SuffixAminoIndex[MAX_PT_MODS];
+    MassDelta* SuffixModType[MAX_PT_MODS];
+    int Mass;
+    int SuffixStart;
+    int AminoPos;
+    int ModIndex;
+    int TotalMods = 0;
+    float ScoreToBeat;
+    //int Score;
+    int VerboseFlag;
+    char* Amino;
+    //int PrecursorMass;
+    int ParentMassError;
+    MSSpectrum* Spectrum = Info->Spectrum;
+
+    int i;
+
+    //
+    memset(PrefixAminoIndex, -1, sizeof(int) * MAX_PT_MODS);
+    memset(SuffixAminoIndex, -1, sizeof(int) * MAX_PT_MODS);
+    memset(PrefixModType, 0, sizeof(MassDelta*) * MAX_PT_MODS);
+    memset(SuffixModType, 0, sizeof(MassDelta*) * MAX_PT_MODS);
+    memset(ModType, 0, sizeof(MassDelta*) * MAX_PT_MODS);
+    memset(AminoIndex, -1, sizeof(int) * MAX_PT_MODS);
+
+    SuffixStart = TagPosition + strlen(Tag->Tag);
+    
+    //Log("Prefix mods %d, suffix mods %d\n", PrefixDecoration, SuffixDecoration); 
+    TotalMods = Tag->ModsUsed + AllDecorations[PrefixDecoration].TotalMods + AllDecorations[SuffixDecoration].TotalMods;
+    //////////////////////////////////////////////////
+    // Optimally place the prefix and suffix PTMs:
+    VerboseFlag = 0;
+
+    ////////////////////////////////////////////////////////////////////////////////////////
+    // Temporarily adjust the charge and parent mass to reflect this candidate:
+    Spectrum->Charge = Tag->Charge;
+    Spectrum->ParentMass = PARENT_MASS_BOOST;
+    for (AminoPos = 0, Amino = MatchedBases; AminoPos<PeptideLength; AminoPos++,Amino++)
+    {
+        Spectrum->ParentMass += PeptideMass[*Amino];
+    }
+    Spectrum->ParentMass += AllDecorations[PrefixDecoration].Mass;
+    Spectrum->ParentMass += AllDecorations[SuffixDecoration].Mass;
+    for (ModIndex = 0; ModIndex < MAX_PT_MODS; ModIndex++)
+    {
+        if (!Tag->ModType[ModIndex])
+        {
+            break;
+        }
+        Spectrum->ParentMass += Tag->ModType[ModIndex]->RealDelta; 
+    }
+    ////////////////////////////////////////////////////////////////////////
+    // Reject this parent mass, if it's too far from the theoretical mass.
+	//Use the corrected parent mass from the tweak, not the file mass.
+    //PrecursorMass = Spectrum->MZ * Spectrum->Charge - (HYDROGEN_MASS * (Spectrum->Charge - 1));
+    //ParentMassError = PrecursorMass - Spectrum->ParentMass;
+    ParentMassError = Tag->Tweak->ParentMass - Spectrum->ParentMass;
+    if (abs(ParentMassError) > GlobalOptions->ParentMassEpsilon)
+    {
+        // *** Reject this match, it doesn't match the parent mass!
+        return NULL;
+    } 
+
+    ////////////////////////////////////////////////////////////////////////////////////////
+    FindOptimalPTModPositions(Spectrum, MatchedBases, TagPosition, PrefixDecoration, 0, 
+        PrefixAminoIndex, PrefixModType, VerboseFlag, Tag->Tweak);
+    // Get the starting mass for our suffix match:
+    Mass = 0;
+    for (AminoPos = 0; AminoPos < TagPosition + Tag->TagLength; AminoPos++)
+    {
+        Mass += PeptideMass[MatchedBases[AminoPos]];
+    }
+    Mass += AllDecorations[PrefixDecoration].Mass;
+    for (ModIndex = 0; ModIndex < MAX_PT_MODS; ModIndex++)
+    {
+        if (!Tag->ModType[ModIndex])
+        {
+            break;
+        }
+        Mass += Tag->ModType[ModIndex]->RealDelta;
+    }
+    FindOptimalPTModPositions(Spectrum, 
+        MatchedBases + TagPosition + Tag->TagLength, 
+        PeptideLength - TagPosition - Tag->TagLength, 
+        SuffixDecoration, 
+        Mass, 
+        SuffixAminoIndex, 
+        SuffixModType, 0, Tag->Tweak);
+    /////////////////////////////////////////////////////////
+    // Merge all the mods into one array, then sort it:
+    TotalMods = 0;
+    for (ModIndex = 0; ModIndex < MAX_PT_MODS; ModIndex++)
+    {
+        if (!PrefixModType[ModIndex])
+        {
+            break;
+        }
+        ModType[TotalMods] = PrefixModType[ModIndex];
+        AminoIndex[TotalMods] = PrefixAminoIndex[ModIndex];
+        TotalMods++;
+    }
+    for (ModIndex = 0; ModIndex < MAX_PT_MODS; ModIndex++)
+    {
+        if (!Tag->ModType[ModIndex])
+        {
+            break;
+        }
+        ModType[TotalMods] = Tag->ModType[ModIndex];
+        AminoIndex[TotalMods] = Tag->AminoIndex[ModIndex] + TagPosition;
+        TotalMods++;
+    }
+    for (ModIndex = 0; ModIndex < MAX_PT_MODS; ModIndex++)
+    {
+        if (!SuffixModType[ModIndex])
+        {
+            break;
+        }
+        ModType[TotalMods] = SuffixModType[ModIndex];
+        AminoIndex[TotalMods] = SuffixAminoIndex[ModIndex] + TagPosition + Tag->TagLength;
+        TotalMods++;
+    }
+    SortModifications(AminoIndex, ModType);
+
+    
+
+    /////////////////////////////////////////////////////////
+    // Score the match.  If the score's not good enough, then toss it:
+    ScoreToBeat = -999999;
+    if (Spectrum->Node->MatchCount >= GlobalOptions->StoreMatchCount)
+    {
+        ScoreToBeat = Spectrum->Node->LastMatch->MatchQualityScore;
+    }
+    Match = NewPeptideNode();
+    Match->ParentMassError = ParentMassError;
+    Match->Tweak = Tag->Tweak;
+    Match->DB = Info->DB;
+    memcpy(Match->AminoIndex, AminoIndex, sizeof(int)*MAX_PT_MODS);
+    memcpy(Match->ModType, ModType, sizeof(MassDelta*)*MAX_PT_MODS);
+    if (FilePos)
+    {
+        Match->PrefixAmino = *(MatchedBases - 1);
+    }
+    Match->SuffixAmino = *(MatchedBases + PeptideLength);
+    strncpy(Match->Bases, MatchedBases, PeptideLength);
+    Match->FilePos = FilePos;
+    Match->RecordNumber = Info->RecordNumber;
+    VerboseFlag = 0;
+    
+    GetPeptideParentMass(Match);
+    
+    
+    if (GlobalOptions->RunMode & RUN_MODE_RAW_OUTPUT)
+    {
+        WriteMatchToString(Match, MatchedPeptideVerbose, 1);
+        fprintf(GlobalOptions->OutputFile, "%s\t%d\t%s\n", Spectrum->Node->InputFile->FileName, Spectrum->Node->ScanNumber, MatchedPeptideVerbose);
+        //printf("%s\t%d\t%s\n", Spectrum->Node->InputFile->FileName, Spectrum->Node->ScanNumber, MatchedPeptideVerbose);
+        //fflush(stdout);
+        return NULL;
+    }
+    Spectrum->CandidatesScored++;
+    Tag->CandidatesScored++;
+    // Invoke the scoring function now:
+    ComputeMQScoreFeatures(Spectrum, Match, Match->ScoreFeatures, 0);
+    
+#ifdef MQSCORE_USE_SVM
+    
+    Match->MatchQualityScore = SVMComputeMQScore(Spectrum, Match, Match->ScoreFeatures);
+#else
+    
+    Match->MatchQualityScore = LDAComputeMQScore(Spectrum, Match, Match->ScoreFeatures);
+#endif
+    Match->InitialScore = (int)(1000 * Match->MatchQualityScore);
+
+
+
+    Match->GenomicLocationEnd = GenomicEnd;
+    Match->GenomicLocationStart = GenomicStart;
+    if (Match->MatchQualityScore < ScoreToBeat)
+    {
+        // Not good enough - forget it!
+      
+        SafeFree(Match);
+        return NULL;
+    }
+    // It's good enough to add to the list:
+    //printf("NEC_ERROR:Match: %s, Tweak[z=%d,m=%d], Score: %f\n",Match->Bases,Match->Tweak->Charge, Match->Tweak->ParentMass, Match->MatchQualityScore);
+    //for(i = 0; i < 16; ++i)
+    //  {
+    //	printf("ScoreFeature[%d] = %f\n",i,Match->ScoreFeatures[i]);
+    //  }
+
+    Match = StoreSpectralMatch(Spectrum, Match, PeptideLength, 1);
+    if (!Match)
+    {
+      
+        return NULL;
+    }
+    //DebugPrintMatch(Match);
+    // Store the match details, if requested:
+    if (GlobalOptions->ReportAllMatches)
+    {
+      PepInfo = (PeptideMatch*)calloc(1, sizeof(PeptideMatch));
+        PepInfo->FilePos = FilePos;
+        PepInfo->RecordNumber = Info->RecordNumber;
+        PepInfo->Tag = Tag;
+        if (Match->Last)
+        {
+            Match->Last->Next = PepInfo;
+        }
+        else
+        {
+            Match->First = PepInfo;
+        }
+        Match->Last = PepInfo;
+    }
+    return Match;
+}
+
+// Print a list (one per line) of all the decorations we generated for
+// the available post-translational modifications.
+void DebugPrintDecoratedMassList()
+{
+    int Index;
+    int ModIndex;
+    //
+    printf("Decorated masses:  (%d in all)\n", DecorationMassCount);
+    for (Index = 0; Index < DecorationMassCount; Index++)
+    {
+        printf("  %.2f: ",DecorationMasses[Index]);
+        for (ModIndex = 0; ModIndex <= GlobalOptions->MaxPTMods; ModIndex++)
+        {
+            if (DecorationMassMods[Index][ModIndex]<0)
+            {
+                // That's all the modifications in this one.
+                printf("(end)\n");
+                break;
+            }
+            printf("%d: %s (%.2f), ", DecorationMassMods[Index][ModIndex], PTModName[DecorationMassMods[Index][ModIndex]], ModMasses[DecorationMassMods[Index][ModIndex]]);
+        }
+    }
+    printf("End of decorated mass list.\n");
+}
+
+// Helper macro for quick-sort
+#define DECO_SWAP(a,b) \
+{ \
+fSwap = Masses[(a)]; \
+memcpy(TempSpace, Mods[(a)], sizeof(int) * GlobalOptions->MaxPTMods); \
+Masses[(a)] = Masses[b]; \
+memcpy(Mods[(a)], Mods[(b)], sizeof(int) * GlobalOptions->MaxPTMods); \
+Masses[(b)] = fSwap; \
+memcpy(Mods[(b)], TempSpace, sizeof(int) * GlobalOptions->MaxPTMods); \
+}
+
+// Sort decorations using quick-sort.  We're sorting the array Masses, but we also
+// make the corresponding changes to the 2D array Mods, to keep the arrays in sync.
+// Reminder: Quick-sort is done recursively.  Take the first element of the array as a pivot, then
+// 'pseudo-sort' the remaining elements so that all the EARLY elements (those less than the pivot)
+// come before all the LATE elements (those larger than the pivot).  (The 'pseudo-sort' is done by
+// moving a left index and a right index in from the edges of the array until they meet.)
+// Then, and here is the recursive part, use quick-sort to sort the early and late elements.
+void QuickSortDecoratedMasses(float* Masses, int** Mods, int Count)
+{
+    float fSwap;
+    int TempSpace[1024];
+    float Pivot;
+    int LeftIndex;
+    int RightIndex;
+    // Sorting a list of one element is easy:
+    if (Count<2)
+    {
+        return;
+    }
+    // Sorting a list of two elements is easy:
+    if (Count == 2)
+    {
+        if (Masses[0] > Masses[1])
+        {
+            DECO_SWAP(0,1);
+        }
+        return;
+    }
+    // Now the REAL case begins:
+    Pivot = Masses[0];
+    LeftIndex = 1;
+    RightIndex = Count-1;
+    while (LeftIndex < RightIndex)
+    {
+        while (Masses[LeftIndex] <= Pivot) 
+        {
+            LeftIndex++;
+            if (LeftIndex == Count)
+            {
+                // Pivot element is the biggest of all!
+                DECO_SWAP(0, Count-1);
+                QuickSortDecoratedMasses(Masses, Mods, Count-1);
+                return;
+            }
+        }
+        while (Masses[RightIndex] > Pivot)
+        {
+            RightIndex--;
+        }
+        if (RightIndex == 0)
+        {
+            // Pivot element is the smallest of all!
+            QuickSortDecoratedMasses(Masses+1, Mods+1, Count-1);
+            return;
+        }
+        if (RightIndex > LeftIndex)
+        {
+            DECO_SWAP(LeftIndex, RightIndex);
+        }
+    }
+    DECO_SWAP(0, RightIndex);
+    QuickSortDecoratedMasses(Masses, Mods, RightIndex);
+    QuickSortDecoratedMasses(Masses+RightIndex+1, Mods+RightIndex+1, Count-RightIndex-1);
+
+}
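+// Example (hypothetical values): Masses {80.0, 0.0, 16.0} with three parallel Mods
+// rows sorts to {0.0, 16.0, 80.0}; DECO_SWAP moves each Mods[i] row along with its
+// mass, so decoration i still describes Masses[i] afterwards.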
+
+int PopulateDecoratedMassList(float* TotalMass, int** Mods, 
+                              float MassSoFar, int* UsedMods, int UsedModCount)
+{
+    int Index;
+    int MinModIndex;
+    int RecordsBuilt = 0;
+    //
+    // If our prefix is mod #1, don't do 1,0; just do 1,1 and onward.  (Decorations
+    // are listed from lowest PTM index to largest.)
+    if (UsedModCount)
+    {
+        MinModIndex = UsedMods[UsedModCount-1];
+    }
+    else
+    {
+        MinModIndex = 0;
+    }
+    // Consider adding no mods at all:
+    for (Index = 0; Index < UsedModCount; Index++)
+    {
+        Mods[0][Index] = UsedMods[Index];
+    }
+    TotalMass[0] = MassSoFar;
+    RecordsBuilt++;
+    if (UsedModCount == GlobalOptions->MaxPTMods)
+    {
+        return 1;
+    }
+    // OK: Extend with each legal (lexicographically subsequent) modification!
+    for (Index = MinModIndex; Index < TotalPTMods; Index++)
+    {
+        UsedMods[UsedModCount] = Index;
+        RecordsBuilt += PopulateDecoratedMassList(TotalMass + RecordsBuilt, Mods + RecordsBuilt, 
+            MassSoFar + ModMasses[Index], UsedMods, UsedModCount+1);
+    }
+    return RecordsBuilt;
+
+}
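+// Enumeration order (descriptive example): with two PTM types (indices 0 and 1) and
+// GlobalOptions->MaxPTMods == 2, the records are built in the order
+// {}, {0}, {0,0}, {0,1}, {1}, {1,1}; six decorations in all, each paired with its
+// accumulated mass in TotalMass.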
+
+int GetDecoratedMassCount(int AvailableMods, int PermissibleModCount)
+{
+    int ModIndex;
+    int Total;
+    if (PermissibleModCount == 0)
+    {
+        return 1;
+    }
+    Total = 1; // If we add no more
+    for (ModIndex = 0; ModIndex < AvailableMods; ModIndex++)
+    {
+        Total += GetDecoratedMassCount(AvailableMods - ModIndex, PermissibleModCount - 1);
+    }
+    return Total;
+}
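+// Worked example (descriptive): GetDecoratedMassCount(2, 2)
+//   = 1 + GetDecoratedMassCount(2, 1) + GetDecoratedMassCount(1, 1)
+//   = 1 + 3 + 2 = 6,
+// matching the six decorations enumerated by PopulateDecoratedMassList above.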
+
+//Trie.c::ProcessGeneHitsBlindTag
+//This function processes all the one-sided hits to a single gene from the
+//blind-tagging option, sending them to a function that finds the PTM site and the scores.
+//1. Tags (or their container) are sent to the function SeekMatch1PTM
+int ProcessGeneHitsBlindTag()
+{
+    BlindTagMatch* Match;
+    int Counter = 0;
+    
+    for (Match = FirstBlindTag; Match; Match = Match->Next)
+    {
+        Counter ++;
+    }
+    printf ("Processing a gene with %d matches\n",Counter);
+    return 1;
+}
+//Trie.c::IsIdenticalBlindTagMatches
+//Returns true (1) if the two tag matches are identical, else false (0).
+//Conditions for identity:
+//1. Tags come from the same spectrum and Tweak
+//2. Tags have an identical extension location in the database (ExtendDBLoc)
+//3. Tags extend in the same direction.
+
+int IsIdenticalBlindTagMatches(BlindTagMatch* NodeA, BlindTagMatch* NodeB)
+{
+    if (NodeA->Tag->PSpectrum != NodeB->Tag->PSpectrum)
+    {
+        return 0;
+    }
+    if (NodeA->Tag->Tweak != NodeB->Tag->Tweak)
+    {
+        return 0;
+    }
+    if (NodeA->ExtendDBLoc != NodeB->ExtendDBLoc)
+    {
+        return 0;
+    }
+    if (NodeA->ExtendLR != NodeB->ExtendLR)
+    { //this one may be unnecessary but it is in there for completeness
+        return 0;
+    }
+    return 1;
+}
+//Trie.c :: InsertBlindTagMatch
+//Inserts a match into the global linked list, first testing
+//whether it is identical to an already existing entry.
+//If the new object is not inserted, it is freed here (the caller expects this
+//function to take ownership); likewise, a list entry that gets replaced is
+//freed here.
+int InsertBlindTagMatch(BlindTagMatch* Match)
+{
+    BlindTagMatch* NodeA;
+    BlindTagMatch* Prev = NULL; //in case we do some swapping in the list
+    BlindTagMatch* Next = NULL;
+
+    if (FirstBlindTag == NULL) //just started
+    {
+        FirstBlindTag = Match;
+        LastBlindTag = Match;
+        return 1;
+    }
+
+    //cycle through the list, and see if there are any identical tags.
+    // if identical tags exist, then we keep only the one with the 
+    //longer extension.  In the absence of any twin, we put it at the end
+    for (NodeA = FirstBlindTag; NodeA; NodeA = NodeA->Next)
+    {
+        if (IsIdenticalBlindTagMatches(Match, NodeA))
+        {  //decide who to keep
+            if (NodeA->ExtendLength >= Match->ExtendLength)
+            { //winner is already in the list
+                FreeBlindTagMatch(Match);
+                return 1;
+            }
+            //have to remove item in the list. swap in Match
+            if (NodeA->Prev == NULL) //first item
+            {
+                FirstBlindTag = Match;
+                Next = NodeA->Next; //may be NULL if NodeA was the only entry
+                Match->Next = Next;
+                if (Next)
+                {
+                    Next->Prev = Match;
+                }
+                else
+                {
+                    LastBlindTag = Match;
+                }
+                FreeBlindTagMatch(NodeA);
+                return 1;
+            }
+            if (NodeA->Next == NULL) //last item
+            {
+                LastBlindTag = Match;
+                Prev = NodeA->Prev;
+                Match->Prev = Prev;
+                Prev->Next = Match;
+                FreeBlindTagMatch(NodeA);
+                return 1;
+            }
+            //default else, nodeA in the middle
+            Prev = NodeA->Prev;
+            Next = NodeA->Next;
+            Match->Prev = Prev;
+            Match->Next = Next;
+            Prev->Next = Match;
+            Next->Prev = Match;
+            FreeBlindTagMatch(NodeA);
+            return 1;
+        }
+    }
+    LastBlindTag->Next = Match; //add onto the end
+    Match->Prev = LastBlindTag; //point back
+    LastBlindTag = Match; //move end
+    return 1;
+}
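+// Note (descriptive): the list keeps at most one BlindTagMatch per
+// (spectrum, tweak, extension position, extension direction), preferring the
+// match with the longer modless extension.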
+// Main method: Use a trie to search a data-file.  Return the number of proteins searched.
+int ScanFileWithTrie(SearchInfo* Info)
+{
+    FILE* File;
+    int FilePos = 0;
+    char* Buffer;
+    int BufferPos = 0;
+    int BufferEnd = 0;
+    int AnchorPos = -1; // -1 means that no anchor is set
+    TrieNode* Node;
+    TrieNode* NextNode;
+    int IsEOF = 0;
+    int BytesRead;
+    int OldPos;
+    int PaddingDistance = 50;
+    int Verbose = 0;
+    //
+    Info->RecordNumber = 0;
+    File = Info->DB->DBFile;
+    if (!File)
+    {
+        return 0;
+    }
+    fseek(File, 0, 0);
+    if (!Info->Root)
+    {
+        return 0;
+    }
+    
+    Buffer = (char*)calloc(SCAN_BUFFER_SIZE, sizeof(char));
+    Node = Info->Root;
+    // We'll scan in chunks of the file, and scan across them.  We try to always keep a buffer of 50 characters
+    // before and after the current position, so that we can look forward and back to get masses.  (When we match
+    // a tag-string, we look at surrounding masses).
+    while (1)
+    {
+      
+      //printf("Anc %d, Buf %d, BufEnd %d, FilePos %d, Char%c\n", AnchorPos, BufferPos, BufferEnd, FilePos, Buffer[BufferPos]);
+        // Periodically shunt data toward the front of the buffer:
+        if (BufferPos > SCAN_BUFFER_A && AnchorPos==-1)
+        {
+            // ......ppppBbbbbbbbbbE... <- diagram (p = pad, B = buffer start, E = buffer end)
+            // ppppBbbbbbbbbbE....      <- after move
+            memmove(Buffer, Buffer + BufferPos - PaddingDistance, BufferEnd - (BufferPos - PaddingDistance));
+            BufferEnd -= (BufferPos - PaddingDistance);
+            BufferPos = PaddingDistance;   
+        }
+
+        // Read more data, if we have room and we can:
+        if (BufferEnd < SCAN_BUFFER_B && !IsEOF)
+        {
+            BytesRead = ReadBinary(Buffer + BufferEnd, sizeof(char), SCAN_BUFFER_SIZE - BufferEnd, File);
+            if (!BytesRead)
+            {
+                IsEOF = 1;
+            }
+            BufferEnd += BytesRead;
+	    
+        }
+
+        if (AnchorPos!=-1)
+        {
+            // If we're anchored: Attempt to extend the current match.
+            if (Buffer[BufferPos] >= 'A' && Buffer[BufferPos] <= 'Z')
+            {
+                NextNode = Node->Children[Buffer[BufferPos] - 'A'];
+            }
+            else
+            {
+                NextNode = NULL;
+            }
+            // If we can extend the current match...
+            if (NextNode)
+            {
+                // Note any new matches:
+                if (NextNode->FirstTag)
+                {
+		  //if(GlobalOptions->RunMode & RUN_MODE_BLINDTAG)
+		  //{
+		  //    ExtendTagMatchBlind(Info, NextNode, Buffer, BufferPos, BufferEnd, FilePos);
+		  //}
+		  //else
+		  //{
+                        GetMatches(Info, NextNode, Buffer, BufferPos, BufferEnd, FilePos);
+			//}
+                }
+                // Travel down the trie:
+                Node = NextNode;
+                BufferPos++;
+                FilePos++;
+            }
+            else
+            {
+                // We could NOT extend the match.
+                // We're done with this anchor.  Clear the anchor, and use our FailureNode to jump
+                // forward.  (AnchorPos moves forward by FailureLetterSkip chars, and the BufferPos
+                // moves to the correct distance ahead of the anchor)
+                if (IS_ROOT(Node->FailureNode))
+                {
+                    AnchorPos = -1;
+                }
+                else
+                {
+                    AnchorPos = AnchorPos + Node->FailureLetterSkip;
+                    OldPos = BufferPos;
+                    BufferPos = AnchorPos + Node->FailureNode->Depth - 1;
+                    FilePos += (BufferPos - OldPos);
+                    // Process matches immediately:
+                    if (Node->FailureNode->FirstTag)
+                    {
+		      //                        if (GlobalOptions->RunMode & RUN_MODE_BLINDTAG)
+		      //{
+		      //    ExtendTagMatchBlind(Info, NextNode, Buffer, BufferPos, BufferEnd, FilePos);
+		      //}
+		      //else
+		      //{
+                            GetMatches(Info, Node->FailureNode, Buffer, BufferPos, BufferEnd, FilePos);
+			    //}
+                    }
+                    BufferPos++;
+                    FilePos++;
+                }
+                Node = Node->FailureNode;
+            }
+        }
+        else
+        {
+            // We're not currently anchored.  Process end-of-record tags, or attempt to start a 
+            // brand new match.
+            if (BufferPos>=BufferEnd || Buffer[BufferPos] == RECORD_END || !Buffer[BufferPos])
+            {
+                // END of a protein.
+	      //                if (GlobalOptions->RunMode & RUN_MODE_BLINDTAG)
+	      //{
+                    //ProcessGeneHitsBlindTag(); // Process the blind tags a gene at a time.
+                    //FreeAllBlindTagMatches(FirstBlindTag); //free up the hits
+                    //FirstBlindTag = NULL; //reset the pointers
+                    //LastBlindTag = NULL;
+	      //}
+                Info->RecordNumber++;   
+                AnchorPos = -1;
+            }
+            else 
+            {
+                // Now: Start a new match, if possible:
+                if (Buffer[BufferPos] >= 'A' && Buffer[BufferPos] <= 'Z')
+                {
+                    NextNode = Node->Children[Buffer[BufferPos] - 'A'];
+                }
+                else
+                {
+                    NextNode = NULL;
+                }
+                if (NextNode)
+                {
+                    // Note any new matches.  (Not likely, because 
+                    // at this point in the code, we're only at depth 1 in the
+                    // tree; tags of length 1 aren't very good)
+                    if (NextNode->FirstTag)
+                    {
+		      //                        if (GlobalOptions->RunMode & RUN_MODE_BLINDTAG)
+		      //{
+		      //    ExtendTagMatchBlind(Info, NextNode, Buffer, BufferPos, BufferEnd, FilePos);
+		      //}
+		      //else
+		      //{
+                            GetMatches(Info, NextNode, Buffer, BufferPos, BufferEnd, FilePos);
+			    //}
+                    }
+                    Node = NextNode;
+                    AnchorPos = BufferPos;
+                }
+            }
+            BufferPos++;
+            FilePos++;
+            if (BufferPos >= BufferEnd)
+            {
+                break;
+            }
+        } // if not anchored
+    } // Master while-loop
+
+    SafeFree(Buffer);
+    
+    return Info->RecordNumber + 1;
+}
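+// Illustrative call sequence (a sketch, not taken from the original sources; the
+// variable names below are hypothetical, but the SearchInfo fields are those
+// declared in Trie.h):
+//   SearchInfo Info;
+//   memset(&Info, 0, sizeof(SearchInfo));
+//   Info.DB = MyDatabase;     // DatabaseFile whose DBFile is already open
+//   Info.Root = MyTrieRoot;   // tag trie built with AddTagToTrie()
+//   ProteinCount = ScanFileWithTrie(&Info);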
+
+
+// Print just the tags from our trie:
+void DebugPrintTrieTags(TrieNode* Node)
+{
+    TrieTagHanger* Hanger;
+    int ChildIndex;
+    TrieNode* Failure;
+    char TagBuffer[256];
+    int Len;
+    int ModIndex;
+    if (!Node)
+    {
+      return;
+    }
+    for (Hanger = Node->FirstTag; Hanger; Hanger = Hanger->Next)
+    {
+        ModIndex = 0;
+        TagBuffer[0] = Hanger->Tag->Tag[0];
+        TagBuffer[1] = '\0';
+        if (Hanger->Tag->ModType[ModIndex] && Hanger->Tag->AminoIndex[ModIndex] == 0)
+        {
+            strcat(TagBuffer, Hanger->Tag->ModType[ModIndex]->Name);
+            ModIndex++;
+        }
+        Len = strlen(TagBuffer);
+        TagBuffer[Len] = Hanger->Tag->Tag[1];
+        TagBuffer[Len+1] = '\0';
+        if (Hanger->Tag->ModType[ModIndex] && Hanger->Tag->AminoIndex[ModIndex] == 0)
+        {
+            strcat(TagBuffer, Hanger->Tag->ModType[ModIndex]->Name);
+            ModIndex++;
+        }
+        Len = strlen(TagBuffer);
+        TagBuffer[Len] = Hanger->Tag->Tag[2];
+        TagBuffer[Len+1] = '\0';
+
+        if (Hanger->Tag->ModType[ModIndex] && Hanger->Tag->AminoIndex[ModIndex] == 0)
+        {
+            strcat(TagBuffer, Hanger->Tag->ModType[ModIndex]->Name);
+            ModIndex++;
+        }
+
+        //ARI_MOD - for tags of length 4
+        Len = strlen(TagBuffer);
+        TagBuffer[Len] = Hanger->Tag->Tag[3];
+        TagBuffer[Len + 1] = '\0';
+        if (Hanger->Tag->ModType[ModIndex] && Hanger->Tag->AminoIndex[ModIndex] == 0)
+        {
+            strcat(TagBuffer, Hanger->Tag->ModType[ModIndex]->Name);
+            ModIndex++;
+        }
+
+
+        printf("Tag '%s' (prefix %.2f, Suffix %.2f) %.2f hits %d\n", TagBuffer, 
+            Hanger->Tag->PrefixMass / (float)MASS_SCALE, 
+            Hanger->Tag->SuffixMass / (float)MASS_SCALE,
+            Hanger->Tag->Score, Hanger->Tag->CandidatesScored);
+#ifdef DEBUG_TAG_GENERATION
+        printf("%s\n", Hanger->Tag->TagScoreDetails);
+#endif
+    }
+    Failure = Node->FailureNode;
+    if (Node->FirstTag && Failure)
+    {
+        printf("  Node %s has failure node depth %d letter %c.\n", Node->FirstTag->Tag->Tag, Failure->Depth, Failure->Letter);
+    }
+
+    for (ChildIndex = 0; ChildIndex < 26; ChildIndex++)
+    {
+        if (ChildIndex == 'I'-'A' || ChildIndex == 'Q'-'A')
+        {
+            continue; 
+        }
+        if (Node->Children[ChildIndex])
+        {
+            DebugPrintTrieTags(Node->Children[ChildIndex]);
+        }
+    }
+}
+
+void FlagMandatoryModUsage(TrieNode* Node)
+{
+    TrieTagHanger* Hanger;
+    int CharIndex;
+    int ModIndex;
+    //
+    if (!Node)
+    {
+        return;
+    }
+    for (Hanger = Node->FirstTag; Hanger; Hanger = Hanger->Next)
+    {
+        for (ModIndex = 0; ModIndex < MAX_PT_MODS; ModIndex++)
+        {
+            // Skip unused (NULL) mod slots before dereferencing:
+            if (Hanger->Tag->ModType[ModIndex] && Hanger->Tag->ModType[ModIndex]->Index == GlobalOptions->MandatoryModIndex)
+            {
+                Hanger->Tag->MandatoryModUsed = 1;
+            }
+        }
+    }
+
+    for (CharIndex = 0; CharIndex < TRIE_CHILD_COUNT; CharIndex++)
+    {
+        FlagMandatoryModUsage(Node->Children[CharIndex]);
+    }
+}
+
+// Copied, with small changes, from WriteMatchToString.
+void WriteTagToString(TrieTag* Tag, char* Buffer, int IncludeMods)
+{
+    char* Stuff;
+    int AminoPos;
+    char NameChar;
+    int ModIndex;
+    int NameIndex;
+    //
+    Stuff = Buffer;
+
+    for (AminoPos = 0; AminoPos < strlen(Tag->Tag); AminoPos++)
+    {
+        *Stuff++ = Tag->Tag[AminoPos];
+        if (IncludeMods)
+        {
+            for (ModIndex = 0; ModIndex < MAX_PT_MODS; ModIndex++)
+            {
+                if (Tag->AminoIndex[ModIndex] == AminoPos && Tag->ModType[ModIndex])
+                {
+                    // Write the modification:
+                    for (NameIndex = 0; NameIndex < 4; NameIndex++)
+                    {
+                        NameChar = Tag->ModType[ModIndex]->Name[NameIndex];
+                        if (!NameChar)
+                        {
+                            break;
+                        }
+                        *Stuff++ = ConvertToLower(NameChar);
+                    }
+                }
+            }
+        }
+    }
+    *Stuff = '\0';
+}
+
+// Write (to a char buffer) the string version of a peptide, including modifications.
+// For example: "EAM+16APK".  Similar to the method PeptideClass.GetModdedName
+void WriteMatchToString(Peptide* Match, char* Buffer, int IncludeMods)
+{
+    char* Stuff;
+    int AminoPos;
+    char NameChar;
+    int ModIndex;
+    int NameIndex;
+    //
+    Stuff = Buffer;
+    
+    if (Match->PrefixAmino)
+    {
+        *Stuff++ = Match->PrefixAmino;
+    }
+    else
+    {
+        *Stuff++ = '-';
+    }
+    *Stuff++ = '.';
+    for (AminoPos = 0; AminoPos < strlen(Match->Bases); AminoPos++)
+    {
+        *Stuff++ = Match->Bases[AminoPos];
+        if (IncludeMods)
+        {
+            for (ModIndex = 0; ModIndex < MAX_PT_MODS; ModIndex++)
+            {
+                if (Match->AminoIndex[ModIndex]==AminoPos)
+                {
+                    // Write the modification:
+                    for (NameIndex = 0; NameIndex < 4; NameIndex++)
+                    {
+                        NameChar = Match->ModType[ModIndex]->Name[NameIndex];
+                        if (!NameChar)
+                        {
+                            break;
+                        }
+                        *Stuff++ = ConvertToLower(NameChar);
+                    }
+                }
+            }
+        }
+    }
+    *Stuff++ = '.';
+    if (Match->SuffixAmino)
+    {
+        *Stuff++ = Match->SuffixAmino;
+    }
+    else
+    {
+        *Stuff++ = '-';
+    }
+    *Stuff = '\0';
+}
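+// Illustrative usage (a sketch; the match object and annotation shown are hypothetical):
+//   char Annotation[256];
+//   WriteMatchToString(Match, Annotation, 1);
+//   // Annotation now holds something like "K.EAMphosAPK.R", where "phos" is the
+//   // lower-cased name of the modification attached to that residue.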
diff --git a/Trie.h b/Trie.h
new file mode 100644
index 0000000..b33ac7d
--- /dev/null
+++ b/Trie.h
@@ -0,0 +1,309 @@
+//Title:          Trie.h
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+#ifndef TRIE_H
+#define TRIE_H
+
+// Implementation of the Aho-Corasick algorithm for string search using a
+// trie automaton.  Also, implementation of the d.p. tag extension algorithm
+// in presence of PTMs.
+
+#include "Inspect.h"
+#include <stdio.h>
+#include "Utils.h"
+#include "Spectrum.h"
+#include "Mods.h"
+
+// Tags, produced from mass-spec analysis.  A tag consists of sequence of bases
+// (e.g. "QVL"), a prefix mass, and a Suffix mass.  Tags are stored in the nodes
+// of a trie.  
+// In the simple case, leaf nodes in the trie have just one tag.  But
+// there may be multiple tags with the same bases and different prefix/suffix
+// masses; such tags all hang off the same trie node.  (We use
+// the TrieTagHanger struct to hold lists of tags.  The extra struct adds some
+// overhead; one advantage is that a tag can be in more than one list.)
+typedef struct TrieTag
+{
+#ifdef DEBUG_TAG_GENERATION
+    char TagScoreDetails[2048];
+#endif
+    int PrefixMass;
+    int SuffixMass;
+    int Charge;
+    int ParentMass;
+    int TagLength;
+    int CandidatesScored;
+    // How far this tag's mass is from the ACTUAL mass of the peptide
+    float Error; 
+    // Rank of this tag (0 being best)
+    int Rank;
+    // Score of this tag (higher is better):
+    float Score;
+    // PTMods used up in generating this tag.  These count against our allowable total.
+    // AminoIndex is set to -1 for unused entries.
+    int AminoIndex[MAX_PT_MODS]; 
+    MassDelta* ModType[MAX_PT_MODS]; 
+    int ModsUsed;
+    int MandatoryModUsed; // if GlobalOptions->MandatoryModIndex is set.
+    // The tag itself:
+    char Tag[MAX_TAG_LENGTH + 1];
+    struct MSSpectrum* PSpectrum;
+    SpectrumTweak* Tweak;
+    int PrefixExtends;
+    int SuffixExtends;
+    int DBTagMatches;
+    // Some members for training edge skew measures:
+    int TotalSkew;
+    int TotalAbsSkew;
+    struct TagGraphNode* Nodes[4];
+} TrieTag;
+
+// A trie (from 'reTRIEval') is a tree where each node corresponds to a word.  The root 
+// corresponds to an empty string, and a node's children correspond to that node's word 
+// extended by one letter.  The trie data structure allows fast searches for any of the
+// words in the trie.  In this case, the 'words' are short peptides, and the database
+// is Swiss-Prot or some other collection of protein sequences.
+//
+// During the search, the ANCHOR is the start of our current match (if any).  
+typedef struct TrieNode
+{
+    struct TrieNode* Children[TRIE_CHILD_COUNT]; 
+    // Depth == length of our string.
+    // Root has depth 0, its children have depth 1...
+    int Depth; 
+
+    // The failure node is an optimization which makes trie searching fast.
+    // Suppose we just finished matching the tag PANTS.  Naively, we would move on
+    // to tags starting with A.  But if we have a node ANT, maybe we can jump there 
+    // directly.  If we have no nodes starting with A, we can jump to the N.
+    // The FailureNode 'precomputes our jumps' - we move the anchor FailureLetterSkip 
+    // letters forward from the old anchor, and switch to the given failure node.
+    // If FailureNode is equal to the trie root, then we CLEAR THE ANCHOR.  
+    int FailureLetterSkip;
+    struct TrieNode* FailureNode; 
+
+    // Our (most recently added) letter:
+    char Letter;
+    // Our list of tags:
+    struct TrieTagHanger* FirstTag;
+    struct TrieTagHanger* LastTag;
+} TrieNode;
+
+typedef struct PeptideSpliceNode
+{
+    int ChromosomeNumber;
+    int DonorPos;
+    int AcceptorPos;
+    struct PeptideSpliceNode* Next;
+} PeptideSpliceNode;
+
+// A Peptide struct represents the peptide we use to annotate a tandem mass spectrum - possibly 
+// with PTMs, and prefix and suffix residues.
+typedef struct Peptide
+{
+    int ParentMassError;
+    char Bases[256];
+
+    char PrefixAmino; // The base BEFORE the peptide starts.  (Useful for checking trypsin fragmentation)
+
+    char SuffixAmino; // The base AFTER the peptide ends.
+
+    // FilePos is the byte-offset in the database where the peptide starts.  If the peptide is found
+    // multiple times, FilePos the position within the file of the FIRST occurrence of the peptide.
+    int FilePos; 
+
+    // RecordNumber is the protein record # where the peptide is found
+    int RecordNumber; 
+
+    int InitialScore;
+
+    float MatchQualityScore;
+
+    // For the best match, DeltaCN is the difference in score between this match and its runner-up.  For
+    // other matches, DeltaCN is the difference in score between them and the best match (i.e. DeltaCN is 
+    // negative for them).  We compute DeltaCN because a larger DeltaCN value generally indicates 
+    // a better match.  
+    float DeltaCN;
+
+    // DeltaCNOther is the difference in score between this peptide and the best runner-up that's NOT 
+    // the same peptide.  "Same" means "file-pos at most 2 steps away, or sequence has at most two diffs".
+    float DeltaCNOther;
+    struct Peptide* FamilyLeader;
+    float FScore;
+    float PValue;
+
+    // We may "own" our own mass delta, in which case we must free it when we dealloc:
+    MassDelta* PetDelta; 
+
+    // Track the nth post-translational modification by setting AminoIndex[n] to the index of the
+    // modified amino acid, and ModType[n] to the modification type.  Set AminoIndex to -1 for
+    // the extra records.
+    int AminoIndex[MAX_PT_MODS]; 
+    MassDelta* ModType[MAX_PT_MODS];
+    struct PeptideMatch* First;
+    struct PeptideMatch* Last;
+    struct Peptide* Next;
+    struct Peptide* Prev;
+    int PrefixMass; // Used only if this is a tag
+    int SuffixMass; // Used only if this is a tag 
+
+    // DB is a pointer to the database which this match comes from.
+    DatabaseFile* DB;
+    PeptideSpliceNode* SpliceHead; // if splice-tolerant
+    int GenomicLocationStart; // if splice-tolerant
+    int GenomicLocationEnd; // if splice-tolerant
+    int ChromosomeNumber; // if splice-tolerant
+    int ChromosomeForwardFlag; // if splice-tolerant
+    char* SplicedBases; // if splice-tolerant
+    int ParentMass;
+    SpectrumTweak* Tweak;
+    float ScoreFeatures[16];
+    int SpecialFragmentation;
+    int SpecialModPosition;
+} Peptide;
+
+typedef struct PeptideMatch
+{
+    int FilePos;
+    int RecordNumber;
+    TrieTag* Tag; 
+    struct PeptideMatch* Next;
+} PeptideMatch;
+
+typedef float (*ScoringFunction)(MSSpectrum* Spectrum, Peptide* Match, int VerboseFlag);
+
+typedef struct SearchInfo
+{
+    DatabaseFile* DB;
+    int RecordNumber;
+    //ScoringFunction Scorer;
+    MSSpectrum* Spectrum;
+    TrieNode* Root;
+    int VerboseFlag;
+} SearchInfo;
+
+//container for information about the blind tag match.
+//These matches extend on only one side (the "anchored" extension).
+typedef struct BlindTagMatch
+{
+    TrieTag* Tag;
+    struct BlindTagMatch* Next;
+    struct BlindTagMatch* Prev;
+    //denotes the direction of the matched (modless) extension
+    int ExtendLR; // -1 for Left, 1 for Right
+    int ExtendDBLoc; // the location in the database where the extension matches
+    int TagDBLoc; // the location in the DB where the first letter of the tag matches.
+    int ExtendLength; //length of the anchored extension; e.g. XXXTAG---- means an extension length of 3
+} BlindTagMatch;
+
+// PTModCount lists how many post-translational mods exist for each amino acid:
+extern int PTModCount[TRIE_CHILD_COUNT];
+
+// PTMods lists the mass of each post-translational mod for each amino acid:
+extern float PTMods[TRIE_CHILD_COUNT][MAX_PT_MODTYPE];
+
+// Table of prefix and suffix peptide masses
+extern int PeptideMass[256];
+extern int StandardPeptideMass[256];
+
+#define IS_ROOT(node) ((node)->Depth == 0)
+
+
+// For constructing lists of TrieTags.  (A single TrieTag can be part of more than
+// one list, by using more than one TrieTagHanger)
+typedef struct TrieTagHanger
+{
+    struct TrieTagHanger* Prev;
+    struct TrieTagHanger* Next;
+    TrieTag* Tag;
+} TrieTagHanger;
+
+// Constructor: TrieNode
+TrieNode* NewTrieNode();
+
+// Destructor: TrieNode
+void FreeTrieNode(TrieNode* This);
+
+// Constructor: TrieTag
+TrieTag* NewTrieTag();
+
+// Destructor: TrieTag
+void FreeTrieTag(TrieTag* This);
+
+// Add a new tag to the trie.  New trie nodes will be added, if necessary, in order
+// to hold the tag.  (For instance, adding "CAT" to a root node with no children would
+// add three nodes: C, CA, and CAT).
+TrieNode* AddTagToTrie(TrieNode* Root, TrieTag* Tag, int* DuplicateFlag);
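+
+// Minimal usage sketch (assumed workflow, not part of the original API documentation).  TrieTag
+// field initialization is elided here because its members are declared elsewhere in this header:
+//
+//   TrieNode* Root = NewTrieNode();
+//   TrieTag* Tag = NewTrieTag();        // ...populate the tag's sequence and flanking masses...
+//   int Duplicate = 0;
+//   AddTagToTrie(Root, Tag, &Duplicate);  // for tag "CAT", adds nodes C, CA, CAT under Root
+//   // later: attach Root to a SearchInfo and call ScanFileWithTrie(), then FreeTrieNode(Root)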
+
+// Constructor: TrieTagHanger
+TrieTagHanger* NewTrieTagHanger();
+
+// Destructor: TrieTagHanger
+void FreeTrieTagHanger(TrieTagHanger* This);
+
+// Debug: Print a trie to stdout
+void DebugPrintTrie(TrieNode* Root);
+
+// Print all matches 
+void PrintMatches(MSSpectrum* Spectrum, char* IndexFileName);
+
+// Load the masses for amino acids: n-terminal (left) and c-terminal (right) masses
+int LoadPeptideMasses(char* FileName);
+
+// Initialize GlobalOptions, a global variable storing configurable options.
+void InitOptions();
+
+void InitStats();
+
+// Important main method: Use a trie to search a data-file.
+int ScanFileWithTrie(SearchInfo* Info);
+
+int GetMaxTagRank(TrieNode* Root);
+//int ComparePeptideScores(const Peptide* A, const Peptide* B);
+void PrintMatch(Peptide* Match, FILE* IndexFile);
+void DebugPrintTrieTags(TrieNode* Node);
+void InitializeTrieFailureNodes(TrieNode* Root, TrieNode* Node, char* Tag);
+void FreePeptideNode(Peptide* Pep);
+void GetProteinID(int RecordNumber, DatabaseFile* DB, char* Name);
+void FlagMandatoryModUsage(TrieNode* Node);
+void WriteMatchToString(Peptide* Match, char* Buffer, int IncludeMods);
+Peptide* StoreSpectralMatch(MSSpectrum* Spectrum, Peptide* Match, int PeptideLength, int MQScoreFlag);
+Peptide* NewPeptideNode();
+int CheckForPTAttachmentPoints(int DecorationMassIndex, char* Buffer, int Start, int End, int BufferDir);
+Peptide* AddNewMatch(SearchInfo* Info, int FilePos, TrieTag* Tag, char* Peptide, 
+                 int PeptideLength, int TagPosition, int PrefixDecoration, int SuffixDecoration, 
+                 int GenomicStart, int GenomicEnd);
+Peptide* GetPeptideFromAnnotation(char* Annotation);
+int GetPeptideParentMass(Peptide* Match);
+void WriteTagToString(TrieTag* Tag, char* Buffer, int IncludeMods);
+
+#endif //TRIE_H
diff --git a/TrieUtils.py b/TrieUtils.py
new file mode 100644
index 0000000..907e9a6
--- /dev/null
+++ b/TrieUtils.py
@@ -0,0 +1,256 @@
+#Title:          TrieUtils.py
+#Author:         Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+#Created:        2005
+# Copyright 2007,2008,2009 The Regents of the University of California
+# All Rights Reserved
+#
+# Permission to use, copy, modify and distribute any part of this
+# program for educational, research and non-profit purposes, by non-profit
+# institutions only, without fee, and without a written agreement is hereby
+# granted, provided that the above copyright notice, this paragraph and
+# the following three paragraphs appear in all copies.
+#
+# Those desiring to incorporate this work into commercial
+# products or use for commercial purposes should contact the Technology
+# Transfer & Intellectual Property Services, University of California,
+# San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+# Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+#
+# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+# INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+# IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+# OF SUCH DAMAGE.
+#
+# THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+# OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+# ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+# REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+# EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+# THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+
+import os
+import sys
+import struct
+
+TRIE_FAIL = -1
+TRIE_BLOCKSIZE = 4096
+MIN_TRIE_SEARCH_SIZE = 512
+
+def Union(List,NewElement):
+    for i in range(0,len(List)):
+        if List[i] == NewElement:
+            return List
+
+    List.append(NewElement)
+    return List
+
+def UnionList(List1,List2):
+    for i in range(0,len(List2)):
+        List1 = Union(List1,List2[i])
+    return List1
+
+class TrieUtils:
+    def __init__(self):
+        #print "New TrieUtils!!"
+        return
+
+    def GetProteinSequence(self,ProteinNamePrefix, TrieFileName, IndexFileName):
+        if not os.path.exists(TrieFileName):
+            print "ERROR: TrieUtils.GetProteinSequence: %s is not a valid file name"%TrieFileName
+            return None
+        if not os.path.exists(IndexFileName):
+            print "ERROR: TrieUtils.GetProteinSequence: %s is not a valid file name"%IndexFileName
+            return None
+
+        IndexFile = open(IndexFileName, 'rb')
+        
+        BlockSize = struct.calcsize("<qi80s")
+        while (1):
+            Block = IndexFile.read(BlockSize)
+            if not Block:
+                IndexFile.close()
+                return None
+            
+            Info = struct.unpack("<qi80s",Block)
+            Name = str(Info[2])
+            NullPos = Name.find("\0")
+            if NullPos != -1:
+                Name = Name[:NullPos]
+            TriePos = Info[1]
+            if Name[0:len(ProteinNamePrefix)] == ProteinNamePrefix:
+                TrieFile = open(TrieFileName, 'rb')
+                TrieFile.seek(TriePos)
+                Seq = ""
+                while Seq.find("*") < 0:
+                    Seq += TrieFile.read(256)
+                TrieFile.close()
+                IndexFile.close()
+                return Seq[0:Seq.find("*")]
+
+        IndexFile.close()
+        return None
+            
+
+    def GetProteinName(self,IndexFileName,ProteinID):
+        if not os.path.exists(IndexFileName):
+            print "ERROR: TrieUtils.GetProteinSequence: %s is not a valid file name"%IndexFileName
+            return None
+
+        IndexFile = open(IndexFileName, 'rb')
+    
+        BlockSize = struct.calcsize("<qi80s")
+        IndexFile.seek(BlockSize*ProteinID)
+    
+        Block = IndexFile.read(BlockSize)
+        if not Block:
+            IndexFile.close()
+            return None
+    
+        Info = struct.unpack("<qi80s",Block)
+        Name = Info[2]
+        NullPos = Name.find("\0")
+        if NullPos != -1:
+            Name = Name[:NullPos]
+        TriePos = Info[1]
+        
+        return Name.strip()
+
+
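+    # Note on the index format assumed by the two methods above (inferred from the struct calls,
+    # not documented separately): each index record is struct.calcsize("<qi80s") bytes -- an
+    # 8-byte little-endian integer that these methods do not use, the 4-byte offset of the
+    # protein's sequence in the .trie file, and an 80-byte null-padded protein name.  Records
+    # are stored in protein-ID order, which is why GetProteinName can seek straight to
+    # BlockSize * ProteinID.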
+
+    def GetAllLocations(self,Peptides,TrieFileName):
+
+        (Transitions,Output,Failure) = self.BuildTrie(Peptides)
+        LocalDebug = 0
+        Locations = {}
+        for P in Peptides:
+            Locations[P] = []
+
+        TrieFile = open(TrieFileName, 'rb')
+        State = 0
+        ProteinID = 0
+        ResidueNum = 0
+        BlockCount = 0
+        TrieFile.seek(0,2)
+        FileBlocks = TrieFile.tell()/TRIE_BLOCKSIZE
+        TrieFile.seek(0)
+        pos = 0
+        while(1):
+            
+            TrieLine = TrieFile.read(TRIE_BLOCKSIZE)
+            BlockCount += 1
+            #print TrieLine
+            if not TrieLine:
+                print "Done with this file"
+                break
+            for i in range(0,len(TrieLine)):
+                if LocalDebug:
+                    print "[%s] - %s"%(i,TrieLine[i])
+                    print "%s:%s"%(ProteinID,ResidueNum)
+                if TrieLine[i] == '*':
+                    ResidueNum = 0
+                    ProteinID += 1
+                    State = 0
+                    if LocalDebug:
+                        print "Encountered a *, resetting"
+                        #raw_input()
+                    continue
+                while Transitions.get((State,TrieLine[i]),TRIE_FAIL) == TRIE_FAIL:
+                    if LocalDebug:
+                        print "Transition[%s,%s]->%s"%(State,TrieLine[i],TRIE_FAIL)
+                        print "FailState[%s]->%s"%(State,Failure[State])
+                    
+                    if(State == Failure[State]):
+                        print "Transition[%s,%s]->%s"%(State,TrieLine[i],TRIE_FAIL)
+                        print "FailState[%s]->%s"%(State,Failure[State])
+                        raw_input()
+                    State = Failure[State]
+                    
+                if LocalDebug:
+                    print "Transition[%s,%s]->%s"%(State,TrieLine[i],Transitions[(State,TrieLine[i])])
+                    #raw_input()
+                State = Transitions[(State,TrieLine[i])]
+                if Output.has_key(State):
+                    for Pep in Output[State]:
+                        if LocalDebug:
+                            print "*****%s - %s:%s"%(Pep,ProteinID,ResidueNum)
+                            raw_input()
+                        Locations[Pep].append((ProteinID,ResidueNum))
+                ResidueNum += 1
+            print pos
+            pos += len(TrieLine)
+            if len(TrieLine) < TRIE_BLOCKSIZE:
+                break
+            if LocalDebug:
+                print "Done with block!!"
+                #raw_input()
+        if LocalDebug:
+            print "Done!"
+            raw_input()
+        print "Finished!!!"
+        return Locations
+
+
+    def BuildTrie(self,Peptides):
+
+        #Build Transition and Output Functions
+        Transitions = {}
+        Output = {}
+        NewState = 0
+        #Str = ""
+        for Pep in Peptides:
+            State = 0
+            J = 0
+            #print Pep
+            #Str += Pep
+            while(J < len(Pep) and Transitions.get((State,Pep[J]),TRIE_FAIL) != TRIE_FAIL):
+                State = Transitions[(State,Pep[J])]
+                J += 1
+            for P in range(J,len(Pep)):
+                NewState += 1
+                Transitions[(State,Pep[P])] = NewState
+                State = NewState
+        
+        
+            List = Output.get(State,[])
+            List.append(Pep)
+            Output[State] = List
+        #print Str
+        #raw_input()
+        #Create a self loop at node 0, back to node 0
+        for AA in "ABCDEFGHIKLMNOPQRSTUVWXYZ":
+            S = Transitions.get((0,AA),TRIE_FAIL)
+            if S == TRIE_FAIL:
+                Transitions[(0,AA)] = 0
+    
+        #Create Failure Function
+        Queue = []
+        Failure = {}
+        for AA in "ABCDEFGHIKLMNOPQRSTUVWXYZ":
+            S = Transitions.get((0,AA),TRIE_FAIL)
+            if S != 0:
+                Queue = Union(Queue,S)
+                Failure[S] = 0
+        while len(Queue) > 0:
+            R = Queue.pop(0)
+            for AA in "ABCDEFGHIKLMNOPQRSTUVWXYZ":
+                S = Transitions.get((R,AA),TRIE_FAIL)
+                if S != TRIE_FAIL:
+                    Queue = Union(Queue,S)
+                    State = Failure[R]
+                    while(Transitions.get((State,AA),TRIE_FAIL) == TRIE_FAIL):
+                        State = Failure[State]
+                    Failure[S] = Transitions[(State,AA)]
+                    Output[S] = UnionList(Output.get(S,[]),Output.get(Failure[S],[]))
+
+        Failure[0] = 0
+        return (Transitions,Output,Failure)
+        
+                
+
+
+
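+# Rough usage sketch (peptide strings below are hypothetical, added for illustration): BuildTrie
+# constructs an Aho-Corasick automaton (goto/Transitions, Output and Failure functions) over the
+# peptide strings, and GetAllLocations streams a .trie database through it:
+#
+#   Utils = TrieUtils()
+#   Locations = Utils.GetAllLocations(["PEPTIDE", "TIDEK"], "Database/TestDatabase.trie")
+#   for Peptide, Hits in Locations.items():
+#       for (ProteinID, ResidueNumber) in Hits:
+#           print Peptide, ProteinID, ResidueNumber
+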
+if __name__=="__main__":
+    print "TrieUtils.py"
diff --git a/Utils.c b/Utils.c
new file mode 100644
index 0000000..c7a8f70
--- /dev/null
+++ b/Utils.c
@@ -0,0 +1,683 @@
+//Title:          Utils.c
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+#include "CMemLeak.h"
+#include "Utils.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <stdarg.h>
+
+
+
+// From high to low
+int CompareFloats(const float* a, const float* b)
+{
+    if (*a > *b)
+    {
+        return -1;
+    }
+    if (*a < *b)
+    {
+        return 1;
+    }
+    return 0;
+}
+
+// From high to low
+int CompareInts(const int* a, const int* b)
+{
+    if (*a > *b)
+    {
+        return -1;
+    }
+    if (*a < *b)
+    {
+        return 1;
+    }
+    return 0;
+}
+
+
+// Copy one line (up to a \r or \n character) from a source buffer to a target buffer.
+// Optionally, strip out spaces.  Return the position just AFTER the end of the line.
+// (If a line ends in \r\n, we'll end up processing the line, and then one empty line; that's okay)
+// If a line is very long, we stop copying, and skip over the rest of it.
+int CopyBufferLine(char* Source, int BufferPos, int BufferEnd, char* LineBuffer, int StripWhitespace)
+{
+    int LinePos = 0;
+    int LineComplete = 0;
+    int Chars = 0;
+    int Skipping = 0;
+    //
+    while (!LineComplete)
+    {
+        if (BufferPos > BufferEnd)
+        {
+            // Our line extends off the edge of the buffer.  That's probably a Bad Thing.
+            printf("** Warning: Ran off the edge of the buffer in CopyBufferLine.  Line too ling?\n");
+            LineBuffer[LinePos] = '\0';
+            return BufferPos;
+        }
+        switch (Source[BufferPos])
+        {
+        case ' ':
+            if (StripWhitespace)
+            {
+                BufferPos++;
+            }
+            else
+            {
+                if (!Skipping)
+                {
+                    LineBuffer[LinePos++] = Source[BufferPos];
+                }
+                BufferPos++;
+                Chars++;
+            }
+            break;
+        case '\r':
+        case '\n':
+            LineBuffer[LinePos] = '\0';
+            BufferPos++;
+            LineComplete = 1;
+            break;
+        case '\0':
+            LineBuffer[LinePos] = '\0';
+            LineComplete = 1;
+            break;
+        default:
+            if (!Skipping)
+            {
+                LineBuffer[LinePos++] = Source[BufferPos];
+            }
+            BufferPos++;
+            Chars++;
+            break;
+        }
+        if (Chars == MAX_LINE_LENGTH - 1)
+        {
+            printf("** Error: Line too long!  Truncating line.");
+            // Read the rest of the chars, but don't write them:
+            Chars = 0;
+            Skipping = 1;
+        }
+    }
+    return BufferPos;
+}
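+
+// Worked example (hypothetical buffer, added for clarity): with Source = "AB\r\nCD" and
+// BufferEnd = 6, CopyBufferLine(Source, 0, 6, Line, 0) copies "AB" into Line and returns 3;
+// calling it again from 3 yields the empty line for the '\n' and returns 4; the call from 4
+// copies "CD" and stops at the terminating '\0' without advancing, returning 6.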
+
+void ParseFileByLines(FILE* File, FileLineParser LineParser, void* ParseData, int ProcessCommentLines)
+{
+    char LineBuffer[MAX_LINE_LENGTH];
+    char TextBuffer[BUFFER_SIZE * 2];
+    int LineNumber = 0;
+    int FilePos;
+    int NewFilePos = 0;
+    int BytesToRead;
+    int BufferEnd = 0;
+    int BytesRead;
+    int BufferPos = 0;
+    int KeepParsingFlag;
+    //
+    if (!File)
+    {
+        return;
+    }
+    NewFilePos = ftell(File);
+    while (1)
+    {
+        FilePos = NewFilePos;
+        BytesToRead = BUFFER_SIZE - BufferEnd;
+        BytesRead = ReadBinary(TextBuffer + BufferEnd, sizeof(char), BytesToRead, File);
+        BufferEnd += BytesRead;
+        TextBuffer[BufferEnd] = '\0';
+        if (BufferPos == BufferEnd)
+        { 
+            // We're done!
+            break;
+        }
+        BufferPos = CopyBufferLine(TextBuffer, BufferPos, BufferEnd, LineBuffer, 0);
+        if (!BufferPos)
+        {
+            // We encountered a null character.  Force advance:
+            BufferPos++;
+        }
+        NewFilePos = FilePos + BufferPos;
+        
+        LineNumber += 1;
+        // Now, move the remaining text to the start of the buffer:
+        memmove(TextBuffer, TextBuffer + BufferPos, BufferEnd - BufferPos);
+        BufferEnd -= BufferPos;
+        BufferPos = 0;
+        // Now, process this line of text!
+        // Skip empty lines:
+        if (!LineBuffer[0])
+        {
+            continue;
+        }
+        // Skip comment lines:
+        if (LineBuffer[0] == '#' && !ProcessCommentLines)
+        {
+            continue;
+        }
+        KeepParsingFlag = LineParser(LineNumber, FilePos, LineBuffer, ParseData);
+        if (!KeepParsingFlag)
+        {
+            break;
+        }
+    }
+}
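+
+// Sketch of a hypothetical FileLineParser callback (illustration only; the real parsers live in
+// other modules).  ParseFileByLines invokes it once per non-empty, non-comment line and stops as
+// soon as it returns 0:
+//
+//   int CountLines(int LineNumber, int FilePos, char* LineBuffer, void* ParseData)
+//   {
+//       int* Counter = (int*)ParseData;
+//       (*Counter)++;
+//       return 1; // keep parsing
+//   }
+//   ...
+//   int LineCount = 0;
+//   ParseFileByLines(File, CountLines, &LineCount, 0);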
+
+// Note: despite its name, FORCE_UPPER maps upper-case letters to lower case; either direction
+// yields a case-insensitive comparison.
+#define FORCE_UPPER(X) X = ((X) >= 'A' && (X) <= 'Z' ? (X) + 'a' - 'A' : (X));
+
+// Case-insensitive string comparison, ordered from high to low (like CompareFloats):
+// returns 1 if A < B, -1 if A > B, and 0 if the strings are equal.
+int CompareStrings(const char* StringA, const char* StringB)
+{
+    const char* CharA;
+    const char* CharB;
+    char A;
+    char B;
+    CharA = StringA;
+    CharB = StringB;
+    while (1)
+    {
+        if (!*CharA && !*CharB)
+        {
+            return 0;
+        }
+        A = *CharA;
+        B = *CharB;
+        FORCE_UPPER(A);
+        FORCE_UPPER(B);
+        //if (isupper(A)) A = ConvertToLower(A);
+        //if (isupper(B)) B = ConvertToLower(B);
+        if (A < B)
+        {
+            return 1;
+        }
+        if (A > B)
+        {
+            return -1;
+        }
+        CharA++;
+        CharB++;
+    }
+}
+
+#ifdef __ppc__
+// Reads a little endian binary file for a big endian system
+size_t ReadBinary(void* Buffer, size_t ItemSize, size_t ItemCount, FILE* File) 
+{
+    size_t ItemIndex;
+    size_t ByteIndex;
+    unsigned char SwapValue;
+    char* CharBuffer;
+    size_t BytesRead;
+
+    BytesRead = fread(Buffer, ItemSize, ItemCount, File); // raw fread
+
+    CharBuffer = (char*)Buffer;
+
+    for (ItemIndex = 0; ItemIndex < ItemCount; ItemIndex++) 
+    {
+        for (ByteIndex = 0; ByteIndex < ItemSize >> 1; ByteIndex++)
+        {
+            // Swap the first and last bytes, then bytes 1 and max - 1, etc.
+            SwapValue = CharBuffer[ItemSize * ItemIndex + ByteIndex];
+            CharBuffer[ItemSize * ItemIndex + ByteIndex] = CharBuffer[ItemSize * ItemIndex + ItemSize - ByteIndex - 1];
+            CharBuffer[ItemSize * ItemIndex + ItemSize - ByteIndex - 1] = SwapValue;
+        }
+    }
+    return BytesRead;
+}
+
+// We're on a big-endian system, and we must write out a little-endian file.
+size_t WriteBinary(void* Buffer, size_t ItemSize, size_t ItemCount, FILE* File)
+{
+    char ItemBuffer[256];
+    size_t ItemIndex;
+    size_t ByteIndex;
+    size_t ItemsWritten = 0;
+    char* CharBuffer = (char*)Buffer;
+    //
+    // Write a byte-swapped version of each item to ItemBuffer, then write ItemBuffer to disk.
+    for (ItemIndex = 0; ItemIndex < ItemCount; ItemIndex++) 
+    {
+        for (ByteIndex = 0; ByteIndex < ItemSize; ByteIndex++)
+        {
+            ItemBuffer[ItemSize - ByteIndex - 1] = CharBuffer[ItemSize * ItemIndex + ByteIndex];
+        }
+        ItemsWritten += fwrite(ItemBuffer, ItemSize, 1, File); // one byte-swapped item at a time
+    }
+    return ItemsWritten;
+}
+
+#else
+#define ReadBinary fread
+#define WriteBinary fwrite
+#endif
+
+char TranslateCodon(char* DNA)
+{
+    switch (DNA[0])
+    {
+    case 'T':
+    case 't':
+        switch (DNA[1])
+        {
+        case 'T':
+        case 't':
+            switch (DNA[2])
+            {
+            case 'T':
+            case 't':
+                return 'F';
+            case 'C':
+            case 'c':
+                return 'F';
+            case 'A':
+            case 'a':
+                return 'L';
+            case 'G':
+            case 'g':
+                return 'L';
+            }
+            break;
+        case 'C':
+        case 'c':
+            switch (DNA[2])
+            {
+            case 'T':
+            case 't':
+                return 'S';
+            case 'C':
+            case 'c':
+                return 'S';
+            case 'A':
+            case 'a':
+                return 'S';
+            case 'G':
+            case 'g':
+                return 'S';
+            }
+            break;
+        case 'A':
+        case 'a':
+            switch (DNA[2])
+            {
+            case 'T':
+            case 't':
+                return 'Y';
+            case 'C':
+            case 'c':
+                return 'Y';
+            case 'A':
+            case 'a':
+                return 'X';
+            case 'G':
+            case 'g':
+                return 'X';
+            }
+            break;
+        case 'G':
+        case 'g':
+            switch (DNA[2])
+            {
+            case 'T':
+            case 't':
+                return 'C';
+            case 'C':
+            case 'c':
+                return 'C';
+            case 'A':
+            case 'a':
+                return 'X';
+            case 'G':
+            case 'g':
+                return 'W';
+            }
+            break;
+        }
+        break;
+    case 'C':
+    case 'c':
+        switch (DNA[1])
+        {
+        case 'T':
+        case 't':
+            switch (DNA[2])
+            {
+            case 'T':
+            case 't':
+                return 'L';
+            case 'C':
+            case 'c':
+                return 'L';
+            case 'A':
+            case 'a':
+                return 'L';
+            case 'G':
+            case 'g':
+                return 'L';
+            }
+            break;
+        case 'C':
+        case 'c':
+            switch (DNA[2])
+            {
+            case 'T':
+            case 't':
+                return 'P';
+            case 'C':
+            case 'c':
+                return 'P';
+            case 'A':
+            case 'a':
+                return 'P';
+            case 'G':
+            case 'g':
+                return 'P';
+            }
+            break;
+        case 'A':
+        case 'a':
+            switch (DNA[2])
+            {
+            case 'T':
+            case 't':
+                return 'H';
+            case 'C':
+            case 'c':
+                return 'H';
+            case 'A':
+            case 'a':
+                return 'Q';
+            case 'G':
+            case 'g':
+                return 'Q';
+            }
+            break;
+        case 'G':
+        case 'g':
+            switch (DNA[2])
+            {
+            case 'T':
+            case 't':
+                return 'R';
+            case 'C':
+            case 'c':
+                return 'R';
+            case 'A':
+            case 'a':
+                return 'R';
+            case 'G':
+            case 'g':
+                return 'R';
+            }
+            break;
+        }
+        break;
+    case 'A':
+    case 'a':
+        switch (DNA[1])
+        {
+        case 'T':
+        case 't':
+            switch (DNA[2])
+            {
+            case 'T':
+            case 't':
+                return 'I';
+            case 'C':
+            case 'c':
+                return 'I';
+            case 'A':
+            case 'a':
+                return 'I';
+            case 'G':
+            case 'g':
+                return 'M';
+            }
+            break;
+        case 'C':
+        case 'c':
+            switch (DNA[2])
+            {
+            case 'T':
+            case 't':
+                return 'T';
+            case 'C':
+            case 'c':
+                return 'T';
+            case 'A':
+            case 'a':
+                return 'T';
+            case 'G':
+            case 'g':
+                return 'T';
+            }
+            break;
+        case 'A':
+        case 'a':
+            switch (DNA[2])
+            {
+            case 'T':
+            case 't':
+                return 'N';
+            case 'C':
+            case 'c':
+                return 'N';
+            case 'A':
+            case 'a':
+                return 'K';
+            case 'G':
+            case 'g':
+                return 'K';
+            }
+            break;
+        case 'G':
+        case 'g':
+            switch (DNA[2])
+            {
+            case 'T':
+            case 't':
+                return 'S';
+            case 'C':
+            case 'c':
+                return 'S';
+            case 'A':
+            case 'a':
+                return 'R';
+            case 'G':
+            case 'g':
+                return 'R';
+            }
+            break;
+        }
+        break;
+    case 'G':
+    case 'g':
+        switch (DNA[1])
+        {
+        case 'T':
+        case 't':
+            switch (DNA[2])
+            {
+            case 'T':
+            case 't':
+                return 'V';
+            case 'C':
+            case 'c':
+                return 'V';
+            case 'A':
+            case 'a':
+                return 'V';
+            case 'G':
+            case 'g':
+                return 'V';
+            }
+            break;
+        case 'C':
+        case 'c':
+            switch (DNA[2])
+            {
+            case 'T':
+            case 't':
+                return 'A';
+            case 'C':
+            case 'c':
+                return 'A';
+            case 'A':
+            case 'a':
+                return 'A';
+            case 'G':
+            case 'g':
+                return 'A';
+            }
+            break;
+        case 'A':
+        case 'a':
+            switch (DNA[2])
+            {
+            case 'T':
+            case 't':
+                return 'D';
+            case 'C':
+            case 'c':
+                return 'D';
+            case 'A':
+            case 'a':
+                return 'E';
+            case 'G':
+            case 'g':
+                return 'E';
+            }
+            break;
+        case 'G':
+        case 'g':
+            switch (DNA[2])
+            {
+            case 'T':
+            case 't':
+                return 'G';
+            case 'C':
+            case 'c':
+                return 'G';
+            case 'A':
+            case 'a':
+                return 'G';
+            case 'G':
+            case 'g':
+                return 'G';
+            }
+            break;
+        }
+        break;
+    }
+    return 'X';
+}
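+
+// Examples of the standard genetic code as encoded above: TranslateCodon("ATG") returns 'M',
+// TranslateCodon("gga") returns 'G', and the stop codons TAA/TAG/TGA return 'X', as does any
+// codon containing an unrecognized base.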
+
+void WriteReverseComplement(char* Source, char* Destination)
+{
+    char* A;
+    char* B;
+    A = Source;
+    while (*A)
+    {
+        A++;
+    }
+    A--;
+    B = Destination;
+    while (A >= Source)
+    {
+        switch (*A)
+        {
+        case 'C':
+        case 'c':
+            *B = 'G';
+            break;
+        case 'G':
+        case 'g':
+            *B = 'C';
+            break;
+        case 'A':
+        case 'a':
+            *B = 'T';
+            break;
+        case 'T':
+        case 't':
+            *B = 'A';
+            break;
+        }
+        A--;
+        B++;
+    }
+    *B = '\0'; // null-terminate the destination (assumes Destination has room for the terminator)
+}
+
+// Reverse a null-terminated string in place:
+void ReverseString(char* String)
+{
+    char* A;
+    char* Z;
+    char Temp;
+    int Len;
+    if (!String)
+    {
+        return;
+    }
+    Len = strlen(String);
+    if (!Len)
+    {
+        return;
+    }
+    Z = String + Len - 1;
+    A = String;
+    while (A < Z)
+    {
+        Temp = *Z;
+        *Z = *A;
+        *A = Temp;
+        A++;
+        Z--;
+    }
+}
+
+float GetMedian(float* Values, int ValueCount)
+{
+    qsort(Values, ValueCount, sizeof(float), (QSortCompare)CompareFloats);
+    if (ValueCount % 2)
+    {
+        return Values[ValueCount / 2];
+    }
+    else
+    {
+        return (Values[ValueCount / 2] + Values[(ValueCount / 2) - 1]) / (float)2.0;
+    }
+}
diff --git a/Utils.h b/Utils.h
new file mode 100644
index 0000000..9ea29a6
--- /dev/null
+++ b/Utils.h
@@ -0,0 +1,345 @@
+//Title:          Utils.h
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+#ifndef UTILS_H
+#define UTILS_H
+
+#include <memory.h>
+#include <string.h>
+#include <stdio.h>
+
+#ifndef _WIN32
+// linux lacks these:
+#define max(X,Y) (((X)>(Y)) ? (X) : (Y))
+#define min(X,Y) (((X)>(Y)) ? (Y) : (X))
+#endif
+
+#ifdef _WIN32
+#define SEPARATOR '\\'
+#define SEPARATOR_STRING "\\"
+#else
+#define SEPARATOR '/'
+#define SEPARATOR_STRING "/"
+#endif
+
+// We don't like compiler warnings.  Therefore, we cast all our 
+// qsort comparison callbacks, in order to avoid this:
+// Warning: "passing arg 4 of `qsort' from incompatible pointer type"
+typedef int (*QSortCompare)(const void*, const void*);
+
+// For tokenizing strings:
+#define WHITESPACE " \r\n\t"
+
+// It seems that tolower is not defined on OSX, so we define our own:
+#define ConvertToLower(c) (((c)<'A' || (c)>'Z') ? (c) : ((c)-'A' + 'a'))
+
+#define DIGEST_TYPE_UNKNOWN 0
+#define DIGEST_TYPE_TRYPSIN 1
+
+#define MIN_VALID_PEPTIDE_LENGTH 6
+
+// For debugging tag generation.  (Comment it out to disable)
+//#define DEBUG_TAG_GENERATION
+
+// On some compilers sizeof(long long) incorrectly reports 4, so we hard-code the size:
+#define LONG_LONG_SIZE 8
+
+// There are many places where we index into an array by an amino acid's index (A == 0, C == 2, and so on).
+// These arrays waste a little space because there is no entry for B, J, O, U, X, Z.  But they are
+// very time-efficient.
+#define AMINO_ACIDS 26
+
+#define SCORE_TOP_MATCH_VERBOSELY 1
+#define BUFFER_SIZE 1024
+#define MAX_FILENAME_LEN 1024
+#define MAX_MATCHES 10
+
+// We divide the full m/z range into PRM bins.  Masses are stored as ints, where one dalton
+// equals a mass of 1000 units, so each PRM bin is 0.1Da (PRM_BIN_SIZE = 100 units) wide.
+// There are still some places where the bin size is hard-coded.
+#define PRM_BIN_SIZE 100
+
+// Number of padding entries added to the end of the PRMScores array.  Useful if we want the score for a PRM that's
+// outside our mass range, but just barely.
+#define PRM_ARRAY_SLACK 10
+
+// Maximum allowed length for a line in line-based data files.  (If the line is longer
+// than this, we report an error)
+#define MAX_LINE_LENGTH 2048
+
+#define TAG_EDGE_SCORE_MULTIPLIER 20
+
+#define FAST_ROUND(Float, Int)\
+{\
+    Int = (int)((Float) + 0.5);\
+}
+
+#define ROUND_MASS(FloatMass, IntMass)\
+{\
+    (IntMass) = (int)(FloatMass * 1000 + 0.5);\
+}
+
+#define ROUND_MASS_TO_DELTA_BIN(x, bin) \
+{\
+(bin) = (int)(((x) + 200000) / 1000.0 + 0.5);\
+}
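+
+// Worked example of the fixed-point mass convention (values chosen for illustration): a glycine
+// residue mass of 57.02146 Da becomes the integer 57021 via ROUND_MASS (one dalton == 1000
+// integer units), and ROUND_MASS_TO_DELTA_BIN maps an integer delta of -5000 (a -5 Da shift) to
+// bin (int)((-5000 + 200000) / 1000.0 + 0.5) == 195.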
+
+
+// Instrument type LTQ is the default.  
+#define INSTRUMENT_TYPE_LTQ 0
+// QTOF: Fragmentation properties are different (b series is weaker, y series is stronger).
+// Parent masses are quite accurate, so parent mass correction is NOT performed
+#define INSTRUMENT_TYPE_QTOF 1
+// FT hybrid: Parent masses are extremely accurate, so parent mass correction is NOT performed.
+// The fragment masses can still be a bit inaccurate however.
+#define INSTRUMENT_TYPE_FT_HYBRID 2
+
+#define RUN_MODE_DEFAULT 0
+#define RUN_MODE_TAGS_ONLY 1
+#define RUN_MODE_MUTATION 2
+#define RUN_MODE_BLIND 4
+#define RUN_MODE_BLIND_TAG 8
+#define RUN_MODE_PMC_ONLY 16
+#define RUN_MODE_TAG_MUTATION 64
+#define RUN_MODE_PREP_MS2DB 32
+#define RUN_MODE_RAW_OUTPUT 128
+
+
+#define PMC_FEATURE_RAW 1
+#define PMC_FEATURE_AVG_RATIO 2
+#define PMC_FEATURE_AVG_DIFF 4
+
+typedef enum DatabaseType
+{
+    evDBTypeTrie = 0,
+    evDBTypeMS2DB,
+    evDBTypeSpliceDB
+} DatabaseType;
+
+typedef struct DatabaseFile
+{
+    char FileName[MAX_FILENAME_LEN + 1];
+    char IndexFileName[MAX_FILENAME_LEN + 1];
+    int Type;
+    struct DatabaseFile* Next;
+    FILE* DBFile;
+    FILE* IndexFile;
+} DatabaseFile;
+
+typedef struct StringNode
+{
+    struct StringNode* Next;
+    char* String;
+} StringNode;
+
+
+
+// Global options.  (Set on command-line or in config-file)
+typedef struct Options
+{
+    // RunMode is a set of flags describing which overall code path to take.
+    int RunMode; 
+
+    // maximum number of post-translational mods to allow in a match
+    int MaxPTMods; 
+
+    // maximum allowed mass error for prefix/suffix peptides
+    int Epsilon; 
+    int PeakPPM;
+
+    // maximum allowed mass error for prefix/suffix masses
+    int FlankingMassEpsilon;
+
+    // return at most this many matches in a search
+    int MaxMatches; 
+
+    // -v provides extended debugging info
+    int VerboseFlag; 
+
+    // amino acid input-file
+    char AminoFileName[MAX_FILENAME_LEN];
+
+    // -o output file (if not set, print matches to stdout)
+    char FinalOutputFileName[MAX_FILENAME_LEN]; 
+    char OutputFileName[MAX_FILENAME_LEN];  // Intermediate output, before p-value computation
+    char ErrorFileName[MAX_FILENAME_LEN];
+    int ErrorCount;
+    int WarningCount;
+    DatabaseFile* FirstDatabase;
+    DatabaseFile* LastDatabase;
+
+    // -m file listing legal post-translational modifications
+    char PTModFileName[MAX_FILENAME_LEN]; 
+
+    // -i input file name
+    char InputFileName[MAX_FILENAME_LEN];
+    char ResourceDir[MAX_FILENAME_LEN];
+
+    // either stdout, or the opened OutputFileName:
+    FILE* OutputFile; 
+    // either stderr, or opened ErrorFileName:
+    FILE* ErrorFile;
+
+    // -t requests unit tests
+    int TestingFlag; 
+
+    // if true, we remember *all* the occurrences of matched peptides.
+    int ReportAllMatches; 
+
+    // How far we're allowed to tweak the parent mass of the spectrum.  (Parent masses are often off
+    // by one or two amu)
+    int ParentMassEpsilon;
+    int ParentMassPPM;
+
+    struct Peptide* TruePeptide;
+
+    char MandatoryModName[256];
+
+    int MandatoryModIndex;
+
+    // How many matches to report.  Defaults to 5.
+    int ReportMatchCount;
+
+    // How many matches to store for detailed scoring.  Defaults to 100.
+    int StoreMatchCount;
+
+    // How many tags shall we generate, and how long shall they be?
+    int GenerateTagCount;
+    int GenerateTagLength;
+
+    // Nonzero if this is a trypsin digest, or some other type of specific digest.
+    // If DigestType != 0, then we can give a penalty for missed cleavages, and a bonus for matching termini
+    int DigestType; 
+
+    // Linked list of SpectrumNodes:
+    struct SpectrumNode* FirstSpectrum;
+    struct SpectrumNode* LastSpectrum;
+
+    // Linked list of InputFiles:
+    struct InputFileNode* FirstFile;
+    struct InputFileNode* LastFile;
+
+    int SpectrumCount;
+    int DynamicRangeMin;
+    int DynamicRangeMax;
+    int TaglessSearchFlag;
+
+    // If PhosphorylationFlag, then attempt to interpret phosphorylated peptides.  This has implications
+    // for tag-generation, as well as candidate scoring.
+    int PhosphorylationFlag;
+
+    int TagPTMMode; // 0 is free, 1 is forbidden, and 2 is penalized
+
+    int MultiChargeMode; // if 1, try multiple parent charge states.
+
+    int TrieBlockSize;
+    int InstrumentType;
+    // Options for unrestrictive PTM search:
+    // DeltaBinCount is the number of mass bins in the range [MinPTMDelta, MaxPTMDelta], 
+    // by default it equals 400 * 10 = 4000. 
+    int MinPTMDelta;
+    int MaxPTMDelta;
+    int DeltaBinCount;
+    int DeltasPerAA; // == max(DeltaBinCount*2, 512)
+    // If TRUE, then use PepNovo for tag generation (assumed to live in working directory!)
+    int ExternalTagger; 
+
+    // Options for producing an .ms2db file from .gff files:
+    StringNode* FirstGFFFileName;
+    StringNode* LastGFFFileName;
+    char GenomeFileName[MAX_FILENAME_LEN + 1];
+    char ChromosomeName[256 + 1];
+
+    // If XMLStrictFlag is set, then we'll complain about any unexpected
+    // tags or attributes.  This is useful when debugging .ms2db file
+    // generation.  In production, this flag won't generally be set, 
+    // because it is officially Allowable to add new tags and 
+    // attributes to an .ms2db file.
+    int XMLStrictFlag;
+
+    // if RequireTermini is 1 or 2, then we accept only semi-tryptic or fully-tryptic matches.
+    int RequireTermini;
+
+    int NewScoring; //temporary flag while we work on a new code path for scoring
+
+
+    float MinLogOddsForMutation; // minimum log odds for a mutation
+} Options;
+
+extern Options* GlobalOptions;
+
+int CopyBufferLine(char* Source, int BufferPos, int BufferEnd, char* LineBuffer, int StripWhitespace);
+int CompareFloats(const float* a, const float* b);
+int CompareInts(const int* a, const int* b);
+int CompareStrings(const char* StringA, const char* StringB);
+char TranslateCodon(char* DNA);
+void WriteReverseComplement(char* Source, char* Destination);
+void ReverseString(char* String);
+
+#ifdef __ppc__
+size_t ReadBinary(void* Buffer, size_t ItemSize, size_t ItemCount, FILE* stream);
+size_t WriteBinary(void* Buffer, size_t ItemSize, size_t ItemCount, FILE* stream);
+#define BYTEORDER_BIG_ENDIAN
+#else
+#define ReadBinary fread
+#define WriteBinary fwrite
+#define BYTEORDER_LITTLE_ENDIAN
+#endif
+
+void AssertionFailed(char* Assertion, char* FileName, int LineNumber);
+
+#define INSPECT_ASSERT(expr) \
+    if (!(expr)) \
+    AssertionFailed(#expr, __FILE__, __LINE__)
+
+#define SafeFree(Pointer)\
+    if (Pointer) \
+    {\
+    free(Pointer);\
+    }
+
+// A FileLineParser is called once per line as a callback from ParseFileByLines().
+// Note the argument order matches the call in Utils.c: line number first, then file position.
+typedef int (*FileLineParser)(int LineNumber, int FilePos, char* LineBuffer, void* ParseData);
+
+void ParseFileByLines(FILE* File, FileLineParser Parser, void* ParseData, int ProcessCommentLines);
+float GetMedian(float* Values, int ValueCount);
+
+
+//#define PMC_USE_SVM
+#define MQSCORE_USE_SVM
+
+#define MQ_FEATURE_COUNT 7
+
+#endif //UTILS_H
diff --git a/Utils.py b/Utils.py
new file mode 100644
index 0000000..2c8351d
--- /dev/null
+++ b/Utils.py
@@ -0,0 +1,1074 @@
+#Title:          Utils.py
+#Author:         Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+#Created:        2005
+# Copyright 2007,2008,2009 The Regents of the University of California
+# All Rights Reserved
+#
+# Permission to use, copy, modify and distribute any part of this
+# program for educational, research and non-profit purposes, by non-profit
+# institutions only, without fee, and without a written agreement is hereby
+# granted, provided that the above copyright notice, this paragraph and
+# the following three paragraphs appear in all copies.
+#
+# Those desiring to incorporate this work into commercial
+# products or use for commercial purposes should contact the Technology
+# Transfer & Intellectual Property Services, University of California,
+# San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+# Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+#
+# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+# INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+# IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+# OF SUCH DAMAGE.
+#
+# THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+# OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+# ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+# REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+# EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+# THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+"""
+Various support functions and constants to support ms/ms algorithms.
+Amino acid mass importer, ion type list generation.
+"""
+import Global
+import os
+import sys
+import string
+import types
+
+if hasattr(os, "sysconf"):
+    IS_WINDOWS = 0
+else:
+    IS_WINDOWS = 1
+
+
+# IDs for all chromosome files.  Usually ID == chromosome number, but we also
+# give numbers to X, Y, and all of the unlocalized ("random" as in "random access") sequences
+ChromosomeMap = {"chr1":1, "chr2":2, "chr3":3, "chr4":4,
+                 "chr5":5, "chr6":6, "chr7":7, "chr8":8,
+                 "chr9":9, "chr10":10, "chr11":11, "chr12":12,
+                 "chr13":13, "chr14":14, "chr15":15, "chr16":16,
+                 "chr17":17, "chr18":18, "chr19":19, "chr20":20,
+                 "chr21":21, "chr22":22, "chrX":23, "chrY":24,
+                 "chrM":25, "chr1_random":26, "chr2_random":27, "chr3_random":28,
+                 "chr4_random":29, "chr5_random":30, "chr6_random":31, "chr7_random":32,
+                 "chr8_random":33, "chr9_random":34, "chr10_random":35, "chr11_random":36,
+                 "chr12_random":37, "chr13_random":38, "chr14_random":39, "chr15_random":40,
+                 "chr16_random":41, "chr17_random":42, "chr18_random":43, "chr19_random":44,
+                 "chr20_random":45, "chr21_random":46, "chr22_random":47, "chrX_random":48,
+                 "chrx":23, "chry":24, "chrm":25, "chrx_random":48
+                 }
+
+ReverseChromosomeMap = ["", "chr1", "chr2", "chr3", "chr4", "chr5",
+    "chr6", "chr7", "chr8", "chr9", "chr10",
+    "chr11", "chr12", "chr13", "chr14", "chr15",
+    "chr16", "chr17", "chr18", "chr19", "chr20",
+    "chr21", "chr22", "chrX", "chrY", "chrM",
+    "chr1_random", "chr2_random", "chr3_random", "chr4_random", "chr5_random",
+    "chr6_random", "chr7_random", "chr8_random", "chr9_random", "chr10_random",
+    "chr10_random", "chr12_random", "chr13_random", "chr14_random", "chr15_random",
+    "chr16_random", "chr17_random", "chr18_random", "chr19_random", "chr20_random",
+    "chr21_random", "chr22_random", "chrX_random",]
+
+MassPTMods = {}
+
+class Bag:
+    "Generic argument-container class"
+    pass
+
+def FixupPath(Path):
+    if IS_WINDOWS:
+        return Path.replace("/", "\\")
+    else:
+        return Path.replace("\\", "/")
+
+class PTModClass:
+    "A type of post-translational modification, such as phosphorylation."
+    InstanceCount = 0
+    def __init__(self, Name):
+        self.Name = Name
+        # Peptides that this modification can affect:
+        self.Bases = {}
+        self.BaseString = ""
+        self.Mass = 0.0
+        self.Score = 0.0
+        PTModClass.InstanceCount += 1
+        try:
+            Mass = int(Name)
+            self.Mass = Mass
+        except:
+            pass
+    def __del__(self):
+        if PTModClass:
+            PTModClass.InstanceCount -= 1
+    def __str__(self):
+        return "<PTMod '%s'>"%self.Name
+
+def LoadPTMods():
+    """
+    Read the definitions of post-translational modifications from PTMods.txt.
+    Line format is tab-delimited, like this: "Alkylation	14.01564	CKRHDENQ"
+    (This is rarely used in practice, but is useful for search results that are annotated with names instead of masses)
+    """
+    FileName = ""
+    for path in sys.path:
+        FileName = os.path.join(path,"PTMods.txt")
+        if os.path.exists(FileName):
+            #print FileName
+            break
+        else:
+            FileName = ""
+    if FileName == "":
+        print "Utils: Unable to open PTMods.txt"
+        sys.exit(1)
+    File = open(FileName, 'r')
+    for FileLine in File.xreadlines():
+        FileLine = FileLine.strip()
+        if (not FileLine) or FileLine[0]=="#":
+            continue
+        Bits = FileLine.split("\t")
+        PTMod = PTModClass(Bits[0])
+        PTMod.Mass = float(Bits[1])
+        PTMod.BaseString = Bits[2]
+        for Char in Bits[2]:
+            PTMod.Bases[Char] = 1
+        Global.PTMods[PTMod.Name.lower()] = PTMod
+        Global.PTModByShortName[PTMod.Name[:3].lower()] = PTMod
+        Global.PTModByShortName[PTMod.Name[:4].lower()] = PTMod
+        Global.PTModList.append(PTMod)
+    File.close()
+
+class ProteinClass:
+    """
+    Class representing a single protein: a collection of peptides.
+    Can compute sequence coverage, as well as the list of modifications (PTM)
+    """
+    def __init__(self,Sequence, Type= None):
+        self.CellType = Type #used by Sam for comparing cell types
+        self.Sequence = Sequence
+        self.SequenceCoverage = [0]*len(self.Sequence) #counts of spectra at each residue
+        self.Peptides = [] #list of UnmodifiedPeptide objects
+        self.PositionModSpectraDict = {} #key = (Name,Position) value = ModdedSpectra
+        self.Coverage = 0.0
+    def GenerateSequenceCoverage(self):
+        """
+        Goes through all the peptides and increments TotalSpectraCount for the
+        Residues that it covers.
+        """
+        for UPeptide in self.Peptides: #UnmodifiedPeptide
+            Start = self.Sequence.find(UPeptide.Aminos)
+            Len = len(UPeptide.Aminos)
+            for I in range(Start,Start+Len,1):
+                self.SequenceCoverage[I] += UPeptide.TotalSpectraCount
+        Covered =0
+        for I in range(len(self.Sequence)):
+            if self.SequenceCoverage[I] > 0:
+                Covered +=1
+        Coverage = Covered/ float(len(self.Sequence))
+        self.Coverage = Coverage
+    def GenerateModList(self):
+        """
+        This method runs through all the peptides in self.Peptides
+        and generates a list of modifications on residues
+        """
+        for UPep in self.Peptides:
+            PeptidePosition = self.Sequence.find(UPep.Aminos)
+            TotalSpectra = UPep.TotalSpectraCount
+            for Peptide in UPep.Peptides: # PeptideClass Object
+                for (AminoIndex, ModList) in Peptide.Modifications.items():
+                    SpectraThisPeptide = UPep.SpectraCount[Peptide.GetModdedName()]
+                    ModificationPosition = AminoIndex + PeptidePosition
+                    for Mod in ModList: #PTModClass objects
+                        Name = Mod.Name
+                        Key = (Name,ModificationPosition)
+                        if self.PositionModSpectraDict.has_key(Key):
+                            self.PositionModSpectraDict[Key] += SpectraThisPeptide
+                        else:
+                            self.PositionModSpectraDict[Key] = SpectraThisPeptide
+
+    def AddAnnotation(self,Peptide,SpectrumCounts):
+        "Add a Peptide to the protein"
+        Found = 0
+        for UPeptide in self.Peptides:
+            if UPeptide.Aminos == Peptide.Aminos: # these aminos already have a UPeptide object;
+                    # perhaps this is a modified version of the same peptide
+                UPeptide.AddAnnotation(Peptide,SpectrumCounts)
+                Found =1
+                break
+        if not Found:
+            ToAdd = UnmodifiedPeptide(Peptide,SpectrumCounts)
+            self.Peptides.append(ToAdd)
+
+class UnmodifiedPeptide:
+    """A wrapper for the PeptideClass, it contains all modified states of the
+    same amino acid sequence. Useful sometimes.
+    """
+    def __init__(self,Peptide,SpectrumCounts): #PeptideClass below
+        "constructor"
+        self.Aminos = Peptide.Aminos
+        self.UnmodifiedSpectraCount =0
+        self.TotalSpectraCount =0
+        self.Peptides = [] #an array of PeptideClass objects
+        self.SpectraCount ={} #key = fullname of peptide (no prefix/suffix), value = spectracount
+        self.AddAnnotation(Peptide,SpectrumCounts)
+        #maybe some sort of modification list of distinct modifications
+
+    def IsMe(self,Peptide):
+        if self.Aminos == Peptide.Aminos:
+            return 1
+        return 0
+
+    def PrintMe(self):    
+        print "UnmodifiedPeptide Object: %s"%self.UnmodifiedSequence
+        print "Total Spectra %d, UnmodifiedSpectra %d"%(self.TotalSpectraCount,self.UnmodifiedSpectraCount)
+        
+    def AddAnnotation(self,NewPeptide,SpectrumCounts):
+        "adds an annotation to my list, and updates tallies"
+        self.TotalSpectraCount += SpectrumCounts
+        if len (NewPeptide.Modifications) == 0:
+            if self.UnmodifiedSpectraCount == 0:
+                self.Peptides.append(NewPeptide)
+            self.UnmodifiedSpectraCount += SpectrumCounts
+        else:
+            #determine if we've already got this modification
+            Found =0
+            for MyPeptide in self.Peptides:
+                if MyPeptide.GetModdedName() == NewPeptide.GetModdedName():
+                    Found = 1
+                    break
+            if not Found:
+                self.Peptides.append(NewPeptide)
+        self.SpectraCount[NewPeptide.GetModdedName()] = SpectrumCounts
+    
+
+class ModificationTypeObject:
+    """
+    This holds information about a specific type of modification.  The source-file format is:
+    mod,DELTA_MASS,AMINOS[,POSITION]  <TAB>  #Name
+    e.g.  mod,14,KR  <TAB>  #methylation
+    """
+    def __init__(self,Latin,Name,DMass,Residues,Position):
+        self.inLatin = Latin #inVivo, inVitro
+        self.Name = Name
+        self.DeltaMass = DMass
+        self.Residues = Residues
+        if self.Residues == "*":
+            self.Residues = "ACDEFGHIKLMNPQRSTVWY"
+        self.Position = Position
+        self.InspectID = ""
+        ## Do a little processing.  The way a modification shows up in Inspect
+        ## output is as a +43, a -17, or possibly "phos".  So we need to generate
+        ## the Inspect identifier that can be compared later in the RemoveSelf function.
+        if self.Name == "phosphorylation":
+            self.InspectID = "phos"
+        elif self.DeltaMass < 0:
+            self.InspectID = "%s"%self.DeltaMass
+        elif self.DeltaMass > 0:
+            "this is for positive values of DeltaMass which present a problem"
+            self.InspectID = "+" + "%s"%self.DeltaMass
+            #try:
+            #    if self.DeltaMass[0] == "+":
+            #       self.InspectID = "%s"%self.DeltaMass
+            #       self.DeltaMass = int(self.DeltaMass[1:]) 
+            #except:
+            #   self.InspectID = "+" + "%s"%self.DeltaMass
+        
+    def PrintMe(self):
+        "Simple Debugging printer"
+        print "I am a ModificationTypeObject for %s"%self.Name
+        print "InspectID %s"%self.InspectID
+        print "AcceptableResidues %s"%self.Residues
+        
+    def RemoveSelf(self,Annotation):
+        """
+        This method takes an input string and looks for modifications which correspond
+        to its identity.  If any are found, it removes them from the string and returns it.
+        It will remove all copies of itself from the String
+        """
+        InMod = 0
+        StartIndex = -1
+        ModString = ""
+        I = 0 # loop iterator
+        while I < len(Annotation):
+            Letter = Annotation[I]
+            if not Letter in string.uppercase:
+                if not InMod:
+                    StartIndex=I
+                InMod = 1
+                ModString += Letter
+            elif InMod:
+                #this is the first upper case letter after a modification.
+                if ModString == self.InspectID:
+                    #this is my Identifier, Check position and residue
+                    PositionCheck = 0 #false
+                    ResidueCheck = 0
+                    if self.Position == "nterminal" and StartIndex == 1:
+                        PositionCheck = 1
+                    elif not self.Position:
+                        PositionCheck = 1 #no position specified (it should be None)
+                    ###   Add other position things in here as you get them ###
+                    ModifiedResidue = Annotation[StartIndex-1]
+                    if self.Residues.find(ModifiedResidue) >= 0:
+                        #found the modified residue in self.residue string
+                        ResidueCheck = 1
+                    if PositionCheck and ResidueCheck:
+                        Front = Annotation[:StartIndex]
+                        EndIndex = StartIndex + len(self.InspectID)
+                        Back = Annotation[EndIndex:]
+                        Annotation = Front + Back
+                        I = StartIndex-1 ##### VERY IMPORTANT to go back once the Annotation has been reset.
+                #regardless of whether this was actually me or not, still reset the vars below
+                InMod = 0
+                ModString = ""
+            I += 1 #must increment for the while loop 
+        return Annotation
+
+    def IsMe(self, Identifier,Residue, Position):
+        """
+        Check to see if all the criteria match
+        """
+        if not Identifier == self.InspectID:
+            return 0
+        if self.Position == "nterminal":
+            if Position > 0: #zero indexed string
+                return 0
+        ### add other position identifiers if you have them
+        if self.Residues.find(Residue) < 0:
+            return 0 # returned a -1 for "not found"
+        return 1
+
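+# Illustrative sketch of RemoveSelf (hypothetical values, added for clarity): for a methylation
+# object built as ModificationTypeObject("InVivo", "methylation", 14, "KR", None), InspectID is
+# "+14", so RemoveSelf("EAK+14LVR") returns "EAKLVR" (the "+14" follows the allowed residue K),
+# while "EAG+14LVR" is returned unchanged because G is not in the residue list.
+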
+def LoadModifications():
+    """
+    This method reads in two files: InVivoModifications.txt and InVitroModifications.txt
+    It makes a ModificationTypeObject out of each mod listed in the files
+    (except fixed mods).  These input files are expected to use the format
+    mod,DELTA_MASS,AMINOS[,POSITION]  <TAB>  #Modification name
+    e.g.  mod,14,KR  <TAB>  #methylation
+    """
+    FileName = ""
+    for path in sys.path:
+        FileName = os.path.join(path,"InVivoModifications.txt")
+        if os.path.exists(FileName):
+            #print FileName
+            break
+        else:
+            FileName = ""
+    if FileName == "":
+        print "Utils: Unable to open InVivoModifications.txt"
+        sys.exit(1)
+    LoadModificationsFromFile(FileName, Global.InVivoMods, "InVivo")
+    FileName = ""
+    for path in sys.path:
+        FileName = os.path.join(path,"InVitroModifications.txt")
+        if os.path.exists(FileName):
+            #print FileName
+            break
+        else:
+            FileName = ""
+    if FileName == "":
+        print "Utils: Unable to open InVitroModifications.txt"
+        sys.exit(1)
+    LoadModificationsFromFile(FileName, Global.InVitroMods, "InVitro")
+    
+def LoadModificationsFromFile(FileName, ModificationList, ChemistryType):
+    try:
+        File = open(FileName,"rb")
+    except:
+        #print "File '%s' not found - not loading mods"%FileName
+        return
+    for Line in File.xreadlines():
+        Line = Line.rstrip()
+        Data = Line.split("\t")
+        Name = Data[1][1:] #should get rid of the '#'
+        Latin = "InVivo"
+        InspectInput = Data[0].rstrip() #get rid of any right side junk
+        Data = InspectInput.split(",")
+        DeltaMass = int (Data[1])
+        Residues = Data[2]
+        if len(Data) > 3:
+            Position = Data[3]
+        else:
+            Position = None
+        Mod = ModificationTypeObject(ChemistryType, Name, DeltaMass, Residues, Position)
+        ModificationList.append(Mod)
+    File.close()
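+# Worked example (illustration only): a file line of the form
+#     mod,14,KR<TAB>#methylation
+# is parsed by LoadModificationsFromFile into Name = "methylation", DeltaMass = 14,
+# Residues = "KR" and Position = None (no fourth comma-separated field), which are then
+# passed to the ModificationTypeObject constructor along with the ChemistryType.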
+
+class PeptideClass:
+    """
+    Class representing one peptide, possibly with modifications.  We get one PeptideClass instance
+    for every match from the trie-based search.  A PeptideClass instance can also (if its PrefixMass
+    and SuffixMass members are set) represent a tag.
+    """
+    # Track number of live instances:
+    InstanceCount = 0 
+    def __init__(self, Aminos = ""):
+        "Constructor - if we have amino acids, get our masses now."
+        self.Aminos = Aminos
+        self.Masses = []
+        # Modifications map amino acid indices to a list of PTModClass instances
+        self.Modifications = {}
+        self.Score = None
+        self.ID = None
+        self.RecordNumber = None
+        self.PValue = 0
+        self.DeltaCN = 0
+        self.DeltaCNOther = 0
+        if Aminos:
+            self.ComputeMasses()
+        PeptideClass.InstanceCount += 1
+    def GetPTMBeforeAfter(self, Mass):
+        PTMBefore = {}
+        PTMAfter = {}
+        for (AminoIndex, List) in self.Modifications.items():
+            for Entry in List:
+                if Entry.Mass == Mass:
+                    for OtherIndex in range(AminoIndex, len(self.Aminos)+1):
+                        PTMBefore[OtherIndex] = 1
+                    for OtherIndex in range(0, AminoIndex+1):
+                        PTMAfter[OtherIndex] = 1
+        return (PTMBefore, PTMAfter)
+        
+    def GetPhosphoBeforeAfter(self):
+        PhosBefore = {}
+        PhosAfter = {}
+        for (AminoIndex, List) in self.Modifications.items():
+            for Entry in List:
+                if Entry.Name == "Phosphorylation":
+                    for OtherIndex in range(AminoIndex, len(self.Aminos)+1):
+                        PhosBefore[OtherIndex] = 1
+                    for OtherIndex in range(0, AminoIndex+1):
+                        PhosAfter[OtherIndex] = 1
+        return (PhosBefore, PhosAfter)
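+    # Illustrative example: for a 5-residue peptide with a single phosphorylation on the
+    # residue at index 2, GetPhosphoBeforeAfter() sets PhosBefore for keys 2..5 and
+    # PhosAfter for keys 0..2 (keys run from 0 to len(Aminos), one per break point, with
+    # 1 used as a flag value).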
+    def __del__(self):
+        if PeptideClass:
+            PeptideClass.InstanceCount -= 1
+    def GetParentMass(self):
+        if not self.Masses:
+            self.ComputeMasses()
+        return 19 + self.Masses[-1]
+    def ComputeMasses(self):
+        """
+        Populate our Masses list, based upon Aminos and Modifications.  Must be called
+        again whenever self.Modifications is edited!
+        """
+        self.Masses = [0]
+        Mass = 0
+        for Index in range(len(self.Aminos)):
+            Amino = self.Aminos[Index]
+            AminoMass = Global.AminoMass.get(Amino, None)
+            if AminoMass == None:
+                if Amino == "X":
+                    print "** Warning: Peptide '%s' contains wild-card amino X, mass is probably wrong."%(self.Aminos)
+                    AminoMass = 0
+                else:
+                    raise ValueError, "Bad amino '%s' in peptide '%s'"%(Amino, self.Aminos)
+            Mass += AminoMass
+            Mass += Global.FixedMods.get(Amino, 0)
+            for Mod in self.Modifications.get(Index, []):
+                Mass += Mod.Mass
+                # Warn, but don't fail here. (The tricky case: We generate tag GVQ instead of GVK,
+                # and biotin can't attach to Q.  Bah!)
+                #if not Mod.Bases.has_key(Amino):
+                #    print "Warning: Amino '%s' in peptide '%s' has illegal modification %s at %s"%(Amino, self.Aminos, Mod.Name, Index)
+            self.Masses.append(Mass)
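+    # Worked example (illustration): for Aminos = "GG" with no modifications and no fixed
+    # mods configured, ComputeMasses() produces Masses = [0, 57.02146, 114.04292] (using
+    # the glycine residue mass from AminoAcidMasses.txt), and GetParentMass() then returns
+    # 19 + 114.04292 = 133.04292, the 19 approximating water plus a proton.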
+    def GetPTMCount(self):
+        Total = 0
+        for Key in self.Modifications.keys():
+            Total += len(self.Modifications[Key])
+        return Total
+    def GetFullModdedName(self):
+        return "%s.%s.%s"%(self.Prefix, self.GetModdedName(), self.Suffix)
+    def GetModdedName(self):
+        "Returns the amino sequence with modifications included, like this: EAM+16APK"
+        Str = ""
+        for Index in range(len(self.Aminos)):
+            Amino = self.Aminos[Index]
+            Str += "%s"%(Amino)
+            for Mod in self.Modifications.get(Index, []):
+                Str += "%s"%(Mod.Name[:4].lower())
+        return Str
+    def __str__(self):
+        return "<Peptide '%s'>"%self.Aminos
+    def IsValidTag(self, TagPeptide, Epsilon = 2.0):
+        """
+        Returns true if TagPeptide is a valid tag for this (full-length) peptide
+        """
+        TotalResidueMass = self.Masses[-1]
+        TagLength = len(TagPeptide.Aminos)
+        TagAminos = TagPeptide.Aminos.replace("I", "L").replace("Q", "K")
+        Aminos = self.Aminos.replace("I", "L").replace("Q", "K")
+        for Pos in range(len(self.Masses)):
+            PrefixMass = self.Masses[Pos]
+            # Check flanking mass:
+            if abs(PrefixMass - TagPeptide.PrefixMass) > Epsilon:
+                #print "Pos %s: Invalid (prefix %s vs %s)"%(Pos, PrefixMass, TagPeptide.PrefixMass)
+                continue
+            # Check amino acids:
+            if Aminos[Pos:Pos + TagLength] != TagAminos:
+                #print "Pos %s: Invalid (aminos %s vs %s)"%(Pos, Aminos[Pos:Pos + TagLength], TagAminos)
+                continue
+            # Check suffix mass:
+            SuffixMass = TotalResidueMass - self.Masses[Pos + TagLength]
+            if abs(SuffixMass - TagPeptide.SuffixMass) > Epsilon:
+                #print "Pos %s: Invalid (suffix %s vs %s)"%(Pos, SuffixMass, TagPeptide.SuffixMass)
+                continue
+            return 1
+        #Mass = TagPeptide.PrefixMass + TagPeptide.SuffixMass + GetMass(TagPeptide.Aminos)
+        
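+    # Note: IsValidTag() treats I/L and Q/K as interchangeable, so a tag whose Aminos are
+    # "GGQ" can validate against a peptide containing "GGK", provided the tag's PrefixMass
+    # and SuffixMass each agree with the peptide's flanking masses within Epsilon.  If no
+    # position validates, the method falls through and returns None (treated as false).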
+    def IsSame(self, OtherPeptide):
+        SubstDict = {"Q": "K", "I": "L"}
+        if len(self.Aminos) != len(OtherPeptide.Aminos):
+            return 0
+        for AminoIndex in range(len(self.Aminos)):
+            OurAmino = self.Aminos[AminoIndex]
+            TheirAmino = OtherPeptide.Aminos[AminoIndex]
+            OurMods = []
+            TheirMods = []
+            for Mod in self.Modifications.get(AminoIndex, []):
+                if Mod.Name[1:3] == "->":
+                    OurAmino = Mod.Name[-1].upper()
+                else:
+                    OurMods.append(Mod.Mass)
+            for Mod in OtherPeptide.Modifications.get(AminoIndex, []):
+                if Mod.Name[1:3] == "->":
+                    TheirAmino = Mod.Name[-1].upper()
+                else:
+                    TheirMods.append(Mod.Mass)
+            OurAmino = SubstDict.get(OurAmino, OurAmino)
+            TheirAmino = SubstDict.get(TheirAmino, TheirAmino)
+            if OurAmino != TheirAmino:
+                return 0
+            OurMods.sort()
+            TheirMods.sort()
+            if OurMods != TheirMods:
+                return 0
+        return 1
+    def __cmp__(self, OtherPeptide):
+        if (not isinstance(OtherPeptide, PeptideClass)):
+            return 1
+        # Sort by score, best to worst:
+        if self.Score > OtherPeptide.Score:
+            return -1
+        if self.Score < OtherPeptide.Score:
+            return 1
+        return 0
+    def GetNTT(self):
+        """
+        Returns the number of tryptic termini.  (Assumes self.Prefix and self.Suffix
+        are set.)
+        """
+        NTT = 0
+        if self.Prefix in ("-*X"):
+            NTT += 1
+        elif (self.Prefix in ("KR")) and (self.Aminos[0] !="P"):
+            NTT += 1
+        if self.Suffix in ("-*X"):
+            NTT += 1
+        elif (self.Aminos[-1] in "KR") and (self.Suffix != "P"):
+            NTT += 1
+        return NTT
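+    # Illustrative example: with Prefix = "K", Aminos = "ACDEFR", Suffix = "G", GetNTT()
+    # returns 2 - one terminus because the preceding K is not followed by a proline, and
+    # one because the trailing R is not followed by a proline suffix.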
+    def IsFullyTryptic(self):
+        if self.Prefix in ("-", "*"):
+            pass
+        elif (self.Prefix in ("K", "R")) and self.Aminos[0] != "P":
+            pass
+        else:
+            return 0
+        if self.Suffix in ("-", "*"):
+            pass
+        elif self.Aminos[-1] in ("K", "R") and self.Suffix != "P":
+            pass
+        else:
+            return 0
+        return 1
+    def GetNiceAnnnotation(self):
+        """
+        Return an annotation suitable for a filename.  *.ABC.D turns into -.ABC.D
+        """
+        Str = "%s.%s.%s"%(self.Prefix, self.Aminos, self.Suffix)
+        return Str.replace("*", "-")
+    
+def GetPeptideFromModdedName(TagName):
+    """
+    Parse a tag with form like "ATphosQ", adding PTMs at the correct spots.
+    """
+    StringPos = 0
+    Peptide = PeptideClass()
+    
+    # If the name has the form K.ABCDER.G, then strip off the prefix and suffix:
+    if len(TagName) > 4 and TagName[1] == "." and TagName[-2] == ".":
+        Peptide.Prefix = TagName[0]
+        Peptide.Suffix = TagName[-1]
+        TagName = TagName[2:-2]
+    
+    try:
+        while (1):
+            if StringPos >= len(TagName):
+                break
+            if TagName[StringPos] in string.uppercase:
+                Peptide.Aminos += TagName[StringPos]
+                StringPos += 1
+            else:
+                # It's a modification:
+                ModName = ""
+                while (StringPos<len(TagName) and TagName[StringPos] not in string.uppercase) and len(ModName)<4:
+                    if ModName and ModName[0] in ("-","+") and TagName[StringPos] not in "0123456789k":
+                        break
+                    ModName += TagName[StringPos]
+                    StringPos += 1
+                Mod = Global.PTModByShortName.get(ModName)
+                if len(ModName)<2:
+                    print "!???", TagName, ModName
+                if not Mod and ModName[-2]==">": #Mutation is annotated as "a->g", etc.
+                    Mod = PTModClass(ModName)
+                    Mod.Mass = Global.AminoMass[ModName[-1].upper()] - Global.AminoMass[ModName[0].upper()]
+                if not Mod and ModName[0] in ("-","+"):
+                    ModName = ModName.replace("(","")
+                    # Keep a cache of "mass mods":
+                    ModMass = int(ModName)
+                    Mod = MassPTMods.get(ModMass, None)
+                    if not Mod:
+                        Mod = PTModClass(ModName)
+                        Mod.Mass = ModMass
+                        MassPTMods[ModMass] = Mod
+                if Mod:
+                    Pos = len(Peptide.Aminos) - 1
+                    if not Peptide.Modifications.has_key(Pos):
+                        Peptide.Modifications[Pos] = []
+                    Peptide.Modifications[Pos].append(Mod)
+                else:
+                    print "** Warning: Unknown mod '%s' in '%s'"%(ModName, TagName)
+    except:
+        print TagName
+        raise
+    Peptide.ComputeMasses()
+    return Peptide
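+# Worked example (illustration): GetPeptideFromModdedName("K.DFSQIDNAP+16EER.E") returns a
+# PeptideClass with Prefix "K", Suffix "E", Aminos "DFSQIDNAPEER", and a single mass-offset
+# modification of +16 attached at index 8 (the P that precedes the "+16" text); the masses
+# have already been populated via ComputeMasses().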
+
+class AminoClass:
+    def __init__(self, Name, ShortName, Abbreviation, LeftMass, RightMass):
+        self.Name = Name # "Histidine"
+        self.ShortName = ShortName # "His"
+        self.Abbreviation = Abbreviation # "H"
+        self.LeftMass = LeftMass
+        self.RightMass = RightMass
+        self.RequiredModification = None 
+        
+def LoadAminoAcids():
+    """
+    Read in the masses of all amino acids.
+    Populate dictionaries AminoMass, AminoMassRight and list AminoMasses
+    """
+    FileName = ""
+    for path in sys.path:
+        FileName = os.path.join(path,"AminoAcidMasses.txt")
+        if os.path.exists(FileName):
+            #print FileName
+            break
+        else:
+            FileName = ""
+    if FileName == "":
+        print "Utils: Unable to open AminoAcidMasses.txt"
+        sys.exit(1)
+    File = open(FileName,'r')
+    for FileLine in File.xreadlines():
+        # Line is whitespace-delimited.  Pieces are:
+        # Long, short, abbrev, left-mass, right-mass
+        # Example: "Glycine Gly G 57.02146 57.0520"
+        FileLine = FileLine.strip()
+        if FileLine[0] == "#":
+            continue
+        Bits = FileLine.split(" ")
+        if len(Bits)<5:
+            continue
+        LeftMass = float(Bits[3])
+        RightMass = float(Bits[4])
+        Global.AminoMass[Bits[2]] = LeftMass
+        Global.AminoMassRight[Bits[2]] = RightMass
+        Global.AminoMasses.append(LeftMass)
+        # Put the Amino object into Global.AminoAcids:
+        Amino = AminoClass(Bits[0], Bits[1], Bits[2], LeftMass, RightMass)
+        Global.AminoAcids[Amino.Abbreviation] = Amino
+    File.close()
+    Global.AminoMasses.sort()
+    
+
+def DebugPrintPTMods():
+    Keys = Global.PTMods.keys()
+    Keys.sort()
+    print "--PTMods--"
+    for Key in Keys:
+        PTMod = Global.PTMods[Key]
+        BaseString = ""
+        for Base in PTMod.Bases.keys():
+            BaseString += Base
+        print "  %s mass %s bases '%s'"%(PTMod.Name, PTMod.Mass, BaseString)
+    print "-----"
+    
+class IonClass:
+    """
+    Each IonClass corresponds to an ion type, such as b or y-nh3.
+    Each spectral peak gives rise to one PRM peak for each ion type;
+    these PRM peaks remember their associated ion class
+    """
+    def __init__(self, Name):
+        self.Name = Name
+        self.Opposite = None
+        self.Charge = 1
+        self.Score = 1.0
+    def __str__(self):
+        return "<ion '%s'>"%self.Name
+    def GetPRMMass(self, Mass, ParentMass):
+        """
+        Returns the prm peak for a spectrum peak of the given mass.  For instance,
+        for b ions, GetPRMMass() returns the peak mass minus 1.  (Because the spectral peak
+        appears 1amu to the right of the actual prefix mass)
+        """
+        return None
+    def GetPeakMass(self, Mass, ParentMass):
+        """
+        Returns the peak for a PRM of the given mass.  Inverse of GetPRMMass.
+        For instance, for b ions, GetPeakMass() returns the PRM plus 1.
+        """
+        return None
+
+
+
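+# Illustrative arithmetic for the ion definitions below: a singly charged b fragment sits
+# one proton above its prefix residue mass (peak = PRM + 1), the matching y fragment sits
+# at ParentMass - PRM (ParentMass being the total residue mass + 19, as returned by
+# PeptideClass.GetParentMass), and doubly charged variants such as "b2" use
+# peak = PRM/2 + 1, i.e. the singly charged mass plus one more proton, divided by two.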
+AllIons = []
+Global.AllIonDict = {}
+def DefineIons():
+    """
+    Define all the ion types we care about.  
+    (This function is repetitive, but easy enough to maintain since the zoo of ion types
+    is pretty small...the scores should be in a datafile, though!)
+    """
+    IonB = IonClass("b")
+    IonB.GetPeakMass = lambda L, P:L+1
+    IonB.GetPRMMass = lambda M, P:M-1
+    AllIons.append(IonB)
+    #
+    IonBH = IonClass("b-h2o")
+    IonBH.GetPeakMass = lambda L, P:L-17
+    IonBH.GetPRMMass = lambda M, P:M+17
+    AllIons.append(IonBH)
+    #
+    IonBN = IonClass("b-nh3")
+    IonBN.GetPeakMass = lambda L, P:L-16
+    IonBN.GetPRMMass = lambda M, P:M+16
+    AllIons.append(IonBN)
+    #
+    Ion = IonClass("b-h2o-h2o")
+    Ion.GetPeakMass = lambda L, P:L-17-18
+    Ion.GetPRMMass = lambda M, P:M+17+18
+    AllIons.append(Ion)
+    #
+    Ion = IonClass("b-h2o-nh3")
+    Ion.GetPeakMass = lambda L, P:L-16-18
+    Ion.GetPRMMass = lambda M, P:M+16+18
+    AllIons.append(Ion)
+    #
+    Ion = IonClass("b-p'")
+    Ion.GetPeakMass = lambda L, P:L-79
+    Ion.GetPRMMass = lambda M, P:M+79
+    AllIons.append(Ion)
+    #
+    Ion = IonClass("b-p")
+    Ion.GetPeakMass = lambda L, P:L-97
+    Ion.GetPRMMass = lambda M, P:M+97
+    AllIons.append(Ion)
+    #
+    Ion = IonClass("b-p-h2o")
+    Ion.GetPeakMass = lambda L, P:L-97-18
+    Ion.GetPRMMass = lambda M, P:M+97+18
+    AllIons.append(Ion)
+    #
+    Ion = IonClass("b-p-nh3")
+    Ion.GetPeakMass = lambda L, P:L-97-17
+    Ion.GetPRMMass = lambda M, P: M+97+17
+    AllIons.append(Ion)
+    # for oxidized methionine:
+    Ion = IonClass("b-*")
+    Ion.GetPeakMass = lambda L, P:L-63
+    Ion.GetPRMMass = lambda M, P:M+63
+    AllIons.append(Ion)
+    #
+    IonY = IonClass("y")
+    IonY.GetPeakMass = lambda L, P:P-L
+    IonY.GetPRMMass = lambda M, P:P-M
+    AllIons.append(IonY)
+    #
+    IonYH = IonClass("y-h2o")
+    IonYH.GetPeakMass = lambda L, P:P-(L+18)
+    IonYH.GetPRMMass = lambda M, P:(P-M)-18
+    AllIons.append(IonYH)
+    #
+    IonYN = IonClass("y-nh3")
+    IonYN.GetPeakMass = lambda L, P:P-(L+17)
+    IonYN.GetPRMMass = lambda M, P:(P-M)-17
+    AllIons.append(IonYN)
+    #
+    Ion = IonClass("y-h2o-nh3")
+    Ion.GetPeakMass = lambda L, P:P-(L+17+18)
+    Ion.GetPRMMass = lambda M, P:(P-M)-17-18
+    AllIons.append(Ion)
+    #
+    Ion = IonClass("y-h2o-h2o")
+    Ion.GetPeakMass = lambda L, P:P-(L+18+18)
+    Ion.GetPRMMass = lambda M, P:(P-M)-18-18
+    AllIons.append(Ion)
+    #
+    Ion = IonClass("y-p'")
+    Ion.GetPeakMass = lambda L, P:(P-L)-80
+    Ion.GetPRMMass = lambda M, P:P-(M+80)
+    AllIons.append(Ion)
+    #
+    Ion = IonClass("y-p")
+    Ion.GetPeakMass = lambda L, P:(P-L)-98
+    Ion.GetPRMMass = lambda M, P:P-(M+98)
+    AllIons.append(Ion)
+    # For oxidized methionine:
+    Ion = IonClass("y-*")
+    Ion.GetPeakMass = lambda L, P:(P-L)-64
+    Ion.GetPRMMass = lambda M, P:P-(M+64)
+    AllIons.append(Ion)
+    #
+    IonA = IonClass("a")
+    IonA.GetPeakMass = lambda L, P:L-27
+    IonA.GetPRMMass = lambda M,P:M+27
+    AllIons.append(IonA)
+    #
+    IonAN = IonClass("a-nh3")
+    IonAN.GetPeakMass = lambda L, P:L-27-17
+    IonAN.GetPRMMass = lambda M,P:M+27+17
+    AllIons.append(IonAN)
+    #
+    IonAH = IonClass("a-h2o")
+    IonAH.GetPeakMass = lambda L, P:L-27-18
+    IonAH.GetPRMMass = lambda M,P:M+27+18
+    AllIons.append(IonAH)
+    #
+    Ion = IonClass("b2")
+    Ion.GetPeakMass = lambda L,P:(L/2)+1
+    Ion.GetPRMMass = lambda M,P:(M-1)*2
+    Ion.Charge = 2
+    AllIons.append(Ion)
+    #
+    Ion = IonClass("b2-h2o")
+    Ion.GetPeakMass = lambda L,P:(L/2)+1 - 9
+    Ion.GetPRMMass = lambda M,P:(M-1)*2 + 18
+    Ion.Charge = 2
+    AllIons.append(Ion)
+    #
+    Ion = IonClass("b2-nh3")
+    Ion.GetPeakMass = lambda L,P:(L/2)+1 - 8.5
+    Ion.GetPRMMass = lambda M,P:(M-1)*2 + 17 
+    Ion.Charge = 2
+    AllIons.append(Ion)
+    #
+    Ion = IonClass("b2-nh3-h2o")
+    Ion.GetPeakMass = lambda L,P:(L/2)+1 - 17.5
+    Ion.GetPRMMass = lambda M,P:(M-1)*2 + 35 
+    Ion.Charge = 2
+    AllIons.append(Ion)
+    #
+    Ion = IonClass("b2-p")
+    Ion.GetPeakMass = lambda L,P:(L/2)+1 - 49
+    Ion.GetPRMMass = lambda M,P:(M-1)*2 + 98 
+    Ion.Charge = 2
+    AllIons.append(Ion)
+    #
+    Ion = IonClass("y2")
+    Ion.GetPeakMass = lambda L,P:(P-L+1.0078)/2
+    Ion.GetPRMMass = lambda M,P:P - (M*2 - 1.0078)
+    Ion.Charge = 2
+    AllIons.append(Ion)
+    #
+    Ion = IonClass("y2-h2o")
+    Ion.GetPeakMass = lambda L,P:(P-L+1.0078 - 18)/2
+    Ion.GetPRMMass = lambda M,P:P - (M*2 - 1.0078 + 18)
+    Ion.Charge = 2
+    AllIons.append(Ion)
+    #
+    Ion = IonClass("y2-nh3")
+    Ion.GetPeakMass = lambda L,P:(P-L+1.0078 - 17)/2
+    Ion.GetPRMMass = lambda M,P:P - (M*2 - 1.0078 + 17)
+    Ion.Charge = 2
+    AllIons.append(Ion)
+    #
+    Ion = IonClass("y2-nh3-h2o")
+    Ion.GetPeakMass = lambda L,P:(P-L+1.0078 - 17 - 18)/2
+    Ion.GetPRMMass = lambda M,P:P - (M*2 - 1.0078 + 17 + 18)
+    Ion.Charge = 2
+    AllIons.append(Ion)
+    #
+    Ion = IonClass("y2-p")
+    Ion.GetPeakMass = lambda L,P:(P-L+1.0078 - 98)/2
+    Ion.GetPRMMass= lambda M,P:P - (M*2 - 1.0078 + 98)
+    Ion.Charge =2
+    AllIons.append(Ion)
+    # For oxidized methionine:
+    Ion = IonClass("y2-*")
+    Ion.GetPeakMass = lambda L,P:(P-L+1.0078 - 64)/2
+    Ion.GetPRMMass = lambda M,P:P - (M*2 - 1.0078 + 64)
+    Ion.Charge = 2
+    AllIons.append(Ion)
+    #
+    
+    for Ion in AllIons:
+        Global.AllIonDict[Ion.Name] = Ion
+
+def GetMass(Str):
+    "Return the mass of a string of amino acids.  Useful in interactive mode."
+    Mass = 0
+    for Char in Str:
+        Mass += Global.AminoMass[Char]
+        Mass += Global.FixedMods.get(Char, 0)
+    return Mass
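+# Example (interactive use, assuming no fixed modifications are configured):
+#     >>> GetMass("GG")
+#     114.04292
+# i.e. twice the glycine residue mass; anything registered in Global.FixedMods for a
+# residue is added on top.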
+
+        
+
+def GetIsotopePatterns():
+    Global.IsotopeWeights = {}
+    FileName = ""
+    for path in sys.path:
+        FileName = os.path.join(path,"IsotopePatterns.txt")
+        if os.path.exists(FileName):
+            #print FileName
+            break
+        else:
+            FileName = ""
+    if FileName == "":
+        print "Utils: Unable to open IsotopePatterns.txt"
+        sys.exit(1)
+    File = open(FileName,'r')
+    for FileLine in File.xreadlines():
+        Bits = FileLine.split("\t")
+        if len(Bits) < 2:
+            continue
+        Global.IsotopeWeights[int(Bits[0])] = float(Bits[1])
+        
+
+INITIALIZED = 0
+DummyIon = None
+def Initialize():
+    global INITIALIZED
+    global DummyIon
+    if INITIALIZED:
+        return 
+    DefineIons()
+
+    # dummy ion type, for the spectral edge peaks we put at mass 0 and at parent-mass:
+    DummyIon = IonClass("")
+    DummyIon.GetPeakMass = lambda L,P:L
+    DummyIon.GetPRMMass = lambda M,P:M
+
+    # Do this initialization once, up front:
+    LoadAminoAcids()
+    LoadPTMods()
+    LoadModifications()
+    GetIsotopePatterns()
+    INITIALIZED = 1
+
+#SAME AS INITIALIZE, BUT SPECIFY DIRECTORY FOR FILES
+
+def InitializeNonInspect(ResourceDir):
+    global INITIALIZED
+    global DummyIon
+    if INITIALIZED:
+        return 
+    DefineIons()
+
+
+    # dummy ion type, for the spectral edge peaks we put at mass 0 and at parent-mass:
+    DummyIon = IonClass("")
+    DummyIon.GetPeakMass = lambda L,P:L
+    DummyIon.GetPRMMass = lambda M,P:M
+
+    # Do this initialization once, up front:
+    LoadAminoAcidsNonInspect(ResourceDir)
+    LoadPTModsNonInspect(ResourceDir)
+    LoadModificationsNonInspect(ResourceDir)
+    GetIsotopePatternsNonInspect(ResourceDir)
+    INITIALIZED = 1
+
+def LoadAminoAcidsNonInspect(ResourceDir):
+    """
+    Read in the masses of all amino acids.
+    Populate dictionaries AminoMass, AminoMassRight and list AminoMasses
+    """
+    File = open(os.path.join(ResourceDir,"AminoAcidMasses.txt"),"r")
+    for FileLine in File.xreadlines():
+        # Line is whitespace-delimited.  Pieces are:
+        # Long, short, abbrev, left-mass, right-mass
+        # Example: "Glycine Gly G 57.02146 57.0520"
+        FileLine = FileLine.strip()
+        if FileLine[0] == "#":
+            continue
+        Bits = FileLine.split(" ")
+        if len(Bits)<5:
+            continue
+        LeftMass = float(Bits[3])
+        RightMass = float(Bits[4])
+        Global.AminoMass[Bits[2]] = LeftMass
+        Global.AminoMassRight[Bits[2]] = RightMass
+        Global.AminoMasses.append(LeftMass)
+        # Put the Amino object into Global.AminoAcids:
+        Amino = AminoClass(Bits[0], Bits[1], Bits[2], LeftMass, RightMass)
+        Global.AminoAcids[Amino.Abbreviation] = Amino
+    File.close()
+    Global.AminoMasses.sort()
+
+def LoadPTModsNonInspect(ResourceDir):
+    """
+    Read the definitions of post-translational modifications from PTMods.txt.
+    Line format is tab-delimited, like this: "Alkylation	14.01564	CKRHDENQ"
+    (This is rarely used in practice, but is useful for search results that are annotated with names instead of masses)
+    """
+    File = open(os.path.join(ResourceDir,"PTMods.txt"),"r")
+    for FileLine in File.xreadlines():
+        FileLine = FileLine.strip()
+        if (not FileLine) or FileLine[0]=="#":
+            continue
+        Bits = FileLine.split("\t")
+        PTMod = PTModClass(Bits[0])
+        PTMod.Mass = float(Bits[1])
+        PTMod.BaseString = Bits[2]
+        for Char in Bits[2]:
+            PTMod.Bases[Char] = 1
+        Global.PTMods[PTMod.Name.lower()] = PTMod
+        Global.PTModByShortName[PTMod.Name[:3].lower()] = PTMod
+        Global.PTModByShortName[PTMod.Name[:4].lower()] = PTMod
+        Global.PTModList.append(PTMod)
+    File.close()
+
+def LoadModificationsNonInspect(ResourceDir):
+    """
+    This method reads in two files: InVivoModifications.txt and InVitroModifications.txt
+    It makes a ModificationTypeObject out of each mod listed in the files
+    (except fixed mods).  These input files are expected to be of the format
+    mod,14,KR    TAB       #methylation.
+    mod,DELTA_MASS,AMINOS,POSITION__TAB__#Modification name
+    """
+    LoadModificationsFromFile(os.path.join(ResourceDir,"InVivoModifications.txt"), Global.InVivoMods, "InVivo")
+    LoadModificationsFromFile(os.path.join(ResourceDir,"InVitroModifications.txt"), Global.InVitroMods, "InVitro")
+
+def GetIsotopePatternsNonInspect(ResourceDir):
+    Global.IsotopeWeights = {}
+    File = open(os.path.join(ResourceDir,"IsotopePatterns.txt"), "r")
+    for FileLine in File.xreadlines():
+        Bits = FileLine.split("\t")
+        if len(Bits) < 2:
+            continue
+        Global.IsotopeWeights[int(Bits[0])] = float(Bits[1])
+
+def MakeDirectory(Dir):
+    if os.path.exists(Dir):
+        return 
+    try:
+        os.makedirs(Dir)
+    except:
+        raise
+    
diff --git a/base64.c b/base64.c
new file mode 100644
index 0000000..2f53e49
--- /dev/null
+++ b/base64.c
@@ -0,0 +1,217 @@
+// downloaded from web 
+#include "CMemLeak.h"
+#include <string.h>
+#include <stdio.h>
+#include <math.h>
+#include "base64.h"
+
+
+int GetPosition(char buf);
+
+static const unsigned char *b64_tbl = (const unsigned char*) "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+static const unsigned char b64_pad = '=';
+
+// base64 encode a group of between 1 and 3 input chars into a group of  4 output chars 
+static void encode_group(const unsigned char input[], unsigned char output[], int n)
+{
+   unsigned char ingrp[3];
+
+   ingrp[0] = n > 0 ? input[0] : 0;
+   ingrp[1] = n > 1 ? input[1] : 0;
+   ingrp[2] = n > 2 ? input[2] : 0;
+
+   // upper 6 bits of ingrp[0] 
+   output[0] = n > 0 ? b64_tbl[ingrp[0] >> 2] : b64_pad;
+
+   // lower 2 bits of ingrp[0] | upper 4 bits of ingrp[1] 
+   output[1] = n > 0 ? b64_tbl[((ingrp[0] & 0x3) << 4) | (ingrp[1] >> 4)] : b64_pad;
+
+   // lower 4 bits of ingrp[1] | upper 2 bits of ingrp[2] 
+   output[2] = n > 1 ? b64_tbl[((ingrp[1] & 0xf) << 2) | (ingrp[2] >> 6)] : b64_pad;
+
+   // lower 6 bits of ingrp[2] 
+   output[3] = n > 2 ? b64_tbl[ingrp[2] & 0x3f] : b64_pad;
+
+}
+
+// base64 decode a group of 4 input chars into a group of between 0 and
+// 3 output chars 
+static void decode_group(const unsigned char input[], unsigned char output[], int* n)
+{
+   unsigned char* t1;
+   unsigned char* t2;
+   *n = 0;
+
+   if (input[0] == '=')
+   {
+      return;
+   }
+
+   t1 = (unsigned char*)strchr((const char*)b64_tbl, input[0]);
+   t2 = (unsigned char*)strchr((const char*)b64_tbl, input[1]);
+
+   output[(*n)++] = ((t1 - b64_tbl) << 2) | ((t2 - b64_tbl) >> 4);
+
+   if (input[2] == '=')
+   {
+      return;
+   }
+
+   t1 = (unsigned char*) strchr ((const char*)b64_tbl, input[2]);
+
+   output[(*n)++] = ((t2 - b64_tbl) << 4) | ((t1 - b64_tbl) >> 2);
+
+   if (input[3] == '=')
+      return;
+
+   t2 = (unsigned char*) strchr ((const char*)b64_tbl, input[3]);
+
+   output[(*n)++] = ((t1 - b64_tbl) << 6) | (t2 - b64_tbl);
+
+   return;
+}
+
+int GetPosition(char buf)
+{
+
+  if (buf > 96)        // [a-z]
+  {
+    return (buf - 71);
+  }
+  else if (buf > 64)        // [A-Z]
+  {
+    return (buf - 65);
+  }
+  else if (buf > 47)        // [0-9]
+  {
+    return (buf + 4);
+  }
+  else if (buf == 43)        // '+'
+  {
+    return 62;
+  }
+  else                // buf == '/'
+  {
+    return 63;
+  }
+}
+
+void b64_decode_mio(char* src, char* dest)
+{
+    char* temp;
+    int BlockCount = 0;
+
+    temp = dest;
+
+    while (*src)
+    {
+        
+        register int a;
+        register int b;
+        int t1,t2,t3,t4;
+        //printf("Block %d: '%c%c%c%c'\n", BlockCount, src[0], src[1], src[2], src[3]);
+        BlockCount++;
+        t1 = src[0];
+        t2 = src[1];
+        t3 = src[2];
+        t4 = src[3];
+
+        if (t1 == 61) // if == '='
+        {
+            return;
+        }
+        if (t1 > 96)  // [a-z]
+        {
+            a = (t1 - 71);
+        }
+        else if (t1 > 64) // [A-Z]
+        {
+            a = (t1 - 65);
+        }
+        else if (t1 > 47)        // [0-9]
+        {
+            a = (t1 + 4);
+        }
+        else if (t1 == 43)
+        {
+            a = 62;
+        }
+        else                // src[0] == '/'
+        {
+            a = 63;     
+        }
+        if (t2 > 96)        // [a-z]
+        {
+            b = (t2 - 71);
+        }
+        else if (t2 > 64)        // [A-Z]
+        {
+            b = (t2 - 65);
+        }
+        else if (t2 > 47)        // [0-9]
+        {
+            b = (t2 + 4);
+        }
+        else if (t2 == 43)
+        {
+            b = 62;
+        }
+        else                // src[0] == '/'
+        {
+            b = 63;     
+        }
+        *temp++ = (a << 2) | (b >> 4);     
+        if (t3 == 61)
+        {
+            return;
+        }
+        if (t3 > 96)        // [a-z]
+        {
+            a = (t3 - 71);
+        }
+        else if (t3 > 64)        // [A-Z]
+        {
+            a = (t3 - 65);
+        }
+        else if (t3 > 47)        // [0-9]
+        {
+            a = (t3 + 4);
+        }
+        else if (t3 == 43)
+        {
+            a = 62;
+        }
+        else                // src[0] == '/'
+        {
+            a = 63;     
+        }
+        *temp++ = (b << 4) | (a >> 2);
+        if (t4 == 61)
+        {
+            return;
+        }
+
+        if (t4 > 96)        // [a-z]
+        {
+            b = (t4 - 71);
+        }
+        else if (t4 > 64)        // [A-Z]
+        {
+            b = (t4 - 65);
+        }
+        else if (t4 > 47)        // [0-9]
+        {
+            b = (t4 + 4);
+        }
+        else if (t4 == 43)
+        {
+            b = 62;
+        }
+        else                // src[0] == '/'
+        {
+            b = 63;    
+        }
+        *temp++ = ( a << 6) | ( b );
+        src += 4;
+    }
+}
diff --git a/base64.h b/base64.h
new file mode 100644
index 0000000..0d11f94
--- /dev/null
+++ b/base64.h
@@ -0,0 +1,6 @@
+#ifndef BASE64_H
+#define BASE64_H
+
+void b64_decode_mio(char* src, char* dest);
+
+#endif // BASE64_H
diff --git a/docs/Analysis.html b/docs/Analysis.html
new file mode 100644
index 0000000..238aa6a
--- /dev/null
+++ b/docs/Analysis.html
@@ -0,0 +1,79 @@
+<h1>Inspect: A Proteomics Search Toolkit</h1>
+Copyright 2007, The Regents of the University of California
+<h3>Table of Contents</h3>
+<li><a href="index.html">Overview</a>
+<li><a href="Copyright.html">Copyright information</a>
+<li><a href="Installation.html">Installation</a>
+<li><a href="Database.html">Database</a>
+<li><a href="Searching.html">Searching</a>
+<li><a href="Analysis.html">Analysis</a>
+<li><a href="InspectTutorial.pdf">Basic Tutorial</a>
+<li><a href="InspectAdvancedTutorial.pdf">Advanced Tutorial</a>
+<li><a href="UnrestrictedSearchTutorial.pdf">Unrestricted Search Tutorial</a>
+<hr>
+<h2>Analysis</h2>
+Inspect writes search results to a tab-delimited file.  Up to ten search hits are written for each spectrum,
+but typically all but the first can be discarded.
+<br><br>
+The quality of each match can be determined by the F-score.  The F-score is a weighted sum of two factors.  First is the
+MQScore, or match quality score (in column 6).  Second
+is the delta-score (in column 14), the difference in MQScore between this match and the best alternative.
+Because delta-score is highly dependent on database size and search parameters, Inspect takes the ratio of
+the delta-score to the average delta-score for all top-scoring matches.
+<br><br>
+The preferred method to compute the false discovery rate (FDR) for a collection of matches is to employ a decoy
+database.  This method requires you to generate shuffled protein records before searching, using the "ShuffleDB" script
+(see the Database section for details).  Then, run the ComputeFDR.jar script to compute the empirical false discovery
+rate for a given f-score cutoff.
+<br><br>
+As of January 3, 2012, the columns have been updated slightly.  Below is a list of all the columns and their meaning:
+<li>SpectrumFile - The file searched
+<li>Scan# - The scan number within the file; this value is 0 for .dta files.  For MGF files, the scan# is equivalent to the SpecIndex, but uses 0-based numbering.
+<li>Annotation - Peptide annotation, with prefix and suffix and (non-fixed) modifications indicated.
+Example: K.DFSQIDNAP+16EER.E
+<li>Protein - The name of the protein this peptide comes from.  (Protein names are stored to the .index file
+corresponding to the database .trie file)
+<li>Charge - Precursor charge.  If "multicharge" is set, or if no charge is specified in the source file, Inspect
+attempts to guess the charge.
+<li>MQScore	- Match quality score, the main measure of match quality.
+<li>Length - The length of the matched peptide in amino acids.</li>
+<li>TotalPRMScore - Summed score for break points (between amino acids), based upon a Bayesian network modeling
+fragmentation propensities
+<li>MedianPRMScore - Median score for break points.</li>
+<li>FractionY - The fraction of charge 1 y ions detected</li>
+<li>FractionB - The fraction of charge 1 b ions detected</li>
+<li>Intensity - Fraction of high-intensity peaks which are b or y fragments.  For a length-n peptide, the top n*3
+peaks are considered.
+<li>NTT - Number of tryptic termini (or Unused, if no protease was specified).  Note that the N- and C-terminus of
+a protein are both considered to be valid termini.
+<li>InspectFDR - This is the FDR of all matches with F-score equal to or greater than this match.  Since Inspect knows
+nothing about a decoy database, it is often best to run ComputeFDR.jar to compute an empirical FDR.
+<li>DeltaScore - Difference between the MQScore of this match and the best alternative
+<li>DeltaScoreOther - Difference between the MQScore of this match and the best alternative from a different locus.
+To see the difference between this and the previous column, consider a search that finds similar matches
+of the form "M+16MALGEER" and "MM+16ALGEER".  In such a case, DeltaScore would be very small, but DeltaScoreOther
+might still be large.
+<li>RecordNumber - Index of the protein record in the database
+<li>DBFilePos - Byte-position of this match within the database
+<li>SpecFilePos - Offset, in the input file, of this spectrum; useful for passing to the "Label" script (see below)
+<li>PrecursorMZ - The precursor m/z given in the spectrum file.
+<li>PrecursorError - The difference (in m/z units) between the precursor m/z given in the file and the theoretical m/z of the identified peptide.
+<li>SpecIndex - The one-based index of the spectrum in the original spectrum file.  Only MS2+ spectra are counted.</li>
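+<br><br>
+The tab-delimited output is easy to post-process directly.  The sketch below (plain Python, not
+part of the Inspect distribution) keeps only the top-MQScore hit for each spectrum; the column
+positions are taken from the list above and should be checked against your own output, and the
+file name is only a placeholder:
+<pre>
+# Keep the best hit per (file, scan) from an Inspect results file.
+# Column positions (0-based) follow the column list above:
+# 0 = SpectrumFile, 1 = Scan#, 5 = MQScore.  Adjust if your version differs.
+Best = {}
+for Line in open("InspectResults.txt"):
+    if Line.startswith("#") or not Line.strip():
+        continue  # skip any header or blank lines
+    Bits = Line.rstrip("\r\n").split("\t")
+    Key = (Bits[0], Bits[1])
+    Score = float(Bits[5])
+    if Key not in Best or Score > Best[Key][0]:
+        Best[Key] = (Score, Line)
+print "Spectra with at least one hit:", len(Best)
+</pre>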
+<hr>
+<h2>Post-processing</h2>
+Python scripts for performing various analyses are included in the distribution.
+<b>Run a script with no command-line parameters to print a list of available arguments.</b><br>
+<li>Label.py - Given a spectrum and a peptide annotation, label the spectrum peaks with
+their associated fragments.  Produces a .png image for a spectrum, with associated peptide interpretation.  Requires
+the Python Imaging Library (PIL).  Sample command:
+<br>     <tt>Label.py Shewanella.mzXML:6200392 R.SNGSIGQNQ+14TPGR.V</tt>
+<li>ComputeFDR.jar - Given Inspect output, filter to a user-determined FDR.  The ComputeFDR.jar script can be used for many experiments, but a typical invocation for Inspect results is:</li>
+<pre>      java -jar ComputeFDR.jar -f InspectResult.out 3 XXX -n 1 -p 2 -s 14 1 -fdr 0.01</pre>
+<li>Summary.py - Given Inspect output, produce an html-format summary of the results.  The report provides
+a "protein-level" look at the results. This script is also used when
+producing a "second-pass" protein database, containing the proteins identified with high confidence.
+<li>PTMAnalysis.py - This script examines output from MS-Alignment (Inspect run in "blind" mode), and
+highlights the most plausible evidence for PTMs.  The script iteratively selects the most common
+post-translational modifications, and reports the selections.  These selections require manual curation
+and/or validation.
+<hr>
diff --git a/docs/Copyright.html b/docs/Copyright.html
new file mode 100644
index 0000000..9930020
--- /dev/null
+++ b/docs/Copyright.html
@@ -0,0 +1,47 @@
+<h1>Inspect: A Proteomics Search Toolkit</h1>
+Copyright 2007, The Regents of the University of California
+<h3>Table of Contents</h3>
+<li><a href="index.html">Overview</a>
+<li><a href="Copyright.html">Copyright information</a>
+<li><a href="Installation.html">Installation</a>
+<li><a href="Database.html">Database</a>
+<li><a href="Searching.html">Searching</a>
+<li><a href="Analysis.html">Analysis</a>
+<li><a href="InspectTutorial.pdf">Basic Tutorial</a>
+<li><a href="InspectAdvancedTutorial.pdf">Advanced Tutorial</a>
+<li><a href="UnrestrictedSearchTutorial.pdf">Unrestricted Search Tutorial</a>
+
+<hr>
+<pre>
+Copyright information:
+----------------------
+
+Copyright 2007,2008,2009 The Regents of the University of California
+All Rights Reserved
+
+Permission to use, copy, modify and distribute any part of this
+program for educational, research and non-profit purposes, by non-profit
+institutions only, without fee, and without a written agreement is hereby
+granted, provided that the above copyright notice, this paragraph and
+the following three paragraphs appear in all copies.
+
+Those desiring to incorporate this work into commercial
+products or use for commercial purposes should contact the Technology
+Transfer & Intellectual Property Services, University of California,
+San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+
+IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+OF SUCH DAMAGE.
+
+THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+</pre>
diff --git a/docs/Database.html b/docs/Database.html
new file mode 100644
index 0000000..f953c6e
--- /dev/null
+++ b/docs/Database.html
@@ -0,0 +1,78 @@
+<h1>Inspect: A Proteomics Search Toolkit</h1>
+Copyright 2007, The Regents of the University of California
+<h3>Table of Contents</h3>
+<li><a href="index.html">Overview</a>
+<li><a href="Copyright.html">Copyright information</a>
+<li><a href="Installation.html">Installation</a>
+<li><a href="Database.html">Database</a>
+<li><a href="Searching.html">Searching</a>
+<li><a href="Analysis.html">Analysis</a>
+<li><a href="InspectTutorial.pdf">Basic Tutorial</a>
+<li><a href="InspectAdvancedTutorial.pdf">Advanced Tutorial</a>
+<li><a href="UnrestrictedSearchTutorial.pdf">Unrestricted Search Tutorial</a>
+
+<hr>
+<h1>Overview</h1>
+Inspect requires a database (a file of protein sequences) in order to interpret spectra.  You
+can specify one or more databases in the Inspect input file.  Databases can be stored in one
+of two formats: A .trie file (bare-bones format with sequence data only), or a .ms2db file
+(simple XML format with exon linkage information).  These two formats are discussed below.
+
+<h1>Sequence Databases (FASTA)</h1>
+For efficiency reasons, Inspect processes FASTA files into its own internal format before
+searching.  A database is stored as two files, one with the extension ".trie" (which holds peptide sequences),
+and one with the extension ".index" (which holds protein names and other meta-data).  To prepare
+the database, first copy the protein sequences of interest into a FASTA file in the Database
+subdirectory.  Then, from the Inspect directory, run the Python script PrepDB.py as follows:<br>
+    <tt><b>python PrepDB.py FASTA MyStuff.fasta</b></tt><br>
+Replace "MyStuff.fasta" with the name of your FASTA database.  After PrepDB has run, the database
+files MyStuff.trie and MyStuff.index will be ready to search. PrepDB.py also handles
+Swiss-prot ".dat file" format as input.
+<br><br>
+Inspect can perform this processing automatically
+(see the "SequenceFile" option in the <a href="Searching.html">searching</a> documentation).  Running
+PrepDB.py is the preferred method since it creates a database file which can be re-used by many
+searches.
+<br><br>
+<b>Note:</b> The database should include all proteins known to be in the sample, otherwise some spectra
+will receive incorrect (and possibly misleading) annotations.  In particular, most databases should
+include trypsin (used to digest proteins) and human keratins (introduced during sample processing).
+The file "CommonContaminants.fasta", in the Inspect directory, contains several protein sequences you
+can append to your database.
+<br><br>
+<h1>Decoy records (ShuffleDB)</h1>
+Databases including "decoy proteins" (shuffled or reversed sequences) are emerging as the
+gold standard for computing false discovery rates.  Inspect can compute p-values in two
+ways:
+    <li>Compute the empirical false discovery rate by counting the number of hits to
+    invalid proteins.  <b>This is the recommended method.</b>  Given an f-score cutoff,
+    Inspect computes the number of shuffled-protein hits above that threshold - these hits
+    are all invalid.  Inspect then estimates the number of invalid hits which happen to
+    fall within valid proteins (a minimal counting sketch appears below).
+    This count provides an empirical false discovery rate, which is reported as the
+    "p-value".
+    <li>By fitting the distribution of F-scores as a mixture model, in the manner of
+    PeptideProphet.  This is how the initial p-values output by inspect are computed.
+    Use PValue.py <b>without</b> the "-S" option to compute p-values using this method.
+<br><br>
+To compute empirical false discovery rates:
+<li> Use the script ShuffleDB.py to append decoy records to a database before searching.  Decoy records have the
+flag "XXX" prefixed to their name.
+<li>After searching, use the script PValue.py (including the "-S" option) to carry out this analysis.
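+<br><br>
+For orientation, the decoy counting itself is simple to reproduce.  The sketch below (plain
+Python, not part of the distribution) assumes the protein name is the fourth tab-delimited
+column of the results file, as listed in the <a href="Analysis.html">Analysis</a> section, and
+that decoy records carry the "XXX" prefix added by ShuffleDB.py; PValue.py performs a more
+careful version of this calculation:
+<pre>
+# Count decoy ("XXX"-prefixed) versus ordinary hits in an Inspect results file.
+DecoyCount = 0
+TargetCount = 0
+for Line in open("InspectResults.txt"):
+    if Line.startswith("#") or not Line.strip():
+        continue
+    Protein = Line.split("\t")[3]  # 0-based column 3 = protein name (see Analysis docs)
+    if Protein.startswith("XXX"):
+        DecoyCount += 1
+    else:
+        TargetCount += 1
+# With equal-size target and decoy databases, the decoy count estimates how many of the
+# ordinary hits are spurious, giving a rough false discovery rate:
+print "Approximate FDR: %.3f" % (DecoyCount / float(max(1, TargetCount)))
+</pre>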
+
+<h1>MS2DB Format</h1>
+The MS2DB file format is a simple, extensible XML format for storing proteins.  The main benefits of
+using MS2DB format instead of FASTA files are:
+    <li>Reduced redundancy - Each exon is stored once, and only once
+    <li>Splice information - All isoforms (and sequence variants) corresponding to a locus are grouped
+    as one Gene, which reduces the usual confusion between proteins and records.
+    <li>Site-specific modifications - Known modifications, such as phosphorylation, can be
+    explicitly indicated.  Considering these site-specific modifications is much cheaper than
+    a search that attempts to discover new modifications.
+    <li>Rich annotations - The format has places to store information such as accession numbers from
+    sequence repositories, species name, etc.
+<br><br>
+You can use the script BuildMS2DB.jar to generate an MS2DB file.  As input, you will need:
+<li>One or more files in GFF3 format containing exon predictions
+<li>A FASTA file containing the sequences on which the exons are predicted</li>
+For more details on using BuildMS2DB.jar (and MS2DBShuffler.jar for building a decoy database), please read the information on proteogenomics found <a href="http://cseweb.ucsd.edu/~ncastell/Maize">here</a>.
\ No newline at end of file
diff --git a/docs/InspectTutorial.pdf b/docs/InspectTutorial.pdf
new file mode 100644
index 0000000..66b7ec2
Binary files /dev/null and b/docs/InspectTutorial.pdf differ
diff --git a/docs/Installation.html b/docs/Installation.html
new file mode 100644
index 0000000..3f5c0ff
--- /dev/null
+++ b/docs/Installation.html
@@ -0,0 +1,42 @@
+<h1>Inspect: A Proteomics Search Toolkit</h1>
+Copyright 2007, The Regents of the University of California
+<h3>Table of Contents</h3>
+<li><a href="index.html">Overview</a>
+<li><a href="Copyright.html">Copyright information</a>
+<li><a href="Installation.html">Installation</a>
+<li><a href="Database.html">Database</a>
+<li><a href="Searching.html">Searching</a>
+<li><a href="Analysis.html">Analysis</a>
+<li><a href="InspectTutorial.pdf">Basic Tutorial</a>
+<li><a href="InspectAdvancedTutorial.pdf">Advanced Tutorial</a>
+<li><a href="UnrestrictedSearchTutorial.pdf">Unrestricted Search Tutorial</a>
+
+<hr>
+<h1>Installation</h1>
+To install Inspect, first unzip the source distribution.  You may need to compile it:
+<li> On Windows, use the included executable
+inspect.exe.  You can build the source code on Windows using the makefile, or
+the included Visual Studio project.
+<li> On Linux, first install <a href="http://sourceforge.net/projects/expat/">expat</a>.  Then,
+build Inspect using the included makefile.
+<li> Macintosh is not officially supported.  Follow the instructions for Linux; they are likely to work.
+<br><br>
+Other things to do while you're installing:
+<li> Inspect requires <a href="http://www.python.org">Python</a> (version 2.1 or later) in order
+to run various analysis and utility scripts.
+<li> (Optional) You may wish to
+install the <a href="http://www.pythonware.com/products/pil/">Python Imaging Library (PIL)</a>
+for generation of simple spectrum images.
+<li> (Optional) If the <a href="http://psyco.sourceforge.net/">psyco</a>
+library is installed, it is automatically loaded to speed up analysis scripts; this is entirely
+optional.
+<li> (Recommended) The Python numeric library (numpy) is required for some analysis scripts.
+<li>The distribution includes some system tests.  After installing, go to the Inspect directory, and
+run them to be sure that things are installed properly:<br>
+<tt><b>python SystemTest.py</b></tt><br>
+After the run completes, any errors will be reported.  Files used by the system tests are stored
+in the "SystemTest" subdirectory.  The test input file "TestInput.txt" is annotated
+with comments, and you can refer to it (or copy and modify it) when starting up searches.
+<br><br>
+If the tests fail, please <a href="mailto:spayne at ucsd.edu">submit a bug report</a> (and include
+any relevant-looking error messages).
diff --git a/docs/MS2DB.html b/docs/MS2DB.html
new file mode 100644
index 0000000..f89035e
--- /dev/null
+++ b/docs/MS2DB.html
@@ -0,0 +1,51 @@
+<h1>Inspect: A Proteomics Search Toolkit</h1>
+Copyright 2007, The Regents of the University of California
+<h3>Table of Contents</h3>
+<li><a href="index.html">Overview</a>
+<li><a href="Copyright.html">Copyright information</a>
+<li><a href="Installation.html">Installation</a>
+<li><a href="Database.html">Database</a>
+<li><a href="Searching.html">Searching</a>
+<li><a href="Analysis.html">Analysis</a>
+<li><a href="InspectTutorial.pdf">Basic Tutorial</a>
+<li><a href="InspectAdvancedTutorial.pdf">Advanced Tutorial</a>
+<li><a href="UnrestrictedSearchTutorial.pdf">Unrestricted Search Tutorial</a>
+
+<hr>
+
+MS2DB is a relatively straightforward file format.  For now, the documentation is limited to
+an example.
+<br><br>
+<b>TODO:</b> Document all available tags and attributes.
+
+<h3>Abbreviated example</h3>
+<pre>
+<Database>
+<Gene ExonCount="14" Chromosome="chr1" ForwardFlag="1" Name="At1g02100">
+  <Exon Index="0" Start="389875" End="389943">
+    <ExonSequence Length="22">MAESRSNRAAVQATNDDASASK</ExonSequence>
+  </Exon>
+  <Exon Index="1" Start="390036" End="390250">
+    <ExonSequence Length="71">SCVKKGYMKDDYVHLFVKRPVRRSPIINRGYFSRWAAFRKLMSQFLLSGTSSKKQILSLGAGFDTTYFQLL</ExonSequence>
+    <LinkFrom Index="0" AA="L" />
+  </Exon>
+
+[.......]
+
+  <Exon Index="12" Start="392261" End="392300">
+    <ExonSequence Length="13">EHYCVTYAVNDAM</ExonSequence>
+    <LinkFrom Index="9" />
+  </Exon>
+  <Exon Index="13" Start="392373" End="392448">
+    <ExonSequence Length="25">GIFGDFGFTREGGGERMSSSASSPX</ExonSequence>
+    <LinkFrom Index="12" />
+  </Exon>
+  <CrossReference Database"Salk" ID="At1g02100.1">
+    <CRExons Index="0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 12, 13 />
+  </CrossReference>
+  <CrossReference Database"Salk" ID="At1g02100.2">
+    <CRExons Index="0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 />
+  </CrossReference>
+</Gene>
+</Database>
+</pre>
\ No newline at end of file
diff --git a/docs/PLSTutorial.pdf b/docs/PLSTutorial.pdf
new file mode 100644
index 0000000..de5adbb
Binary files /dev/null and b/docs/PLSTutorial.pdf differ
diff --git a/docs/RunningInspectOnTheFWGrid.pdf b/docs/RunningInspectOnTheFWGrid.pdf
new file mode 100644
index 0000000..20fdc8a
Binary files /dev/null and b/docs/RunningInspectOnTheFWGrid.pdf differ
diff --git a/docs/Searching.html b/docs/Searching.html
new file mode 100644
index 0000000..5ec764c
--- /dev/null
+++ b/docs/Searching.html
@@ -0,0 +1,128 @@
+<h1>Inspect: A Proteomics Search Toolkit</h1>
+Copyright 2007, The Regents of the University of California
+<h3>Table of Contents</h3>
+<li><a href="index.html">Overview</a>
+<li><a href="Copyright.html">Copyright information</a>
+<li><a href="Installation.html">Installation</a>
+<li><a href="Database.html">Database</a>
+<li><a href="Searching.html">Searching</a>
+<li><a href="Analysis.html">Analysis</a>
+<li><a href="InspectTutorial.pdf">Basic Tutorial</a>
+<li><a href="InspectAdvancedTutorial.pdf">Advanced Tutorial</a>
+<li><a href="UnrestrictedSearchTutorial.pdf">Unrestricted Search Tutorial</a>
+
+<hr>
+<h1>Searching</h1>
+
+To run a search, you first create an Inspect <b>input file</b>.  The input file is a text file that
+tells Inspect what to do.  Each line of the
+input file has the form [COMMAND],[VALUE].  For example, one line might be "spectra,spec18.dta",
+where the command is "spectra" and the value is "spec18.dta".  Inspect ignores blank lines.  You can
+include comments by starting lines with a hash character (#).  Here is an example
+of what an input file might look like:
+<pre>
+spectra,Fraction01.mzxml
+instrument,ESI-ION-TRAP
+protease,Trypsin
+DB,TestDatabase.trie
+# Protecting group on cysteine:
+mod,57,C,fix
+</pre>
+<br>Here are the
+available input file commands.  Those you are most likely to set are listed first.  The only required
+commands are one or more "spectra" commands, and either "db" or "SequenceFile".  Commands are
+case-insensitive (type "Spectra" or "spectra", it doesn't matter).  Values are case-insensitive with
+the exception (on Linux) of filenames.  If Inspect doesn't understand a command, it will print a
+warning and ignore it.
+<br><br>
+<li><b>spectra,[FILENAME]</b> - Specifies a spectrum file to search.  You can specify the name of a
+directory to search every file in that directory (non-recursively).<br>
+Preferred file formats: .mzXML and .mgf <br>
+Other accepted file formats: .mzData, .ms2, .dta.
+Note that multiple spectra in a single .dta file are <b>not</b> supported.
+<br>
+<li><b>db,[FILENAME]</b> - Specifies the name of a database (.trie file) to search.  The .trie file
+contains one or more protein sequences delimited by asterisks, with no whitespace or other data.
+Use PrepDB.py (see <a href="Database.html">Databases</a>) to prepare a database.  You should specify
+at least one database.  You may specify several databases; if so, each database will be searched in turn.
+<li><b>SequenceFile,[FILENAME]</b> - Specifies the name of a FASTA-format protein database to search.  If
+you plan to search a large database, it is more efficient to preprocess it using PrepDB.py and use the "db"
+command instead.  You can specify at most one SequenceFile.
+<br>
+<li><b>protease,[NAME]</b> - Specifies the name of a protease.  "Trypsin", "None", and "Chymotrypsin" are
+the available values.  If tryptic digest is specified, then matches with non-tryptic termini are penalized.
+<br>
+<li><b>mod,[MASS],[RESIDUES],[TYPE],[NAME]</b> - Specifies an amino acid modification.  The delta mass
+(in daltons) and affected amino acids are required.  The first four characters of the name should be
+unique.  Valid values for "type" are "fix", "cterminal", "nterminal", and "opt" (the default).  For a guide
+to various known modification types, consult the following databases:
+<li> <a href="http://www.abrf.org/index.cfm/dm.home">ABRF mass delta reference</a>
+<li> <a href="http://www.unimod.org">UNIMOD database</a>
+<li>RESID database of modifications
+Examples:
+<br><tt>mod,+57,C,fix</tt> - Most searches should include this line.  It reflects the addition of CAM
+(carbamidomethylation, done by adding iodoacetamide) which prevents cysteines from forming disulfide bonds.
+<br><tt>mod,80,STY,opt,phosphorylation</tt>
+<br><tt>mod,16,M</tt> (Oxidation of methionine, seen in many samples)
+<br><tt>mod,43,*,nterminal</tt> (N-terminal carbamylation, common if sample is treated with urea)
+<br>
+<b>Important note:</b> When searching for phosphorylation sites, use a modification with the name "phosphorylation".
+This lets Inspect know that it should use its model of phosphopeptide fragmentation
+when generating tags and scoring matches.  (Phosphorylation of serine dramatically affects fragmentation, so
+modeling it as simply an 80Da offset is typically <b>not</b> sufficient to detect sites with high sensitivity)
+<li><b>Mods,[COUNT]</b> - Number of PTMs permitted in a single peptide.  Set this to 1 (or higher) if you
+               specify PTMs to search for.
+<li><b>Unrestrictive,[FLAG]</b> - If FLAG is 1, use the MS-Alignment algorithm to perform an <b>unrestrictive</b>
+search (allowing arbitrary modification masses).  Running an unrestrictive search with one mod per peptide is slower than the
+normal (tag-based) search; running time is approximately 1 second per spectrum per megabyte of database.  Running an unrestrictive search
+with two mods is significantly slower.  We recommend performing unrestrictive searches against a small database, containing proteins
+output by an earlier search.  (The "Summary.py" script can be used to generate a second-pass database
+from initial search results; see <a href="Analysis.html">Analysis</a>)
+<li><b>MaxPTMSize,[SIZE]</b> - For blind search, specifies the maximum modification size (in Da) to consider.
+Defaults to 250.  Larger values require more time to search.
+<li><b>PMTolerance,[MASS]</b> - Specifies the parent mass tolerance, in Daltons.  A candidate's
+flanking mass can differ from the tag's flanking mass by no more than this amount.  Default value
+is 2.5.  Note that secondary ions are often selected for fragmentation, so parent mass errors near
+1.0Da or -1.0Da are not uncommon in typical datasets, even on FT machines.
+<li><b>ParentPPM,[MASS]</b> - Specifies a parent mass tolerance, in parts per million.  Alternative to PMTolerance.
+<li><b>IonTolerance,[MASS]</b> - Error tolerance for how far ion fragments (b and y peaks) can be
+    shifted from their expected masses.  Default is 0.5.  Higher values produce a more sensitive but much slower search.
+<li><b>PeakPPM,[MASS]</b> - Specifies a fragment mass tolerance, in parts per million.  Alternative to IonTolerance.
+<li><b>MultiCharge,[FLAG]</b> - If set to true, attempt to guess the precursor charge and mass, and consider
+multiple charge states if feasible.
+<li><b>Instrument,[TYPE]</b> - Options are ESI-ION-TRAP (default), QTOF, and FT-Hybrid.  If set to ESI-ION-TRAP,
+Inspect attempts to correct the parent mass.  If set to QTOF, Inspect uses a fragmentation model trained on
+QTOF data.  (QTOF data typically features a stronger y ladder and weaker b ladder than other spectra).
+<li><b>RequiredMod,[NAME]</b> - The specified modification MUST be found somewhere on the peptide.
+<li><b>TagCount,[COUNT]</b> - Number of tags to generate
+<li><b>TagLength,[LENGTH]</b> - Length of peptide sequence tags.  Defaults to 3.  Accepted values are 1 through 6.
+<li><b>RequireTermini,[COUNT]</b> - If set to 1 or 2, require 1 or 2 valid proteolytic termini.  Deprecated, because
+    the scoring model already incorporates the number of valid (tryptic) termini.
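+<br><br>
+As an illustration, the options above combine into a single input file, one option per line.  A minimal
+sketch for a restrictive phosphorylation search might look like the following; the <tt>spectra</tt> and
+<tt>db</tt> lines stand in for the spectrum-file and database options, and all file names and tolerance
+values are placeholders to adapt to your own data.
+<br><tt>spectra,MySpectra.mzXML</tt>
+<br><tt>db,MyDatabase.trie</tt>
+<br><tt>mod,+57,C,fix</tt>
+<br><tt>mod,80,STY,opt,phosphorylation</tt>
+<br><tt>Mods,1</tt>
+<br><tt>PMTolerance,2.5</tt>
+<br><tt>IonTolerance,0.5</tt>
+<br><tt>Instrument,ESI-ION-TRAP</tt>
+<br>(For an unrestrictive search, add <tt>Unrestrictive,1</tt> and, if desired, <tt>MaxPTMSize</tt>, as described above.)
+<br><br>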
+<h3>Non-standard options:</h3>
+<b>TagsOnly</b> - Tags are generated and written to the specified output file.  No search is performed.
+
+<hr>
+<h2>Command-line arguments</h2>
+Inspect accepts a few command-line options; most settings are specified in an <b>input file</b> rather
+than on the command line.  The command-line options are:
+    <li> <b>-i</b> Input file name.  Defaults to "Input.txt"
+    <li> <b>-o</b> Output file name.  Defaults to "Inspect.txt"
+    <li> <b>-e</b> Error file name.  Defaults to "Inspect.err".
+    <li> <b>-r</b> The resource directory.  Defaults to the current working directory.  The resource directory
+is where Inspect searches for its resource files such as AminoAcidMasses.txt.
+<br><br>
+Sample usage:<br>
+On Windows: <b>Inspect -i TripureIn.txt -o TripureOut.txt</b><br>
+On Linux: <b>./inspect -i TripureIn.txt -o TripureOut.txt</b><br>
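+<br>
+If Inspect is run from a directory other than its installation directory, the resource directory (and,
+optionally, a separate error file) can be passed explicitly as well; the paths in this illustrative
+example are placeholders:<br>
+<b>./inspect -i TripureIn.txt -o TripureOut.txt -r /usr/local/inspect/ -e TripureErrors.txt</b><br>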
+<h3>Error Reporting</h3>
+If Inspect encounters a problem - such as a spectrum file with garbled format, or
+running out of memory - it reports the problem to the error file.  One error (or warning)
+is reported per line of the file, and each error/warning type has an ID, to make them
+easier to parse.  If no error file is left behind after a run, then there were no errors - this
+is a good thing!
+<br><br>Here is a sample error message, where I gave Inspect an incorrect file name:<br>
+<tt>[E0008] .\ParseInput.c:725:Unable to open requested file '.\Database\TestDatbaase.trie'</tt>
+<br>
+And here is a sample warning message, where - on a small search - Inspect was not able to re-fit the p-value distribution:<br>
+<tt>{W0010} .\PValue.c:396:Few spectra were searched; not recalibrating the p-value curve.</tt><br>
diff --git a/docs/UnrestrictedSearchTutorial.pdf b/docs/UnrestrictedSearchTutorial.pdf
new file mode 100644
index 0000000..038d0ba
Binary files /dev/null and b/docs/UnrestrictedSearchTutorial.pdf differ
diff --git a/docs/index.html b/docs/index.html
new file mode 100644
index 0000000..2f11c01
--- /dev/null
+++ b/docs/index.html
@@ -0,0 +1,42 @@
+<h1>InsPecT: A Proteomics Search Toolkit</h1>
+Copyright 2007, The Regents of the University of California
+<br>Version 20120103 documentation
+<h3>Table of Contents</h3>
+<li><a href="index.html">Overview</a>
+<li><a href="Copyright.html">Copyright information</a>
+<li><a href="Installation.html">Installation</a>
+<li><a href="Database.html">Database</a>
+<li><a href="Searching.html">Searching</a>
+<li><a href="Analysis.html">Analysis</a>
+<li><a href="InspectTutorial.pdf">Basic Tutorial</a>
+<li><a href="InspectAdvancedTutorial.pdf">Advanced Tutorial</a>
+<li><a href="UnrestrictedSearchTutorial.pdf">Unrestricted Search Tutorial</a>
+<hr>
+<h1>Documentation Overview</h1>
+This is the documentation for InsPecT, an MS/MS database search tool.  A general description of the
+program can be found <a href="http://proteomics.bioprojects.org/Software/Inspect.html">here</a>
+online.  The documentation contains two general types of information: tutorials and docs pages.
+The tutorials walk through basic setup and usage of InsPecT and are <b>highly</b> recommended.
+The docs pages are more detailed documentation on options available within the program.
+<br><br>
+InsPecT was developed at the University of California, San Diego and the project homepage
+is <a href="http://proteomics.ucsd.edu/">here</a>.  A Windows executable is
+available for download, as well as the ANSI C source code (which compiles on Windows,
+Linux, or Macintosh). Inspect is free for educational, research, and non-profit purposes.
+<br><br>
+The following publications provide additional information on InsPecT; you may wish to
+cite them if you use InsPecT search results in your research:
+<br>
+<li>S. Tanner, H. Shu, A. Frank, L. Wang, E. Zandi, M. Mumby, P.A. Pevzner, and V. Bafna.
+InsPecT: Fast and accurate identification of post-translationally modified peptides
+from tandem mass spectra. Anal. Chem., 77(14):4626-4639, 2005.
+<li>D. Tsur, S. Tanner, E. Zandi, V. Bafna, and P.A. Pevzner.
+Identification of post-translational modifications via blind search of mass spectra.
+Nature Biotechnology, 23:1562-1567, December 2005.
+<br><br>
+The authors and <a href="mailto:vbafna at cs.ucsd.edu">principal investigator</a> welcome questions, comments, and corrections.
+<hr>
+<h3>Bugs</h3>
+Bugs in Inspect are tracked using <a href="http://bugs.bioprojects.org">JIRA</a>.  If you encounter
+problems, please submit a bug report online!
+
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..c1852b5
--- /dev/null
+++ b/main.c
@@ -0,0 +1,863 @@
+//Title:          main.c
+//Authors:        Stephen Tanner, Samuel Payne, Natalie Castellana, Pavel Pevzner, Vineet Bafna
+//Created:        2005
+// Copyright 2007,2008,2009 The Regents of the University of California
+// All Rights Reserved
+//
+// Permission to use, copy, modify and distribute any part of this
+// program for educational, research and non-profit purposes, by non-profit
+// institutions only, without fee, and without a written agreement is hereby
+// granted, provided that the above copyright notice, this paragraph and
+// the following three paragraphs appear in all copies.
+//
+// Those desiring to incorporate this work into commercial
+// products or use for commercial purposes should contact the Technology
+// Transfer & Intellectual Property Services, University of California,
+// San Diego, 9500 Gilman Drive, Mail Code 0910, La Jolla, CA 92093-0910,
+// Ph: (858) 534-5815, FAX: (858) 534-7345, E-MAIL:invent at ucsd.edu.
+//
+// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
+// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
+// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN
+// IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY
+// OF SUCH DAMAGE.
+//
+// THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE UNIVERSITY
+// OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
+// ENHANCEMENTS, OR MODIFICATIONS.  THE UNIVERSITY OF CALIFORNIA MAKES NO
+// REPRESENTATIONS AND EXTENDS NO WARRANTIES OF ANY KIND, EITHER IMPLIED OR
+// EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF
+// THE SOFTWARE WILL NOT INFRINGE ANY PATENT, TRADEMARK OR OTHER RIGHTS.
+
+
+// Inspect - a tool for efficient peptide MS/MS interpretation in the
+// presence of post-translational modifications.
+
+// Inspect can use partial de novo to generate tags, then use the tags 
+// to search a protein database for matching peptides.  A tag
+// has a prefix mass, a short sequence of amino acids, and a suffix mass.  Typically, 
+// tags are tripeptides.  We use a trie structure (Aho-Corasick algorithm) to find 
+// occurrences of our tag strings in the database, then examine the flanking masses 
+// to be sure they match.  The flanking mass comparison is a d.p. 'hit extension' 
+// algorithm.
+//
+// Inspect requires a database file in the correct format.  The file
+// should contain protein sequences concatenated together, separated by asterisks.
+// No whitespace or newlines.  Like this:  PANTS*GWWYTT*GAAH
+// The PrepDB.py script compresses a Swiss-prot or FASTA database into
+// concatenated format.  An accompanying .index file is produced, so that the
+// name of a matched protein can be reported.
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <time.h>
+#include <string.h>
+#include "Trie.h"
+#include "Utils.h"
+#include "Spectrum.h"
+#include "Mods.h"
+#include "Score.h"
+#include "Tagger.h"
+#include "FreeMod.h"
+#include "CMemLeak.h"
+#include "SVM.h"
+#include "BN.h"
+#include "LDA.h"
+#include "Run.h"
+#include "SNP.h"
+#include "SpliceDB.h"
+#include "ChargeState.h"
+#include "Scorpion.h"
+#include "ParseXML.h"
+#include "SpliceScan.h"
+#include "ParseInput.h"
+#include "PValue.h"
+#include "Errors.h"
+#include "BuildMS2DB.h"
+#include "IonScoring.h"
+#include "TagFile.h" //ARI_MOD
+
+// Global variables, shared between main.c and Trie.c:
+extern Options* GlobalOptions;
+extern MSSpectrum* Spectrum;
+
+// Array of spectra to be searched.  We put them into an array so that we can qsort
+// them.  (Not crucial, but it's nice to get output in order)
+extern SpectrumNode* g_BigNodeArray;
+
+extern StringNode* FirstTagCheckNode;
+extern StringNode* LastTagCheckNode;
+
+
+void PrintUsageInfo()
+{
+    printf("\nSample command-line:\n");
+    printf("Inspect.exe -i Foo.in -o Foo.txt -e ErrorsFoo.txt\n");
+    printf("Command-line arguments:\n");
+    printf(" -i InputFileName: Path to a config file specifying search parameters.\n");
+    printf(" -o OutputFileName: Output file for match results.  If not\n");
+    printf("          specified, output goes to stdout.\n");
+    printf(" -e ErrorFileName: Output file for errors and warnings, if any.  If not\n");
+    printf("          specified, any errors go to Inspect.err; if there are no errors.\n");
+    printf("          or warnings reported, this file will be erased at end of run.\n");
+    printf(" -r ResourceDir: Directory for resource files (such \n");
+    printf("     as AminoAcidMasses.txt).  Defaults to current directory. \n");
+    printf(" -a AminoAcidMassesFile: Specify a file containing non-standard amino acid masses. \n");
+    printf("  Consult the documentation (Inspect.html) for further details.\n");
+}
+
+void FreeSpectra()
+{
+    SpectrumNode* Node;
+    SpectrumNode* Prev = NULL;
+    //
+    for (Node = GlobalOptions->FirstSpectrum; Node; Node = Node->Next)
+    {
+        if (Prev)
+        {
+            FreeSpectrum(Prev->Spectrum);
+            Prev->Spectrum = NULL;
+            // Important: don't free spectrum nodes, because they all come from one big array!
+            //FreeSpectrumNode(Prev);
+        }
+        Prev = Node;
+        //FreeSpectrum(Node->Spectrum);
+    }
+    if (Prev)
+    {
+        FreeSpectrum(Prev->Spectrum);
+        Prev->Spectrum = NULL;
+    }
+    GlobalOptions->FirstSpectrum = NULL;
+    GlobalOptions->LastSpectrum = NULL;
+}
+
+
+void FreeGlobalOptions()
+{
+
+    StringNode* Prev;
+    StringNode* GFFNode;
+    DatabaseFile* PrevDB;
+    DatabaseFile* DatabaseNode;
+
+    if (!GlobalOptions)
+    {
+        return;
+    }
+
+    // Free the list FirstGFFFileName...LastGFFFileName
+    Prev = NULL;
+    for (GFFNode = GlobalOptions->FirstGFFFileName; GFFNode; GFFNode = GFFNode->Next)
+    {
+        if (Prev)
+        {
+            SafeFree(Prev->String);
+            SafeFree(Prev);
+        }
+        Prev = GFFNode;
+    }
+    if (Prev)
+    {
+        SafeFree(Prev->String);
+        SafeFree(Prev);
+    }
+
+    // Free the DatabaseFile list:
+    PrevDB = NULL;
+    for (DatabaseNode = GlobalOptions->FirstDatabase; DatabaseNode; DatabaseNode = DatabaseNode->Next)
+    {
+        SafeFree(PrevDB);
+        PrevDB = DatabaseNode;
+    }
+
+    // Free the overall struct:
+    SafeFree(GlobalOptions);
+    GlobalOptions = NULL;
+}
+
+// Free various structs that we built up.  (This isn't strictly necessary, since we're about
+// to exit the process anyway, but it's good practice)
+// NOTE: After calling Cleanup(), you can't call Log() any more, because GlobalOptions no longer
+// points at a log file.
+void Cleanup()
+{
+    //printf("Cleaning up...\n");
+    FreeMassDeltaByMass();
+    FreeMassDeltas();
+    FreeIsSubDecoration();
+    //FreeTaggingModel();
+    FreeJumpingHash();
+    FreeSVMModels();
+    FreeBayesianModels();
+    FreeTagCheckNodes();
+    FreeInputFileNodes();
+    FreeLDAModels();
+    FreeCCModelSVM();
+    FreeTagSkewScores();
+    if (GlobalOptions)
+    {
+        FreeSpectra();
+        // Close our error file.  And if we never wrote errors or warnings, erase it!
+        if (GlobalOptions->ErrorFile)
+        {
+            fclose(GlobalOptions->ErrorFile);
+            GlobalOptions->ErrorFile = NULL;
+            if (!GlobalOptions->ErrorCount && !GlobalOptions->WarningCount)
+            {
+                unlink(GlobalOptions->ErrorFileName);
+            }
+        }
+        FreeGlobalOptions();
+    }
+    SafeFree(g_BigNodeArray);
+    g_BigNodeArray = NULL;
+    SafeFree(GlobalStats);
+    GlobalStats = NULL;
+    FreeExternalTagHolder(); //ARI_MOD
+}
+
+
+// Parse the command-line arguments, and populate GlobalOptions.  
+// Returns 1 on success, 0 if the args are invalid.
+int ReadCommandLineArgs(int argc, char** argv)
+{
+    int Index = 1;
+    int MoreArgs;
+    int Result;
+    char PeptideFilePath[2048];
+    int AASet = 0;
+    if (argc<2)
+    {
+        return 0;
+    }
+    while (Index < argc)
+    {
+        if (argv[Index][0] != '-')
+        {
+            REPORT_ERROR_S(18, argv[Index]);
+            return 0;
+        }
+        // Are there args after this one?
+        if (Index < argc-1)
+        {
+            MoreArgs = 1;
+        }
+        else
+        {
+            MoreArgs = 0;
+        }
+        switch (ConvertToLower(argv[Index][1]))
+        {
+        case 'i': // Input options file name
+            if (!MoreArgs)
+            {
+                REPORT_ERROR_S(19, "-i");
+                return 0;
+            }
+            strncpy(GlobalOptions->InputFileName, argv[Index + 1], MAX_FILENAME_LEN);
+            Index += 2;
+            break;
+        case 'o':
+            if (!MoreArgs)
+            {
+                REPORT_ERROR_S(19, "-o");
+                return 0;
+            }
+            strncpy(GlobalOptions->FinalOutputFileName, argv[Index + 1], MAX_FILENAME_LEN);
+            Index += 2;
+            break;        
+        case 'e':
+            if (!MoreArgs)
+            {
+                REPORT_ERROR_S(19, "-e");
+                return 0;
+            }
+            strncpy(GlobalOptions->ErrorFileName, argv[Index + 1], MAX_FILENAME_LEN);
+            Index += 2;
+            break;        
+
+        case 'r':
+            if (!MoreArgs)
+            {
+                REPORT_ERROR_S(19, "-r");
+                return 0;
+            }
+            strcpy(GlobalOptions->ResourceDir, argv[Index + 1]);
+            printf("Setting resource directory: '%s'\n", argv[Index + 1]);
+            if (*(GlobalOptions->ResourceDir + strlen(GlobalOptions->ResourceDir) - 1) != SEPARATOR)
+            {
+                strcat(GlobalOptions->ResourceDir, SEPARATOR_STRING);
+            }
+            printf("Resource directory is: '%s'\n", GlobalOptions->ResourceDir);
+            Index += 2;
+            break;
+        case 'v':
+            GlobalOptions->VerboseFlag = 1;
+            Index++;
+            break;
+        case 'a':
+            if (!MoreArgs)
+            {
+                REPORT_ERROR_S(19, "-a");
+                return 0;
+            }
+            strcpy(GlobalOptions->AminoFileName, argv[Index + 1]);
+            printf("Setting amino acid masses: '%s'\n", GlobalOptions->AminoFileName);
+            AASet = 1;
+            Index += 2;
+            break;
+        default:
+            printf("Error: I don't understand this argument '%s'.\n", argv[Index]);
+            return 0;
+        }
+    }
+
+    // Read the table of amino acid masses:
+    if(AASet == 1)
+      {
+	sprintf(PeptideFilePath, "%s", GlobalOptions->AminoFileName);
+	Result = LoadPeptideMasses(PeptideFilePath);
+      
+	if(!Result)
+	  {
+	    sprintf(PeptideFilePath, "%s%s", GlobalOptions->ResourceDir,GlobalOptions->AminoFileName);
+	    Result = LoadPeptideMasses(PeptideFilePath);
+	    
+	  }
+      }
+    else
+      {
+	sprintf(PeptideFilePath, "%s%s",GlobalOptions->ResourceDir, FILENAME_AMINO_ACID_MASSES);
+	Result = LoadPeptideMasses(PeptideFilePath);
+	if (!Result)
+	  {
+	    Result = LoadPeptideMasses(NULL);
+	  }
+      }
+    if (!Result)
+    {
+        printf("Error - couldn't load amino acid masses!\n");
+        return 0;
+    }
+    // If -r argument wasn't passed, then use the current working directory:
+    if (!GlobalOptions->ResourceDir[0])
+    {
+        sprintf(GlobalOptions->ResourceDir, ".%c", SEPARATOR);
+    }
+    if (GlobalOptions->InputFileName)
+    {
+        //printf("Parse input file:\n");
+        Result = ParseInputFile();
+	
+        //printf("Input file parse result %d\n", Result);
+        if (!Result)
+        {
+            return 0;
+        }
+        SortSpectra();
+    }
+
+    // If no spectra were specified, then error out - unless we're running a 
+    // mode that requires no spectra.
+    if (!GlobalOptions->FirstSpectrum)
+    {
+        if (!(GlobalOptions->RunMode & RUN_MODE_PREP_MS2DB))
+        {
+            REPORT_ERROR(11);
+            return 0;
+        }
+    }
+
+    if (!(*GlobalOptions->FinalOutputFileName))
+    {
+        sprintf(GlobalOptions->FinalOutputFileName, "Inspect.txt");
+    }
+
+    return 1;
+}
+
+// Perform miscellaneous chores *after* reading the input script and *before* starting to search.
+int Initialize()
+{
+    char Path[2048];
+
+    sprintf(Path, "%s%s", GlobalOptions->ResourceDir, FILENAME_MASS_DELTAS);
+    if (!MassDeltas)
+    {
+       
+      if (GlobalOptions->RunMode & (RUN_MODE_BLIND|RUN_MODE_BLIND_TAG))
+       {
+            //LoadMassDeltas(Path, 0);
+       }
+       else
+       {
+    	  LoadMassDeltas(Path, GlobalOptions->RunMode & (RUN_MODE_MUTATION | RUN_MODE_TAG_MUTATION));  
+       }
+    }
+    InitBayesianModels();
+    SetTagSkewScores();
+
+    if(GlobalOptions->RunMode & (RUN_MODE_MUTATION | RUN_MODE_TAG_MUTATION))
+      LoadMassDeltas(Path,1);
+    if (GlobalOptions->RunMode & (RUN_MODE_BLIND | RUN_MODE_BLIND_TAG))
+    {
+      
+      //FreeMassDeltas();
+        LoadMassDeltas(NULL, 0);
+        AddBlindMods();
+    }
+    else
+    {
+        InitMassDeltaByMass();
+	//debugMassDeltaByMass();
+    }
+
+    PopulateJumpingHash();
+    //LoadFlankingAminoEffects();
+    //sprintf(Path, "%s%s", GlobalOptions->ResourceDir, FILENAME_SCORING_MODEL);
+    //Result = InitScoringModel(Path);
+    //if (!Result)
+    //{
+    //    printf("Error loading scoring model from file '%s'\n", Path);
+    //    return 0;
+    //}
+
+#ifdef MQSCORE_USE_SVM
+    InitPValueSVM();
+#else
+    InitPValueLDA();
+#endif
+
+    return 1;
+}
+
+// Offshoot of main() for handling spliced-database creation and maintenance:
+void MainSpliceDB(int argc, char** argv)
+{
+    int ChromosomeNumber;
+    int ReverseFlag;
+    char* GeneName;
+    char* CustomFileName;
+    int IntervalStart = -1;
+    int IntervalEnd = -1;
+    int MinORFLength;
+    char SNPFileName[256];
+    //
+    // inspect <chromosome> <reverseflag> [MinORFLength] [ GeneName, OutputFileName, IntervalStart, IntervalEnd ]
+    
+    ChromosomeNumber = atoi(argv[1]);
+    ReverseFlag = atoi(argv[2]);
+    if (argc > 3)
+    {
+        MinORFLength = atoi(argv[3]);
+    }
+    else
+    {
+        MinORFLength = 50;//DEFAULT_MINIMUM_ORF_LENGTH;
+    }
+    
+    if (MinORFLength == 0)
+    {
+        MinORFLength = -1;
+    }
+    printf("MainSpliceDB() chrom %d reverse %d minorf %d\n", ChromosomeNumber, ReverseFlag, MinORFLength);
+    // Read a linked-list of all the polymorphisms we'd like to account for:
+    sprintf(SNPFileName, "SNP\\%d.snp", ChromosomeNumber);
+    ParsePolyNodes(SNPFileName); // %%% ARABIDOPSIS: No polynodes available
+    printf("PolyNodes parsed\n");
+    if (argc > 4)
+    {
+        GeneName = argv[4];
+        CustomFileName = argv[5];
+        IntervalStart = atoi(argv[6]);
+        IntervalEnd = atoi(argv[7]);
+        PrepareOneGeneSpliceDB(ChromosomeNumber, ReverseFlag, IntervalStart, IntervalEnd, CustomFileName, GeneName, MinORFLength);
+    }
+    else
+    {
+        printf("PrepareSpliceDB...\n");
+        PrepareSpliceDB(ChromosomeNumber, ReverseFlag, MinORFLength);
+    }
+    FreePolyNodes();
+    
+}
+
+// MainTraining() is called if the first command-line argument is "train".
+// Syntax is:
+// inspect.exe train [model] [OracleFile] [SpectrumDir] [extra]
+// Example:
+// inspect.exe train pmc c:\ms\TrainingSet.txt c:\ms\TrainingSet
+// 
+// Output format depends on the particular model, but generally we spew out a delimited text file
+// which can be processed by a wrapper-script.
+int MainTraining(int argc, char** argv)
+{
+    char* ModelName;
+    char* OracleFile;
+    char OracleDir[1024];
+    int Len;
+    //
+    if (argc < 5)
+    {
+        printf("Error: Not enough arguments to train!\n");
+        printf("Please provide model name, oracle file, and spectrum directory.\n");
+        printf("Sample: inspect.exe train pmc c:\\ms\\TrainingSet.txt c:\\ms\\TrainingSet\n");
+        return -1;
+    }
+    InitOptions();
+    ModelName = argv[2];
+    OracleFile = argv[3];
+    // Guarantee that OracleDir ends with a delimiter:
+    strcpy(OracleDir, argv[4]);
+    Len = strlen(OracleDir);
+
+    if (Len && OracleDir[Len - 1] != SEPARATOR)
+    {
+        OracleDir[Len] = SEPARATOR;
+        OracleDir[Len + 1] = '\0';
+    }
+    // Various trainings are available:
+    if (!CompareStrings(ModelName, "pmc"))
+    {
+        //TrainPMC(OracleFile, OracleDir);
+    }
+    else if (!CompareStrings(ModelName, "cc"))
+    {
+        //TrainCC(OracleFile, OracleDir);
+    }
+    else if (!CompareStrings(ModelName, "pepprm"))
+    {
+        LoadPeptideMasses("AminoAcidMasses.txt");
+        PeptideMass['C'] += CAM_MASS; // ASSUMED: All cysteines in the training set carry the +57 modification.
+        //GlobalOptions->InstrumentType = INSTRUMENT_TYPE_QTOF; 
+        TrainPepPRM(OracleFile, OracleDir);
+    }
+    else if (!CompareStrings(ModelName, "tag"))
+    {
+        LoadPeptideMasses("AminoAcidMasses.txt");
+        PeptideMass['C'] += CAM_MASS; // ASSUMED: All cysteines in the training set carry the +57 modification.
+        LoadMassDeltas(NULL, 0);
+        InitMassDeltaByMass();
+        PopulateJumpingHash();
+        TrainTagging(OracleFile, OracleDir);
+    }
+
+    else
+    {
+        printf("Unknown model name '%s' - no training performed.\n", ModelName);
+    }
+    return 0;
+}
+
+int MainTesting(int argc, char** argv)
+{
+    char* ModelName;
+    char* OracleFile;
+    char OracleDir[1024];
+    int Len;
+    //
+
+    InitOptions();
+    ModelName = argv[2];
+    OracleFile = argv[3];
+    // Guarantee that OracleDir ends with a delimiter:
+    if (argc > 4)
+    {
+        strcpy(OracleDir, argv[4]);
+        Len = strlen(OracleDir);
+        if (Len && OracleDir[Len - 1] != SEPARATOR)
+        {
+            OracleDir[Len] = SEPARATOR;
+            OracleDir[Len + 1] = '\0';
+        }
+    }
+    // Various tests are available:
+    if (!CompareStrings(ModelName, "pmc"))
+    {
+        //TestPMC(OracleFile, OracleDir);
+    }
+    else if (!CompareStrings(ModelName, "splicedbug"))
+    {
+        TestSpliceDB(argc, argv);
+    }
+    else if (!CompareStrings(ModelName, "cc"))
+    {
+        LoadPeptideMasses("AminoAcidMasses.txt");
+        PeptideMass['C'] += CAM_MASS; // ASSUMED: All cysteines in the training set carry the +57 modification.
+        //TestCC(OracleFile, OracleDir);
+    }
+    else if (!CompareStrings(ModelName, "prmq"))
+    {
+        LoadPeptideMasses("AminoAcidMasses.txt");
+        PeptideMass['C'] += CAM_MASS; // ASSUMED: All cysteines in the training set carry the +57 modification.
+        // The oracle file contains the true match for a spectrum, followed by many false matches.
+        // Compute the total (average) PRM score for each, sort them, and report the position of the 
+        // true peptide within the list.  (Report a histogram of these positions)
+        TestPRMQuickScoring(OracleFile, OracleDir);
+    }
+    else if (!CompareStrings(ModelName, "pepprm"))
+    {
+        LoadPeptideMasses("AminoAcidMasses.txt");
+        PeptideMass['C'] += CAM_MASS; // ASSUMED: All cysteines in the training set carry the +57 modification.
+        TestPepPRM(OracleFile, OracleDir);
+    }
+    else if (!CompareStrings(ModelName, "lda"))
+    {
+        LoadPeptideMasses("AminoAcidMasses.txt");
+        PeptideMass['C'] += CAM_MASS; // ASSUMED: All cysteines in the training set carry the +57 modification.
+        TestLDA(OracleFile, OracleDir);
+    }
+    else if (!CompareStrings(ModelName, "tag"))
+    {
+        LoadPeptideMasses("AminoAcidMasses.txt");
+        PeptideMass['C'] += CAM_MASS; // ASSUMED: All cysteines in the training set carry the +57 modification.
+        LoadMassDeltas(NULL, 0);
+        InitMassDeltaByMass();
+        PopulateJumpingHash();
+        // The oracle file contains the true match for a spectrum, followed by many false matches.
+        // Compute the total (average) PRM score for each, sort them, and report the position of the 
+        // true peptide within the list.  (Report a histogram of these positions)
+        TestTagging(OracleFile, OracleDir);
+    }
+    else if (!CompareStrings(ModelName, "pvalue"))
+    {
+        // Read in positive and negative feature-vectors, and produce a histogram:
+        TestPValue(OracleFile);
+    }
+    else
+    {
+        printf("Unknown model name '%s' - no testing performed.\n", ModelName);
+    }
+
+    return 0;
+
+}
+
+// SpliceFind: Arguments are a genomic database, and "string-table" protein database.
+// We also get a range of protein numbers.  We then look through the genome to find the 
+// best (approximate!) match for each protein.
+int MainSpliceFind(int argc, char* argv[])
+{
+    int FirstRecord;
+    int LastRecord; // inclusive!
+    char IndexFileName[512];
+    char* Temp;
+    //
+    if (argc < 6)
+    {
+        printf("** Not enough args for splice find.  Sample run:\n");
+        printf("inspect splicefind database\\ipiv313.trie ESTSpliceDB\\Genome.dat 0 1000\n");
+        return -1;
+    }
+    FirstRecord = atoi(argv[4]);
+    LastRecord = atoi(argv[5]);
+    if (LastRecord <= FirstRecord && LastRecord > -1)
+    {
+        printf("** Bad record numbers: %s to %s\n", argv[4], argv[5]);
+        return -1;
+    }
+    strcpy(IndexFileName, argv[2]);
+    for (Temp = IndexFileName + strlen(IndexFileName); Temp >= IndexFileName; Temp--)
+    {
+        if (*Temp == '.')
+        {
+            *Temp = '\0';
+            break;
+        }
+    }
+    strcat(IndexFileName, ".index");
+    SSDatabaseScan(argv[2], IndexFileName, argv[3], FirstRecord, LastRecord);
+    return 1;
+}
+
+int LoadAndScoreSpectrum()
+{
+    //char* FilePath = "PTMScore\\HEKMerged\\Spectra\\H\\R.HIADLAGNSEVILPVPAFNVINGGS+244HAG.N.2.dta";
+    //char* Annotation = "R.HIADLAGNSEVILPVPAFNVINGGS+244HAG.N";
+    char* FilePath = "SystemTest\\TestSpectrum.dta";
+    char* Annotation = "VKEAMAPK";
+    MSSpectrum* Spectrum;
+    int FilePosition = 0; // Default: byte offset 0
+    SpectrumNode* Node;
+    FILE* SpectrumFile;
+    //
+    Node = (SpectrumNode*)calloc(1, sizeof(SpectrumNode));
+    Node->FilePosition = FilePosition;
+    Node->ScanNumber = 0;
+    Node->InputFile = (InputFileNode*)calloc(1, sizeof(InputFileNode));
+    strncpy(Node->InputFile->FileName, FilePath, MAX_FILENAME_LEN);
+    // Guess the file format:
+    Node->InputFile->Format = GuessSpectrumFormatFromExtension(FilePath);
+    SpectrumFile = fopen(FilePath, "rb");
+    fseek(SpectrumFile, Node->FilePosition, 0);
+    Node->Spectrum = (MSSpectrum*)calloc(1, sizeof(MSSpectrum));
+    Spectrum = Node->Spectrum;
+    Node->Spectrum->Node = Node;
+    SpectrumLoadFromFile(Node->Spectrum, SpectrumFile);
+    fclose(SpectrumFile);
+    WindowFilterPeaks(Node->Spectrum, 0, 0);
+    IntensityRankPeaks(Node->Spectrum);
+    //SpectrumComputeNoiseDistributions(Node);
+    //SpectrumComputeBinnedIntensities(Node);
+    printf("Tweak and score...\n");
+    TweakSpectrum(Node);
+    ////////////////////////////////////
+    // Score:
+    ////////////////////////////////////
+    // Free:
+    // The PySpectrum object wraps a Spectrum object, but also a SpectrumNode and an InputFileNode.  
+    // So, free those as well:
+    if (Spectrum->Node->InputFile)
+    {
+        free(Spectrum->Node->InputFile);
+        Spectrum->Node->InputFile = NULL;
+    }
+    if (Spectrum->Node)
+    {
+        FreeSpectrumNode(Spectrum->Node);
+    }
+    else
+    {
+        FreeSpectrum(Spectrum);
+    }
+    return 0;
+}
+
+int TestMain(int argc, char* argv[])
+{
+    char Buffer[2048];
+    // For temp test scaffolding
+    InitOptions();
+    InitErrors();
+    InitStats();
+    Initialize();
+    printf(">>> Start <<<\n");
+    Cleanup();
+    printf(">>> End <<<\n");
+    ReadBinary(Buffer, sizeof(char), 1, stdin);
+    return 1;
+}
+
+// Program entry point.  Parses arguments, does initialization of global data, 
+// then either dispatches to the training/testing/splice modes or calls RunSearch.
+int main(int argc, char** argv)
+{
+    int Result;
+    clock_t StartTime;
+    clock_t EndTime;
+    float ElapsedTime;
+    int ChromosomeNumber;
+    //
+
+    //return TestMain(argc, argv);
+    // Jump into the training/testing code, maybe:
+    if (argc > 1 && !CompareStrings(argv[1], "train"))
+    {
+        return MainTraining(argc, argv);
+    }
+    if (argc > 1 && !CompareStrings(argv[1], "test"))
+    {
+        return MainTesting(argc, argv);
+    }
+    if (argc > 1 && !CompareStrings(argv[1], "splicefind"))
+    {
+        return MainSpliceFind(argc, argv);
+    }
+    
+    /////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+    printf("\nInsPecT version %s\n  Interpretation of Peptides with Post-translational Modifications.\n", INSPECT_VERSION_NUMBER);
+    printf("  Copyright 2007,2008,2009 The Regents of the University of California\n");
+    printf("  [See Docs directory for usage manual and copyright information]\n\n");
+    fflush(stdout);
+    // Allocate stuff:
+    AllocMassDeltaByIndex();
+
+    // Slightly hacky behavior: If the first argument is an integer, then
+    // jump to the splice-db code:
+    if (argc > 1)
+    {
+        ChromosomeNumber = atoi(argv[1]);
+        if (ChromosomeNumber)
+        {
+            MainSpliceDB(argc, argv);
+            goto cleanup;
+        }
+    }
+
+    // Set the (global) default options:
+    InitOptions();
+    InitErrors();
+    InitStats();
+
+    // Parse arguments.  If ReadCommandLineArgs returns false, we didn't get
+    // valid arguments, so we print usage info and quit.
+    Result = ReadCommandLineArgs(argc, argv);
+    if (!Result)
+    {
+        PrintUsageInfo();
+        goto cleanup;
+    }
+
+    // Open the error file *after* parsing the command-line:
+    GlobalOptions->ErrorFile = fopen(GlobalOptions->ErrorFileName, "wb");
+    if (!GlobalOptions->ErrorFile)
+    {
+        GlobalOptions->ErrorFile = stderr;
+    }
+
+    printf("Initialize:\n");
+    Result = Initialize();
+    if (!Result)
+    {
+        printf("Initialization FAILED - aborting search.\n");
+        goto cleanup;
+    }
+    
+
+    ///////////////////////////////////////////////////
+    // Main function: Run the search!
+    StartTime = clock();
+
+    // Set an intermediate output file name, if we're performing a search.
+    // (We write to the intermediate file, then perform p-value computation)
+    if (!(GlobalOptions->RunMode & (RUN_MODE_TAGS_ONLY | RUN_MODE_PMC_ONLY | RUN_MODE_PREP_MS2DB | RUN_MODE_RAW_OUTPUT)))
+    {
+    
+        sprintf(GlobalOptions->OutputFileName, "%s.tmp", GlobalOptions->FinalOutputFileName);
+    }
+    else
+    {
+        sprintf(GlobalOptions->OutputFileName, "%s", GlobalOptions->FinalOutputFileName);
+    }
+    GlobalOptions->OutputFile = fopen(GlobalOptions->OutputFileName, "w");
+    if (!GlobalOptions->OutputFile)
+    {
+        REPORT_ERROR_S(8, GlobalOptions->OutputFileName);
+        goto cleanup;
+    }
+
+    if (GlobalOptions->RunMode & RUN_MODE_PREP_MS2DB)
+    {
+        BuildMS2DB();
+    }
+    else if (GlobalOptions->RunMode & RUN_MODE_PMC_ONLY)
+    {
+        // Just correct charges and parent masses, don't search anything:
+        PerformSpectrumTweakage();
+    }
+    else if ((GlobalOptions->RunMode & RUN_MODE_TAGS_ONLY) && !GlobalOptions->ExternalTagger)
+    {
+        PerformTagGeneration();
+    }
+    else
+    {
+        RunSearch();
+    }
+    
+    EndTime = clock();
+    ElapsedTime = (float)((EndTime - StartTime) / (float)CLOCKS_PER_SEC);
+    printf("Elapsed time: %.4f seconds.\n", ElapsedTime);
+    printf("Inspect run complete.\n");
+ 
+cleanup:
+    Cleanup(); 
+    return 0;
+}

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/inspect.git



More information about the debian-med-commit mailing list