[med-svn] [bedtools] 04/14: Refactored KeyListOps, Context, mapFile for KeyListOps re-usability by other tools.

Charles Plessy plessy at moszumanska.debian.org
Thu Mar 6 22:56:44 UTC 2014


This is an automated email from the git hooks/post-receive script.

plessy pushed a commit to branch master
in repository bedtools.

commit 2268ee4fd792e109f2af5477c5e1b6208b5cba20
Author: nkindlon <nek3d at virginia.edu>
Date:   Thu Feb 27 23:45:48 2014 -0500

    Refactored KeyListOps, Context, mapFile for KeyListOps re-usability by other tools.
---
 src/intersectFile/Makefile                         |   1 +
 src/mapFile/Makefile                               |   1 +
 src/mapFile/mapFile.cpp                            |  67 +---
 src/mapFile/mapFile.h                              |  85 +----
 src/mapFile/mapMain.cpp                            | 138 --------
 src/nekSandbox1/Makefile                           |   1 +
 src/regressTest/Makefile                           |   1 +
 src/sampleFile/Makefile                            |   1 +
 src/utils/BinTree/Makefile                         |   1 +
 src/utils/Contexts/ContextBase.cpp                 | 120 ++++++-
 src/utils/Contexts/ContextBase.h                   |  21 +-
 src/utils/Contexts/ContextIntersect.h              |   2 +
 src/utils/Contexts/ContextMap.cpp                  |  93 +-----
 src/utils/Contexts/ContextMap.h                    |  20 +-
 src/utils/Contexts/Makefile                        |   1 +
 src/utils/FileRecordTools/Records/BamRecord.cpp    |   5 +
 src/utils/FileRecordTools/Records/BamRecord.h      |   1 +
 .../FileRecordTools/Records/Bed12Interval.cpp      |  26 ++
 src/utils/FileRecordTools/Records/Bed12Interval.h  |   1 +
 src/utils/FileRecordTools/Records/Bed3Interval.cpp |  20 ++
 src/utils/FileRecordTools/Records/Bed3Interval.h   |   2 +
 src/utils/FileRecordTools/Records/Bed4Interval.cpp |   5 +
 src/utils/FileRecordTools/Records/Bed4Interval.h   |   1 +
 src/utils/FileRecordTools/Records/Bed5Interval.cpp |  13 +
 src/utils/FileRecordTools/Records/Bed5Interval.h   |   1 +
 src/utils/FileRecordTools/Records/Bed6Interval.cpp |  17 +
 src/utils/FileRecordTools/Records/Bed6Interval.h   |   1 +
 .../FileRecordTools/Records/BedGraphInterval.cpp   |  11 +
 .../FileRecordTools/Records/BedGraphInterval.h     |   1 +
 .../FileRecordTools/Records/BedPlusInterval.cpp    |  15 +
 .../FileRecordTools/Records/BedPlusInterval.h      |   2 +
 src/utils/FileRecordTools/Records/GffRecord.cpp    |  36 ++
 src/utils/FileRecordTools/Records/GffRecord.h      |   1 +
 src/utils/FileRecordTools/Records/Record.cpp       |   6 +-
 src/utils/FileRecordTools/Records/Record.h         |   2 +
 src/utils/GenomeFile/Makefile                      |   1 +
 src/utils/KeyListOps/KeyListOps.cpp                | 364 ++++++++++++++++++++
 src/utils/KeyListOps/KeyListOps.h                  |  54 +++
 src/utils/KeyListOps/KeyListOpsMethods.cpp         | 368 +++++++++++++++++++++
 src/utils/KeyListOps/KeyListOpsMethods.h           | 113 +++++++
 src/utils/{NewChromsweep => KeyListOps}/Makefile   |  12 +-
 src/utils/NewChromsweep/Makefile                   |   1 +
 src/utils/RecordOutputMgr/Makefile                 |   1 +
 src/utils/general/Makefile                         |   2 +-
 src/utils/general/QuickString.cpp                  |  65 ++++
 src/utils/general/QuickString.h                    |  19 ++
 test/map/test-map.sh                               |  97 ++++--
 47 files changed, 1399 insertions(+), 418 deletions(-)

diff --git a/src/intersectFile/Makefile b/src/intersectFile/Makefile
index e265b33..8c81049 100644
--- a/src/intersectFile/Makefile
+++ b/src/intersectFile/Makefile
@@ -17,6 +17,7 @@ INCLUDES = -I$(UTILITIES_DIR)/Contexts/ \
            -I$(UTILITIES_DIR)/FileRecordTools/ \
            -I$(UTILITIES_DIR)/FileRecordTools/FileReaders/ \
            -I$(UTILITIES_DIR)/FileRecordTools/Records/ \
+			-I$(UTILITIES_DIR)/KeyListOps/ \
            -I$(UTILITIES_DIR)/RecordOutputMgr/ \
            -I$(UTILITIES_DIR)/NewChromsweep \
            -I$(UTILITIES_DIR)/BinTree \
diff --git a/src/mapFile/Makefile b/src/mapFile/Makefile
index 17bb42d..8628242 100644
--- a/src/mapFile/Makefile
+++ b/src/mapFile/Makefile
@@ -29,6 +29,7 @@ INCLUDES = -I$(UTILITIES_DIR)/Contexts/ \
            -I$(UTILITIES_DIR)/FileRecordTools/FileReaders/ \
            -I$(UTILITIES_DIR)/FileRecordTools/Records/ \
            -I$(UTILITIES_DIR)/RecordOutputMgr/ \
+			-I$(UTILITIES_DIR)/KeyListOps/ \
            -I$(UTILITIES_DIR)/NewChromsweep \
            -I$(UTILITIES_DIR)/VectorOps \
            -I$(UTILITIES_DIR)/BinTree \
diff --git a/src/mapFile/mapFile.cpp b/src/mapFile/mapFile.cpp
index 88dcc26..8dbf24a 100644
--- a/src/mapFile/mapFile.cpp
+++ b/src/mapFile/mapFile.cpp
@@ -47,74 +47,11 @@ bool FileMap::mapFiles()
 			RecordKeyList keySet(hitSet.getKey());
 			RecordKeyList resultSet(hitSet.getKey());
 			_blockMgr->findBlockedOverlaps(keySet, hitSet, resultSet);
-			SummarizeHits(resultSet);
-			_recordOutputMgr->printRecord(resultSet.getKey(), _output);
+			_recordOutputMgr->printRecord(resultSet.getKey(), _context->getColumnOpsVal(resultSet));
     	} else {
-			SummarizeHits(hitSet);
-			_recordOutputMgr->printRecord(hitSet.getKey(), _output);
+			_recordOutputMgr->printRecord(hitSet.getKey(), _context->getColumnOpsVal(hitSet));
 		}
     }
     return true;
 }
 
-void FileMap::ExtractColumnFromHits(RecordKeyList &hits) {
-  _column_vec.clear();
-  RecordKeyList::const_iterator_type iter = hits.begin();
-  for (; iter != hits.end(); iter = hits.next()) 
-  {
-    _column_vec.push_back(iter->value()->getField(_context->getColumn()).str());
-  }
-} 
-
-void FileMap::SummarizeHits(RecordKeyList &hits) {
-
-    const QuickString & operation = _context->getColumnOperation();
-    _output.clear();
-
-    if (hits.size() == 0) {
-        if (operation == "count" || operation == "count_distinct")
-            _output.append("0");
-        else
-            _output.append(_context->getNullValue().str());
-        return;
-    } 
-
-    _tmp_output.str("");
-    _tmp_output.clear();
-
-    ExtractColumnFromHits(hits);
-
-    VectorOps vo(_column_vec);
-    if (operation == "sum") 
-        _tmp_output << setprecision (PRECISION) << vo.GetSum();
-    else if (operation == "mean")
-        _tmp_output << setprecision (PRECISION) << vo.GetMean();
-    else if (operation == "median")
-        _tmp_output << setprecision (PRECISION) << vo.GetMedian();
-    else if (operation == "min")
-        _tmp_output << setprecision (PRECISION) << vo.GetMin();
-    else if (operation == "max")
-        _tmp_output << setprecision (PRECISION) << vo.GetMax();
-    else if (operation == "absmin")
-        _tmp_output << setprecision (PRECISION) << vo.GetAbsMin();
-    else if (operation == "absmax")
-        _tmp_output << setprecision (PRECISION) << vo.GetAbsMax();
-    else if (operation == "mode")
-        _tmp_output << vo.GetMode();
-    else if (operation == "antimode")
-        _tmp_output << vo.GetAntiMode();
-    else if (operation == "count") 
-        _tmp_output << setprecision (PRECISION) << vo.GetCount();
-    else if (operation == "count_distinct")
-        _tmp_output << setprecision (PRECISION) << vo.GetCountDistinct();
-    else if (operation == "collapse")
-        _tmp_output << vo.GetCollapse();
-    else if (operation == "distinct")
-        _tmp_output << vo.GetDistinct();
-    else {
-        cerr << "ERROR: " << operation << " is an unrecognized operation\n";
-        exit(1);
-    }
-    _output.append(_tmp_output.str());
-
-}
diff --git a/src/mapFile/mapFile.h b/src/mapFile/mapFile.h
index cb1da08..fbb431a 100644
--- a/src/mapFile/mapFile.h
+++ b/src/mapFile/mapFile.h
@@ -18,10 +18,11 @@ using namespace std;
 #include <iomanip>
 #include "VectorOps.h"
 #include "RecordKeyList.h"
+#include "KeyListOps.h"
+#include "ContextMap.h"
 
 using namespace std;
 
-class ContextMap;
 class BlockMgr;
 class RecordOutputMgr;
 
@@ -35,90 +36,8 @@ public:
 
 private:
     ContextMap *_context;
-    Record *_queryRec;
-    Record *_databaseRec;
     BlockMgr *_blockMgr;
     RecordOutputMgr *_recordOutputMgr;
-
-    vector<string> _column_vec; // vector to hold current column's worth of data
-
-    ostringstream _tmp_output;
-    QuickString _output;  // placeholder for the results of mapping B to each a in A.
-    //------------------------------------------------
-    // private methods
-    //------------------------------------------------
-    void Map();
-    void SummarizeHits(RecordKeyList &hits);
-    void ExtractColumnFromHits(RecordKeyList &hits);
-
 };
 
 #endif /* MAPFILE_H */
-
-
-/*
-#include "bedFile.h"
-#include "chromsweep.h"
-#include "VectorOps.h"
-#include "api/BamReader.h"
-#include "api/BamWriter.h"
-#include "api/BamAux.h"
-#include "BamAncillary.h"
-using namespace BamTools;
-
-
-#include <vector>
-#include <iostream>
-#include <algorithm>
-#include <numeric>
-#include <fstream>
-#include <iomanip>
-#include <stdlib.h>
-using namespace std;
-
-
-
-class BedMap {
-
-public:
-
-    // constructor
-    BedMap(string bedAFile, string bedBFile, int column, string operation,
-                   float overlapFraction, bool sameStrand, 
-                   bool diffStrand, bool reciprocal, 
-                   bool choseNullValue, string nullValue, 
-                   bool printHeader);
-
-    // destructor
-    ~BedMap(void);
-
-private:
-
-    //------------------------------------------------
-    // private attributes
-    //------------------------------------------------
-    string _bedAFile;
-    string _bedBFile;
-    int _column;
-    string _operation;
-    bool  _sameStrand;
-    bool  _diffStrand;
-    bool  _reciprocal;
-    float _overlapFraction;
-    string _nullValue;
-    bool  _printHeader;
-    
-    // instance of a bed file class.
-    BedFile *_bedA, *_bedB;
-    
-    vector<string> _column_vec; // vector to hold current column's worth of data
-
-    //------------------------------------------------
-    // private methods
-    //------------------------------------------------
-    void Map();
-    string MapHits(const BED &a, const vector<BED> &hits);
-    void ExtractColumnFromHits(const vector<BED> &hits);
-};
-*/
-//#endif /* MAPFILE_H */
diff --git a/src/mapFile/mapMain.cpp b/src/mapFile/mapMain.cpp
index a9eeb36..f08e56b 100644
--- a/src/mapFile/mapMain.cpp
+++ b/src/mapFile/mapMain.cpp
@@ -38,144 +38,6 @@ int map_main(int argc, char* argv[]) {
     return retVal ? 0 : 1;
 }
 
-
-/*
-int map_main(int argc, char* argv[]) {
-
-    // our configuration variables
-    bool showHelp = false;
-
-    // input files
-    string bedAFile;
-    string bedBFile;
-    int column = 5;
-    string operation = "sum";
-    string nullValue = ".";
-
-    // input arguments
-    float overlapFraction = 1E-9;
-
-    bool haveBedA           = false;
-    bool haveBedB           = false;
-    bool haveColumn         = false;
-    bool haveOperation      = false;
-    bool haveFraction       = false;
-    bool reciprocalFraction = false;
-    bool sameStrand         = false;
-    bool diffStrand         = false;
-    bool printHeader        = false;
-    bool choseNullValue     = false;
-
-    // check to see if we should print out some help
-    if(argc <= 1) showHelp = true;
-
-    for(int i = 1; i < argc; i++) {
-        int parameterLength = (int)strlen(argv[i]);
-
-        if((PARAMETER_CHECK("-h", 2, parameterLength)) ||
-        (PARAMETER_CHECK("--help", 5, parameterLength))) {
-            showHelp = true;
-        }
-    }
-
-    if(showHelp) map_help();
-
-    // do some parsing (all of these parameters require 2 strings)
-    for(int i = 1; i < argc; i++) {
-
-        int parameterLength = (int)strlen(argv[i]);
-
-        if(PARAMETER_CHECK("-a", 2, parameterLength)) {
-            if ((i+1) < argc) {
-                haveBedA = true;
-                bedAFile = argv[i + 1];
-                i++;
-            }
-        }
-        else if(PARAMETER_CHECK("-b", 2, parameterLength)) {
-            if ((i+1) < argc) {
-                haveBedB = true;
-                bedBFile = argv[i + 1];
-                i++;
-            }
-        }
-        else if(PARAMETER_CHECK("-c", 2, parameterLength)) {
-            if ((i+1) < argc) {
-                haveColumn = true;
-                column = atoi(argv[i + 1]);
-                i++;
-            }
-        }
-        else if(PARAMETER_CHECK("-o", 2, parameterLength)) {
-            if ((i+1) < argc) {
-                haveOperation = true;
-                operation = argv[i + 1];
-                i++;
-            }
-        }
-        else if(PARAMETER_CHECK("-f", 2, parameterLength)) {
-            if ((i+1) < argc) {
-                haveFraction = true;
-                overlapFraction = atof(argv[i + 1]);
-                i++;
-            }
-        }
-        else if(PARAMETER_CHECK("-r", 2, parameterLength)) {
-            reciprocalFraction = true;
-        }
-        else if (PARAMETER_CHECK("-s", 2, parameterLength)) {
-            sameStrand = true;
-        }
-        else if (PARAMETER_CHECK("-S", 2, parameterLength)) {
-            diffStrand = true;
-        }
-        else if (PARAMETER_CHECK("-null", 5, parameterLength)) {
-            nullValue = argv[i + 1];
-            choseNullValue = true;
-            i++;
-        }
-        else if(PARAMETER_CHECK("-header", 7, parameterLength)) {
-            printHeader = true;
-        }
-        else {
-            cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
-            showHelp = true;
-        }
-    }
-
-    // make sure we have both input files
-    if (!haveBedA || !haveBedB) {
-        cerr << endl << "*****" << endl << "*****ERROR: Need -a and -b files. " << endl << "*****" << endl;
-        showHelp = true;
-    }
-
-    if (reciprocalFraction && !haveFraction) {
-        cerr << endl << "*****" << endl << "*****ERROR: If using -r, you need to define -f." << endl << "*****" << endl;
-        showHelp = true;
-    }
-
-    if (sameStrand && diffStrand) {
-        cerr << endl << "*****" << endl << "*****ERROR: Request either -s OR -S, not both." << endl << "*****" << endl;
-        showHelp = true;
-    }
-
-    if (!showHelp) {
-
-        BedMap *bm = new BedMap(bedAFile, bedBFile, column, operation,
-                                       overlapFraction, sameStrand,
-                                       diffStrand, reciprocalFraction,
-                                       choseNullValue, nullValue, 
-                                       printHeader);
-        delete bm;
-        return 0;
-    }
-    else {
-        map_help();
-        return 0;
-    }
-}
-*/
-
 void map_help(void) {
 
     cerr << "\nTool:    bedtools map (aka mapBed)" << endl;
diff --git a/src/nekSandbox1/Makefile b/src/nekSandbox1/Makefile
index fbe6d86..df8aba7 100644
--- a/src/nekSandbox1/Makefile
+++ b/src/nekSandbox1/Makefile
@@ -10,6 +10,7 @@ INCLUDES = -I$(UTILITIES_DIR)/Contexts/ \
 			-I$(UTILITIES_DIR)/FileRecordTools/ \
 		   -I$(UTILITIES_DIR)/FileRecordTools/FileReaders \
            -I$(UTILITIES_DIR)/FileRecordTools/Records \
+ 			-I$(UTILITIES_DIR)/KeyListOps/ \
            -I$(UTILITIES_DIR)/general \
            -I$(UTILITIES_DIR)/NewChromsweep \
            -I$(UTILITIES_DIR)/GenomeFile/ \
diff --git a/src/regressTest/Makefile b/src/regressTest/Makefile
index e9ceebf..8ffeeab 100644
--- a/src/regressTest/Makefile
+++ b/src/regressTest/Makefile
@@ -18,6 +18,7 @@ INCLUDES = -I$(UTILITIES_DIR)/bedFile/ \
            -I$(UTILITIES_DIR)/FileRecordTools/ \
 		   -I$(UTILITIES_DIR)/FileRecordTools/FileReaders \
            -I$(UTILITIES_DIR)/FileRecordTools/Records \
+  			-I$(UTILITIES_DIR)/KeyListOps/ \
            -I$(UTILITIES_DIR)/general
 
 # ----------------------------------
diff --git a/src/sampleFile/Makefile b/src/sampleFile/Makefile
index 2042291..9ccbe5a 100644
--- a/src/sampleFile/Makefile
+++ b/src/sampleFile/Makefile
@@ -17,6 +17,7 @@ INCLUDES = -I$(UTILITIES_DIR)/Contexts/ \
            -I$(UTILITIES_DIR)/FileRecordTools/ \
            -I$(UTILITIES_DIR)/FileRecordTools/FileReaders/ \
            -I$(UTILITIES_DIR)/FileRecordTools/Records/ \
+ 			-I$(UTILITIES_DIR)/KeyListOps/ \
           -I$(UTILITIES_DIR)/RecordOutputMgr/ \
             -I$(UTILITIES_DIR)/version/
 
diff --git a/src/utils/BinTree/Makefile b/src/utils/BinTree/Makefile
index de04c81..c29b5eb 100644
--- a/src/utils/BinTree/Makefile
+++ b/src/utils/BinTree/Makefile
@@ -11,6 +11,7 @@ INCLUDES = -I$(UTILITIES_DIR)/general/ \
            -I$(UTILITIES_DIR)/FileRecordTools/ \
            -I$(UTILITIES_DIR)/FileRecordTools/FileReaders/ \
            -I$(UTILITIES_DIR)/FileRecordTools/Records/ \
+			-I$(UTILITIES_DIR)/KeyListOps/ \
            -I$(UTILITIES_DIR)/BamTools/include \
            -I$(UTILITIES_DIR)/BamTools/src/ \
 	        -I$(UTILITIES_DIR)/version/
diff --git a/src/utils/Contexts/ContextBase.cpp b/src/utils/Contexts/ContextBase.cpp
index cd30b20..adbc47a 100644
--- a/src/utils/Contexts/ContextBase.cpp
+++ b/src/utils/Contexts/ContextBase.cpp
@@ -52,20 +52,16 @@ ContextBase::ContextBase()
   _hasConstantSeed(false),
   _seed(0),
   _forwardOnly(false),
-  _reverseOnly(false)
+  _reverseOnly(false),
+  _hasColumnOpsMethods(false)
 {
 	_programNames["intersect"] = INTERSECT;
 	_programNames["sample"] = SAMPLE;
 	_programNames["map"] = MAP;
 
-	_validScoreOps.insert("sum");
-	_validScoreOps.insert("max");
-	_validScoreOps.insert("min");
-	_validScoreOps.insert("mean");
-	_validScoreOps.insert("mode");
-	_validScoreOps.insert("median");
-	_validScoreOps.insert("antimode");
-	_validScoreOps.insert("collapse");
+	if (hasColumnOpsMethods()) {
+		_keyListOps = new KeyListOps();
+	}
 }
 
 ContextBase::~ContextBase()
@@ -79,6 +75,11 @@ ContextBase::~ContextBase()
 		delete _files[i];
 		_files[i] = NULL;
 	}
+	if (hasColumnOpsMethods()) {
+		delete _keyListOps;
+		_keyListOps = NULL;
+	}
+
 }
 
 bool ContextBase::determineOutputType() {
@@ -176,6 +177,19 @@ bool ContextBase::parseCmdArgs(int argc, char **argv, int skipFirstArgs) {
         else if (strcmp(_argv[_i], "-seed") == 0) {
 			if (!handle_seed()) return false;
         }
+        else if (strcmp(_argv[_i], "-o") == 0) {
+			if (!handle_o()) return false;
+        }
+        else if (strcmp(_argv[_i], "-c") == 0) {
+			if (!handle_c()) return false;
+        }
+        else if (strcmp(_argv[_i], "-null") == 0) {
+			if (!handle_null()) return false;
+        }
+        else if (strcmp(_argv[_i], "-delim") == 0) {
+			if (!handle_delim()) return false;
+        }
+
 	}
 	return true;
 }
@@ -191,6 +205,12 @@ bool ContextBase::isValidState()
 	if (!determineOutputType()) {
 		return false;
 	}
+	if (hasColumnOpsMethods()) {
+		FileRecordMgr *dbFile = getFile(hasIntersectMethods() ? _databaseFileIdx : 0);
+		if (!_keyListOps->isValidColumnOps(dbFile)) {
+			return false;
+		}
+	}
 	return true;
 }
 
@@ -363,3 +383,85 @@ bool ContextBase::handle_ubam()
     markUsed(_i - _skipFirstArgs);
 	return true;
 }
+
+
+// Methods specific to column operations.
+// For col ops, -c is the string of columns upon which to operate; the value is passed to _keyListOps->setColumns().
+bool ContextBase::handle_c()
+{
+	if (!hasColumnOpsMethods()) { // column ops not enabled for this context: report failure to the caller
+		return false;
+	}
+    if ((_i+1) < _argc) { // only consume a value when one actually follows the flag
+        _keyListOps->setColumns(_argv[_i + 1]);
+        markUsed(_i - _skipFirstArgs); // mark the -c flag itself as processed
+        _i++; // advance to the value argument
+        markUsed(_i - _skipFirstArgs); // mark the value as processed
+    }
+    return true; // also returned when no value follows -c; the flag is then left unmarked
+}
+
+
+// For col ops, -o is the string of operations to apply to the columns (-c); the value is passed to _keyListOps->setOperations().
+bool ContextBase::handle_o()
+{
+	if (!hasColumnOpsMethods()) { // column ops not enabled for this context: report failure to the caller
+		return false;
+	}
+    if ((_i+1) < _argc) { // only consume a value when one actually follows the flag
+    	 _keyListOps->setOperations(_argv[_i + 1]);
+        markUsed(_i - _skipFirstArgs); // mark the -o flag itself as processed
+        _i++; // advance to the value argument
+        markUsed(_i - _skipFirstArgs); // mark the value as processed
+    }
+    return true; // also returned when no value follows -o; the flag is then left unmarked
+}
+
+
+// For col ops, -null is a NULL value assigned
+// when no overlaps are detected; it is passed to _keyListOps->setNullValue().
+bool ContextBase::handle_null()
+{
+	if (!hasColumnOpsMethods()) { // column ops not enabled for this context: report failure to the caller
+		return false;
+	}
+    if ((_i+1) < _argc) { // only consume a value when one actually follows the flag
+    	 _keyListOps->setNullValue(_argv[_i + 1]);
+        markUsed(_i - _skipFirstArgs); // mark the -null flag itself as processed
+        _i++; // advance to the value argument
+        markUsed(_i - _skipFirstArgs); // mark the value as processed
+    }
+    return true; // also returned when no value follows -null; the flag is then left unmarked
+}
+
+// For col ops, delimStr will appear between each item in
+// a collapsed but delimited list; it is passed to _keyListOps->setDelimStr().
+bool ContextBase::handle_delim()
+{
+	if (!hasColumnOpsMethods()) { // column ops not enabled for this context: report failure to the caller
+		return false;
+	}
+    if ((_i+1) < _argc) { // only consume a value when one actually follows the flag
+    	 _keyListOps->setDelimStr(_argv[_i + 1]);
+        markUsed(_i - _skipFirstArgs); // mark the -delim flag itself as processed
+        _i++; // advance to the value argument
+        markUsed(_i - _skipFirstArgs); // mark the value as processed
+    }
+    return true; // also returned when no value follows -delim; the flag is then left unmarked
+}
+
+void ContextBase::setColumnOpsMethods(bool val) // enables/disables column ops; enabling also creates the KeyListOps helper
+{
+	_hasColumnOpsMethods = val; // this flag is what hasColumnOpsMethods() reports
+	if (val) { // NOTE(review): allocates on every call with true and overwrites _keyListOps without freeing it — confirm this is only called once
+		_keyListOps = new KeyListOps();
+	}
+}
+
+const QuickString &ContextBase::getColumnOpsVal(RecordKeyList &keyList) const { // returns the column-ops result string for keyList
+	if (!hasColumnOpsMethods()) { // column ops disabled: _keyListOps must not be dereferenced
+		return _nullStr; // placeholder member returned instead of a computed value
+	}
+	return _keyListOps->getOpVals(keyList); // delegate the computation to KeyListOps
+}
+
diff --git a/src/utils/Contexts/ContextBase.h b/src/utils/Contexts/ContextBase.h
index 872193f..b4bf122 100644
--- a/src/utils/Contexts/ContextBase.h
+++ b/src/utils/Contexts/ContextBase.h
@@ -24,6 +24,7 @@
 #include "NewGenomeFile.h"
 #include "api/BamReader.h"
 #include "api/BamAux.h"
+#include "KeyListOps.h"
 
 
 class ContextBase {
@@ -144,6 +145,13 @@ public:
     //methods.
     virtual bool hasIntersectMethods() const { return false; }
 
+    // determine whether column operations like those used in map
+    // are available.
+    void setColumnOpsMethods(bool val);
+    virtual bool hasColumnOpsMethods() const { return _hasColumnOpsMethods; }
+    const QuickString &getColumnOpsVal(RecordKeyList &keyList) const;
+    //methods applicable only to column operations.
+
 protected:
 	PROGRAM_TYPE _program;
 
@@ -191,15 +199,11 @@ protected:
     int _bamHeaderAndRefIdx;
     int _maxNumDatabaseFields;
     bool _useFullBamTags;
-    QuickString _columnOperation;
-    int _column;
-    QuickString _nullValue;
 	bool _reportCount;
 	int _maxDistance;
 	bool _reportNames;
 	bool _reportScores;
 	QuickString _scoreOp;
-	set<QuickString> _validScoreOps;
 
 	int _numOutputRecords;
 
@@ -208,6 +212,10 @@ protected:
 	bool _forwardOnly;
 	bool _reverseOnly;
 
+	bool _hasColumnOpsMethods;
+	KeyListOps *_keyListOps;
+	QuickString _nullStr; //placeholder return value when col ops aren't valid.
+
 	void markUsed(int i) { _argsProcessed[i] = true; }
 	bool isUsed(int i) const { return _argsProcessed[i]; }
 	bool cmdArgsValid();
@@ -231,6 +239,11 @@ protected:
 	virtual bool handle_split();
 	virtual bool handle_sorted();
 	virtual bool handle_ubam();
+
+	virtual bool handle_c();
+	virtual bool handle_o();
+	virtual bool handle_null();
+	virtual bool handle_delim();
 };
 
 #endif /* CONTEXTBASE_H_ */
diff --git a/src/utils/Contexts/ContextIntersect.h b/src/utils/Contexts/ContextIntersect.h
index 0144a12..b066e94 100644
--- a/src/utils/Contexts/ContextIntersect.h
+++ b/src/utils/Contexts/ContextIntersect.h
@@ -21,6 +21,8 @@ public:
 
 	//NOTE: Query and database files will only be marked as such by either the
 	//parseCmdArgs method, or by explicitly setting them.
+	FileRecordMgr *getQueryFile() { return getFile(_queryFileIdx); }
+	FileRecordMgr *getDatabaseFile() { return getFile(_databaseFileIdx); }
     int getQueryFileIdx() const { return _queryFileIdx; }
 	void setQueryFileIdx(int idx) { _queryFileIdx = idx; }
 	int getDatabaseFileIdx() const { return _databaseFileIdx; }
diff --git a/src/utils/Contexts/ContextMap.cpp b/src/utils/Contexts/ContextMap.cpp
index d94d088..e3f8241 100644
--- a/src/utils/Contexts/ContextMap.cpp
+++ b/src/utils/Contexts/ContextMap.cpp
@@ -12,13 +12,7 @@ ContextMap::ContextMap()
 	// map requires sorted input
 	setSortedInput(true);
 	setLeftJoin(true);
-
-	// default to BED score column
-	setColumn(5);
-	// default to "sum"
-	setColumnOperation("sum");
-	// default to "." as a NULL value
-	setNullValue('.');
+	setColumnOpsMethods(true);
 }
 
 ContextMap::~ContextMap()
@@ -44,75 +38,22 @@ bool ContextMap::parseCmdArgs(int argc, char **argv, int skipFirstArgs) {
 		if (isUsed(_i - _skipFirstArgs)) {
 			continue;
 		}
-        else if (strcmp(_argv[_i], "-o") == 0) {
-			if (!handle_o()) return false;
-        }
-        else if (strcmp(_argv[_i], "-c") == 0) {
-			if (!handle_c()) return false;
-        }
-        else if (strcmp(_argv[_i], "-null") == 0) {
-			if (!handle_null()) return false;
-        }
-	}
-	return ContextIntersect::parseCmdArgs(argc, argv, _skipFirstArgs);
-}
-
+		if (strcmp(_argv[_i], "-c") == 0) {
+			//bypass intersect's use of the -c option, because -c
+			//means writeCount for intersect, but means columns for map.
+			if (!ContextBase::handle_c()) return false;
+		}
 
-bool ContextMap::isValidState()
-{
-	if (!ContextIntersect::isValidState()) {
-		return false;
 	}
-
-    if (getDatabaseFileType() == FileRecordTypeChecker::BAM_FILE_TYPE) {
-         //throw Error
-        cerr << endl << "*****" 
-             << endl 
-             << "***** ERROR: BAM database file not currently supported for the map tool." 
-             << endl;
-        exit(1);
-    }
-	// TODO 
-	// enforce any specific checks for Map.
-    return true;
-}
-
-
-// for map, -c is the column upon which to operate
-bool ContextMap::handle_c()
-{
-    if ((_i+1) < _argc) {
-        setColumn(atoi(_argv[_i + 1]));
-        markUsed(_i - _skipFirstArgs);
-        _i++;
-        markUsed(_i - _skipFirstArgs);
-    }
-    return true;
-}
-
-
-// for map, -o is the operation to apply to the column (-c)
-bool ContextMap::handle_o()
-{
-    if ((_i+1) < _argc) {
-        setColumnOperation(_argv[_i + 1]);
-        markUsed(_i - _skipFirstArgs);
-        _i++;
-        markUsed(_i - _skipFirstArgs);
-    }
-    return true;
-}
-
-
-// for map, -null is a NULL vakue assigned
-// when no overlaps are detected.
-bool ContextMap::handle_null()
-{
-    if ((_i+1) < _argc) {
-        setNullValue(_argv[_i + 1]);
-        markUsed(_i - _skipFirstArgs);
-        _i++;
-        markUsed(_i - _skipFirstArgs);
-    }
-    return true;
+	return ContextIntersect::parseCmdArgs(argc, argv, _skipFirstArgs);
 }
+//
+//
+//bool ContextMap::isValidState()
+//{
+//	if (!ContextIntersect::isValidState()) {
+//		return false;
+//	}
+//}
+//
+//
diff --git a/src/utils/Contexts/ContextMap.h b/src/utils/Contexts/ContextMap.h
index b8ee57f..9b7280e 100644
--- a/src/utils/Contexts/ContextMap.h
+++ b/src/utils/Contexts/ContextMap.h
@@ -9,30 +9,20 @@
 #define CONTEXTMAP_H_
 
 #include "ContextIntersect.h"
+#include "KeyListOps.h"
 
 class ContextMap : public ContextIntersect {
 public:
 	ContextMap();
 	virtual ~ContextMap();
-	virtual bool isValidState();
-
+//	virtual bool isValidState();
+//
 	virtual bool parseCmdArgs(int argc, char **argv, int skipFirstArgs);
-
-	int getColumn() const { return _column; }
-	void setColumn(int column) { _column = column; }
-
-	const QuickString & getColumnOperation() const { return _columnOperation; }
-	void setColumnOperation(const QuickString & operation) { _columnOperation = operation; }
-
-	const QuickString & getNullValue() const { return _nullValue; }
-	void setNullValue(const QuickString & nullValue) { _nullValue = nullValue; }
-
+//
     virtual bool hasIntersectMethods() const { return true; }
+//
 
 private:
-	virtual bool handle_c();
-	virtual bool handle_o();
-	virtual bool handle_null();
 
 };
 
diff --git a/src/utils/Contexts/Makefile b/src/utils/Contexts/Makefile
index 7ddc3c6..4b2ed42 100644
--- a/src/utils/Contexts/Makefile
+++ b/src/utils/Contexts/Makefile
@@ -9,6 +9,7 @@ INCLUDES =  -I$(UTILITIES_DIR)/general/ \
 			-I$(UTILITIES_DIR)/FileRecordTools/ \
            -I$(UTILITIES_DIR)/FileRecordTools/FileReaders/ \
            -I$(UTILITIES_DIR)/FileRecordTools/Records/ \
+			-I$(UTILITIES_DIR)/KeyListOps/ \
 			-I$(UTILITIES_DIR)/GenomeFile/ \
  			-I$(UTILITIES_DIR)/BamTools/include \
 			-I$(UTILITIES_DIR)/BamTools/src/ \
diff --git a/src/utils/FileRecordTools/Records/BamRecord.cpp b/src/utils/FileRecordTools/Records/BamRecord.cpp
index 4c5cd8d..f939fef 100644
--- a/src/utils/FileRecordTools/Records/BamRecord.cpp
+++ b/src/utils/FileRecordTools/Records/BamRecord.cpp
@@ -172,5 +172,10 @@ const QuickString &BamRecord::getField(int fieldNum) const
 	return Bed6Interval::getField(fieldNum);
 }
 
+bool BamRecord::isNumericField(int fieldNum) { // reports whether the given column number is treated as numeric
+
+	//TBD: As with getField, this isn't defined for BAM.
+	return (fieldNum > 6 ? false : Bed6Interval::isNumericField(fieldNum)); // columns above 6 are non-numeric; everything else defers to Bed6Interval
+}
 
 
diff --git a/src/utils/FileRecordTools/Records/BamRecord.h b/src/utils/FileRecordTools/Records/BamRecord.h
index b74dbc2..022ecb4 100644
--- a/src/utils/FileRecordTools/Records/BamRecord.h
+++ b/src/utils/FileRecordTools/Records/BamRecord.h
@@ -40,6 +40,7 @@ public:
 
 	virtual const QuickString &getField(int fieldNum) const;
 	virtual int getNumFields() const  { return 12; }
+	static bool isNumericField(int fieldNum);
 
 protected:
 	BamTools::BamAlignment _bamAlignment;
diff --git a/src/utils/FileRecordTools/Records/Bed12Interval.cpp b/src/utils/FileRecordTools/Records/Bed12Interval.cpp
index 867a69e..0a5a092 100644
--- a/src/utils/FileRecordTools/Records/Bed12Interval.cpp
+++ b/src/utils/FileRecordTools/Records/Bed12Interval.cpp
@@ -146,3 +146,29 @@ const QuickString &Bed12Interval::getField(int fieldNum) const
 	}
 }
 
+bool Bed12Interval::isNumericField(int fieldNum) { // reports whether the given column number is treated as numeric
+	switch (fieldNum) { // NOTE(review): field names below assume the standard BED12 column order — confirm against getField
+	case 7: // presumably thickStart
+		return true;
+		break; // unreachable after the return; kept as written
+	case 8: // presumably thickEnd
+		return true;
+		break;
+	case 9: // presumably itemRgb
+		return false;
+		break;
+	case 10: // presumably blockCount
+		return true;
+		break;
+	case 11: // presumably blockSizes
+		return false;
+		break;
+	case 12: // presumably blockStarts
+		return false;
+		break;
+	default: // any other column number is decided by Bed6Interval
+		return Bed6Interval::isNumericField(fieldNum);
+		break;
+	}
+}
+
diff --git a/src/utils/FileRecordTools/Records/Bed12Interval.h b/src/utils/FileRecordTools/Records/Bed12Interval.h
index 711800c..ffa89f9 100644
--- a/src/utils/FileRecordTools/Records/Bed12Interval.h
+++ b/src/utils/FileRecordTools/Records/Bed12Interval.h
@@ -54,6 +54,7 @@ public:
 
 	virtual const QuickString &getField(int fieldNum) const;
 	virtual int getNumFields() const  { return 12; }
+	static bool isNumericField(int fieldNum);
 
 
 protected:
diff --git a/src/utils/FileRecordTools/Records/Bed3Interval.cpp b/src/utils/FileRecordTools/Records/Bed3Interval.cpp
index 3f896be..e31e43e 100644
--- a/src/utils/FileRecordTools/Records/Bed3Interval.cpp
+++ b/src/utils/FileRecordTools/Records/Bed3Interval.cpp
@@ -79,3 +79,23 @@ const QuickString &Bed3Interval::getField(int fieldNum) const
 		break;
 	}
 }
+
+bool Bed3Interval::isNumericField(int fieldNum) { // reports whether the given column number is treated as numeric
+	switch (fieldNum) {
+	case 1:
+		return false; //chrom
+		break; // unreachable after the return; kept as written
+	case 2:
+		return true; //startPos
+		break;
+	case 3:
+		return true; //endPos
+		break;
+	default: // any other column number is fatal: print an error to cerr and terminate
+	    cerr << endl << "*****" << endl
+	         << "*****ERROR: requested invalid column " << fieldNum << ". Exiting." << endl
+	          << endl << "*****" << endl;
+	    exit(1); // does not return, so no value is produced on this path
+	    break;
+	}
+}
diff --git a/src/utils/FileRecordTools/Records/Bed3Interval.h b/src/utils/FileRecordTools/Records/Bed3Interval.h
index 9f1ff11..93377a0 100644
--- a/src/utils/FileRecordTools/Records/Bed3Interval.h
+++ b/src/utils/FileRecordTools/Records/Bed3Interval.h
@@ -32,6 +32,8 @@ public:
 	virtual const QuickString &getField(int fieldNum) const;
 	virtual int getNumFields() const  { return 3; }
 
+	static bool isNumericField(int fieldNum);
+
 protected:
 	virtual ~Bed3Interval();
 
diff --git a/src/utils/FileRecordTools/Records/Bed4Interval.cpp b/src/utils/FileRecordTools/Records/Bed4Interval.cpp
index c1ef81a..27ca9f7 100644
--- a/src/utils/FileRecordTools/Records/Bed4Interval.cpp
+++ b/src/utils/FileRecordTools/Records/Bed4Interval.cpp
@@ -60,3 +60,8 @@ const QuickString &Bed4Interval::getField(int fieldNum) const
 	}
 }
 
+bool Bed4Interval::isNumericField(int fieldNum) { // reports whether the given column number is treated as numeric
+	return (fieldNum == 4 ? false : Bed3Interval::isNumericField(fieldNum)); // column 4 is non-numeric; all other columns defer to Bed3Interval
+}
+
+
diff --git a/src/utils/FileRecordTools/Records/Bed4Interval.h b/src/utils/FileRecordTools/Records/Bed4Interval.h
index f42817c..b038446 100644
--- a/src/utils/FileRecordTools/Records/Bed4Interval.h
+++ b/src/utils/FileRecordTools/Records/Bed4Interval.h
@@ -28,6 +28,7 @@ public:
 
 	virtual const QuickString &getField(int fieldNum) const;
 	virtual int getNumFields() const  { return 4; }
+	static bool isNumericField(int fieldNum);
 
 
 protected:
diff --git a/src/utils/FileRecordTools/Records/Bed5Interval.cpp b/src/utils/FileRecordTools/Records/Bed5Interval.cpp
index 7307fb6..130a788 100644
--- a/src/utils/FileRecordTools/Records/Bed5Interval.cpp
+++ b/src/utils/FileRecordTools/Records/Bed5Interval.cpp
@@ -70,3 +70,16 @@ const QuickString &Bed5Interval::getField(int fieldNum) const
 		break;
 	}
 }
+
+// Reports whether a BED5 column (numbered from 1) holds numeric data.
+bool Bed5Interval::isNumericField(int fieldNum) {
+	if (fieldNum == 4) {
+		// Column 4 is reported as non-numeric.
+		return false;
+	}
+	if (fieldNum == 5) {
+		// Column 5 is reported as numeric.
+		return true;
+	}
+	return Bed3Interval::isNumericField(fieldNum); // columns 1-3, or reject
+}
diff --git a/src/utils/FileRecordTools/Records/Bed5Interval.h b/src/utils/FileRecordTools/Records/Bed5Interval.h
index bc913d1..2064d35 100644
--- a/src/utils/FileRecordTools/Records/Bed5Interval.h
+++ b/src/utils/FileRecordTools/Records/Bed5Interval.h
@@ -27,6 +27,7 @@ public:
 
 	virtual const QuickString &getField(int fieldNum) const;
 	virtual int getNumFields() const  { return 5; }
+	static bool isNumericField(int fieldNum);
 
 
 protected:
diff --git a/src/utils/FileRecordTools/Records/Bed6Interval.cpp b/src/utils/FileRecordTools/Records/Bed6Interval.cpp
index 8371553..5bc783c 100644
--- a/src/utils/FileRecordTools/Records/Bed6Interval.cpp
+++ b/src/utils/FileRecordTools/Records/Bed6Interval.cpp
@@ -81,3 +81,20 @@ const QuickString &Bed6Interval::getField(int fieldNum) const
 		break;
 	}
 }
+
+// Reports whether a BED6 column (numbered from 1) holds numeric data.
+bool Bed6Interval::isNumericField(int fieldNum) {
+	// Of the three columns handled here, only column 5 is numeric;
+	// columns 4 and 6 are not.
+	if (fieldNum == 5) {
+		return true;
+	}
+	if (fieldNum == 4 || fieldNum == 6) {
+		return false;
+	}
+
+	// Everything else is answered by the BED3 rules, which also reject
+	// column numbers that are out of range.
+	const bool bed3Answer = Bed3Interval::isNumericField(fieldNum);
+	return bed3Answer;
+}
diff --git a/src/utils/FileRecordTools/Records/Bed6Interval.h b/src/utils/FileRecordTools/Records/Bed6Interval.h
index 9ad9f80..023683f 100644
--- a/src/utils/FileRecordTools/Records/Bed6Interval.h
+++ b/src/utils/FileRecordTools/Records/Bed6Interval.h
@@ -27,6 +27,7 @@ public:
 
 	virtual const QuickString &getField(int fieldNum) const;
 	virtual int getNumFields() const  { return 6; }
+	static bool isNumericField(int fieldNum);
 
 
 protected:
diff --git a/src/utils/FileRecordTools/Records/BedGraphInterval.cpp b/src/utils/FileRecordTools/Records/BedGraphInterval.cpp
index e080857..9cfda48 100644
--- a/src/utils/FileRecordTools/Records/BedGraphInterval.cpp
+++ b/src/utils/FileRecordTools/Records/BedGraphInterval.cpp
@@ -60,3 +60,14 @@ const QuickString &BedGraphInterval::getField(int fieldNum) const
 	}
 }
 
+// Reports whether a BedGraph column (numbered from 1) holds numeric data.
+bool BedGraphInterval::isNumericField(int fieldNum) {
+	// Column 4 is numeric in a BedGraph record.
+	if (fieldNum == 4) {
+		return true;
+	}
+
+	// Columns 1-3 share the BED3 rules.
+	return Bed3Interval::isNumericField(fieldNum);
+}
+
diff --git a/src/utils/FileRecordTools/Records/BedGraphInterval.h b/src/utils/FileRecordTools/Records/BedGraphInterval.h
index 1bdf619..5db6fea 100644
--- a/src/utils/FileRecordTools/Records/BedGraphInterval.h
+++ b/src/utils/FileRecordTools/Records/BedGraphInterval.h
@@ -28,6 +28,7 @@ public:
 	virtual const QuickString &getField(int fieldNum) const;
 	virtual int getNumFields() const  { return 4; }
 
+	static bool isNumericField(int fieldNum);
 
 protected:
 	virtual ~BedGraphInterval();
diff --git a/src/utils/FileRecordTools/Records/BedPlusInterval.cpp b/src/utils/FileRecordTools/Records/BedPlusInterval.cpp
index fc8be36..5819b86 100644
--- a/src/utils/FileRecordTools/Records/BedPlusInterval.cpp
+++ b/src/utils/FileRecordTools/Records/BedPlusInterval.cpp
@@ -117,3 +117,18 @@ const QuickString &BedPlusInterval::getField(int fieldNum) const
 	}
 	return Bed6Interval::getField(fieldNum);
 }
+
+bool BedPlusInterval::isNumericField(int fieldNum) {
+	//
+	// TBD: There is currently no good way to guarantee / enforce whether
+	// fields after the 6th are numeric, so for now we give the user the
+	// benefit of the doubt on those and report them as numeric.
+	//
+	if (fieldNum <= startOtherIdx) {
+		// One of the leading standard columns: defer to the BED6 rules.
+		return Bed6Interval::isNumericField(fieldNum);
+	}
+
+	return true;
+}
+
diff --git a/src/utils/FileRecordTools/Records/BedPlusInterval.h b/src/utils/FileRecordTools/Records/BedPlusInterval.h
index 4b98b4f..077ed93 100644
--- a/src/utils/FileRecordTools/Records/BedPlusInterval.h
+++ b/src/utils/FileRecordTools/Records/BedPlusInterval.h
@@ -38,6 +38,8 @@ public:
 	virtual void setField(int fieldNum, const char *str) { (*(_otherIdxs[fieldNum])) = str; }
 	virtual void setNumPrintFields(int num) { _numPrintFields = num; }
 	virtual int getNumPrintFields() const { return _numPrintFields; }
+	static bool isNumericField(int fieldNum);
+
 
 protected:
 	virtual ~BedPlusInterval();
diff --git a/src/utils/FileRecordTools/Records/GffRecord.cpp b/src/utils/FileRecordTools/Records/GffRecord.cpp
index a91ce15..21cea1d 100644
--- a/src/utils/FileRecordTools/Records/GffRecord.cpp
+++ b/src/utils/FileRecordTools/Records/GffRecord.cpp
@@ -156,4 +156,40 @@ const QuickString &GffRecord::getField(int fieldNum) const
 	}
 }
 
+//
+// Reports whether a GFF column (numbered from 1) holds numeric data, so that
+// callers can decide whether a numeric operation may be applied to that
+// column.
+//
+bool GffRecord::isNumericField(int fieldNum) {
+	bool isNumeric = false;
+
+	switch (fieldNum) {
+	// Columns 4, 5 and 6 are the only ones reported as numeric.
+	case 4:
+	case 5:
+	case 6:
+		isNumeric = true;
+		break;
+
+	// Columns 1-3 and 7-9 are reported as non-numeric.
+	case 1:
+	case 2:
+	case 3:
+	case 7:
+	case 8:
+	case 9:
+		isNumeric = false;
+		break;
+
+	// Any other column number is passed on to the BED6 check. Columns 1-6
+	// never get here, so that call only ever sees numbers outside 1-9.
+	default:
+		isNumeric = Bed6Interval::isNumericField(fieldNum);
+		break;
+	}
+
+	return isNumeric;
+}
+
 
diff --git a/src/utils/FileRecordTools/Records/GffRecord.h b/src/utils/FileRecordTools/Records/GffRecord.h
index b84d96a..e675542 100644
--- a/src/utils/FileRecordTools/Records/GffRecord.h
+++ b/src/utils/FileRecordTools/Records/GffRecord.h
@@ -34,6 +34,7 @@ public:
 	//Note: using the assignment operator in a GffRecord can potentially be a performance hit,
 	//if the number of fields frequently differ between this object and the one being copied.
 	const GffRecord &operator=(const GffRecord &other);
+	static bool isNumericField(int fieldNum);
 
 protected:
 	virtual ~GffRecord();
diff --git a/src/utils/FileRecordTools/Records/Record.cpp b/src/utils/FileRecordTools/Records/Record.cpp
index 2beb4dc..89544ed 100644
--- a/src/utils/FileRecordTools/Records/Record.cpp
+++ b/src/utils/FileRecordTools/Records/Record.cpp
@@ -187,9 +187,9 @@ void Record::undoZeroLength()
 
 ostream &operator << (ostream &out, const Record &record)
 {
-	QuickString errBuf;
-	record.print(errBuf);
-	out << errBuf;
+	QuickString outBuf; //scratch buffer handed to print()
+	record.print(outBuf);
+	out << outBuf; //stream the buffer's contents to the caller's stream
 	return out;
 }
 
diff --git a/src/utils/FileRecordTools/Records/Record.h b/src/utils/FileRecordTools/Records/Record.h
index 2c303d9..d8071c1 100644
--- a/src/utils/FileRecordTools/Records/Record.h
+++ b/src/utils/FileRecordTools/Records/Record.h
@@ -129,6 +129,8 @@ public:
 	virtual bool sameChromIntersects(const Record *otherRecord,
 			bool sameStrand, bool diffStrand, float overlapFraction, bool reciprocal) const;
 
+//	virtual static bool isNumericField(int fieldNum) const = 0;
+
 
 protected:
 	virtual ~Record(); //by making the destructor protected, only the friend class(es) can actually delete Record objects, or objects derived from Record.
diff --git a/src/utils/GenomeFile/Makefile b/src/utils/GenomeFile/Makefile
index afaeccd..fd17d29 100644
--- a/src/utils/GenomeFile/Makefile
+++ b/src/utils/GenomeFile/Makefile
@@ -6,6 +6,7 @@ UTILITIES_DIR = ../
 # -------------------
 INCLUDES = -I$(UTILITIES_DIR)/general/ \
 		 	-I$(UTILITIES_DIR)/lineFileUtilities/ \
+ 			-I$(UTILITIES_DIR)/KeyListOps/ \
            -I$(UTILITIES_DIR)/BamTools/include/	
 
 # ----------------------------------
diff --git a/src/utils/KeyListOps/KeyListOps.cpp b/src/utils/KeyListOps/KeyListOps.cpp
new file mode 100644
index 0000000..6576350
--- /dev/null
+++ b/src/utils/KeyListOps/KeyListOps.cpp
@@ -0,0 +1,364 @@
+/*
+ * KeyListOps.cpp
+ *
+ *  Created on: Feb 24, 2014
+ *      Author: nek3d
+ */
+#include "KeyListOps.h"
+#include "FileRecordMgr.h"
+#include <cmath> //for isnan
+
+KeyListOps::KeyListOps() {
+	// Operation names accepted from the user, mapped to their codes.
+	_opCodes["sum"] = SUM;
+	_opCodes["mean"] = MEAN;
+	_opCodes["stddev"] = STDDEV;
+	_opCodes["sample_stddev"] = SAMPLE_STDDEV;
+	_opCodes["median"] = MEDIAN;
+	_opCodes["mode"] = MODE;
+	_opCodes["antimode"] = ANTIMODE;
+	_opCodes["min"] = MIN;
+	_opCodes["max"] = MAX;
+	_opCodes["absmin"] = ABSMIN;
+	_opCodes["absmax"] = ABSMAX;
+	_opCodes["count"] = COUNT;
+	_opCodes["distinct"] = DISTINCT;
+	_opCodes["count_distinct"] = COUNT_DISTINCT;
+	_opCodes["distinct_only"] = DISTINCT_ONLY;
+	_opCodes["collapse"] = COLLAPSE;
+	_opCodes["concat"] = CONCAT;
+	_opCodes["freq_asc"] = FREQ_ASC;
+	_opCodes["freq_desc"] = FREQ_DESC;
+	_opCodes["first"] = FIRST;
+	_opCodes["last"] = LAST;
+
+	// Operations that need numeric column values; every valid code is listed explicitly.
+	_isNumericOp[SUM] = true;
+	_isNumericOp[MEAN] = true;
+	_isNumericOp[STDDEV] = true;
+	_isNumericOp[SAMPLE_STDDEV] = true;
+	_isNumericOp[MEDIAN] = true;
+	_isNumericOp[MODE] = false;
+	_isNumericOp[ANTIMODE] = false;
+	_isNumericOp[MIN] = true;
+	_isNumericOp[MAX] = true;
+	_isNumericOp[ABSMIN] = true;
+	_isNumericOp[ABSMAX] = true;
+	_isNumericOp[COUNT] = false;
+	_isNumericOp[DISTINCT] = false;
+	_isNumericOp[COUNT_DISTINCT] = false;
+	_isNumericOp[DISTINCT_ONLY] = false;
+	_isNumericOp[COLLAPSE] = false;
+	_isNumericOp[CONCAT] = false;
+	_isNumericOp[FREQ_ASC] = false;
+	_isNumericOp[FREQ_DESC] = false;
+	_isNumericOp[FIRST] = false;
+	_isNumericOp[LAST] = false;
+
+	_methods.setDelimStr(",");
+	_methods.setNullValue(".");
+	_columns = "5";      // default to BED score column
+	_operations = "sum"; // default to "sum"
+}
+
+// Whether an op needs numeric input; codes absent from the table count as non-numeric.
+bool KeyListOps::isNumericOp(OP_TYPES op) const {
+	const map<OP_TYPES, bool>::const_iterator entry = _isNumericOp.find(op);
+	return entry != _isNumericOp.end() && entry->second;
+}
+bool KeyListOps::isNumericOp(const QuickString &op) const {
+	return isNumericOp(getOpCode(op)); // resolve the name, then reuse the check above
+}
+
+// Translates an operation name into its OP_TYPES code.
+KeyListOps::OP_TYPES KeyListOps::getOpCode(const QuickString &operation) const {
+	const map<QuickString, OP_TYPES>::const_iterator match = _opCodes.find(operation);
+	if (match != _opCodes.end()) {
+		return match->second;
+	}
+	// Names that were never registered are reported as INVALID.
+	return INVALID;
+}
+
+
+// Checks the requested columns and operations against dbFile and rebuilds _colOps.
+// Returns false after printing the reason on bad input; a BAM dbFile exits the program.
+bool KeyListOps::isValidColumnOps(FileRecordMgr *dbFile) {
+	if (dbFile->getFileType() == FileRecordTypeChecker::BAM_FILE_TYPE) {
+		cerr << endl << "*****" << endl
+		     << "***** ERROR: BAM database file not currently supported for column operations."
+		     << endl;
+		exit(1);
+	}
+
+	//start from an empty list so that a repeated call cannot apply stale pairs.
+	_colOps.clear();
+
+	//get the strings from context containing the comma-delimited lists of columns
+	//and operations. Split both of these into vectors. Get the operation code
+	//for each operation string. Finally, make a vector of pairs, where the first
+	//member of each pair is a column number, and the second member is the code for the
+	//operation to perform on that column.
+	vector<QuickString> columnsVec;
+	vector<QuickString> opsVec;
+	int numCols = Tokenize(_columns, columnsVec, ',');
+	int numOps = Tokenize(_operations, opsVec, ',');
+
+	if (numOps < 1 || numCols < 1) {
+		 cerr << endl << "*****" << endl
+		             << "***** ERROR: There must be at least one column and at least one operation named." << endl;
+		 return false;
+	}
+	if (numOps > 1 && numCols != numOps) {
+		 cerr << endl << "*****" << endl
+		             << "***** ERROR: There are " << numCols <<" columns given, but there are " << numOps << " operations." << endl;
+		cerr << "\tPlease provide either a single operation that will be applied to all listed columns, " << endl;
+		cerr << "\tor an operation for each column." << endl;
+		return false;
+	}
+	for (int i=0; i < (int)columnsVec.size(); i++) {
+		int col = str2chrPos(columnsVec[i]);
+
+		//check that the column number is valid
+		if (col < 1 || col > dbFile->getNumFields()) {
+			 cerr << endl << "*****" << endl  << "***** ERROR: Requested column " << col << ", but database file "
+					 << dbFile->getFileName() << " only has fields 1 - " << dbFile->getNumFields() << "." << endl;
+			 return false;
+		}
+		const QuickString &operation = opsVec.size() > 1 ? opsVec[i] : opsVec[0];
+		OP_TYPES opCode = getOpCode(operation);
+		if (opCode == INVALID) {
+			cerr << endl << "*****" << endl
+								 << "***** ERROR: " << operation << " is not a valid operation. " << endl;
+			return false;
+		}
+		_colOps.push_back(pair<int, OP_TYPES>(col, opCode));
+	}
+
+	//The final step we need to do is check that for each column/operation pair,
+	//if the operation is numeric, see if the database's record type supports
+	//numeric operations for that column. For instance, we can allow the mean
+	//of column 4 for a BedGraph file, because that's numeric, but not for Bed4,
+	//because that isn't.
+
+	for (int i = 0; i < (int)_colOps.size(); i++) {
+		int col = _colOps[i].first;
+		OP_TYPES opCode = _colOps[i].second;
+		FileRecordTypeChecker::RECORD_TYPE recordType = dbFile->getRecordType();
+
+		if (isNumericOp(opCode)) {
+			bool isValidNumOp = false;
+			switch(recordType) {
+				case FileRecordTypeChecker::BED3_RECORD_TYPE:
+					isValidNumOp = Bed3Interval::isNumericField(col);
+					break;
+
+				case FileRecordTypeChecker::BED4_RECORD_TYPE:
+					isValidNumOp = Bed4Interval::isNumericField(col);
+					break;
+
+				case FileRecordTypeChecker::BED5_RECORD_TYPE:
+					isValidNumOp = Bed5Interval::isNumericField(col);
+					break;
+
+				case FileRecordTypeChecker::BEDGRAPH_RECORD_TYPE:
+					isValidNumOp = BedGraphInterval::isNumericField(col);
+					break;
+
+				case FileRecordTypeChecker::BED6_RECORD_TYPE:
+					isValidNumOp = Bed6Interval::isNumericField(col);
+					break;
+
+				case FileRecordTypeChecker::BED_PLUS_RECORD_TYPE:
+					isValidNumOp = BedPlusInterval::isNumericField(col);
+					break;
+
+				case FileRecordTypeChecker::BED12_RECORD_TYPE:
+					isValidNumOp = Bed12Interval::isNumericField(col);
+					break;
+
+				case FileRecordTypeChecker::BAM_RECORD_TYPE:
+					isValidNumOp = BamRecord::isNumericField(col);
+					break;
+
+				case FileRecordTypeChecker::VCF_RECORD_TYPE:
+					isValidNumOp = VcfRecord::isNumericField(col);
+					break;
+
+				case FileRecordTypeChecker::GFF_RECORD_TYPE:
+					isValidNumOp = GffRecord::isNumericField(col);
+					break;
+
+				default:
+					break;
+			}
+			if (!isValidNumOp) {
+				 cerr << endl << "*****" << endl  << "***** ERROR: Column " << col << " is not a numeric field for database file "
+						 << dbFile->getFileName() << "." << endl;
+				 return false;
+			}
+		}
+	}
+
+    return true;
+}
+
+const QuickString & KeyListOps::getOpVals(RecordKeyList &hits)
+{
+	//loop through all requested columns, and for each one, call the method needed
+	//for the operation specified. Results are joined with tabs; a NaN result is written as the null value.
+	_methods.setKeyList(&hits);
+	_outVals.clear();
+	double val = 0.0;
+	for (int i=0; i < (int)_colOps.size(); i++) {
+		int col = _colOps[i].first;
+		OP_TYPES opCode = _colOps[i].second;
+
+		_methods.setColumn(col);
+		switch (opCode) {
+		case SUM:
+			val = _methods.getSum();
+			if (isnan(val)) {
+				_outVals.append(_methods.getNullValue());
+			} else {
+				_outVals.append(val);
+			}
+			break;
+
+		case MEAN:
+			val = _methods.getMean();
+			if (isnan(val)) {
+				_outVals.append(_methods.getNullValue());
+			} else {
+				_outVals.append(val);
+			}
+			break;
+
+		case STDDEV:
+			val = _methods.getStddev();
+			if (isnan(val)) {
+				_outVals.append(_methods.getNullValue());
+			} else {
+				_outVals.append(val);
+			}
+			break;
+
+		case SAMPLE_STDDEV:
+			val = _methods.getSampleStddev();
+			if (isnan(val)) {
+				_outVals.append(_methods.getNullValue());
+			} else {
+				_outVals.append(val);
+			}
+			break;
+
+		case MEDIAN:
+			val = _methods.getMedian();
+			if (isnan(val)) {
+				_outVals.append(_methods.getNullValue());
+			} else {
+				_outVals.append(val);
+			}
+			break;
+
+		case MODE:
+			_outVals.append(_methods.getMode());
+			break;
+
+		case ANTIMODE:
+			_outVals.append(_methods.getAntiMode());
+			break;
+
+		case MIN:
+			val = _methods.getMin();
+			if (isnan(val)) {
+				_outVals.append(_methods.getNullValue());
+			} else {
+				_outVals.append(val);
+			}
+			break;
+
+		case MAX:
+			val = _methods.getMax();
+			if (isnan(val)) {
+				_outVals.append(_methods.getNullValue());
+			} else {
+				_outVals.append(val);
+			}
+			break;
+
+		case ABSMIN:
+			val = _methods.getAbsMin();
+			if (isnan(val)) {
+				_outVals.append(_methods.getNullValue());
+			} else {
+				_outVals.append(val);
+			}
+			break;
+
+		case ABSMAX:
+			val = _methods.getAbsMax();
+			if (isnan(val)) {
+				_outVals.append(_methods.getNullValue());
+			} else {
+				_outVals.append(val);
+			}
+			break;
+
+		case COUNT:
+			_outVals.append(_methods.getCount());
+			break;
+
+		case DISTINCT:
+			_outVals.append(_methods.getDistinct());
+			break;
+
+		case COUNT_DISTINCT:
+			_outVals.append(_methods.getCountDistinct());
+			break;
+
+		case DISTINCT_ONLY:
+			_outVals.append(_methods.getDistinctOnly());
+			break;
+
+		case COLLAPSE:
+			_outVals.append(_methods.getCollapse());
+			break;
+
+		case CONCAT:
+			_outVals.append(_methods.getConcat());
+			break;
+
+		case FREQ_ASC:
+			_outVals.append(_methods.getFreqAsc());
+			break;
+
+		case FREQ_DESC:
+			_outVals.append(_methods.getFreqDesc());
+			break;
+
+		case FIRST:
+			_outVals.append(_methods.getFirst());
+			break;
+
+		case LAST:
+			_outVals.append(_methods.getLast());
+			break;
+
+		case INVALID:
+		default:
+			// Any unrecognized operation should have been handled already in the context validation.
+			// Reaching this point means the validation phase was bypassed, so stop here, as the
+			// message says, rather than carry on and emit a line with a missing value.
+			cerr << "ERROR: Invalid operation given for column " << col << ". Exiting..." << endl;
+			exit(1);
+		}
+		//if this isn't the last column, add a tab.
+		if (i < (int)_colOps.size() -1) {
+			_outVals.append('\t');
+		}
+	}
+	return _outVals;
+}
+
+
diff --git a/src/utils/KeyListOps/KeyListOps.h b/src/utils/KeyListOps/KeyListOps.h
new file mode 100644
index 0000000..3c26d2c
--- /dev/null
+++ b/src/utils/KeyListOps/KeyListOps.h
@@ -0,0 +1,54 @@
+/*
+ * KeyListOps.h
+ *
+ *  Created on: Feb 24, 2014
+ *      Author: nek3d
+ */
+
+#ifndef KEYLISTOPS_H_
+#define KEYLISTOPS_H_
+
+#include "KeyListOpsMethods.h"
+
+class FileRecordMgr;
+
+class KeyListOps { //applies per-column summary operations to a list of records
+public:
+	//Registers the operation names and sets the defaults: column "5", operation "sum".
+	KeyListOps();
+	//Comma-delimited lists of column numbers and of operation names.
+	void setColumns(const QuickString &columns) { _columns = columns; }
+	void setOperations(const QuickString & operation) { _operations = operation; }
+	void setNullValue(const QuickString & nullValue) { _methods.setNullValue(nullValue); }
+	void setDelimStr(const QuickString & delimStr) { _methods.setDelimStr(delimStr); }
+	//Record list the operations read from; getOpVals() also sets this itself.
+	void setKeyList(RecordKeyList *keyList) { _methods.setKeyList(keyList); }
+	//Codes for the supported operations; INVALID marks an unrecognized name.
+	typedef enum { SUM, MEAN, STDDEV, SAMPLE_STDDEV, MEDIAN, MODE, ANTIMODE, MIN, MAX, ABSMIN, ABSMAX, COUNT, DISTINCT, COUNT_DISTINCT,
+    	DISTINCT_ONLY, COLLAPSE, CONCAT, FREQ_ASC, FREQ_DESC, FIRST, LAST, INVALID } OP_TYPES;
+	//Checks the columns/operations against dbFile and builds the (column, op) list.
+	bool isValidColumnOps(FileRecordMgr *dbFile);
+	//Applies each (column, op) pair to hits; the results come back tab-separated.
+	const QuickString &getOpVals(RecordKeyList &hits);
+
+private:
+    void init(); //NOTE(review): no definition appears in KeyListOps.cpp - confirm it is needed
+
+    QuickString _operations; //comma-delimited operation names
+    QuickString _columns; //comma-delimited column numbers
+
+	KeyListOpsMethods _methods; //performs the individual operations
+	map<QuickString, OP_TYPES> _opCodes; //operation name -> code
+	map<OP_TYPES, bool> _isNumericOp; //code -> whether it needs numeric input
+
+    typedef vector<pair<int, OP_TYPES> > colOpsType;
+    colOpsType _colOps; //(column, op) pairs filled by isValidColumnOps()
+    QuickString _outVals; //buffer returned by getOpVals()
+
+    OP_TYPES getOpCode(const QuickString &operation) const; //INVALID if the name is unknown
+    bool isNumericOp(OP_TYPES op) const;
+    bool isNumericOp(const QuickString &op) const;
+
+};
+
+#endif /* KEYLISTOPS_H_ */
diff --git a/src/utils/KeyListOps/KeyListOpsMethods.cpp b/src/utils/KeyListOps/KeyListOpsMethods.cpp
new file mode 100644
index 0000000..0b00135
--- /dev/null
+++ b/src/utils/KeyListOps/KeyListOpsMethods.cpp
@@ -0,0 +1,368 @@
+/*
+ * KeyListOpsMethods.cpp
+ *
+ *  Created on: Feb 6, 2014
+ *      Author: nek3d
+ */
+
+#include "KeyListOpsMethods.h"
+#include <cfloat>
+#include <cmath>
+#include <algorithm>
+
+KeyListOpsMethods::KeyListOpsMethods() //starts out attached to the internal _nullKeyList
+: _keyList(&_nullKeyList),
+  _column(1),
+  _nullVal("."),
+  _delimStr(","),
+  _iter(_nullKeyList.begin())
+{
+}
+//Same defaults for the null value and delimiter, but bound to the caller's list and column.
+KeyListOpsMethods::KeyListOpsMethods(RecordKeyList *keyList, int column)
+: _keyList(keyList),
+  _column(column),
+  _nullVal("."),
+  _delimStr(","),
+  _iter(keyList->begin())
+{
+}
+
+
+KeyListOpsMethods::~KeyListOpsMethods() {
+	//intentionally empty
+}
+
+// Total of the selected column's numeric values over the whole key list.
+// An empty list yields NAN rather than 0.
+double KeyListOpsMethods::getSum() {
+	double total = NAN;
+	if (!empty()) {
+		total = 0.0;
+		for (begin(); !end(); next()) total += getColValNum();
+	}
+	return total;
+}
+
+// Arithmetic mean of the selected column's values; NAN for an empty list.
+double KeyListOpsMethods::getMean() {
+	if (empty()) return NAN;
+	const double total = getSum();
+	return total / (float)getCount();
+}
+
+
+// return the population standard deviation (divides by N); NAN for an empty list
+double KeyListOpsMethods::getStddev() {
+	if (empty()) return NAN;
+	// The mean of the squared deviations is the variance; the standard deviation is its square root.
+	double avg = getMean();
+	double squareDiffSum = 0.0;
+	for (begin(); !end(); next()) {
+		double val = getColValNum();
+		double diff = val - avg;
+		squareDiffSum += diff * diff;
+	}
+	return sqrt(squareDiffSum / (float)getCount());
+}
+// return the sample standard deviation (divides by N - 1)
+double KeyListOpsMethods::getSampleStddev() {
+	// Undefined for fewer than two values (the divisor N - 1 would be zero): report NAN.
+	if (empty() || getCount() < 2) return NAN;
+	double avg = getMean();
+	double squareDiffSum = 0.0;
+	for (begin(); !end(); next()) {
+		double val = getColValNum();
+		double diff = val - avg;
+		squareDiffSum += diff * diff;
+	}
+	return sqrt(squareDiffSum / ((float)getCount() - 1.0));
+}
+
+// Median of the selected column's values; NAN for an empty list.
+double KeyListOpsMethods::getMedian() {
+	if (empty()) return NAN;
+
+	//Work on a sorted numeric copy of the column. With an odd number of
+	//values the median is the middle one; with an even number it is the
+	//average of the two middle ones.
+	toArray(true, ASC);
+	const size_t count = getCount();
+	const size_t mid = count / 2;
+	if (count % 2 == 0) {
+		//even number of elements. Take average of middle 2.
+		return (_numArray[mid - 1] + _numArray[mid]) / 2.0;
+	}
+	//odd number of elements. Take middle one.
+	return _numArray[mid];
+}
+
+// Most frequent value in the selected column, or the null value for an
+// empty list. On a tie, the value reached first in the frequency map wins.
+const QuickString &KeyListOpsMethods::getMode() {
+	if (empty()) return _nullVal;
+	makeFreqMap(); //also rewinds _freqIter to the start of _freqMap
+
+	freqMapType::iterator commonest = _freqMap.begin();
+	int highestCount = 0;
+	while (_freqIter != _freqMap.end()) {
+		if (highestCount < _freqIter->second) {
+			highestCount = _freqIter->second;
+			commonest = _freqIter;
+		}
+		_freqIter++;
+	}
+	_retStr = commonest->first;
+	return _retStr;
+}
+// Least frequent value in the selected column, or the null value for an
+// empty list. On a tie, the value reached first in the frequency map wins.
+const QuickString &KeyListOpsMethods::getAntiMode() {
+	if (empty()) return _nullVal;
+	makeFreqMap(); //also rewinds _freqIter to the start of _freqMap
+
+	freqMapType::iterator rarest = _freqMap.begin();
+	int lowestCount = INT_MAX;
+	while (_freqIter != _freqMap.end()) {
+		if (lowestCount > _freqIter->second) {
+			lowestCount = _freqIter->second;
+			rarest = _freqIter;
+		}
+		_freqIter++;
+	}
+	_retStr = rarest->first;
+	return _retStr;
+}
+// Smallest numeric value in the selected column; NAN for an empty list.
+double KeyListOpsMethods::getMin() {
+	if (empty()) return NAN;
+
+	double smallest = DBL_MAX;
+	for (begin(); !end(); next()) {
+		const double candidate = getColValNum();
+		if (candidate < smallest) smallest = candidate;
+	}
+	return smallest;
+}
+
+// return the maximum element of the vector (NAN if the vector is empty)
+double KeyListOpsMethods::getMax() {
+	if (empty()) return NAN;
+	// Seed with the most negative finite double; DBL_MIN is the smallest *positive* double.
+	double maxVal = -DBL_MAX;
+	for (begin(); !end(); next()) {
+		double currVal = getColValNum();
+		maxVal = (currVal > maxVal) ? currVal : maxVal;
+	}
+	return maxVal;
+}
+
+// Smallest absolute value in the selected column; NAN for an empty list.
+double KeyListOpsMethods::getAbsMin() {
+	if (empty()) return NAN;
+
+	double smallest = DBL_MAX;
+	for (begin(); !end(); next()) {
+		const double magnitude = abs(getColValNum());
+		if (magnitude < smallest) smallest = magnitude;
+	}
+	return smallest;
+}
+// return the maximum absolute value of the vector (NAN if the vector is empty)
+double KeyListOpsMethods::getAbsMax() {
+	if (empty()) return NAN;
+	// Absolute values are never below zero, so zero is the correct seed (DBL_MIN is above zero).
+	double maxVal = 0.0;
+	for (begin(); !end(); next()) {
+		double currVal = abs(getColValNum());
+		maxVal = (currVal > maxVal) ? currVal : maxVal;
+	}
+	return maxVal;
+}
+// Number of records in the current key list.
+uint32_t KeyListOpsMethods::getCount() {
+	return static_cast<uint32_t>(_keyList->size());
+}
+// Delimited list of the distinct values in the selected column, each listed once.
+const QuickString &KeyListOpsMethods::getDistinct() {
+	if (empty()) return _nullVal;
+	makeFreqMap();
+	_retStr.clear();
+	while (_freqIter != _freqMap.end()) {
+		if (_freqIter != _freqMap.begin()) _retStr += _delimStr;
+		_retStr.append(_freqIter->first);
+		_freqIter++;
+	}
+	return _retStr;
+}
+
+const QuickString &KeyListOpsMethods::getDistinctOnly() {
+	//Delimited list of the values occurring exactly once; the null value if there are none.
+	if (empty()) return _nullVal;
+	makeFreqMap();
+	_retStr.clear();
+	int numKept = 0; //delimiters depend on how many values were kept, not on map position
+	for (; _freqIter != _freqMap.end(); _freqIter++) {
+		if (_freqIter->second != 1) continue; //repeated value: discard
+		if (numKept++ > 0) _retStr += _delimStr;
+		_retStr.append(_freqIter->first);
+	}
+	return numKept > 0 ? _retStr : _nullVal;
+}
+
+// Number of different values in the selected column (0 for an empty list).
+uint32_t KeyListOpsMethods::getCountDistinct() {
+	if (empty()) return 0;
+	makeFreqMap();
+	const uint32_t numDistinct = _freqMap.size();
+	return numDistinct;
+}
+// Every value of the selected column, in list order, joined with _delimStr.
+// NOTE(review): the delimiter argument is never read - confirm that is intended.
+const QuickString &KeyListOpsMethods::getCollapse(const QuickString &delimiter) {
+	if (empty()) return _nullVal;
+	_retStr.clear();
+	bool isFirst = true;
+	begin();
+	while (!end()) {
+		if (!isFirst) _retStr += _delimStr;
+		_retStr.append(getColVal());
+		isFirst = false;
+		next();
+	}
+	return _retStr;
+}
+// All values of the selected column run together with nothing in between;
+// the null value for an empty list.
+const QuickString &KeyListOpsMethods::getConcat() {
+	if (empty()) return _nullVal;
+
+	//A concat is a collapse with an empty delimiter: blank out _delimStr,
+	//let getCollapse() fill _retStr, then put the real delimiter back.
+	const QuickString savedDelimStr(_delimStr);
+	_delimStr = "";
+	getCollapse();
+	_delimStr = savedDelimStr;
+	return _retStr;
+}
+
+// Histogram of the selected column as value:count pairs, in desc. order of count.
+const QuickString &KeyListOpsMethods::getFreqDesc() {
+	if (empty()) return _nullVal;
+
+	//Invert the frequency map: key each value by its number of occurrences.
+	makeFreqMap();
+	histDescType byCount;
+	while (_freqIter != _freqMap.end()) {
+		byCount.insert(pair<int, QuickString>(_freqIter->second, _freqIter->first));
+		_freqIter++;
+	}
+	//Walk the inverted map and emit each entry as value:count.
+	_retStr.clear();
+	for (histDescType::iterator entry = byCount.begin(); entry != byCount.end(); entry++) {
+		if (entry != byCount.begin()) _retStr += _delimStr;
+		_retStr.append(entry->second);
+		_retStr += ":";
+		_retStr.append(entry->first);
+	}
+	return _retStr;
+}
+// Histogram of the selected column as value:count pairs, in asc. order of
+// count; the null value for an empty list.
+const QuickString &KeyListOpsMethods::getFreqAsc() {
+	if (empty()) return _nullVal;
+
+	//Invert the frequency map: key each value by its number of occurrences.
+	makeFreqMap();
+	histAscType byCount;
+	while (_freqIter != _freqMap.end()) {
+		byCount.insert(pair<int, QuickString>(_freqIter->second, _freqIter->first));
+		_freqIter++;
+	}
+	//Walk the inverted map and emit each entry as value:count.
+	_retStr.clear();
+	for (histAscType::iterator entry = byCount.begin(); entry != byCount.end(); entry++) {
+		if (entry != byCount.begin()) _retStr += _delimStr;
+		_retStr.append(entry->second);
+		_retStr += ":";
+		_retStr.append(entry->first);
+	}
+	return _retStr;
+}
+// Selected column of the first record in the list, or the null value if the list is empty.
+const QuickString &KeyListOpsMethods::getFirst() {
+	if (!empty()) {
+		begin(); //rewind to the first record
+		return getColVal();
+	}
+	return _nullVal;
+}
+// Selected column of the last record in the list, or the null value if
+// the list is empty.
+const QuickString &KeyListOpsMethods::getLast() {
+	if (empty()) return _nullVal;
+	//Rewind, then step forward count - 1 times to land on the last record.
+	begin();
+	for (size_t stepsLeft = getCount() - 1; stepsLeft > 0; stepsLeft--) {
+		next();
+	}
+	return getColVal();
+}
+
+const QuickString &KeyListOpsMethods::getColVal() {
+	return _iter->value()->getField(_column); //field _column of the record _iter currently points at
+}
+
+double KeyListOpsMethods::getColValNum() {
+	return atof(_iter->value()->getField(_column).c_str()); //same field parsed by atof(), which yields 0.0 for non-numeric text
+}
+
+// Copies the selected column of every record into _numArray (useNum) or
+// _qsArray (otherwise), then sorts that array unless sortVal is UNSORTED.
+void KeyListOpsMethods::toArray(bool useNum, SORT_TYPE sortVal) {
+	//TBD: optimize performance with better memory management.
+	const size_t numRecords = _keyList->size();
+	size_t slot = 0;
+	if (useNum) {
+		_numArray.resize(numRecords);
+		for (begin(); !end(); next()) {
+			_numArray[slot++] = getColValNum();
+		}
+	} else {
+		_qsArray.resize(numRecords);
+		for (begin(); !end(); next()) {
+			_qsArray[slot++] = getColVal();
+		}
+	}
+	if (sortVal == UNSORTED) {
+		return;
+	}
+	sortArray(useNum, sortVal == ASC);
+}
+
+// Sorts _numArray when useNum is set, _qsArray otherwise. ascOrder picks
+// ascending (true) or descending (false) order.
+void KeyListOpsMethods::sortArray(bool useNum, bool ascOrder)
+{
+	if (useNum && ascOrder) {
+		//numeric values compare as doubles
+		sort(_numArray.begin(), _numArray.end(), less<double>());
+	} else if (useNum) {
+		sort(_numArray.begin(), _numArray.end(), greater<double>());
+	} else if (ascOrder) {
+		//text values compare through QuickString's own ordering
+		sort(_qsArray.begin(), _qsArray.end(), less<QuickString>());
+	} else {
+		sort(_qsArray.begin(), _qsArray.end(), greater<QuickString>());
+	}
+}
+
+// Rebuilds _freqMap (value -> occurrence count) and rewinds _freqIter to its start.
+void KeyListOpsMethods::makeFreqMap() {
+	_freqMap.clear();
+	for (begin(); !end(); next()) {
+		const QuickString &value = getColVal();
+		_freqMap[value]++;
+	}
+	_freqIter = _freqMap.begin();
+}
diff --git a/src/utils/KeyListOps/KeyListOpsMethods.h b/src/utils/KeyListOps/KeyListOpsMethods.h
new file mode 100644
index 0000000..0cac9c8
--- /dev/null
+++ b/src/utils/KeyListOps/KeyListOpsMethods.h
@@ -0,0 +1,113 @@
+/*
+ * KeyListOpsMethods.h
+ *
+ *  Created on: Feb 6, 2014
+ *      Author: nek3d
+ */
+
+#ifndef KEYLISTOPSMETHODS_H_
+#define KEYLISTOPSMETHODS_H_
+
+using namespace std;
+
+#include <map>
+#include <utility> //for pair
+#include "QuickString.h"
+#include <stdint.h>
+#include "RecordKeyList.h"
+
+class KeyListOpsMethods { // summary operations over one column (_column) of the records in a RecordKeyList
+public:
+	KeyListOpsMethods();
+	KeyListOpsMethods(RecordKeyList *keyList, int column = 1);
+	~KeyListOpsMethods();
+
+	// Accessors for the key list, column, null value and delimiter string.
+	void setKeyList(RecordKeyList *keyList) { _keyList = keyList; }
+	void setColumn(int col) { _column = col; }
+	void setNullValue(const QuickString & nullVal) { _nullVal = nullVal; }
+	const QuickString &getNullValue() const { return _nullVal; }
+	void setDelimStr(const QuickString &delimStr) { _delimStr = delimStr; }
+	const QuickString &getDelimStr() const { return _delimStr; }
+
+    // return the total of the values in the vector
+    double getSum();
+    // return the average value in the vector
+    double getMean();
+     // return the standard deviation
+    double getStddev();
+    // return the sample standard deviation
+    double getSampleStddev();
+    // return the median value in the vector
+    double getMedian();
+    // return the most common value in the vector
+    const QuickString &getMode();
+    // return the least common value in the vector
+    const QuickString &getAntiMode();
+    // return the minimum element of the vector
+    double getMin();
+    // return the maximum element of the vector
+    double getMax();
+    // return the minimum absolute value of the vector
+    double getAbsMin();
+    // return the maximum absolute value of the vector
+    double getAbsMax();
+    // return the count of elements in the vector
+    uint32_t getCount();
+    // return the count of _unique_ elements in the vector
+    uint32_t getCountDistinct();
+    // return only those elements that occur once
+    const QuickString &getDistinctOnly();
+    // return a delimiter-separated list of elements
+    const QuickString & getCollapse(const QuickString & delimiter = ",");
+    // return a concatenation of all elements in the vector
+    const QuickString & getConcat();
+    // return a comma-separated list of the _unique_ elements
+    const QuickString & getDistinct();
+    // return a histogram of values and their freqs. in desc. order of frequency
+    const QuickString & getFreqDesc();
+    // return a histogram of values and their freqs. in asc. order of frequency
+    const QuickString & getFreqAsc();
+    // return the first value in the list
+    const QuickString & getFirst();
+    // return the last value in the list
+    const QuickString & getLast();
+
+private:
+	RecordKeyList *_keyList;
+	int _column;
+	QuickString _nullVal;
+	QuickString _delimStr;
+	QuickString _retStr;
+
+	RecordKeyList _nullKeyList; //this has to exist just so we can initialize _iter, below.
+	RecordKeyList::const_iterator_type _iter;
+
+	// Some methods need to put values into a vector, mostly for sorting.
+	vector<double> _numArray;
+	vector<QuickString> _qsArray;
+
+	typedef map<QuickString, int> freqMapType;
+	freqMapType _freqMap;
+	freqMapType::iterator _freqIter;
+
+	typedef enum { UNSORTED, ASC, DESC} SORT_TYPE; // sort order accepted by toArray()
+
+	typedef multimap<int, QuickString, less<int> > histAscType;
+	typedef multimap<int, QuickString, greater<int> > histDescType;
+	void init();
+	const QuickString &getColVal();
+	double getColValNum();
+	bool empty() { return _keyList->empty(); } // the four helpers below drive _iter over _keyList
+	void begin() { _iter = _keyList->begin(); } // rewind _iter to the start of the list
+	bool end() { return _iter == _keyList->end(); } // true once _iter has reached _keyList->end()
+	void next() { _iter = _keyList->next(); } // advance; the key list itself supplies the next position
+	void toArray(bool useNum, SORT_TYPE sortVal = UNSORTED);
+	void sortArray(bool useNum, bool ascOrder);
+	void makeFreqMap();
+
+
+};
+
+
+#endif /* KEYLISTOPSMETHODS_H_ */
diff --git a/src/utils/NewChromsweep/Makefile b/src/utils/KeyListOps/Makefile
similarity index 81%
copy from src/utils/NewChromsweep/Makefile
copy to src/utils/KeyListOps/Makefile
index 8f4d931..0b0ac99 100644
--- a/src/utils/NewChromsweep/Makefile
+++ b/src/utils/KeyListOps/Makefile
@@ -6,7 +6,6 @@ UTILITIES_DIR = ../../utils/
 # -------------------
 INCLUDES = -I$(UTILITIES_DIR)/general/ \
 			-I$(UTILITIES_DIR)/fileType/ \
-			-I$(UTILITIES_DIR)/Contexts/ \
 			-I$(UTILITIES_DIR)/GenomeFile/ \
            -I$(UTILITIES_DIR)/FileRecordTools/ \
            -I$(UTILITIES_DIR)/FileRecordTools/FileReaders/ \
@@ -19,21 +18,26 @@ INCLUDES = -I$(UTILITIES_DIR)/general/ \
 # ----------------------------------
 # define our source and object files
 # ----------------------------------
-SOURCES= NewChromsweep.cpp NewChromsweep.h 
-OBJECTS= NewChromsweep.o
+SOURCES= KeyListOps.cpp KeyListOps.h KeyListOpsMethods.cpp KeyListOpsMethods.h
+OBJECTS= KeyListOps.o KeyListOpsMethods.o
 _EXT_OBJECTS=
 EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
 BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
 
+all: $(BUILT_OBJECTS)
+
+.PHONY: all
+
 $(BUILT_OBJECTS): $(SOURCES)
 	@echo "  * compiling" $(*F).cpp
 	@$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
 
+
 $(EXT_OBJECTS):
 	@$(MAKE) --no-print-directory -C $(INCLUDES)
 
 clean:
 	@echo "Cleaning up."
-	@rm -f $(OBJ_DIR)/NewChromsweep.o $(BIN_DIR)/NewChromsweep.o
+	@rm -f $(OBJ_DIR)/KeyListOps.o $(OBJ_DIR)/KeyListOpsMethods.o
 
 .PHONY: clean
\ No newline at end of file
diff --git a/src/utils/NewChromsweep/Makefile b/src/utils/NewChromsweep/Makefile
index 8f4d931..34fc5d1 100644
--- a/src/utils/NewChromsweep/Makefile
+++ b/src/utils/NewChromsweep/Makefile
@@ -11,6 +11,7 @@ INCLUDES = -I$(UTILITIES_DIR)/general/ \
            -I$(UTILITIES_DIR)/FileRecordTools/ \
            -I$(UTILITIES_DIR)/FileRecordTools/FileReaders/ \
            -I$(UTILITIES_DIR)/FileRecordTools/Records/ \
+			-I$(UTILITIES_DIR)/KeyListOps/ \
            -I$(UTILITIES_DIR)/BamTools/include \
            -I$(UTILITIES_DIR)/BamTools/src/ \
             -I$(UTILITIES_DIR)/version/
diff --git a/src/utils/RecordOutputMgr/Makefile b/src/utils/RecordOutputMgr/Makefile
index 2d196ec..346a5c7 100644
--- a/src/utils/RecordOutputMgr/Makefile
+++ b/src/utils/RecordOutputMgr/Makefile
@@ -11,6 +11,7 @@ INCLUDES = -I$(UTILITIES_DIR)/general/ \
            -I$(UTILITIES_DIR)/FileRecordTools/ \
            -I$(UTILITIES_DIR)/FileRecordTools/FileReaders/ \
            -I$(UTILITIES_DIR)/FileRecordTools/Records/ \
+ 			-I$(UTILITIES_DIR)/KeyListOps/ \
            -I$(UTILITIES_DIR)/BamTools/include \
            -I$(UTILITIES_DIR)/BamTools/src/ \
             -I$(UTILITIES_DIR)/version/
diff --git a/src/utils/general/Makefile b/src/utils/general/Makefile
index 43dcfba..0361fab 100644
--- a/src/utils/general/Makefile
+++ b/src/utils/general/Makefile
@@ -4,7 +4,7 @@ UTILITIES_DIR = ../../utils/
 # -------------------
 # define our includes
 # -------------------
-INCLUDES = 
+INCLUDES = -I$(UTILITIES_DIR)/lineFileUtilities/
 
 # ----------------------------------
 # define our source and object files
diff --git a/src/utils/general/QuickString.cpp b/src/utils/general/QuickString.cpp
index 831f84a..9e06186 100644
--- a/src/utils/general/QuickString.cpp
+++ b/src/utils/general/QuickString.cpp
@@ -3,6 +3,7 @@
 #include <cstdlib>
 #include <cstdio>
 #include "ParseTools.h"
+#include "lineFileUtilities.h"
 
 QuickString::QuickString(size_t capacity)
 : _buffer(NULL),
@@ -82,6 +83,35 @@ QuickString &QuickString::operator = (const QuickString & inBuf){
 	return *this;
 }
 
+QuickString &QuickString::operator = (char val) { // replace the contents with val: clear(), then append(char)
+	clear();
+	append(val);
+	return *this;
+}
+QuickString &QuickString::operator = (int val) { // same pattern, via append(int)
+	clear();
+	append(val);
+	return *this;
+}
+
+QuickString &QuickString::operator = (uint32_t val) { // same pattern, via append(uint32_t)
+	clear();
+	append(val);
+	return *this;
+}
+
+QuickString &QuickString::operator = (float val) { // same pattern, via append(float)
+	clear();
+	append(val);
+	return *this;
+}
+
+QuickString &QuickString::operator = (double val) { // same pattern, via append(double)
+	clear();
+	append(val);
+	return *this;
+}
+
 
 QuickString &QuickString::operator += (const QuickString & inBuf)
 {
@@ -107,6 +137,26 @@ QuickString &QuickString::operator += (const char *inBuf)
 	return *this;
 }
 
+QuickString &QuickString::operator += (int num) { // forwards to append(int) and returns *this
+	append(num);
+	return *this;
+}
+
+QuickString &QuickString::operator += (uint32_t num) { // forwards to append(uint32_t) and returns *this
+	append(num);
+	return *this;
+}
+
+QuickString &QuickString::operator += (float num) { // forwards to append(float) and returns *this
+	append(num);
+	return *this;
+}
+
+QuickString &QuickString::operator += (double num) { // forwards to append(double) and returns *this
+	append(num);
+	return *this;
+}
+
 bool QuickString::operator == (const QuickString &qs) const {
 	if ( _currSize != qs._currSize) {
 		return false;
@@ -194,6 +244,21 @@ void QuickString::append(const char *inBuf, size_t inBufLen)
 void QuickString::append(int num) {
 	int2str(num, *this, true);
 }
+
+// Append an unsigned value. int2str takes an int, so values above INT_MAX go through ToString instead of wrapping negative.
+void QuickString::append(uint32_t num) {
+	if (num <= (uint32_t)INT_MAX) int2str((int)num, *this, true); // fast path for values that fit in an int
+	else append(ToString(num));
+}
+
+void QuickString::append(float num) { // formats via ToString, then appends the resulting text
+	append(ToString(num));
+}
+
+void QuickString::append(double num) { // formats via ToString, then appends the resulting text
+	append(ToString(num));
+}
+
 QuickString &QuickString::assign(const char *inBuf, size_t inBufLen)
 {
 	clear();
diff --git a/src/utils/general/QuickString.h b/src/utils/general/QuickString.h
index 5fdc0fc..a76e5ff 100644
--- a/src/utils/general/QuickString.h
+++ b/src/utils/general/QuickString.h
@@ -10,6 +10,7 @@
 
 using namespace std;
 #include <string>
+#include <stdint.h>
 #include <climits>
 #include <ostream>
 
@@ -32,10 +33,19 @@ public:
 	QuickString &operator = (const string &);
 	QuickString &operator = (const char *);
 	QuickString &operator = (const QuickString &);
+	QuickString &operator = (char);
+	QuickString &operator = (int);
+	QuickString &operator = (uint32_t);
+	QuickString &operator = (float);
+	QuickString &operator = (double);
 	QuickString &operator += (const QuickString &);
 	QuickString &operator += (const string &);
 	QuickString &operator += (const char *);
 	QuickString &operator += (char);
+	QuickString &operator += (int);
+	QuickString &operator += (uint32_t);
+	QuickString &operator += (float);
+	QuickString &operator += (double);
 
 	friend ostream &operator << (ostream &out, const QuickString &str);
 	bool operator == (const QuickString &) const;
@@ -52,7 +62,16 @@ public:
 	void append(const QuickString &str) { append(str.c_str(), str.size()); }
 	void append(const char *buf, size_t bufLen);
 	void append(char c);
+
+	//These are not templated because float and double require a stringstream based
+	//implementation, while the integer append uses a much faster home-brewed algorithm
+	//for better performance.
 	void append(int num);
+	void append(uint32_t num);
+	void append(float num);
+	void append(double num);
+
+
 
 	QuickString &assign(const char *str, size_t n);
 	void resize(size_t n, char c = '\0');
diff --git a/test/map/test-map.sh b/test/map/test-map.sh
index 293d84e..a47b14e 100644
--- a/test/map/test-map.sh
+++ b/test/map/test-map.sh
@@ -499,10 +499,8 @@ echo "    map.t33..\c"
 echo \
 "
 *****
-*****ERROR: requested column 15 , but record only has fields 1 - 12. Exiting.
-
-*****" > exp
-$BT map -a ivls.bed -b test.vcf -c 15 -o collapse 2> obs
+***** ERROR: Requested column 15, but database file test.vcf only has fields 1 - 12." > exp
+$BT map -a ivls.bed -b test.vcf -c 15 -o collapse 2>&1 > /dev/null | head -3> obs
 check obs exp
 rm obs exp
 
@@ -624,12 +622,9 @@ echo "    map.t41..\c"
 echo \
 "
 *****
-*****ERROR: requested column 41 , but record only has fields 1 - 6. Exiting.
-
-*****" > exp
-$BT map -a ivls.bed -b values5.bed -c 41 -o collapse 2> obs
+***** ERROR: Requested column 41, but database file test.vcf only has fields 1 - 12." > exp
+$BT map -a ivls.bed -b test.vcf -c 41 -o collapse 2>&1 > /dev/null | head -3> obs
 check obs exp
-
 rm obs exp
 
 ###########################################################
@@ -639,12 +634,9 @@ echo "    map.t42..\c"
 echo \
 "
 *****
-*****ERROR: requested column -1 , but record only has fields 1 - 6. Exiting.
-
-*****" > exp
-$BT map -a ivls.bed -b values5.bed -c -1 -o collapse 2> obs
+***** ERROR: Requested column -1, but database file test.vcf only has fields 1 - 12." > exp
+$BT map -a ivls.bed -b test.vcf -c -1 -o collapse 2>&1 > /dev/null | head -3> obs
 check obs exp
-
 rm obs exp
 
 ###########################################################
@@ -654,12 +646,9 @@ echo "    map.t43..\c"
 echo \
 "
 *****
-*****ERROR: requested column 0 , but record only has fields 1 - 6. Exiting.
-
-*****" > exp
-$BT map -a ivls.bed -b values5.bed -c 0 -o collapse 2> obs
+***** ERROR: Requested column 0, but database file test.vcf only has fields 1 - 12." > exp
+$BT map -a ivls.bed -b test.vcf -c 0 -o collapse 2>&1 > /dev/null | head -3> obs
 check obs exp
-
 rm obs exp
 
 
@@ -667,7 +656,7 @@ rm obs exp
 #  Test that Bam database is not allowed
 ############################################################
 echo "    map.t44...\c"
-echo -e "\n*****\n***** ERROR: BAM database file not currently supported for the map tool." > exp
+echo -e "\n*****\n***** ERROR: BAM database file not currently supported for column operations." > exp
 $BT map -a ivls.bed -b values.bam 2> obs
 check obs exp
 rm obs exp
@@ -682,3 +671,71 @@ echo "chr1	0	50	three_blocks_match	15	+	0	0	0	3	10,10,10,	0,20,40,	." > exp
 $BT map -o sum -a three_blocks_match.bed -b three_blocks_nomatch.bed -split > obs
 check obs exp
 rm obs exp
+
+
+
+
+
+
+###########################################################
+#
+#
+#  Tests for multiple columns and operations
+#
+#
+############################################################
+
+
+###########################################################
+#  Test that error is given when ops outnumber columns
+############################################################
+echo "    map.t46...\c"
+echo \
+"
+*****
+***** ERROR: There are 1 columns given, but there are 2 operations."  > exp
+$BT map -a ivls.bed -b values.bed -o count,sum 2>&1 > /dev/null | head -3 > obs
+check obs exp
+rm obs exp
+
+
+###########################################################
+#  Test that error is given when columns outnumber ops,
+# if there are two or more ops.
+############################################################
+echo "    map.t47...\c"
+echo \
+"
+*****
+***** ERROR: There are 3 columns given, but there are 2 operations."  > exp
+$BT map -a ivls.bed -b values.bed -c 5,1,2 -o count,sum 2>&1 > /dev/null | head -3 > obs
+check obs exp
+rm obs exp
+
+
+###########################################################
+#  Test that numeric ops for non-numeric columns aren't allowed
+############################################################
+echo "    map.t48...\c"
+echo \
+"
+*****
+***** ERROR: Column 1 is not a numeric field for database file values.bed."  > exp
+$BT map -a ivls.bed -b values.bed -c 1 -o sum 2>&1 > /dev/null | head -3 > obs
+check obs exp
+rm obs exp
+
+
+###########################################################
+#  Test that multiple columns are allowed with a 
+# single operation
+############################################################
+#
+# TBD
+#
+#echo "    map.t49...\c"
+#../../bin/bedtools map -a ivls.bed -b values.bed -c 2 -o sum 2>&1 > /dev/null | head -3 > obs
+#check obs exp
+#rm obs exp
+
+

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/bedtools.git



More information about the debian-med-commit mailing list