[med-svn] [bedtools] 01/14: Created new KeyListOps class to replace vectorOps. Converted map tool to this.
Charles Plessy
plessy at moszumanska.debian.org
Thu Mar 6 22:56:44 UTC 2014
This is an automated email from the git hooks/post-receive script.
plessy pushed a commit to branch master
in repository bedtools.
commit 77c1bdad3c13d1a082da452fe29b410647179165
Author: nkindlon <nek3d at virginia.edu>
Date: Wed Feb 19 13:28:50 2014 -0500
Created new KeyListOps class to replace vectorOps. Converted map tool to this.
---
Makefile | 1 +
src/mapFile/Makefile | 1 +
src/mapFile/mapFile.cpp | 227 +++++++++++-----
src/mapFile/mapFile.h | 88 +-----
src/mapFile/mapMain.cpp | 138 ----------
src/utils/Contexts/ContextBase.cpp | 9 -
src/utils/Contexts/ContextBase.h | 4 -
src/utils/Contexts/ContextIntersect.h | 2 +
src/utils/Contexts/ContextMap.cpp | 77 +++++-
src/utils/Contexts/ContextMap.h | 21 +-
src/utils/Contexts/Makefile | 1 +
src/utils/KeyListOps/KeyListOps.cpp | 405 ++++++++++++++++++++++++++++
src/utils/KeyListOps/KeyListOps.h | 117 ++++++++
src/utils/{Contexts => KeyListOps}/Makefile | 35 ++-
src/utils/general/Makefile | 2 +-
src/utils/general/QuickString.cpp | 65 +++++
src/utils/general/QuickString.h | 19 ++
test/map/test-map.sh | 27 +-
18 files changed, 889 insertions(+), 350 deletions(-)
diff --git a/Makefile b/Makefile
index 139988b..2676748 100644
--- a/Makefile
+++ b/Makefile
@@ -78,6 +78,7 @@ UTIL_SUBDIRS = $(SRC_DIR)/utils/bedFile \
$(SRC_DIR)/utils/gzstream \
$(SRC_DIR)/utils/fileType \
$(SRC_DIR)/utils/bedFilePE \
+ $(SRC_DIR)/utils/KeyListOps \
$(SRC_DIR)/utils/NewChromsweep \
$(SRC_DIR)/utils/sequenceUtilities \
$(SRC_DIR)/utils/tabFile \
diff --git a/src/mapFile/Makefile b/src/mapFile/Makefile
index 17bb42d..8628242 100644
--- a/src/mapFile/Makefile
+++ b/src/mapFile/Makefile
@@ -29,6 +29,7 @@ INCLUDES = -I$(UTILITIES_DIR)/Contexts/ \
-I$(UTILITIES_DIR)/FileRecordTools/FileReaders/ \
-I$(UTILITIES_DIR)/FileRecordTools/Records/ \
-I$(UTILITIES_DIR)/RecordOutputMgr/ \
+ -I$(UTILITIES_DIR)/KeyListOps/ \
-I$(UTILITIES_DIR)/NewChromsweep \
-I$(UTILITIES_DIR)/VectorOps \
-I$(UTILITIES_DIR)/BinTree \
diff --git a/src/mapFile/mapFile.cpp b/src/mapFile/mapFile.cpp
index 88dcc26..7cff531 100644
--- a/src/mapFile/mapFile.cpp
+++ b/src/mapFile/mapFile.cpp
@@ -21,11 +21,14 @@ const int PRECISION = 21;
FileMap::FileMap(ContextMap *context)
: _context(context),
_blockMgr(NULL),
- _recordOutputMgr(NULL)
+ _recordOutputMgr(NULL),
+ _colOps(_context->getColOps())
{
_blockMgr = new BlockMgr(_context->getOverlapFraction(), _context->getReciprocal());
_recordOutputMgr = new RecordOutputMgr();
_recordOutputMgr->init(_context);
+ _keyListOps.setNullValue(_context->getNullValue());
+ _keyListOps.setDelimStr(_context->getDelim());
}
FileMap::~FileMap(void) {
@@ -43,78 +46,174 @@ bool FileMap::mapFiles()
}
RecordKeyList hitSet;
while (sweep.next(hitSet)) {
+ _outputValues.clear();
if (_context->getObeySplits()) {
RecordKeyList keySet(hitSet.getKey());
RecordKeyList resultSet(hitSet.getKey());
_blockMgr->findBlockedOverlaps(keySet, hitSet, resultSet);
- SummarizeHits(resultSet);
- _recordOutputMgr->printRecord(resultSet.getKey(), _output);
+ calculateOutput(resultSet);
+ _recordOutputMgr->printRecord(resultSet.getKey(), _outputValues);
} else {
- SummarizeHits(hitSet);
- _recordOutputMgr->printRecord(hitSet.getKey(), _output);
+ calculateOutput(hitSet);
+ _recordOutputMgr->printRecord(hitSet.getKey(), _outputValues);
}
}
return true;
}
-void FileMap::ExtractColumnFromHits(RecordKeyList &hits) {
- _column_vec.clear();
- RecordKeyList::const_iterator_type iter = hits.begin();
- for (; iter != hits.end(); iter = hits.next())
- {
- _column_vec.push_back(iter->value()->getField(_context->getColumn()).str());
- }
-}
-
-void FileMap::SummarizeHits(RecordKeyList &hits) {
-
- const QuickString & operation = _context->getColumnOperation();
- _output.clear();
-
- if (hits.size() == 0) {
- if (operation == "count" || operation == "count_distinct")
- _output.append("0");
- else
- _output.append(_context->getNullValue().str());
- return;
- }
-
- _tmp_output.str("");
- _tmp_output.clear();
-
- ExtractColumnFromHits(hits);
-
- VectorOps vo(_column_vec);
- if (operation == "sum")
- _tmp_output << setprecision (PRECISION) << vo.GetSum();
- else if (operation == "mean")
- _tmp_output << setprecision (PRECISION) << vo.GetMean();
- else if (operation == "median")
- _tmp_output << setprecision (PRECISION) << vo.GetMedian();
- else if (operation == "min")
- _tmp_output << setprecision (PRECISION) << vo.GetMin();
- else if (operation == "max")
- _tmp_output << setprecision (PRECISION) << vo.GetMax();
- else if (operation == "absmin")
- _tmp_output << setprecision (PRECISION) << vo.GetAbsMin();
- else if (operation == "absmax")
- _tmp_output << setprecision (PRECISION) << vo.GetAbsMax();
- else if (operation == "mode")
- _tmp_output << vo.GetMode();
- else if (operation == "antimode")
- _tmp_output << vo.GetAntiMode();
- else if (operation == "count")
- _tmp_output << setprecision (PRECISION) << vo.GetCount();
- else if (operation == "count_distinct")
- _tmp_output << setprecision (PRECISION) << vo.GetCountDistinct();
- else if (operation == "collapse")
- _tmp_output << vo.GetCollapse();
- else if (operation == "distinct")
- _tmp_output << vo.GetDistinct();
- else {
- cerr << "ERROR: " << operation << " is an unrecognized operation\n";
- exit(1);
- }
- _output.append(_tmp_output.str());
+// Append a numeric result to `out`, or `nullVal` when the result is NaN.
+// The numeric KeyListOps getters return NAN when their empty() check is true,
+// so NaN is the signal that there was nothing to summarize.
+static void appendNumOrNull(QuickString &out, double val, const QuickString &nullVal)
+{
+	if (isnan(val)) {
+		out.append(nullVal);
+	} else {
+		out.append(val);
+	}
+}
+
+// Fill _outputValues with one result per requested (column, operation) pair,
+// separated by tabs. `hits` is the key record plus the records that overlap it;
+// it is handed to _keyListOps, which performs the actual computations.
+// An op code that is not handled below is a programming error (validation in
+// the context should have rejected it) and terminates the program.
+void FileMap::calculateOutput(RecordKeyList &hits)
+{
+	_keyListOps.setKeyList(&hits);
+
+	const QuickString &nullVal = _context->getNullValue();
+	const int numColOps = (int)_colOps.size();
+
+	for (int i=0; i < numColOps; i++) {
+		int col = _colOps[i].first;
+		KeyListOps::OP_TYPES opCode = _colOps[i].second;
+
+		_keyListOps.setColumn(col);
+		switch (opCode) {
+		// Numeric operations: print the null value instead of NaN.
+		case KeyListOps::SUM:
+			appendNumOrNull(_outputValues, _keyListOps.getSum(), nullVal);
+			break;
+
+		case KeyListOps::MEAN:
+			appendNumOrNull(_outputValues, _keyListOps.getMean(), nullVal);
+			break;
+
+		case KeyListOps::STDDEV:
+			appendNumOrNull(_outputValues, _keyListOps.getStddev(), nullVal);
+			break;
+
+		case KeyListOps::SAMPLE_STDDEV:
+			appendNumOrNull(_outputValues, _keyListOps.getSampleStddev(), nullVal);
+			break;
+
+		case KeyListOps::MEDIAN:
+			appendNumOrNull(_outputValues, _keyListOps.getMedian(), nullVal);
+			break;
+
+		case KeyListOps::MIN:
+			appendNumOrNull(_outputValues, _keyListOps.getMin(), nullVal);
+			break;
+
+		case KeyListOps::MAX:
+			appendNumOrNull(_outputValues, _keyListOps.getMax(), nullVal);
+			break;
+
+		case KeyListOps::ABSMIN:
+			appendNumOrNull(_outputValues, _keyListOps.getAbsMin(), nullVal);
+			break;
+
+		case KeyListOps::ABSMAX:
+			appendNumOrNull(_outputValues, _keyListOps.getAbsMax(), nullVal);
+			break;
+
+		// Counting and string operations: the getter's result is appended as is.
+		case KeyListOps::MODE:
+			_outputValues.append(_keyListOps.getMode());
+			break;
+
+		case KeyListOps::ANTIMODE:
+			_outputValues.append(_keyListOps.getAntiMode());
+			break;
+
+		case KeyListOps::COUNT:
+			_outputValues.append(_keyListOps.getCount());
+			break;
+
+		case KeyListOps::DISTINCT:
+			_outputValues.append(_keyListOps.getDistinct());
+			break;
+
+		case KeyListOps::COUNT_DISTINCT:
+			_outputValues.append(_keyListOps.getCountDistinct());
+			break;
+
+		case KeyListOps::DISTINCT_ONLY:
+			_outputValues.append(_keyListOps.getDistinctOnly());
+			break;
+
+		case KeyListOps::COLLAPSE:
+			_outputValues.append(_keyListOps.getCollapse());
+			break;
+
+		case KeyListOps::CONCAT:
+			_outputValues.append(_keyListOps.getConcat());
+			break;
+
+		case KeyListOps::FREQ_ASC:
+			_outputValues.append(_keyListOps.getFreqAsc());
+			break;
+
+		case KeyListOps::FREQ_DESC:
+			_outputValues.append(_keyListOps.getFreqDesc());
+			break;
+
+		case KeyListOps::FIRST:
+			_outputValues.append(_keyListOps.getFirst());
+			break;
+
+		case KeyListOps::LAST:
+			_outputValues.append(_keyListOps.getLast());
+			break;
+
+		case KeyListOps::INVALID:
+		default:
+			// Any unrecognized operation should have been rejected during context validation.
+			// Reaching this point means a code change bypassed that validation, so stop
+			// instead of emitting a row with a missing value.
+			cerr << "ERROR: Invalid operation given for column " << col << ". Exiting..." << endl;
+			exit(1);
+		}
+		//if this isn't the last column, add a tab.
+		if (i < numColOps - 1) {
+			_outputValues.append('\t');
+		}
+	}
+}
diff --git a/src/mapFile/mapFile.h b/src/mapFile/mapFile.h
index cb1da08..e2143ef 100644
--- a/src/mapFile/mapFile.h
+++ b/src/mapFile/mapFile.h
@@ -18,10 +18,11 @@ using namespace std;
#include <iomanip>
#include "VectorOps.h"
#include "RecordKeyList.h"
+#include "KeyListOps.h"
+#include "ContextMap.h"
using namespace std;
-class ContextMap;
class BlockMgr;
class RecordOutputMgr;
@@ -35,90 +36,13 @@ public:
private:
ContextMap *_context;
- Record *_queryRec;
- Record *_databaseRec;
BlockMgr *_blockMgr;
RecordOutputMgr *_recordOutputMgr;
+ KeyListOps _keyListOps;
+ const ContextMap::colOpsType & _colOps;
+ QuickString _outputValues; // placeholder for the results of mapping B to each a in A.
- vector<string> _column_vec; // vector to hold current column's worth of data
-
- ostringstream _tmp_output;
- QuickString _output; // placeholder for the results of mapping B to each a in A.
- //------------------------------------------------
- // private methods
- //------------------------------------------------
- void Map();
- void SummarizeHits(RecordKeyList &hits);
- void ExtractColumnFromHits(RecordKeyList &hits);
-
+ void calculateOutput(RecordKeyList &hits);
};
#endif /* MAPFILE_H */
-
-
-/*
-#include "bedFile.h"
-#include "chromsweep.h"
-#include "VectorOps.h"
-#include "api/BamReader.h"
-#include "api/BamWriter.h"
-#include "api/BamAux.h"
-#include "BamAncillary.h"
-using namespace BamTools;
-
-
-#include <vector>
-#include <iostream>
-#include <algorithm>
-#include <numeric>
-#include <fstream>
-#include <iomanip>
-#include <stdlib.h>
-using namespace std;
-
-
-
-class BedMap {
-
-public:
-
- // constructor
- BedMap(string bedAFile, string bedBFile, int column, string operation,
- float overlapFraction, bool sameStrand,
- bool diffStrand, bool reciprocal,
- bool choseNullValue, string nullValue,
- bool printHeader);
-
- // destructor
- ~BedMap(void);
-
-private:
-
- //------------------------------------------------
- // private attributes
- //------------------------------------------------
- string _bedAFile;
- string _bedBFile;
- int _column;
- string _operation;
- bool _sameStrand;
- bool _diffStrand;
- bool _reciprocal;
- float _overlapFraction;
- string _nullValue;
- bool _printHeader;
-
- // instance of a bed file class.
- BedFile *_bedA, *_bedB;
-
- vector<string> _column_vec; // vector to hold current column's worth of data
-
- //------------------------------------------------
- // private methods
- //------------------------------------------------
- void Map();
- string MapHits(const BED &a, const vector<BED> &hits);
- void ExtractColumnFromHits(const vector<BED> &hits);
-};
-*/
-//#endif /* MAPFILE_H */
diff --git a/src/mapFile/mapMain.cpp b/src/mapFile/mapMain.cpp
index a9eeb36..f08e56b 100644
--- a/src/mapFile/mapMain.cpp
+++ b/src/mapFile/mapMain.cpp
@@ -38,144 +38,6 @@ int map_main(int argc, char* argv[]) {
return retVal ? 0 : 1;
}
-
-/*
-int map_main(int argc, char* argv[]) {
-
- // our configuration variables
- bool showHelp = false;
-
- // input files
- string bedAFile;
- string bedBFile;
- int column = 5;
- string operation = "sum";
- string nullValue = ".";
-
- // input arguments
- float overlapFraction = 1E-9;
-
- bool haveBedA = false;
- bool haveBedB = false;
- bool haveColumn = false;
- bool haveOperation = false;
- bool haveFraction = false;
- bool reciprocalFraction = false;
- bool sameStrand = false;
- bool diffStrand = false;
- bool printHeader = false;
- bool choseNullValue = false;
-
- // check to see if we should print out some help
- if(argc <= 1) showHelp = true;
-
- for(int i = 1; i < argc; i++) {
- int parameterLength = (int)strlen(argv[i]);
-
- if((PARAMETER_CHECK("-h", 2, parameterLength)) ||
- (PARAMETER_CHECK("--help", 5, parameterLength))) {
- showHelp = true;
- }
- }
-
- if(showHelp) map_help();
-
- // do some parsing (all of these parameters require 2 strings)
- for(int i = 1; i < argc; i++) {
-
- int parameterLength = (int)strlen(argv[i]);
-
- if(PARAMETER_CHECK("-a", 2, parameterLength)) {
- if ((i+1) < argc) {
- haveBedA = true;
- bedAFile = argv[i + 1];
- i++;
- }
- }
- else if(PARAMETER_CHECK("-b", 2, parameterLength)) {
- if ((i+1) < argc) {
- haveBedB = true;
- bedBFile = argv[i + 1];
- i++;
- }
- }
- else if(PARAMETER_CHECK("-c", 2, parameterLength)) {
- if ((i+1) < argc) {
- haveColumn = true;
- column = atoi(argv[i + 1]);
- i++;
- }
- }
- else if(PARAMETER_CHECK("-o", 2, parameterLength)) {
- if ((i+1) < argc) {
- haveOperation = true;
- operation = argv[i + 1];
- i++;
- }
- }
- else if(PARAMETER_CHECK("-f", 2, parameterLength)) {
- if ((i+1) < argc) {
- haveFraction = true;
- overlapFraction = atof(argv[i + 1]);
- i++;
- }
- }
- else if(PARAMETER_CHECK("-r", 2, parameterLength)) {
- reciprocalFraction = true;
- }
- else if (PARAMETER_CHECK("-s", 2, parameterLength)) {
- sameStrand = true;
- }
- else if (PARAMETER_CHECK("-S", 2, parameterLength)) {
- diffStrand = true;
- }
- else if (PARAMETER_CHECK("-null", 5, parameterLength)) {
- nullValue = argv[i + 1];
- choseNullValue = true;
- i++;
- }
- else if(PARAMETER_CHECK("-header", 7, parameterLength)) {
- printHeader = true;
- }
- else {
- cerr << endl << "*****ERROR: Unrecognized parameter: " << argv[i] << " *****" << endl << endl;
- showHelp = true;
- }
- }
-
- // make sure we have both input files
- if (!haveBedA || !haveBedB) {
- cerr << endl << "*****" << endl << "*****ERROR: Need -a and -b files. " << endl << "*****" << endl;
- showHelp = true;
- }
-
- if (reciprocalFraction && !haveFraction) {
- cerr << endl << "*****" << endl << "*****ERROR: If using -r, you need to define -f." << endl << "*****" << endl;
- showHelp = true;
- }
-
- if (sameStrand && diffStrand) {
- cerr << endl << "*****" << endl << "*****ERROR: Request either -s OR -S, not both." << endl << "*****" << endl;
- showHelp = true;
- }
-
- if (!showHelp) {
-
- BedMap *bm = new BedMap(bedAFile, bedBFile, column, operation,
- overlapFraction, sameStrand,
- diffStrand, reciprocalFraction,
- choseNullValue, nullValue,
- printHeader);
- delete bm;
- return 0;
- }
- else {
- map_help();
- return 0;
- }
-}
-*/
-
void map_help(void) {
cerr << "\nTool: bedtools map (aka mapBed)" << endl;
diff --git a/src/utils/Contexts/ContextBase.cpp b/src/utils/Contexts/ContextBase.cpp
index cd30b20..1f0c7a1 100644
--- a/src/utils/Contexts/ContextBase.cpp
+++ b/src/utils/Contexts/ContextBase.cpp
@@ -57,15 +57,6 @@ ContextBase::ContextBase()
_programNames["intersect"] = INTERSECT;
_programNames["sample"] = SAMPLE;
_programNames["map"] = MAP;
-
- _validScoreOps.insert("sum");
- _validScoreOps.insert("max");
- _validScoreOps.insert("min");
- _validScoreOps.insert("mean");
- _validScoreOps.insert("mode");
- _validScoreOps.insert("median");
- _validScoreOps.insert("antimode");
- _validScoreOps.insert("collapse");
}
ContextBase::~ContextBase()
diff --git a/src/utils/Contexts/ContextBase.h b/src/utils/Contexts/ContextBase.h
index 872193f..7846f62 100644
--- a/src/utils/Contexts/ContextBase.h
+++ b/src/utils/Contexts/ContextBase.h
@@ -191,15 +191,11 @@ protected:
int _bamHeaderAndRefIdx;
int _maxNumDatabaseFields;
bool _useFullBamTags;
- QuickString _columnOperation;
- int _column;
- QuickString _nullValue;
bool _reportCount;
int _maxDistance;
bool _reportNames;
bool _reportScores;
QuickString _scoreOp;
- set<QuickString> _validScoreOps;
int _numOutputRecords;
diff --git a/src/utils/Contexts/ContextIntersect.h b/src/utils/Contexts/ContextIntersect.h
index 0144a12..b066e94 100644
--- a/src/utils/Contexts/ContextIntersect.h
+++ b/src/utils/Contexts/ContextIntersect.h
@@ -21,6 +21,8 @@ public:
//NOTE: Query and database files will only be marked as such by either the
//parseCmdArgs method, or by explicitly setting them.
+ FileRecordMgr *getQueryFile() { return getFile(_queryFileIdx); }
+ FileRecordMgr *getDatabaseFile() { return getFile(_databaseFileIdx); }
int getQueryFileIdx() const { return _queryFileIdx; }
void setQueryFileIdx(int idx) { _queryFileIdx = idx; }
int getDatabaseFileIdx() const { return _databaseFileIdx; }
diff --git a/src/utils/Contexts/ContextMap.cpp b/src/utils/Contexts/ContextMap.cpp
index d94d088..8b20272 100644
--- a/src/utils/Contexts/ContextMap.cpp
+++ b/src/utils/Contexts/ContextMap.cpp
@@ -8,15 +8,16 @@
#include "ContextMap.h"
ContextMap::ContextMap()
+: _delimStr(",")
{
// map requires sorted input
setSortedInput(true);
setLeftJoin(true);
// default to BED score column
- setColumn(5);
+ setColumns("5");
// default to "sum"
- setColumnOperation("sum");
+ setOperations("sum");
// default to "." as a NULL value
setNullValue('.');
}
@@ -53,6 +54,10 @@ bool ContextMap::parseCmdArgs(int argc, char **argv, int skipFirstArgs) {
else if (strcmp(_argv[_i], "-null") == 0) {
if (!handle_null()) return false;
}
+ else if (strcmp(_argv[_i], "-delim") == 0) {
+ if (!handle_delim()) return false;
+ }
+
}
return ContextIntersect::parseCmdArgs(argc, argv, _skipFirstArgs);
}
@@ -66,23 +71,64 @@ bool ContextMap::isValidState()
if (getDatabaseFileType() == FileRecordTypeChecker::BAM_FILE_TYPE) {
//throw Error
- cerr << endl << "*****"
- << endl
+ cerr << endl << "*****" << endl
<< "***** ERROR: BAM database file not currently supported for the map tool."
<< endl;
exit(1);
}
- // TODO
- // enforce any specific checks for Map.
+
+
+ //get the strings from context containing the comma-delimited lists of columns
+ //and operations. Split both of these into vectors. Get the operation code
+ //for each operation string. Finally, make a vector of pairs, where the first
+ //member of each pair is a column number, and the second member is the code for the
+ //operation to perform on that column.
+
+ vector<QuickString> columnsVec;
+ vector<QuickString> opsVec;
+ int numCols = Tokenize(_columns, columnsVec, ',');
+ int numOps = Tokenize(_operations, opsVec, ',');
+
+ if (numOps < 1 || numCols < 1) {
+ cerr << endl << "*****" << endl
+ << "***** ERROR: There must be at least one column and at least one operation named." << endl;
+ return false;
+ }
+ if (numOps > 1 && numCols != numOps) {
+ cerr << endl << "*****" << endl
+ << "***** ERROR: There are " << numCols <<" columns given, but there are " << numOps << " operations. " << endl;
+ cerr << "\tPlease provide either a single operation that will be applied to all listed columns, " << endl;
+ cerr << "\tor an operation for each column." << endl;
+ return false;
+ }
+ KeyListOps keyListOps;
+ for (int i=0; i < (int)columnsVec.size(); i++) {
+ int col = str2chrPos(columnsVec[i]);
+
+ //check that the column number is valid
+ if (col < 1 || col > getDatabaseFile()->getNumFields()) {
+ cerr << endl << "*****" << endl << "***** ERROR: Requested column " << col << ", but database file "
+ << getDatabaseFileName() << " only has fields 1 - " << getDatabaseFile()->getNumFields() << "." << endl;
+ return false;
+ }
+ const QuickString &operation = opsVec.size() > 1 ? opsVec[i] : opsVec[0];
+ KeyListOps::OP_TYPES opCode = keyListOps.getOpCode(operation);
+ if (opCode == KeyListOps::INVALID) {
+ cerr << endl << "*****" << endl
+ << "***** ERROR: " << operation << " is not a valid operation. " << endl;
+ return false;
+ }
+ _colOps.push_back(pair<int, KeyListOps::OP_TYPES>(col, opCode));
+ }
return true;
}
-// for map, -c is the column upon which to operate
+// for map, -c is the string of columns upon which to operate
bool ContextMap::handle_c()
{
if ((_i+1) < _argc) {
- setColumn(atoi(_argv[_i + 1]));
+ setColumns(_argv[_i + 1]);
markUsed(_i - _skipFirstArgs);
_i++;
markUsed(_i - _skipFirstArgs);
@@ -91,11 +137,11 @@ bool ContextMap::handle_c()
}
-// for map, -o is the operation to apply to the column (-c)
+// for map, -o is the string of operations to apply to the columns (-c)
bool ContextMap::handle_o()
{
if ((_i+1) < _argc) {
- setColumnOperation(_argv[_i + 1]);
+ setOperations(_argv[_i + 1]);
markUsed(_i - _skipFirstArgs);
_i++;
markUsed(_i - _skipFirstArgs);
@@ -116,3 +162,14 @@ bool ContextMap::handle_null()
}
return true;
}
+
+// for map, -delim is the string placed between values in list-style output.
+// Always returns true; when no value follows the flag, nothing is stored and
+// neither argument is marked as used.
+bool ContextMap::handle_delim()
+{
+	const int valueIdx = _i + 1;
+	if (valueIdx >= _argc) {
+		return true;
+	}
+
+	_delimStr = _argv[valueIdx];
+	// mark both the flag and its value as consumed
+	markUsed(_i - _skipFirstArgs);
+	_i++;
+	markUsed(_i - _skipFirstArgs);
+	return true;
+}
diff --git a/src/utils/Contexts/ContextMap.h b/src/utils/Contexts/ContextMap.h
index b8ee57f..460f93b 100644
--- a/src/utils/Contexts/ContextMap.h
+++ b/src/utils/Contexts/ContextMap.h
@@ -9,6 +9,7 @@
#define CONTEXTMAP_H_
#include "ContextIntersect.h"
+#include "KeyListOps.h"
class ContextMap : public ContextIntersect {
public:
@@ -18,21 +19,33 @@ public:
virtual bool parseCmdArgs(int argc, char **argv, int skipFirstArgs);
- int getColumn() const { return _column; }
- void setColumn(int column) { _column = column; }
+ const QuickString &getColumns() const { return _columns; }
+ void setColumns(const QuickString &columns) { _columns = columns; }
- const QuickString & getColumnOperation() const { return _columnOperation; }
- void setColumnOperation(const QuickString & operation) { _columnOperation = operation; }
+ const QuickString & getOperations() const { return _operations; }
+ void setOperations(const QuickString & operation) { _operations = operation; }
const QuickString & getNullValue() const { return _nullValue; }
void setNullValue(const QuickString & nullValue) { _nullValue = nullValue; }
+ const QuickString &getDelim() const { return _delimStr; }
virtual bool hasIntersectMethods() const { return true; }
+ typedef vector<pair<int, KeyListOps::OP_TYPES> > colOpsType;
+ const colOpsType &getColOps() const { return _colOps; }
+
private:
+ QuickString _operations;
+ QuickString _columns;
+ QuickString _nullValue;
+ KeyListOps _keyListOps;
+ colOpsType _colOps;
+ QuickString _delimStr;
+
virtual bool handle_c();
virtual bool handle_o();
virtual bool handle_null();
+ virtual bool handle_delim();
};
diff --git a/src/utils/Contexts/Makefile b/src/utils/Contexts/Makefile
index 7ddc3c6..4b2ed42 100644
--- a/src/utils/Contexts/Makefile
+++ b/src/utils/Contexts/Makefile
@@ -9,6 +9,7 @@ INCLUDES = -I$(UTILITIES_DIR)/general/ \
-I$(UTILITIES_DIR)/FileRecordTools/ \
-I$(UTILITIES_DIR)/FileRecordTools/FileReaders/ \
-I$(UTILITIES_DIR)/FileRecordTools/Records/ \
+ -I$(UTILITIES_DIR)/KeyListOps/ \
-I$(UTILITIES_DIR)/GenomeFile/ \
-I$(UTILITIES_DIR)/BamTools/include \
-I$(UTILITIES_DIR)/BamTools/src/ \
diff --git a/src/utils/KeyListOps/KeyListOps.cpp b/src/utils/KeyListOps/KeyListOps.cpp
new file mode 100644
index 0000000..05a6040
--- /dev/null
+++ b/src/utils/KeyListOps/KeyListOps.cpp
@@ -0,0 +1,405 @@
+/*
+ * KeyListOps.cpp
+ *
+ * Created on: Feb 6, 2014
+ * Author: nek3d
+ */
+
+#include "KeyListOps.h"
+#include <cfloat>
+#include <cmath>
+#include <algorithm>
+
+// Default constructor: operates on the object's own _nullKeyList member until
+// setKeyList() supplies a real list. Defaults are column 1, "." as the null
+// value and "," as the delimiter. init() fills the operation-name lookup table.
+KeyListOps::KeyListOps()
+: _keyList(&_nullKeyList),
+ _column(1),
+ _nullVal("."),
+ _delimStr(","),
+ _iter(_nullKeyList.begin())
+{
+ init();
+
+}
+
+// Construct for a caller-supplied key list and column. The pointer is stored
+// and dereferenced immediately (keyList->begin()), so it must not be NULL.
+// Null value and delimiter default to "." and ","; init() fills the
+// operation-name lookup table.
+KeyListOps::KeyListOps(RecordKeyList *keyList, int column)
+: _keyList(keyList),
+ _column(column),
+ _nullVal("."),
+ _delimStr(","),
+ _iter(keyList->begin())
+{
+ init();
+}
+
+// Register every operation name accepted on the command line together with
+// the OP_TYPES code that getOpCode() reports for it.
+void KeyListOps::init() {
+	static const struct {
+		const char *name;
+		OP_TYPES code;
+	} opTable[] = {
+		{ "sum", SUM },
+		{ "mean", MEAN },
+		{ "stddev", STDDEV },
+		{ "sample_stddev", SAMPLE_STDDEV },
+		{ "median", MEDIAN },
+		{ "mode", MODE },
+		{ "antimode", ANTIMODE },
+		{ "min", MIN },
+		{ "max", MAX },
+		{ "absmin", ABSMIN },
+		{ "absmax", ABSMAX },
+		{ "count", COUNT },
+		{ "distinct", DISTINCT },
+		{ "count_distinct", COUNT_DISTINCT },
+		{ "distinct_only", DISTINCT_ONLY },
+		{ "collapse", COLLAPSE },
+		{ "concat", CONCAT },
+		{ "freq_asc", FREQ_ASC },
+		{ "freq_desc", FREQ_DESC },
+		{ "first", FIRST },
+		{ "last", LAST }
+	};
+	const size_t numOps = sizeof(opTable) / sizeof(opTable[0]);
+	for (size_t idx = 0; idx < numOps; idx++) {
+		_opCodes[opTable[idx].name] = opTable[idx].code;
+	}
+}
+
+
+// Destructor. The body is empty: no explicit cleanup is performed here.
+KeyListOps::~KeyListOps() {
+
+}
+
+// Translate an operation name into its OP_TYPES code.
+// Names that were never registered by init() map to INVALID.
+KeyListOps::OP_TYPES KeyListOps::getOpCode(const QuickString &operation) const {
+	const map<QuickString, OP_TYPES>::const_iterator found = _opCodes.find(operation);
+	return (found != _opCodes.end()) ? found->second : INVALID;
+}
+
+// return the total of the column values (NAN when there is nothing to add)
+double KeyListOps::getSum() {
+	if (empty()) {
+		return NAN;
+	}
+
+	double total = 0.0;
+	begin();
+	while (!end()) {
+		total += getColValNum();
+		next();
+	}
+	return total;
+}
+
+// return the arithmetic mean of the column values (NAN when empty)
+double KeyListOps::getMean() {
+	if (empty()) {
+		return NAN;
+	}
+
+	const double total = getSum();
+	return total / (float)getCount();
+}
+
+
+// return the population standard deviation of the column values,
+// i.e. sqrt( sum((x - mean)^2) / N ). Returns NAN when empty.
+double KeyListOps::getStddev() {
+	if (empty()) return NAN;
+
+	double avg = getMean();
+	double squareDiffSum = 0.0;
+	for (begin(); !end(); next()) {
+		double diff = getColValNum() - avg;
+		squareDiffSum += diff * diff;
+	}
+	// squareDiffSum / N is the variance; the standard deviation is its square root.
+	return sqrt(squareDiffSum / (double)getCount());
+}
+// return the sample standard deviation of the column values,
+// i.e. sqrt( sum((x - mean)^2) / (N - 1) ).
+// Returns NAN when empty or when there is only one value (N - 1 == 0).
+double KeyListOps::getSampleStddev() {
+	if (empty()) return NAN;
+
+	// A single observation has no sample deviation; say so explicitly
+	// instead of relying on 0.0 / 0.0 producing NaN.
+	const uint32_t count = getCount();
+	if (count < 2) return NAN;
+
+	double avg = getMean();
+	double squareDiffSum = 0.0;
+	for (begin(); !end(); next()) {
+		double diff = getColValNum() - avg;
+		squareDiffSum += diff * diff;
+	}
+	// squareDiffSum / (N - 1) is the sample variance; take the root for the deviation.
+	return sqrt(squareDiffSum / ((double)count - 1.0));
+}
+
+// return the median of the column values (NAN when empty)
+double KeyListOps::getMedian() {
+	if (empty()) {
+		return NAN;
+	}
+
+	// Load the values into _numArray in ascending order.
+	toArray(true, ASC);
+	const size_t count = getCount();
+	const size_t mid = count / 2;
+	if (count % 2 == 0) {
+		// even number of elements: average the two middle ones.
+		const double sum = _numArray[mid - 1] + _numArray[mid];
+		return sum / 2.0;
+	}
+	// odd number of elements: the middle one is the median.
+	return _numArray[mid];
+}
+
+// return the most common value in the column (the null value when empty)
+const QuickString &KeyListOps::getMode() {
+	if (empty()) {
+		return _nullVal;
+	}
+
+	makeFreqMap();
+
+	// Walk the frequency map (makeFreqMap leaves _freqIter at its start) and
+	// remember the entry with the largest count. On ties the earliest entry wins.
+	freqMapType::iterator mostFrequent = _freqMap.begin();
+	int highestCount = 0;
+	while (_freqIter != _freqMap.end()) {
+		if (_freqIter->second > highestCount) {
+			highestCount = _freqIter->second;
+			mostFrequent = _freqIter;
+		}
+		_freqIter++;
+	}
+	_retStr = mostFrequent->first;
+	return _retStr;
+}
+// return the least common value in the column (the null value when empty)
+const QuickString &KeyListOps::getAntiMode() {
+	if (empty()) return _nullVal;
+
+	makeFreqMap();
+
+	// Pass through the freq map and keep track of which key has the lowest
+	// occurrence count. The search is seeded with the first entry rather than
+	// INT_MAX, so this code does not depend on <climits> being included.
+	// On ties the earliest entry wins.
+	freqMapType::iterator minIter = _freqMap.begin();
+	for (; _freqIter != _freqMap.end(); _freqIter++) {
+		if (_freqIter->second < minIter->second) {
+			minIter = _freqIter;
+		}
+	}
+	_retStr = minIter->first;
+	return _retStr;
+}
+// return the smallest column value (NAN when empty)
+double KeyListOps::getMin() {
+	if (empty()) {
+		return NAN;
+	}
+
+	double lowest = DBL_MAX;
+	begin();
+	while (!end()) {
+		const double candidate = getColValNum();
+		if (candidate < lowest) {
+			lowest = candidate;
+		}
+		next();
+	}
+	return lowest;
+}
+
+// return the maximum element of the column (NAN when empty)
+double KeyListOps::getMax() {
+	if (empty()) return NAN;
+
+	// Seed with the most negative finite double. DBL_MIN is the smallest
+	// *positive* double, so seeding with it would report DBL_MIN whenever
+	// every value is negative or zero.
+	double maxVal = -DBL_MAX;
+	for (begin(); !end(); next()) {
+		double currVal = getColValNum();
+		maxVal = (currVal > maxVal) ? currVal : maxVal;
+	}
+	return maxVal;
+}
+
+// return the minimum absolute value of the column (NAN when empty)
+double KeyListOps::getAbsMin() {
+	if (empty()) return NAN;
+
+	double minVal = DBL_MAX;
+	for (begin(); !end(); next()) {
+		// fabs always works on doubles; an unqualified abs() can resolve to the
+		// integer overload and truncate the value, depending on visible headers.
+		double currVal = fabs(getColValNum());
+		minVal = (currVal < minVal) ? currVal : minVal;
+	}
+	return minVal;
+}
+// return the maximum absolute value of the column (NAN when empty)
+double KeyListOps::getAbsMax() {
+	if (empty()) return NAN;
+
+	// Absolute values are never negative, so 0.0 is the correct seed.
+	// DBL_MIN is a tiny positive number and would be returned instead of 0
+	// when every value is zero.
+	double maxVal = 0.0;
+	for (begin(); !end(); next()) {
+		// fabs always works on doubles; an unqualified abs() can resolve to the
+		// integer overload and truncate the value, depending on visible headers.
+		double currVal = fabs(getColValNum());
+		maxVal = (currVal > maxVal) ? currVal : maxVal;
+	}
+	return maxVal;
+}
+// return how many records the current key list reports via size()
+uint32_t KeyListOps::getCount() {
+	const uint32_t numRecords = _keyList->size();
+	return numRecords;
+}
+// return a delimited list of the unique values; a repeated value is listed
+// once. Returns the null value when empty.
+const QuickString &KeyListOps::getDistinct() {
+	if (empty()) {
+		return _nullVal;
+	}
+
+	makeFreqMap();
+	_retStr.clear();
+	// Every map key is emitted, so a delimiter goes in front of all but the first.
+	bool needDelim = false;
+	while (_freqIter != _freqMap.end()) {
+		if (needDelim) {
+			_retStr += _delimStr;
+		}
+		_retStr.append(_freqIter->first);
+		needDelim = true;
+		_freqIter++;
+	}
+	return _retStr;
+}
+
+// return a delimited list of the values that occur exactly once; values that
+// repeat are discarded. Returns the null value when empty. If every value
+// repeats, the result is an empty string.
+const QuickString &KeyListOps::getDistinctOnly() {
+	if (empty()) return _nullVal;
+
+	makeFreqMap();
+	_retStr.clear();
+	// Track whether something has been written yet. Testing against
+	// _freqMap.begin() is wrong here: when the first map entry is a repeated
+	// (skipped) value, the first emitted value would get a leading delimiter.
+	bool wroteAny = false;
+	for (; _freqIter != _freqMap.end(); _freqIter++) {
+		if (_freqIter->second != 1) continue;
+		if (wroteAny) _retStr += _delimStr;
+		_retStr.append(_freqIter->first);
+		wroteAny = true;
+	}
+	return _retStr;
+}
+
+// return the number of _unique_ values in the column (0 when empty)
+uint32_t KeyListOps::getCountDistinct() {
+	if (empty()) {
+		return 0;
+	}
+
+	// One map entry exists per distinct value.
+	makeFreqMap();
+	const uint32_t numUnique = _freqMap.size();
+	return numUnique;
+}
+// return all column values, in list order, joined into one string.
+// Returns the null value when empty.
+// NOTE: the `delimiter` argument is not read by this body; the separator
+// actually used is the member _delimStr.
+const QuickString &KeyListOps::getCollapse(const QuickString &delimiter) {
+	if (empty()) {
+		return _nullVal;
+	}
+
+	_retStr.clear();
+	bool isFirst = true;
+	begin();
+	while (!end()) {
+		if (!isFirst) {
+			_retStr += _delimStr;
+		}
+		_retStr.append(getColVal());
+		isFirst = false;
+		next();
+	}
+	return _retStr;
+
+}
+// return every column value glued together with nothing in between.
+// Returns the null value when empty.
+const QuickString &KeyListOps::getConcat() {
+	if (empty()) {
+		return _nullVal;
+	}
+
+	// A concat is a collapse with an empty separator: blank out the delimiter,
+	// let getCollapse() build the text in _retStr, then put the delimiter back.
+	const QuickString savedDelim(_delimStr);
+	_delimStr = "";
+	getCollapse();
+	_delimStr = savedDelim;
+	return _retStr;
+}
+
+// return a histogram of "value:count" pairs, joined by the delimiter and
+// ordered by the histDescType container. Returns the null value when empty.
+const QuickString &KeyListOps::getFreqDesc() {
+	if (empty()) {
+		return _nullVal;
+	}
+
+	makeFreqMap();
+	// Invert the frequency map: count becomes the key, value becomes the payload.
+	histDescType byCount;
+	while (_freqIter != _freqMap.end()) {
+		byCount.insert(pair<int, QuickString>(_freqIter->second, _freqIter->first));
+		_freqIter++;
+	}
+
+	// Emit the inverted entries in container order as value:count.
+	_retStr.clear();
+	bool needDelim = false;
+	for (histDescType::iterator entry = byCount.begin(); entry != byCount.end(); entry++) {
+		if (needDelim) {
+			_retStr += _delimStr;
+		}
+		_retStr.append(entry->second);
+		_retStr += ":";
+		_retStr.append(entry->first);
+		needDelim = true;
+	}
+	return _retStr;
+}
+// return a histogram of "value:count" pairs, joined by the delimiter and
+// ordered by the histAscType container. Returns the null value when empty.
+const QuickString &KeyListOps::getFreqAsc() {
+	if (empty()) {
+		return _nullVal;
+	}
+
+	makeFreqMap();
+	// Invert the frequency map: count becomes the key, value becomes the payload.
+	histAscType byCount;
+	while (_freqIter != _freqMap.end()) {
+		byCount.insert(pair<int, QuickString>(_freqIter->second, _freqIter->first));
+		_freqIter++;
+	}
+
+	// Emit the inverted entries in container order as value:count.
+	_retStr.clear();
+	bool needDelim = false;
+	for (histAscType::iterator entry = byCount.begin(); entry != byCount.end(); entry++) {
+		if (needDelim) {
+			_retStr += _delimStr;
+		}
+		_retStr.append(entry->second);
+		_retStr += ":";
+		_retStr.append(entry->first);
+		needDelim = true;
+	}
+	return _retStr;
+}
+// return the column value of the first record (the null value when empty)
+const QuickString &KeyListOps::getFirst() {
+	if (empty()) {
+		return _nullVal;
+	}
+
+	// Rewind to the head of the list and read the column there.
+	begin();
+	const QuickString &firstVal = getColVal();
+	return firstVal;
+}
+// return the column value of the last record (the null value when empty)
+const QuickString &KeyListOps::getLast() {
+	if (empty()) {
+		return _nullVal;
+	}
+
+	// Start at the head and step forward count - 1 times to land on the tail.
+	begin();
+	size_t stepsTaken = 0;
+	while (stepsTaken < getCount() -1) {
+		next();
+		stepsTaken++;
+	}
+	return getColVal();
+}
+
+// return the text of field _column from the record _iter currently points at
+const QuickString &KeyListOps::getColVal() {
+	const QuickString &field = _iter->value()->getField(_column);
+	return field;
+}
+
+// return field _column of the current record converted with atof()
+double KeyListOps::getColValNum() {
+	const char *fieldText = _iter->value()->getField(_column).c_str();
+	return atof(fieldText);
+}
+
+// Copy the current column of every record into _numArray (useNum == true,
+// values converted to double) or _qsArray (useNum == false, values kept as
+// strings), then sort that array unless sortVal is UNSORTED.
+void KeyListOps::toArray(bool useNum, SORT_TYPE sortVal) {
+
+	//TBD: optimize performance with better memory management.
+	size_t idx = 0;
+	if (!useNum) {
+		_qsArray.resize(_keyList->size());
+		for (begin(); !end(); next()) {
+			_qsArray[idx] = getColVal();
+			idx++;
+		}
+	} else {
+		_numArray.resize(_keyList->size());
+		for (begin(); !end(); next()) {
+			_numArray[idx] = getColValNum();
+			idx++;
+		}
+	}
+
+	if (sortVal == UNSORTED) {
+		return;
+	}
+	sortArray(useNum, sortVal == ASC);
+}
+
+// Sort _numArray (useNum == true) or _qsArray (useNum == false) in place,
+// ascending when ascOrder is true and descending otherwise.
+void KeyListOps::sortArray(bool useNum, bool ascOrder)
+{
+	if (useNum && ascOrder) {
+		sort(_numArray.begin(), _numArray.end(), less<double>());
+	} else if (useNum) {
+		sort(_numArray.begin(), _numArray.end(), greater<double>());
+	} else if (ascOrder) {
+		sort(_qsArray.begin(), _qsArray.end(), less<QuickString>());
+	} else {
+		sort(_qsArray.begin(), _qsArray.end(), greater<QuickString>());
+	}
+}
+
+// Rebuild _freqMap so each distinct column value maps to the number of times
+// it occurs, and leave _freqIter positioned on the first map entry.
+void KeyListOps::makeFreqMap() {
+	_freqMap.clear();
+
+	begin();
+	while (!end()) {
+		_freqMap[getColVal()]++;
+		next();
+	}
+	_freqIter = _freqMap.begin();
+}
diff --git a/src/utils/KeyListOps/KeyListOps.h b/src/utils/KeyListOps/KeyListOps.h
new file mode 100644
index 0000000..e294f53
--- /dev/null
+++ b/src/utils/KeyListOps/KeyListOps.h
@@ -0,0 +1,117 @@
+/*
+ * KeyListOps.h
+ *
+ * Created on: Feb 6, 2014
+ * Author: nek3d
+ */
+
+#ifndef KEYLISTOPS_H_
+#define KEYLISTOPS_H_
+
+using namespace std;
+
+#include <map>
+#include <utility> //for pair
+#include "QuickString.h"
+#include <stdint.h>
+#include "RecordKeyList.h"
+
+class KeyListOps { // operations over one column of the records held in a RecordKeyList
+public:
+ KeyListOps();
+ KeyListOps(RecordKeyList *keyList, int column = 1);
+ ~KeyListOps();
+
+
+ void setKeyList(RecordKeyList *keyList) { _keyList = keyList; }
+ void setColumn(int col) { _column = col; }
+ void setNullValue(const QuickString & nullVal) { _nullVal = nullVal; }
+ void setDelimStr(const QuickString &delimStr) { _delimStr = delimStr; }
+
+
+ typedef enum { SUM, MEAN, STDDEV, SAMPLE_STDDEV, MEDIAN, MODE, ANTIMODE, MIN, MAX, ABSMIN, ABSMAX, COUNT, DISTINCT, COUNT_DISTINCT,
+ DISTINCT_ONLY, COLLAPSE, CONCAT, FREQ_ASC, FREQ_DESC, FIRST, LAST, INVALID } OP_TYPES;
+
+ OP_TYPES getOpCode(const QuickString &operation) const;
+ // return the total of the values in the vector
+ double getSum();
+ // return the average value in the vector
+ double getMean();
+ // return the standard deviation
+ double getStddev();
+ // return the sample standard deviation
+ double getSampleStddev();
+ // return the median value in the vector
+ double getMedian();
+ // return the most common value in the vector
+ const QuickString &getMode();
+ // return the least common value in the vector
+ const QuickString &getAntiMode();
+ // return the minimum element of the vector
+ double getMin();
+ // return the maximum element of the vector
+ double getMax();
+ // return the minimum absolute value of the vector
+ double getAbsMin();
+ // return the maximum absolute value of the vector
+ double getAbsMax();
+ // return the count of elements in the vector
+ uint32_t getCount();
+ // return the count of _unique_ elements in the vector
+ uint32_t getCountDistinct();
+ // return only those elements that occur once
+ const QuickString &getDistinctOnly();
+ // return a delimiter-separated list of elements
+ const QuickString & getCollapse(const QuickString & delimiter = ",");
+ // return a concatenation of all elements in the vector
+ const QuickString & getConcat();
+ // return a comma-separated list of the _unique_ elements
+ const QuickString & getDistinct();
+ // return a histogram of values and their freqs. in desc. order of frequency
+ const QuickString & getFreqDesc();
+ // return a histogram of values and their freqs. in asc. order of frequency
+ const QuickString & getFreqAsc();
+ // return the first value in the list, or the null value if the list is empty
+ const QuickString & getFirst();
+ // return the last value in the list, or the null value if the list is empty
+ const QuickString & getLast();
+
+private:
+ RecordKeyList *_keyList; // list being operated on; set via the constructor argument or setKeyList()
+ int _column; // field number handed to getField() when reading a record's value
+ QuickString _nullVal; // returned by getFirst()/getLast() when the key list is empty
+ QuickString _delimStr;
+ QuickString _retStr; // result buffer that string-building methods append to and return
+
+ map<QuickString, OP_TYPES> _opCodes;
+ RecordKeyList _nullKeyList; //this has to exist just so we can initialize _iter, below.
+ RecordKeyList::const_iterator_type _iter; // current position; driven by begin()/next() below
+
+ // Some methods need to put values into a vector, mostly for sorting.
+ vector<double> _numArray;
+ vector<QuickString> _qsArray;
+
+ typedef map<QuickString, int> freqMapType; // column value -> number of occurrences
+ freqMapType _freqMap; // rebuilt from scratch by makeFreqMap()
+ freqMapType::iterator _freqIter; // reset to _freqMap.begin() at the end of makeFreqMap()
+
+ typedef enum { UNSORTED, ASC, DESC} SORT_TYPE;
+
+ typedef multimap<int, QuickString, less<int> > histAscType; // int key -> value, smallest key first
+ typedef multimap<int, QuickString, greater<int> > histDescType; // int key -> value, largest key first
+ void init();
+ const QuickString &getColVal(); // text of column _column for the record at _iter
+ double getColValNum(); // same value converted with atof
+ bool empty() { return _keyList->empty(); } // forwards to the key list's empty()
+ void begin() { _iter = _keyList->begin(); } // move _iter to the start of the key list
+ bool end() { return _iter == _keyList->end(); } // true once _iter equals the key list's end()
+ void next() { _iter = _keyList->next(); } // take the following position from the key list's own next()
+ void toArray(bool useNum, SORT_TYPE sortVal = UNSORTED); // fill _numArray or _qsArray, optionally sorted
+ void sortArray(bool useNum, bool ascOrder); // sort _numArray or _qsArray in place
+ void makeFreqMap(); // count occurrences of each column value into _freqMap
+
+
+};
+
+
+#endif /* KEYLISTOPS_H_ */
diff --git a/src/utils/Contexts/Makefile b/src/utils/KeyListOps/Makefile
similarity index 52%
copy from src/utils/Contexts/Makefile
copy to src/utils/KeyListOps/Makefile
index 7ddc3c6..1797c83 100644
--- a/src/utils/Contexts/Makefile
+++ b/src/utils/KeyListOps/Makefile
@@ -4,39 +4,36 @@ UTILITIES_DIR = ../../utils/
# -------------------
# define our includes
# -------------------
-INCLUDES = -I$(UTILITIES_DIR)/general/ \
+INCLUDES = -I$(UTILITIES_DIR)/general/ \
-I$(UTILITIES_DIR)/fileType/ \
- -I$(UTILITIES_DIR)/FileRecordTools/ \
+ -I$(UTILITIES_DIR)/Contexts/ \
+ -I$(UTILITIES_DIR)/GenomeFile/ \
+ -I$(UTILITIES_DIR)/FileRecordTools/ \
-I$(UTILITIES_DIR)/FileRecordTools/FileReaders/ \
-I$(UTILITIES_DIR)/FileRecordTools/Records/ \
- -I$(UTILITIES_DIR)/GenomeFile/ \
- -I$(UTILITIES_DIR)/BamTools/include \
- -I$(UTILITIES_DIR)/BamTools/src/ \
- -I$(UTILITIES_DIR)/version/
-
-
+ -I$(UTILITIES_DIR)/BamTools/include \
+ -I$(UTILITIES_DIR)/BamTools/src/ \
+ -I$(UTILITIES_DIR)/version/
+
+
# ----------------------------------
# define our source and object files
# ----------------------------------
-SOURCES= ContextBase.cpp ContextBase.h ContextIntersect.cpp ContextIntersect.h ContextMap.cpp ContextMap.h ContextSample.cpp ContextSample.h
-OBJECTS= ContextBase.o ContextIntersect.o ContextMap.o ContextSample.o
-_EXT_OBJECTS=ParseTools.o QuickString.o
+SOURCES= KeyListOps.cpp KeyListOps.h
+OBJECTS= KeyListOps.o
+_EXT_OBJECTS=
EXT_OBJECTS=$(patsubst %,$(OBJ_DIR)/%,$(_EXT_OBJECTS))
BUILT_OBJECTS= $(patsubst %,$(OBJ_DIR)/%,$(OBJECTS))
-all: $(BUILT_OBJECTS)
-
-.PHONY: all
-
$(BUILT_OBJECTS): $(SOURCES)
@echo " * compiling" $(*F).cpp
@$(CXX) -c -o $@ $(*F).cpp $(LDFLAGS) $(CXXFLAGS) $(INCLUDES)
+$(EXT_OBJECTS):
+ @$(MAKE) --no-print-directory -C $(INCLUDES)
+
clean:
@echo "Cleaning up."
- @rm -f $(OBJ_DIR)/ContextBase.o \
- $(OBJ_DIR)/ContextIntersect.o \
- $(OBJ_DIR)/ContextMap.o \
- $(OBJ_DIR)/ContextSample.o
+ @rm -f $(OBJ_DIR)/KeyListOps.o
.PHONY: clean
\ No newline at end of file
diff --git a/src/utils/general/Makefile b/src/utils/general/Makefile
index 43dcfba..0361fab 100644
--- a/src/utils/general/Makefile
+++ b/src/utils/general/Makefile
@@ -4,7 +4,7 @@ UTILITIES_DIR = ../../utils/
# -------------------
# define our includes
# -------------------
-INCLUDES =
+INCLUDES = -I$(UTILITIES_DIR)/lineFileUtilities/
# ----------------------------------
# define our source and object files
diff --git a/src/utils/general/QuickString.cpp b/src/utils/general/QuickString.cpp
index 831f84a..9e06186 100644
--- a/src/utils/general/QuickString.cpp
+++ b/src/utils/general/QuickString.cpp
@@ -3,6 +3,7 @@
#include <cstdlib>
#include <cstdio>
#include "ParseTools.h"
+#include "lineFileUtilities.h"
QuickString::QuickString(size_t capacity)
: _buffer(NULL),
@@ -82,6 +83,35 @@ QuickString &QuickString::operator = (const QuickString & inBuf){
return *this;
}
+QuickString &QuickString::operator = (char val) { // replace the contents with the single character val
+ clear();
+ append(val);
+ return *this;
+}
+QuickString &QuickString::operator = (int val) { // replace the contents with what append(int) writes for val
+ clear();
+ append(val);
+ return *this;
+}
+
+QuickString &QuickString::operator = (uint32_t val) { // replace the contents with what append(uint32_t) writes for val
+ clear();
+ append(val);
+ return *this;
+}
+
+QuickString &QuickString::operator = (float val) { // replace the contents with what append(float) writes for val
+ clear();
+ append(val);
+ return *this;
+}
+
+QuickString &QuickString::operator = (double val) { // replace the contents with what append(double) writes for val
+ clear();
+ append(val);
+ return *this;
+}
+
QuickString &QuickString::operator += (const QuickString & inBuf)
{
@@ -107,6 +137,26 @@ QuickString &QuickString::operator += (const char *inBuf)
return *this;
}
+QuickString &QuickString::operator += (int num) { // operator form of append(int); returns *this for chaining
+ append(num);
+ return *this;
+}
+
+QuickString &QuickString::operator += (uint32_t num) { // operator form of append(uint32_t); returns *this for chaining
+ append(num);
+ return *this;
+}
+
+QuickString &QuickString::operator += (float num) { // operator form of append(float); returns *this for chaining
+ append(num);
+ return *this;
+}
+
+QuickString &QuickString::operator += (double num) { // operator form of append(double); returns *this for chaining
+ append(num);
+ return *this;
+}
+
bool QuickString::operator == (const QuickString &qs) const {
if ( _currSize != qs._currSize) {
return false;
@@ -194,6 +244,21 @@ void QuickString::append(const char *inBuf, size_t inBufLen)
void QuickString::append(int num) {
int2str(num, *this, true);
}
+
+void QuickString::append(uint32_t num) { // append the decimal text of num to this string
+ if (num <= (uint32_t)INT_MAX) int2str((int)num, *this, true); else append(ToString(num)); // fast int path only when the value fits; the (int) cast would make larger values negative
+}
+
+void QuickString::append(float num) {
+ append(ToString(num)); // convert with ToString first, then append the resulting text
+}
+
+void QuickString::append(double num) {
+ append(ToString(num)); // convert with ToString first, then append the resulting text
+}
+
+
+
QuickString &QuickString::assign(const char *inBuf, size_t inBufLen)
{
clear();
diff --git a/src/utils/general/QuickString.h b/src/utils/general/QuickString.h
index 5fdc0fc..a76e5ff 100644
--- a/src/utils/general/QuickString.h
+++ b/src/utils/general/QuickString.h
@@ -10,6 +10,7 @@
using namespace std;
#include <string>
+#include <stdint.h>
#include <climits>
#include <ostream>
@@ -32,10 +33,19 @@ public:
QuickString &operator = (const string &);
QuickString &operator = (const char *);
QuickString &operator = (const QuickString &);
+ QuickString &operator = (char);
+ QuickString &operator = (int);
+ QuickString &operator = (uint32_t);
+ QuickString &operator = (float);
+ QuickString &operator = (double);
QuickString &operator += (const QuickString &);
QuickString &operator += (const string &);
QuickString &operator += (const char *);
QuickString &operator += (char);
+ QuickString &operator += (int);
+ QuickString &operator += (uint32_t);
+ QuickString &operator += (float);
+ QuickString &operator += (double);
friend ostream &operator << (ostream &out, const QuickString &str);
bool operator == (const QuickString &) const;
@@ -52,7 +62,16 @@ public:
void append(const QuickString &str) { append(str.c_str(), str.size()); }
void append(const char *buf, size_t bufLen);
void append(char c);
+
+ //These are not templated because float and double require a stringstream based
+ //implementation, while the integer append uses a much faster home-brewed algorithm
+ //for better performance.
void append(int num);
+ void append(uint32_t num);
+ void append(float num);
+ void append(double num);
+
+
QuickString &assign(const char *str, size_t n);
void resize(size_t n, char c = '\0');
diff --git a/test/map/test-map.sh b/test/map/test-map.sh
index 293d84e..f62d364 100644
--- a/test/map/test-map.sh
+++ b/test/map/test-map.sh
@@ -499,10 +499,8 @@ echo " map.t33..\c"
echo \
"
*****
-*****ERROR: requested column 15 , but record only has fields 1 - 12. Exiting.
-
-*****" > exp
-$BT map -a ivls.bed -b test.vcf -c 15 -o collapse 2> obs
+***** ERROR: Requested column 15, but database file test.vcf only has fields 1 - 12." > exp
+$BT map -a ivls.bed -b test.vcf -c 15 -o collapse 2>&1 > /dev/null | head -3> obs
check obs exp
rm obs exp
@@ -624,12 +622,9 @@ echo " map.t41..\c"
echo \
"
*****
-*****ERROR: requested column 41 , but record only has fields 1 - 6. Exiting.
-
-*****" > exp
-$BT map -a ivls.bed -b values5.bed -c 41 -o collapse 2> obs
+***** ERROR: Requested column 41, but database file test.vcf only has fields 1 - 12." > exp
+$BT map -a ivls.bed -b test.vcf -c 41 -o collapse 2>&1 > /dev/null | head -3> obs
check obs exp
-
rm obs exp
###########################################################
@@ -639,12 +634,9 @@ echo " map.t42..\c"
echo \
"
*****
-*****ERROR: requested column -1 , but record only has fields 1 - 6. Exiting.
-
-*****" > exp
-$BT map -a ivls.bed -b values5.bed -c -1 -o collapse 2> obs
+***** ERROR: Requested column -1, but database file test.vcf only has fields 1 - 12." > exp
+$BT map -a ivls.bed -b test.vcf -c -1 -o collapse 2>&1 > /dev/null | head -3> obs
check obs exp
-
rm obs exp
###########################################################
@@ -654,12 +646,9 @@ echo " map.t43..\c"
echo \
"
*****
-*****ERROR: requested column 0 , but record only has fields 1 - 6. Exiting.
-
-*****" > exp
-$BT map -a ivls.bed -b values5.bed -c 0 -o collapse 2> obs
+***** ERROR: Requested column 0, but database file test.vcf only has fields 1 - 12." > exp
+$BT map -a ivls.bed -b test.vcf -c 0 -o collapse 2>&1 > /dev/null | head -3> obs
check obs exp
-
rm obs exp
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/bedtools.git
More information about the debian-med-commit
mailing list