[med-svn] [filo] 01/08: added new "distinct" operation to groupBy.

Charles Plessy plessy at moszumanska.debian.org
Mon Nov 25 23:48:19 UTC 2013


This is an automated email from the git hooks/post-receive script.

plessy pushed a commit to branch master
in repository filo.

commit 3c9fb01843d797c64be578ecbcd1be683cfe245a
Author: arq5x <aaronquinlan at gmail.com>
Date:   Fri Dec 30 13:15:14 2011 -0500

    added new "distinct" operation to groupBy.
---
 src/groupBy/groupBy.cpp | 26 ++++++++++++++++++++++----
 1 file changed, 22 insertions(+), 4 deletions(-)

diff --git a/src/groupBy/groupBy.cpp b/src/groupBy/groupBy.cpp
index 09c2dc9..1babc0f 100644
--- a/src/groupBy/groupBy.cpp
+++ b/src/groupBy/groupBy.cpp
@@ -11,6 +11,7 @@ Licenced under the MIT license.
 #include <vector>
 #include <map>
 #include <numeric>
+#include <algorithm>
 #include <iterator>
 #include <iostream>
 #include <iomanip>
@@ -184,8 +185,8 @@ int main(int argc, char* argv[]) {
     for( size_t i = 0; i < ops.size(); i++ ) {
         if ((ops[i] != "sum")  && (ops[i] != "max")    && (ops[i] != "min") && (ops[i] != "mean") &&
             (ops[i] != "mode") && (ops[i] != "median") && (ops[i] != "antimode") && (ops[i] != "stdev") &&
-            (ops[i] != "sstdev") && (ops[i] != "count") && (ops[i] != "collapse") && (ops[i] != "concat") &&
-            (ops[i] != "freqdesc") && (ops[i] != "freqasc")) 
+            (ops[i] != "sstdev") && (ops[i] != "count") && (ops[i] != "collapse") && (ops[i] != "distinct") &&
+            (ops[i] != "concat") && (ops[i] != "freqdesc") && (ops[i] != "freqasc")) 
         {
             cerr << endl << "*****" << endl << "*****ERROR: Invalid operation selection \"" << ops[i] << endl << "\"  *****" << endl;
             showHelp = true;
@@ -262,7 +263,8 @@ void ShowHelp(void) {
     cerr                         << "\t\t\t    sum, count, min, max," << endl;
     cerr                         << "\t\t\t    mean, median, mode, antimode," << endl;
     cerr                         << "\t\t\t    stdev, sstdev (sample standard dev.)," << endl;
-    cerr                         << "\t\t\t    collapse (i.e., print a comma separated list), " << endl;
+    cerr                         << "\t\t\t    collapse (i.e., print a comma separated list (duplicates allowed)), " << endl;
+    cerr                         << "\t\t\t    distinct (i.e., print a comma separated list (NO duplicates allowed)), " << endl;
     cerr                         << "\t\t\t    concat   (i.e., merge values into a single, non-delimited string), " << endl;
     cerr                         << "\t\t\t    freqdesc (i.e., print desc. list of values:freq)" << endl;
     cerr                         << "\t\t\t    freqasc (i.e., print asc. list of values:freq)" << endl;
@@ -316,7 +318,7 @@ void GroupBy (const string &inFile,
     const bool printOriginalLine,
     const bool printHeaderLine,
     const bool InputHaveHeaderLine,
-const bool ignoreCase) {
+    const bool ignoreCase) {
 
     // current line number
     int lineNum = 0;
@@ -431,6 +433,22 @@ void ReportSummary(const vector<string> &group, const vector<vector<string> > &d
             }
             result.push_back(collapse);
         }
+        else if (op == "distinct") {
+            string distinct;
+            // get the current column's data
+            vector<string> col_data = data[i];
+            // remove duplicate entries from the vector
+            // http://stackoverflow.com/questions/1041620/most-efficient-way-to-erase-duplicates-and-sort-a-c-vector
+            sort( col_data.begin(), col_data.end() );
+            col_data.erase( unique( col_data.begin(), col_data.end() ), col_data.end() );
+            
+            for( size_t j = 0; j < col_data.size(); j++ ) {//Ugly, but cannot use back_inserter
+                if (j>0)
+                    distinct.append(",");
+                distinct.append(col_data[j]);
+            }
+            result.push_back(distinct);
+        }
         else if (op == "concat") {
             string concat;
             for( size_t j = 0; j < data[i].size(); j++ ) {//Ugly, but cannot use back_inserter

-- 
Alioth's /git/debian-med/git-commit-notice on /srv/git.debian.org/git/debian-med/filo.git



More information about the debian-med-commit mailing list