[med-svn] [libgtextutils] 38/83: Added String tokenizer.

Charles Plessy plessy at moszumanska.debian.org
Wed Jan 8 13:37:27 UTC 2014


This is an automated email from the git hooks/post-receive script.

plessy pushed a commit to branch debian/unstable
in repository libgtextutils.

commit 695be5e39addc049851cbbce9c0a26cc725bb825
Author: A. Gordon <gordon at cshl.edu>
Date:   Mon Jun 15 17:14:21 2009 -0400

    Added String tokenizer.
---
 src/gtextutils/Makefile.am       |  1 +
 src/gtextutils/string_tokenize.h | 50 ++++++++++++++++++++++++++++
 tests/Makefile.am                |  2 ++
 tests/test_string_tokenize.cpp   | 70 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 123 insertions(+)

diff --git a/src/gtextutils/Makefile.am b/src/gtextutils/Makefile.am
index 60a98a3..3e51a50 100644
--- a/src/gtextutils/Makefile.am
+++ b/src/gtextutils/Makefile.am
@@ -33,4 +33,5 @@ libgtextutils_0_4_a_include_HEADERS = container_join.h \
 		  inbuf1.hpp \
 		  tuple_parser.h \
 		  exit_manip.h \
+		  string_tokenize.h \
 		  pipe_fitter.h
diff --git a/src/gtextutils/string_tokenize.h b/src/gtextutils/string_tokenize.h
new file mode 100644
index 0000000..84e13c8
--- /dev/null
+++ b/src/gtextutils/string_tokenize.h
@@ -0,0 +1,50 @@
+#ifndef __STRING_TOKENIZE_H__
+#define __STRING_TOKENIZE_H__
+
+#include <string>
+#include <iterator>
+
+/*
+   Splits str into tokens and writes each token through output_iter.
+   Every character of delimiters is a separator on its own; runs of
+   separators (leading and trailing ones included) produce no empty tokens.
+
+   Heavily based on code from:
+   	C++ Programming HOW-TO
+	Al Dev (Alavoor Vasudevan) alavoor[AT]yahoo.com
+	http://oopweb.com/CPP/Documents/CPPHOWTO/Volume/C++Programming-HOWTO-7.html
+	(Distributed under GPL)
+
+   str          - text to split
+   output_iter  - output iterator that accepts std::string values
+   delimiters   - set of separator characters (default: a single space)
+
+   Usage Example:
+      string input = "Hello|Token|World";  vector<string> tokens;
+      String_Tokenize ( input, back_inserter(tokens), "|" );
+*/
+template <typename OutputIterator>
+void String_Tokenize(const std::string& str,
+		OutputIterator 	output_iter,
+		const std::string& delimiters = " ")
+{
+	// lastPos: start of the first token (leading delimiters skipped); npos if there is no token.
+	std::string::size_type lastPos = str.find_first_not_of(delimiters, 0);
+	// pos: first delimiter after that token, i.e. one past its end; npos if the token runs to the end.
+	std::string::size_type pos     = str.find_first_of(delimiters, lastPos);
+
+	while (std::string::npos != pos || std::string::npos != lastPos)
+	{
+		*output_iter = str.substr(lastPos, pos - lastPos); // if pos is npos the count exceeds the remainder and substr stops at the end of str
+		++output_iter;
+
+		// Skip the delimiters that follow the token just emitted.  Note the "not_of"
+		lastPos = str.find_first_not_of(delimiters, pos);
+		// Find the delimiter that ends the next token (npos if it is the last one or there is none)
+		pos = str.find_first_of(delimiters, lastPos);
+	}
+}
+
+
+#endif
+
diff --git a/tests/Makefile.am b/tests/Makefile.am
index b2dddfd..d60998a 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -28,6 +28,7 @@ check_PROGRAMS = test_container_join \
 		 test_fd_inbuf \
 		 test_in_out_buf \
 		 test_pipe_fitter \
+		 test_string_tokenize \
 		 $(TUPLE_PROG)
 
 TESTS = $(check_PROGRAMS)
@@ -46,3 +47,4 @@ test_in_out_buf_SOURCES = test_in_out_buf.cpp tests_assertion.h
 test_pipe_fitter_SOURCES = test_pipe_fitter.c tests_assertion.h
 test_tuple_parser_SOURCES = test_tuple_parser.cpp tests_assertion.h
 test_tuple_parser_file_SOURCES = test_tuple_parser_file.cpp tests_assertion.h
+test_string_tokenize_SOURCES = test_string_tokenize.cpp tests_assertion.h
diff --git a/tests/test_string_tokenize.cpp b/tests/test_string_tokenize.cpp
new file mode 100644
index 0000000..108c6a2
--- /dev/null
+++ b/tests/test_string_tokenize.cpp
@@ -0,0 +1,70 @@
+/*
+   Gordon's Text-Utilities Library
+   Copyright (C) 2009 Assaf Gordon (gordon at cshl.edu)
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU Affero General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU Affero General Public License for more details.
+
+   You should have received a copy of the GNU Affero General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>
+*/
+#include <vector>
+#include <string>
+#include <iostream>
+#include <cassert>
+#include <err.h>
+#include <iterator>
+
+#include "tests_assertion.h"
+#include <gtextutils/string_tokenize.h>
+
+/*
+ * Tiny test suite for string tokenize
+ */
+
+using namespace std;
+
+// Test: splitting "Hello|Token|World" on the single delimiter '|' yields the three words in order.
+void test_token1()
+{
+	string test1 = "Hello|Token|World";
+	vector<string> v;
+
+	String_Tokenize ( test1, back_inserter(v), "|" ) ;
+	// NOTE(review): v.size() is never checked, so a missing or extra token is not detected here.
+	ASSERT ( v[0] == "Hello" ) ;
+	ASSERT ( v[1] == "Token" ) ;
+	ASSERT ( v[2] == "World" ) ;
+
+}
+
+
+// Test: with the delimiter set "| ", both the space and the '|' in the input act as separators.
+void test_token2()
+{
+	string test1 = "Hello Token|World";
+	vector<string> v;
+
+	String_Tokenize ( test1, back_inserter(v), "| " ) ;
+	// NOTE(review): v.size() is never checked, so a missing or extra token is not detected here.
+	ASSERT ( v[0] == "Hello" ) ;
+	ASSERT ( v[1] == "Token" ) ;
+	ASSERT ( v[2] == "World" ) ;
+
+}
+
+int main() // Entry point: runs each String_Tokenize test case in order.
+{
+	test_token1();
+	test_token2();
+
+	// Both test cases returned; exit status 0 reports success to the test harness.
+	return 0;
+}

-- 
Alioth's /git/debian-med/git-commit-notice on /srv/git.debian.org/git/debian-med/libgtextutils.git



More information about the debian-med-commit mailing list