[med-svn] [libgtextutils] 27/83: Added Natural-Sort predicates.

Charles Plessy plessy at moszumanska.debian.org
Wed Jan 8 13:37:27 UTC 2014


This is an automated email from the git hooks/post-receive script.

plessy pushed a commit to branch debian/unstable
in repository libgtextutils.

commit 92c38b649ec15120deaff77ee6f39b17f2e28d7b
Author: A. Gordon <gordon at cshl.edu>
Date:   Tue Mar 24 20:34:46 2009 -0400

    Added Natural-Sort predicates.
---
 src/gtextutils/Makefile.am |  16 ++--
 src/gtextutils/natsort.h   | 110 ++++++++++++++++++++++++++++
 src/gtextutils/strnatcmp.c | 178 +++++++++++++++++++++++++++++++++++++++++++++
 src/gtextutils/strnatcmp.h |  31 ++++++++
 4 files changed, 329 insertions(+), 6 deletions(-)

diff --git a/src/gtextutils/Makefile.am b/src/gtextutils/Makefile.am
index f99ba43..c7ac53b 100644
--- a/src/gtextutils/Makefile.am
+++ b/src/gtextutils/Makefile.am
@@ -9,14 +9,18 @@
 # implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 
 
-lib_LIBRARIES = libgtextutils-0.1.a
+lib_LIBRARIES = libgtextutils-0.2.a
 
-libgtextutils_0_1_a_SOURCES = stream_wrapper.cpp stream_wrapper.h \
+libgtextutils_0_2_a_SOURCES = stream_wrapper.cpp stream_wrapper.h \
 		          text_line_reader.cpp text_line_reader.h \
-			  print_utils.h
+			  container_join.h \
+			  natsort.h \
+			  strnatcmp.c strnatcmp.h
 
-libgtextutils_0_1_a_includedir = $(includedir)/gtextutils-0.1/gtextutils
+libgtextutils_0_2_a_includedir = $(includedir)/gtextutils-0.2/gtextutils
 
-libgtextutils_0_1_a_include_HEADERS = print_utils.h \
+libgtextutils_0_2_a_include_HEADERS = container_join.h \
 		  text_line_reader.h \
-		  stream_wrapper.h
+		  stream_wrapper.h \
+		  natsort.h \
+		  strnatcmp.h 
diff --git a/src/gtextutils/natsort.h b/src/gtextutils/natsort.h
new file mode 100644
index 0000000..a3520ef
--- /dev/null
+++ b/src/gtextutils/natsort.h
@@ -0,0 +1,110 @@
+/*
+   Gordon's Text-Utilities Library
+   Copyright (C) 2009 Assaf Gordon (gordon at cshl.edu)
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU Affero General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU Affero General Public License for more details.
+
+   You should have received a copy of the GNU Affero General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>
+*/
+#ifndef __NATURAL_SORT_STL_H__
+#define __NATURAL_SORT_STL_H__
+
+/*
+ * natsort.h - 
+ * STL-compatible interface to Martin Pool's Natural-Order sorting routines.
+ *
+ * see http://sourcefrog.net/projects/natsort/ for more details
+ *
+ * Note 1:
+ * There's a boost equivalent version of 'composable natural sort'
+ * at http://www.boostcookbook.com/Recipe:/1235053
+ * But it requires the boost library (including the regex engine),
+ * and I prefer not to use it for now.
+ *
+ * Note 2:
+ * As of FSF GNU Coreutils version 7.1, the 'sort' program has a similar sorting order
+ * called 'version' (with the -V command argument).
+ * Coreutils's implementation is found in <coreutils-7.1>/src/filevercmp.{ch}.
+ * The results are similar to Martin Pool's NatSort, but not identical
+ * if the sorted strings are more complex than <prefixNUM>.
+ */
+
+/*
+Usage:
+	// sort an array in natural order
+	vector<string> v;
+	v.push_back("chr20");
+	v.push_back("chr10");
+	v.push_back("chr5");
+	v.push_back("chr1");
+	v.push_back("chr2");
+
+	// "regular" sort
+	sort(v.begin(), v.end() );
+	// order will be:  
+	//	chr1
+	//	chr10
+	//	chr2
+	//	chr20
+	//	chr5
+	
+	// "natural order" sort
+	sort(v.begin(), v.end(), natural_sort_predicate() );
+	// order will be: 
+	// 	chr1
+	// 	chr2
+	// 	chr5
+	// 	chr10
+	// 	chr20
+*/
+
+
+extern "C" {
+/*
+  strnatcmp.{h,c} -- Perform 'natural order' comparisons of strings in C.
+  Copyright (C) 2000, 2004 by Martin Pool <mbp sourcefrog net>
+*/
+#include "strnatcmp.h"
+}
+#include <functional>
+#include <string>
+
+/* Case-sensitive natural-order "less than": true when strnatcmp() ranks s1 before s2. */
+struct natural_sort_predicate : public std::binary_function<std::string, std::string, bool>
+{
+	/* const-qualified so the functor also works as a std::set / std::map comparator */
+	bool operator() ( const std::string& s1, const std::string& s2 ) const
+	{ return strnatcmp(s1.c_str(), s2.c_str()) < 0 ; }
+};
+
+/* Case-insensitive natural-order "less than": true when strnatcasecmp() ranks s1 before s2. */
+struct natural_sort_ignore_case_predicate : public std::binary_function<std::string, std::string, bool>
+{
+	/* const-qualified so the functor also works as a std::set / std::map comparator */
+	bool operator() ( const std::string& s1, const std::string& s2 ) const
+	{ return strnatcasecmp(s1.c_str(), s2.c_str()) < 0 ; }
+};
+
+/*
+inline bool natural_sort_predicate(const std::string& s1, const std::string& s2)
+{
+	return strnatcmp(s1.c_str(), s2.c_str()) < 0 ;
+}
+
+inline bool natural_sort_ignore_case_predicate(const std::string& s1, const std::string& s2)
+{
+	return strnatcasecmp(s1.c_str(), s2.c_str()) < 0 ;
+}*/
+
+
+#endif
+
diff --git a/src/gtextutils/strnatcmp.c b/src/gtextutils/strnatcmp.c
new file mode 100644
index 0000000..74cbb61
--- /dev/null
+++ b/src/gtextutils/strnatcmp.c
@@ -0,0 +1,178 @@
+/* -*- mode: c; c-file-style: "k&r" -*-
+
+  strnatcmp.c -- Perform 'natural order' comparisons of strings in C.
+  Copyright (C) 2000, 2004 by Martin Pool <mbp sourcefrog net>
+
+  This software is provided 'as-is', without any express or implied
+  warranty.  In no event will the authors be held liable for any damages
+  arising from the use of this software.
+
+  Permission is granted to anyone to use this software for any purpose,
+  including commercial applications, and to alter it and redistribute it
+  freely, subject to the following restrictions:
+
+  1. The origin of this software must not be misrepresented; you must not
+     claim that you wrote the original software. If you use this software
+     in a product, an acknowledgment in the product documentation would be
+     appreciated but is not required.
+  2. Altered source versions must be plainly marked as such, and must not be
+     misrepresented as being the original software.
+  3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+/* partial change history:
+ *
+ * 2004-10-10 mbp: Lift out character type dependencies into macros.
+ *
+ * Eric Sosman pointed out that ctype functions take a parameter whose
+ * value must be that of an unsigned char, even on platforms that have
+ * negative chars in their default char type.
+ */
+
+#include <ctype.h>
+#include <string.h>
+#include <assert.h>
+#include <stdio.h>
+
+#include "strnatcmp.h"
+
+
+/* These are defined as inline functions to make it easier to adapt this
+ * code to different character types or comparison functions. */
+/* Non-zero when `a` is a decimal digit.  The value is converted to unsigned
+ * char first, so negative chars stay inside the range <ctype.h> accepts. */
+static inline int nat_isdigit(nat_char a) {
+     return isdigit((unsigned char) a);
+}
+
+
+/* Non-zero when isspace() classifies `a` as whitespace; `a` is converted to
+ * unsigned char first so negative char values are handled correctly. */
+static inline int nat_isspace(nat_char a) {
+     return isspace((unsigned char) a);
+}
+
+
+/* Upper-case equivalent of `a` as returned by toupper(), narrowed back to
+ * nat_char; the argument is passed as an unsigned char value. */
+static inline nat_char nat_toupper(nat_char a) {
+     return toupper((unsigned char) a);
+}
+
+
+
+/* Compare two right-aligned digit runs starting at a and b (strnatcmp0 uses
+ * this when neither run begins with '0').  Returns -1, 0 or +1.  The longer
+ * run wins; for runs of equal length the first differing digit decides. */
+static int
+compare_right(nat_char const *a, nat_char const *b)
+{
+     int first_diff = 0;   /* sign of the earliest digit mismatch, 0 while equal */
+
+     while (1) {
+	  const int a_digit = nat_isdigit(*a);
+	  const int b_digit = nat_isdigit(*b);
+	  /* One or both runs ended: length decides, then the saved mismatch. */
+	  if (!a_digit  ||  !b_digit) {
+	       if (a_digit)
+		    return +1;
+	       if (b_digit)
+		    return -1;
+	       return first_diff;
+	  }
+
+	  /* Both are digits (hence non-NUL); remember only the first mismatch. */
+	  if (first_diff == 0  &&  *a != *b)
+	       first_diff = (*a < *b) ? -1 : +1;
+
+	  a++;
+	  b++;
+     }
+}
+
+
+/* Compare two left-aligned digit runs, as strnatcmp0 does when either run
+ * starts with '0': the first differing position decides.
+ * Returns -1, 0 or +1. */
+static int
+compare_left(nat_char const *a, nat_char const *b)
+{
+     while (1) {
+	  const int a_digit = nat_isdigit(*a);
+	  const int b_digit = nat_isdigit(*b);
+
+	  if (!a_digit  ||  !b_digit)      /* a run ended: the shorter one sorts first */
+	       return a_digit ? +1 : (b_digit ? -1 : 0);
+	  /* both are digits: the first difference wins */
+	  if (*a != *b)
+	       return (*a < *b) ? -1 : +1;
+
+	  a++;
+	  b++;
+     }
+}
+
+/* Natural-order core shared by strnatcmp/strnatcasecmp; returns -1, 0 or +1 (fold_case != 0: ignore case). */
+static int strnatcmp0(nat_char const *a, nat_char const *b, int fold_case)
+{
+     int ai, bi;                  /* current index into a and into b */
+     nat_char ca, cb;             /* characters found at those indices */
+     int fractional, result;
+     /* both arguments must be non-NULL */
+     assert(a && b);
+     ai = bi = 0;
+     while (1) {
+	  ca = a[ai]; cb = b[bi];
+
+	  /* skip whitespace at the current position (zeros are not skipped) */
+	  while (nat_isspace(ca))
+	       ca = a[++ai];
+
+	  while (nat_isspace(cb))
+	       cb = b[++bi];
+
+	  /* process run of digits */
+	  if (nat_isdigit(ca)  &&  nat_isdigit(cb)) {
+	       fractional = (ca == '0' || cb == '0');   /* a leading '0' selects the left-aligned compare */
+
+	       if (fractional) {
+		    if ((result = compare_left(a+ai, b+bi)) != 0)
+			 return result;
+	       } else {
+		    if ((result = compare_right(a+ai, b+bi)) != 0)
+			 return result;
+	       }
+	  }
+	  /* digit runs that compare equal fall through to the character comparison below */
+	  if (!ca && !cb) {
+	       /* The strings compare the same.  Perhaps the caller
+                  will want to call strcmp to break the tie. */
+	       return 0;
+	  }
+
+	  if (fold_case) {
+	       ca = nat_toupper(ca);
+	       cb = nat_toupper(cb);
+	  }
+	  /* plain comparison of the two characters as nat_char values */
+	  if (ca < cb)
+	       return -1;
+	  else if (ca > cb)
+	       return +1;
+	  /* equal so far: advance one character in each string */
+	  ++ai; ++bi;
+     }
+}
+
+
+/* Case-sensitive natural-order comparison; returns -1, 0 or +1. */
+int strnatcmp(nat_char const *a, nat_char const *b)
+{
+     return strnatcmp0(a, b, /* fold_case = */ 0);
+}
+
+/* Natural-order comparison that ignores letter case; returns -1, 0 or +1. */
+int strnatcasecmp(nat_char const *a, nat_char const *b)
+{
+     return strnatcmp0(a, b, /* fold_case = */ 1);
+}
diff --git a/src/gtextutils/strnatcmp.h b/src/gtextutils/strnatcmp.h
new file mode 100644
index 0000000..51a3c4e
--- /dev/null
+++ b/src/gtextutils/strnatcmp.h
@@ -0,0 +1,31 @@
+/* -*- mode: c; c-file-style: "k&r" -*-
+
+  strnatcmp.c -- Perform 'natural order' comparisons of strings in C.
+  Copyright (C) 2000, 2004 by Martin Pool <mbp sourcefrog net>
+
+  This software is provided 'as-is', without any express or implied
+  warranty.  In no event will the authors be held liable for any damages
+  arising from the use of this software.
+
+  Permission is granted to anyone to use this software for any purpose,
+  including commercial applications, and to alter it and redistribute it
+  freely, subject to the following restrictions:
+
+  1. The origin of this software must not be misrepresented; you must not
+     claim that you wrote the original software. If you use this software
+     in a product, an acknowledgment in the product documentation would be
+     appreciated but is not required.
+  2. Altered source versions must be plainly marked as such, and must not be
+     misrepresented as being the original software.
+  3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+/* CUSTOMIZATION SECTION
+ *
+ * You can change this typedef, but must then also change the inline
+ * functions in strnatcmp.c */
+typedef char nat_char;
+
+int strnatcmp(nat_char const *a, nat_char const *b);
+int strnatcasecmp(nat_char const *a, nat_char const *b);

-- 
Alioth's /git/debian-med/git-commit-notice on /srv/git.debian.org/git/debian-med/libgtextutils.git



More information about the debian-med-commit mailing list