[med-svn] [libgtextutils] 27/83: Added Natural-Sort predicates.
Charles Plessy
plessy at moszumanska.debian.org
Wed Jan 8 13:37:27 UTC 2014
This is an automated email from the git hooks/post-receive script.
plessy pushed a commit to branch debian/unstable
in repository libgtextutils.
commit 92c38b649ec15120deaff77ee6f39b17f2e28d7b
Author: A. Gordon <gordon at cshl.edu>
Date: Tue Mar 24 20:34:46 2009 -0400
Added Natural-Sort predicates.
---
src/gtextutils/Makefile.am | 16 ++--
src/gtextutils/natsort.h | 110 ++++++++++++++++++++++++++++
src/gtextutils/strnatcmp.c | 178 +++++++++++++++++++++++++++++++++++++++++++++
src/gtextutils/strnatcmp.h | 31 ++++++++
4 files changed, 329 insertions(+), 6 deletions(-)
diff --git a/src/gtextutils/Makefile.am b/src/gtextutils/Makefile.am
index f99ba43..c7ac53b 100644
--- a/src/gtextutils/Makefile.am
+++ b/src/gtextutils/Makefile.am
@@ -9,14 +9,18 @@
# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-lib_LIBRARIES = libgtextutils-0.1.a
+lib_LIBRARIES = libgtextutils-0.2.a
-libgtextutils_0_1_a_SOURCES = stream_wrapper.cpp stream_wrapper.h \
+libgtextutils_0_2_a_SOURCES = stream_wrapper.cpp stream_wrapper.h \
text_line_reader.cpp text_line_reader.h \
- print_utils.h
+ container_join.h \
+ natsort.h \
+ strnatcmp.c strnatcmp.h
-libgtextutils_0_1_a_includedir = $(includedir)/gtextutils-0.1/gtextutils
+libgtextutils_0_2_a_includedir = $(includedir)/gtextutils-0.2/gtextutils
-libgtextutils_0_1_a_include_HEADERS = print_utils.h \
+libgtextutils_0_2_a_include_HEADERS = container_join.h \
text_line_reader.h \
- stream_wrapper.h
+ stream_wrapper.h \
+ natsort.h \
+ strnatcmp.h
diff --git a/src/gtextutils/natsort.h b/src/gtextutils/natsort.h
new file mode 100644
index 0000000..a3520ef
--- /dev/null
+++ b/src/gtextutils/natsort.h
@@ -0,0 +1,110 @@
+/*
+ Gordon's Text-Utilities Library
+ Copyright (C) 2009 Assaf Gordon (gordon at cshl.edu)
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>
+*/
+#ifndef __NATURAL_SORT_STL_H__
+#define __NATURAL_SORT_STL_H__
+
+/*
+ * natsort.h -
+ * STL-compatible interface to Martin Pool's Natural-Order sorting routines.
+ *
+ * see http://sourcefrog.net/projects/natsort/ for more details
+ *
+ * Note 1:
+ * There's a boost equivalent version of 'composable natural sort'
+ * at http://www.boostcookbook.com/Recipe:/1235053
+ * But it requires the boost library (including the regex engine),
+ * and I prefer not to use it for now.
+ *
+ * Note 2:
+ * As of FSF GNU Coreutils version 7.1, the 'sort' program has a similar sorting order
+ * called 'version' (with the -V command argument).
+ * Coreutils's implementation is found in <coreutils-7.1>/src/filevercmp.{ch}.
+ * The results are similar to Martin Pool's NatSort, but not identical
+ * if the sorted strings are more complex than <prefixNUM>.
+ */
+
+/*
+Usage:
+ // sort an array in natural order
+ vector<string> v;
+ v.push_back("chr20");
+ v.push_back("chr10");
+ v.push_back("chr5");
+ v.push_back("chr1");
+ v.push_back("chr2");
+
+ // "regular" sort
+ sort(v.begin(), v.end() );
+ // order will be:
+ // chr1
+ // chr10
+ // chr2
+ // chr20
+ // chr5
+
+ // "natural order" sort
+ sort(v.begin(), v.end(), natural_sort_predicate() );
+ // order will be:
+ // chr1
+ // chr2
+ // chr5
+ // chr10
+ // chr20
+*/
+
+
+extern "C" {
+/*
+ strnatcmp.{h,c} -- Perform 'natural order' comparisons of strings in C.
+ Copyright (C) 2000, 2004 by Martin Pool <mbp sourcefrog net>
+*/
+#include "strnatcmp.h"
+}
+
+#include <functional>
+#include <string>
+
+/**
+ * Binary predicate that orders two std::string values in case-sensitive
+ * natural order by delegating to strnatcmp().
+ *
+ * Intended as the comparison argument of std::sort and similar algorithms.
+ * operator() is const-qualified so the predicate can also be invoked through
+ * a const object, which is how ordered containers such as std::set and
+ * std::map call their comparator.
+ *
+ * Only the characters up to the first NUL are compared, because the strings
+ * are handed to the C routine through c_str().
+ */
+struct natural_sort_predicate : public std::binary_function<std::string, std::string, bool>
+{
+    /** @return true when s1 sorts strictly before s2 (strnatcmp() < 0). */
+    bool operator() ( const std::string& s1, const std::string& s2 ) const
+    {
+        return strnatcmp(s1.c_str(), s2.c_str()) < 0 ;
+    }
+};
+
+/**
+ * Binary predicate that orders two std::string values in case-insensitive
+ * natural order by delegating to strnatcasecmp().
+ *
+ * Intended as the comparison argument of std::sort and similar algorithms.
+ * operator() is const-qualified so the predicate can also be invoked through
+ * a const object, which is how ordered containers such as std::set and
+ * std::map call their comparator.
+ *
+ * Only the characters up to the first NUL are compared, because the strings
+ * are handed to the C routine through c_str().
+ */
+struct natural_sort_ignore_case_predicate : public std::binary_function<std::string, std::string, bool>
+{
+    /** @return true when s1 sorts strictly before s2 (strnatcasecmp() < 0). */
+    bool operator() ( const std::string& s1, const std::string& s2 ) const
+    {
+        return strnatcasecmp(s1.c_str(), s2.c_str()) < 0 ;
+    }
+};
+
+/*
+inline bool natural_sort_predicate(const std::string& s1, const std::string& s2)
+{
+ return strnatcmp(s1.c_str(), s2.c_str()) < 0 ;
+}
+
+inline bool natural_sort_ignore_case_predicate(const std::string& s1, const std::string& s2)
+{
+ return strnatcasecmp(s1.c_str(), s2.c_str()) < 0 ;
+}*/
+
+
+#endif
+
diff --git a/src/gtextutils/strnatcmp.c b/src/gtextutils/strnatcmp.c
new file mode 100644
index 0000000..74cbb61
--- /dev/null
+++ b/src/gtextutils/strnatcmp.c
@@ -0,0 +1,178 @@
+/* -*- mode: c; c-file-style: "k&r" -*-
+
+ strnatcmp.c -- Perform 'natural order' comparisons of strings in C.
+ Copyright (C) 2000, 2004 by Martin Pool <mbp sourcefrog net>
+
+ This software is provided 'as-is', without any express or implied
+ warranty. In no event will the authors be held liable for any damages
+ arising from the use of this software.
+
+ Permission is granted to anyone to use this software for any purpose,
+ including commercial applications, and to alter it and redistribute it
+ freely, subject to the following restrictions:
+
+ 1. The origin of this software must not be misrepresented; you must not
+ claim that you wrote the original software. If you use this software
+ in a product, an acknowledgment in the product documentation would be
+ appreciated but is not required.
+ 2. Altered source versions must be plainly marked as such, and must not be
+ misrepresented as being the original software.
+ 3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+/* partial change history:
+ *
+ * 2004-10-10 mbp: Lift out character type dependencies into macros.
+ *
+ * Eric Sosman pointed out that ctype functions take a parameter whose
+ * value must be that of an unsigned char (or EOF), even on platforms that
+ * have negative chars in their default char type.
+ */
+
+#include <ctype.h>
+#include <string.h>
+#include <assert.h>
+#include <stdio.h>
+
+#include "strnatcmp.h"
+
+
+/* These are defined as inline helper functions to make it easier to adapt
+ * this code to different character types or comparison functions. */
+/* Report whether A is a decimal digit according to isdigit().  The value is
+ * narrowed to unsigned char first, so a negative plain char is never passed
+ * to the ctype routine. */
+static inline int
+nat_isdigit(nat_char a)
+{
+    const unsigned char as_uchar = (unsigned char) a;
+
+    return isdigit(as_uchar);
+}
+
+
+/* Report whether A is whitespace according to isspace().  The value is
+ * narrowed to unsigned char first, so a negative plain char is never passed
+ * to the ctype routine. */
+static inline int
+nat_isspace(nat_char a)
+{
+    const unsigned char as_uchar = (unsigned char) a;
+
+    return isspace(as_uchar);
+}
+
+
+/* Return the upper-case form of A as produced by toupper().  The value is
+ * narrowed to unsigned char before the call and the int result is converted
+ * back to nat_char on return. */
+static inline nat_char
+nat_toupper(nat_char a)
+{
+    const int upper = toupper((unsigned char) a);
+
+    return upper;
+}
+
+
+
+/* Compare the two right-aligned (integer-like) digit runs that start at
+ * A and B.
+ *
+ * The longer run of digits wins.  When both runs end at the same position,
+ * the result is decided by the first position at which the digits differed;
+ * that decision cannot be trusted until both runs are known to have the same
+ * length, so it is remembered in BIAS while scanning.
+ *
+ * Returns -1 if A's number is smaller, +1 if it is larger, and 0 if the two
+ * runs are identical.
+ */
+static int
+compare_right(nat_char const *a, nat_char const *b)
+{
+    int bias = 0;
+
+    for (;; a++, b++) {
+        const int a_is_digit = nat_isdigit(*a);
+        const int b_is_digit = nat_isdigit(*b);
+
+        /* Both runs ended together (this also covers both strings
+           reaching their terminating NUL): same magnitude, so the
+           first recorded difference decides. */
+        if (!a_is_digit && !b_is_digit)
+            return bias;
+
+        /* Exactly one run ended: the shorter run is the smaller number. */
+        if (!a_is_digit)
+            return -1;
+        if (!b_is_digit)
+            return +1;
+
+        /* Only the most significant differing digit matters. */
+        if (!bias && *a != *b)
+            bias = (*a < *b) ? -1 : +1;
+    }
+}
+
+
+/* Compare the two left-aligned (fraction-like) digit runs that start at
+ * A and B: the first position at which the runs differ decides.
+ *
+ * Returns -1 if A sorts first, +1 if B sorts first, and 0 if both runs are
+ * identical.  A run that ends while the other still has digits sorts first.
+ */
+static int
+compare_left(nat_char const *a, nat_char const *b)
+{
+    /* Walk the part of the runs that both strings share. */
+    for (; nat_isdigit(*a) && nat_isdigit(*b); a++, b++) {
+        if (*a != *b)
+            return (*a < *b) ? -1 : +1;
+    }
+
+    /* At least one run is exhausted; whichever still has digits is larger. */
+    if (nat_isdigit(*a))
+        return +1;
+    if (nat_isdigit(*b))
+        return -1;
+
+    return 0;
+}
+
+
+/* Shared implementation behind strnatcmp() and strnatcasecmp().
+ *
+ * Walks A and B in lockstep.  At every step any whitespace at the current
+ * position of either string is skipped.  If both positions then hold a
+ * digit, the digit runs are compared as numbers: left-aligned via
+ * compare_left() when either run starts with '0', right-aligned via
+ * compare_right() otherwise.  A non-zero result from that comparison is
+ * returned immediately; otherwise the current characters are compared
+ * directly, after upper-casing them when FOLD_CASE is non-zero.
+ *
+ * Returns -1, 0 or +1.  Both pointers must be non-NULL (checked by assert).
+ */
+static int strnatcmp0(nat_char const *a, nat_char const *b, int fold_case)
+{
+    int pos_a = 0;
+    int pos_b = 0;
+
+    assert(a && b);
+
+    for (;; ++pos_a, ++pos_b) {
+        nat_char ch_a, ch_b;
+
+        /* Whitespace is skipped (zeros are not). */
+        while (nat_isspace(a[pos_a]))
+            ++pos_a;
+        while (nat_isspace(b[pos_b]))
+            ++pos_b;
+
+        ch_a = a[pos_a];
+        ch_b = b[pos_b];
+
+        /* Two digit runs meet: compare them numerically. */
+        if (nat_isdigit(ch_a) && nat_isdigit(ch_b)) {
+            int run_order;
+
+            if (ch_a == '0' || ch_b == '0')
+                run_order = compare_left(a + pos_a, b + pos_b);
+            else
+                run_order = compare_right(a + pos_a, b + pos_b);
+
+            if (run_order != 0)
+                return run_order;
+        }
+
+        /* Both strings ended together: they compare the same.  The
+           caller may want to break the tie with strcmp. */
+        if (ch_a == '\0' && ch_b == '\0')
+            return 0;
+
+        if (fold_case) {
+            ch_a = nat_toupper(ch_a);
+            ch_b = nat_toupper(ch_b);
+        }
+
+        if (ch_a != ch_b)
+            return (ch_a < ch_b) ? -1 : +1;
+    }
+}
+
+
+
+/* Compare A and B in natural order, treating letter case as significant.
+ * Returns -1, 0 or +1, as produced by strnatcmp0(). */
+int strnatcmp(nat_char const *a, nat_char const *b)
+{
+    const int fold_case = 0;
+
+    return strnatcmp0(a, b, fold_case);
+}
+
+
+/* Compare A and B in natural order while ignoring letter case.
+ * Returns -1, 0 or +1, as produced by strnatcmp0(). */
+int strnatcasecmp(nat_char const *a, nat_char const *b)
+{
+    const int fold_case = 1;
+
+    return strnatcmp0(a, b, fold_case);
+}
diff --git a/src/gtextutils/strnatcmp.h b/src/gtextutils/strnatcmp.h
new file mode 100644
index 0000000..51a3c4e
--- /dev/null
+++ b/src/gtextutils/strnatcmp.h
@@ -0,0 +1,31 @@
+/* -*- mode: c; c-file-style: "k&r" -*-
+
+ strnatcmp.c -- Perform 'natural order' comparisons of strings in C.
+ Copyright (C) 2000, 2004 by Martin Pool <mbp sourcefrog net>
+
+ This software is provided 'as-is', without any express or implied
+ warranty. In no event will the authors be held liable for any damages
+ arising from the use of this software.
+
+ Permission is granted to anyone to use this software for any purpose,
+ including commercial applications, and to alter it and redistribute it
+ freely, subject to the following restrictions:
+
+ 1. The origin of this software must not be misrepresented; you must not
+ claim that you wrote the original software. If you use this software
+ in a product, an acknowledgment in the product documentation would be
+ appreciated but is not required.
+ 2. Altered source versions must be plainly marked as such, and must not be
+ misrepresented as being the original software.
+ 3. This notice may not be removed or altered from any source distribution.
+*/
+
+
+/* CUSTOMIZATION SECTION
+ *
+ * You can change this typedef, but must then also change the inline
+ * functions in strnatcmp.c */
+typedef char nat_char;
+
+int strnatcmp(nat_char const *a, nat_char const *b);
+int strnatcasecmp(nat_char const *a, nat_char const *b);
--
Alioth's /git/debian-med/git-commit-notice on /srv/git.debian.org/git/debian-med/libgtextutils.git
More information about the debian-med-commit
mailing list