[Babel-users] [PATCH] Add inline qsort routine

Dave Taht dave at taht.net
Fri Oct 26 01:34:38 BST 2018


This variant of qsort only checks for less than, but as it has
been used in dnsrbl for a decade and can inline the comparison
it might be a performance improvement.

I've run this on two machines in the lab for several weeks now without
seeing a problem, but I was building up gcc 7 capability and not in a
position to benchmark much.

If you don't look at qsort.h, it doesn't hurt your sensibilities
as much to "just use it". Probably should return 1 or 0, not -1
or 0.

---
 message.c |  31 ++++---
 qsort.h   | 290 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 309 insertions(+), 12 deletions(-)
 create mode 100644 qsort.h

diff --git a/message.c b/message.c
index 3ca92ff..84cf8c5 100644
--- a/message.c
+++ b/message.c
@@ -1195,14 +1195,14 @@ compare_buffered_updates(const void *av, const void *bv)
     int rc, v4a, v4b, ma, mb;
 
     rc = memcmp(a->id, b->id, 8);
-    if(rc != 0)
-        return rc;
-
+    if(rc != 0) {
+        if (rc < 0) return -1; else return 0;
+    }
     v4a = (a->plen >= 96 && v4mapped(a->prefix));
     v4b = (b->plen >= 96 && v4mapped(b->prefix));
 
     if(v4a > v4b)
-        return 1;
+        return 0;
     else if(v4a < v4b)
         return -1;
 
@@ -1210,25 +1210,26 @@ compare_buffered_updates(const void *av, const void *bv)
     mb = (!v4b && b->plen == 128 && memcmp(b->prefix + 8, b->id, 8) == 0);
 
     if(ma > mb)
-        return -1;
+        return 0;
     else if(mb > ma)
-        return 1;
+        return -1;
 
     if(a->plen < b->plen)
-        return 1;
+        return 0;
     else if(a->plen > b->plen)
         return -1;
 
     rc = memcmp(a->prefix, b->prefix, 16);
-    if(rc != 0)
-        return rc;
+    if(rc != 0) {
+        if (rc < 0) return -1; else return 0;
+    }
 
     if(a->src_plen < b->src_plen)
         return -1;
     else if(a->src_plen > b->src_plen)
-        return 1;
+        return 0;
 
-    return memcmp(a->src_prefix, b->src_prefix, 16);
+    return memcmp(a->src_prefix, b->src_prefix, 16) < 0;
 }
 
 void
@@ -1275,7 +1276,13 @@ flushupdates(struct interface *ifp)
                 memcpy(b[i].id, myid, 8);
         }
 
-        qsort(b, n, sizeof(struct buffered_update), compare_buffered_updates);
+#include "qsort.h"
+#define QSORT_TYPE struct buffered_update
+#define QSORT_BASE b
+#define QSORT_NELT n
+#define QSORT_LT compare_buffered_updates
+#define elt_lt(a,b) compare_buffered_updates(a,b)
+	QSORT(struct buffered_update, b, n, elt_lt);
 
         for(i = 0; i < n; i++) {
             /* The same update may be scheduled multiple times before it is
diff --git a/qsort.h b/qsort.h
new file mode 100644
index 0000000..07480d5
--- /dev/null
+++ b/qsort.h
@@ -0,0 +1,290 @@
+/* $Id: qsort.h,v 1.5 2008-01-28 18:16:49 mjt Exp $
+ * Adopted from GNU glibc by Mjt.
+ * See stdlib/qsort.c in glibc */
+
+/* Copyright (C) 1991, 1992, 1996, 1997, 1999 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Written by Douglas C. Schmidt (schmidt at ics.uci.edu).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+/* in-line qsort implementation.  Differs from traditional qsort() routine
+ * in that it is a macro, not a function, and instead of passing an address
+ * of a comparison routine to the function, it is possible to inline
+ * comparison routine, thus speeding up sorting a lot.
+ *
+ * Usage:
+ *  #include "iqsort.h"
+ *  #define islt(a,b) (strcmp((*a),(*b))<0)
+ *  char *arr[];
+ *  int n;
+ *  QSORT(char*, arr, n, islt);
+ *
+ * The "prototype" and 4 arguments are:
+ *  QSORT(TYPE,BASE,NELT,ISLT)
+ *  1) type of each element, TYPE,
+ *  2) address of the beginning of the array, of type TYPE*,
+ *  3) number of elements in the array, and
+ *  4) comparision routine.
+ * Array pointer and number of elements are referenced only once.
+ * This is similar to a call
+ *  qsort(BASE,NELT,sizeof(TYPE),ISLT)
+ * with the difference in last parameter.
+ * Note the islt macro/routine (it receives pointers to two elements):
+ * the only condition of interest is whenever one element is less than
+ * another, no other conditions (greather than, equal to etc) are tested.
+ * So, for example, to define integer sort, use:
+ *  #define islt(a,b) ((*a)<(*b))
+ *  QSORT(int, arr, n, islt)
+ *
+ * The macro could be used to implement a sorting function (see examples
+ * below), or to implement the sorting algorithm inline.  That is, either
+ * create a sorting function and use it whenever you want to sort something,
+ * or use QSORT() macro directly instead a call to such routine.  Note that
+ * the macro expands to quite some code (compiled size of int qsort on x86
+ * is about 700..800 bytes).
+ *
+ * Using this macro directly it isn't possible to implement traditional
+ * qsort() routine, because the macro assumes sizeof(element) == sizeof(TYPE),
+ * while qsort() allows element size to be different.
+ *
+ * Several ready-to-use examples:
+ *
+ * Sorting array of integers:
+ * void int_qsort(int *arr, unsigned n) {
+ * #define int_lt(a,b) ((*a)<(*b))
+ *   QSORT(int, arr, n, int_lt);
+ * }
+ *
+ * Sorting array of string pointers:
+ * void str_qsort(char *arr[], unsigned n) {
+ * #define str_lt(a,b) (strcmp((*a),(*b)) < 0)
+ *   QSORT(char*, arr, n, str_lt);
+ * }
+ *
+ * Sorting array of structures:
+ *
+ * struct elt {
+ *   int key;
+ *   ...
+ * };
+ * void elt_qsort(struct elt *arr, unsigned n) {
+ * #define elt_lt(a,b) ((a)->key < (b)->key)
+ *  QSORT(struct elt, arr, n, elt_lt);
+ * }
+ *
+ * And so on.
+ */
+
+/* Swap two items pointed to by A and B using temporary buffer t. */
+#define _QSORT_SWAP(a, b, t) ((void)((t = *a), (*a = *b), (*b = t)))
+
+/* Discontinue quicksort algorithm when partition gets below this size.
+   This particular magic number was chosen to work best on a Sun 4/260. */
+#define _QSORT_MAX_THRESH 4
+
+/* Stack node declarations used to store unfulfilled partition obligations
+ * (inlined in QSORT).
+typedef struct {
+  QSORT_TYPE *_lo, *_hi;
+} qsort_stack_node;
+ */
+
+/* The next 4 #defines implement a very fast in-line stack abstraction. */
+/* The stack needs log (total_elements) entries (we could even subtract
+   log(MAX_THRESH)).  Since total_elements has type unsigned, we get as
+   upper bound for log (total_elements):
+   bits per byte (CHAR_BIT) * sizeof(unsigned).  */
+#define _QSORT_STACK_SIZE	(8 * sizeof(unsigned))
+#define _QSORT_PUSH(top, low, high)	\
+	(((top->_lo = (low)), (top->_hi = (high)), ++top))
+#define	_QSORT_POP(low, high, top)	\
+	((--top, (low = top->_lo), (high = top->_hi)))
+#define	_QSORT_STACK_NOT_EMPTY	(_stack < _top)
+
+
+/* Order size using quicksort.  This implementation incorporates
+   four optimizations discussed in Sedgewick:
+
+   1. Non-recursive, using an explicit stack of pointer that store the
+      next array partition to sort.  To save time, this maximum amount
+      of space required to store an array of SIZE_MAX is allocated on the
+      stack.  Assuming a 32-bit (64 bit) integer for size_t, this needs
+      only 32 * sizeof(stack_node) == 256 bytes (for 64 bit: 1024 bytes).
+      Pretty cheap, actually.
+
+   2. Chose the pivot element using a median-of-three decision tree.
+      This reduces the probability of selecting a bad pivot value and
+      eliminates certain extraneous comparisons.
+
+   3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving
+      insertion sort to order the MAX_THRESH items within each partition.
+      This is a big win, since insertion sort is faster for small, mostly
+      sorted array segments.
+
+   4. The larger of the two sub-partitions is always pushed onto the
+      stack first, with the algorithm then concentrating on the
+      smaller partition.  This *guarantees* no more than log (total_elems)
+      stack size is needed (actually O(1) in this case)!  */
+
+/* The main code starts here... */
+#define QSORT(QSORT_TYPE,QSORT_BASE,QSORT_NELT,QSORT_LT)		\
+{									\
+  QSORT_TYPE *const _base = (QSORT_BASE);				\
+  const unsigned _elems = (QSORT_NELT);					\
+  QSORT_TYPE _hold;							\
+									\
+  /* Don't declare two variables of type QSORT_TYPE in a single		\
+   * statement: eg `TYPE a, b;', in case if TYPE is a pointer,		\
+   * expands to `type* a, b;' wich isn't what we want.			\
+   */									\
+									\
+  if (_elems > _QSORT_MAX_THRESH) {					\
+    QSORT_TYPE *_lo = _base;						\
+    QSORT_TYPE *_hi = _lo + _elems - 1;					\
+    struct {								\
+      QSORT_TYPE *_hi; QSORT_TYPE *_lo;					\
+    } _stack[_QSORT_STACK_SIZE], *_top = _stack + 1;			\
+									\
+    while (_QSORT_STACK_NOT_EMPTY) {					\
+      QSORT_TYPE *_left_ptr; QSORT_TYPE *_right_ptr;			\
+									\
+      /* Select median value from among LO, MID, and HI. Rearrange	\
+         LO and HI so the three values are sorted. This lowers the	\
+         probability of picking a pathological pivot value and		\
+         skips a comparison for both the LEFT_PTR and RIGHT_PTR in	\
+         the while loops. */						\
+									\
+      QSORT_TYPE *_mid = _lo + ((_hi - _lo) >> 1);			\
+									\
+      if (QSORT_LT (_mid, _lo))						\
+        _QSORT_SWAP (_mid, _lo, _hold);					\
+      if (QSORT_LT (_hi, _mid))	{					\
+        _QSORT_SWAP (_mid, _hi, _hold);					\
+        if (QSORT_LT (_mid, _lo))					\
+          _QSORT_SWAP (_mid, _lo, _hold);				\
+      } 								\
+									\
+      _left_ptr  = _lo + 1;						\
+      _right_ptr = _hi - 1;						\
+									\
+      /* Here's the famous ``collapse the walls'' section of quicksort.	\
+         Gotta like those tight inner loops!  They are the main reason	\
+         that this algorithm runs much faster than others. */		\
+      do {								\
+        while (QSORT_LT (_left_ptr, _mid))				\
+         ++_left_ptr;							\
+									\
+        while (QSORT_LT (_mid, _right_ptr))				\
+          --_right_ptr;							\
+									\
+        if (_left_ptr < _right_ptr) {					\
+          _QSORT_SWAP (_left_ptr, _right_ptr, _hold);			\
+          if (_mid == _left_ptr)					\
+            _mid = _right_ptr;						\
+          else if (_mid == _right_ptr)					\
+            _mid = _left_ptr;						\
+          ++_left_ptr;							\
+          --_right_ptr;							\
+        }								\
+        else if (_left_ptr == _right_ptr) {				\
+          ++_left_ptr;							\
+          --_right_ptr;							\
+          break;							\
+        }								\
+      } while (_left_ptr <= _right_ptr);				\
+									\
+     /* Set up pointers for next iteration.  First determine whether	\
+        left and right partitions are below the threshold size.  If so,	\
+        ignore one or both.  Otherwise, push the larger partition's	\
+        bounds on the stack and continue sorting the smaller one. */	\
+									\
+      if (_right_ptr - _lo <= _QSORT_MAX_THRESH) {			\
+        if (_hi - _left_ptr <= _QSORT_MAX_THRESH)			\
+          /* Ignore both small partitions. */				\
+          _QSORT_POP (_lo, _hi, _top);					\
+        else								\
+          /* Ignore small left partition. */				\
+          _lo = _left_ptr;						\
+      }									\
+      else if (_hi - _left_ptr <= _QSORT_MAX_THRESH)			\
+        /* Ignore small right partition. */				\
+        _hi = _right_ptr;						\
+      else if (_right_ptr - _lo > _hi - _left_ptr) {			\
+        /* Push larger left partition indices. */			\
+        _QSORT_PUSH (_top, _lo, _right_ptr);				\
+        _lo = _left_ptr;						\
+      }									\
+      else {								\
+        /* Push larger right partition indices. */			\
+        _QSORT_PUSH (_top, _left_ptr, _hi);				\
+        _hi = _right_ptr;						\
+      }									\
+    }									\
+  }									\
+									\
+  /* Once the BASE array is partially sorted by quicksort the rest	\
+     is completely sorted using insertion sort, since this is efficient	\
+     for partitions below MAX_THRESH size. BASE points to the		\
+     beginning of the array to sort, and END_PTR points at the very	\
+     last element in the array (*not* one beyond it!). */		\
+									\
+  {									\
+    QSORT_TYPE *const _end_ptr = _base + _elems - 1;			\
+    QSORT_TYPE *_tmp_ptr = _base;					\
+    register QSORT_TYPE *_run_ptr;					\
+    QSORT_TYPE *_thresh;						\
+									\
+    _thresh = _base + _QSORT_MAX_THRESH;				\
+    if (_thresh > _end_ptr)						\
+      _thresh = _end_ptr;						\
+									\
+    /* Find smallest element in first threshold and place it at the	\
+       array's beginning.  This is the smallest array element,		\
+       and the operation speeds up insertion sort's inner loop. */	\
+									\
+    for (_run_ptr = _tmp_ptr + 1; _run_ptr <= _thresh; ++_run_ptr)	\
+      if (QSORT_LT (_run_ptr, _tmp_ptr))				\
+        _tmp_ptr = _run_ptr;						\
+									\
+    if (_tmp_ptr != _base)						\
+      _QSORT_SWAP (_tmp_ptr, _base, _hold);				\
+									\
+    /* Insertion sort, running from left-hand-side			\
+     * up to right-hand-side.  */					\
+									\
+    _run_ptr = _base + 1;						\
+    while (++_run_ptr <= _end_ptr) {					\
+      _tmp_ptr = _run_ptr - 1;						\
+      while (QSORT_LT (_run_ptr, _tmp_ptr))				\
+        --_tmp_ptr;							\
+									\
+      ++_tmp_ptr;							\
+      if (_tmp_ptr != _run_ptr) {					\
+        QSORT_TYPE *_trav = _run_ptr + 1;				\
+        while (--_trav >= _run_ptr) {					\
+          QSORT_TYPE *_hi; QSORT_TYPE *_lo;				\
+          _hold = *_trav;						\
+									\
+          for (_hi = _lo = _trav; --_lo >= _tmp_ptr; _hi = _lo)		\
+            *_hi = *_lo;						\
+          *_hi = _hold;							\
+        }								\
+      }									\
+    }									\
+  }									\
+									\
+}
-- 
1.8.3.2




More information about the Babel-users mailing list