[med-svn] [Git][med-team/libatomic-queue][master] 4 commits: Shared library, adapt names to d-shlibs requirements

Sat Oct 24 05:56:11 BST 2020


Andreas Tille pushed to branch master at Debian Med / libatomic-queue


Commits:
129f78eb by Andreas Tille at 2020-10-24T06:51:29+02:00
Shared library, adapt names to d-shlibs requirements

- - - - -
6a543477 by Andreas Tille at 2020-10-24T06:52:37+02:00
Salsa-ci

- - - - -
f1c038f8 by Andreas Tille at 2020-10-24T06:53:22+02:00
New upstream version 0.0+git20201007.df79403
- - - - -
e07f4451 by Andreas Tille at 2020-10-24T06:53:54+02:00
Update upstream source from tag 'upstream/0.0+git20201007.df79403'

Update to upstream version '0.0+git20201007.df79403'
with Debian dir a305cbcbd2730d904cfbf086dbdcc64d98f41eb5
- - - - -


30 changed files:

- + .github/workflows/c-cpp.yml
- Makefile
- README.md
- debian/changelog
- debian/control
- + debian/patches/generate-shared-library.patch
- debian/patches/series
- debian/rules
- debian/salsa-ci.yml.ex → debian/salsa-ci.yml
- html/benchmarks.css
- html/benchmarks.html
- html/benchmarks.js
- include/atomic_queue/atomic_queue_mutex.h
- include/atomic_queue/defs.h
- include/atomic_queue/spinlock.h
- + meson.build
- + results/results-16.20200703T014231.txt
- + results/results-28.20200704T034247.txt
- scripts/latency_to_json.py
- scripts/scalability_to_json.py
- scripts/stats.py
- src/benchmarks.cc
- + src/benchmarks.h
- src/cpu_base_frequency.cc
- include/atomic_queue/cpu_base_frequency.h → src/cpu_base_frequency.h
- + src/example.cc
- src/huge_pages.cc
- include/atomic_queue/huge_pages.h → src/huge_pages.h
- include/atomic_queue/moodycamel.h → src/moodycamel.h
- src/tests.cc


Changes:

=====================================
.github/workflows/c-cpp.yml
=====================================
@@ -0,0 +1,21 @@
+name: C/C++ CI
+
+on:
+  push:
+    branches: [ master ]
+  pull_request:
+    branches: [ master ]
+
+jobs:
+  build:
+
+    runs-on: ubuntu-18.04
+
+    steps:
+    - uses: actions/checkout@v2
+    - name: Environment variables
+      run: make env; make TOOLSET=gcc versions; make TOOLSET=clang versions
+    - name: Unit tests with gcc
+      run: make -rj2 TOOLSET=gcc run_tests
+    - name: Unit tests with clang
+      run: make -rj2 TOOLSET=clang run_tests


=====================================
Makefile
=====================================
@@ -34,8 +34,8 @@ cflags.gcc := -pthread -march=native -W{all,extra} -g -fmessage-length=0 ${cxxfl
 
 cxxflags.clang.debug := -O0 -fstack-protector-all
 cxxflags.clang.release := -O3 -mtune=native -ffast-math -falign-functions=64 -DNDEBUG
-cxxflags.clang := -stdlib=libc++ -pthread -march=native -std=gnu++14 -W{all,extra,error,no-{unused-function,unused-local-typedefs}} -g -fmessage-length=0 ${cxxflags.clang.${BUILD}}
-ldflags.clang := -stdlib=libc++ ${ldflags.clang.${BUILD}}
+cxxflags.clang := -stdlib=libstdc++ -pthread -march=native -std=gnu++14 -W{all,extra,error,no-{unused-variable,unused-function,unused-local-typedefs}} -g -fmessage-length=0 ${cxxflags.clang.${BUILD}}
+ldflags.clang := -stdlib=libstdc++ ${ldflags.clang.${BUILD}}
 
 # Additional CPPFLAGS, CXXFLAGS, CFLAGS, LDLIBS, LDFLAGS can come from the command line, e.g. make CPPFLAGS='-I<my-include-dir>', or from environment variables.
 # However, a clean build is required when changing the flags in the command line or in environment variables, this makefile doesn't detect such changes.
@@ -45,6 +45,13 @@ cppflags := ${CPPFLAGS} -Iinclude
 ldflags := -fuse-ld=gold -pthread -g ${ldflags.${TOOLSET}} ${LDFLAGS}
 ldlibs := -lrt ${LDLIBS}
 
+ifdef BOOST_ROOT_1_72_0 # E.g./opt/hostedtoolcache/boost/1.72.0/x64
+boost_unit_test_framework_inc := -I${BOOST_ROOT_1_72_0}
+boost_unit_test_framework_lib := -{L,'Wl,-rpath='}${BOOST_ROOT_1_72_0}/lib -lboost_unit_test_framework-mt-x64
+else
+boost_unit_test_framework_lib := -lboost_unit_test_framework
+endif
+
 cppflags.tbb :=
 ldlibs.tbb := {-L,'-Wl,-rpath='}/usr/local/lib -ltbb
 
@@ -69,21 +76,24 @@ all : ${exes}
 ${exes} : % : ${build_dir}/%
 	ln -sf ${<:${CURDIR}/%=%}
 
-${build_dir}/libatomic_queue.a : $(addprefix ${build_dir}/,cpu_base_frequency.o huge_pages.o)
--include ${build_dir}/cpu_base_frequency.d
--include ${build_dir}/huge_pages.d
-
+benchmarks_src := benchmarks.cc cpu_base_frequency.cc huge_pages.cc
 ${build_dir}/benchmarks : cppflags += ${cppflags.tbb} ${cppflags.moodycamel} ${cppflags.xenium}
 ${build_dir}/benchmarks : ldlibs += ${ldlibs.tbb} ${ldlibs.moodycamel} ${ldlibs.xenium} -ldl
-${build_dir}/benchmarks : ${build_dir}/benchmarks.o ${build_dir}/libatomic_queue.a Makefile | ${build_dir}
+${build_dir}/benchmarks : ${benchmarks_src:%.cc=${build_dir}/%.o} Makefile | ${build_dir}
 	$(strip ${LINK.EXE})
--include ${build_dir}/benchmarks.d
+-include ${benchmarks_src:%.cc=${build_dir}/%.d}
 
-${build_dir}/tests : cppflags += ${cppflags.moodycamel}
-${build_dir}/tests : ldlibs += ${ldlibs.moodycamel} -lboost_unit_test_framework
-${build_dir}/tests : ${build_dir}/tests.o Makefile | ${build_dir}
+tests_src := tests.cc
+${build_dir}/tests : cppflags += ${boost_unit_test_framework_inc} -DBOOST_TEST_DYN_LINK=1
+${build_dir}/tests : ldlibs += ${boost_unit_test_framework_lib}
+${build_dir}/tests : ${tests_src:%.cc=${build_dir}/%.o} Makefile | ${build_dir}
 	$(strip ${LINK.EXE})
--include ${build_dir}/tests.d
+-include ${tests_src:%.cc=${build_dir}/%.d}
+
+example_src := example.cc
+${build_dir}/example : ${example_src:%.cc=${build_dir}/%.o} Makefile | ${build_dir}
+	$(strip ${LINK.EXE})
+-include ${example_src:%.cc=${build_dir}/%.d}
 
 ${build_dir}/%.so : cxxflags += -fPIC
 ${build_dir}/%.so : Makefile | ${build_dir}
@@ -102,6 +112,10 @@ run_tests : ${build_dir}/tests
 	@echo "---- running $< ----"
 	$<
 
+run_% : ${build_dir}/%
+	@echo "---- running $< ----"
+	$<
+
 ${build_dir}/%.o : src/%.cc Makefile | ${build_dir}
 	$(strip ${COMPILE.CXX})
 
@@ -124,4 +138,11 @@ rtags : clean
 clean :
 	rm -rf ${build_dir} ${exes}
 
-.PHONY : rtags run_benchmarks clean all run_%
+env :
+	env | sort
+
+versions:
+	${MAKE} --version | head -n1
+	${CXX} --version | head -n1
+
+.PHONY : env versions rtags run_benchmarks clean all run_%


=====================================
README.md
=====================================
@@ -1,5 +1,7 @@
+[![C/C++ CI](https://github.com/max0x7ba/atomic_queue/workflows/C/C++%20CI/badge.svg)](https://github.com/max0x7ba/atomic_queue/actions?query=workflow%3A%22C%2FC%2B%2B+CI%22)
+
 # atomic_queue
-C++11 multiple-producer-multiple-consumer *lockless* queues based on circular buffer with [`std::atomic`][3].
+C++14 multiple-producer-multiple-consumer *lockless* queues based on circular buffer with [`std::atomic`][3].
 
 The main design principle these queues follow is _simplicity_: the bare minimum of atomic operations, fixed size buffer, value semantics.
 
@@ -75,6 +77,8 @@ The containers support the following APIs:
 * `was_full` - Returns `true` if the container was full during the call. The state may have changed by the time the return value is examined.
 * `capacity` - Returns the maximum number of elements the queue can possibly hold.
 
+See [example.cc](src/example.cc) for a usage example.
+
 TODO: full API reference.
 
 # Implementation Notes


=====================================
debian/changelog
=====================================
@@ -1,4 +1,4 @@
-libatomicqueue (0.0+git20200609.b6da4a9-1) UNRELEASED; urgency=medium
+libatomic-queue (0.0+git20200609.b6da4a9-1) UNRELEASED; urgency=medium
 
   * Initial release (Closes: #963829)
 


=====================================
debian/control
=====================================
@@ -1,4 +1,4 @@
-Source: libatomicqueue
+Source: libatomic-queue
 Priority: optional
 Maintainer: Debian Med Packaging Team <debian-med-packaging at lists.alioth.debian.org>
 Uploaders: Steffen Moeller <moeller at debian.org>
@@ -13,14 +13,55 @@ Build-Depends: debhelper-compat (= 12),
 Standards-Version: 4.5.0
 Section: libs
 Homepage: https://github.com/max0x7ba/atomic_queue
-Vcs-Browser: https://salsa.debian.org/med-team/libatomicqueue
-Vcs-Git: https://salsa.debian.org/med-team/libatomicqueue.git
+Vcs-Browser: https://salsa.debian.org/med-team/libatomic-queue
+Vcs-Git: https://salsa.debian.org/med-team/libatomic-queue.git
 Rules-Requires-Root: no
 
-Package: libatomicqueue-dev
+Package: libatomic-queue0
+Architecture: any
+Section: libs
+Depends: ${shlibs:Depends},
+         ${misc:Depends}
+Description: C++ atomic_queue library
+ C++11 multiple-producer-multiple-consumer lockless queues based on
+ circular buffer with std::atomic.  The main design principle these
+ queues follow is simplicity: the bare minimum of atomic operations,
+ fixed size buffer, value semantics.
+ .
+ The circular buffer side-steps the memory reclamation problem inherent
+ in linked-list based queues for the price of fixed buffer size. See
+ Effective memory reclamation for lock-free data structures in C++
+ for more details.
+ .
+ These qualities are also limitations:
+ .
+  * The maximum queue size must be set at compile time or construction time.
+  * There are no OS-blocking push/pop functions.
+ .
+ Nevertheless, ultra-low-latency applications need just that and nothing
+ more. The simplicity pays off, see the throughput and latency benchmarks.
+ .
+ Available containers are:
+ .
+  * AtomicQueue - a fixed size ring-buffer for atomic elements.
+  * OptimistAtomicQueue - a faster fixed size ring-buffer for atomic
+    elements which busy-waits when empty or full.
+  * AtomicQueue2 - a fixed size ring-buffer for non-atomic elements.
+  * OptimistAtomicQueue2 - a faster fixed size ring-buffer for non-atomic
+    elements which busy-waits when empty or full.
+ .
+ These containers have corresponding AtomicQueueB, OptimistAtomicQueueB,
+ AtomicQueueB2, OptimistAtomicQueueB2 versions where the buffer size is
+ specified as an argument to the constructor.
+ .
+ This package contains the dynamic library.
+
+Package: libatomic-queue-dev
 Section: libdevel
-Architecture: all
-Depends: libatomicqueueBROKEN (= ${binary:Version}), ${misc:Depends}
+Architecture: any
+Depends: libatomicqueue0 (= ${binary:Version}),
+         ${shlibs:Depends},
+         ${misc:Depends}
 Description: devel files for C++ atomic_queue library
  C++11 multiple-producer-multiple-consumer lockless queues based on
  circular buffer with std::atomic.  The main design principle these
@@ -52,4 +93,6 @@ Description: devel files for C++ atomic_queue library
  These containers have corresponding AtomicQueueB, OptimistAtomicQueueB,
  AtomicQueueB2, OptimistAtomicQueueB2 versions where the buffer size is
  specified as an argument to the constructor.
+ .
+ This package contains the header files and static library.
 


=====================================
debian/patches/generate-shared-library.patch
=====================================
@@ -0,0 +1,86 @@
+Author: Nilesh Patra <npatra974 at gmail.com>,
+        Andreas Tille <tille at debian.org>
+Last-Update: Fri, 23 Oct 2020 22:10:01 +0200
+Description: Fix unused variable
+
+--- a/Makefile
++++ b/Makefile
+@@ -10,6 +10,7 @@ BUILD := release
+ 
+ TOOLSET := gcc
+ build_dir := ${CURDIR}/build/${BUILD}/${TOOLSET}
++build_dir_shared := ${CURDIR}/build_shared/${BUILD}/${TOOLSET}
+ 
+ cxx.gcc := g++
+ cc.gcc := gcc
+@@ -54,25 +55,30 @@ ldlibs.moodycamel :=
+ cppflags.xenium := -I${abspath ../xenium}
+ ldlibs.xenium :=
+ 
++SOVERSION := 0
+ COMPILE.CXX = ${CXX} -o $@ -c ${cppflags} ${cxxflags} -MD -MP $(abspath $<)
+ COMPILE.S = ${CXX} -o- -S -masm=intel ${cppflags} ${cxxflags} $(abspath $<) | c++filt | egrep -v '^[[:space:]]*\.(loc|cfi|L[A-Z])' > $@
+ PREPROCESS.CXX = ${CXX} -o $@ -E ${cppflags} ${cxxflags} $(abspath $<)
+ COMPILE.C = ${CC} -o $@ -c ${cppflags} ${cflags} -MD -MP $(abspath $<)
+ LINK.EXE = ${LD} -o $@ $(ldflags) $(filter-out Makefile,$^) $(ldlibs)
+-LINK.SO = ${LD} -o $@ -shared $(ldflags) $(filter-out Makefile,$^) $(ldlibs)
++LINK.SO = ${LD} -o $@.$(SOVERSION) -shared $(ldflags) $(filter-out Makefile,$^) $(ldlibs)
+ LINK.A = ${AR} rscT $@ $(filter-out Makefile,$^)
+ 
+ exes := benchmarks tests
+ 
+ all : ${exes}
+ 
+-${exes} : % : ${build_dir}/%
++${exes} : % : ${build_dir}/% ${build_dir_shared}/libatomic_queue.so
+ 	ln -sf ${<:${CURDIR}/%=%}
+ 
+ ${build_dir}/libatomic_queue.a : $(addprefix ${build_dir}/,cpu_base_frequency.o huge_pages.o)
+ -include ${build_dir}/cpu_base_frequency.d
+ -include ${build_dir}/huge_pages.d
+ 
++${build_dir_shared}/libatomic_queue.so : $(addprefix ${build_dir_shared}/,cpu_base_frequency.o huge_pages.o)
++-include ${build_dir_shared}/cpu_base_frequency.d
++-include ${build_dir_shared}/huge_pages.d
++
+ ${build_dir}/benchmarks : cppflags += ${cppflags.tbb} ${cppflags.moodycamel} ${cppflags.xenium}
+ ${build_dir}/benchmarks : ldlibs += ${ldlibs.tbb} ${ldlibs.moodycamel} ${ldlibs.xenium} -ldl
+ ${build_dir}/benchmarks : ${build_dir}/benchmarks.o ${build_dir}/libatomic_queue.a Makefile | ${build_dir}
+@@ -85,9 +91,10 @@ ${build_dir}/tests : ${build_dir}/tests.
+ 	$(strip ${LINK.EXE})
+ -include ${build_dir}/tests.d
+ 
+-${build_dir}/%.so : cxxflags += -fPIC
+-${build_dir}/%.so : Makefile | ${build_dir}
+-	$(strip ${LINK.SO})
++${build_dir_shared}/%.so : cxxflags += -fPIC
++${build_dir_shared}/%.so : Makefile | ${build_dir}
++	${LINK.SO}
++	ln -s $@.$(SOVERSION) $@
+ 
+ ${build_dir}/%.a : Makefile | ${build_dir}
+ 	$(strip ${LINK.A})
+@@ -108,6 +115,13 @@ ${build_dir}/%.o : src/%.cc Makefile | $
+ ${build_dir}/%.o : src/%.c Makefile | ${build_dir}
+ 	$(strip ${COMPILE.C})
+ 
++${build_dir_shared}/%.o : src/%.cc Makefile | ${build_dir_shared}
++	$(strip ${COMPILE.CXX})
++
++${build_dir_shared}/%.o : src/%.c Makefile | ${build_dir_shared}
++	$(strip ${COMPILE.C})
++
++
+ %.S : cppflags += ${cppflags.tbb} ${cppflags.moodycamel} ${cppflags.xenium}
+ %.S : src/%.cc Makefile | ${build_dir}
+ 	$(strip ${COMPILE.S})
+@@ -118,6 +132,9 @@ ${build_dir}/%.o : src/%.c Makefile | ${
+ ${build_dir} :
+ 	mkdir -p $@
+ 
++${build_dir_shared} :
++	mkdir -p $@
++
+ rtags : clean
+ 	${MAKE} -nk | rc -c -; true
+ 


=====================================
debian/patches/series
=====================================
@@ -1 +1,2 @@
 fix_unused_variable.patch
+generate-shared-library.patch


=====================================
debian/rules
=====================================
@@ -24,6 +24,6 @@ override_dh_install:
 		    --exclude-la \
 		    --movedev debian/tmp/usr/include/* usr/include \
 		    --movedev "debian/tmp/usr/lib/*/pkgconfig/*.pc" usr/lib/$(DEB_HOST_MULTIARCH)/pkgconfig \
-		    debian/tmp/usr/lib/*/*.so
+		    build_shared/release/gcc/libatomic_queue.so
 	find debian -name lib*.la -delete
 


=====================================
debian/salsa-ci.yml.ex → debian/salsa-ci.yml
=====================================
@@ -1,10 +1,3 @@
-# For more information on what jobs are run see:
-# https://salsa.debian.org/salsa-ci-team/pipeline
-#
-# To enable the jobs, go to your repository (at salsa.debian.org)
-# and click over Settings > CI/CD > Expand (in General pipelines).
-# In "Custom CI config path" write debian/salsa-ci.yml and click
-# in "Save Changes". The CI tests will run after the next commit.
 ---
 include:
   - https://salsa.debian.org/salsa-ci-team/pipeline/raw/master/salsa-ci.yml


=====================================
html/benchmarks.css
=====================================
@@ -23,3 +23,12 @@ p, li {
     color: #A0A0A0;
     margin-left: 20px;
 }
+
+span.tooltip_scalability_title {
+    font-weight: bold;
+    font-size: 1.2em;
+}
+
+table.tooltip_scalability {
+    text-align: right;
+}


=====================================
html/benchmarks.html
=====================================
@@ -15,6 +15,7 @@
     <link rel="stylesheet" href="benchmarks.css">
     <script src="https://code.jquery.com/jquery-3.4.1.slim.min.js" integrity="sha256-pasqAKBDmFT4eHoN2ndd6lN370kFiGUFyTiUHWhU7k8=" crossorigin="anonymous"></script>
     <script src="https://code.highcharts.com/highcharts.js"></script>
+    <script src="https://code.highcharts.com/highcharts-more.js"></script>
     <script src="https://code.highcharts.com/modules/pattern-fill.js"></script>
     <script src="theme.js"></script>
     <script src="benchmarks.js"></script>
@@ -33,9 +34,9 @@
     <h2>Systems details</h2>
     <h3>Intel i9-9900KS system</h3>
     <ul>
-      <li>OS: Ubuntu-18.04.3 LTS
-      <li>Compiler: gcc-8.3.0
-      <li>atomic_queue version: commit de6a8cfd8ba523fb949391183d018357265eaefe
+      <li>OS: Ubuntu-18.04.4 LTS
+      <li>Compiler: gcc-8.4.0
+      <li>atomic_queue version: commit 7e138d21fcd4bad95e030d8d6c8b77d5a4538baa
       <li>Boost version: 1.65.1
       <li>TBB version: 2019_U7, commit 4233fef583b4f8cbf9f781311717600feaaa0694
       <li>moodycamel concurrentqueue version: commit dea078cf5b6e742cd67a0d725e36f872feca4de4
@@ -45,9 +46,9 @@
     <h3>Intel Xeon Gold 6132 system</h3>
     <ul>
       <li>OS: Red Hat Enterprise Linux Server release 6.10 (Santiago)
-      <li>Compiler: gcc-5.4.0
-      <li>atomic_queue version: commit de6a8cfd8ba523fb949391183d018357265eaefe
-      <li>Boost version: 1.61.0
+      <li>Compiler: gcc-8.4.0
+      <li>atomic_queue version: commit 7e138d21fcd4bad95e030d8d6c8b77d5a4538baa
+      <li>Boost version: 1.65.1
       <li>TBB version: 2019_U7, commit 4233fef583b4f8cbf9f781311717600feaaa0694
       <li>moodycamel concurrentqueue version: commit dea078cf5b6e742cd67a0d725e36f872feca4de4
       <li>moodycamel readerwriterqueue version: commit 2ae710de996a1d02bbc7696b2cdff2c6078e76f8


=====================================
html/benchmarks.js
=====================================
@@ -35,49 +35,59 @@ $(function() {
            "OptimistAtomicQueueB2": ['#FFBFBF', 18]
     };
 
-    function scalability_to_series(results) {
-        return Array.from(Object.entries(results)).map(entry => {
-            const name = entry[0];
-            const s = settings[name];
-            return {
-                name: name,
-                color: s[0],
-                index: s[1],
-                data: Array.from(Object.entries(entry[1])).map(xy => { return [parseInt(xy[0]), xy[1]]; })
-            };
-        });
-    }
+    function plot_scalability(div_id, results, title_suffix, max_lin, max_log) {
+        const modes = [
+            {type: 'linear', title: { text: 'throughput, msg/sec (linear scale)'}, max: max_lin, min: 0 },
+            {type: 'logarithmic', title: { text: 'throughput, msg/sec (logarithmic scale)'}, max: max_log, min: 100e3},
+        ];
+        let mode = 0;
 
-    function latency_to_series(results) {
-        const series = Array.from(Object.entries(results)).map(entry => {
-            const name = entry[0];
-            const value = entry[1];
+        const series = Object.entries(results).map(entry => {
+            const [name, stats] = entry;
             const s = settings[name];
             return {
                 name: name,
                 color: s[0],
                 index: s[1],
-                data: [{y: Math.round(value * 1e9), x: s[1]}]
-            };
-        });
-        series.sort((a, b) => { return a.index - b.index; });
-        series.forEach((element, index) => {
-            element.index = index;
-            element.data[0].x = index;
+                type: "column",
+                data: stats.map(a => [a[0], a[3]]),
+                atomic_queue_stats: stats
+            }
         });
-        const categories = series.map(s => { return s.name; });
-        return [series, categories];
-    }
 
-    function plot_scalability(div_id, series, title_suffix, max_lin, max_log) {
-        const modes = [
-            {type: 'linear', title: { text: 'throughput, msg/sec (linear scale)'}, max: max_lin, min: 0 },
-            {type: 'logarithmic', title: { text: 'throughput, msg/sec (logarithmic scale)'}, max: max_log, min: 100e3},
-        ];
-        let mode = 0;
+        const tooltips = []; // Build a tooltip once and then reuse it..
+        const tooltip_formatter = function() {
+            const threads = this.x;
+            let tooltip = tooltips[threads];
+            if(!tooltip) {
+                const data = [];
+                for(const p of this.points) {
+                    const stats = p.series.options.atomic_queue_stats[p.point.index];
+                    data[p.series.options.index] = {
+                        name: p.series.name,
+                        color: p.series.color,
+                        min: Highcharts.numberFormat(stats[1], 0),
+                        max: Highcharts.numberFormat(stats[2], 0),
+                        mean: Highcharts.numberFormat(stats[3], 0),
+                        stdev: Highcharts.numberFormat(stats[4], 0)
+                    };
+                }
+
+                let html = `<span class="tooltip_scalability_title">${threads} producers, ${threads} consumers</span>`;
+                html += '<table class="tooltip_scalability"><tr><th></th><th>mean</th><th>stdev</th><th>min</th><th>max</th></tr>';
+                for(const d of data)
+                    if(d)
+                        html += `<tr><td style="color: ${d.color}">${d.name}: </td><td><strong>${d.mean}</strong></td><td><strong>${d.stdev}</strong></td><td>${d.min}</td><td>${d.max}</td></tr>`;
+                html += '</table>';
+
+                tooltip = html;
+                tooltips[threads] = tooltip;
+            }
+            return tooltip;
+        }
+
         const chart = Highcharts.chart(div_id, {
             chart: {
-                type: 'column',
                 events: {
                     click: function() {
                         mode ^= 1;
@@ -94,40 +104,65 @@ $(function() {
             yAxis: modes[mode],
             tooltip: {
                 followPointer: true,
-                useHTML: true,
                 shared: true,
-                headerFormat: '<span style="font-weight: bold; font-size: 1.2em;">{point.key} producers, {point.key} consumers</span><table>',
-                pointFormat: '<tr><td style="color: {series.color}">{series.name}: </td>' +'<td style="text-align: right"><b>{point.y} msg/sec</b></td></tr>',
-                footerFormat: '</table>'
+                useHTML: true,
+                formatter: tooltip_formatter
             },
             series: series
         });
     }
 
-    function plot_latency(div_id, series_categories, title_suffix) {
-        const [series, categories] = series_categories;
+    function plot_latency(div_id, results, title_suffix) {
+        const series = Object.entries(results).map(entry => {
+            const [name, stats] = entry;
+            const s = settings[name];
+            return {
+                name: name,
+                color: s[0],
+                index: s[1],
+                type: 'bar',
+                data: [[s[1], stats[2]]],
+                atomic_queue_stats: stats
+            };
+        });
+        series.sort((a, b) => { return a.index - b.index; });
+        const categories = series.map(s => { return s.name; });
+
+        const tooltip_formatter = function() {
+            const stats = this.series.options.atomic_queue_stats;
+            const min = Highcharts.numberFormat(stats[0], 0);
+            const max = Highcharts.numberFormat(stats[1], 0);
+            const mean = Highcharts.numberFormat(stats[2], 0);
+            const stdev = Highcharts.numberFormat(stats[3], 0);
+            return `<strong>mean: ${mean} stdev: ${stdev}</strong> min: ${min} max: ${max}<br/>`;
+        };
+
         Highcharts.chart(div_id, {
-            chart: { type: 'bar' },
             plotOptions: {
                 series: { stacking: 'normal'},
                 bar: { dataLabels: { enabled: true, align: 'left', inside: false } }
             },
             title: { text: 'Latency on ' + title_suffix },
             xAxis: { categories: categories },
-            yAxis: { title: { text: 'latency, nanoseconds/round-trip' }, max: 800 },
-            tooltip: { valueSuffix: ' nanoseconds' },
+            yAxis: {
+                title: { text: 'latency, nanoseconds/round-trip' },
+                max: 1000,
+                tickInterval: 100
+            },
+            tooltip: {
+                useHTML: true,
+                formatter: tooltip_formatter
+            },
             series: series
         });
     }
 
-    // TODO: load these from files.
-    const scalability_9900KS = {"AtomicQueue":{"1":301561447.0,"2":12520152.0,"3":10914287.0,"4":8268873.0,"5":8181077.0,"6":8001280.0,"7":8030067.0,"8":7519277.0},"AtomicQueue2":{"1":24652882.0,"2":12127949.0,"3":10186299.0,"4":8133831.0,"5":8111845.0,"6":7942380.0,"7":7965498.0,"8":7427562.0},"AtomicQueueB":{"1":230791271.0,"2":12100863.0,"3":11116064.0,"4":8221578.0,"5":7939528.0,"6":7621657.0,"7":7785393.0,"8":7225373.0},"AtomicQueueB2":{"1":56143379.0,"2":11262466.0,"3":9577584.0,"4":7849112.0,"5":7735283.0,"6":7447186.0,"7":7679252.0,"8":7120325.0},"OptimistAtomicQueue":{"1":827243100.0,"2":32793965.0,"3":37561910.0,"4":39464775.0,"5":48711199.0,"6":50577426.0,"7":53205853.0,"8":50617291.0},"OptimistAtomicQueue2":{"1":683436723.0,"2":29709035.0,"3":33415151.0,"4":35574988.0,"5":43162677.0,"6":49628942.0,"7":51881631.0,"8":49984492.0},"OptimistAtomicQueueB":{"1":804033352.0,"2":32454906.0,"3":37045586.0,"4":38788935.0,"5":47479405.0,"6":49224042.0,"7":51769480.0,"8":49263631.0},"OptimistAtomicQueueB2":{"1":159673423.0,"2":27941976.0,"3":31948919.0,"4":31869639.0,"5":38049134.0,"6":41435707.0,"7":44304810.0,"8":46014506.0},"boost::lockfree::queue":{"1":8260647.0,"2":7800968.0,"3":7603347.0,"4":7097095.0,"5":6872844.0,"6":6387388.0,"7":6170695.0,"8":5834230.0},"boost::lockfree::spsc_queue":{"1":79092145.0,"2":null,"3":null,"4":null,"5":null,"6":null,"7":null,"8":null},"moodycamel::ConcurrentQueue":{"1":23189568.0,"2":15080986.0,"3":13951343.0,"4":14923133.0,"5":19016106.0,"6":19893649.0,"7":20708687.0,"8":20938407.0},"moodycamel::ReaderWriterQueue":{"1":295716975.0,"2":null,"3":null,"4":null,"5":null,"6":null,"7":null,"8":null},"pthread_spinlock":{"1":27229004.0,"2":16192208.0,"3":12181215.0,"4":9094456.0,"5":10571006.0,"6":9315636.0,"7":8583159.0,"8":7597199.0},"std::mutex":{"1":8037432.0,"2":6475278.0,"3":6503282.0,"4":6821664.0,"5":6622013.0,"6":6461594.0,"7":6278018.0,"8":6068950.0},"tbb::concurrent_bounded_queue":{"1":14800295.0,"2":15217234.0,"3":12782825.0,"4":10685498.0,"5":10198063.0,"6":9517751.0,"7":8691071.0,"8":8002795.0},"tbb::spin_mutex":{"1":41437276.0,"2":21828238.0,"3":11612637.0,"4":6693671.0,"5":6049098.0,"6":5428307.0,"7":4878552.0,"8":4244203.0},"xenium::michael_scott_queue":{"1":10215151.0,"2":8700295.0,"3":7804470.0,"4":6570763.0,"5":6836260.0,"6":7109682.0,"7":6705169.0,"8":6300559.0},"xenium::ramalhete_queue":{"1":33427748.0,"2":24557805.0,"3":29519283.0,"4":34093641.0,"5":38783113.0,"6":40289756.0,"7":42287643.0,"8":44722686.0},"xenium::vyukov_bounded_queue":{"1":122621777.0,"2":29341966.0,"3":16399996.0,"4":12759154.0,"5":11548899.0,"6":12816029.0,"7":10425842.0,"8":8568559.0}};
-    const scalability_xeon_gold_6132 = {"AtomicQueue":{"1":158109112.0,"2":4921854.0,"3":3498735.0,"4":2896774.0,"5":2416926.0,"6":2046932.0,"7":1773634.0,"8":1645924.0,"9":1457036.0,"10":1322161.0,"11":1186336.0,"12":1072455.0,"13":930567.0,"14":931606.0},"AtomicQueue2":{"1":130966968.0,"2":4620760.0,"3":3305710.0,"4":2787070.0,"5":2364350.0,"6":1972774.0,"7":1816863.0,"8":1715741.0,"9":1543989.0,"10":1362488.0,"11":1200436.0,"12":1066522.0,"13":956885.0,"14":883559.0},"AtomicQueueB":{"1":150200425.0,"2":4731025.0,"3":3368096.0,"4":2829384.0,"5":2408528.0,"6":1979764.0,"7":1855659.0,"8":1707383.0,"9":1467147.0,"10":1362266.0,"11":1257940.0,"12":1118451.0,"13":986849.0,"14":911597.0},"AtomicQueueB2":{"1":30885730.0,"2":4940112.0,"3":3295637.0,"4":2695437.0,"5":2257248.0,"6":2044260.0,"7":1831373.0,"8":1714119.0,"9":1446334.0,"10":1345247.0,"11":1146609.0,"12":1102961.0,"13":951675.0,"14":946796.0},"OptimistAtomicQueue":{"1":615462112.0,"2":12588449.0,"3":13517952.0,"4":14099926.0,"5":14555742.0,"6":14477634.0,"7":14589043.0,"8":11942734.0,"9":12318122.0,"10":11652615.0,"11":11276576.0,"12":11790362.0,"13":11616924.0,"14":11580480.0},"OptimistAtomicQueue2":{"1":285701790.0,"2":11464345.0,"3":12643790.0,"4":13373738.0,"5":13587917.0,"6":13787959.0,"7":14214689.0,"8":11068029.0,"9":11508394.0,"10":10943725.0,"11":10735351.0,"12":10831674.0,"13":10856099.0,"14":11070676.0},"OptimistAtomicQueueB":{"1":392396088.0,"2":12772847.0,"3":13333742.0,"4":13799277.0,"5":14338043.0,"6":14249719.0,"7":14319209.0,"8":12205595.0,"9":11696373.0,"10":11075294.0,"11":11768276.0,"12":11481230.0,"13":11334782.0,"14":11157997.0},"OptimistAtomicQueueB2":{"1":52277970.0,"2":11010593.0,"3":11902777.0,"4":12363497.0,"5":12904686.0,"6":13074313.0,"7":13206227.0,"8":10537499.0,"9":10484867.0,"10":10087570.0,"11":10107976.0,"12":9929433.0,"13":10750117.0,"14":10061327.0},"boost::lockfree::queue":{"1":3509287.0,"2":2691360.0,"3":2524041.0,"4":2279338.0,"5":2090858.0,"6":1923587.0,"7":1794532.0,"8":1295226.0,"9":1214404.0,"10":1030892.0,"11":948879.0,"12":894742.0,"13":768881.0,"14":782735.0},"boost::lockfree::spsc_queue":{"1":192419130.0,"2":null,"3":null,"4":null,"5":null,"6":null,"7":null,"8":null,"9":null,"10":null,"11":null,"12":null,"13":null,"14":null},"moodycamel::ConcurrentQueue":{"1":11324231.0,"2":6256475.0,"3":6277392.0,"4":6300071.0,"5":5622547.0,"6":5854465.0,"7":5134036.0,"8":3802947.0,"9":3549189.0,"10":3286559.0,"11":3416412.0,"12":3376207.0,"13":3319388.0,"14":3502120.0},"moodycamel::ReaderWriterQueue":{"1":275435749.0,"2":null,"3":null,"4":null,"5":null,"6":null,"7":null,"8":null,"9":null,"10":null,"11":null,"12":null,"13":null,"14":null},"pthread_spinlock":{"1":9636407.0,"2":4638371.0,"3":3549542.0,"4":2780490.0,"5":2484911.0,"6":2042073.0,"7":1893618.0,"8":1317140.0,"9":1074015.0,"10":934007.0,"11":912801.0,"12":852631.0,"13":827944.0,"14":823481.0},"tbb::concurrent_bounded_queue":{"1":6767479.0,"2":5453622.0,"3":4145085.0,"4":3564610.0,"5":3010331.0,"6":2587858.0,"7":2440643.0,"8":2068666.0,"9":2058159.0,"10":1739814.0,"11":1378381.0,"12":1234436.0,"13":1122814.0,"14":1015363.0},"tbb::spin_mutex":{"1":20199929.0,"2":11734715.0,"3":7460630.0,"4":5116921.0,"5":4793972.0,"6":3313624.0,"7":2245725.0,"8":1473631.0,"9":943642.0,"10":757081.0,"11":575810.0,"12":492764.0,"13":486487.0,"14":424400.0},"std::mutex":{"1":6310569.0,"2":4409320.0,"3":3131619.0,"4":3732458.0,"5":3944050.0,"6":4865090.0,"7":4817517.0,"8":3029023.0,"9":2494417.0,"10":2248690.0,"11":2079053.0,"12":2021206.0,"13":2019624.0,"14":2012028.0},"xenium::michael_scott_queue":{"1":4348585.0,"2":2801508.0,"3":2362747.0,"4":1948754.0,"5":1688837.0,"6":1646305.0,"7":1489044.0,"8":1269747.0,"9":1051668.0,"10":901028.0,"11":875082.0,"12":716284.0,"13":652457.0,"14":656109.0},"xenium::ramalhete_queue":{"1":15907512.0,"2":9859400.0,"3":11436032.0,"4":11298981.0,"5":11587121.0,"6":11508362.0,"7":11875851.0,"8":8625635.0,"9":8212694.0,"10":8154679.0,"11":8243399.0,"12":8866873.0,"13":8545991.0,"14":9573085.0},"xenium::vyukov_bounded_queue":{"1":21778808.0,"2":7204578.0,"3":4971188.0,"4":4092418.0,"5":3173930.0,"6":3066766.0,"7":2732360.0,"8":2133157.0,"9":2032751.0,"10":1777709.0,"11":1529543.0,"12":1323655.0,"13":1143511.0,"14":1172102.0}};
-    const latency_9900KS = {"AtomicQueue":0.000000159,"AtomicQueue2":0.000000172,"AtomicQueueB":0.000000167,"AtomicQueueB2":0.000000177,"OptimistAtomicQueue":0.000000144,"OptimistAtomicQueue2":0.000000167,"OptimistAtomicQueueB":0.00000014,"OptimistAtomicQueueB2":0.000000147,"boost::lockfree::queue":0.000000311,"boost::lockfree::spsc_queue":0.000000127,"moodycamel::ConcurrentQueue":0.000000225,"moodycamel::ReaderWriterQueue":0.000000109,"pthread_spinlock":0.00000024,"std::mutex":0.00000043,"tbb::concurrent_bounded_queue":0.000000268,"tbb::spin_mutex":0.000000227,"xenium::michael_scott_queue":0.00000036,"xenium::ramalhete_queue":0.000000253,"xenium::vyukov_bounded_queue":0.000000185};
-    const latency_xeon_gold_6132 = {"AtomicQueue":0.000000233,"AtomicQueue2":0.000000309,"AtomicQueueB":0.000000333,"AtomicQueueB2":0.000000387,"OptimistAtomicQueue":0.000000284,"OptimistAtomicQueue2":0.000000326,"OptimistAtomicQueueB":0.000000324,"OptimistAtomicQueueB2":0.00000035,"boost::lockfree::queue":0.000000695,"boost::lockfree::spsc_queue":0.000000256,"moodycamel::ConcurrentQueue":0.000000393,"moodycamel::ReaderWriterQueue":0.00000022,"pthread_spinlock":0.000000649,"tbb::concurrent_bounded_queue":0.000000593,"tbb::spin_mutex":0.000000515,"std::mutex":0.000001867,"xenium::michael_scott_queue":0.000000752,"xenium::ramalhete_queue":0.000000501,"xenium::vyukov_bounded_queue":0.000000427};
-
-    plot_scalability('scalability-9900KS-5GHz', scalability_to_series(scalability_9900KS), "Intel i9-9900KS (core 5GHz / uncore 4.7GHz)", 60e6, 1000e6);
-    plot_scalability('scalability-xeon-gold-6132', scalability_to_series(scalability_xeon_gold_6132), "Intel Xeon Gold 6132 (stock)", 15e6, 300e6);
-    plot_latency('latency-9900KS-5GHz', latency_to_series(latency_9900KS), "Intel i9-9900KS (core 5GHz / uncore 4.7GHz)");
-    plot_latency('latency-xeon-gold-6132', latency_to_series(latency_xeon_gold_6132), "Intel Xeon Gold 6132 (stock)");
+    const scalability_9900KS = {"AtomicQueue": [[1, 52660493, 286258811, 74231130, 46923128], [2, 11670323, 12511844, 12011858, 270810], [3, 9791407, 10870735, 10354387, 423144], [4, 8124141, 8262334, 8192020, 23767], [5, 7882302, 8164594, 8058345, 45565], [6, 7536832, 7993441, 7709403, 113618], [7, 7011413, 8020563, 7552220, 427030], [8, 6291117, 7515622, 6885968, 545237]], "AtomicQueue2": [[1, 22787102, 61696929, 23153888, 2262406], [2, 11251529, 12267302, 11657086, 212493], [3, 9250720, 10001213, 9472512, 131865], [4, 7958528, 8157226, 8055508, 33266], [5, 7784153, 8097440, 7972636, 61800], [6, 7450035, 7952026, 7641924, 130961], [7, 7005546, 7995642, 7509325, 381599], [8, 6349759, 7441272, 6854003, 471089]], "AtomicQueueB": [[1, 42613077, 228034973, 48968374, 17271281], [2, 11307287, 12122517, 11654762, 192294], [3, 9978460, 11117123, 10580691, 418664], [4, 7820303, 8149391, 8038875, 49723], [5, 7393617, 7922868, 7706848, 116543], [6, 7044646, 7623977, 7432887, 119697], [7, 6771050, 7812016, 7300722, 426304], [8, 6167485, 7214447, 6685564, 449080]], "AtomicQueueB2": [[1, 31747483, 44550020, 34684489, 1949026], [2, 11004660, 11624801, 11264944, 159388], [3, 9311302, 9898647, 9585552, 81750], [4, 7583514, 8026821, 7885529, 68419], [5, 7318917, 7806120, 7600268, 122098], [6, 7004711, 7518179, 7348211, 105453], [7, 6760542, 7775829, 7294366, 408721], [8, 6203358, 7175857, 6682430, 396215]], "OptimistAtomicQueue": [[1, 487380322, 829842979, 661556071, 100346674], [2, 31797501, 32761745, 32437895, 262498], [3, 36537452, 37548890, 37008138, 364848], [4, 39195547, 39453579, 39332552, 57506], [5, 37390896, 48677211, 44454166, 2490283], [6, 41443858, 50559092, 46326029, 3930139], [7, 43825547, 53156863, 48061575, 3621601], [8, 46177415, 50602252, 47828080, 1452954]], "OptimistAtomicQueue2": [[1, 25703634, 682547965, 230538256, 211766068], [2, 21661800, 29516399, 24851671, 1493004], [3, 29291342, 33834235, 30273240, 524342], [4, 32920458, 36241653, 33343018, 441670], [5, 
36830993, 43357072, 38976054, 1862089], [6, 39747081, 49741386, 44704047, 4504426], [7, 42479711, 51839802, 46362844, 3648632], [8, 43732450, 49877392, 46347786, 2371894]], "OptimistAtomicQueueB": [[1, 75661057, 738447042, 124305321, 83621261], [2, 31477141, 32474220, 32144227, 176354], [3, 36019269, 37037279, 36563374, 322208], [4, 38357209, 38905937, 38647013, 72549], [5, 36246828, 47608460, 43165102, 2491292], [6, 39494986, 49368578, 44976208, 4044505], [7, 41252863, 51655899, 46076590, 4108616], [8, 43899112, 49215349, 46213653, 1857294]], "OptimistAtomicQueueB2": [[1, 31441458, 495211858, 59246349, 27593701], [2, 21826376, 29825513, 26058597, 2081213], [3, 28756903, 34057706, 29794288, 839909], [4, 31084544, 33672715, 32858135, 485076], [5, 33366524, 40347303, 36955446, 2416293], [6, 36837801, 42786274, 39860539, 2457925], [7, 39946444, 45751323, 42359860, 2112179], [8, 41740252, 46736438, 43950268, 1704291]], "boost::lockfree::queue": [[1, 6746684, 8277185, 7092878, 418709], [2, 7312023, 7803259, 7553075, 87733], [3, 7263517, 7648842, 7476500, 91860], [4, 6359882, 7098293, 6610597, 192715], [5, 6367348, 6773852, 6457372, 46054], [6, 5927503, 6298061, 6055700, 68494], [7, 5746691, 6154693, 5964947, 83543], [8, 5331463, 5801836, 5535251, 89204]], "boost::lockfree::spsc_queue": [[1, 64923339, 78317500, 69086959, 2160846]], "moodycamel::ConcurrentQueue": [[1, 20190901, 29453011, 24985741, 1594915], [2, 14337151, 52431952, 16261043, 4078346], [3, 15291705, 43648056, 17046353, 4143492], [4, 15736506, 45837232, 18228886, 5125409], [5, 16888207, 47841058, 19245549, 5379950], [6, 16998837, 63384866, 20186438, 6382091], [7, 17716036, 66347129, 21038132, 6921929], [8, 17924728, 64375322, 22382013, 8285161]], "moodycamel::ReaderWriterQueue": [[1, 43356419, 538733018, 256503633, 185340411]], "pthread_spinlock": [[1, 23507277, 29932694, 27413691, 1797342], [2, 14270085, 18312194, 16382070, 769144], [3, 8211868, 12289865, 10189163, 1848412], [4, 6395961, 9383867, 7773828, 
1275888], [5, 8442872, 10466994, 9009726, 423856], [6, 8112952, 9328919, 8527056, 234738], [7, 7189956, 8492547, 7685023, 190137], [8, 6576974, 7596251, 6917365, 230403]], "std::mutex": [[1, 5006882, 9199394, 6838493, 652022], [2, 4687459, 6598427, 5749404, 387982], [3, 4580302, 6900299, 5685428, 464037], [4, 4941923, 7100935, 6086683, 325998], [5, 5151696, 6739344, 5986755, 186929], [6, 5521016, 6571707, 5918632, 116062], [7, 5532592, 6378700, 5826170, 88618], [8, 5438188, 6181434, 5704761, 76268]], "tbb::concurrent_bounded_queue": [[1, 10925661, 14807665, 13187267, 1088087], [2, 12352037, 15166768, 13521906, 612838], [3, 11099805, 12535211, 11630738, 279433], [4, 9929811, 10656023, 10303443, 177287], [5, 9349138, 10217187, 9704186, 183365], [6, 8548656, 9516659, 8863967, 196987], [7, 7358384, 8693321, 7958661, 218257], [8, 6615544, 8013655, 7136724, 350688]], "tbb::spin_mutex": [[1, 32588344, 41937261, 36432718, 2291145], [2, 17753221, 21806602, 19845873, 1357076], [3, 7201937, 11563566, 9346899, 1335282], [4, 2900531, 6495310, 4753237, 1579671], [5, 5103017, 5929302, 5552236, 189032], [6, 4254932, 5441256, 4834876, 480630], [7, 4223732, 4907625, 4560981, 246626], [8, 3338874, 4286720, 4138009, 129870]], "xenium::michael_scott_queue": [[1, 8417342, 10161353, 9493893, 327033], [2, 8230532, 8706024, 8488596, 76740], [3, 7071683, 7702336, 7404448, 172642], [4, 6177715, 6500382, 6329812, 50090], [5, 6227656, 6844074, 6487028, 190493], [6, 6408222, 7118668, 6666732, 183381], [7, 6220683, 6728490, 6410011, 115700], [8, 5906991, 6324097, 6072896, 89071]], "xenium::ramalhete_queue": [[1, 26889784, 33285933, 31963600, 729718], [2, 22883173, 24719839, 23562698, 341416], [3, 28121330, 29464259, 28838631, 366336], [4, 33312793, 34047588, 33650956, 184508], [5, 31808107, 38717573, 34327553, 2297341], [6, 33560480, 40481895, 36597565, 2593281], [7, 34734954, 42470849, 38204151, 3109357], [8, 35105293, 44944634, 39750343, 4246943]], "xenium::vyukov_bounded_queue": [[1, 
60523731, 122827707, 104853037, 23546237], [2, 17367563, 29204433, 25098906, 2910703], [3, 14333973, 16468857, 15718588, 266421], [4, 11678227, 12747022, 12409949, 196985], [5, 10112556, 11532118, 11083680, 290177], [6, 9709516, 12829017, 10969926, 1069776], [7, 9061926, 10421370, 9652587, 457388], [8, 8187699, 8591244, 8371133, 91811]]};
+    const scalability_xeon_gold_6132 = {"AtomicQueue": [[1, 8058966, 85486744, 19861417, 13465781], [2, 2774121, 5150399, 3716822, 529166], [3, 2234209, 3581321, 2844019, 297103], [4, 2189691, 2797820, 2500767, 141748], [5, 2000160, 2556556, 2239114, 108475], [6, 1800361, 2193952, 1967523, 85069], [7, 1339017, 2052080, 1747440, 113355], [8, 499239, 1790395, 1251368, 376126], [9, 457147, 1554831, 1065501, 317655], [10, 499701, 1497940, 933685, 296414], [11, 471438, 1317111, 758521, 284702], [12, 472731, 1223669, 645847, 211406], [13, 475966, 1051905, 607384, 154227], [14, 447298, 915959, 542223, 81608]], "AtomicQueue2": [[1, 6014132, 112250995, 11860821, 13520637], [2, 2828684, 4803110, 3861060, 547933], [3, 2370797, 3402752, 2907770, 290882], [4, 2198966, 2893203, 2481239, 168783], [5, 1922906, 2473517, 2215197, 120928], [6, 1700174, 2163119, 1957391, 98690], [7, 1584156, 1904525, 1752509, 71870], [8, 497167, 1692471, 1211725, 399956], [9, 492465, 1637918, 1032783, 355535], [10, 498320, 1502601, 894903, 322686], [11, 496862, 1287595, 740572, 255373], [12, 479471, 1142817, 669465, 220449], [13, 490420, 1087423, 564978, 132699], [14, 484859, 853987, 561566, 95000]], "AtomicQueueB": [[1, 11312440, 21089399, 14319386, 2322974], [2, 2828641, 4395539, 3598695, 363396], [3, 2383683, 3335368, 2837469, 222254], [4, 2194149, 2838158, 2479930, 155470], [5, 1961892, 2545450, 2206488, 124696], [6, 1704523, 2207219, 1965343, 113058], [7, 1400922, 2184936, 1760002, 125320], [8, 498481, 1680613, 1093922, 406887], [9, 495736, 1581164, 956214, 328532], [10, 498850, 1444846, 840343, 308105], [11, 483922, 1277870, 700261, 269404], [12, 487609, 1134736, 616528, 192809], [13, 494557, 857638, 544687, 81207], [14, 483041, 850197, 558294, 95879]], "AtomicQueueB2": [[1, 7460755, 14951085, 10960441, 1884733], [2, 2741293, 4471488, 3421984, 442894], [3, 2351790, 3354557, 2754730, 237182], [4, 2126512, 2763650, 2451035, 148674], [5, 2033646, 2434559, 2185096, 106060], [6, 1749020, 2318698, 
1968299, 112029], [7, 1352736, 1922994, 1752021, 107017], [8, 479497, 1649868, 1094885, 411721], [9, 486573, 1566955, 964595, 345537], [10, 498586, 1511963, 858856, 312525], [11, 484384, 1295858, 693007, 252815], [12, 491452, 1155658, 619410, 194677], [13, 442994, 1058050, 576966, 133949], [14, 469414, 882437, 539996, 70095]], "OptimistAtomicQueue": [[1, 56698745, 429583640, 175629468, 86409817], [2, 6408754, 11931110, 8798271, 1427113], [3, 8066359, 13129768, 10458901, 1514753], [4, 8298306, 13581897, 11250748, 1640968], [5, 8932051, 13944639, 12365031, 1196775], [6, 9446462, 14000610, 12900019, 1207077], [7, 9778505, 14314352, 13477473, 850012], [8, 9215134, 11865416, 10467114, 722175], [9, 8102279, 11617885, 10064154, 979170], [10, 7755919, 11379025, 10007986, 1069232], [11, 7809733, 11642631, 10059359, 1147829], [12, 7678745, 11785406, 10015423, 1121277], [13, 7891823, 11650001, 9852053, 1038603], [14, 7931500, 12177433, 9759040, 1154347]], "OptimistAtomicQueue2": [[1, 13352047, 166577270, 79006910, 30513135], [2, 5809820, 10117510, 7296714, 983486], [3, 7359997, 12559722, 9306742, 1644149], [4, 7729367, 12734246, 10474524, 1667974], [5, 8256529, 13316977, 11173176, 1704466], [6, 8427196, 13658790, 12145214, 1423602], [7, 8972407, 13954602, 12800483, 941189], [8, 8306345, 11031293, 10007828, 701969], [9, 7781010, 11330468, 9562517, 884767], [10, 7270803, 10842898, 9535466, 1017074], [11, 7306288, 11400679, 9630510, 1113066], [12, 7615179, 10905131, 9599169, 993126], [13, 7768507, 10951419, 9495167, 927146], [14, 7939789, 11593058, 9363004, 1002168]], "OptimistAtomicQueueB": [[1, 18005087, 461920680, 43299949, 58590278], [2, 7918458, 13244281, 10554149, 1412045], [3, 8566563, 13834992, 11664903, 1605994], [4, 8776970, 13733282, 12143773, 1339924], [5, 9080446, 14486100, 12540476, 1136728], [6, 9031510, 14144692, 12968928, 1144476], [7, 10260978, 14264523, 13401276, 578048], [8, 7860310, 11677713, 10338906, 733228], [9, 8037599, 11536671, 10046625, 980055], [10, 
7666387, 11483247, 9974741, 1077884], [11, 7773342, 11518370, 10097099, 1148028], [12, 7708761, 11962418, 10143672, 1169123], [13, 7725882, 11194790, 9873433, 1054815], [14, 7855188, 11275014, 9646028, 1118131]], "OptimistAtomicQueueB2": [[1, 11400233, 27116940, 21484544, 4456865], [2, 6565091, 11622771, 9409379, 1434258], [3, 7435746, 12559877, 10522656, 1516744], [4, 7776622, 12750010, 10260559, 1589501], [5, 7964167, 13270039, 11437117, 1346754], [6, 8849023, 13722187, 11756287, 1234538], [7, 8997751, 13835002, 12188309, 1192711], [8, 7756541, 10713723, 9591582, 747240], [9, 7314675, 11263412, 9209092, 948300], [10, 7352487, 10748888, 9264018, 1017641], [11, 7141749, 10896155, 9260621, 1076754], [12, 7063191, 10471776, 9248261, 984638], [13, 7358863, 10459869, 9071272, 961738], [14, 7490258, 10858481, 8986939, 1056811]], "boost::lockfree::queue": [[1, 1934482, 3335118, 2968513, 267417], [2, 2020556, 2714547, 2380363, 166177], [3, 1766944, 2481333, 2277536, 154223], [4, 1927815, 2468139, 2215008, 117101], [5, 1913080, 2341598, 2154795, 109277], [6, 1737937, 2239840, 2067750, 101330], [7, 1685532, 2158493, 1965928, 102944], [8, 476300, 1588449, 1057234, 312540], [9, 504256, 1466335, 882380, 236710], [10, 495183, 1249404, 733720, 210184], [11, 496163, 1173368, 615041, 163022], [12, 483550, 1080338, 576774, 125017], [13, 479449, 942173, 552191, 90608], [14, 444801, 789696, 538890, 64254]], "boost::lockfree::spsc_queue": [[1, 21589958, 35612264, 26701941, 3432048]], "moodycamel::ConcurrentQueue": [[1, 5031299, 13152497, 7231628, 2054206], [2, 3106244, 21840508, 5669989, 2480503], [3, 4039871, 18242902, 7384110, 3603375], [4, 4487792, 21071736, 8181695, 3838323], [5, 5209580, 24290350, 9672263, 5127482], [6, 5202954, 24160723, 8472347, 4567541], [7, 5415473, 26165080, 9754203, 5527832], [8, 4290069, 18526789, 7646915, 3740996], [9, 4479809, 35353993, 7585632, 6194437], [10, 4727037, 23405328, 7617742, 4615300], [11, 4631325, 30337177, 8709014, 6268210], [12, 4473005, 
27300920, 8026322, 5175124], [13, 4555975, 27789293, 8331006, 5575842], [14, 4102221, 43489396, 11921415, 9787758]], "moodycamel::ReaderWriterQueue": [[1, 12713140, 254602528, 122153284, 81114699]], "pthread_spinlock": [[1, 4306958, 8535650, 5905333, 840994], [2, 2839333, 4736775, 4053457, 456568], [3, 2548628, 3614912, 3201805, 248819], [4, 2087992, 2959824, 2605329, 165780], [5, 1983329, 2542321, 2248467, 138984], [6, 1783286, 2276326, 1986022, 112386], [7, 1536216, 2018246, 1766854, 112798], [8, 507415, 1499893, 1072692, 193480], [9, 501385, 1152617, 766700, 218876], [10, 489327, 1025270, 609721, 149499], [11, 497072, 858980, 604787, 120507], [12, 475489, 849693, 593343, 102672], [13, 463691, 888711, 574088, 96224], [14, 373441, 833012, 549424, 69983]], "std::mutex": [[1, 442267, 6858037, 5283864, 1863950], [2, 4162864, 4959039, 4478520, 180618], [3, 2575706, 3420067, 2946085, 152139], [4, 2601420, 3137460, 2858986, 96306], [5, 3392974, 3797099, 3577014, 80921], [6, 4370258, 4891290, 4579916, 108823], [7, 4837222, 6248120, 5845232, 326581], [8, 4675007, 7221265, 6303575, 552163], [9, 4517060, 6675754, 5604113, 611225], [10, 4450885, 6593358, 5396274, 618943], [11, 4666608, 6758794, 5363476, 530564], [12, 4662177, 7071927, 5362666, 566952], [13, 4496056, 7270498, 5446862, 629130], [14, 4471558, 7214091, 5489034, 703952]], "tbb::concurrent_bounded_queue": [[1, 2741938, 6390144, 4991431, 1081767], [2, 3694771, 5634833, 5092675, 420218], [3, 3475746, 4391484, 4044394, 228584], [4, 2964563, 3890751, 3477907, 203006], [5, 2600081, 3341203, 3069347, 157629], [6, 2448135, 3072604, 2752748, 131448], [7, 2331329, 2770486, 2526461, 106497], [8, 1032645, 2367531, 1609048, 398019], [9, 768399, 2133918, 1378943, 297095], [10, 886747, 1960986, 1287592, 241557], [11, 852994, 1572988, 1213625, 141077], [12, 905349, 1536817, 1207538, 119201], [13, 672137, 1425158, 1150131, 125239], [14, 568180, 1255046, 1002357, 146505]], "tbb::spin_mutex": [[1, 21210988, 25406844, 23208893, 
942349], [2, 7466066, 15461111, 13086723, 1647857], [3, 6548025, 10474300, 8916823, 708177], [4, 3503017, 7794311, 6294651, 966794], [5, 2153878, 5637630, 4544841, 631651], [6, 1922531, 4200007, 3254751, 437747], [7, 1534161, 2793915, 2246670, 284381], [8, 767030, 1603044, 1236223, 188171], [9, 664685, 1136499, 875213, 112513], [10, 503884, 920905, 710065, 93160], [11, 429966, 825839, 612632, 95126], [12, 328981, 741818, 536929, 89893], [13, 360477, 620612, 498964, 64207], [14, 343378, 562153, 446904, 49826]], "xenium::michael_scott_queue": [[1, 1770874, 4922580, 3393287, 798045], [2, 1987279, 3672290, 2760207, 374957], [3, 2000056, 2824672, 2385886, 152176], [4, 1827185, 2416437, 2127391, 115719], [5, 1702595, 2145286, 1919895, 91485], [6, 1536137, 1930985, 1748041, 79961], [7, 1426820, 1834610, 1643576, 81903], [8, 498697, 1628919, 1118063, 276128], [9, 452869, 1380436, 834411, 255185], [10, 494632, 1118414, 682696, 203418], [11, 490195, 1028229, 585071, 155611], [12, 484824, 889727, 574498, 120673], [13, 497397, 848913, 548659, 87463], [14, 498987, 845423, 541580, 77173]], "xenium::ramalhete_queue": [[1, 3243963, 16649455, 9804049, 4323515], [2, 4857860, 10891091, 6531145, 1101794], [3, 5681860, 10963393, 7152903, 886425], [4, 6453166, 11687397, 8090624, 1227694], [5, 7515932, 11465916, 8472107, 1003833], [6, 7603204, 11843149, 8816720, 1186933], [7, 7778687, 11444208, 8969099, 1200481], [8, 6620873, 8934784, 7893553, 554709], [9, 7110063, 8505487, 7938195, 307016], [10, 7332561, 8873905, 8083197, 302364], [11, 7650290, 8835820, 8195968, 282168], [12, 7663185, 8824693, 8282478, 271141], [13, 7786817, 9767663, 8710633, 459364], [14, 7888409, 11483491, 9499927, 1182102]], "xenium::vyukov_bounded_queue": [[1, 6620293, 58918128, 36338730, 16662346], [2, 3698951, 10319122, 6978079, 1806086], [3, 3321190, 5064399, 4427496, 329624], [4, 3526724, 4346643, 3923541, 164522], [5, 3316072, 3924131, 3551537, 117605], [6, 3114542, 3481877, 3279592, 91098], [7, 2784557, 
3242623, 3020950, 108825], [8, 1278721, 2800348, 1844408, 521532], [9, 1103213, 2357968, 1486304, 324785], [10, 1025767, 1973106, 1342701, 256232], [11, 732921, 1613235, 1194292, 156458], [12, 494928, 1408766, 1053087, 242590], [13, 479926, 1216268, 994219, 184954], [14, 433322, 1122701, 804412, 232255]]};
+    const latency_9900KS = {"AtomicQueue": [157, 171, 166, 0], "AtomicQueue2": [173, 177, 175, 0], "AtomicQueueB": [171, 184, 179, 3], "AtomicQueueB2": [175, 192, 180, 3], "OptimistAtomicQueue": [148, 160, 153, 3], "OptimistAtomicQueue2": [167, 176, 173, 1], "OptimistAtomicQueueB": [140, 154, 141, 1], "OptimistAtomicQueueB2": [149, 155, 150, 1], "boost::lockfree::queue": [310, 338, 319, 4], "boost::lockfree::spsc_queue": [129, 135, 132, 0], "moodycamel::ConcurrentQueue": [208, 254, 231, 7], "moodycamel::ReaderWriterQueue": [110, 167, 137, 12], "pthread_spinlock": [226, 308, 279, 25], "std::mutex": [411, 525, 465, 20], "tbb::concurrent_bounded_queue": [268, 307, 287, 9], "tbb::spin_mutex": [246, 309, 275, 18], "xenium::michael_scott_queue": [357, 407, 371, 6], "xenium::ramalhete_queue": [255, 282, 267, 4], "xenium::vyukov_bounded_queue": [183, 227, 212, 11]};
+    const latency_xeon_gold_6132 = {"AtomicQueue": [231, 479, 321, 72], "AtomicQueue2": [307, 556, 394, 86], "AtomicQueueB": [344, 588, 423, 80], "AtomicQueueB2": [403, 711, 491, 111], "OptimistAtomicQueue": [283, 459, 346, 55], "OptimistAtomicQueue2": [315, 562, 392, 78], "OptimistAtomicQueueB": [321, 507, 378, 69], "OptimistAtomicQueueB2": [345, 572, 409, 84], "boost::lockfree::queue": [726, 1151, 869, 154], "boost::lockfree::spsc_queue": [269, 507, 356, 69], "moodycamel::ConcurrentQueue": [427, 789, 547, 120], "moodycamel::ReaderWriterQueue": [207, 552, 328, 94], "pthread_spinlock": [623, 1899, 946, 308], "std::mutex": [1859, 3202, 2340, 463], "tbb::concurrent_bounded_queue": [565, 993, 683, 155], "tbb::spin_mutex": [561, 1069, 741, 156], "xenium::michael_scott_queue": [733, 1255, 879, 196], "xenium::ramalhete_queue": [493, 887, 596, 139], "xenium::vyukov_bounded_queue": [436, 685, 521, 89]};
+    plot_scalability('scalability-9900KS-5GHz', scalability_9900KS, "Intel i9-9900KS (core 5GHz / uncore 4.7GHz)", 60e6, 1000e6);
+    plot_scalability('scalability-xeon-gold-6132', scalability_xeon_gold_6132, "Intel Xeon Gold 6132 (stock)", 15e6, 300e6);
+    plot_latency('latency-9900KS-5GHz', latency_9900KS, "Intel i9-9900KS (core 5GHz / uncore 4.7GHz)");
+    plot_latency('latency-xeon-gold-6132', latency_xeon_gold_6132, "Intel Xeon Gold 6132 (stock)");
 });


=====================================
include/atomic_queue/atomic_queue_mutex.h
=====================================
@@ -46,31 +46,31 @@ public:
 
     template<class U>
     bool try_push(U&& element) noexcept {
-        ScopedLock lock(this->mutex_);
-        if(this->head_ - this->tail_ < size_) {
-            q_[details::remap_index<SHUFFLE_BITS>(this->head_ % size_)] = std::forward<U>(element);
-            ++this->head_;
+        ScopedLock lock(mutex_);
+        if(ATOMIC_QUEUE_LIKELY(head_ - tail_ < size_)) {
+            q_[details::remap_index<SHUFFLE_BITS>(head_ % size_)] = std::forward<U>(element);
+            ++head_;
             return true;
         }
         return false;
     }
 
     bool try_pop(T& element) noexcept {
-        ScopedLock lock(this->mutex_);
-        if(this->head_ != this->tail_) {
-            element = std::move(q_[details::remap_index<SHUFFLE_BITS>(this->tail_ % size_)]);
-            ++this->tail_;
+        ScopedLock lock(mutex_);
+        if(ATOMIC_QUEUE_LIKELY(head_ != tail_)) {
+            element = std::move(q_[details::remap_index<SHUFFLE_BITS>(tail_ % size_)]);
+            ++tail_;
             return true;
         }
         return false;
     }
 
     bool was_empty() const noexcept {
-        return static_cast<int>(this->head_ - this->tail_) <= 0;
+        return static_cast<int>(head_ - tail_) <= 0;
     }
 
     bool was_full() const noexcept {
-        return static_cast<int>(this->head_ - this->tail_) >= static_cast<int>(size_);
+        return static_cast<int>(head_ - tail_) >= static_cast<int>(size_);
     }
 };
 


=====================================
include/atomic_queue/defs.h
=====================================
@@ -47,9 +47,11 @@ namespace atomic_queue {
 #if defined(__GNUC__) || defined(__clang__)
 #define ATOMIC_QUEUE_LIKELY(expr) __builtin_expect(static_cast<bool>(expr), 1)
 #define ATOMIC_QUEUE_UNLIKELY(expr) __builtin_expect(static_cast<bool>(expr), 0)
+#define ATOMIC_QUEUE_NOINLINE __attribute__((noinline))
 #else
 #define ATOMIC_QUEUE_LIKELY(expr) (expr)
 #define ATOMIC_QUEUE_UNLIKELY(expr) (expr)
+#define ATOMIC_QUEUE_NOINLINE
 #endif
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////


=====================================
include/atomic_queue/spinlock.h
=====================================
@@ -49,20 +49,23 @@ public:
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-class FairSpinlock {
+class TicketSpinlock {
     alignas(CACHE_LINE_SIZE) std::atomic<unsigned> ticket_{0};
     alignas(CACHE_LINE_SIZE) std::atomic<unsigned> next_{0};
 
 public:
     class LockGuard {
-        FairSpinlock* const m_;
+        TicketSpinlock* const m_;
         unsigned const ticket_;
     public:
-        LockGuard(FairSpinlock& m) noexcept
+        LockGuard(TicketSpinlock& m) noexcept
             : m_(&m)
             , ticket_(m.lock())
         {}
 
+        LockGuard(LockGuard const&) = delete;
+        LockGuard& operator=(LockGuard const&) = delete;
+
         ~LockGuard() noexcept {
             m_->unlock(ticket_);
         }
@@ -70,13 +73,20 @@ public:
 
     using scoped_lock = LockGuard;
 
-    FairSpinlock(FairSpinlock const&) = delete;
-    FairSpinlock& operator=(FairSpinlock const&) = delete;
+    TicketSpinlock() noexcept = default;
+    TicketSpinlock(TicketSpinlock const&) = delete;
+    TicketSpinlock& operator=(TicketSpinlock const&) = delete;
 
-    unsigned lock() noexcept {
+    ATOMIC_QUEUE_NOINLINE unsigned lock() noexcept {
         auto ticket = ticket_.fetch_add(1, std::memory_order_relaxed);
-        while(next_.load(std::memory_order_acquire) != ticket)
-            spin_loop_pause();
+        for(;;) {
+            auto position = ticket - next_.load(std::memory_order_acquire);
+            if(ATOMIC_QUEUE_LIKELY(!position))
+                break;
+            do
+                spin_loop_pause();
+            while(--position);
+        }
         return ticket;
     }
 


=====================================
meson.build
=====================================
@@ -0,0 +1,38 @@
+# Copyright (c) 2019 Maxim Egorushkin. MIT License. See the full licence in file LICENSE.
+
+# (rm -rf build; meson build; cd build; time ninja -v)
+
+project(
+  'atomic_queue', 'cpp',
+  license : 'MIT License',
+  default_options : ['cpp_std=gnu++14', 'buildtype=release', 'b_ndebug=if-release']
+)
+
+cxx = meson.get_compiler('cpp')
+tbb = cxx.find_library('tbb', required : true)
+dl = cxx.find_library('dl', required : true)
+threads = dependency('threads')
+unit_test_framework = dependency('boost', modules : ['unit_test_framework'])
+xenium = declare_dependency(include_directories : '../xenium')
+moodycamel = declare_dependency(include_directories : '../')
+
+atomic_queue = declare_dependency(include_directories : ['include'], dependencies : threads)
+
+tests_exe = executable(
+  'tests',
+  'src/tests.cc',
+  dependencies : [atomic_queue, unit_test_framework]
+)
+test('tests', tests_exe)
+
+example_exe = executable(
+  'example',
+  'src/example.cc',
+  dependencies : [atomic_queue]
+)
+
+benchmarks_exe = executable(
+  'benchmarks',
+  ['src/benchmarks.cc', 'src/cpu_base_frequency.cc', 'src/huge_pages.cc'],
+  dependencies : [atomic_queue, xenium, moodycamel, tbb, dl]
+)


=====================================
results/results-16.20200703T014231.txt
=====================================
The diff for this file was not included because it is too large.

=====================================
results/results-28.20200704T034247.txt
=====================================
The diff for this file was not included because it is too large.

=====================================
scripts/latency_to_json.py
=====================================
@@ -4,13 +4,16 @@
 
 import sys
 import pandas as pd
+import json
 from pprint import pprint
 
 from parse_output import *
 
 results = list(parse_output(sys.stdin))
 df = as_latency_df(results)
-df = df.groupby('queue').min()
-pprint(df)
-df["sec/round-trip"].to_json(sys.stdout, orient='columns')
+output = dict() # name: [min, max, mean, stdev] in whole nanoseconds; stdev is 0 when a queue has a single sample
+for name, data in df.groupby('queue'):
+    s = data["sec/round-trip"].describe() # 'std' is NaN for a one-row group
+    output[name] = [0 if pd.isna(s[f]) else int(round(s[f] * 1e9)) for f in ['min', 'max', 'mean', 'std']] # round, not truncate: 1.59e-7 * 1e9 can be 158.99999999999997
+json.dump(output, sys.stdout)
 print()


=====================================
scripts/scalability_to_json.py
=====================================
@@ -4,12 +4,19 @@
 
 import sys
 import pandas as pd
+import json
+from collections import defaultdict
 from pprint import pprint
 
 from parse_output import *
 
 results = list(parse_output(sys.stdin))
 df = as_scalability_df(results)
-df = df.groupby(['queue', 'threads']).max().unstack(level=0).droplevel(0, axis=1)
-df.to_json(sys.stdout, orient='columns')
+
+output = defaultdict(list) # name: list of [threads, min, max, mean, stdev]; stdev is 0 when a group has a single run
+for (name, threads), data in df.groupby(['queue', 'threads']):
+    s = data["msg/sec"].describe(percentiles=None) # 'std' is NaN for a one-row group
+    threads = int(threads)
+    output[name].append([threads, *[0 if pd.isna(s[f]) else int(s[f]) for f in ['min', 'max', 'mean', 'std']]]) # int(NaN) would raise ValueError
+json.dump(output, sys.stdout)
 print()


=====================================
scripts/stats.py
=====================================
@@ -5,6 +5,7 @@
 import sys
 import re
 import math
+import csv
 import numpy as np
 from scipy import stats
 from pprint import pprint
@@ -18,19 +19,19 @@ for line in sys.stdin:
     if m:
         results[m.group(1)][m.group(3)].append(float(m.group(2).replace(',', '')))
 
-# pprint(results)
+def format_msg_sec(d, median, benchmark):  # d: scipy.stats.describe result; median: passed in by the caller (parameter was misspelled `media`, so the body read the global instead)
+    return "{:11,.0f} {} (median: {:11,.0f}, mean: {:11,.0f} stdev: {:11,.0f})".format(d.minmax[1], benchmark, median, d.mean, math.sqrt(d.variance))
 
-def format_msg_sec(d, benchmark):
-    return "{:11,.0f} {} (mean: {:11,.0f} stdev: {:11,.0f})".format(d.minmax[1], benchmark, d.mean, math.sqrt(d.variance))
-
-def format_round_trip(d, benchmark):
-    return "{:11.9f} {} (mean: {:11.9f} stdev: {:11.9f})".format(d.minmax[0], benchmark, d.mean, math.sqrt(d.variance))
+def format_round_trip(d, median, benchmark):  # d: scipy.stats.describe result; median: passed in by the caller (parameter was misspelled `media`)
+    return "{:11.9f} {} (median: {:11.9f}, mean: {:11.9f} stdev: {:11.9f})".format(d.minmax[0], benchmark, median, d.mean, math.sqrt(d.variance))  # median is in seconds, so it needs the same 9-decimal format as the other fields
 
 fmt = {
     'msg/sec': format_msg_sec,
     'sec/round-trip': format_round_trip
     }
 
+csv_file = csv.writer(open("results.csv", "w", newline=""), csv.excel_tab)  # newline="" lets the csv module control line endings itself
+csv_file.writerow(["name", "min", "max", "mean", "stdev"])
 for benchmark in ['msg/sec', 'sec/round-trip']:
     queues = sorted(results.keys())
     for queue in queues:
@@ -39,5 +40,7 @@ for benchmark in ['msg/sec', 'sec/round-trip']:
         if not runs:
             continue
         d = stats.describe(runs)
-        desc = fmt[benchmark](d, benchmark)
+        median = np.median(runs)
+        desc = fmt[benchmark](d, median, benchmark)
         print("{:>40s}: {}".format(queue, desc))
+        csv_file.writerow([queue, d.minmax[0], d.minmax[1], d.mean, math.sqrt(d.variance)])


=====================================
src/benchmarks.cc
=====================================
@@ -5,9 +5,6 @@
 #include "atomic_queue/atomic_queue.h"
 #include "atomic_queue/atomic_queue_mutex.h"
 #include "atomic_queue/barrier.h"
-#include "atomic_queue/cpu_base_frequency.h"
-#include "atomic_queue/huge_pages.h"
-#include "atomic_queue/moodycamel.h"
 
 #include <xenium/michael_scott_queue.hpp>
 #include <xenium/ramalhete_queue.hpp>
@@ -20,6 +17,10 @@
 #include <tbb/concurrent_queue.h>
 #include <tbb/spin_mutex.h>
 
+#include "cpu_base_frequency.h"
+#include "huge_pages.h"
+#include "moodycamel.h"
+
 #include <algorithm>
 #include <clocale>
 #include <cstdint>
@@ -148,6 +149,23 @@ void check_huge_pages_leaks(char const* name, HugePages& hp) {
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
+struct ThreadCound {
+    unsigned producers;
+    unsigned comsumers;
+};
+
+template<class T>
+struct ConstructorAdapter : T{
+    using T::T;
+};
+
+// template<class T>
+// struct ConstructorAdapter : {
+//     using T::T;
+// };
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
 // According to my benchmarking, it looks like the best performance is achieved with the following parameters:
 // * For SPSC: SPSC=true,  MINIMIZE_CONTENTION=false, MAXIMIZE_THROUGHPUT=false.
 // * For MPMC: SPSC=false, MINIMIZE_CONTENTION=true,  MAXIMIZE_THROUGHPUT=true.
@@ -180,8 +198,9 @@ void throughput_producer(unsigned N, Queue* queue, std::atomic<cycles_t>* t0, Ba
     t0->compare_exchange_strong(expected, __builtin_ia32_rdtsc(), std::memory_order_acq_rel, std::memory_order_relaxed);
 
     region_guard_t<Queue> guard;
+    ProducerOf<Queue> producer{*queue};
     for(unsigned n = 1, stop = N + 1; n <= stop; ++n)
-        queue->push(n);
+        producer.push(*queue, n);
 }
 
 template<class Queue>
@@ -190,8 +209,9 @@ void throughput_consumer_impl(unsigned N, Queue* queue, sum_t* consumer_sum, std
     sum_t sum = 0;
 
     region_guard_t<Queue> guard;
+    ConsumerOf<Queue> consumer{*queue};
     for(;;) {
-        unsigned n = queue->pop();
+        unsigned n = consumer.pop(*queue);
         if(n == stop)
             break;
         sum += n;
@@ -216,7 +236,7 @@ cycles_t benchmark_throughput(HugePages& hp, std::vector<unsigned> const& hw_thr
     set_thread_affinity(hw_thread_ids[thread_count * 2 - 1]); // Use this thread for the last consumer.
     unsigned cpu_idx = 0;
 
-    auto queue = hp.create_unique_ptr<Queue>();
+    auto queue = hp.create_unique_ptr<Queue>(ContextOf<Queue>{thread_count, thread_count});
     std::atomic<cycles_t> t0{0};
     cycles_t t1 = 0;
     std::atomic<unsigned> last_consumer{thread_count};
@@ -330,7 +350,7 @@ void run_throughput_benchmarks(HugePages& hp, std::vector<CpuTopologyInfo> const
     run_throughput_mpmc_benchmark("boost::lockfree::queue", hp, hw_thread_ids,
                                   Type<BoostQueueAdapter<boost::lockfree::queue<unsigned, BoostAllocator, boost::lockfree::capacity<SIZE - 2>>>>{});
 
-    // run_throughput_mpmc_benchmark("FairSpinlock", hp, hw_thread_ids, Type<RetryDecorator<AtomicQueueMutex<unsigned, SIZE, FairSpinlock>>>{});
+    run_throughput_mpmc_benchmark("TicketSpinlock", hp, hw_thread_ids, Type<RetryDecorator<AtomicQueueMutex<unsigned, SIZE, TicketSpinlock>>>{});
     // run_throughput_mpmc_benchmark("UnfairSpinlock", hp, hw_thread_ids, Type<RetryDecorator<AtomicQueueMutex<unsigned, SIZE, UnfairSpinlock>>>{});
 
     run_throughput_spsc_benchmark("moodycamel::ReaderWriterQueue", hp, hw_thread_ids, Type<MoodyCamelReaderWriterQueue<unsigned, SIZE>>{});
@@ -385,18 +405,29 @@ void run_throughput_benchmarks(HugePages& hp, std::vector<CpuTopologyInfo> const
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-template<class Queue, bool Sender>
-void ping_pong_thread_impl(Queue* q1, Queue* q2, unsigned N, cycles_t* time) {
+template<class Queue>
+void ping_pong_thread_impl(Queue* q1, Queue* q2, unsigned N, cycles_t* time, std::false_type /*sender*/) {
+    cycles_t t0 = __builtin_ia32_rdtsc();
+    region_guard_t<Queue> guard;
+    ConsumerOf<Queue> consumer_q1{*q1};
+    ProducerOf<Queue> producer_q2{*q2};
+    for(unsigned i = 1, j = 0; j < N; ++i) {
+        j = consumer_q1.pop(*q1);
+        producer_q2.push(*q2, i);
+    }
+    cycles_t t1 = __builtin_ia32_rdtsc();
+    *time = t1 - t0;
+}
+
+template<class Queue>
+void ping_pong_thread_impl(Queue* q1, Queue* q2, unsigned N, cycles_t* time, std::true_type /*sender*/) {
     cycles_t t0 = __builtin_ia32_rdtsc();
     region_guard_t<Queue> guard;
+    ProducerOf<Queue> producer_q1{*q1};
+    ConsumerOf<Queue> consumer_q2{*q2};
     for(unsigned i = 1, j = 0; j < N; ++i) {
-        if(Sender) {
-            q1->push(i);
-            j = q2->pop();
-        } else {
-            j = q1->pop();
-            q2->push(i);
-        }
+        producer_q1.push(*q1, i);
+        j = consumer_q2.pop(*q2);
     }
     cycles_t t1 = __builtin_ia32_rdtsc();
     *time = t1 - t0;
@@ -405,20 +436,23 @@ void ping_pong_thread_impl(Queue* q1, Queue* q2, unsigned N, cycles_t* time) {
 template<class Queue>
 inline void ping_pong_thread_receiver(Barrier* barrier, Queue* q1, Queue* q2, unsigned N, cycles_t* time) {
     barrier->wait();
-    ping_pong_thread_impl<Queue, false>(q1, q2, N, time);
+    std::false_type constexpr sender;
+    ping_pong_thread_impl<Queue>(q1, q2, N, time, sender);
 }
 
 template<class Queue>
 inline void ping_pong_thread_sender(Barrier* barrier, Queue* q1, Queue* q2, unsigned N, cycles_t* time) {
     barrier->release(1);
-    ping_pong_thread_impl<Queue, true>(q1, q2, N, time);
+    std::true_type constexpr sender;
+    ping_pong_thread_impl<Queue>(q1, q2, N, time, sender);
 }
 
 template<class Queue>
 inline std::array<cycles_t, 2> ping_pong_benchmark(unsigned N, HugePages& hp, unsigned const (&cpus)[2]) {
     set_thread_affinity(cpus[0]); // This thread is the sender.
-    auto q1 = hp.create_unique_ptr<Queue>();
-    auto q2 = hp.create_unique_ptr<Queue>();
+    ContextOf<Queue> const ctx{1, 1};
+    auto q1 = hp.create_unique_ptr<Queue>(ctx);
+    auto q2 = hp.create_unique_ptr<Queue>(ctx);
     Barrier barrier;
     std::array<cycles_t, 2> times;
     set_default_thread_affinity(cpus[1]);
@@ -467,7 +501,7 @@ void run_ping_pong_benchmarks(HugePages& hp, std::vector<CpuTopologyInfo> const&
     run_ping_pong_benchmark<BoostQueueAdapter<boost::lockfree::queue<unsigned, BoostAllocator, boost::lockfree::capacity<SIZE>>>>("boost::lockfree::queue", hp,
                                                                                                                                   hw_thread_ids);
 
-    // run_ping_pong_benchmark<RetryDecorator<AtomicQueueMutex<unsigned, SIZE, FairSpinlock>>>("FairSpinlock", hp, hw_thread_ids);
+    run_ping_pong_benchmark<RetryDecorator<AtomicQueueMutex<unsigned, SIZE, TicketSpinlock>>>("TicketSpinlock", hp, hw_thread_ids);
     // run_ping_pong_benchmark<RetryDecorator<AtomicQueueMutex<unsigned, SIZE, UnfairSpinlock>>>("UnfairSpinlock", hp, hw_thread_ids);
 
     run_ping_pong_benchmark<MoodyCamelReaderWriterQueue<unsigned, SIZE>>("moodycamel::ReaderWriterQueue", hp, hw_thread_ids);


=====================================
src/benchmarks.h
=====================================
@@ -0,0 +1,58 @@
+/* -*- mode: c++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
+#ifndef ATOMIC_QUEUE_BENCHMARKS_H_INCLUDED
+#define ATOMIC_QUEUE_BENCHMARKS_H_INCLUDED
+
+#include <utility>
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+namespace atomic_queue {
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+struct Context {
+    unsigned producers;
+    unsigned consumers;
+};
+
+struct NoContext {
+    template<class... Args>
+    constexpr NoContext(Args&&...) noexcept {}
+};
+
+template<class T> typename T::ContextType context_of_(int);
+template<class T> NoContext context_of_(long);
+template<class T> using ContextOf = decltype(context_of_<T>(0));
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+struct NoToken {
+    template<class... Args>
+    constexpr NoToken(Args&&...) noexcept {}
+
+    template<class Queue, class T>
+    static void push(Queue& q, T&& element) noexcept {
+        q.push(std::forward<T>(element));
+    }
+
+    template<class Queue>
+    static auto pop(Queue& q) noexcept {
+        return q.pop();
+    }
+};
+
+template<class T> typename T::Producer producer_of_(int);
+template<class T> NoToken producer_of_(long);
+template<class T> using ProducerOf = decltype(producer_of_<T>(1));
+
+template<class T> typename T::Consumer consumer_of_(int);
+template<class T> NoToken consumer_of_(long);
+template<class T> using ConsumerOf = decltype(consumer_of_<T>(1));
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+} // atomic_queue
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+#endif // ATOMIC_QUEUE_BENCHMARKS_H_INCLUDED


=====================================
src/cpu_base_frequency.cc
=====================================
@@ -2,7 +2,7 @@
 
 // Copyright (c) 2019 Maxim Egorushkin. MIT License. See the full licence in file LICENSE.
 
-#include "atomic_queue/cpu_base_frequency.h"
+#include "cpu_base_frequency.h"
 
 #include <fstream>
 #include <tuple>


=====================================
include/atomic_queue/cpu_base_frequency.h → src/cpu_base_frequency.h
=====================================


=====================================
src/example.cc
=====================================
@@ -0,0 +1,69 @@
+/* -*- mode: c++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
+
+// Copyright (c) 2019 Maxim Egorushkin. MIT License. See the full licence in file LICENSE.
+
+#include "atomic_queue/atomic_queue.h"
+
+#include <thread>
+#include <cstdint>
+#include <iostream>
+
+int main() {
+    int constexpr PRODUCERS = 1;
+    int constexpr CONSUMERS = 2;
+    unsigned constexpr CAPACITY = 1024;
+    unsigned constexpr N = 1000000;
+
+    using Element = uint32_t;
+    Element constexpr NIL = static_cast<Element>(-1);
+
+    using Queue = atomic_queue::AtomicQueueB<Element, std::allocator<Element>, NIL>;
+
+    // Create a queue shared between producers and consumers.
+    Queue q{CAPACITY};
+
+    // Start consumers.
+    uint64_t results[CONSUMERS];
+    std::thread consumers[CONSUMERS];
+    for(int i = 0; i < CONSUMERS; ++i)
+        consumers[i] = std::thread([&q, &r = results[i]]() {
+            uint64_t sum = 0;
+            while(Element n = q.pop()) // Stop when 0 is received.
+                sum += n;
+            r = sum;
+        });
+
+    // Start producers.
+    std::thread producers[PRODUCERS];
+    for(int i = 0; i < PRODUCERS; ++i)
+        producers[i] = std::thread([&q]() {
+            for(Element n = N; n; --n)
+                q.push(n);
+        });
+
+    // Wait till producers complete and terminate.
+    for(auto& t : producers)
+        t.join();
+
+    // Stop consumers.
+    for(int i = CONSUMERS; i--;)
+        q.push(0);
+    // Wait till consumers complete and terminate.
+    for(auto& t : consumers)
+        t.join();
+
+    // Verify the results.
+    uint64_t result = 0;
+    for(auto& r : results) {
+        result += r;
+        if(!r)
+            std::cerr << "WARNING: consumer " << (&r - results) << " received no messages.\n";
+    }
+    uint64_t constexpr expected_result = (N + 1) / 2. * N * PRODUCERS;
+    if(int64_t result_diff = result - expected_result) {
+        std::cerr << "ERROR: unexpected result difference " << result_diff << '\n';
+        return EXIT_FAILURE;
+    }
+
+    return EXIT_SUCCESS;
+}


=====================================
src/huge_pages.cc
=====================================
@@ -1,5 +1,5 @@
 /* -*- mode: c++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
-#include "atomic_queue/huge_pages.h"
+#include "huge_pages.h"
 
 #include <system_error>
 


=====================================
include/atomic_queue/huge_pages.h → src/huge_pages.h
=====================================
@@ -2,6 +2,8 @@
 #ifndef HUGE_PAGES_H_INCLUDED
 #define HUGE_PAGES_H_INCLUDED
 
+#include "benchmarks.h"
+
 #include <new>
 #include <memory>
 #include <utility>
@@ -103,6 +105,11 @@ public:
         return std::unique_ptr<T, Deleter>{new(this->allocate(sizeof(T))) T{std::forward<Args>(args)...}, Deleter{this}};
     }
 
+    template<class T, class... Args>
+    std::unique_ptr<T, Deleter> create_unique_ptr(NoContext, Args&&... args) {
+        return std::unique_ptr<T, Deleter>{new(this->allocate(sizeof(T))) T{std::forward<Args>(args)...}, Deleter{this}};
+    }
+
     template<class T>
     void destroy(T* p) {
         void* q = p;


=====================================
include/atomic_queue/moodycamel.h → src/moodycamel.h
=====================================
@@ -4,10 +4,12 @@
 
 // Copyright (c) 2019 Maxim Egorushkin. MIT License. See the full licence in file LICENSE.
 
+#include "benchmarks.h"
+
 #include <concurrentqueue/concurrentqueue.h>
 #include <readerwriterqueue/readerwriterqueue.h>
 
-#include "defs.h"
+#include "atomic_queue/defs.h"
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
@@ -15,29 +17,62 @@ namespace atomic_queue {
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-template<class T, unsigned Capacity, class Queue>
-struct MoodyCamelAdapter : Queue {
-    MoodyCamelAdapter()
-        : Queue(Capacity) {}
+template<class T, unsigned Capacity>
+struct MoodyCamelQueue : moodycamel::ConcurrentQueue<T> {
+    using producer_token_t = typename moodycamel::ConcurrentQueue<T>::producer_token_t;
+    using consumer_token_t = typename moodycamel::ConcurrentQueue<T>::consumer_token_t;
 
-    void push(T element) {
-        while(!this->try_enqueue(element))
+    using ContextType = Context;
+
+    struct Producer {
+        producer_token_t t_;
+        Producer(MoodyCamelQueue& q) noexcept : t_(q) {}
+        void push(MoodyCamelQueue& q, T element) { q.push(t_, element); }
+    };
+
+    struct Consumer {
+        consumer_token_t t_;
+        Consumer(MoodyCamelQueue& q) noexcept : t_(q) {}
+        T pop(MoodyCamelQueue& q) { return q.pop(t_); }
+    };
+
+    MoodyCamelQueue(Context context)
+        : moodycamel::ConcurrentQueue<T>(Capacity, context.producers, 0)
+    {}
+
+    void push(producer_token_t& tok, T element) noexcept {
+        while(!this->try_enqueue(tok, element))
             spin_loop_pause();
     }
 
-    T pop() {
+    T pop(consumer_token_t& tok) noexcept {
         T element;
-        while(!this->try_dequeue(element))
+        while(!this->try_dequeue(tok, element))
             spin_loop_pause();
         return element;
     }
 };
 
-template<class T, unsigned Capacity>
-using MoodyCamelQueue = MoodyCamelAdapter<T, Capacity, moodycamel::ConcurrentQueue<T>>;
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
 template<class T, unsigned Capacity>
-using MoodyCamelReaderWriterQueue = MoodyCamelAdapter<T, Capacity, moodycamel::ReaderWriterQueue<T>>;
+struct MoodyCamelReaderWriterQueue : moodycamel::ReaderWriterQueue<T> {
+    MoodyCamelReaderWriterQueue()
+        : moodycamel::ReaderWriterQueue<T>(Capacity)
+    {}
+
+    void push(T element) noexcept {
+        while(!this->try_enqueue(element))
+            spin_loop_pause();
+    }
+
+    T pop() noexcept {
+        T element;
+        while(!this->try_dequeue(element))
+            spin_loop_pause();
+        return element;
+    }
+};
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 


=====================================
src/tests.cc
=====================================
@@ -2,7 +2,6 @@
 
 // Copyright (c) 2019 Maxim Egorushkin. MIT License. See the full licence in file LICENSE.
 
-#define BOOST_TEST_DYN_LINK
 #define BOOST_TEST_MODULE atomic_queue
 #include <boost/test/unit_test.hpp>
 
@@ -65,7 +64,7 @@ void stress() {
 
     uint64_t result = 0;
     for(auto& r : results) {
-        BOOST_CHECK_GT(r, expected_result / (CONSUMERS + 1)); // Make sure a consumer didn't starve. False positives are possible here.
+        BOOST_CHECK_GT(r, (expected_result / CONSUMERS) / 10); // Make sure a consumer didn't starve. False positives are possible here.
         result += r;
     }
 
@@ -78,30 +77,30 @@ void stress() {
 template<class Q>
 void test_unique_ptr_int(Q& q) {
     BOOST_CHECK(q.was_empty());
-    BOOST_CHECK_EQUAL(q.was_size(), 0);
+    BOOST_CHECK_EQUAL(q.was_size(), 0u);
     std::unique_ptr<int> p{new int{1}};
     BOOST_REQUIRE(q.try_push(move(p)));
     BOOST_CHECK(!p);
     BOOST_CHECK(!q.was_empty());
-    BOOST_CHECK_EQUAL(q.was_size(), 1);
+    BOOST_CHECK_EQUAL(q.was_size(), 1u);
 
     p.reset(new int{2});
     q.push(move(p));
     BOOST_REQUIRE(!p);
     BOOST_CHECK(!q.was_empty());
-    BOOST_CHECK_EQUAL(q.was_size(), 2);
+    BOOST_CHECK_EQUAL(q.was_size(), 2u);
 
     BOOST_REQUIRE(q.try_pop(p));
     BOOST_REQUIRE(p.get());
     BOOST_CHECK_EQUAL(*p, 1);
     BOOST_CHECK(!q.was_empty());
-    BOOST_CHECK_EQUAL(q.was_size(), 1);
+    BOOST_CHECK_EQUAL(q.was_size(), 1u);
 
     p = q.pop();
     BOOST_REQUIRE(p.get());
     BOOST_CHECK_EQUAL(*p, 2);
     BOOST_CHECK(q.was_empty());
-    BOOST_CHECK_EQUAL(q.was_size(), 0);
+    BOOST_CHECK_EQUAL(q.was_size(), 0u);
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -149,4 +148,32 @@ BOOST_AUTO_TEST_CASE(move_constructor_assignment) {
     p = std::move(p2);
 }
 
+BOOST_AUTO_TEST_CASE(try_push) {
+    using Queue = atomic_queue::AtomicQueueB2<
+      /* T = */ float,
+      /* A = */ std::allocator<float>,
+      /* MAXIMIZE_THROUGHPUT */ true,
+      /* TOTAL_ORDER = */ true,
+      /* SPSC = */ true
+      >;
+
+    constexpr unsigned CAPACITY = 4096;
+    Queue q(CAPACITY);
+    BOOST_CHECK_EQUAL(q.capacity(), CAPACITY);
+    BOOST_CHECK(q.was_empty());
+    BOOST_CHECK_EQUAL(q.was_size(), 0u);
+
+    for(unsigned i = 1; i <= CAPACITY; ++i)
+        BOOST_CHECK(q.try_push(i));
+
+    BOOST_CHECK(!q.was_empty());
+    BOOST_CHECK_EQUAL(q.was_size(), CAPACITY);
+
+    for(unsigned i = 1; i <= CAPACITY; ++i)
+        BOOST_CHECK(!q.try_push(i));
+
+    BOOST_CHECK(!q.was_empty());
+    BOOST_CHECK_EQUAL(q.was_size(), CAPACITY);
+}
+
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////



View it on GitLab: https://salsa.debian.org/med-team/libatomic-queue/-/compare/5283b9f1903aacaa18c8acf848c970949481af5f...e07f4451b88a9a15dad2641eee9229327728dff2

-- 
View it on GitLab: https://salsa.debian.org/med-team/libatomic-queue/-/compare/5283b9f1903aacaa18c8acf848c970949481af5f...e07f4451b88a9a15dad2641eee9229327728dff2
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20201024/557ec747/attachment-0001.html>