[Likwid-commit] [likwid] 01/05: New upstream version 4.2.1+dfsg1
Christoph Martin
chrism at debian.org
Wed Aug 30 08:29:41 UTC 2017
This is an automated email from the git hooks/post-receive script.
chrism pushed a commit to branch master
in repository likwid.
commit 277056e24d7fccc517f26ddd673b67939602be1e
Author: Christoph Martin <martin at uni-mainz.de>
Date: Tue Aug 29 11:42:42 2017 +0200
New upstream version 4.2.1+dfsg1
---
Makefile | 62 +--
bench/includes/allocator.h | 4 +-
bench/includes/allocator_types.h | 4 +-
bench/includes/barrier.h | 4 +-
bench/includes/barrier_types.h | 4 +-
bench/includes/strUtil.h | 4 +-
bench/includes/test_types.h | 4 +-
bench/includes/threads.h | 4 +-
bench/includes/threads_types.h | 4 +-
bench/likwid-bench.c | 26 +-
bench/src/allocator.c | 4 +-
bench/src/barrier.c | 4 +-
bench/src/bench.c | 4 +-
bench/src/strUtil.c | 4 +-
bench/src/threads.c | 4 +-
bench/x86-64/sum_avx512.ptt | 33 +-
config.mk | 2 +-
doc/likwid-pin.1 | 7 +
examples/C-likwidAPI.c | 6 +-
examples/C-markerAPI.c | 56 +--
examples/F-markerAPI.F90 | 6 +-
examples/Lua-likwidAPI.lua | 11 +-
examples/Makefile | 26 +-
groups/broadwell/L3.txt | 4 +-
groups/knl/ENERGY.txt | 4 +-
groups/sandybridge/L3CACHE.txt | 4 +-
groups/sandybridgeEP/L3CACHE.txt | 4 +-
perl/set_license.pl | 10 +-
src/access-daemon/Makefile | 4 +-
src/access-daemon/accessDaemon.c | 5 +-
src/access-daemon/setFreq.c | 4 +-
src/access.c | 4 +-
src/access_client.c | 4 +-
src/access_x86.c | 4 +-
src/access_x86_msr.c | 4 +-
src/access_x86_pci.c | 4 +-
src/affinity.c | 60 +--
src/applications/likwid-agent.lua | 4 +-
src/applications/likwid-features.lua | 4 +-
src/applications/likwid-genTopoCfg.lua | 4 +-
src/applications/likwid-memsweeper.lua | 4 +-
src/applications/likwid-mpirun.lua | 33 +-
src/applications/likwid-perfctr.lua | 80 ++--
src/applications/likwid-perfscope.lua | 4 +-
src/applications/likwid-pin.lua | 70 ++--
src/applications/likwid-powermeter.lua | 4 +-
src/applications/likwid-setFrequencies.lua | 160 +-------
src/applications/likwid-topology.lua | 5 +-
src/applications/likwid.lua | 4 +-
src/bitUtil.c | 4 +-
src/calculator.c | 16 +-
src/configuration.c | 4 +-
src/cpuFeatures.c | 8 +-
src/cpustring.c | 8 +-
src/frequency.c | 4 +-
src/hashTable.c | 4 +-
src/includes/access.h | 4 +-
src/includes/access_client.h | 4 +-
src/includes/access_client_types.h | 4 +-
src/includes/access_x86.h | 4 +-
src/includes/access_x86_msr.h | 4 +-
src/includes/access_x86_pci.h | 4 +-
src/includes/affinity.h | 4 +-
src/includes/bitUtil.h | 4 +-
src/includes/calculator.h | 4 +-
src/includes/calculator_stack.h | 4 +-
src/includes/configuration.h | 4 +-
src/includes/cpuFeatures.h | 6 +-
src/includes/cpuFeatures_types.h | 4 +-
src/includes/cpuid.h | 4 +-
src/includes/error.h | 4 +-
src/includes/frequency.h | 30 ++
src/includes/hashTable.h | 4 +-
src/includes/libperfctr_types.h | 4 +-
src/includes/likwid.h | 4 +-
src/includes/lock.h | 4 +-
src/includes/memsweep.h | 4 +-
src/includes/numa.h | 6 +-
src/includes/numa_hwloc.h | 4 +-
src/includes/numa_proc.h | 4 +-
src/includes/pci_hwloc.h | 4 +-
src/includes/pci_proc.h | 4 +-
src/includes/pci_types.h | 4 +-
src/includes/perfgroup.h | 4 +-
src/includes/perfmon.h | 4 +-
src/includes/perfmon_atom.h | 4 +-
src/includes/perfmon_atom_events.txt | 4 +-
src/includes/perfmon_broadwell.h | 12 +-
src/includes/perfmon_broadwellEP_counters.h | 6 +-
src/includes/perfmon_broadwellEP_events.txt | 6 +-
src/includes/perfmon_broadwell_counters.h | 6 +-
src/includes/perfmon_broadwell_events.txt | 6 +-
src/includes/perfmon_broadwelld_counters.h | 6 +-
src/includes/perfmon_broadwelld_events.txt | 6 +-
src/includes/perfmon_core2.h | 6 +-
src/includes/perfmon_core2_counters.h | 6 +-
src/includes/perfmon_core2_events.txt | 6 +-
src/includes/perfmon_goldmont.h | 4 +-
src/includes/perfmon_goldmont_counters.h | 4 +-
src/includes/perfmon_goldmont_events.txt | 4 +-
src/includes/perfmon_haswell.h | 6 +-
src/includes/perfmon_haswellEP_counters.h | 6 +-
src/includes/perfmon_haswellEP_events.txt | 6 +-
src/includes/perfmon_haswell_counters.h | 6 +-
src/includes/perfmon_haswell_events.txt | 6 +-
src/includes/perfmon_interlagos.h | 6 +-
src/includes/perfmon_interlagos_counters.h | 6 +-
src/includes/perfmon_interlagos_events.txt | 6 +-
src/includes/perfmon_ivybridge.h | 27 +-
src/includes/perfmon_ivybridgeEP_counters.h | 6 +-
src/includes/perfmon_ivybridgeEP_events.txt | 6 +-
src/includes/perfmon_ivybridge_counters.h | 6 +-
src/includes/perfmon_ivybridge_events.txt | 6 +-
src/includes/perfmon_k10.h | 6 +-
src/includes/perfmon_k10_counters.h | 6 +-
src/includes/perfmon_k10_events.txt | 6 +-
src/includes/perfmon_k8.h | 6 +-
src/includes/perfmon_k8_events.txt | 6 +-
src/includes/perfmon_kabini.h | 6 +-
src/includes/perfmon_kabini_counters.h | 6 +-
src/includes/perfmon_kabini_events.txt | 62 ++-
src/includes/perfmon_knl.h | 8 +-
src/includes/perfmon_knl_counters.h | 6 +-
src/includes/perfmon_knl_events.txt | 6 +-
src/includes/perfmon_nehalem.h | 6 +-
src/includes/perfmon_nehalemEX.h | 6 +-
src/includes/perfmon_nehalemEX_counters.h | 6 +-
src/includes/perfmon_nehalemEX_events.txt | 6 +-
src/includes/perfmon_nehalemEX_westmereEX_common.h | 6 +-
src/includes/perfmon_nehalem_counters.h | 6 +-
src/includes/perfmon_nehalem_events.txt | 6 +-
src/includes/perfmon_p6_events.txt | 6 +-
src/includes/perfmon_perf.h | 6 +-
src/includes/perfmon_perfevent.h | 6 +-
src/includes/perfmon_phi.h | 6 +-
src/includes/perfmon_phi_counters.h | 6 +-
src/includes/perfmon_phi_events.txt | 6 +-
src/includes/perfmon_pm.h | 6 +-
src/includes/perfmon_pm_counters.h | 6 +-
src/includes/perfmon_pm_events.txt | 6 +-
src/includes/perfmon_sandybridge.h | 6 +-
src/includes/perfmon_sandybridgeEP_counters.h | 6 +-
src/includes/perfmon_sandybridgeEP_events.txt | 6 +-
src/includes/perfmon_sandybridge_counters.h | 6 +-
src/includes/perfmon_sandybridge_events.txt | 6 +-
src/includes/perfmon_silvermont.h | 6 +-
src/includes/perfmon_silvermont_counters.h | 6 +-
src/includes/perfmon_silvermont_events.txt | 6 +-
src/includes/perfmon_skylake.h | 30 +-
src/includes/perfmon_skylake_counters.h | 26 +-
src/includes/perfmon_skylake_events.txt | 19 +-
src/includes/perfmon_types.h | 6 +-
src/includes/perfmon_westmere.h | 6 +-
src/includes/perfmon_westmereEX.h | 6 +-
src/includes/perfmon_westmereEX_counters.h | 6 +-
src/includes/perfmon_westmereEX_events.txt | 6 +-
src/includes/perfmon_westmere_events.txt | 6 +-
src/includes/power.h | 4 +-
src/includes/power_types.h | 4 +-
src/includes/registers.h | 44 +--
src/includes/registers_types.h | 4 +-
src/includes/textcolor.h | 4 +-
src/includes/thermal.h | 4 +-
src/includes/thermal_types.h | 4 +-
src/includes/timer.h | 4 +-
src/includes/timer_types.h | 4 +-
src/includes/tlb-info.h | 4 +-
src/includes/topology.h | 5 +-
src/includes/topology_cpuid.h | 4 +-
src/includes/topology_hwloc.h | 4 +-
src/includes/topology_proc.h | 4 +-
src/includes/topology_types.h | 4 +-
src/includes/tree.h | 4 +-
src/includes/tree_types.h | 4 +-
src/includes/types.h | 4 +-
src/libperfctr.c | 39 +-
src/likwid.f90 | 6 +-
src/likwid_f90_interface.c | 4 +-
src/luawid.c | 69 ++--
src/memsweep.c | 4 +-
src/numa.c | 6 +-
src/numa_hwloc.c | 7 +-
src/numa_proc.c | 9 +-
src/pci_hwloc.c | 4 +-
src/pci_proc.c | 4 +-
src/perfgroup.c | 98 ++---
src/perfmon.c | 27 +-
src/perfmon_perf.c | 4 +-
src/power.c | 5 +-
src/pthread-overload/Makefile | 4 +-
src/pthread-overload/pthread-overload.c | 38 +-
src/thermal.c | 4 +-
src/timer.c | 4 +-
src/topology.c | 10 +-
src/topology_cpuid.c | 4 +-
src/topology_hwloc.c | 8 +-
src/topology_proc.c | 4 +-
src/tree.c | 4 +-
test/stream-API.c | 437 ---------------------
test/stream.c | 249 ------------
test/stream.cc | 227 -----------
test/stream_cilk.c | 217 ----------
202 files changed, 1069 insertions(+), 2080 deletions(-)
diff --git a/Makefile b/Makefile
index bc126b7..aa08122 100644
--- a/Makefile
+++ b/Makefile
@@ -4,13 +4,13 @@
#
# Description: Central Makefile
#
-# Version: <VERSION>
-# Released: <DATE>
+# Version: 4.2
+# Released: 22.12.2016
#
# Author: Jan Treibig (jt), jan.treibig at gmail.com
# Project: likwid
#
-# Copyright (C) 2016 Jan Treibig
+# Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
@@ -248,11 +248,11 @@ ifneq ($(COMPILER),MIC)
install_daemon:
@echo "===> INSTALL access daemon to $(ACCESSDAEMON)"
@mkdir -p `dirname $(ACCESSDAEMON)`
- @install -m 4775 $(INSTALL_CHOWN) $(DAEMON_TARGET) $(ACCESSDAEMON)
+ @install -m 4755 $(INSTALL_CHOWN) $(DAEMON_TARGET) $(ACCESSDAEMON)
move_daemon:
@echo "===> MOVE access daemon from $(ACCESSDAEMON) to $(INSTALLED_ACCESSDAEMON)"
@mkdir -p `dirname $(INSTALLED_ACCESSDAEMON)`
- @install -m 4775 $(INSTALL_CHOWN) $(ACCESSDAEMON) $(INSTALLED_ACCESSDAEMON)
+ @install -m 4755 $(INSTALL_CHOWN) $(ACCESSDAEMON) $(INSTALLED_ACCESSDAEMON)
uninstall_daemon:
@echo "===> REMOVING access daemon from $(ACCESSDAEMON)"
@rm -f $(ACCESSDAEMON)
@@ -285,11 +285,11 @@ ifneq ($(COMPILER),MIC)
install_freq:
@echo "===> INSTALL setFrequencies tool to $(PREFIX)/sbin/$(FREQ_TARGET)"
@mkdir -p $(PREFIX)/sbin
- @install -m 4775 $(INSTALL_CHOWN) $(FREQ_TARGET) $(PREFIX)/sbin/$(FREQ_TARGET)
+ @install -m 4755 $(INSTALL_CHOWN) $(FREQ_TARGET) $(PREFIX)/sbin/$(FREQ_TARGET)
move_freq:
@echo "===> MOVE setFrequencies tool from $(PREFIX)/sbin/$(FREQ_TARGET) to $(INSTALLED_PREFIX)/sbin/$(FREQ_TARGET)"
@mkdir -p $(INSTALLED_PREFIX)/sbin
- @install -m 4775 $(INSTALL_CHOWN) $(PREFIX)/sbin/$(FREQ_TARGET) $(INSTALLED_PREFIX)/sbin/$(FREQ_TARGET)
+ @install -m 4755 $(INSTALL_CHOWN) $(PREFIX)/sbin/$(FREQ_TARGET) $(INSTALLED_PREFIX)/sbin/$(FREQ_TARGET)
uninstall_freq:
@echo "===> REMOVING setFrequencies tool from $(PREFIX)/sbin/$(FREQ_TARGET)"
@rm -f $(PREFIX)/sbin/$(FREQ_TARGET)
@@ -320,7 +320,7 @@ endif
install: install_daemon install_freq
@echo "===> INSTALL applications to $(BINPREFIX)"
@mkdir -p $(BINPREFIX)
- @chmod 775 $(BINPREFIX)
+ @chmod 755 $(BINPREFIX)
@for APP in $(L_APPS); do \
install -m 755 $$APP $(BINPREFIX); \
done
@@ -334,11 +334,11 @@ install: install_daemon install_freq
@install -m 755 perl/feedGnuplot $(BINPREFIX)
@echo "===> INSTALL lua to likwid interface to $(PREFIX)/share/lua"
@mkdir -p $(PREFIX)/share/lua
- @chmod 775 $(PREFIX)/share/lua
+ @chmod 755 $(PREFIX)/share/lua
@install -m 644 likwid.lua $(PREFIX)/share/lua
@echo "===> INSTALL libraries to $(LIBPREFIX)"
@mkdir -p $(LIBPREFIX)
- @chmod 775 $(LIBPREFIX)
+ @chmod 755 $(LIBPREFIX)
@install -m 755 $(TARGET_LIB) $(LIBPREFIX)/$(TARGET_LIB).$(VERSION).$(RELEASE)
@install -m 755 liblikwidpin.so $(LIBPREFIX)/liblikwidpin.so.$(VERSION).$(RELEASE)
@install -m 755 $(TARGET_HWLOC_LIB) $(LIBPREFIX)/$(shell basename $(TARGET_HWLOC_LIB)).$(VERSION).$(RELEASE)
@@ -357,7 +357,7 @@ install: install_daemon install_freq
fi
@echo "===> INSTALL man pages to $(MANPREFIX)/man1"
@mkdir -p $(MANPREFIX)/man1
- @chmod 775 $(MANPREFIX)/man1
+ @chmod 755 $(MANPREFIX)/man1
@sed -e "s/<VERSION>/$(VERSION)/g" -e "s/<DATE>/$(DATE)/g" < $(DOC_DIR)/likwid-topology.1 > $(MANPREFIX)/man1/likwid-topology.1
@sed -e "s/<VERSION>/$(VERSION)/g" -e "s/<DATE>/$(DATE)/g" -e "s+<PREFIX>+$(PREFIX)+g" < $(DOC_DIR)/likwid-perfctr.1 > $(MANPREFIX)/man1/likwid-perfctr.1
@sed -e "s/<VERSION>/$(VERSION)/g" -e "s/<DATE>/$(DATE)/g" < $(DOC_DIR)/likwid-powermeter.1 > $(MANPREFIX)/man1/likwid-powermeter.1
@@ -377,28 +377,28 @@ install: install_daemon install_freq
@chmod 644 $(MANPREFIX)/man1/likwid-*
@echo "===> INSTALL headers to $(PREFIX)/include"
@mkdir -p $(PREFIX)/include
- @chmod 775 $(PREFIX)/include
+ @chmod 755 $(PREFIX)/include
@install -m 644 src/includes/likwid.h $(PREFIX)/include/
@install -m 644 src/includes/bstrlib.h $(PREFIX)/include/
$(FORTRAN_INSTALL)
@echo "===> INSTALL groups to $(PREFIX)/share/likwid/perfgroups"
@mkdir -p $(PREFIX)/share/likwid/perfgroups
- @chmod 775 $(PREFIX)/share/likwid
- @chmod 775 $(PREFIX)/share/likwid/perfgroups
+ @chmod 755 $(PREFIX)/share/likwid
+ @chmod 755 $(PREFIX)/share/likwid/perfgroups
@cp -rf groups/* $(PREFIX)/share/likwid/perfgroups
- @chmod 775 $(PREFIX)/share/likwid/perfgroups/*
+ @chmod 755 $(PREFIX)/share/likwid/perfgroups/*
@find $(PREFIX)/share/likwid/perfgroups -name "*.txt" -exec chmod 644 {} \;
@echo "===> INSTALL monitoring groups to $(PREFIX)/share/likwid/mongroups"
@mkdir -p $(PREFIX)/share/likwid/mongroups
- @chmod 775 $(PREFIX)/share/likwid/mongroups
+ @chmod 755 $(PREFIX)/share/likwid/mongroups
@cp -rf monitoring/groups/* $(PREFIX)/share/likwid/mongroups
- @chmod 775 $(PREFIX)/share/likwid/mongroups/*
+ @chmod 755 $(PREFIX)/share/likwid/mongroups/*
@find $(PREFIX)/share/likwid/mongroups -name "*.txt" -exec chmod 644 {} \;
@mkdir -p $(PREFIX)/share/likwid/docs
- @chmod 775 $(PREFIX)/share/likwid/docs
+ @chmod 755 $(PREFIX)/share/likwid/docs
@install -m 644 doc/bstrlib.txt $(PREFIX)/share/likwid/docs
@mkdir -p $(PREFIX)/share/likwid/examples
- @chmod 775 $(PREFIX)/share/likwid/examples
+ @chmod 755 $(PREFIX)/share/likwid/examples
@install -m 644 examples/* $(PREFIX)/share/likwid/examples
@echo "===> INSTALL default likwid-agent.conf to $(PREFIX)/share/likwid/mongroups"
@sed -e "s+<PREFIX>+$(PREFIX)+g" monitoring/likwid-agent.conf > $(PREFIX)/share/likwid/mongroups/likwid-agent.conf
@@ -413,7 +413,7 @@ install: install_daemon install_freq
move: move_daemon move_freq
@echo "===> MOVE applications from $(BINPREFIX) to $(INSTALLED_BINPREFIX)"
@mkdir -p $(INSTALLED_BINPREFIX)
- @chmod 775 $(INSTALLED_BINPREFIX)
+ @chmod 755 $(INSTALLED_BINPREFIX)
@for APP in $(L_APPS); do \
install -m 755 $(BINPREFIX)/$$APP $(INSTALLED_BINPREFIX); \
done
@@ -425,11 +425,11 @@ move: move_daemon move_freq
@install -m 755 $(BINPREFIX)/feedGnuplot $(INSTALLED_BINPREFIX)
@echo "===> MOVE lua to likwid interface from $(PREFIX)/share/lua to $(INSTALLED_PREFIX)/share/lua"
@mkdir -p $(INSTALLED_PREFIX)/share/lua
- @chmod 775 $(INSTALLED_PREFIX)/share/lua
+ @chmod 755 $(INSTALLED_PREFIX)/share/lua
@install -m 644 $(PREFIX)/share/lua/likwid.lua $(INSTALLED_PREFIX)/share/lua
@echo "===> MOVE libraries from $(LIBPREFIX) to $(INSTALLED_LIBPREFIX)"
@mkdir -p $(INSTALLED_LIBPREFIX)
- @chmod 775 $(INSTALLED_LIBPREFIX)
+ @chmod 755 $(INSTALLED_LIBPREFIX)
@install -m 755 $(LIBPREFIX)/$(TARGET_LIB).$(VERSION).$(RELEASE) $(INSTALLED_LIBPREFIX)/$(TARGET_LIB).$(VERSION).$(RELEASE)
@install -m 755 $(LIBPREFIX)/$(PINLIB).$(VERSION).$(RELEASE) $(INSTALLED_LIBPREFIX)/$(PINLIB).$(VERSION).$(RELEASE)
@install -m 755 $(LIBPREFIX)/$(shell basename $(TARGET_HWLOC_LIB)).$(VERSION).$(RELEASE) $(INSTALLED_LIBPREFIX)/$(shell basename $(TARGET_HWLOC_LIB)).$(VERSION).$(RELEASE)
@@ -444,32 +444,32 @@ move: move_daemon move_freq
@cd $(INSTALLED_LIBPREFIX) && ln -fs $(shell basename $(TARGET_LUA_LIB)).$(VERSION).$(RELEASE) $(shell basename $(TARGET_LUA_LIB)).$(VERSION)
@echo "===> MOVE man pages from $(MANPREFIX)/man1 to $(INSTALLED_MANPREFIX)/man1"
@mkdir -p $(INSTALLED_MANPREFIX)/man1
- @chmod 775 $(INSTALLED_MANPREFIX)/man1
+ @chmod 755 $(INSTALLED_MANPREFIX)/man1
@install -m 644 $(MANPREFIX)/man1/*.1 $(INSTALLED_MANPREFIX)/man1
@echo "===> MOVE headers from $(PREFIX)/include to $(INSTALLED_PREFIX)/include"
@mkdir -p $(INSTALLED_PREFIX)/include
- @chmod 775 $(INSTALLED_PREFIX)/include
+ @chmod 755 $(INSTALLED_PREFIX)/include
@install -m 644 $(PREFIX)/include/likwid.h $(INSTALLED_PREFIX)/include/likwid.h
@install -m 644 $(PREFIX)/include/bstrlib.h $(INSTALLED_PREFIX)/include/bstrlib.h
@if [ -e $(PREFIX)/include/likwid.mod ]; then install $(PREFIX)/include/likwid.mod $(INSTALLED_PREFIX)/include/likwid.mod; fi
@echo "===> MOVE groups from $(PREFIX)/share/likwid/perfgroups to $(INSTALLED_PREFIX)/share/likwid/perfgroups"
@mkdir -p $(INSTALLED_PREFIX)/share/likwid/perfgroups
- @chmod 775 $(INSTALLED_PREFIX)/share/likwid
- @chmod 775 $(INSTALLED_PREFIX)/share/likwid/perfgroups
+ @chmod 755 $(INSTALLED_PREFIX)/share/likwid
+ @chmod 755 $(INSTALLED_PREFIX)/share/likwid/perfgroups
@cp -rf $(PREFIX)/share/likwid/perfgroups/* $(INSTALLED_PREFIX)/share/likwid/perfgroups
- @chmod 775 $(INSTALLED_PREFIX)/share/likwid/perfgroups/*
+ @chmod 755 $(INSTALLED_PREFIX)/share/likwid/perfgroups/*
@find $(INSTALLED_PREFIX)/share/likwid/perfgroups -name "*.txt" -exec chmod 644 {} \;
@echo "===> MOVE monitoring groups from $(PREFIX)/share/likwid/mongroups to $(INSTALLED_PREFIX)/share/likwid/mongroups"
@mkdir -p $(INSTALLED_PREFIX)/share/likwid/mongroups
- @chmod 775 $(INSTALLED_PREFIX)/share/likwid/mongroups
+ @chmod 755 $(INSTALLED_PREFIX)/share/likwid/mongroups
@cp -rf $(PREFIX)/share/likwid/mongroups/* $(INSTALLED_PREFIX)/share/likwid/mongroups
- @chmod 775 $(INSTALLED_PREFIX)/share/likwid/mongroups/*
+ @chmod 755 $(INSTALLED_PREFIX)/share/likwid/mongroups/*
@find $(INSTALLED_PREFIX)/share/likwid/mongroups -name "*.txt" -exec chmod 644 {} \;
@mkdir -p $(INSTALLED_PREFIX)/share/likwid/docs
- @chmod 775 $(INSTALLED_PREFIX)/share/likwid/docs
+ @chmod 755 $(INSTALLED_PREFIX)/share/likwid/docs
@install -m 644 $(PREFIX)/share/likwid/docs/bstrlib.txt $(INSTALLED_PREFIX)/share/likwid/docs
@mkdir -p $(INSTALLED_PREFIX)/share/likwid/examples
- @chmod 775 $(INSTALLED_PREFIX)/share/likwid/examples
+ @chmod 755 $(INSTALLED_PREFIX)/share/likwid/examples
@install -m 644 examples/* $(INSTALLED_PREFIX)/share/likwid/examples
@echo "===> MOVE default likwid-agent.conf from $(PREFIX)/share/likwid/mongroups to $(INSTALLED_PREFIX)/share/likwid/mongroups"
@install $(PREFIX)/share/likwid/mongroups/likwid-agent.conf $(INSTALLED_PREFIX)/share/likwid/mongroups/likwid-agent.conf
diff --git a/bench/includes/allocator.h b/bench/includes/allocator.h
index bb1da23..09f359b 100644
--- a/bench/includes/allocator.h
+++ b/bench/includes/allocator.h
@@ -5,8 +5,8 @@
*
* Description: Header File allocator Module.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: none
diff --git a/bench/includes/allocator_types.h b/bench/includes/allocator_types.h
index 0f3aae9..76a6eb7 100644
--- a/bench/includes/allocator_types.h
+++ b/bench/includes/allocator_types.h
@@ -5,8 +5,8 @@
*
* Description: Header File types of allocator Module.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: none
diff --git a/bench/includes/barrier.h b/bench/includes/barrier.h
index bb9b969..9832ece 100644
--- a/bench/includes/barrier.h
+++ b/bench/includes/barrier.h
@@ -5,8 +5,8 @@
*
* Description: Header File barrier Module
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/bench/includes/barrier_types.h b/bench/includes/barrier_types.h
index 8ec038b..d5b92d0 100644
--- a/bench/includes/barrier_types.h
+++ b/bench/includes/barrier_types.h
@@ -5,8 +5,8 @@
*
* Description: Type Definitions for barrier Module
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/bench/includes/strUtil.h b/bench/includes/strUtil.h
index 6672237..0b1e502 100644
--- a/bench/includes/strUtil.h
+++ b/bench/includes/strUtil.h
@@ -4,8 +4,8 @@
*
* Description: Some sting functions
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/bench/includes/test_types.h b/bench/includes/test_types.h
index b4080d1..ffb11bc 100644
--- a/bench/includes/test_types.h
+++ b/bench/includes/test_types.h
@@ -5,8 +5,8 @@
*
* Description: Type definitions for benchmarking framework
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/bench/includes/threads.h b/bench/includes/threads.h
index f0953b5..bca27af 100644
--- a/bench/includes/threads.h
+++ b/bench/includes/threads.h
@@ -5,8 +5,8 @@
*
* Description: Header file of pthread interface module
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/bench/includes/threads_types.h b/bench/includes/threads_types.h
index 5ddab9b..65c951d 100644
--- a/bench/includes/threads_types.h
+++ b/bench/includes/threads_types.h
@@ -5,8 +5,8 @@
*
* Description: Types file for threads module.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/bench/likwid-bench.c b/bench/likwid-bench.c
index 28d40a9..3d66203 100644
--- a/bench/likwid-bench.c
+++ b/bench/likwid-bench.c
@@ -5,8 +5,8 @@
*
* Description: A flexible and extensible benchmarking toolbox
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -518,19 +518,31 @@ int main(int argc, char** argv)
ownprintf("MByte/s:\t\t%.2f\n",
1.0E-06 * ( (double) threads_data[0].data.iter * realSize * test->bytes/ time));
- cycPerCL = ((double) maxCycles / (double) (threads_data[0].data.iter*(realSize/test->streams)* 8 / 64.0));
- switch ( test->type )
+ size_t destsize = 0;
+ size_t datasize = 0;
+ double perUpFactor = 0.0;
+ switch (test->type)
{
case INT:
+ datasize = test->bytes/sizeof(int);
+ destsize = test->bytes/test->streams;
+ perUpFactor = (64.0/sizeof(int));
+ break;
case SINGLE:
- cycPerUp = cycPerCL/(16.0*test->streams);
+ datasize = test->bytes/sizeof(float);
+ destsize = test->bytes/test->streams;
+ perUpFactor = (64.0/sizeof(float));
break;
case DOUBLE:
- cycPerUp = cycPerCL/(8.0*test->streams);
+ datasize = test->bytes/sizeof(double);
+ destsize = test->bytes/test->streams;
+ perUpFactor = (64.0/sizeof(double));
break;
}
- ownprintf("Cycles per update:\t%f\n", cycPerUp);
+ cycPerCL = (double) maxCycles/(threads_data[0].data.iter*realSize*destsize/64);
+ ownprintf("Cycles per update:\t%f\n", cycPerCL/perUpFactor);
ownprintf("Cycles per cacheline:\t%f\n", cycPerCL);
+
ownprintf("Loads per update:\t%ld\n", test->loads );
ownprintf("Stores per update:\t%ld\n", test->stores );
if (test->loads > 0 && test->stores > 0)
diff --git a/bench/src/allocator.c b/bench/src/allocator.c
index 3c37755..f730b60 100644
--- a/bench/src/allocator.c
+++ b/bench/src/allocator.c
@@ -5,8 +5,8 @@
*
* Description: Implementation of allocator module.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/bench/src/barrier.c b/bench/src/barrier.c
index 4abe827..c7bf6d6 100644
--- a/bench/src/barrier.c
+++ b/bench/src/barrier.c
@@ -5,8 +5,8 @@
*
* Description: Implementation of threaded spin loop barrier
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/bench/src/bench.c b/bench/src/bench.c
index 83b89d7..a6a9212 100644
--- a/bench/src/bench.c
+++ b/bench/src/bench.c
@@ -5,8 +5,8 @@
*
* Description: Benchmarking framework for likwid-bench
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
diff --git a/bench/src/strUtil.c b/bench/src/strUtil.c
index c9e1c7a..0f6074e 100644
--- a/bench/src/strUtil.c
+++ b/bench/src/strUtil.c
@@ -5,8 +5,8 @@
*
* Description: Utility string routines building upon bstrlib
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com.
* Project: likwid
diff --git a/bench/src/threads.c b/bench/src/threads.c
index 8e99a77..76225b7 100644
--- a/bench/src/threads.c
+++ b/bench/src/threads.c
@@ -5,8 +5,8 @@
*
* Description: High level interface to pthreads
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/bench/x86-64/sum_avx512.ptt b/bench/x86-64/sum_avx512.ptt
index 081701b..c01ecf5 100644
--- a/bench/x86-64/sum_avx512.ptt
+++ b/bench/x86-64/sum_avx512.ptt
@@ -8,23 +8,22 @@ STORES 0
INSTR_CONST 24
INSTR_LOOP 11
UOPS 18
-vxorpd FPR9, FPR9, FPR9
-vxorpd FPR1, FPR1, FPR1
-vmovapd FPR2, FPR1
-vmovapd FPR3, FPR1
-vmovapd FPR4, FPR1
-vmovapd FPR5, FPR1
-vmovapd FPR6, FPR1
-vmovapd FPR7, FPR1
-vmovapd FPR8, FPR1
+vpxorq zmm1, zmm1, zmm1
+vmovapd zmm2, zmm1
+vmovapd zmm3, zmm1
+vmovapd zmm4, zmm1
+vmovapd zmm5, zmm1
+vmovapd zmm6, zmm1
+vmovapd zmm7, zmm1
+vmovapd zmm8, zmm1
LOOP 64
-vaddpd FPR1, FPR1, [STR0 + GPR1 * 8]
-vaddpd FPR2, FPR2, [STR0 + GPR1 * 8 + 64]
-vaddpd FPR3, FPR3, [STR0 + GPR1 * 8 + 128]
-vaddpd FPR4, FPR4, [STR0 + GPR1 * 8 + 192]
-vaddpd FPR5, FPR5, [STR0 + GPR1 * 8 + 256]
-vaddpd FPR6, FPR6, [STR0 + GPR1 * 8 + 320]
-vaddpd FPR7, FPR7, [STR0 + GPR1 * 8 + 384]
-vaddpd FPR8, FPR8, [STR0 + GPR1 * 8 + 448]
+vaddpd zmm1, zmm1, [STR0 + GPR1 * 8]
+vaddpd zmm2, zmm2, [STR0 + GPR1 * 8 + 64]
+vaddpd zmm3, zmm3, [STR0 + GPR1 * 8 + 128]
+vaddpd zmm4, zmm4, [STR0 + GPR1 * 8 + 192]
+vaddpd zmm5, zmm5, [STR0 + GPR1 * 8 + 256]
+vaddpd zmm6, zmm6, [STR0 + GPR1 * 8 + 320]
+vaddpd zmm7, zmm7, [STR0 + GPR1 * 8 + 384]
+vaddpd zmm8, zmm8, [STR0 + GPR1 * 8 + 448]
diff --git a/config.mk b/config.mk
index d12c19c..ead3571 100644
--- a/config.mk
+++ b/config.mk
@@ -93,6 +93,6 @@ DATE = 22.12.2016
RPATHS = -Wl,-rpath=$(INSTALLED_LIBPREFIX)
LIKWIDLOCKPATH = /var/run/likwid.lock
LIKWIDSOCKETBASE = /tmp/likwid # -%d will be added automatically to the socket name
-LIBLIKWIDPIN = $(abspath $(INSTALLED_PREFIX)/lib/liblikwidpin.so.$(VERSION).$(RELEASE))
+LIBLIKWIDPIN = $(abspath $(INSTALLED_LIBPREFIX)/liblikwidpin.so.$(VERSION).$(RELEASE))
LIKWIDFILTERPATH = $(abspath $(INSTALLED_PREFIX)/share/likwid/filter)
LIKWIDGROUPPATH = $(abspath $(INSTALLED_PREFIX)/share/likwid/perfgroups)
diff --git a/doc/likwid-pin.1 b/doc/likwid-pin.1
index 264d5cc..a2ccf0e 100644
--- a/doc/likwid-pin.1
+++ b/doc/likwid-pin.1
@@ -175,6 +175,13 @@ A common use-case for the numbering by expression is pinning of an application o
This command schedules one thread per physical CPU core for
.B ./myApp.
+.SH IMPORTANT NOTICE
+The detection of shepherd threads works for Intel's/LLVM OpenMP runtime (>=12.0), for GCC's OpenMP runtime as well as for PGI's OpenMP runtime. If you encounter problems with pinning,
+please set a proper skip mask to skip the undetected shepherd threads.
+Intel OpenMP runtime 11.0/11.1 requires setting a skip mask of
+.B 0x1.
+
+
.SH AUTHOR
Written by Thomas Roehl <thomas.roehl at googlemail.com>.
.SH BUGS
diff --git a/examples/C-likwidAPI.c b/examples/C-likwidAPI.c
index d33b1cf..5faa752 100644
--- a/examples/C-likwidAPI.c
+++ b/examples/C-likwidAPI.c
@@ -5,13 +5,13 @@
*
* Description: Example how to use the LIKWID API in C/C++ applications
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
diff --git a/examples/C-markerAPI.c b/examples/C-markerAPI.c
index 3faaccc..3a722c1 100644
--- a/examples/C-markerAPI.c
+++ b/examples/C-markerAPI.c
@@ -5,13 +5,13 @@
*
* Description: Example how to use the C/C++ Marker API
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
@@ -30,6 +30,7 @@
#include <stdlib.h>
#include <stdio.h>
+#include <unistd.h>
#include <omp.h>
#include <likwid.h>
@@ -38,7 +39,7 @@
int main(int argc, char* argv[])
{
- int i;
+ int i, g;
int nevents = 10;
double events[10];
double time;
@@ -54,29 +55,34 @@ int main(int argc, char* argv[])
LIKWID_MARKER_REGISTER("example");
}
-
- #pragma omp parallel
+ // perfmon_getNumberOfGroups is not part of the MarkerAPI,
+ // it belongs to the normal LIKWID API. But the MarkerAPI
+ // has no function to get the number of configured groups.
+ for (g=0;g < perfmon_getNumberOfGroups(); g++)
{
- printf("Thread %d sleeps now for %d seconds\n", omp_get_thread_num(), SLEEPTIME);
- // Start measurements inside a parallel region
- LIKWID_MARKER_START("example");
- // Insert your code here.
- // Often contains an OpenMP for pragma. Regions can be nested.
- sleep(SLEEPTIME);
- // Stop measurements inside a parallel region
- LIKWID_MARKER_STOP("example");
- printf("Thread %d wakes up again\n", omp_get_thread_num());
- // If multiple groups given, you can switch to the next group
- LIKWID_MARKER_SWITCH;
- // If you need the performance data inside your application, use
- LIKWID_MARKER_GET("example", &nevents, events, &time, &count);
- // where events is an array of doubles with nevents entries,
- // time is a double* and count an int*.
- printf("Region example measures %d events, total measurement time is %f\n", nevents, time);
- printf("The region was called %d times\n", count);
- for (i = 0; i < nevents; i++)
+ #pragma omp parallel
{
- printf("Event %d: %f\n", i, events[i]);
+ printf("Thread %d sleeps now for %d seconds\n", omp_get_thread_num(), SLEEPTIME);
+ // Start measurements inside a parallel region
+ LIKWID_MARKER_START("example");
+ // Insert your code here.
+ // Often contains an OpenMP for pragma. Regions can be nested.
+ sleep(SLEEPTIME);
+ // Stop measurements inside a parallel region
+ LIKWID_MARKER_STOP("example");
+ printf("Thread %d wakes up again\n", omp_get_thread_num());
+ // If you need the performance data inside your application, use
+ LIKWID_MARKER_GET("example", &nevents, events, &time, &count);
+ // where events is an array of doubles with nevents entries,
+ // time is a double* and count an int*.
+ printf("Region example measures %d events, total measurement time is %f\n", nevents, time);
+ printf("The region was called %d times\n", count);
+ for (i = 0; i < nevents; i++)
+ {
+ printf("Event %d: %f\n", i, events[i]);
+ }
+ // If multiple groups given, you can switch to the next group
+ LIKWID_MARKER_SWITCH;
}
}
diff --git a/examples/F-markerAPI.F90 b/examples/F-markerAPI.F90
index 30a0457..c1eb5e7 100644
--- a/examples/F-markerAPI.F90
+++ b/examples/F-markerAPI.F90
@@ -4,13 +4,13 @@
!
! Description: Example how to use the Fortran90 Marker API
!
-! Version: <VERSION>
-! Released: <DATE>
+! Version: 4.2
+! Released: 22.12.2016
!
! Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
! Project: likwid
!
-! Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+! Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
!
! This program is free software: you can redistribute it and/or modify it under
! the terms of the GNU General Public License as published by the Free Software
diff --git a/examples/Lua-likwidAPI.lua b/examples/Lua-likwidAPI.lua
index d5d4ca2..3ad8729 100644
--- a/examples/Lua-likwidAPI.lua
+++ b/examples/Lua-likwidAPI.lua
@@ -7,13 +7,13 @@
*
* Description: Example how to use the LIKWID API in Lua scripts
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
@@ -73,8 +73,12 @@ if likwid.startCounters() < 0 then
likwid.putTopology()
os.exit(1)
end
+
+
-- Application code
likwid.sleep(2)
+
+
if likwid.stopCounters() < 0 then
printf(string.format("Failed to stop group %d in LIKWID's performance monitoring module\n", gid))
likwid.finalize()
@@ -89,5 +93,4 @@ for i,cpu in pairs(cpus) do
end
-likwid.putTopology()
likwid.finalize()
diff --git a/examples/Makefile b/examples/Makefile
index cc21c3c..c271574 100644
--- a/examples/Makefile
+++ b/examples/Makefile
@@ -2,10 +2,13 @@
include ../config.mk
include ../make/include_$(COMPILER).mk
+LIKWID_BINDIR ?= $(PREFIX)/bin
LIKWID_INCLUDE ?= -I$(PREFIX)/include
-LIKWID_LIB ?= -L$(PREFIX)/lib -llikwid
+LIKWID_LIBDIR ?= -L$(PREFIX)/lib
+LIKWID_LIB ?= -llikwid
+LIKWID_DEFINES ?= -DLIKWID_PERFMON
-all: C-markerAPI C-likwidAPI F-markerAPI Lua-likwidAPI C-markerAPI-run C-likwidAPI-run F-markerAPI-run Lua-likwidAPI-run
+all: C-markerAPI C-likwidAPI F-markerAPI Lua-likwidAPI C-markerAPI-run C-likwidAPI-run Lua-likwidAPI-run F-markerAPI-run
help:
@echo "Help message for examples included in LIKWID"
@@ -22,34 +25,34 @@ help:
@echo "To run the built example append '-run' to the name and add it to make: make C-likwidAPI-run"
C-markerAPI:
- $(CC) -fopenmp -DLIKWID_PERFMON -I$(PREFIX)/include -L$(PREFIX)/lib C-markerAPI.c -o C-markerAPI -llikwid -lm
+ $(CC) -fopenmp $(LIKWID_DEFINES) $(LIKWID_INCLUDE) $(LIKWID_LIBDIR) C-markerAPI.c -o C-markerAPI $(LIKWID_LIB) -lm
C-markerAPI-run: C-markerAPI
- $(PREFIX)/bin/likwid-perfctr -C 0 -g INSTR_RETIRED_ANY:FIXC0 -m ./C-markerAPI
+ $(LIKWID_BINDIR)/likwid-perfctr -C 0 -g INSTR_RETIRED_ANY:FIXC0 -m ./C-markerAPI
C-likwidAPI:
- $(CC) -fopenmp -I$(PREFIX)/include -L$(PREFIX)/lib C-likwidAPI.c -o C-likwidAPI -llikwid -lm
+ $(CC) -fopenmp $(LIKWID_INCLUDE) $(LIKWID_LIBDIR) C-likwidAPI.c -o C-likwidAPI $(LIKWID_LIB) -lm
C-likwidAPI-run: C-likwidAPI
./C-likwidAPI
C-internalMarkerAPI:
- $(CC) -g -fopenmp -DLIKWID_PERFMON -I$(PREFIX)/include -L$(PREFIX)/lib C-internalMarkerAPI.c -o C-internalMarkerAPI -llikwid -lm
+ $(CC) -g -fopenmp $(LIKWID_DEFINES) $(LIKWID_INCLUDE) $(LIKWID_LIBDIR) C-internalMarkerAPI.c -o C-internalMarkerAPI $(LIKWID_LIB) -lm
C-internalMarkerAPI-run: C-internalMarkerAPI
OMP_NUM_THREADS=3 ./C-internalMarkerAPI
monitoring:
- $(CC) -I$(PREFIX)/include -L$(PREFIX)/lib monitoring.c -o monitoring -llikwid -lm
+ $(CC) $(LIKWID_INCLUDE) $(LIKWID_LIBDIR) monitoring.c -o monitoring $(LIKWID_LIB) -lm
monitoring-run: monitoring
./monitoring
-F-markerAPI:
- $(FC) -fopenmp -DLIKWID_PERFMON -I$(PREFIX) -L$(PREFIX) F-markerAPI.F90 -o F-markerAPI -llikwid -lm
-
+F-markerAPI: F-markerAPI.F90
+ @if [ -x $(FC) ]; then $(FC) -fopenmp $(LIKWID_DEFINES) $(LIKWID_INCLUDE) $(LIKWID_LIBDIR) F-markerAPI.F90 -o F-markerAPI $(LIKWID_LIB) -lm; else echo "No Fortran compiler found"; fi
F-markerAPI-run: F-markerAPI
- $(PREFIX)/bin/likwid-perfctr -C 0 -g INSTR_RETIRED_ANY:FIXC0 -m ./F-markerAPI
+ @if [ -x F-markerAPI ]; then $(LIKWID_BINDIR)/likwid-perfctr -C 0 -g INSTR_RETIRED_ANY:FIXC0 -m ./F-markerAPI; else echo "No executable F-markerAPI found"; fi
+
Lua-likwidAPI:
sed -e "s+<PREFIX>+$(PREFIX)+g" Lua-likwidAPI.lua > Lua-likwidAPI
@@ -62,3 +65,4 @@ clean:
rm -f C-markerAPI C-likwidAPI F-markerAPI Lua-likwidAPI monitoring C-internalMarkerAPI
.PHONY: clean C-markerAPI C-likwidAPI F-markerAPI Lua-likwidAPI monitoring C-internalMarkerAPI
+
diff --git a/groups/broadwell/L3.txt b/groups/broadwell/L3.txt
index 4026f85..7d84636 100644
--- a/groups/broadwell/L3.txt
+++ b/groups/broadwell/L3.txt
@@ -17,7 +17,7 @@ L3 load data volume [GBytes] 1.0E-09*PMC0*64.0
L3 evict bandwidth [MBytes/s] 1.0E-06*PMC1*64.0/time
L3 evict data volume [GBytes] 1.0E-09*PMC1*64.0
L3 bandwidth [MBytes/s] 1.0E-06*(PMC0+PMC1)*64.0/time
-L2 data volume [GBytes] 1.0E-09*(PMC0+PMC1)*64.0
+L3 data volume [GBytes] 1.0E-09*(PMC0+PMC1)*64.0
LONG
Formulas:
@@ -26,7 +26,7 @@ L3 load data volume [GBytes] = 1.0E-09*L2_LINES_IN_ALL*64.0
L3 evict bandwidth [MBytes/s] = 1.0E-06*L2_LINES_OUT_DEMAND_DIRTY*64.0/time
L3 evict data volume [GBytes] = 1.0E-09*L2_LINES_OUT_DEMAND_DIRTY*64.0
L3 bandwidth [MBytes/s] = 1.0E-06*(L2_LINES_IN_ALL+L2_LINES_OUT_DEMAND_DIRTY)*64/time
-L2 data volume [GBytes] = 1.0E-09*(L2_LINES_IN_ALL+L2_LINES_OUT_DEMAND_DIRTY)*64
+L3 data volume [GBytes] = 1.0E-09*(L2_LINES_IN_ALL+L2_LINES_OUT_DEMAND_DIRTY)*64
-
Profiling group to measure L3 cache bandwidth. The bandwidth is computed by the
number of cache line allocated in the L2 and the number of modified cache lines
diff --git a/groups/knl/ENERGY.txt b/groups/knl/ENERGY.txt
index 9fd5045..df8092d 100644
--- a/groups/knl/ENERGY.txt
+++ b/groups/knl/ENERGY.txt
@@ -19,8 +19,8 @@ Energy [J] PWR0
Power [W] PWR0/time
Energy PP0 [J] PWR1
Power PP0 [W] PWR1/time
-Energy DRAM [J] PWR1
-Power DRAM [W] PWR1/time
+Energy DRAM [J] PWR3
+Power DRAM [W] PWR3/time
LONG
Formula:
diff --git a/groups/sandybridge/L3CACHE.txt b/groups/sandybridge/L3CACHE.txt
index d15e6e3..3926518 100644
--- a/groups/sandybridge/L3CACHE.txt
+++ b/groups/sandybridge/L3CACHE.txt
@@ -15,13 +15,13 @@ Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
L3 request rate PMC1:MATCH0=0x0081:MATCH1=0x1/FIXC0
L3 miss rate PMC0:MATCH0=0x0081:MATCH1=0x3fffc0/FIXC0
-L3 miss ratio PMC0:MATCH0=0x0081:MATCH1=0x3fffc0/PMC1:MATCH0=0081:MATCH1=0x1
+L3 miss ratio PMC0:MATCH0=0x0081:MATCH1=0x3fffc0/PMC1:MATCH0=0x0081:MATCH1=0x1
LONG
Formulas:
L3 request rate = OFFCORE_RESPONSE_1_OPTIONS:MATCH0=0x0081:MATCH1=0x1/INSTR_RETIRED_ANY
L3 miss rate = OFFCORE_RESPONSE_0_OPTIONS:MATCH0=0x0081:MATCH1=0x3fffc0/INSTR_RETIRED_ANY
-L3 miss ratio = OFFCORE_RESPONSE_0_OPTIONS:MATCH0=0x0081:MATCH1=0x3fffc0/OFFCORE_RESPONSE_1_OPTIONS:MATCH0=0081:MATCH1=0x1
+L3 miss ratio = OFFCORE_RESPONSE_0_OPTIONS:MATCH0=0x0081:MATCH1=0x3fffc0/OFFCORE_RESPONSE_1_OPTIONS:MATCH0=0x0081:MATCH1=0x1
-
This group measures the locality of your data accesses with regard to the
L3 cache. L3 request rate tells you how data intensive your code is
diff --git a/groups/sandybridgeEP/L3CACHE.txt b/groups/sandybridgeEP/L3CACHE.txt
index d15e6e3..3926518 100644
--- a/groups/sandybridgeEP/L3CACHE.txt
+++ b/groups/sandybridgeEP/L3CACHE.txt
@@ -15,13 +15,13 @@ Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
L3 request rate PMC1:MATCH0=0x0081:MATCH1=0x1/FIXC0
L3 miss rate PMC0:MATCH0=0x0081:MATCH1=0x3fffc0/FIXC0
-L3 miss ratio PMC0:MATCH0=0x0081:MATCH1=0x3fffc0/PMC1:MATCH0=0081:MATCH1=0x1
+L3 miss ratio PMC0:MATCH0=0x0081:MATCH1=0x3fffc0/PMC1:MATCH0=0x0081:MATCH1=0x1
LONG
Formulas:
L3 request rate = OFFCORE_RESPONSE_1_OPTIONS:MATCH0=0x0081:MATCH1=0x1/INSTR_RETIRED_ANY
L3 miss rate = OFFCORE_RESPONSE_0_OPTIONS:MATCH0=0x0081:MATCH1=0x3fffc0/INSTR_RETIRED_ANY
-L3 miss ratio = OFFCORE_RESPONSE_0_OPTIONS:MATCH0=0x0081:MATCH1=0x3fffc0/OFFCORE_RESPONSE_1_OPTIONS:MATCH0=0081:MATCH1=0x1
+L3 miss ratio = OFFCORE_RESPONSE_0_OPTIONS:MATCH0=0x0081:MATCH1=0x3fffc0/OFFCORE_RESPONSE_1_OPTIONS:MATCH0=0x0081:MATCH1=0x1
-
This group measures the locality of your data accesses with regard to the
L3 cache. L3 request rate tells you how data intensive your code is
diff --git a/perl/set_license.pl b/perl/set_license.pl
index fb099a6..0935c4f 100755
--- a/perl/set_license.pl
+++ b/perl/set_license.pl
@@ -10,11 +10,11 @@ my $cc = ' *';
my $fc = '!';
my $lc = ' *';
-my $VERSION = '<VERSION>';
-my $DATE = '<DATE>';
-#my $VERSION = '4.0';
-#my $DATE = '16.6.2015';
-my $YEAR = '2015';
+#my $VERSION = '<VERSION>';
+#my $DATE = '<DATE>';
+my $VERSION = '4.2';
+my $DATE = '22.12.2016';
+my $YEAR = '2016';
my $AUTHOR = 'RRZE, University Erlangen-Nuremberg';
my $LICENSE = 'gpl';
diff --git a/src/access-daemon/Makefile b/src/access-daemon/Makefile
index cb196a1..fa09830 100644
--- a/src/access-daemon/Makefile
+++ b/src/access-daemon/Makefile
@@ -4,8 +4,8 @@
#
# Description: accessDaemon Makefile
#
-# Version: <VERSION>
-# Released: <DATE>
+# Version: 4.2
+# Released: 22.12.2016
#
# Author: Jan Treibig (jt), jan.treibig at gmail.com
# Project: likwid
diff --git a/src/access-daemon/accessDaemon.c b/src/access-daemon/accessDaemon.c
index caa173b..855f2ca 100644
--- a/src/access-daemon/accessDaemon.c
+++ b/src/access-daemon/accessDaemon.c
@@ -5,8 +5,8 @@
*
* Description: Implementation of access daemon.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Authors: Michael Meier, michael.meier at rrze.fau.de
* Jan Treibig (jt), jan.treibig at gmail.com,
@@ -1082,6 +1082,7 @@ int main(void)
(model == HASWELL_M1) ||
(model == HASWELL_M2) ||
(model == BROADWELL) ||
+ (model == BROADWELL_E3) ||
(model == SKYLAKE1) ||
(model == SKYLAKE2) ||
(model == KABYLAKE1) ||
diff --git a/src/access-daemon/setFreq.c b/src/access-daemon/setFreq.c
index 091758e..600d827 100644
--- a/src/access-daemon/setFreq.c
+++ b/src/access-daemon/setFreq.c
@@ -5,8 +5,8 @@
*
* Description: Implementation of frequency daemon
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Authors: Jan Treibig (jt), jan.treibig at gmail.com,
* Thomas Roehl (tr), thomas.roehl at googlemail.com
diff --git a/src/access.c b/src/access.c
index 6ea0b24..3bf67d1 100644
--- a/src/access.c
+++ b/src/access.c
@@ -5,8 +5,8 @@
*
* Description: Interface for the different register access modules.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
diff --git a/src/access_client.c b/src/access_client.c
index be28287..a178811 100644
--- a/src/access_client.c
+++ b/src/access_client.c
@@ -5,8 +5,8 @@
*
* Description: Interface to the access daemon for the access module.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
diff --git a/src/access_x86.c b/src/access_x86.c
index 1e73055..bc50ecc 100644
--- a/src/access_x86.c
+++ b/src/access_x86.c
@@ -5,8 +5,8 @@
*
* Description: Interface to x86 related functions for the access module.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
diff --git a/src/access_x86_msr.c b/src/access_x86_msr.c
index e198389..16a3aaa 100644
--- a/src/access_x86_msr.c
+++ b/src/access_x86_msr.c
@@ -9,8 +9,8 @@
* sys interface of the Linux 2.6 kernel. This module
* is based on the msr-util tools.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com.
* Thomas Roehl (tr), thomas.roehl at googlemail.com
diff --git a/src/access_x86_pci.c b/src/access_x86_pci.c
index b6ff223..2e1b731 100644
--- a/src/access_x86_pci.c
+++ b/src/access_x86_pci.c
@@ -8,8 +8,8 @@
* performance monitoring registers in PCI Cfg space
* for Intel Sandy Bridge Processors.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com,
* Thomas Roehl (tr), thomas.roehl at googlemail.com
diff --git a/src/affinity.c b/src/affinity.c
index 30e0033..b7baaaa 100644
--- a/src/affinity.c
+++ b/src/affinity.c
@@ -5,8 +5,8 @@
*
* Description: Implementation of affinity module.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com,
* Thomas Roehl (tr), thomas.roehl at googlemail.com
@@ -90,11 +90,15 @@ static int
treeFillNextEntries(
TreeNode* tree,
int* processorIds,
+ int startidx,
int socketId,
- int offset,
- int numberOfEntries )
+ int coreOffset,
+ int coreSpan,
+ int numberOfEntries)
{
int counter = numberOfEntries;
+ int skip = 0;
+ int c, t, c_count = 0;
TreeNode* node = tree;
TreeNode* thread;
node = tree_getChildNode(node);
@@ -103,7 +107,6 @@ treeFillNextEntries(
for (int i=0; i<socketId; i++)
{
node = tree_getNextNode(node);
-
if ( node == NULL )
{
DEBUG_PRINT(DEBUGLEV_DEVELOP, Cannot find socket %d in topology tree, i);
@@ -112,7 +115,7 @@ treeFillNextEntries(
node = tree_getChildNode(node);
/* skip offset cores */
- for (int i=0; i<offset; i++)
+ for (int i=0; i<coreOffset; i++)
{
node = tree_getNextNode(node);
@@ -123,17 +126,17 @@ treeFillNextEntries(
}
/* Traverse horizontal */
- while ( node != NULL )
+ while ( node != NULL && c_count < coreSpan)
{
if ( !counter ) break;
thread = tree_getChildNode(node);
- while ( thread != NULL )
+ while ( thread != NULL && (numberOfEntries-counter) < numberOfEntries )
{
if (cpuid_topology.threadPool[thread->id].inCpuSet)
{
- processorIds[numberOfEntries-counter] = thread->id;
+ processorIds[startidx+(numberOfEntries-counter)] = thread->id;
thread = tree_getNextNode(thread);
counter--;
}
@@ -142,11 +145,11 @@ treeFillNextEntries(
thread = tree_getNextNode(thread);
}
}
+ c_count++;
node = tree_getNextNode(node);
}
return numberOfEntries-counter;
}
-
/* ##### FUNCTION DEFINITIONS - EXPORTED FUNCTIONS ################## */
void
@@ -217,16 +220,20 @@ affinity_init()
for (int i=0; i<numberOfSocketDomains; i++)
{
tmp = treeFillNextEntries(cpuid_topology.topologyTree,
- domains[0].processorList + offset,
- i, 0, numberOfProcessorsPerSocket);
+ domains[0].processorList, offset,
+ i, 0,
+ cpuid_topology.numCoresPerSocket,
+ numberOfProcessorsPerSocket);
offset += tmp;
}
}
else
{
tmp = treeFillNextEntries(cpuid_topology.topologyTree,
- domains[0].processorList,
- 0, 0, domains[0].numberOfProcessors);
+ domains[0].processorList, 0,
+ 0, 0,
+ domains[0].numberOfCores,
+ domains[0].numberOfProcessors);
domains[0].numberOfProcessors = tmp;
}
@@ -248,8 +255,9 @@ affinity_init()
}
tmp = treeFillNextEntries(cpuid_topology.topologyTree,
- domains[currentDomain + i].processorList,
- i, 0, domains[currentDomain + i].numberOfProcessors);
+ domains[currentDomain + i].processorList, 0,
+ i, 0, cpuid_topology.numCoresPerSocket,
+ domains[currentDomain + i].numberOfProcessors);
tmp = MIN(tmp, domains[currentDomain + i].numberOfProcessors);
for ( int j = 0; j < tmp; j++ )
{
@@ -281,8 +289,8 @@ affinity_init()
}
tmp = treeFillNextEntries(cpuid_topology.topologyTree,
- domains[currentDomain + subCounter].processorList,
- i, offset,
+ domains[currentDomain + subCounter].processorList, 0,
+ i, offset, numberOfCoresPerCache,
domains[currentDomain + subCounter].numberOfProcessors);
domains[currentDomain + subCounter].numberOfProcessors = tmp;
@@ -323,10 +331,17 @@ affinity_init()
bdata(domains[currentDomain + subCounter].tag));
return;
}
+ // Skip memory domain if all CPUs are already attached to others
+ // This happens for example on Intel Xeon Phi (KNL) where the
+ // NUMA domains of the MCDRAM don't have CPUs attached.
+ if (offset >= cpuid_topology.numCoresPerSocket*cpuid_topology.numSockets)
+ {
+ continue;
+ }
tmp = treeFillNextEntries(cpuid_topology.topologyTree,
- domains[currentDomain + subCounter].processorList,
- i, offset,
+ domains[currentDomain + subCounter].processorList, 0,
+ i, offset, domains[currentDomain + subCounter].numberOfCores,
domains[currentDomain + subCounter].numberOfProcessors);
domains[currentDomain + subCounter].numberOfProcessors = tmp;
offset += domains[currentDomain + subCounter].numberOfCores;
@@ -361,8 +376,9 @@ affinity_init()
{
tmp += treeFillNextEntries(
cpuid_topology.topologyTree,
- &(domains[currentDomain + subCounter].processorList[offset]),
- i, 0, numberOfProcessorsPerSocket);
+ domains[currentDomain + subCounter].processorList, tmp,
+ i, 0, domains[currentDomain + subCounter].numberOfCores,
+ numberOfProcessorsPerSocket);
offset += numberOfProcessorsPerSocket;
}
domains[currentDomain + subCounter].numberOfProcessors = tmp;
diff --git a/src/applications/likwid-agent.lua b/src/applications/likwid-agent.lua
index c162ed7..15db59d 100644
--- a/src/applications/likwid-agent.lua
+++ b/src/applications/likwid-agent.lua
@@ -6,8 +6,8 @@
*
* Description: A monitoring daemon for hardware performance counters.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at gmail.com
* Project: likwid
diff --git a/src/applications/likwid-features.lua b/src/applications/likwid-features.lua
index 470df45..2b20825 100644
--- a/src/applications/likwid-features.lua
+++ b/src/applications/likwid-features.lua
@@ -6,8 +6,8 @@
*
* Description: A application to retrieve and manipulate CPU features.
*
- * Version: 4.0
- * Released: 28.04.2015
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at gmail.com
* Project: likwid
diff --git a/src/applications/likwid-genTopoCfg.lua b/src/applications/likwid-genTopoCfg.lua
index 5f42bf8..ff71b8c 100644
--- a/src/applications/likwid-genTopoCfg.lua
+++ b/src/applications/likwid-genTopoCfg.lua
@@ -8,8 +8,8 @@
* that is used by likwid to avoid reading the systems architecture at
* each start.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at gmail.com
* Project: likwid
diff --git a/src/applications/likwid-memsweeper.lua b/src/applications/likwid-memsweeper.lua
index bf71a91..dc81822 100644
--- a/src/applications/likwid-memsweeper.lua
+++ b/src/applications/likwid-memsweeper.lua
@@ -6,8 +6,8 @@
*
* Description: An application to clean up NUMA memory domains.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at gmail.com
* Project: likwid
diff --git a/src/applications/likwid-mpirun.lua b/src/applications/likwid-mpirun.lua
index de6bad2..3da0191 100644
--- a/src/applications/likwid-mpirun.lua
+++ b/src/applications/likwid-mpirun.lua
@@ -7,8 +7,8 @@
* Description: A wrapper script to pin threads spawned by MPI processes and
* measure hardware performance counters
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
@@ -1277,14 +1277,14 @@ local function parseOutputFile(filename)
table.remove(linelist, j)
end
end
- if results[gidx][idx] == nil then
- results[gidx][idx] = {}
+ if results[gidx][counter] == nil then
+ results[gidx][counter] = {}
end
for j, value in pairs(linelist) do
if event:match("[Rr]untime") then
results[gidx]["time"][cpulist[j]] = tonumber(value)
else
- results[gidx][idx][cpulist[j]] = tonumber(value)
+ results[gidx][counter][cpulist[j]] = tonumber(value)
end
end
if not event:match("[Rr]untime") then
@@ -1387,9 +1387,11 @@ local function parseMarkerOutputFile(filename)
end
end
end
- elseif parse_reg_output then
+ elseif parse_reg_output and not line:match("^%s*$") then
linelist = likwid.stringsplit(line,",")
if linelist[2] ~= "TSC" then
+ event = linelist[1]
+ counter = linelist[2]
table.remove(linelist,1)
table.remove(linelist,1)
for j=#linelist,1,-1 do
@@ -1397,11 +1399,11 @@ local function parseMarkerOutputFile(filename)
table.remove(linelist, j)
end
end
- if results[current_region][gidx][idx] == nil then
- results[current_region][gidx][idx] = {}
+ if results[current_region][gidx][counter] == nil then
+ results[current_region][gidx][counter] = {}
end
for j, value in pairs(linelist) do
- results[current_region][gidx][idx][cpulist[j]] = tonumber(value)
+ results[current_region][gidx][counter][cpulist[j]] = tonumber(value)
end
idx = idx + 1
end
@@ -1510,9 +1512,10 @@ function printMpiOutput(group_list, all_results, regionname)
end
for j=1,#gdata["Events"] do
local value = "0"
- if all_results[rank]["results"][gidx][j] and
- all_results[rank]["results"][gidx][j][cpu] then
- value = likwid.num2str(all_results[rank]["results"][gidx][j][cpu])
+ cname = gdata["Events"][j]["Counter"]
+ if all_results[rank]["results"][gidx][cname] and
+ all_results[rank]["results"][gidx][cname][cpu] then
+ value = likwid.num2str(all_results[rank]["results"][gidx][cname][cpu])
end
table.insert(column, value)
end
@@ -1535,9 +1538,9 @@ function printMpiOutput(group_list, all_results, regionname)
for j=1,#gdata["Events"] do
local counter = gdata["Events"][j]["Counter"]
counterlist[counter] = 0
- if all_results[rank]["results"][gidx][j] ~= nil and
- all_results[rank]["results"][gidx][j][cpu] ~= nil then
- counterlist[counter] = all_results[rank]["results"][gidx][j][cpu]
+ if all_results[rank]["results"][gidx][counter] ~= nil and
+ all_results[rank]["results"][gidx][counter][cpu] ~= nil then
+ counterlist[counter] = all_results[rank]["results"][gidx][counter][cpu]
end
end
counterlist["time"] = all_results[rank]["results"][gidx]["time"][cpu]
diff --git a/src/applications/likwid-perfctr.lua b/src/applications/likwid-perfctr.lua
index 7c19d00..f8d5106 100644
--- a/src/applications/likwid-perfctr.lua
+++ b/src/applications/likwid-perfctr.lua
@@ -7,8 +7,8 @@
* Description: An application to read out performance counter registers
* on x86 processors
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
@@ -76,6 +76,10 @@ local function usage()
io.stdout:write("Modes:")
io.stdout:write("-S <time>\t\t Stethoscope mode with duration in s, ms or us, e.g 20ms\n")
io.stdout:write("-t <time>\t\t Timeline mode with frequency in s, ms or us, e.g. 300ms\n")
+ io.stdout:write("\t\t\t The output format (to stderr) is:\n")
+ io.stdout:write("\t\t\t <groupID> <nrEvents> <nrThreads> <Timestamp> <Event1_Thread1> <Event1_Thread2> ... <EventN_ThreadN>\n")
+ io.stdout:write("\t\t\t or\n")
+ io.stdout:write("\t\t\t <groupID> <nrEvents> <nrThreads> <Timestamp> <Metric1_Thread1> <Metric1_Thread2> ... <MetricN_ThreadN>\n")
io.stdout:write("-m, --marker\t\t Use Marker API inside code\n")
io.stdout:write("Output options:\n")
io.stdout:write("-o, --output <file>\t Store output to file. (Optional: Apply text filter according to filename suffix)\n")
@@ -245,7 +249,7 @@ for opt,arg in likwid.getopt(arg, {"a", "c:", "C:", "e", "E:", "g:", "h", "H", "
verbose = true
elseif opt == "m" or opt == "marker" then
use_marker = true
- use_wrapper = true
+ --use_wrapper = true
elseif (opt == "S") then
use_stethoscope = true
if arg ~= nil and arg:match("%d+%a?s") then
@@ -439,10 +443,14 @@ end
avail_groups = likwid.getGroups()
if print_groups == true then
- print_stdout(string.format("%11s\t%s","Group name", "Description"))
- print_stdout(likwid.hline)
- for i,g in pairs(avail_groups) do
- print_stdout(string.format("%11s\t%s",g["Name"], g["Info"]))
+ if avail_groups then
+ print_stdout(string.format("%11s\t%s","Group name", "Description"))
+ print_stdout(likwid.hline)
+ for i,g in pairs(avail_groups) do
+ print_stdout(string.format("%11s\t%s",g["Name"], g["Info"]))
+ end
+ else
+ print_stdout(string.format("No groups defined for %s",cpuinfo["name"]))
end
likwid.putTopology()
likwid.putConfiguration()
@@ -571,6 +579,12 @@ if pin_cpus then
elseif num_cpus > tonumber(omp_threads) then
print_stderr(string.format("Environment variable OMP_NUM_THREADS already set to %s but %d cpus required", omp_threads,num_cpus))
end
+ if omp_threads and tonumber(omp_threads) < num_cpus then
+ num_cpus = tonumber(omp_threads)
+ for i=#cpulist,num_cpus+1,-1 do
+ cpulist[i] = nil
+ end
+ end
if os.getenv("CILK_NWORKERS") == nil then
likwid.setenv("CILK_NWORKERS", tostring(math.tointeger(num_cpus)))
end
@@ -690,16 +704,16 @@ if use_timeline == true then
for i, cpu in pairs(cpulist) do
cores_string = cores_string .. tostring(cpu) .. "|"
end
- print_stderr("# "..cores_string:sub(1,cores_string:len()-1).."\n")
- for gid, group in pairs(group_list) do
+ print_stderr("# "..cores_string:sub(1,cores_string:len()-1))
+ for i, gid in pairs(group_ids) do
local strlist = {}
- if group["Metrics"] == nil then
- for i,e in pairs(group["Events"]) do
- table.insert(strlist, e["Event"])
+ if likwid.getNumberOfMetrics(gid) == 0 then
+ for e=1,likwid.getNumberOfEvents(gid) do
+ table.insert(strlist, likwid.getNameOfEvent(gid, e))
end
else
- for i,e in pairs(group["Metrics"]) do
- table.insert(strlist, e["description"])
+ for m=1,likwid.getNumberOfMetrics(gid) do
+ table.insert(strlist, likwid.getNameOfMetric(gid, m))
end
end
print_stderr("# "..table.concat(strlist, "|").."\n")
@@ -749,6 +763,10 @@ if use_wrapper or use_timeline then
else
start = likwid.startClock()
groupTime[activeGroup] = 0
+ timeline_delim = " "
+ if use_csv then
+ timeline_delim = ","
+ end
while true do
if likwid.getSignalState() ~= 0 then
if execString:len() > 0 then
@@ -773,13 +791,20 @@ if use_wrapper or use_timeline then
else
results = likwid.getLastMetrics()
end
- str = tostring(math.tointeger(activeGroup)) .. " "..tostring(#results[activeGroup]).." "..tostring(#cpulist).." "..tostring(time)
+ --str = tostring(math.tointeger(activeGroup)) .. " "..tostring(#results[activeGroup]).." "..tostring(#cpulist).." "..tostring(time)
+ local outList = {}
+ table.insert(outList, tostring(math.tointeger(activeGroup)))
+ table.insert(outList, tostring(#results[activeGroup]))
+ table.insert(outList, tostring(#cpulist))
+ table.insert(outList, tostring(time))
for i,l1 in pairs(results[activeGroup]) do
for j, value in pairs(l1) do
- str = str .. " " .. tostring(value)
+ --str = str .. " " .. tostring(value)
+ table.insert(outList, tostring(value))
end
end
- io.stderr:write(str.."\n")
+ --io.stderr:write(str.."\n")
+ io.stderr:write(table.concat(outList, timeline_delim).."\n")
groupTime[activeGroup] = time
else
likwid.readCounters()
@@ -803,11 +828,6 @@ elseif use_stethoscope then
end
likwid.sleep(duration)
elseif use_marker then
- local ret = likwid.startCounters()
- if ret < 0 then
- print_stderr(string.format("Error starting counters for cpu %d.",cpulist[ret * (-1)]))
- os.exit(1)
- end
local ret = os.execute(execString)
if ret == nil then
print_stderr("Failed to execute command: ".. execString)
@@ -815,13 +835,15 @@ elseif use_marker then
end
end
-local ret = likwid.stopCounters()
-if ret < 0 then
- print_stderr(string.format("Error stopping counters for thread %d.",ret * (-1)))
- likwid.finalize()
- likwid.putTopology()
- likwid.putConfiguration()
- os.exit(exitvalue)
+if not use_marker then
+ local ret = likwid.stopCounters()
+ if ret < 0 then
+ print_stderr(string.format("Error stopping counters for thread %d.",ret * (-1)))
+ likwid.finalize()
+ likwid.putTopology()
+ likwid.putConfiguration()
+ os.exit(exitvalue)
+ end
end
io.stdout:flush()
if outfile == nil then
diff --git a/src/applications/likwid-perfscope.lua b/src/applications/likwid-perfscope.lua
index 1263e87..0f645fb 100644
--- a/src/applications/likwid-perfscope.lua
+++ b/src/applications/likwid-perfscope.lua
@@ -7,8 +7,8 @@
* Description: An application to use the timeline mode of likwid-perfctr to generate
* realtime plots using feedGnuplot
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
diff --git a/src/applications/likwid-pin.lua b/src/applications/likwid-pin.lua
index 4747746..5bafe2a 100644
--- a/src/applications/likwid-pin.lua
+++ b/src/applications/likwid-pin.lua
@@ -6,8 +6,8 @@
*
* Description: An application to pin a program including threads
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at gmail.com
* Project: likwid
@@ -89,6 +89,13 @@ local function usage()
examples()
end
+local function close_and_exit(code)
+ likwid.putTopology()
+ likwid.putAffinityInfo()
+ likwid.putConfiguration()
+ os.exit(code)
+end
+
delimiter = ','
quiet = 0
sweep_sockets = false
@@ -100,6 +107,7 @@ affinity = nil
num_threads = 0
+likwid.setenv("LIKWID_NO_ACCESS", "1")
config = likwid.getConfiguration()
cputopo = likwid.getCpuTopology()
affinity = likwid.getAffinityInfo()
@@ -112,16 +120,10 @@ end
for opt,arg in likwid.getopt(arg, {"c:", "d:", "h", "i", "p", "q", "s:", "S", "t:", "v", "V:", "verbose:", "help", "version", "skip","sweep", "quiet"}) do
if opt == "h" or opt == "help" then
usage()
- likwid.putTopology()
- likwid.putAffinityInfo()
- likwid.putConfiguration()
- os.exit(0)
+ close_and_exit(0)
elseif opt == "v" or opt == "version" then
version()
- likwid.putTopology()
- likwid.putAffinityInfo()
- likwid.putConfiguration()
- os.exit(0)
+ close_and_exit(0)
elseif opt == "V" or opt == "verbose" then
verbose = tonumber(arg)
likwid.setVerbosity(verbose)
@@ -133,20 +135,14 @@ for opt,arg in likwid.getopt(arg, {"c:", "d:", "h", "i", "p", "q", "s:", "S", "t
end
if (num_threads == 0) then
print_stderr("Failed to parse cpulist " .. arg)
- likwid.putTopology()
- likwid.putAffinityInfo()
- likwid.putConfiguration()
- os.exit(1)
+ close_and_exit(1)
end
elseif (opt == "d") then
delimiter = arg
elseif opt == "S" or opt == "sweep" then
if (affinity == nil) then
print_stderr("Option -S is not supported for unknown processor!")
- likwid.putTopology()
- likwid.putAffinityInfo()
- likwid.putConfiguration()
- os.exit(1)
+ close_and_exit(1)
end
sweep_sockets = true
elseif (opt == "i") then
@@ -157,7 +153,7 @@ for opt,arg in likwid.getopt(arg, {"c:", "d:", "h", "i", "p", "q", "s:", "S", "t
local s,e = arg:find("0x")
if s == nil then
print_stderr("Skip mask must be given in hex, hence start with 0x")
- os.exit(1)
+ close_and_exit(1)
end
skip_mask = arg
elseif opt == "q" or opt == "quiet" then
@@ -165,16 +161,10 @@ for opt,arg in likwid.getopt(arg, {"c:", "d:", "h", "i", "p", "q", "s:", "S", "t
quiet = 1
elseif opt == "?" then
print_stderr("Invalid commandline option -"..arg)
- likwid.putTopology()
- likwid.putAffinityInfo()
- likwid.putConfiguration()
- os.exit(1)
+ close_and_exit(1)
elseif opt == "!" then
print_stderr("Option requires an argument")
- likwid.putTopology()
- likwid.putAffinityInfo()
- likwid.putConfiguration()
- os.exit(1)
+ close_and_exit(1)
end
end
@@ -185,20 +175,14 @@ if print_domains and num_threads > 0 then
outstr = outstr .. delimiter .. cpu
end
print_stdout(outstr:sub(2,outstr:len()))
- likwid.putTopology()
- likwid.putAffinityInfo()
- likwid.putConfiguration()
- os.exit(0)
+ close_and_exit(0)
elseif print_domains then
for k,v in pairs(affinity["domains"]) do
print_stdout(string.format("Domain %s:", v["tag"]))
print_stdout("\t" .. table.concat(v["processorList"], ","))
print_stdout("")
end
- likwid.putTopology()
- likwid.putAffinityInfo()
- likwid.putConfiguration()
- os.exit(0)
+ close_and_exit(0)
end
if num_threads == 0 then
@@ -206,7 +190,7 @@ if num_threads == 0 then
end
if (#arg == 0) then
print_stderr("Executable must be given on commandline")
- os.exit(1)
+ close_and_exit(1)
end
if interleaved_policy then
@@ -229,6 +213,9 @@ if omp_threads == nil then
elseif num_threads > tonumber(omp_threads) and quiet == 0 then
print_stdout(string.format("Environment variable OMP_NUM_THREADS already set to %s but %d cpus required", omp_threads,num_threads))
end
+if omp_threads and tonumber(omp_threads) < num_threads then
+ num_threads = tonumber(omp_threads)
+end
likwid.setenv("KMP_AFFINITY","disabled")
@@ -269,15 +256,8 @@ local exec = table.concat(arg," ",1, likwid.tablelength(arg)-2)
local pid = likwid.startProgram(exec, num_threads, cpu_list)
if (pid == nil) then
print_stderr("Failed to execute command: ".. exec)
- likwid.putTopology()
- likwid.putAffinityInfo()
- likwid.putConfiguration()
- os.exit(1)
+ close_and_exit(1)
end
local exitvalue = likwid.waitpid(pid)
-
-likwid.putAffinityInfo()
-likwid.putTopology()
-likwid.putConfiguration()
-os.exit(exitvalue)
+close_and_exit(exitvalue)
diff --git a/src/applications/likwid-powermeter.lua b/src/applications/likwid-powermeter.lua
index 3525b52..99862d8 100644
--- a/src/applications/likwid-powermeter.lua
+++ b/src/applications/likwid-powermeter.lua
@@ -7,8 +7,8 @@
* Description: An application to get information about power
* consumption on architectures implementing the RAPL interface.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at gmail.com
* Project: likwid
diff --git a/src/applications/likwid-setFrequencies.lua b/src/applications/likwid-setFrequencies.lua
index 40dee09..d2f3ff0 100644
--- a/src/applications/likwid-setFrequencies.lua
+++ b/src/applications/likwid-setFrequencies.lua
@@ -6,8 +6,8 @@
*
* Description: A application to set the CPU frequency of CPU cores and domains.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at gmail.com
* Project: likwid
@@ -56,6 +56,7 @@ function usage()
print_stdout("-f/--freq freq\t Set current frequency, implicitly sets userspace governor")
print_stdout("-x/--min freq\t Set minimal frequency")
print_stdout("-y/--max freq\t Set maximal frequency")
+ print_stdout("-F\t Pin CPU to frequency (sets min, max and cur frequency)")
print_stdout("-p\t Print current frequencies")
print_stdout("-l\t List available frequencies")
print_stdout("-m\t List available governors")
@@ -65,155 +66,6 @@ function usage()
print_stdout("to the turbo related frequency. The governor is set to 'performance'.")
end
---[[function getCurrentMinFreq(cpuid)
- local min = 10000000
- if cpuid == nil or cpuid < 0 then
- for cpuid=0,topo["numHWThreads"]-1 do
- fp = io.open(sys_base_path .. "/" .. string.format("cpu%d",cpuid) .. "/cpufreq/scaling_min_freq")
- if verbosity == 3 then
- print_stdout("Reading "..sys_base_path .. "/" .. string.format("cpu%d",cpuid) .. "/cpufreq/scaling_min_freq" )
- end
- line = fp:read("*l")
- if tonumber(line)/1E6 < min then
- min = tonumber(line)/1E6
- end
- fp:close()
- end
- else
- fp = io.open(sys_base_path .. "/" .. string.format("cpu%d",cpuid) .. "/cpufreq/scaling_min_freq")
- if verbosity == 3 then
- print_stdout("Reading "..sys_base_path .. "/" .. string.format("cpu%d",cpuid) .. "/cpufreq/scaling_min_freq" )
- end
- line = fp:read("*l")
- if tonumber(line)/1E6 < min then
- min = tonumber(line)/1E6
- end
- fp:close()
- end
- return min
-end
-
-function getCurrentMaxFreq(cpuid)
- local max = 0
- if cpuid == nil or cpuid < 0 then
- for cpuid=0,topo["numHWThreads"]-1 do
- fp = io.open(sys_base_path .. "/" .. string.format("cpu%d",cpuid) .. "/cpufreq/scaling_max_freq")
- if verbosity == 3 then
- print_stdout("Reading "..sys_base_path .. "/" .. string.format("cpu%d",cpuid) .. "/cpufreq/scaling_max_freq" )
- end
- line = fp:read("*l")
- if tonumber(line)/1E6 > max then
- max = tonumber(line)/1E6
- end
- fp:close()
- end
- else
- fp = io.open(sys_base_path .. "/" .. string.format("cpu%d",cpuid) .. "/cpufreq/scaling_max_freq")
- if verbosity == 3 then
- print_stdout("Reading "..sys_base_path .. "/" .. string.format("cpu%d",cpuid) .. "/cpufreq/scaling_max_freq" )
- end
- line = fp:read("*l")
- if tonumber(line)/1E6 > max then
- max = tonumber(line)/1E6
- end
- fp:close()
- end
- return max
-end
-
-
-function getAvailFreq(cpuid)
- if cpuid == nil then
- cpuid = 0
- end
- if cpuid < 0 then
- cpuid = 0
- end
- fp = io.open(sys_base_path .. "/" .. string.format("cpu%d",cpuid) .. "/cpufreq/scaling_available_frequencies")
- if verbosity == 3 then
- print_stdout("Reading "..sys_base_path .. "/" .. string.format("cpu%d",cpuid) .. "/cpufreq/scaling_available_frequencies" )
- end
- line = fp:read("*l")
- fp:close()
- local tmp = likwid.stringsplit(line:gsub("^%s*(.-)%s*$", "%1"), " ", nil, " ")
- local avail = {}
- local turbo = tonumber(tmp[1])/1E6
- local j = 1
- for i=2,#tmp do
- local freq = tonumber(tmp[i])/1E6
- avail[j] = tostring(freq)
- if not avail[j]:match("%d+.%d+") then
- avail[j] = avail[j] ..".0"
- end
- j = j + 1
- end
- if verbosity == 1 then
- print_stdout(string.format("The system provides %d scaling frequencies, frequency %s is taken as turbo mode", #avail,turbo))
- end
- return avail, tostring(turbo)
-end
-
-function getCurFreq()
- local freqs = {}
- local govs = {}
- for cpuid=0,topo["numHWThreads"]-1 do
- local fp = io.open(sys_base_path .. "/" .. string.format("cpu%d",cpuid) .. "/cpufreq/scaling_cur_freq")
- if verbosity == 3 then
- print_stdout("Reading "..sys_base_path .. "/" .. string.format("cpu%d",cpuid) .. "/cpufreq/scaling_cur_freq" )
- end
- local line = fp:read("*l")
- fp:close()
- freqs[cpuid] = tostring(tonumber(line)/1E6)
- if not freqs[cpuid]:match("%d.%d") then
- freqs[cpuid] = freqs[cpuid] ..".0"
- end
- local fp = io.open(sys_base_path .. "/" .. string.format("cpu%d",cpuid) .. "/cpufreq/scaling_governor")
- if verbosity == 3 then
- print_stdout("Reading "..sys_base_path .. "/" .. string.format("cpu%d",cpuid) .. "/cpufreq/scaling_governor" )
- end
- local line = fp:read("*l")
- fp:close()
- govs[cpuid] = line
- end
- return freqs, govs
-end
-
-function getAvailGovs(cpuid)
- if (cpuid == nil) or (cpuid < 1) then
- cpuid = 0
- end
- local fp = io.open(sys_base_path .. "/" .. string.format("cpu%d",cpuid) .. "/cpufreq/scaling_available_governors")
- if verbosity == 3 then
- print_stdout("Reading "..sys_base_path .. "/" .. string.format("cpu%d",cpuid) .. "/cpufreq/scaling_available_governors" )
- end
- local line = fp:read("*l")
- fp:close()
- local avail = likwid.stringsplit(line:gsub("^%s*(.-)%s*$", "%1"), "%s+", nil, "%s+")
- for i=1,#avail do
- if avail[i] == "userspace" then
- table.remove(avail, i)
- break
- end
- end
- table.insert(avail, "turbo")
- if verbosity == 1 then
- print_stdout(string.format("The system provides %d scaling governors", #avail))
- end
- return avail
-end
-
-local function testDriver()
- local fp = io.open(sys_base_path .. "/" .. string.format("cpu%d",0) .. "/cpufreq/scaling_driver")
- if verbosity == 3 then
- print_stdout("Reading "..sys_base_path .. "/" .. string.format("cpu%d",0) .. "/cpufreq/scaling_driver" )
- end
- local line = fp:read("*l")
- fp:close()
- if line == "acpi-cpufreq" then
- return true
- end
- return false
-end]]
verbosity = 0
governor = nil
@@ -231,7 +83,7 @@ if #arg == 0 then
end
-for opt,arg in likwid.getopt(arg, {"g:", "c:", "f:", "l", "p", "h", "v", "m", "x:", "y:", "help","version","freq:", "min:", "max:"}) do
+for opt,arg in likwid.getopt(arg, {"g:", "c:", "f:", "l", "p", "h", "v", "m", "x:", "y:", "help","version","freq:", "min:", "max:", "F:"}) do
if opt == "h" or opt == "help" then
usage()
os.exit(0)
@@ -248,6 +100,10 @@ for opt,arg in likwid.getopt(arg, {"g:", "c:", "f:", "l", "p", "h", "v", "m", "x
min_freq = arg
elseif opt == "y" or opt == "max" then
max_freq = arg
+ elseif opt == "F" then
+ frequency = arg
+ min_freq = arg
+ max_freq = arg
elseif (opt == "p") then
printCurFreq = true
elseif (opt == "l") then
diff --git a/src/applications/likwid-topology.lua b/src/applications/likwid-topology.lua
index bbbbc87..65f6ba0 100644
--- a/src/applications/likwid-topology.lua
+++ b/src/applications/likwid-topology.lua
@@ -7,8 +7,8 @@
* Description: A application to determine the thread and cache topology
* on x86 processors.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at gmail.com
* Project: likwid
@@ -109,6 +109,7 @@ for opt,arg in likwid.getopt(arg, {"h","v","c","C","g","o:","V:","O","help","ver
end
end
+likwid.setenv("LIKWID_NO_ACCESS", "1")
local config = likwid.getConfiguration()
local cpuinfo = likwid.getCpuInfo()
local cputopo = likwid.getCpuTopology()
diff --git a/src/applications/likwid.lua b/src/applications/likwid.lua
index bf9b424..92ad30a 100644
--- a/src/applications/likwid.lua
+++ b/src/applications/likwid.lua
@@ -5,8 +5,8 @@
*
* Description: Lua LIKWID interface library
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at gmail.com
* Project: likwid
diff --git a/src/bitUtil.c b/src/bitUtil.c
index 8c8415a..f4b7691 100644
--- a/src/bitUtil.c
+++ b/src/bitUtil.c
@@ -5,8 +5,8 @@
*
* Description: Utility routines manipulating bit arrays.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/calculator.c b/src/calculator.c
index bb0a314..2ca1126 100644
--- a/src/calculator.c
+++ b/src/calculator.c
@@ -5,8 +5,8 @@
*
* Description: Infix calculator
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Brandon Mills (bm), mills.brandont at gmail.com
* Project: likwid
@@ -36,8 +36,8 @@
*
* Some changes done for the integration in LIKWID, see inline comments
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at gmail.com
@@ -151,7 +151,7 @@ void raise(Error err)
msg = "Function input missing";
break;
}
- printf("\tError: %s\n", msg);
+ //printf("\tError: %s\n", msg);
}
inline unsigned int
@@ -749,7 +749,7 @@ int tokenize(char *str, char *(**tokensRef))
/*if(tokens == NULL) // First allocation
tokens = (char**)malloc(numTokens * sizeof(char*));
else*/
-
+
newToken = malloc((strlen(tmpToken)+1) * sizeof(char));
if (!newToken)
{
@@ -794,7 +794,7 @@ bool leftAssoc(token op)
{
case addop:
case multop:
-
+
ret = true;
break;
case function:
@@ -827,7 +827,7 @@ int precedence(token op1, token op2)
else if(tokenType(op1) == expop
&& tokenType(op2) == multop) // op1 has higher precedence
ret = 1;
- else if (tokenType(op1) == function
+ else if (tokenType(op1) == function
&& (tokenType(op2) == addop || tokenType(op2) == multop || tokenType(op2) == expop || tokenType(op2) == lparen))
ret = 1;
else if ((tokenType(op1) == addop || tokenType(op1) == multop || tokenType(op1) == expop)
diff --git a/src/configuration.c b/src/configuration.c
index 4fd1977..382bc3a 100644
--- a/src/configuration.c
+++ b/src/configuration.c
@@ -5,8 +5,8 @@
*
* Description: Configuration file module.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
diff --git a/src/cpuFeatures.c b/src/cpuFeatures.c
index 3348271..27e10ba 100644
--- a/src/cpuFeatures.c
+++ b/src/cpuFeatures.c
@@ -9,8 +9,8 @@
* Allows to turn on and off the Hardware prefetcher
* available.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -156,6 +156,7 @@ cpuFeatures_update(int cpu)
(cpuid_info.model == HASWELL_M2) ||
(cpuid_info.model == HASWELL_EP) ||
(cpuid_info.model == BROADWELL) ||
+ (cpuid_info.model == BROADWELL_E3) ||
(cpuid_info.model == BROADWELL_D) ||
(cpuid_info.model == BROADWELL_E) ||
(cpuid_info.model == SKYLAKE1) ||
@@ -182,6 +183,7 @@ cpuFeatures_update(int cpu)
(cpuid_info.model == HASWELL_M2) ||
(cpuid_info.model == HASWELL_EP) ||
(cpuid_info.model == BROADWELL) ||
+ (cpuid_info.model == BROADWELL_E3) ||
(cpuid_info.model == BROADWELL_D) ||
(cpuid_info.model == BROADWELL_E) ||
(cpuid_info.model == SKYLAKE1) ||
@@ -319,6 +321,7 @@ cpuFeatures_enable(int cpu, CpuFeature type, int print)
(cpuid_info.model == HASWELL_M2) ||
(cpuid_info.model == HASWELL_EP) ||
(cpuid_info.model == BROADWELL) ||
+ (cpuid_info.model == BROADWELL_E3) ||
(cpuid_info.model == BROADWELL_D) ||
(cpuid_info.model == BROADWELL_E) ||
(cpuid_info.model == SKYLAKE1) ||
@@ -448,6 +451,7 @@ cpuFeatures_disable(int cpu, CpuFeature type, int print)
(cpuid_info.model == HASWELL_M2) ||
(cpuid_info.model == HASWELL_EP) ||
(cpuid_info.model == BROADWELL) ||
+ (cpuid_info.model == BROADWELL_E3) ||
(cpuid_info.model == BROADWELL_D) ||
(cpuid_info.model == BROADWELL_E) ||
(cpuid_info.model == SKYLAKE1) ||
diff --git a/src/cpustring.c b/src/cpustring.c
index 040ff76..cb36a81 100644
--- a/src/cpustring.c
+++ b/src/cpustring.c
@@ -5,8 +5,8 @@
*
* Description: Parser for CPU selection strings
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
@@ -381,9 +381,9 @@ logical_redo:
{
for (int j=atoi(bdata(indexlist->entry[0])); j<=atoi(bdata(indexlist->entry[1])) && (insert_offset+insert < require);j++)
{
+ inlist_idx = j;
cpulist[insert_offset + insert] = inlist[inlist_idx % ret];
insert++;
- inlist_idx++;
if (insert == ret)
{
bstrListDestroy(indexlist);
@@ -399,9 +399,9 @@ logical_redo:
for (int j=atoi(bdata(indexlist->entry[0]));
j>=atoi(bdata(indexlist->entry[1])) && (insert_offset+insert < require); j--)
{
+ inlist_idx = j;
cpulist[insert_offset + insert] = inlist[inlist_idx % ret];
insert++;
- inlist_idx++;
if (insert == ret)
{
bstrListDestroy(indexlist);
diff --git a/src/frequency.c b/src/frequency.c
index f53d53c..9ef4516 100644
--- a/src/frequency.c
+++ b/src/frequency.c
@@ -5,8 +5,8 @@
*
* Description: Module implementing an interface for frequency manipulation
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
* Jan Treibig (jt), jan.treibig at gmail.com
diff --git a/src/hashTable.c b/src/hashTable.c
index 07f64ec..f78fdd1 100644
--- a/src/hashTable.c
+++ b/src/hashTable.c
@@ -6,8 +6,8 @@
* Description: Hashtable implementation based on SGLIB.
* Used for Marker API result handling.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/access.h b/src/includes/access.h
index 39faee5..f9cb032 100644
--- a/src/includes/access.h
+++ b/src/includes/access.h
@@ -5,8 +5,8 @@
*
* Description: Header File HPM access Module
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at gmail.com
* Project: likwid
diff --git a/src/includes/access_client.h b/src/includes/access_client.h
index c601c37..d824394 100644
--- a/src/includes/access_client.h
+++ b/src/includes/access_client.h
@@ -5,8 +5,8 @@
*
* Description: Header file for interface to the access daemon for the access module.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
diff --git a/src/includes/access_client_types.h b/src/includes/access_client_types.h
index eaee956..1c1d9b4 100644
--- a/src/includes/access_client_types.h
+++ b/src/includes/access_client_types.h
@@ -5,8 +5,8 @@
*
* Description: Types file for access_client access module.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/access_x86.h b/src/includes/access_x86.h
index 9240b2f..2dcfbad 100644
--- a/src/includes/access_x86.h
+++ b/src/includes/access_x86.h
@@ -5,8 +5,8 @@
*
* Description: Header file for the interface to x86 related functions for the access module.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
diff --git a/src/includes/access_x86_msr.h b/src/includes/access_x86_msr.h
index d12cc21..1ade728 100644
--- a/src/includes/access_x86_msr.h
+++ b/src/includes/access_x86_msr.h
@@ -5,8 +5,8 @@
*
* Description: Header file for the interface to x86 MSR functions for the access module.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
diff --git a/src/includes/access_x86_pci.h b/src/includes/access_x86_pci.h
index a7f41ff..8dc68cd 100644
--- a/src/includes/access_x86_pci.h
+++ b/src/includes/access_x86_pci.h
@@ -5,8 +5,8 @@
*
* Description: Header file for the interface to x86 PCI functions for the access module.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
diff --git a/src/includes/affinity.h b/src/includes/affinity.h
index ae31670..1ea72da 100644
--- a/src/includes/affinity.h
+++ b/src/includes/affinity.h
@@ -5,8 +5,8 @@
*
* Description: Header File affinity Module
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at gmail.com
diff --git a/src/includes/bitUtil.h b/src/includes/bitUtil.h
index 05690bc..50eeac3 100644
--- a/src/includes/bitUtil.h
+++ b/src/includes/bitUtil.h
@@ -6,8 +6,8 @@
* Description: Header File bitUtil Module.
* Helper routines for dealing with bit manipulations
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/calculator.h b/src/includes/calculator.h
index 4ac7d93..edca1d9 100644
--- a/src/includes/calculator.h
+++ b/src/includes/calculator.h
@@ -5,8 +5,8 @@
*
* Description: Header file for infix calculator
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at gmail.com
diff --git a/src/includes/calculator_stack.h b/src/includes/calculator_stack.h
index fb875f5..2358980 100644
--- a/src/includes/calculator_stack.h
+++ b/src/includes/calculator_stack.h
@@ -5,8 +5,8 @@
*
* Description: Stack implementation for infix calculator
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Brandon Mills (bm), mills.brandont at gmail.com
* Project: likwid
diff --git a/src/includes/configuration.h b/src/includes/configuration.h
index 4cc8932..9cfdf80 100644
--- a/src/includes/configuration.h
+++ b/src/includes/configuration.h
@@ -5,8 +5,8 @@
*
* Description: Header File of Module configuration.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at gmail.com
* Project: likwid
diff --git a/src/includes/cpuFeatures.h b/src/includes/cpuFeatures.h
index 9c6a97f..da9c4c3 100644
--- a/src/includes/cpuFeatures.h
+++ b/src/includes/cpuFeatures.h
@@ -5,13 +5,13 @@
*
* Description: Header File of Module cpuFeatures.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/cpuFeatures_types.h b/src/includes/cpuFeatures_types.h
index a8ccb12..5d8e6bc 100644
--- a/src/includes/cpuFeatures_types.h
+++ b/src/includes/cpuFeatures_types.h
@@ -5,8 +5,8 @@
*
* Description: Types file for CpuFeature module.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/cpuid.h b/src/includes/cpuid.h
index 9fd0afd..273e179 100644
--- a/src/includes/cpuid.h
+++ b/src/includes/cpuid.h
@@ -5,8 +5,8 @@
*
* Description: Common macro definition for CPUID instruction
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at gmail.com
* Project: likwid
diff --git a/src/includes/error.h b/src/includes/error.h
index 231b590..ab34ca1 100644
--- a/src/includes/error.h
+++ b/src/includes/error.h
@@ -5,8 +5,8 @@
*
* Description: Central error handling macros
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at gmail.com
diff --git a/src/includes/frequency.h b/src/includes/frequency.h
index cdf556b..212669d 100644
--- a/src/includes/frequency.h
+++ b/src/includes/frequency.h
@@ -1,3 +1,33 @@
+/*
+ * =======================================================================================
+ *
+ * Filename: frequency.h
+ *
+ * Description: Header File for frequency module
+ *
+ * Version: 4.2
+ * Released: 22.12.2016
+ *
+ * Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
+ * Project: likwid
+ *
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
+ *
+ * This program is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free Software
+ * Foundation, either version 3 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+ * PARTICULAR PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * =======================================================================================
+ */
+
#ifndef FREQUENCY_H
#define FREQUENCY_H
diff --git a/src/includes/hashTable.h b/src/includes/hashTable.h
index 13b5558..8f32025 100644
--- a/src/includes/hashTable.h
+++ b/src/includes/hashTable.h
@@ -7,8 +7,8 @@
* Wrapper for HashTable data structure holding thread
* specific region information.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/libperfctr_types.h b/src/includes/libperfctr_types.h
index e5f4edd..cea758a 100644
--- a/src/includes/libperfctr_types.h
+++ b/src/includes/libperfctr_types.h
@@ -5,8 +5,8 @@
*
* Description: Types file for libperfctr module.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/likwid.h b/src/includes/likwid.h
index b7063a2..8039b3b 100644
--- a/src/includes/likwid.h
+++ b/src/includes/likwid.h
@@ -5,8 +5,8 @@
*
* Description: Header File of likwid API
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Authors: Thomas Roehl (tr), thomas.roehl at googlemail.com
*
diff --git a/src/includes/lock.h b/src/includes/lock.h
index e0104d5..158660d 100644
--- a/src/includes/lock.h
+++ b/src/includes/lock.h
@@ -5,8 +5,8 @@
*
* Description: Header File Locking primitive Module
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/memsweep.h b/src/includes/memsweep.h
index 083ebf2..9ab7133 100644
--- a/src/includes/memsweep.h
+++ b/src/includes/memsweep.h
@@ -6,8 +6,8 @@
* Description: Header File memsweep module for internal use. External functions are
* defined in likwid.h
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/numa.h b/src/includes/numa.h
index aaeaab3..dba72c6 100644
--- a/src/includes/numa.h
+++ b/src/includes/numa.h
@@ -6,8 +6,8 @@
* Description: Header File NUMA module for internal use. External functions are
* defined in likwid.h
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
@@ -39,6 +39,8 @@
#include <numa_hwloc.h>
#include <numa_proc.h>
+extern int numaInitialized;
+
extern int str2int(const char* str);
struct numa_functions {
diff --git a/src/includes/numa_hwloc.h b/src/includes/numa_hwloc.h
index dbf4091..5a3fbba 100644
--- a/src/includes/numa_hwloc.h
+++ b/src/includes/numa_hwloc.h
@@ -5,8 +5,8 @@
*
* Description: Header File hwloc NUMA backend
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
diff --git a/src/includes/numa_proc.h b/src/includes/numa_proc.h
index e03ef1e..6c3ffde 100644
--- a/src/includes/numa_proc.h
+++ b/src/includes/numa_proc.h
@@ -5,8 +5,8 @@
*
* Description: Header File procfs/sysfs NUMA backend
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
diff --git a/src/includes/pci_hwloc.h b/src/includes/pci_hwloc.h
index 23223c7..9cb2c32 100644
--- a/src/includes/pci_hwloc.h
+++ b/src/includes/pci_hwloc.h
@@ -5,8 +5,8 @@
*
* Description: Header File hwloc based PCI lookup backend
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
diff --git a/src/includes/pci_proc.h b/src/includes/pci_proc.h
index 61d733f..734d369 100644
--- a/src/includes/pci_proc.h
+++ b/src/includes/pci_proc.h
@@ -5,8 +5,8 @@
*
* Description: Header File procfs based PCI lookup backend
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
diff --git a/src/includes/pci_types.h b/src/includes/pci_types.h
index 8089f97..aaed79e 100644
--- a/src/includes/pci_types.h
+++ b/src/includes/pci_types.h
@@ -5,8 +5,8 @@
*
* Description: Types file for pci module.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
diff --git a/src/includes/perfgroup.h b/src/includes/perfgroup.h
index 0e332f4..bb63e2a 100644
--- a/src/includes/perfgroup.h
+++ b/src/includes/perfgroup.h
@@ -5,8 +5,8 @@
*
* Description: Header File of performance group and event set handler
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at gmail.com
* Project: likwid
diff --git a/src/includes/perfmon.h b/src/includes/perfmon.h
index 17efaf8..cc821b9 100644
--- a/src/includes/perfmon.h
+++ b/src/includes/perfmon.h
@@ -7,8 +7,8 @@
* Configures and reads out performance counters
* on x86 based architectures. Supports multi threading.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
diff --git a/src/includes/perfmon_atom.h b/src/includes/perfmon_atom.h
index 57a4892..1a9f9c3 100644
--- a/src/includes/perfmon_atom.h
+++ b/src/includes/perfmon_atom.h
@@ -5,8 +5,8 @@
*
* Description: Header file of perfmon module for Atom
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/perfmon_atom_events.txt b/src/includes/perfmon_atom_events.txt
index 28f07ee..b533378 100644
--- a/src/includes/perfmon_atom_events.txt
+++ b/src/includes/perfmon_atom_events.txt
@@ -4,8 +4,8 @@
#
# Description: Event list for Intel Atom
#
-# Version: <VERSION>
-# Released: <DATE>
+# Version: 4.2
+# Released: 22.12.2016
#
# Author: Jan Treibig (jt), jan.treibig at gmail.com
# Thomas Roehl (tr), thomas.roehl at googlemail.com
diff --git a/src/includes/perfmon_broadwell.h b/src/includes/perfmon_broadwell.h
index b913d9c..b4dbb89 100644
--- a/src/includes/perfmon_broadwell.h
+++ b/src/includes/perfmon_broadwell.h
@@ -5,13 +5,13 @@
*
* Description: Header File of perfmon module for Intel Broadwell.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
@@ -76,7 +76,7 @@ int perfmon_init_broadwell(int cpu_id)
broadwell_cbox_setup = bdwep_cbox_setup;
bdw_did_cbox_check = 1;
}
- else if (cpuid_info.model == BROADWELL &&
+ else if ((cpuid_info.model == BROADWELL || cpuid_info.model == BROADWELL_E3) &&
socket_lock[affinity_core2node_lookup[cpu_id]] == cpu_id &&
bdw_did_cbox_check == 0)
{
@@ -986,7 +986,7 @@ int bdw_qbox_setup(int cpu_id, RegisterIndex index, PerfmonEvent *event, PciDevi
VERBOSEPRINTREG(cpu_id, MSR_UNC_V3_U_PMON_GLOBAL_CTL, LLU_CAST (1ULL<<31), FREEZE_UNCORE); \
CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_UNC_V3_U_PMON_GLOBAL_CTL, (1ULL<<31))); \
} \
- else if (haveLock && MEASURE_UNCORE(eventSet) && cpuid_info.model == BROADWELL) \
+ else if (haveLock && MEASURE_UNCORE(eventSet) && (cpuid_info.model == BROADWELL || cpuid_info.model == BROADWELL_E3)) \
{ \
uint64_t data = 0x0ULL; \
CHECK_MSR_READ_ERROR(HPMread(cpu_id, MSR_DEV, MSR_UNCORE_PERF_GLOBAL_CTRL, &data)); \
@@ -1004,7 +1004,7 @@ int bdw_qbox_setup(int cpu_id, RegisterIndex index, PerfmonEvent *event, PciDevi
VERBOSEPRINTREG(cpu_id, MSR_UNC_V3_U_PMON_GLOBAL_CTL, LLU_CAST (1ULL<<29), UNFREEZE_UNCORE); \
CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_UNC_V3_U_PMON_GLOBAL_CTL, (1ULL<<29))); \
} \
- else if (haveLock && MEASURE_UNCORE(eventSet) && cpuid_info.model == BROADWELL) \
+ else if (haveLock && MEASURE_UNCORE(eventSet) && (cpuid_info.model == BROADWELL || cpuid_info.model == BROADWELL_E3)) \
{ \
uint64_t data = 0x0ULL; \
CHECK_MSR_READ_ERROR(HPMread(cpu_id, MSR_DEV, MSR_UNCORE_PERF_GLOBAL_CTRL, &data)); \
diff --git a/src/includes/perfmon_broadwellEP_counters.h b/src/includes/perfmon_broadwellEP_counters.h
index eefb990..59c85f8 100644
--- a/src/includes/perfmon_broadwellEP_counters.h
+++ b/src/includes/perfmon_broadwellEP_counters.h
@@ -5,13 +5,13 @@
*
* Description: Counter Header File of perfmon module for Broadwell EP/EN/EX.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_broadwellEP_events.txt b/src/includes/perfmon_broadwellEP_events.txt
index 9a6221a..00ec49e 100644
--- a/src/includes/perfmon_broadwellEP_events.txt
+++ b/src/includes/perfmon_broadwellEP_events.txt
@@ -4,13 +4,13 @@
#
# Description: Event list for Intel Broadwell EP/EN/EX.
#
-# Version: <VERSION>
-# Released: <DATE>
+# Version: 4.2
+# Released: 22.12.2016
#
# Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
# Project: likwid
#
-# Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+# Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_broadwell_counters.h b/src/includes/perfmon_broadwell_counters.h
index 1f2ea84..a1a7528 100644
--- a/src/includes/perfmon_broadwell_counters.h
+++ b/src/includes/perfmon_broadwell_counters.h
@@ -5,13 +5,13 @@
*
* Description: Counter Header File of perfmon module for Broadwell.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_broadwell_events.txt b/src/includes/perfmon_broadwell_events.txt
index b9293ef..7d78d7d 100644
--- a/src/includes/perfmon_broadwell_events.txt
+++ b/src/includes/perfmon_broadwell_events.txt
@@ -4,13 +4,13 @@
#
# Description: Event list for Intel Broadwell
#
-# Version: <VERSION>
-# Released: <DATE>
+# Version: 4.2
+# Released: 22.12.2016
#
# Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
# Project: likwid
#
-# Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+# Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_broadwelld_counters.h b/src/includes/perfmon_broadwelld_counters.h
index e8deabc..a0aeb6e 100644
--- a/src/includes/perfmon_broadwelld_counters.h
+++ b/src/includes/perfmon_broadwelld_counters.h
@@ -5,13 +5,13 @@
*
* Description: Counter Header File of perfmon module for Broadwell D.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_broadwelld_events.txt b/src/includes/perfmon_broadwelld_events.txt
index a74e0d9..1bdeb97 100644
--- a/src/includes/perfmon_broadwelld_events.txt
+++ b/src/includes/perfmon_broadwelld_events.txt
@@ -4,13 +4,13 @@
#
# Description: Event list for Intel Broadwell D
#
-# Version: <VERSION>
-# Released: <DATE>
+# Version: 4.2
+# Released: 22.12.2016
#
# Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
# Project: likwid
#
-# Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+# Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_core2.h b/src/includes/perfmon_core2.h
index 02c55d3..2feea4a 100644
--- a/src/includes/perfmon_core2.h
+++ b/src/includes/perfmon_core2.h
@@ -5,14 +5,14 @@
*
* Description: Header file of perfmon module for Intel Core 2
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_core2_counters.h b/src/includes/perfmon_core2_counters.h
index ad7e088..10d6a78 100644
--- a/src/includes/perfmon_core2_counters.h
+++ b/src/includes/perfmon_core2_counters.h
@@ -5,14 +5,14 @@
*
* Description: Counter header file of perfmon module for Intel Core 2
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_core2_events.txt b/src/includes/perfmon_core2_events.txt
index 767666b..42c1fc4 100644
--- a/src/includes/perfmon_core2_events.txt
+++ b/src/includes/perfmon_core2_events.txt
@@ -4,14 +4,14 @@
#
# Description: Event list for Intel Core 2
#
-# Version: <VERSION>
-# Released: <DATE>
+# Version: 4.2
+# Released: 22.12.2016
#
# Author: Jan Treibig (jt), jan.treibig at gmail.com
# Thomas Roehl (tr), thomas.roehl at googlemail.com
# Project: likwid
#
-# Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+# Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_goldmont.h b/src/includes/perfmon_goldmont.h
index 7c3c006..4f69e76 100644
--- a/src/includes/perfmon_goldmont.h
+++ b/src/includes/perfmon_goldmont.h
@@ -5,8 +5,8 @@
*
* Description: Header File of perfmon module for Intel Goldmont.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
diff --git a/src/includes/perfmon_goldmont_counters.h b/src/includes/perfmon_goldmont_counters.h
index d80572f..374a6a7 100644
--- a/src/includes/perfmon_goldmont_counters.h
+++ b/src/includes/perfmon_goldmont_counters.h
@@ -5,8 +5,8 @@
*
* Description: Counter Header File of perfmon module for Intel Goldmont.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
diff --git a/src/includes/perfmon_goldmont_events.txt b/src/includes/perfmon_goldmont_events.txt
index f87660d..439b151 100644
--- a/src/includes/perfmon_goldmont_events.txt
+++ b/src/includes/perfmon_goldmont_events.txt
@@ -4,8 +4,8 @@
#
# Description: Event list for Intel Goldmont
#
-# Version: <VERSION>
-# Released: <DATE>
+# Version: 4.2
+# Released: 22.12.2016
#
# Author: Jan Treibig (jt), jan.treibig at gmail.com
# Thomas Roehl (tr), thomas.roehl at googlemail.com
diff --git a/src/includes/perfmon_haswell.h b/src/includes/perfmon_haswell.h
index 38aeeda..5039c83 100644
--- a/src/includes/perfmon_haswell.h
+++ b/src/includes/perfmon_haswell.h
@@ -5,14 +5,14 @@
*
* Description: Header File of perfmon module for Intel Haswell.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_haswellEP_counters.h b/src/includes/perfmon_haswellEP_counters.h
index 67773ef..aa64010 100644
--- a/src/includes/perfmon_haswellEP_counters.h
+++ b/src/includes/perfmon_haswellEP_counters.h
@@ -5,14 +5,14 @@
*
* Description: Counter Header File of perfmon module for Intel Haswell EP/EN/EX.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_haswellEP_events.txt b/src/includes/perfmon_haswellEP_events.txt
index bfea428..1968f92 100644
--- a/src/includes/perfmon_haswellEP_events.txt
+++ b/src/includes/perfmon_haswellEP_events.txt
@@ -4,14 +4,14 @@
#
# Description: Event list for Intel Haswell EP/EN/EX
#
-# Version: <VERSION>
-# Released: <DATE>
+# Version: 4.2
+# Released: 22.12.2016
#
# Author: Jan Treibig (jt), jan.treibig at gmail.com
# Thomas Roehl (tr), thomas.roehl at googlemail.com
# Project: likwid
#
-# Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+# Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_haswell_counters.h b/src/includes/perfmon_haswell_counters.h
index 3a7b7a6..cf1aee8 100644
--- a/src/includes/perfmon_haswell_counters.h
+++ b/src/includes/perfmon_haswell_counters.h
@@ -5,14 +5,14 @@
*
* Description: Counter Header File of perfmon module for Intel Haswell.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_haswell_events.txt b/src/includes/perfmon_haswell_events.txt
index 190b7a4..fd59b8e 100644
--- a/src/includes/perfmon_haswell_events.txt
+++ b/src/includes/perfmon_haswell_events.txt
@@ -4,14 +4,14 @@
#
# Description: Event list for Intel Haswell
#
-# Version: <VERSION>
-# Released: <DATE>
+# Version: 4.2
+# Released: 22.12.2016
#
# Author: Jan Treibig (jt), jan.treibig at gmail.com
# Thomas Roehl (tr), thomas.roehl at googlemail.com
# Project: likwid
#
-# Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+# Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_interlagos.h b/src/includes/perfmon_interlagos.h
index e5faf5b..a11b8e8 100644
--- a/src/includes/perfmon_interlagos.h
+++ b/src/includes/perfmon_interlagos.h
@@ -5,14 +5,14 @@
*
* Description: Header file of perfmon module for AMD Interlagos
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_interlagos_counters.h b/src/includes/perfmon_interlagos_counters.h
index 0f7f23a..08c4cce 100644
--- a/src/includes/perfmon_interlagos_counters.h
+++ b/src/includes/perfmon_interlagos_counters.h
@@ -5,14 +5,14 @@
*
* Description: Counter Header File of perfmon module for AMD Interlagos
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_interlagos_events.txt b/src/includes/perfmon_interlagos_events.txt
index 6d28687..c94ea02 100644
--- a/src/includes/perfmon_interlagos_events.txt
+++ b/src/includes/perfmon_interlagos_events.txt
@@ -4,14 +4,14 @@
#
# Description: Event list for AMD Interlagos
#
-# Version: <VERSION>
-# Released: <DATE>
+# Version: 4.2
+# Released: 22.12.2016
#
# Author: Jan Treibig (jt), jan.treibig at gmail.com
# Thomas Roehl (tr), thomas.roehl at googlemail.com
# Project: likwid
#
-# Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+# Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_ivybridge.h b/src/includes/perfmon_ivybridge.h
index 75c2b5b..c4a354b 100644
--- a/src/includes/perfmon_ivybridge.h
+++ b/src/includes/perfmon_ivybridge.h
@@ -5,14 +5,14 @@
*
* Description: Header File of perfmon module for Intel Ivy Bridge.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
@@ -68,7 +68,7 @@ int perfmon_init_ivybridge(int cpu_id)
ivy_cbox_setup = ivbep_cbox_setup;
ivb_did_cbox_test = 1;
}
- else if (cpuid_info.model == IVYBRIDGE &&
+ else if (cpuid_info.model == IVYBRIDGE &&
socket_lock[affinity_core2node_lookup[cpu_id]] == cpu_id &&
ivb_did_cbox_test == 0)
{
@@ -924,6 +924,14 @@ int perfmon_startCountersThread_ivybridge(int thread_id, PerfmonEventSet* eventS
eventSet->events[i].threadCounter[thread_id].startData = field64(tmp, 0, box_map[type].regWidth);
}
break;
+ case WBOX0FIX:
+ case WBOX1FIX:
+ if (haveLock)
+ {
+ CHECK_PCI_READ_ERROR(HPMread(cpu_id, counter_map[index].device, counter1, &tmp));
+ eventSet->events[i].threadCounter[thread_id].startData = field64(tmp, 0, box_map[type].regWidth);
+ }
+ break;
default:
if (type >= UNCORE && haveLock)
@@ -1209,6 +1217,17 @@ int perfmon_stopCountersThread_ivybridge(int thread_id, PerfmonEventSet* eventSe
ivb_uncore_overflow(cpu_id, index, event, overflows, counter_result,
*current, box_map[type].ovflOffset, 0);
break;
+ case WBOX0FIX:
+ case WBOX1FIX:
+ if (haveLock)
+ {
+ CHECK_PCI_READ_ERROR(HPMread(cpu_id, dev, counter1, &counter_result));
+ if (counter_result < *current)
+ {
+ (*overflows)++;
+ }
+ }
+ break;
case IBOX1:
counter_result = ivb_uncore_read(cpu_id, index, event, FREEZE_FLAG_CLEAR_CTR);
diff --git a/src/includes/perfmon_ivybridgeEP_counters.h b/src/includes/perfmon_ivybridgeEP_counters.h
index 404bc36..5f8e77b 100644
--- a/src/includes/perfmon_ivybridgeEP_counters.h
+++ b/src/includes/perfmon_ivybridgeEP_counters.h
@@ -5,14 +5,14 @@
*
* Description: Counter header file of perfmon module for Intel Ivy Bridge EP.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_ivybridgeEP_events.txt b/src/includes/perfmon_ivybridgeEP_events.txt
index 86f764d..92f5a6f 100644
--- a/src/includes/perfmon_ivybridgeEP_events.txt
+++ b/src/includes/perfmon_ivybridgeEP_events.txt
@@ -4,14 +4,14 @@
#
# Description: Event list for Intel Ivy Bridge EP/EN/EX
#
-# Version: <VERSION>
-# Released: <DATE>
+# Version: 4.2
+# Released: 22.12.2016
#
# Author: Jan Treibig (jt), jan.treibig at gmail.com
# Thomas Roehl (tr), thomas.roehl at googlemail.com
# Project: likwid
#
-# Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+# Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_ivybridge_counters.h b/src/includes/perfmon_ivybridge_counters.h
index dcc7491..79d09f1 100644
--- a/src/includes/perfmon_ivybridge_counters.h
+++ b/src/includes/perfmon_ivybridge_counters.h
@@ -5,14 +5,14 @@
*
* Description: Counter header file of perfmon module for Intel Ivy Bridge.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_ivybridge_events.txt b/src/includes/perfmon_ivybridge_events.txt
index d789667..a05bf05 100644
--- a/src/includes/perfmon_ivybridge_events.txt
+++ b/src/includes/perfmon_ivybridge_events.txt
@@ -4,14 +4,14 @@
#
# Description: Event list for Intel Ivy Bridge
#
-# Version: <VERSION>
-# Released: <DATE>
+# Version: 4.2
+# Released: 22.12.2016
#
# Author: Jan Treibig (jt), jan.treibig at gmail.com
# Thomas Roehl (tr), thomas.roehl at googlemail.com
# Project: likwid
#
-# Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+# Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_k10.h b/src/includes/perfmon_k10.h
index c2effb3..a713768 100644
--- a/src/includes/perfmon_k10.h
+++ b/src/includes/perfmon_k10.h
@@ -5,14 +5,14 @@
*
* Description: Header file of perfmon module for AMD K10
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_k10_counters.h b/src/includes/perfmon_k10_counters.h
index 71278af..30b32eb 100644
--- a/src/includes/perfmon_k10_counters.h
+++ b/src/includes/perfmon_k10_counters.h
@@ -5,14 +5,14 @@
*
* Description: AMD K10 performance counter definition. Also used for AMD K8.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_k10_events.txt b/src/includes/perfmon_k10_events.txt
index c66931b..62465b0 100644
--- a/src/includes/perfmon_k10_events.txt
+++ b/src/includes/perfmon_k10_events.txt
@@ -4,14 +4,14 @@
#
# Description: Event list for AMD K10
#
-# Version: <VERSION>
-# Released: <DATE>
+# Version: 4.2
+# Released: 22.12.2016
#
# Author: Jan Treibig (jt), jan.treibig at gmail.com
# Thomas Roehl (tr), thomas.roehl at googlemail.com
# Project: likwid
#
-# Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+# Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_k8.h b/src/includes/perfmon_k8.h
index 4c5f8dc..753e520 100644
--- a/src/includes/perfmon_k8.h
+++ b/src/includes/perfmon_k8.h
@@ -6,14 +6,14 @@
* Description: Header File of perfmon module for AMD K8 support.
* The setup routines and registers are similar to AMD K10
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_k8_events.txt b/src/includes/perfmon_k8_events.txt
index a431b72..6b86346 100644
--- a/src/includes/perfmon_k8_events.txt
+++ b/src/includes/perfmon_k8_events.txt
@@ -4,14 +4,14 @@
#
# Description: Event list for AMD K8
#
-# Version: <VERSION>
-# Released: <DATE>
+# Version: 4.2
+# Released: 22.12.2016
#
# Author: Jan Treibig (jt), jan.treibig at gmail.com
# Thomas Roehl (tr), thomas.roehl at googlemail.com
# Project: likwid
#
-# Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+# Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_kabini.h b/src/includes/perfmon_kabini.h
index 9441b54..7aac840 100644
--- a/src/includes/perfmon_kabini.h
+++ b/src/includes/perfmon_kabini.h
@@ -5,14 +5,14 @@
*
* Description: Header file of perfmon module for AMD Family 16
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_kabini_counters.h b/src/includes/perfmon_kabini_counters.h
index 1171ac9..fdfeb2f 100644
--- a/src/includes/perfmon_kabini_counters.h
+++ b/src/includes/perfmon_kabini_counters.h
@@ -5,14 +5,14 @@
*
* Description: Counter Header File of perfmon module for AMD Family 16
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_kabini_events.txt b/src/includes/perfmon_kabini_events.txt
index 3fc1a2e..68775c4 100644
--- a/src/includes/perfmon_kabini_events.txt
+++ b/src/includes/perfmon_kabini_events.txt
@@ -4,13 +4,13 @@
#
# Description: Event list for AMD Kabini
#
-# Version: <VERSION>
-# Released: <DATE>
+# Version: 4.2
+# Released: 22.12.2016
#
# Author: saravanan.ekanathan at amd.com
# Project: likwid
#
-# Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+# Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
@@ -329,11 +329,63 @@ UMASK_IBS_OPS 0x0
EVENT_IBS_RETIRED_OPS 0x1D0 PMC
UMASK_IBS_RETIRED_OPS 0x0
+EVENT_CMD_RELATED_TO_VICTIM_BUFFERS 0x60 CPMC
+UMASK_CMD_RELATED_TO_VICTIM_BUFFERS_WR_VIC_BLK_RINSING 0x04
+UMASK_CMD_RELATED_TO_VICTIM_BUFFERS_WR_VIC_BLK_NON_RINSING 0x08
+UMASK_CMD_RELATED_TO_VICTIM_BUFFERS_CLEAN_VIC_CMD 0x10
+UMASK_CMD_RELATED_TO_VICTIM_BUFFERS_LOCK 0x80
+
+EVENT_CMD_RELATED_TO_MASKED_OPS 0x61 CPMC
+UMASK_CMD_RELATED_TO_MASKED_OPS_READ_BYTE 0x01
+UMASK_CMD_RELATED_TO_MASKED_OPS_READ_DBL_WORD 0x04
+UMASK_CMD_RELATED_TO_MASKED_OPS_WRITE_BYTE 0x10
+UMASK_CMD_RELATED_TO_MASKED_OPS_WRITE_DBL_WORD 0x20
+
+EVENT_CMD_RELATED_TO_READ_BLOCK_OPS 0x62 CPMC
+UMASK_CMD_RELATED_TO_READ_BLOCK_OPS_READ_BLOCK 0x01
+UMASK_CMD_RELATED_TO_READ_BLOCK_OPS_READ_BLOCK_M 0x02
+UMASK_CMD_RELATED_TO_READ_BLOCK_OPS_READ_BLOCK_S 0x04
+
+EVENT_CMD_RELATED_TO_CHANGE_TO_DIRTY_OPS 0x63 CPMC
+UMASK_CMD_RELATED_TO_CHANGE_TO_DIRTY_OPS 0x10
+
+EVENT_RESP_FROM_SYS_ON_CACHE_REFILLS 0x6C CPMC
+UMASK_RESP_FROM_SYS_ON_CACHE_REFILLS_EXCLUSIVE 0x01
+UMASK_RESP_FROM_SYS_ON_CACHE_REFILLS_MODIFIED 0x02
+UMASK_RESP_FROM_SYS_ON_CACHE_REFILLS_SHARED 0x04
+UMASK_RESP_FROM_SYS_ON_CACHE_REFILLS_OWNED 0x08
+UMASK_RESP_FROM_SYS_ON_CACHE_REFILLS_DATA_ERROR 0x10
+UMASK_RESP_FROM_SYS_ON_CACHE_REFILLS_CHG_TO_DIRTY_SUCCESS 0x20
+UMASK_RESP_FROM_SYS_ON_CACHE_REFILLS_UNCACHEABLE 0x40
+
+EVENT_REQ_TO_L2_CACHE 0x7D CPMC
+UMASK_REQ_TO_L2_CACHE_ICACHE_FILL 0x01
+UMASK_REQ_TO_L2_CACHE_DATA_FILL 0x02
+UMASK_REQ_TO_L2_CACHE_NB_PROBE_REQ 0x08
+UMASK_REQ_TO_L2_CACHE_PREF_REQ 0x40
+
+EVENT_L2_CACHE_MISSES 0x7E CPMC
+UMASK_L2_CACHE_MISSES_ICACHE_FILL 0x01
+UMASK_L2_CACHE_MISSES_DATA_FILL 0x02
+UMASK_L2_CACHE_MISSES_PREF_REQ 0x10
+
+EVENT_L2_FILLS_WRITEBACK 0x7F CPMC
+UMASK_L2_FILLS_WRITEBACK_L2_FILLS 0x01
+UMASK_L2_FILLS_WRITEBACK_L2_WRITEBACKS 0x02
+UMASK_L2_FILLS_WRITEBACK_L2_CLEAN_WRITEBACKS 0x04
+
EVENT_UNC_MEMORY_CONTROLLER_BYPASS_COUNTER_SATURATION 0x0E4 UPMC
UMASK_UNC_MEMORY_CONTROLLER_BYPASS_COUNTER_SATURATION_MEM_HIGH 0x01
UMASK_UNC_MEMORY_CONTROLLER_BYPASS_COUNTER_SATURATION_MEM_MED 0x02
-UMASK_UNC_MEMORY_CONTROLLER_BYPASS_COUNTER_SATURATION__DCQ 0x04
-UMASK_UNC_MEMORY_CONTROLLER_BYPASS_COUNTER_SATURATION_DCQ_SATURATED 0x20
+UMASK_UNC_MEMORY_CONTROLLER_BYPASS_COUNTER_SATURATION_DCQ_SATURATED_DCT0 0x20
+UMASK_UNC_MEMORY_CONTROLLER_BYPASS_COUNTER_SATURATION_DCQ_SATURATED_DCT1 0x24
+UMASK_UNC_MEMORY_CONTROLLER_BYPASS_COUNTER_SATURATION_DCQ_SATURATED_DCT2 0x28
+UMASK_UNC_MEMORY_CONTROLLER_BYPASS_COUNTER_SATURATION_DCQ_SATURATED_DCT3 0x2C
+UMASK_UNC_MEMORY_CONTROLLER_BYPASS_COUNTER_SATURATION_DCQ_SATURATED_DCT4 0x30
+UMASK_UNC_MEMORY_CONTROLLER_BYPASS_COUNTER_SATURATION_DCQ_SATURATED_DCT5 0x34
+UMASK_UNC_MEMORY_CONTROLLER_BYPASS_COUNTER_SATURATION_DCQ_SATURATED_DCT6 0x38
+UMASK_UNC_MEMORY_CONTROLLER_BYPASS_COUNTER_SATURATION_DCQ_SATURATED_DCT7 0x3C
+
EVENT_UNC_THERMAL_STATUS 0x0E8 UPMC
UMASK_UNC_THERMAL_STATUS_HTC_TRIP_POINT_CROSSED 0x04
diff --git a/src/includes/perfmon_knl.h b/src/includes/perfmon_knl.h
index 0010b08..a609a95 100644
--- a/src/includes/perfmon_knl.h
+++ b/src/includes/perfmon_knl.h
@@ -5,13 +5,13 @@
*
* Description: Header file of perfmon module for Intel Xeon Phi (Knights Landing)
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
@@ -847,7 +847,7 @@ int knl_uncore_read(int cpu_id, RegisterIndex index, PerfmonEvent *event,
{ \
eventSet->events[i].threadCounter[thread_id].overflows++; \
} \
- CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_STATUS, (1ULL<<offset))); \
+ CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_PERF_GLOBAL_OVF_CTRL, (1ULL<<offset))); \
}
int perfmon_stopCountersThread_knl(int thread_id, PerfmonEventSet* eventSet)
diff --git a/src/includes/perfmon_knl_counters.h b/src/includes/perfmon_knl_counters.h
index 9dc5b93..530bf09 100644
--- a/src/includes/perfmon_knl_counters.h
+++ b/src/includes/perfmon_knl_counters.h
@@ -5,13 +5,13 @@
*
* Description: Counter header file of perfmon module for Intel Atom (Silvermont)
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_knl_events.txt b/src/includes/perfmon_knl_events.txt
index 091ed95..9aaeea9 100644
--- a/src/includes/perfmon_knl_events.txt
+++ b/src/includes/perfmon_knl_events.txt
@@ -4,13 +4,13 @@
#
# Description: Event list for Intel Xeon Phi (Knights Landing)
#
-# Version: <VERSION>
-# Released: <DATE>
+# Version: 4.2
+# Released: 22.12.2016
#
# Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
# Project: likwid
#
-# Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+# Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_nehalem.h b/src/includes/perfmon_nehalem.h
index 9c25137..df3dee2 100644
--- a/src/includes/perfmon_nehalem.h
+++ b/src/includes/perfmon_nehalem.h
@@ -5,14 +5,14 @@
*
* Description: Header File of perfmon module for Intel Nehalem.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_nehalemEX.h b/src/includes/perfmon_nehalemEX.h
index 7c37fbb..653b7e9 100644
--- a/src/includes/perfmon_nehalemEX.h
+++ b/src/includes/perfmon_nehalemEX.h
@@ -5,14 +5,14 @@
*
* Description: Header File of perfmon module for Intel Nehalem EX.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_nehalemEX_counters.h b/src/includes/perfmon_nehalemEX_counters.h
index c0fefad..9d313da 100644
--- a/src/includes/perfmon_nehalemEX_counters.h
+++ b/src/includes/perfmon_nehalemEX_counters.h
@@ -5,14 +5,14 @@
*
* Description: Counter Header File of perfmon module for Intel Westmere EX.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_nehalemEX_events.txt b/src/includes/perfmon_nehalemEX_events.txt
index 50446b3..8406091 100644
--- a/src/includes/perfmon_nehalemEX_events.txt
+++ b/src/includes/perfmon_nehalemEX_events.txt
@@ -4,14 +4,14 @@
#
# Description: Event list for Intel Nehalem EX
#
-# Version: <VERSION>
-# Released: <DATE>
+# Version: 4.2
+# Released: 22.12.2016
#
# Author: Jan Treibig (jt), jan.treibig at gmail.com
# Thomas Roehl (tr), thomas.roehl at googlemail.com
# Project: likwid
#
-# Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+# Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_nehalemEX_westmereEX_common.h b/src/includes/perfmon_nehalemEX_westmereEX_common.h
index 7f8767f..33497fa 100644
--- a/src/includes/perfmon_nehalemEX_westmereEX_common.h
+++ b/src/includes/perfmon_nehalemEX_westmereEX_common.h
@@ -5,13 +5,13 @@
*
* Description: Common definitions for Intel Nehalem EX and Westmere EX
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_nehalem_counters.h b/src/includes/perfmon_nehalem_counters.h
index 53f20e0..d4b97a7 100644
--- a/src/includes/perfmon_nehalem_counters.h
+++ b/src/includes/perfmon_nehalem_counters.h
@@ -5,14 +5,14 @@
*
* Description: Counter Header File of perfmon module for Intel Nehalem.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_nehalem_events.txt b/src/includes/perfmon_nehalem_events.txt
index a240ba9..c42d186 100644
--- a/src/includes/perfmon_nehalem_events.txt
+++ b/src/includes/perfmon_nehalem_events.txt
@@ -4,14 +4,14 @@
#
# Description: Event list for Intel Nehalem
#
-# Version: <VERSION>
-# Released: <DATE>
+# Version: 4.2
+# Released: 22.12.2016
#
# Author: Jan Treibig (jt), jan.treibig at gmail.com
# Thomas Roehl (tr), thomas.roehl at googlemail.com
# Project: likwid
#
-# Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+# Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_p6_events.txt b/src/includes/perfmon_p6_events.txt
index 74b5fe1..fe2a500 100644
--- a/src/includes/perfmon_p6_events.txt
+++ b/src/includes/perfmon_p6_events.txt
@@ -4,14 +4,14 @@
#
# Description: Event list for Intel Pentium 3
#
-# Version: <VERSION>
-# Released: <DATE>
+# Version: 4.2
+# Released: 22.12.2016
#
# Author: Jan Treibig (jt), jan.treibig at gmail.com
# Thomas Roehl (tr), thomas.roehl at googlemail.com
# Project: likwid
#
-# Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+# Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_perf.h b/src/includes/perfmon_perf.h
index 56ff2c8..0451a8d 100644
--- a/src/includes/perfmon_perf.h
+++ b/src/includes/perfmon_perf.h
@@ -6,14 +6,14 @@
* Description: Header file of example perfmon module for software events using
* the perf_event interface
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_perfevent.h b/src/includes/perfmon_perfevent.h
index 868fd25..bcd53e2 100644
--- a/src/includes/perfmon_perfevent.h
+++ b/src/includes/perfmon_perfevent.h
@@ -5,13 +5,13 @@
*
* Description: Header File of perfmon module for perf_event kernel interface.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_phi.h b/src/includes/perfmon_phi.h
index 7a005ac..5e8e174 100644
--- a/src/includes/perfmon_phi.h
+++ b/src/includes/perfmon_phi.h
@@ -5,14 +5,14 @@
*
* Description: Header File of perfmon module for Intel Xeon Phi.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_phi_counters.h b/src/includes/perfmon_phi_counters.h
index ebed2da..01bfbd0 100644
--- a/src/includes/perfmon_phi_counters.h
+++ b/src/includes/perfmon_phi_counters.h
@@ -5,14 +5,14 @@
*
* Description: Counter Header File of perfmon module for Intel Xeon Phi.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_phi_events.txt b/src/includes/perfmon_phi_events.txt
index d2d9a6d..e0dca61 100644
--- a/src/includes/perfmon_phi_events.txt
+++ b/src/includes/perfmon_phi_events.txt
@@ -4,14 +4,14 @@
#
# Description: Event list for Intel Xeon Phi
#
-# Version: <VERSION>
-# Released: <DATE>
+# Version: 4.2
+# Released: 22.12.2016
#
# Author: Jan Treibig (jt), jan.treibig at gmail.com
# Thomas Roehl (tr), thomas.roehl at googlemail.com
# Project: likwid
#
-# Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+# Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_pm.h b/src/includes/perfmon_pm.h
index d49c4f3..e0c22e1 100644
--- a/src/includes/perfmon_pm.h
+++ b/src/includes/perfmon_pm.h
@@ -5,14 +5,14 @@
*
* Description: Header File of perfmon module Pentium M.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_pm_counters.h b/src/includes/perfmon_pm_counters.h
index 9ac4275..968a812 100644
--- a/src/includes/perfmon_pm_counters.h
+++ b/src/includes/perfmon_pm_counters.h
@@ -5,14 +5,14 @@
*
* Description: Counter Header File of perfmon module for Intel Pentium M.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_pm_events.txt b/src/includes/perfmon_pm_events.txt
index 7c63f11..028b41f 100644
--- a/src/includes/perfmon_pm_events.txt
+++ b/src/includes/perfmon_pm_events.txt
@@ -4,13 +4,13 @@
#
# Description: Event list for Intel Pentium M
#
-# Version: <VERSION>
-# Released: <DATE>
+# Version: 4.2
+# Released: 22.12.2016
#
# Author: Jan Treibig (jt), jan.treibig at gmail.com
# Project: likwid
#
-# Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+# Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_sandybridge.h b/src/includes/perfmon_sandybridge.h
index da8eefb..f379ec5 100644
--- a/src/includes/perfmon_sandybridge.h
+++ b/src/includes/perfmon_sandybridge.h
@@ -5,14 +5,14 @@
*
* Description: Header File of perfmon module for Intel Sandy Bridge.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_sandybridgeEP_counters.h b/src/includes/perfmon_sandybridgeEP_counters.h
index 940be74..89fdc29 100644
--- a/src/includes/perfmon_sandybridgeEP_counters.h
+++ b/src/includes/perfmon_sandybridgeEP_counters.h
@@ -5,14 +5,14 @@
*
* Description: Counter header file of perfmon module for Intel Sandy Bridge EP.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_sandybridgeEP_events.txt b/src/includes/perfmon_sandybridgeEP_events.txt
index ebac2fd..eba79bc 100644
--- a/src/includes/perfmon_sandybridgeEP_events.txt
+++ b/src/includes/perfmon_sandybridgeEP_events.txt
@@ -4,14 +4,14 @@
#
# Description: Event list for Intel SandyBridge EP
#
-# Version: <VERSION>
-# Released: <DATE>
+# Version: 4.2
+# Released: 22.12.2016
#
# Author: Jan Treibig (jt), jan.treibig at gmail.com
# Thomas Roehl (tr), thomas.roehl at googlemail.com
# Project: likwid
#
-# Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+# Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_sandybridge_counters.h b/src/includes/perfmon_sandybridge_counters.h
index e571d7a..13a83cc 100644
--- a/src/includes/perfmon_sandybridge_counters.h
+++ b/src/includes/perfmon_sandybridge_counters.h
@@ -5,14 +5,14 @@
*
* Description: Counter header file of perfmon module for Intel Sandy Bridge.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_sandybridge_events.txt b/src/includes/perfmon_sandybridge_events.txt
index b1cf8cf..dcc4435 100644
--- a/src/includes/perfmon_sandybridge_events.txt
+++ b/src/includes/perfmon_sandybridge_events.txt
@@ -4,14 +4,14 @@
#
# Description: Event list for Intel SandyBridge
#
-# Version: <VERSION>
-# Released: <DATE>
+# Version: 4.2
+# Released: 22.12.2016
#
# Author: Jan Treibig (jt), jan.treibig at gmail.com
# Thomas Roehl (tr), thomas.roehl at googlemail.com
# Project: likwid
#
-# Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+# Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_silvermont.h b/src/includes/perfmon_silvermont.h
index d6e2cab..db4bb92 100644
--- a/src/includes/perfmon_silvermont.h
+++ b/src/includes/perfmon_silvermont.h
@@ -5,13 +5,13 @@
*
* Description: Header file of perfmon module for Intel Atom (Silvermont)
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_silvermont_counters.h b/src/includes/perfmon_silvermont_counters.h
index c16edfc..70020ce 100644
--- a/src/includes/perfmon_silvermont_counters.h
+++ b/src/includes/perfmon_silvermont_counters.h
@@ -5,13 +5,13 @@
*
* Description: Counter header file of perfmon module for Intel Atom (Silvermont)
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_silvermont_events.txt b/src/includes/perfmon_silvermont_events.txt
index 1c222d3..6198372 100644
--- a/src/includes/perfmon_silvermont_events.txt
+++ b/src/includes/perfmon_silvermont_events.txt
@@ -4,13 +4,13 @@
#
# Description: Event list for Intel Atom (Silvermont)
#
-# Version: <VERSION>
-# Released: <DATE>
+# Version: 4.2
+# Released: 22.12.2016
#
# Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
# Project: likwid
#
-# Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+# Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_skylake.h b/src/includes/perfmon_skylake.h
index f01a4a7..9061784 100644
--- a/src/includes/perfmon_skylake.h
+++ b/src/includes/perfmon_skylake.h
@@ -5,14 +5,14 @@
*
* Description: Header File of perfmon module for Intel Skylake.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
@@ -145,6 +145,11 @@ int skl_pmc_setup(int cpu_id, RegisterIndex index, PerfmonEvent *event)
VERBOSEPRINTREG(cpu_id, MSR_OFFCORE_RESP1, LLU_CAST offcore_flags, SETUP_PMC_OFFCORE);
CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_OFFCORE_RESP1, offcore_flags));
}
+ else if (event->eventId == 0xC6)
+ {
+ VERBOSEPRINTREG(cpu_id, MSR_V4_PEBS_FRONTEND, LLU_CAST event->cmask, SETUP_PMC_FRONTEND);
+ CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_V4_PEBS_FRONTEND, event->cmask));
+ }
if (flags != currentConfig[cpu_id][index])
{
VERBOSEPRINTREG(cpu_id, counter_map[index].configRegister, LLU_CAST flags, SETUP_PMC)
@@ -509,6 +514,7 @@ int perfmon_stopCountersThread_skylake(int thread_id, PerfmonEventSet* eventSet)
VERBOSEPRINTREG(cpu_id, counter1, LLU_CAST counter_result, STOP_POWER)
if (counter_result < eventSet->events[i].threadCounter[thread_id].counterData)
{
+ VERBOSEPRINTREG(cpu_id, counter1, LLU_CAST counter_result, OVERFLOW_POWER);
eventSet->events[i].threadCounter[thread_id].overflows++;
}
*current = field64(counter_result, 0, box_map[type].regWidth);
@@ -633,9 +639,10 @@ int perfmon_readCountersThread_skylake(int thread_id, PerfmonEventSet* eventSet)
if (haveLock)
{
CHECK_POWER_READ_ERROR(power_read(cpu_id, counter1, (uint32_t*)&counter_result));
- VERBOSEPRINTREG(cpu_id, counter1, LLU_CAST counter_result, STOP_POWER)
+ VERBOSEPRINTREG(cpu_id, counter1, LLU_CAST counter_result, READ_POWER)
if (counter_result < eventSet->events[i].threadCounter[thread_id].counterData)
{
+ VERBOSEPRINTREG(cpu_id, counter1, LLU_CAST counter_result, OVERFLOW_POWER);
eventSet->events[i].threadCounter[thread_id].overflows++;
}
eventSet->events[i].threadCounter[thread_id].counterData = field64(counter_result, 0, box_map[type].regWidth);
@@ -707,7 +714,7 @@ int perfmon_finalizeCountersThread_skylake(int thread_id, PerfmonEventSet* event
int haveTileLock = 0;
int clearPBS = 0;
uint64_t ovf_values_core = (1ULL<<63)|(1ULL<<62);
- uint64_t ovf_values_UBOXFIX = 0x0ULL;
+ uint64_t ovf_values_uncore = 0x0ULL;
int cpu_id = groupSet->threads[thread_id].processorId;
if (socket_lock[affinity_core2node_lookup[cpu_id]] == cpu_id)
@@ -742,6 +749,11 @@ int perfmon_finalizeCountersThread_skylake(int thread_id, PerfmonEventSet* event
VERBOSEPRINTREG(cpu_id, MSR_OFFCORE_RESP1, 0x0ULL, CLEAR_OFFCORE_RESP1);
CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_OFFCORE_RESP1, 0x0ULL));
}
+ else if (eventSet->events[i].event.eventId == 0xC6)
+ {
+ VERBOSEPRINTREG(cpu_id, MSR_V4_PEBS_FRONTEND, 0x0ULL, CLEAR_PMC_FRONTEND);
+ CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, MSR_DEV, MSR_V4_PEBS_FRONTEND, 0x0ULL));
+ }
break;
case FIXED:
ovf_values_core |= (1ULL<<(index+32));
@@ -749,11 +761,11 @@ int perfmon_finalizeCountersThread_skylake(int thread_id, PerfmonEventSet* event
default:
break;
}
- if ((reg) && (((type == PMC)||(type == FIXED))||((type >= UBOXFIX) && (haveLock))))
+ if ((reg) && (((type == PMC)||(type == FIXED))||((type >= UNCORE) && (haveLock))))
{
- CHECK_MSR_READ_ERROR(HPMread(cpu_id, dev, reg, &ovf_values_UBOXFIX));
- VERBOSEPRINTPCIREG(cpu_id, dev, reg, ovf_values_UBOXFIX, SHOW_CTL);
- ovf_values_UBOXFIX = 0x0ULL;
+ CHECK_MSR_READ_ERROR(HPMread(cpu_id, dev, reg, &ovf_values_uncore));
+ VERBOSEPRINTPCIREG(cpu_id, dev, reg, ovf_values_uncore, SHOW_CTL);
+ ovf_values_uncore = 0x0ULL;
VERBOSEPRINTPCIREG(cpu_id, dev, reg, 0x0ULL, CLEAR_CTL);
CHECK_MSR_WRITE_ERROR(HPMwrite(cpu_id, dev, reg, 0x0ULL));
if ((type >= SBOX0) && (type <= SBOX3))
diff --git a/src/includes/perfmon_skylake_counters.h b/src/includes/perfmon_skylake_counters.h
index afba36b..d47c28d 100644
--- a/src/includes/perfmon_skylake_counters.h
+++ b/src/includes/perfmon_skylake_counters.h
@@ -5,14 +5,14 @@
*
* Description: Counter Header File of perfmon module for Intel Skylake.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
@@ -64,16 +64,16 @@ static RegisterMap skylake_counter_map[NUM_COUNTERS_SKYLAKE] = {
{"PWR4", PMC16, POWER, 0, MSR_PLATFORM_ENERGY_STATUS, 0, 0, EVENT_OPTION_NONE_MASK},
/* Test */
{"UBOXFIX", PMC17, UBOXFIX, MSR_UNC_PERF_FIXED_CTRL, MSR_UNC_PERF_FIXED_CTR, 0, 0, EVENT_OPTION_NONE_MASK},
- {"UBOX0", PMC18, UBOX, MSR_V4_ARB_PERF_FIXED_CTRL0, MSR_V4_ARB_PERF_FIXED_CTR0, 0, 0, SKL_VALID_OPTIONS_UBOX},
- {"UBOX1", PMC19, UBOX, MSR_V4_ARB_PERF_FIXED_CTRL1, MSR_V4_ARB_PERF_FIXED_CTR1, 0, 0, SKL_VALID_OPTIONS_UBOX},
- {"CBOX0C0", PMC20, CBOX0, MSR_V4_C0_PERF_FIXED_CTRL0, MSR_V4_C0_PERF_FIXED_CTR0, 0, 0, SKL_VALID_OPTIONS_CBOX},
- {"CBOX0C1", PMC21, CBOX0, MSR_V4_C0_PERF_FIXED_CTRL1, MSR_V4_C0_PERF_FIXED_CTR1, 0, 0, SKL_VALID_OPTIONS_CBOX},
- {"CBOX1C0", PMC22, CBOX1, MSR_V4_C1_PERF_FIXED_CTRL0, MSR_V4_C1_PERF_FIXED_CTR0, 0, 0, SKL_VALID_OPTIONS_CBOX},
- {"CBOX1C1", PMC23, CBOX1, MSR_V4_C1_PERF_FIXED_CTRL1, MSR_V4_C1_PERF_FIXED_CTR1, 0, 0, SKL_VALID_OPTIONS_CBOX},
- {"CBOX2C0", PMC24, CBOX2, MSR_V4_C2_PERF_FIXED_CTRL0, MSR_V4_C2_PERF_FIXED_CTR0, 0, 0, SKL_VALID_OPTIONS_CBOX},
- {"CBOX2C1", PMC25, CBOX2, MSR_V4_C2_PERF_FIXED_CTRL1, MSR_V4_C2_PERF_FIXED_CTR1, 0, 0, SKL_VALID_OPTIONS_CBOX},
- {"CBOX3C0", PMC26, CBOX3, MSR_V4_C3_PERF_FIXED_CTRL0, MSR_V4_C3_PERF_FIXED_CTR0, 0, 0, SKL_VALID_OPTIONS_CBOX},
- {"CBOX3C1", PMC27, CBOX3, MSR_V4_C3_PERF_FIXED_CTRL1, MSR_V4_C3_PERF_FIXED_CTR1, 0, 0, SKL_VALID_OPTIONS_CBOX},
+ {"UBOX0", PMC18, UBOX, MSR_V4_ARB_PERF_CTRL0, MSR_V4_ARB_PERF_CTR0, 0, 0, SKL_VALID_OPTIONS_UBOX},
+ {"UBOX1", PMC19, UBOX, MSR_V4_ARB_PERF_CTRL1, MSR_V4_ARB_PERF_CTR1, 0, 0, SKL_VALID_OPTIONS_UBOX},
+ {"CBOX0C0", PMC20, CBOX0, MSR_V4_C0_PERF_CTRL0, MSR_V4_C0_PERF_CTR0, 0, 0, SKL_VALID_OPTIONS_CBOX},
+ {"CBOX0C1", PMC21, CBOX0, MSR_V4_C0_PERF_CTRL1, MSR_V4_C0_PERF_CTR1, 0, 0, SKL_VALID_OPTIONS_CBOX},
+ {"CBOX1C0", PMC22, CBOX1, MSR_V4_C1_PERF_CTRL0, MSR_V4_C1_PERF_CTR0, 0, 0, SKL_VALID_OPTIONS_CBOX},
+ {"CBOX1C1", PMC23, CBOX1, MSR_V4_C1_PERF_CTRL1, MSR_V4_C1_PERF_CTR1, 0, 0, SKL_VALID_OPTIONS_CBOX},
+ {"CBOX2C0", PMC24, CBOX2, MSR_V4_C2_PERF_CTRL0, MSR_V4_C2_PERF_CTR0, 0, 0, SKL_VALID_OPTIONS_CBOX},
+ {"CBOX2C1", PMC25, CBOX2, MSR_V4_C2_PERF_CTRL1, MSR_V4_C2_PERF_CTR1, 0, 0, SKL_VALID_OPTIONS_CBOX},
+ {"CBOX3C0", PMC26, CBOX3, MSR_V4_C3_PERF_CTRL0, MSR_V4_C3_PERF_CTR0, 0, 0, SKL_VALID_OPTIONS_CBOX},
+ {"CBOX3C1", PMC27, CBOX3, MSR_V4_C3_PERF_CTRL1, MSR_V4_C3_PERF_CTR1, 0, 0, SKL_VALID_OPTIONS_CBOX},
};
diff --git a/src/includes/perfmon_skylake_events.txt b/src/includes/perfmon_skylake_events.txt
index 1948bde..b204348 100644
--- a/src/includes/perfmon_skylake_events.txt
+++ b/src/includes/perfmon_skylake_events.txt
@@ -4,14 +4,14 @@
#
# Description: Event list for Intel Skylake
#
-# Version: <VERSION>
-# Released: <DATE>
+# Version: 4.2
+# Released: 22.12.2016
#
# Author: Jan Treibig (jt), jan.treibig at gmail.com
# Thomas Roehl (tr), thomas.roehl at googlemail.com
# Project: likwid
#
-# Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+# Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
@@ -395,19 +395,19 @@ UMASK_CYCLE_ACTIVITY_CYCLES_LDM_PENDING 0x10
DEFAULT_OPTIONS_CYCLE_ACTIVITY_STALLS_LDM_PENDING EVENT_OPTION_THRESHOLD=0x14
UMASK_CYCLE_ACTIVITY_STALLS_LDM_PENDING 0x14
-EVENT_CYCLE_ACTIVITY_CYCLES_L1D_MISS 0xA3 PMC2
+EVENT_CYCLE_ACTIVITY_CYCLES_L1D_MISS 0xA3 PMC
DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_L1D_MISS EVENT_OPTION_THRESHOLD=0x8
UMASK_CYCLE_ACTIVITY_CYCLES_L1D_MISS 0x08
-EVENT_CYCLE_ACTIVITY_STALLS_L1D_MISS 0xA3 PMC2
+EVENT_CYCLE_ACTIVITY_STALLS_L1D_MISS 0xA3 PMC
DEFAULT_OPTIONS_CYCLE_ACTIVITY_STALLS_L1D_MISS EVENT_OPTION_THRESHOLD=0xC
UMASK_CYCLE_ACTIVITY_STALLS_L1D_MISS 0x0C
-EVENT_CYCLE_ACTIVITY_CYCLES_L1D_PENDING 0xA3 PMC2
+EVENT_CYCLE_ACTIVITY_CYCLES_L1D_PENDING 0xA3 PMC
DEFAULT_OPTIONS_CYCLE_ACTIVITY_CYCLES_L1D_PENDING EVENT_OPTION_THRESHOLD=0x8
UMASK_CYCLE_ACTIVITY_CYCLES_L1D_PENDING 0x08
-EVENT_CYCLE_ACTIVITY_STALLS_L1D_PENDING 0xA3 PMC2
+EVENT_CYCLE_ACTIVITY_STALLS_L1D_PENDING 0xA3 PMC
DEFAULT_OPTIONS_CYCLE_ACTIVITY_STALLS_L1D_PENDING EVENT_OPTION_THRESHOLD=0xC
UMASK_CYCLE_ACTIVITY_STALLS_L1D_PENDING 0x0C
@@ -681,6 +681,11 @@ UMASK_L2_LINES_IN_S 0x02
UMASK_L2_LINES_IN_E 0x04
UMASK_L2_LINES_IN_ALL 0x07
+EVENT_L2_LINES_OUT 0xF2 PMC
+UMASK_L2_LINES_OUT_SILENT 0x01
+UMASK_L2_LINES_OUT_NON_SILENT 0x02
+UMASK_L2_LINES_OUT_USELESS_PREF 0x04
+
EVENT_ARITH_DIVIDER_ACTIVE 0x14 PMC
UMASK_ARITH_DIVIDER_ACTIVE 0x01
DEFAULT_OPTIONS_ARITH_DIVIDER_COUNT EVENT_OPTION_EDGE=0x1
diff --git a/src/includes/perfmon_types.h b/src/includes/perfmon_types.h
index 8351ce5..f652480 100644
--- a/src/includes/perfmon_types.h
+++ b/src/includes/perfmon_types.h
@@ -7,14 +7,14 @@
* Configures and reads out performance counters
* on x86 based architectures. Supports multi threading.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_westmere.h b/src/includes/perfmon_westmere.h
index d9b3782..3992a28 100644
--- a/src/includes/perfmon_westmere.h
+++ b/src/includes/perfmon_westmere.h
@@ -5,14 +5,14 @@
*
* Description: Header File of perfmon module for Intel Westmere.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_westmereEX.h b/src/includes/perfmon_westmereEX.h
index 20c86db..3e5dfa6 100644
--- a/src/includes/perfmon_westmereEX.h
+++ b/src/includes/perfmon_westmereEX.h
@@ -5,14 +5,14 @@
*
* Description: Header File of perfmon module for Intel Westmere EX.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_westmereEX_counters.h b/src/includes/perfmon_westmereEX_counters.h
index 63f77c5..08f9414 100644
--- a/src/includes/perfmon_westmereEX_counters.h
+++ b/src/includes/perfmon_westmereEX_counters.h
@@ -5,14 +5,14 @@
*
* Description: Counter Header File of perfmon module for Westmere EX.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
*
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+ * Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_westmereEX_events.txt b/src/includes/perfmon_westmereEX_events.txt
index 9dedb39..69747b6 100644
--- a/src/includes/perfmon_westmereEX_events.txt
+++ b/src/includes/perfmon_westmereEX_events.txt
@@ -4,14 +4,14 @@
#
# Description: Event list for Intel WestmereEX
#
-# Version: <VERSION>
-# Released: <DATE>
+# Version: 4.2
+# Released: 22.12.2016
#
# Author: Jan Treibig (jt), jan.treibig at gmail.com
# Thomas Roehl (tr), thomas.roehl at googlemail.com
# Project: likwid
#
-# Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+# Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/perfmon_westmere_events.txt b/src/includes/perfmon_westmere_events.txt
index ff18125..df8a0d1 100644
--- a/src/includes/perfmon_westmere_events.txt
+++ b/src/includes/perfmon_westmere_events.txt
@@ -4,14 +4,14 @@
#
# Description: Event list for Intel Westmere
#
-# Version: <VERSION>
-# Released: <DATE>
+# Version: 4.2
+# Released: 22.12.2016
#
# Author: Jan Treibig (jt), jan.treibig at gmail.com
# Thomas Roehl (tr), thomas.roehl at googlemail.com
# Project: likwid
#
-# Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+# Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
diff --git a/src/includes/power.h b/src/includes/power.h
index 82b26ab..1a0c4e2 100644
--- a/src/includes/power.h
+++ b/src/includes/power.h
@@ -6,8 +6,8 @@
* Description: Header File Power Module
* Implements Intel RAPL Interface.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
diff --git a/src/includes/power_types.h b/src/includes/power_types.h
index 2f33329..6f0dce8 100644
--- a/src/includes/power_types.h
+++ b/src/includes/power_types.h
@@ -5,8 +5,8 @@
*
* Description: Types file for power module.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
diff --git a/src/includes/registers.h b/src/includes/registers.h
index 6448d8e..c459c1a 100644
--- a/src/includes/registers.h
+++ b/src/includes/registers.h
@@ -5,8 +5,8 @@
*
* Description: Register Defines for the perfmon module
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
@@ -131,26 +131,26 @@
#define MSR_V4_UNC_PERF_GLOBAL_STATUS 0xE02
#define MSR_V4_UNC_PERF_FIXED_CTRL 0x394
#define MSR_V4_UNC_PERF_FIXED_CTR 0x395
-#define MSR_V4_ARB_PERF_FIXED_CTRL0 0x3B2
-#define MSR_V4_ARB_PERF_FIXED_CTR0 0x3B0
-#define MSR_V4_ARB_PERF_FIXED_CTRL1 0x3B3
-#define MSR_V4_ARB_PERF_FIXED_CTR1 0x3B1
-#define MSR_V4_C0_PERF_FIXED_CTRL0 0x700
-#define MSR_V4_C0_PERF_FIXED_CTR0 0x706
-#define MSR_V4_C0_PERF_FIXED_CTRL1 0x701
-#define MSR_V4_C0_PERF_FIXED_CTR1 0x707
-#define MSR_V4_C1_PERF_FIXED_CTRL0 0x710
-#define MSR_V4_C1_PERF_FIXED_CTR0 0x716
-#define MSR_V4_C1_PERF_FIXED_CTRL1 0x711
-#define MSR_V4_C1_PERF_FIXED_CTR1 0x717
-#define MSR_V4_C2_PERF_FIXED_CTRL0 0x720
-#define MSR_V4_C2_PERF_FIXED_CTR0 0x726
-#define MSR_V4_C2_PERF_FIXED_CTRL1 0x721
-#define MSR_V4_C2_PERF_FIXED_CTR1 0x727
-#define MSR_V4_C3_PERF_FIXED_CTRL0 0x730
-#define MSR_V4_C3_PERF_FIXED_CTR0 0x736
-#define MSR_V4_C3_PERF_FIXED_CTRL1 0x731
-#define MSR_V4_C3_PERF_FIXED_CTR1 0x737
+#define MSR_V4_ARB_PERF_CTRL0 0x3B2
+#define MSR_V4_ARB_PERF_CTR0 0x3B0
+#define MSR_V4_ARB_PERF_CTRL1 0x3B3
+#define MSR_V4_ARB_PERF_CTR1 0x3B1
+#define MSR_V4_C0_PERF_CTRL0 0x700
+#define MSR_V4_C0_PERF_CTR0 0x706
+#define MSR_V4_C0_PERF_CTRL1 0x701
+#define MSR_V4_C0_PERF_CTR1 0x707
+#define MSR_V4_C1_PERF_CTRL0 0x710
+#define MSR_V4_C1_PERF_CTR0 0x716
+#define MSR_V4_C1_PERF_CTRL1 0x711
+#define MSR_V4_C1_PERF_CTR1 0x717
+#define MSR_V4_C2_PERF_CTRL0 0x720
+#define MSR_V4_C2_PERF_CTR0 0x726
+#define MSR_V4_C2_PERF_CTRL1 0x721
+#define MSR_V4_C2_PERF_CTR1 0x727
+#define MSR_V4_C3_PERF_CTRL0 0x730
+#define MSR_V4_C3_PERF_CTR0 0x736
+#define MSR_V4_C3_PERF_CTRL1 0x731
+#define MSR_V4_C3_PERF_CTR1 0x737
/* V4 Uncore registers the same as in V3 */
/* Xeon Phi */
diff --git a/src/includes/registers_types.h b/src/includes/registers_types.h
index f1a5ec2..68da3d2 100644
--- a/src/includes/registers_types.h
+++ b/src/includes/registers_types.h
@@ -5,8 +5,8 @@
*
* Description: Header File of registers.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
diff --git a/src/includes/textcolor.h b/src/includes/textcolor.h
index 03b2c51..17a9bf9 100644
--- a/src/includes/textcolor.h
+++ b/src/includes/textcolor.h
@@ -7,8 +7,8 @@
* Allows toggling of terminal escape sequences for
* colored text.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/thermal.h b/src/includes/thermal.h
index 126981d..fdbbc77 100644
--- a/src/includes/thermal.h
+++ b/src/includes/thermal.h
@@ -6,8 +6,8 @@
* Description: Header File Thermal Module.
* Implements Intel TM/TM2 Interface.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/thermal_types.h b/src/includes/thermal_types.h
index 12ddbcf..849cf85 100644
--- a/src/includes/thermal_types.h
+++ b/src/includes/thermal_types.h
@@ -5,8 +5,8 @@
*
* Description: Types file for thermal module.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/timer.h b/src/includes/timer.h
index 58c0aa3..c2b9e60 100644
--- a/src/includes/timer.h
+++ b/src/includes/timer.h
@@ -10,8 +10,8 @@
* with rdtsc of 100 cycles in the worst case. Therefore sensible
* measurements should be over 1000 cycles.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/timer_types.h b/src/includes/timer_types.h
index 0899cec..2a12b0a 100644
--- a/src/includes/timer_types.h
+++ b/src/includes/timer_types.h
@@ -5,8 +5,8 @@
*
* Description: Types file for timer module.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/tlb-info.h b/src/includes/tlb-info.h
index 365dbe0..9cc1f5f 100644
--- a/src/includes/tlb-info.h
+++ b/src/includes/tlb-info.h
@@ -6,8 +6,8 @@
* Description: Header File of topology module that contains the TLB
* describing strings. Not used currently.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
diff --git a/src/includes/topology.h b/src/includes/topology.h
index 57b14d9..8f5ee4a 100644
--- a/src/includes/topology.h
+++ b/src/includes/topology.h
@@ -5,8 +5,8 @@
*
* Description: Header File of topology module.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
@@ -91,6 +91,7 @@ struct topology_functions {
#define WESTMERE_EX 0x2FU
#define XEON_MP 0x1DU
#define BROADWELL 0x3DU
+#define BROADWELL_E3 0x47U
#define BROADWELL_E 0x4FU
#define BROADWELL_D 0x56U
#define SKYLAKE1 0x4EU
diff --git a/src/includes/topology_cpuid.h b/src/includes/topology_cpuid.h
index 24c60c5..d7d47a6 100644
--- a/src/includes/topology_cpuid.h
+++ b/src/includes/topology_cpuid.h
@@ -5,8 +5,8 @@
*
* Description: Header File of topology backend using cpuid instruction.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
diff --git a/src/includes/topology_hwloc.h b/src/includes/topology_hwloc.h
index a8a3cdc..bd5a398 100644
--- a/src/includes/topology_hwloc.h
+++ b/src/includes/topology_hwloc.h
@@ -5,8 +5,8 @@
*
* Description: Header File of topology backend using the hwloc library
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
diff --git a/src/includes/topology_proc.h b/src/includes/topology_proc.h
index 640cbd3..f5aee2d 100644
--- a/src/includes/topology_proc.h
+++ b/src/includes/topology_proc.h
@@ -5,8 +5,8 @@
*
* Description: Header File of topology backend using procfs/sysfs
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
diff --git a/src/includes/topology_types.h b/src/includes/topology_types.h
index af20379..06e2f3a 100644
--- a/src/includes/topology_types.h
+++ b/src/includes/topology_types.h
@@ -6,8 +6,8 @@
* Description: Types file for topology module. External definitions are
* in likwid.h
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com,
* Thomas Roehl (tr), thomas.roehl at googlemail.com
diff --git a/src/includes/tree.h b/src/includes/tree.h
index 445bcfb..5fc7a10 100644
--- a/src/includes/tree.h
+++ b/src/includes/tree.h
@@ -6,8 +6,8 @@
* Description: Header File tree Module.
* Implements a simple tree data structure.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/tree_types.h b/src/includes/tree_types.h
index fd1d50b..1ef63be 100644
--- a/src/includes/tree_types.h
+++ b/src/includes/tree_types.h
@@ -5,8 +5,8 @@
*
* Description: Types file for tree module.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/includes/types.h b/src/includes/types.h
index 56e77e9..a45fdb8 100644
--- a/src/includes/types.h
+++ b/src/includes/types.h
@@ -5,8 +5,8 @@
*
* Description: Global Types file
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
diff --git a/src/libperfctr.c b/src/libperfctr.c
index 5b63f2a..852de8c 100644
--- a/src/libperfctr.c
+++ b/src/libperfctr.c
@@ -5,8 +5,8 @@
*
* Description: Marker API interface of module perfmon
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Authors: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
@@ -179,9 +179,9 @@ likwid_markerInit(void)
hashTable_init();
for(int i=0; i<MAX_NUM_NODES; i++) socket_lock[i] = LOCK_INIT;
-#ifndef LIKWID_USE_PERFEVENT
+
HPMmode(atoi(modeStr));
-#endif
+
if (getenv("LIKWID_DEBUG") != NULL)
{
perfmon_verbosity = atoi(getenv("LIKWID_DEBUG"));
@@ -255,23 +255,39 @@ likwid_markerInit(void)
likwid_init = 1;
}
groupSet->activeGroup = 0;
-#ifdef LIKWID_USE_PERFEVENT
+ threads2Pthread[registered_cpus] = pthread_self();
+ registered_cpus++;
+
perfmon_setupCounters(groupSet->activeGroup);
perfmon_startCounters();
-#endif
+
}
void
likwid_markerThreadInit(void)
{
- int myID;
+ int myID = 0, i = 0;
+ pthread_t t;
if ( !likwid_init )
{
return;
}
pthread_mutex_lock(&globalLock);
- myID = registered_cpus++;
+ t = pthread_self();
+ for (i=0; i<registered_cpus; i++)
+ {
+ if (pthread_equal(t, threads2Pthread[i]))
+ {
+ t = 0;
+ }
+ }
+ if (t != 0)
+ {
+ threads2Pthread[registered_cpus] = t;
+ myID = registered_cpus++;
+ }
+ //myID = registered_cpus++;
pthread_mutex_unlock(&globalLock);
if (getenv("LIKWID_PIN") != NULL)
@@ -282,7 +298,7 @@ likwid_markerThreadInit(void)
if ((CPU_COUNT(&cpuset) > 1) || (likwid_getProcessorId() != threads2Cpu[myID % num_cpus]))
{
likwid_pinThread(threads2Cpu[myID % num_cpus]);
- DEBUG_PRINT(DEBUGLEV_DEVELOP, "Pin thread %lu to CPU %d\n", gettid(), threads2Cpu[myID % num_cpus]);
+ DEBUG_PRINT(DEBUGLEV_DEVELOP, "Pin thread %lu to CPU %d currently %d\n", gettid(), threads2Cpu[myID % num_cpus], sched_getcpu());
}
}
}
@@ -301,6 +317,7 @@ likwid_markerNextGroup(void)
next_group = (groupSet->activeGroup + 1) % numberOfGroups;
if (next_group != groupSet->activeGroup)
{
+ DEBUG_PRINT(DEBUGLEV_DEVELOP, Switch from group %d to group %d, groupSet->activeGroup, next_group);
i = perfmon_switchActiveGroup(next_group);
}
return;
@@ -437,11 +454,7 @@ likwid_markerRegisterRegion(const char* regionTag)
bcatcstr(tag, groupSuffix);
int cpu_id = hashTable_get(tag, &results);
bdestroy(tag);
-#ifdef LIKWID_USE_PERFEVENT
return HPMaddThread(cpu_id);
-#else
- return 0;
-#endif
}
int
diff --git a/src/likwid.f90 b/src/likwid.f90
index 20ca8b7..81c8076 100644
--- a/src/likwid.f90
+++ b/src/likwid.f90
@@ -4,14 +4,14 @@
!
! Description: Marker API f90 module
!
-! Version: <VERSION>
-! Released: <DATE>
+! Version: 4.2
+! Released: 22.12.2016
!
! Authors: Jan Treibig (jt), jan.treibig at gmail.com,
! Thomas Roehl (tr), thomas.roehl at googlemail.com
! Project: likwid
!
-! Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
+! Copyright (C) 2016 RRZE, University Erlangen-Nuremberg
!
! This program is free software: you can redistribute it and/or modify it under
! the terms of the GNU General Public License as published by the Free Software
diff --git a/src/likwid_f90_interface.c b/src/likwid_f90_interface.c
index 96ec8e9..d451c5b 100644
--- a/src/likwid_f90_interface.c
+++ b/src/likwid_f90_interface.c
@@ -5,8 +5,8 @@
*
* Description: F90 interface for marker API
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com,
* Thomas Roehl (tr), thomas.roehl at googlemail.com
diff --git a/src/luawid.c b/src/luawid.c
index c872d72..96b1c3f 100644
--- a/src/luawid.c
+++ b/src/luawid.c
@@ -5,8 +5,8 @@
*
* Description: C part of the Likwid Lua interface
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
@@ -120,29 +120,39 @@ lua_likwid_getConfiguration(lua_State* L)
{
configfile = get_configuration();
}
- lua_newtable(L);
- lua_pushstring(L, "configFile");
- lua_pushstring(L, configfile->configFileName);
- lua_settable(L,-3);
- lua_pushstring(L, "topologyFile");
- lua_pushstring(L, configfile->topologyCfgFileName);
- lua_settable(L,-3);
- lua_pushstring(L, "daemonPath");
- lua_pushstring(L, configfile->daemonPath);
- lua_settable(L,-3);
- lua_pushstring(L, "groupPath");
- lua_pushstring(L, configfile->groupPath);
- lua_settable(L,-3);
- lua_pushstring(L, "daemonMode");
- lua_pushinteger(L, (int)configfile->daemonMode);
- lua_settable(L,-3);
- lua_pushstring(L, "maxNumThreads");
- lua_pushinteger(L, configfile->maxNumThreads);
- lua_settable(L,-3);
- lua_pushstring(L, "maxNumNodes");
- lua_pushinteger(L, configfile->maxNumNodes);
- lua_settable(L,-3);
- return 1;
+ if (configfile)
+ {
+ lua_newtable(L);
+ lua_pushstring(L, "configFile");
+ if (configfile->configFileName != NULL)
+ lua_pushstring(L, configfile->configFileName);
+ else
+ lua_pushnil(L);
+ lua_settable(L,-3);
+ lua_pushstring(L, "topologyFile");
+ lua_pushstring(L, configfile->topologyCfgFileName);
+ lua_settable(L,-3);
+ lua_pushstring(L, "daemonPath");
+ if (configfile->daemonPath != NULL)
+ lua_pushstring(L, configfile->daemonPath);
+ else
+ lua_pushnil(L);
+ lua_settable(L,-3);
+ lua_pushstring(L, "groupPath");
+ lua_pushstring(L, configfile->groupPath);
+ lua_settable(L,-3);
+ lua_pushstring(L, "daemonMode");
+ lua_pushinteger(L, (int)configfile->daemonMode);
+ lua_settable(L,-3);
+ lua_pushstring(L, "maxNumThreads");
+ lua_pushinteger(L, configfile->maxNumThreads);
+ lua_settable(L,-3);
+ lua_pushstring(L, "maxNumNodes");
+ lua_pushinteger(L, configfile->maxNumNodes);
+ lua_settable(L,-3);
+ return 1;
+ }
+ return 0;
}
static int
@@ -940,7 +950,7 @@ lua_likwid_putTopology(lua_State* L)
static int
lua_likwid_getEventsAndCounters(lua_State* L)
{
- int i;
+ int i = 0, insert = 1;
if (topology_isInitialized == 0)
{
@@ -962,7 +972,7 @@ lua_likwid_getEventsAndCounters(lua_State* L)
if (counter_map[i-1].type == NOTYPE)
continue;
bstring optString = bfromcstr("");
- lua_pushinteger(L, (lua_Integer)(i));
+ lua_pushinteger(L, (lua_Integer)(insert));
lua_newtable(L);
lua_pushstring(L,"Name");
lua_pushstring(L,counter_map[i-1].key);
@@ -991,7 +1001,9 @@ lua_likwid_getEventsAndCounters(lua_State* L)
lua_settable(L,-3);
lua_settable(L,-3);
bdestroy(optString);
+ insert++;
}
+ insert = 1;
lua_settable(L,-3);
lua_pushstring(L,"Events");
lua_newtable(L);
@@ -1000,7 +1012,7 @@ lua_likwid_getEventsAndCounters(lua_State* L)
if (strlen(eventHash[i-1].limit) == 0)
continue;
bstring optString = bfromcstr("");
- lua_pushinteger(L, (lua_Integer)(i));
+ lua_pushinteger(L, (lua_Integer)(insert));
lua_newtable(L);
lua_pushstring(L,"Name");
lua_pushstring(L,eventHash[i-1].name);
@@ -1029,6 +1041,7 @@ lua_likwid_getEventsAndCounters(lua_State* L)
lua_settable(L,-3);
lua_settable(L,-3);
bdestroy(optString);
+ insert++;
}
lua_settable(L,-3);
HPMfinalize();
diff --git a/src/memsweep.c b/src/memsweep.c
index b87e297..c6debef 100644
--- a/src/memsweep.c
+++ b/src/memsweep.c
@@ -5,8 +5,8 @@
*
* Description: Implementation of sweeper module.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/numa.c b/src/numa.c
index 11efa63..d8b67c9 100644
--- a/src/numa.c
+++ b/src/numa.c
@@ -6,8 +6,8 @@
* Description: Implementation of Linux NUMA interface. Selects between hwloc and
* procfs/sysfs backends.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
@@ -68,7 +68,7 @@ NumaTopology numa_info = {0,NULL};
/* ##### VARIABLES - LOCAL TO THIS SOURCE FILE ###################### */
-static int numaInitialized = 0;
+int numaInitialized = 0;
/* ##### FUNCTION DEFINITIONS - EXPORTED FUNCTIONS ################## */
diff --git a/src/numa_hwloc.c b/src/numa_hwloc.c
index 5a85059..7d7d5b2 100644
--- a/src/numa_hwloc.c
+++ b/src/numa_hwloc.c
@@ -5,8 +5,8 @@
*
* Description: Interface to hwloc for NUMA topology
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
@@ -222,6 +222,8 @@ hwloc_numa_init(void)
hwloc_obj_t obj;
const struct hwloc_distances_s* distances;
hwloc_obj_type_t hwloc_type = HWLOC_OBJ_NODE;
+ if (numaInitialized > 0 || numa_info.numberOfNodes > 0)
+ return 0;
if (!hwloc_topology)
{
@@ -349,6 +351,7 @@ hwloc_numa_init(void)
}
else
{
+ numaInitialized = 1;
return 0;
}
}
diff --git a/src/numa_proc.c b/src/numa_proc.c
index 5ed4617..a0d0583 100644
--- a/src/numa_proc.c
+++ b/src/numa_proc.c
@@ -5,8 +5,8 @@
*
* Description: Get NUMA topology from procfs and sysfs
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
@@ -296,6 +296,9 @@ int proc_numa_init(void)
uint32_t i;
uint64_t nrCPUs = 0;
+ if (numaInitialized > 0 || numa_info.numberOfNodes > 0)
+ return 0;
+
if (get_mempolicy(NULL, NULL, 0, 0, 0) < 0 && errno == ENOSYS)
{
numa_info.numberOfNodes = 0;
@@ -334,6 +337,8 @@ int proc_numa_init(void)
numa_info.nodes[i].numberOfDistances = nodeDistanceList(i, numa_info.numberOfNodes, &numa_info.nodes[i].distances);
}
+ if (err == 0)
+ numaInitialized = 1;
return err;
}
diff --git a/src/pci_hwloc.c b/src/pci_hwloc.c
index d853e5d..4a1226f 100644
--- a/src/pci_hwloc.c
+++ b/src/pci_hwloc.c
@@ -5,8 +5,8 @@
*
* Description: Interface to hwloc for PCI device lookup
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
diff --git a/src/pci_proc.c b/src/pci_proc.c
index fada99c..dcce425 100644
--- a/src/pci_proc.c
+++ b/src/pci_proc.c
@@ -5,8 +5,8 @@
*
* Description: Interface to procfs/sysfs for PCI device lookup
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
diff --git a/src/perfgroup.c b/src/perfgroup.c
index d810825..23602fd 100644
--- a/src/perfgroup.c
+++ b/src/perfgroup.c
@@ -5,8 +5,8 @@
*
* Description: Handler for performance groups and event sets
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at gmail.com
@@ -612,29 +612,32 @@ int custom_group(const char* eventStr, GroupInfo* ginfo)
eventList = bsplit(eventBstr, delim);
ginfo->nevents = eventList->qty;
- if (binstr(eventBstr, 0, fix0) > 0)
+ if (cpuid_info.isIntel)
{
- has_fix0 = 1;
- }
- else
- {
- ginfo->nevents++;
- }
- if (binstr(eventBstr, 0, fix1) > 0)
- {
- has_fix1 = 1;
- }
- else
- {
- ginfo->nevents++;
- }
- if (binstr(eventBstr, 0, fix2) > 0)
- {
- has_fix2 = 1;
- }
- else
- {
- ginfo->nevents++;
+ if (binstr(eventBstr, 0, fix0) > 0)
+ {
+ has_fix0 = 1;
+ }
+ else
+ {
+ ginfo->nevents++;
+ }
+ if (binstr(eventBstr, 0, fix1) > 0)
+ {
+ has_fix1 = 1;
+ }
+ else
+ {
+ ginfo->nevents++;
+ }
+ if (binstr(eventBstr, 0, fix2) > 0)
+ {
+ has_fix2 = 1;
+ }
+ else
+ {
+ ginfo->nevents++;
+ }
}
bdestroy(eventBstr);
@@ -687,29 +690,32 @@ int custom_group(const char* eventStr, GroupInfo* ginfo)
bstrListDestroy(elist);
}
i = eventList->qty;
- if (!has_fix0)
+ if (cpuid_info.isIntel)
{
- ginfo->events[i] = malloc(18 * sizeof(char));
- ginfo->counters[i] = malloc(6 * sizeof(char));
- sprintf(ginfo->events[i], "%s", "INSTR_RETIRED_ANY");
- sprintf(ginfo->counters[i], "%s", "FIXC0");
- i++;
- }
- if (!has_fix1)
- {
- ginfo->events[i] = malloc(22 * sizeof(char));
- ginfo->counters[i] = malloc(6 * sizeof(char));
- sprintf(ginfo->events[i], "%s", "CPU_CLK_UNHALTED_CORE");
- sprintf(ginfo->counters[i], "%s", "FIXC1");
- i++;
- }
- if (!has_fix2)
- {
- ginfo->events[i] = malloc(21 * sizeof(char));
- ginfo->counters[i] = malloc(6 * sizeof(char));
- sprintf(ginfo->events[i], "%s", "CPU_CLK_UNHALTED_REF");
- sprintf(ginfo->counters[i], "%s", "FIXC2");
- i++;
+ if (!has_fix0)
+ {
+ ginfo->events[i] = malloc(18 * sizeof(char));
+ ginfo->counters[i] = malloc(6 * sizeof(char));
+ sprintf(ginfo->events[i], "%s", "INSTR_RETIRED_ANY");
+ sprintf(ginfo->counters[i], "%s", "FIXC0");
+ i++;
+ }
+ if (!has_fix1)
+ {
+ ginfo->events[i] = malloc(22 * sizeof(char));
+ ginfo->counters[i] = malloc(6 * sizeof(char));
+ sprintf(ginfo->events[i], "%s", "CPU_CLK_UNHALTED_CORE");
+ sprintf(ginfo->counters[i], "%s", "FIXC1");
+ i++;
+ }
+ if (!has_fix2)
+ {
+ ginfo->events[i] = malloc(21 * sizeof(char));
+ ginfo->counters[i] = malloc(6 * sizeof(char));
+ sprintf(ginfo->events[i], "%s", "CPU_CLK_UNHALTED_REF");
+ sprintf(ginfo->counters[i], "%s", "FIXC2");
+ i++;
+ }
}
bstrListDestroy(eventList);
diff --git a/src/perfmon.c b/src/perfmon.c
index 521b11c..413813f 100644
--- a/src/perfmon.c
+++ b/src/perfmon.c
@@ -5,8 +5,8 @@
*
* Description: Main implementation of the performance monitoring module
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Thomas Roehl (tr), thomas.roehl at googlemail.com
@@ -590,6 +590,7 @@ calculateResult(int groupId, int eventId, int threadId)
PerfmonCounter* counter;
int cpu_id;
double result = 0.0;
+ uint64_t maxValue = 0ULL;
if (groupSet->groups[groupId].events[eventId].type == NOTYPE)
return result;
@@ -601,11 +602,13 @@ calculateResult(int groupId, int eventId, int threadId)
}
else if (counter->overflows > 0)
{
- result += (double) ((perfmon_getMaxCounterValue(counter_map[event->index].type) -
- counter->startData) + counter->counterData);
- counter->overflows--;
+ maxValue = perfmon_getMaxCounterValue(counter_map[event->index].type);
+ result += (double) ((maxValue - counter->startData) + counter->counterData);
+ if (counter->overflows > 1)
+ {
+ result += (double) ((counter->overflows-1) * maxValue);
+ }
}
- result += (double) (counter->overflows * perfmon_getMaxCounterValue(counter_map[event->index].type));
if (counter_map[event->index].type == POWER)
{
result *= power_getEnergyUnit(getCounterTypeOffset(event->index));
@@ -709,7 +712,10 @@ perfmon_check_counter_map(int cpu_id)
int found = 0;
bstring estr = bfromcstr(eventHash[i].name);
if (i > 0 && strlen(eventHash[i-1].limit) != 0 && strcmp(eventHash[i-1].limit, eventHash[i].limit) == 0)
+ {
+ bdestroy(estr);
continue;
+ }
for (int j=0;j<perfmon_numCounters; j++)
{
if (counter_map[j].type == NOTYPE)
@@ -719,6 +725,7 @@ perfmon_check_counter_map(int cpu_id)
if (getEvent(estr, cstr, &event))
{
found = 1;
+ bdestroy(cstr);
break;
}
bdestroy(cstr);
@@ -893,6 +900,7 @@ perfmon_init_maps(void)
break;
case BROADWELL:
+ case BROADWELL_E3:
box_map = broadwell_box_map;
eventHash = broadwell_arch_events;
counter_map = broadwell_counter_map;
@@ -1158,6 +1166,7 @@ perfmon_init_funcs(int* init_power, int* init_temp)
break;
case BROADWELL:
+ case BROADWELL_E3:
case BROADWELL_E:
case BROADWELL_D:
initialize_power = TRUE;
@@ -1570,15 +1579,11 @@ perfmon_addEventSet(const char* eventCString)
return -ENOMEM;
}
eventSet->numberOfEvents = 0;
-#ifdef __x86_64
-// eventSet->regTypeMask = ((__uint128_t)0x0ULL<<64)|0x0ULL;
+
eventSet->regTypeMask1 = 0x0ULL;
eventSet->regTypeMask2 = 0x0ULL;
eventSet->regTypeMask3 = 0x0ULL;
eventSet->regTypeMask4 = 0x0ULL;
-#else
- eventSet->regTypeMask = 0x0ULL;
-#endif
int forceOverwrite = 0;
int valid_events = 0;
diff --git a/src/perfmon_perf.c b/src/perfmon_perf.c
index 87a8c42..8414548 100644
--- a/src/perfmon_perf.c
+++ b/src/perfmon_perf.c
@@ -6,8 +6,8 @@
* Description: Example perfmon module for software events through perf_event
* Currently not integrated in perfmon.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
* Project: likwid
diff --git a/src/power.c b/src/power.c
index 94c7ea4..7276353 100644
--- a/src/power.c
+++ b/src/power.c
@@ -5,8 +5,8 @@
*
* Description: Module implementing Intel RAPL interface
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Authors: Jan Treibig (jt), jan.treibig at gmail.com,
* Thomas Roehl (tr), thomas.roehl at googlemail.com
@@ -89,6 +89,7 @@ power_init(int cpuId)
case ATOM_SILVERMONT_AIR:
case ATOM_SILVERMONT_GOLD:
case BROADWELL:
+ case BROADWELL_E3:
case BROADWELL_E:
case BROADWELL_D:
case HASWELL_M1:
diff --git a/src/pthread-overload/Makefile b/src/pthread-overload/Makefile
index 613e595..57452c9 100644
--- a/src/pthread-overload/Makefile
+++ b/src/pthread-overload/Makefile
@@ -4,8 +4,8 @@
#
# Description: pthread-overload Makefile
#
-# Version: <VERSION>
-# Released: <DATE>
+# Version: 4.2
+# Released: 22.12.2016
#
# Author: Jan Treibig (jt), jan.treibig at gmail.com
# Thomas Roehl (tr), thomas.roehl at googlemail.com
diff --git a/src/pthread-overload/pthread-overload.c b/src/pthread-overload/pthread-overload.c
index b4c2b28..377588b 100644
--- a/src/pthread-overload/pthread-overload.c
+++ b/src/pthread-overload/pthread-overload.c
@@ -6,8 +6,8 @@
* Description: Overloaded library for pthread_create call.
* Implements pinning of threads together with likwid-pin.
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
@@ -92,6 +92,9 @@ pthread_create(pthread_t* thread,
static int pin_ids[MAX_NUM_THREADS];
static uint64_t skipMask = 0x0;
static int ncpus = 0;
+ static int shepard = 0;
+ static long online_cpus = 0;
+ online_cpus = sysconf(_SC_NPROCESSORS_ONLN);
/* On first entry: Get Evironment Variable and initialize pin_ids */
if (ncalled == 0)
@@ -169,6 +172,27 @@ pthread_create(pthread_t* thread,
overflow = ncpus-1;
}
+ Dl_info info;
+ if (dladdr(start_routine, &info) > 0)
+ {
+ FILE* fpipe;
+ char cmd[512];
+ char buff[512];
+ buff[0] = '\0';
+ sprintf(cmd, "nm %s 2>/dev/null | grep %x ", info.dli_fname, ((void*)start_routine) - info.dli_fbase);
+ if ( !(fpipe = (FILE*)popen(cmd,"r")) )
+ { // If fpipe is NULL
+ fprintf(stderr, "Problems");
+ }
+ char* t = fgets(buff, 512, fpipe);
+ char* tmp = strstr(buff, "monitor");
+ if (tmp != NULL)
+ {
+ shepard = 1;
+ skipMask |= 1ULL<<(ncalled);
+ }
+ pclose(fpipe);
+ }
/* Handle dll related stuff */
do
@@ -210,9 +234,17 @@ pthread_create(pthread_t* thread,
if ((ncalled<64) && (skipMask&(1ULL<<(ncalled))))
{
+ CPU_ZERO(&cpuset);
+ for (int i=0; i<online_cpus; i++)
+ CPU_SET(i, &cpuset);
+ pthread_setaffinity_np(*thread, sizeof(cpu_set_t), &cpuset);
if (!silent)
{
- color_print("\tthreadid %lu -> SKIP \n", *thread);
+ if (shepard)
+ color_print("\tthreadid %lu -> SKIP SHEPARD\n", *thread);
+ else
+ color_print("\tthreadid %lu -> SKIP \n", *thread);
+ shepard = 0;
}
}
else
diff --git a/src/thermal.c b/src/thermal.c
index c967e09..5f42b10 100644
--- a/src/thermal.c
+++ b/src/thermal.c
@@ -5,8 +5,8 @@
*
* Description: Module implementing Intel TM/TM2 interface
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/timer.c b/src/timer.c
index f75c5d8..36157ff 100644
--- a/src/timer.c
+++ b/src/timer.c
@@ -5,8 +5,8 @@
*
* Description: Implementation of timer module
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/src/topology.c b/src/topology.c
index ad70ab7..4f7a95c 100644
--- a/src/topology.c
+++ b/src/topology.c
@@ -5,8 +5,8 @@
*
* Description: Interface to the topology backends
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Authors: Jan Treibig (jt), jan.treibig at gmail.com,
* Thomas Roehl (tr), thomas.roehl at googlemail.com
@@ -70,6 +70,7 @@ static char* sandybridge_ep_str = "Intel Xeon SandyBridge EN/EP processor";
static char* haswell_str = "Intel Core Haswell processor";
static char* haswell_ep_str = "Intel Xeon Haswell EN/EP/EX processor";
static char* broadwell_str = "Intel Core Broadwell processor";
+static char* broadwell_e3_str = "Intel Xeon E3 Broadwell processor";
static char* broadwell_d_str = "Intel Xeon D Broadwell processor";
static char* broadwell_ep_str = "Intel Xeon Broadwell EN/EP/EX processor";
static char* skylake_str = "Intel Skylake processor";
@@ -615,6 +616,10 @@ topology_setName(void)
cpuid_info.name = broadwell_str;
cpuid_info.short_name = short_broadwell;
break;
+ case BROADWELL_E3:
+ cpuid_info.name = broadwell_e3_str;
+ cpuid_info.short_name = short_broadwell;
+ break;
case BROADWELL_D:
cpuid_info.supportUncore = 1;
cpuid_info.name = broadwell_d_str;
@@ -1037,6 +1042,7 @@ print_supportedCPUs (void)
printf("\t%s\n",broadwell_ep_str);
printf("\t%s\n",skylake_str);
printf("\t%s\n",atom_goldmont_str);
+ printf("\t%s\n",xeon_phi2_string);
printf("\n");
printf("Supported AMD processors:\n");
printf("\t%s\n",opteron_sc_str);
diff --git a/src/topology_cpuid.c b/src/topology_cpuid.c
index 1e9a5eb..de80b1a 100644
--- a/src/topology_cpuid.c
+++ b/src/topology_cpuid.c
@@ -5,8 +5,8 @@
*
* Description: Interface to the cpuid based topology backend
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Authors: Jan Treibig (jt), jan.treibig at gmail.com,
* Thomas Roehl (tr), thomas.roehl at googlemail.com
diff --git a/src/topology_hwloc.c b/src/topology_hwloc.c
index 13991f1..2e128bc 100644
--- a/src/topology_hwloc.c
+++ b/src/topology_hwloc.c
@@ -5,8 +5,8 @@
*
* Description: Interface to the hwloc based topology backend
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Authors: Thomas Roehl (tr), thomas.roehl at googlemail.com
*
@@ -349,8 +349,8 @@ hwloc_init_cacheTopology(void)
}
else if (cpuid_info.isIntel)
{
- DEBUG_PLAIN_PRINT(DEBUGLEV_ONLY_ERROR, Processor is not supported);
- break;
+ DEBUG_PLAIN_PRINT(DEBUGLEV_ONLY_ERROR, Cannot read cache inclusiveness);
+ cachePool[id].inclusive = 0;
}
#endif
#if defined(_ARCH_PPC)
diff --git a/src/topology_proc.c b/src/topology_proc.c
index 530f8e2..8ceb64d 100644
--- a/src/topology_proc.c
+++ b/src/topology_proc.c
@@ -5,8 +5,8 @@
*
* Description: Interface to the procfs/sysfs based topology backend
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Authors: Jan Treibig (jt), jan.treibig at gmail.com,
* Thomas Roehl (tr), thomas.roehl at googlemail.com
diff --git a/src/tree.c b/src/tree.c
index 836444d..bbf8668 100644
--- a/src/tree.c
+++ b/src/tree.c
@@ -5,8 +5,8 @@
*
* Description: Module implementing a tree data structure
*
- * Version: <VERSION>
- * Released: <DATE>
+ * Version: 4.2
+ * Released: 22.12.2016
*
* Author: Jan Treibig (jt), jan.treibig at gmail.com
* Project: likwid
diff --git a/test/stream-API.c b/test/stream-API.c
deleted file mode 100644
index 0fe292e..0000000
--- a/test/stream-API.c
+++ /dev/null
@@ -1,437 +0,0 @@
-/*
- * =======================================================================================
- *
- * Filename: streamAPI.c
- *
- * Description: Copy of the STREAM benchmark (only copy and triad) with hardware
- * performance measurement instrumentation using LIKWID
- *
- * Version: <VERSION>
- * Released: <DATE>
- *
- * Author: Thomas Roehl (tr), thomas.roehl at googlemail.com
- * Project: likwid
- *
- * Copyright (C) 2015 RRZE, University Erlangen-Nuremberg
- *
- * This program is free software: you can redistribute it and/or modify it under
- * the terms of the GNU General Public License as published by the Free Software
- * Foundation, either version 3 of the License, or (at your option) any later
- * version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT ANY
- * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
- * PARTICULAR PURPOSE. See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * =======================================================================================
- */
-
-#define _GNU_SOURCE
-#include <stdlib.h>
-#include <stdio.h>
-#include <sys/types.h>
-#include <sys/syscall.h>
-#ifdef _OPENMP
-#include <omp.h>
-# endif
-#include <stdint.h>
-#include <sys/time.h>
-#include <unistd.h>
-#include <errno.h>
-#include <sched.h>
-#include <pthread.h>
-
-#define ITER 100
-#define SIZE 40000000
-#define DATATYPE float
-
-#define gettid() syscall(SYS_gettid)
-#include <likwid.h>
-#define HLINE "-------------------------------------------------------------\n"
-
-#ifndef MIN
-#define MIN(x,y) ((x)<(y)?(x):(y))
-#endif
-
-typedef struct {
- struct timeval before;
- struct timeval after;
-} TimeData;
-
-
-void time_start(TimeData* time)
-{
- gettimeofday(&(time->before),NULL);
-}
-
-
-void time_stop(TimeData* time)
-{
- gettimeofday(&(time->after),NULL);
-}
-
-double time_print(TimeData* time)
-{
- long int sec;
- double timeDuration;
-
- sec = time->after.tv_sec - time->before.tv_sec;
- timeDuration = ((double)((sec*1000000)+time->after.tv_usec) - (double) time->before.tv_usec);
-
- return (timeDuration/1000000);
-}
-
-static int
-getProcessorID(cpu_set_t* cpu_set)
-{
- int processorId;
-
- for (processorId=0;processorId<128;processorId++)
- {
- if (CPU_ISSET(processorId,cpu_set))
- {
- break;
- }
- }
- return processorId;
-}
-
-int threadGetProcessorId()
-{
- cpu_set_t cpu_set;
- CPU_ZERO(&cpu_set);
- sched_getaffinity(gettid(),sizeof(cpu_set_t), &cpu_set);
-
- return getProcessorID(&cpu_set);
-}
-
-void allocate_vector(DATATYPE** ptr, uint64_t size)
-{
- int errorCode;
-
- errorCode = posix_memalign((void**) ptr, 64, size*sizeof(DATATYPE));
-
- if (errorCode)
- {
- if (errorCode == EINVAL)
- {
- fprintf(stderr,
- "Alignment parameter is not a power of two\n");
- exit(EXIT_FAILURE);
- }
- if (errorCode == ENOMEM)
- {
- fprintf(stderr,
- "Insufficient memory to fulfill the request\n");
- exit(EXIT_FAILURE);
- }
- }
-}
-
-
-int main(int argn, char** argc)
-{
- int err, i ,j;
- int numCPUs = 0;
- int gid;
- DATATYPE *a,*b,*c,*d;
- TimeData timer;
- double triad_time, copy_time, scale_time, stream_time;
- char estr[1024];
- double result, scalar = 3.0;
- char* ptr;
-
- if (argn != 3)
- {
- printf("Usage: %s <cpustr> <events>\n", argc[0]);
- return 1;
- }
-
- strcpy(estr, argc[2]);
-
- allocate_vector(&a, SIZE);
- allocate_vector(&b, SIZE);
- allocate_vector(&c, SIZE);
- allocate_vector(&d, SIZE);
-
- err = topology_init();
- if (err < 0)
- {
- printf("Failed to initialize LIKWID's topology module\n");
- return 1;
- }
- CpuTopology_t topo = get_cpuTopology();
- affinity_init();
- int* cpus = (int*)malloc(topo->numHWThreads * sizeof(int));
- if (!cpus)
- return 1;
- numCPUs = cpustr_to_cpulist(argc[1], cpus, topo->numHWThreads);
- omp_set_num_threads(numCPUs);
- err = perfmon_init(numCPUs, cpus);
- if (err < 0)
- {
- printf("Failed to initialize LIKWID's performance monitoring module\n");
- affinity_finalize();
- topology_finalize();
- return 1;
- }
- gid = perfmon_addEventSet(estr);
- if (gid < 0)
- {
- printf("Failed to add event string %s to LIKWID's performance monitoring module\n", estr);
- perfmon_finalize();
- affinity_finalize();
- topology_finalize();
- return 1;
- }
-
- err = perfmon_setupCounters(gid);
- if (err < 0)
- {
- printf("Failed to setup group %d in LIKWID's performance monitoring module\n", gid);
- perfmon_finalize();
- affinity_finalize();
- topology_finalize();
- return 1;
- }
-
-#ifdef _OPENMP
- printf(HLINE);
-#pragma omp parallel
- {
-#pragma omp master
- {
- printf ("Number of Threads requested = %i\n",omp_get_num_threads());
- }
- likwid_pinThread(cpus[omp_get_thread_num()]);
- printf ("Thread %d running on processor %d ....\n",omp_get_thread_num(),sched_getcpu());
- }
-#endif
-
-#pragma omp parallel for
- for (int j=0; j<SIZE; j++) {
- a[j] = 1.0;
- b[j] = 2.0;
- c[j] = 0.0;
- d[j] = 1.0;
- }
-
- err = perfmon_startCounters();
- if (err < 0)
- {
- printf("Failed to start counters for group %d for thread %d\n",gid, (-1*err)-1);
- perfmon_finalize();
- topology_finalize();
- return 1;
- }
- time_start(&timer);
-#pragma omp parallel
- {
- for (int k=0; k<ITER; k++)
- {
- LIKWID_MARKER_START("copy");
-#pragma omp for
- for (int j=0; j<SIZE; j++)
- {
- c[j] = a[j];
- }
- LIKWID_MARKER_STOP("copy");
- }
- }
- time_stop(&timer);
- err = perfmon_stopCounters();
- copy_time = time_print(&timer)/(double)ITER;
- if (err < 0)
- {
- printf("Failed to stop counters for group %d for thread %d\n",gid, (-1*err)-1);
- perfmon_finalize();
- topology_finalize();
- return 1;
- }
-
- printf("Processed %.1f Mbyte at copy benchmark in %.4f seconds: %.2f MByte/s\n",
- 1E-6*(2*SIZE*sizeof(DATATYPE)),
- copy_time,
- 1E-6*((2*SIZE*sizeof(DATATYPE))/copy_time));
-
- ptr = strtok(estr,",");
- j = 0;
- while (ptr != NULL)
- {
- for (i = 0;i < numCPUs; i++)
- {
- result = perfmon_getResult(gid, j, cpus[i]);
- printf("Measurement result for event set %s at CPU %d: %f\n", ptr, cpus[i], result);
- }
- ptr = strtok(NULL,",");
- j++;
- }
- strcpy(estr, argc[2]);
- perfmon_setupCounters(gid);
-
- err = perfmon_startCounters();
- if (err < 0)
- {
- printf("Failed to start counters for group %d for thread %d\n",gid, (-1*err)-1);
- perfmon_finalize();
- topology_finalize();
- return 1;
- }
- time_start(&timer);
-#pragma omp parallel
- {
- for (int k=0; k<ITER; k++)
- {
- LIKWID_MARKER_START("scale");
-#pragma omp for
- for (int j=0; j<SIZE; j++)
- {
- b[j] = scalar*c[j];
- }
- LIKWID_MARKER_STOP("scale");
- }
- }
- time_stop(&timer);
- err = perfmon_stopCounters();
- scale_time = time_print(&timer)/(double)ITER;
- if (err < 0)
- {
- printf("Failed to stop counters for group %d for thread %d\n",gid, (-1*err)-1);
- perfmon_finalize();
- topology_finalize();
- return 1;
- }
-
- printf("Processed %.1f Mbyte at scale benchmark in %.4f seconds: %.2f MByte/s\n",
- 1E-6*(2*SIZE*sizeof(DATATYPE)),
- copy_time,
- 1E-6*((2*SIZE*sizeof(DATATYPE))/copy_time));
-
- ptr = strtok(estr,",");
- j = 0;
- while (ptr != NULL)
- {
- for (i = 0;i < numCPUs; i++)
- {
- result = perfmon_getResult(gid, j, cpus[i]);
- printf("Measurement result for event set %s at CPU %d: %f\n", ptr, cpus[i], result);
- }
- ptr = strtok(NULL,",");
- j++;
- }
- strcpy(estr, argc[2]);
- perfmon_setupCounters(gid);
- err = perfmon_startCounters();
- if (err < 0)
- {
- printf("Failed to start counters for group %d for thread %d\n",gid, (-1*err)-1);
- perfmon_finalize();
- topology_finalize();
- return 1;
- }
- time_start(&timer);
-#pragma omp parallel
- {
- for (int k=0; k<ITER; k++)
- {
- LIKWID_MARKER_START("stream");
-#pragma omp for
- for (int j=0; j<SIZE; j++)
- {
- c[j] = a[j] + b[j];
- }
- LIKWID_MARKER_STOP("stream");
- }
- }
- time_stop(&timer);
- err = perfmon_stopCounters();
- stream_time = time_print(&timer)/(double)ITER;
- if (err < 0)
- {
- printf("Failed to stop counters for group %d for thread %d\n",gid, (-1*err)-1);
- perfmon_finalize();
- topology_finalize();
- return 1;
- }
-
- printf("Processed %.1f Mbyte at stream benchmark in %.4f seconds: %.2f MByte/s\n",
- 1E-6*(2*SIZE*sizeof(DATATYPE)),
- copy_time,
- 1E-6*((2*SIZE*sizeof(DATATYPE))/copy_time));
-
- ptr = strtok(estr,",");
- j = 0;
- while (ptr != NULL)
- {
- for (i = 0;i < numCPUs; i++)
- {
- result = perfmon_getResult(gid, j, cpus[i]);
- printf("Measurement result for event set %s at CPU %d: %f\n", ptr, cpus[i], result);
- }
- ptr = strtok(NULL,",");
- j++;
- }
- strcpy(estr, argc[2]);
- perfmon_setupCounters(gid);
- err = perfmon_startCounters();
- if (err < 0)
- {
- printf("Failed to start counters for group %d for thread %d\n",gid, (-1*err)-1);
- perfmon_finalize();
- topology_finalize();
- return 1;
- }
- time_start(&timer);
-#pragma omp parallel
- {
- for (int k=0; k<ITER; k++)
- {
-
- LIKWID_MARKER_START("triad");
-#pragma omp for
- for (int j=0; j<SIZE; j++)
- {
- a[j] = b[j] + c[j] * scalar;
- }
- LIKWID_MARKER_STOP("triad");
- }
- }
- time_stop(&timer);
- err = perfmon_stopCounters();
- triad_time = time_print(&timer)/(double)ITER;
- if (err < 0)
- {
- printf("Failed to stop counters for group %d for thread %d\n",gid, (-1*err)-1);
- perfmon_finalize();
- topology_finalize();
- return 1;
- }
-
-
-
- printf("Processed %.1f Mbyte at triad benchmark in %.4f seconds: %.2f MByte/s\n",
- 1E-6*(4*SIZE*sizeof(DATATYPE)),
- triad_time,
- 1E-6*((4*SIZE*sizeof(DATATYPE))/triad_time));
- ptr = strtok(estr,",");
- j = 0;
- while (ptr != NULL)
- {
- for (i = 0;i < numCPUs; i++)
- {
- result = perfmon_getResult(gid, j, cpus[i]);
- printf("Measurement result for event set %s at CPU %d: %f\n", ptr, cpus[i], result);
- }
- ptr = strtok(NULL,",");
- j++;
- }
-
- perfmon_finalize();
- affinity_finalize();
- topology_finalize();
- return 0;
-}
-
diff --git a/test/stream.c b/test/stream.c
deleted file mode 100644
index ea811a2..0000000
--- a/test/stream.c
+++ /dev/null
@@ -1,249 +0,0 @@
-#define _GNU_SOURCE
-#include <stdlib.h>
-#include <stdio.h>
-#include <sys/types.h>
-#include <sys/syscall.h>
-#ifdef _OPENMP
-#include <omp.h>
-# endif
-#include <stdint.h>
-#include <sys/time.h>
-#include <unistd.h>
-#include <errno.h>
-#include <sched.h>
-#include <pthread.h>
-
-#define ITER 10
-#define SIZE 40000000
-
-#define gettid() syscall(SYS_gettid)
-#include <likwid.h>
-#define HLINE "-------------------------------------------------------------\n"
-
-#ifndef MIN
-#define MIN(x,y) ((x)<(y)?(x):(y))
-#endif
-
-typedef struct {
- struct timeval before;
- struct timeval after;
-} TimeData;
-
-
-void time_start(TimeData* time)
-{
- gettimeofday(&(time->before),NULL);
-}
-
-
-void time_stop(TimeData* time)
-{
- gettimeofday(&(time->after),NULL);
-}
-
-double time_print(TimeData* time)
-{
- long int sec;
- double timeDuration;
-
- sec = time->after.tv_sec - time->before.tv_sec;
- timeDuration = ((double)((sec*1000000)+time->after.tv_usec) - (double) time->before.tv_usec);
-
- return (timeDuration/1000000);
-}
-
-static int
-getProcessorID(cpu_set_t* cpu_set)
-{
- int processorId;
-
- for (processorId=0;processorId<128;processorId++)
- {
- if (CPU_ISSET(processorId,cpu_set))
- {
- break;
- }
- }
- return processorId;
-}
-
-int threadGetProcessorId()
-{
- cpu_set_t cpu_set;
- CPU_ZERO(&cpu_set);
- sched_getaffinity(gettid(),sizeof(cpu_set_t), &cpu_set);
-
- return getProcessorID(&cpu_set);
-}
-
-void allocate_vector(double** ptr, uint64_t size)
-{
- int errorCode;
-
- errorCode = posix_memalign((void**) ptr, 64, size*sizeof(double));
-
- if (errorCode)
- {
- if (errorCode == EINVAL)
- {
- fprintf(stderr,
- "Alignment parameter is not a power of two\n");
- exit(EXIT_FAILURE);
- }
- if (errorCode == ENOMEM)
- {
- fprintf(stderr,
- "Insufficient memory to fulfill the request\n");
- exit(EXIT_FAILURE);
- }
- }
-}
-
-
-int main(int argn, char** argc)
-{
- double *a,*b,*c,*d;
- double scalar = 3.0;
- TimeData timer;
- double triad_time, copy_time, scale_time, add_time;
-
- allocate_vector(&a, SIZE);
- allocate_vector(&b, SIZE);
- allocate_vector(&c, SIZE);
- allocate_vector(&d, SIZE);
-
-#ifdef LIKWID_PERFMON
- printf("Using likwid\n");
-#endif
-
- LIKWID_MARKER_INIT;
-
-#ifdef _OPENMP
- printf(HLINE);
-#pragma omp parallel
- {
-#pragma omp master
- {
- printf ("Number of Threads requested = %i\n",omp_get_num_threads());
- }
- printf ("Thread %d running on processor %d ....\n",omp_get_thread_num(),sched_getcpu());
- }
- printf(HLINE);
-#endif
-
-#pragma omp parallel for
- for (int j=0; j<SIZE; j++) {
- a[j] = 1.0;
- b[j] = 2.0;
- c[j] = 0.0;
- d[j] = 1.0;
- }
-
- time_start(&timer);
-#pragma omp parallel
- {
- for (int k=0; k<ITER; k++)
- {
- LIKWID_MARKER_START("copy");
-#pragma simd
-#pragma omp for
- for (int j=0; j<SIZE; j++)
- {
- c[j] = a[j];
- }
- LIKWID_MARKER_STOP("copy");
- }
- }
- time_stop(&timer);
- copy_time = time_print(&timer)/(double)ITER;
-
- time_start(&timer);
-#pragma omp parallel
- {
- for (int k=0; k<ITER; k++)
- {
-
- LIKWID_MARKER_START("scale");
-#pragma simd
-#pragma omp for
- for (int j=0; j<SIZE; j++)
- {
- b[j] = scalar * c[j];
- }
- LIKWID_MARKER_STOP("scale");
- }
- }
- time_stop(&timer);
- scale_time = time_print(&timer)/(double)ITER;
-
- time_start(&timer);
-#pragma omp parallel
- {
- for (int k=0; k<ITER; k++)
- {
-
- LIKWID_MARKER_START("add");
-#pragma simd
-#pragma omp for
- for (int j=0; j<SIZE; j++)
- {
- c[j] = a[j] + b[j];
- }
- LIKWID_MARKER_STOP("add");
- }
- }
- time_stop(&timer);
- add_time = time_print(&timer)/(double)ITER;
-
- time_start(&timer);
-#pragma omp parallel
- {
- LIKWID_MARKER_START("triad_total");
- for (int k=0; k<ITER; k++)
- {
-
- LIKWID_MARKER_START("triad");
-#pragma simd
-#pragma omp for
- for (int j=0; j<SIZE; j++)
- {
-
- a[j] = b[j] + c[j] * d[j];
- }
- LIKWID_MARKER_STOP("triad");
- }
- LIKWID_MARKER_STOP("triad_total");
- }
- time_stop(&timer);
- triad_time = time_print(&timer)/(double)ITER;
-
-
- printf("Processed %.1f Mbyte at copy benchmark in %.4f seconds: %.2f MByte/s\n",
- 1E-6*(2*SIZE*sizeof(double)),
- copy_time,
- 1E-6*((2*SIZE*sizeof(double))/copy_time));
- printf("Processed %.1f Mbyte at scale benchmark in %.4f seconds: %.2f MByte/s %.2f MFLOP/s\n",
- 1E-6*(2*SIZE*sizeof(double)),
- scale_time,
- 1E-6*((2*SIZE*sizeof(double))/scale_time),
- 1E-6*(SIZE/scale_time));
- printf("Processed %.1f Mbyte at add benchmark in %.4f seconds: %.2f MByte/s %.2f MFLOP/s\n",
- 1E-6*(3*SIZE*sizeof(double)),
- add_time,
- 1E-6*((3*SIZE*sizeof(double))/add_time),
- 1E-6*(SIZE/add_time));
- printf("Processed %.1f Mbyte at triad benchmark in %.4f seconds: %.2f MByte/s %.2f MFLOP/s\n",
- 1E-6*(4*SIZE*sizeof(double)),
- triad_time,
- 1E-6*((4*SIZE*sizeof(double))/triad_time),
- 1E-6*((2*SIZE)/triad_time));
-
-
- LIKWID_MARKER_CLOSE;
- free(a);
- free(b);
- free(c);
- free(d);
- return 0;
-}
-
diff --git a/test/stream.cc b/test/stream.cc
deleted file mode 100644
index 489bb09..0000000
--- a/test/stream.cc
+++ /dev/null
@@ -1,227 +0,0 @@
-#include <sys/types.h>
-#include <unistd.h>
-#include <iostream>
-#include <string>
-#include <atomic>
-#include <thread>
-#include <likwid.h>
-#include <sched.h>
-#include <syscall.h>
-#include <sys/time.h>
-
-#define gettid() syscall(SYS_gettid)
-#define ITER 10
-#define SIZE 40000000
-#ifdef __GNUG__
-#define RESTRICT __restrict__
-#else
-#define RESTRICT restrict
-#endif
-using namespace std;
-
-typedef struct {
- struct timeval before;
- struct timeval after;
-} TimeData;
-
-
-void time_start(TimeData* time)
-{
- gettimeofday(&(time->before),NULL);
-}
-
-
-void time_stop(TimeData* time)
-{
- gettimeofday(&(time->after),NULL);
-}
-
-double time_print(TimeData* time)
-{
- long int sec;
- double timeDuration;
-
- sec = time->after.tv_sec - time->before.tv_sec;
- timeDuration = ((double)((sec*1000000)+time->after.tv_usec) - (double) time->before.tv_usec);
-
- return (timeDuration/1000000);
-}
-
-static int
-getProcessorID(cpu_set_t* cpu_set)
-{
- int processorId;
-
- for (processorId=0;processorId<128;processorId++)
- {
- if (CPU_ISSET(processorId,cpu_set))
- {
- break;
- }
- }
- return processorId;
-}
-
-int threadGetProcessorId()
-{
- cpu_set_t cpu_set;
- CPU_ZERO(&cpu_set);
- sched_getaffinity(gettid(),sizeof(cpu_set_t), &cpu_set);
- if (CPU_COUNT(&cpu_set) > 1)
- {
- return sched_getcpu();
- }
- else
- {
- return getProcessorID(&cpu_set);
- }
- return -1;
-}
-
-
-double copy_times[CPU_SETSIZE];
-double triad_times[CPU_SETSIZE];
-
-void allocate_vector(double** ptr, uint64_t size)
-{
- int errorCode;
-
- errorCode = posix_memalign((void**) ptr, 64, size*sizeof(double));
-
- if (errorCode)
- {
- if (errorCode == EINVAL)
- {
- fprintf(stderr,
- "Alignment parameter is not a power of two\n");
- exit(EXIT_FAILURE);
- }
- if (errorCode == ENOMEM)
- {
- fprintf(stderr,
- "Insufficient memory to fulfill the request\n");
- exit(EXIT_FAILURE);
- }
- }
-}
-
-
-
-int calc_thread(double* RESTRICT a, double* RESTRICT b, double* RESTRICT c, double* RESTRICT d, int id, int all)
-{
- int i;
- int start;
- int end;
- TimeData timer;
- start = id*(SIZE/all);
- end = start+(SIZE/all);
-
- LIKWID_MARKER_THREADINIT;
-
- printf ("Thread %d running on processor %d ....\n", id, threadGetProcessorId());
-
- time_start(&timer);
- for (int k=0; k<ITER; k++)
- {
- LIKWID_MARKER_START("copy");
- #pragma simd
- for(i=start;i<end;i++)
- {
- c[i] = a[i];
- }
- LIKWID_MARKER_STOP("copy");
- }
- time_stop(&timer);
- copy_times[id] = time_print(&timer);
-
- time_start(&timer);
- for (int k=0; k<ITER; k++)
- {
- LIKWID_MARKER_START("triad");
- #pragma simd
- for(i=start;i<end;i++)
- {
- a[i] = b[i] + c[i] * d[i];
- }
- LIKWID_MARKER_STOP("triad");
- }
- time_stop(&timer);
- triad_times[id] = time_print(&timer);
- return 0;
-}
-
-int
-main(int argc, char ** argv)
-{
- cpu_set_t cpuset;
- sched_getaffinity(getpid(),sizeof(cpu_set_t), &cpuset);
- std::thread t[CPU_SETSIZE];
- double *a,*b,*c,*d;
- double copy_time = 0.0;
- double triad_time = 0.0;
- int num_threads = 0;
- int id = 0;
-
- for (int i=0;i<CPU_SETSIZE; i++)
- {
- if (CPU_ISSET(i, &cpuset))
- {
- num_threads++;
- }
- copy_times[i] = 0.0;
- triad_times[i] = 0.0;
- }
-
- printf ("Number of Threads requested = %i\n",num_threads);
-
- allocate_vector(&a, SIZE);
- allocate_vector(&b, SIZE);
- allocate_vector(&c, SIZE);
- allocate_vector(&d, SIZE);
- LIKWID_MARKER_INIT;
-
- #pragma ivdep
- for (int j=0; j<SIZE; ++j) {
- a[j] = 1.0;
- b[j] = 2.0;
- c[j] = 0.0;
- d[j] = 1.0;
- }
-
- for (int i=0;i<CPU_SETSIZE; i++)
- {
- if (CPU_ISSET(i, &cpuset))
- {
- t[i] = std::thread( calc_thread, a, b, c, d, id, num_threads);
- id++;
- if (id >= num_threads)
- break;
- }
- }
- id = 0;
- for (int i=0;i<CPU_SETSIZE; i++)
- {
- if (CPU_ISSET(i, &cpuset))
- {
- t[i].join();
- copy_time += copy_times[id]/(double)ITER;
- triad_time += triad_times[id]/(double)ITER;
- id++;
- if (id >= num_threads)
- break;
- }
- }
-
- printf("Processed %.1f Mbyte at copy benchmark in %.4f seconds: %.2f MByte/s\n",
- 1E-6*(2*SIZE*sizeof(double)),
- copy_time,
- 1E-6*((2*SIZE*sizeof(double))/copy_time));
- printf("Processed %.1f Mbyte at triad benchmark in %.4f seconds: %.2f MByte/s\n",
- 1E-6*(4*SIZE*sizeof(double)),
- triad_time,
- 1E-6*((4*SIZE*sizeof(double))/triad_time));
-
- LIKWID_MARKER_CLOSE;
-
- return 0;
-}
diff --git a/test/stream_cilk.c b/test/stream_cilk.c
deleted file mode 100644
index fcbe1a7..0000000
--- a/test/stream_cilk.c
+++ /dev/null
@@ -1,217 +0,0 @@
-#define _GNU_SOURCE
-#include <stdlib.h>
-#include <stdio.h>
-#include <sys/types.h>
-#include <sys/syscall.h>
-#include <stdint.h>
-#include <sys/time.h>
-#include <unistd.h>
-#include <errno.h>
-#include <sched.h>
-
-#include <cilk/cilk.h>
-#include <cilk/cilk_api.h>
-
-
-#define ITER 10
-#define SIZE 40000000
-
-#define gettid() syscall(SYS_gettid)
-#include <likwid.h>
-#define HLINE "-------------------------------------------------------------\n"
-
-#ifndef MIN
-#define MIN(x,y) ((x)<(y)?(x):(y))
-#endif
-
-typedef struct {
- struct timeval before;
- struct timeval after;
-} TimeData;
-
-
-void time_start(TimeData* time)
-{
- gettimeofday(&(time->before),NULL);
-}
-
-
-void time_stop(TimeData* time)
-{
- gettimeofday(&(time->after),NULL);
-}
-
-double time_print(TimeData* time)
-{
- long int sec;
- double timeDuration;
-
- sec = time->after.tv_sec - time->before.tv_sec;
- timeDuration = ((double)((sec*1000000)+time->after.tv_usec) - (double) time->before.tv_usec);
-
- return (timeDuration/1000000);
-}
-
-static int nprocessors = 0;
-
-static int
-getProcessorID(cpu_set_t* cpu_set)
-{
- int processorId;
-
- for (processorId=0;processorId<nprocessors;processorId++)
- {
- if (CPU_ISSET(processorId,cpu_set))
- {
- break;
- }
- }
- return processorId;
-}
-
-
-int threadProcessorId()
-{
- cpu_set_t cpu_set;
- CPU_ZERO(&cpu_set);
- sched_getaffinity(gettid(),sizeof(cpu_set_t), &cpu_set);
-
- return getProcessorID(&cpu_set);
-}
-
-void allocate_vector(double** ptr, uint64_t size)
-{
- int errorCode;
-
- errorCode = posix_memalign((void**) ptr, 64, size*sizeof(double));
-
- if (errorCode)
- {
- if (errorCode == EINVAL)
- {
- fprintf(stderr,
- "Alignment parameter is not a power of two\n");
- exit(EXIT_FAILURE);
- }
- if (errorCode == ENOMEM)
- {
- fprintf(stderr,
- "Insufficient memory to fulfill the request\n");
- exit(EXIT_FAILURE);
- }
- }
-}
-
-
-
-static int get_nworkers()
-{
- return __cilkrts_get_nworkers();
-}
-static int get_totalworkers()
-{
- return __cilkrts_get_total_workers();
-}
-
-static int show_thread()
-{
- int ID = __cilkrts_get_worker_number();
- printf("Thread %d TID %lu CPU %d\n", ID, gettid(), sched_getcpu());
- return 0;
-}
-
-int main(){
- int i, k;
- int nworkers, totalworkers;
- char cpuCount[20];
- double *a, *b, *c, *d;
- double sums[2000];
- cpu_set_t cpuset;
- TimeData timer;
- double triad_time, copy_time, total = 0;
-
- nprocessors = sysconf(_SC_NPROCESSORS_CONF);
-
- nworkers = cilk_spawn get_nworkers();
- totalworkers = cilk_spawn get_totalworkers();
-
- for (i=0;i<nworkers;i++)
- {
- sums[i] = 0;
- }
-
- LIKWID_MARKER_INIT;
-
- cilk_spawn allocate_vector(&a, SIZE);
- cilk_spawn allocate_vector(&b, SIZE);
- cilk_spawn allocate_vector(&c, SIZE);
- cilk_spawn allocate_vector(&d, SIZE);
- cilk_sync;
-
- for (i=0; i<SIZE; i++) {
- a[i] = 1.0;
- b[i] = 2.0;
- c[i] = 0.0;
- d[i] = 1.0;
- }
-
- time_start(&timer);
- for (k=0; k<ITER; k++)
- {
- for (i=0;i<nworkers;i++)
- {
- cilk_spawn LIKWID_MARKER_START("copy");
- }
- cilk_sync;
- cilk_for(i=0;i<SIZE;i++)
- {
- c[i] = a[i];
- }
- for (i=0;i<nworkers;i++)
- {
- cilk_spawn LIKWID_MARKER_STOP("copy");
- }
- cilk_sync;
- }
- time_stop(&timer);
- copy_time = time_print(&timer)/(double)ITER;
-
- time_start(&timer);
- for (k=0; k<ITER; k++)
- {
- for (i=0;i<nworkers;i++)
- {
- cilk_spawn LIKWID_MARKER_START("triad");
- }
- cilk_sync;
- cilk_for(i=0;i<SIZE;i++)
- {
- a[i] = b[i] + c[i] * d[i];
- }
- for (i=0;i<nworkers;i++)
- {
- cilk_spawn LIKWID_MARKER_STOP("triad");
- }
- cilk_sync;
- }
- time_stop(&timer);
- triad_time = time_print(&timer)/(double)ITER;
-
- printf("Processed %.1f Mbyte at copy benchmark in %.4f seconds: %.2f MByte/s\n",
- 1E-6*(2*SIZE*sizeof(double)),
- copy_time,
- 1E-6*((2*SIZE*sizeof(double))/copy_time));
- printf("Processed %.1f Mbyte at triad benchmark in %.4f seconds: %.2f MByte/s\n",
- 1E-6*(4*SIZE*sizeof(double)),
- triad_time,
- 1E-6*((4*SIZE*sizeof(double))/triad_time));
-
- printf("Main PID %d\n",getpid());
- for (i=0;i<nworkers;i++)
- {
- cilk_spawn show_thread();
- }
- cilk_sync;
-
- LIKWID_MARKER_CLOSE;
-}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/likwid/likwid.git
More information about the Likwid-commit
mailing list